├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
    ├── CNV.R
    ├── Merge_methylation.R
    ├── RNA_seq.R
    ├── SNP.R
    ├── TCGA_id_conversion.R
    ├── arrayDiff.R
    ├── calculate_mean_module.R
    ├── calculate_mean_profile.R
    ├── data.R
    ├── fpkm_count_conversion.r
    ├── metap.R
    ├── prepareChi.r
    ├── rep.R
    └── sysdata.rda
├── README.Rmd
├── README.md
├── data
    ├── GSE66705_sample2.rda
    ├── geneExpress.rda
    ├── gene_cov.rda
    ├── kegg_liver.rda
    ├── module.rda
    ├── profile.rda
    └── ventricle.rda
├── docs
    ├── news
    │   └── index.html
    ├── reference
    │   ├── arrayDiff.html
    │   └── index.html
    └── sitemap.xml
├── inst
    └── extdata
    │   ├── build_data.R
    │   ├── cnv
    │       ├── 00de3
    │       │   └── HONGS_p_TCGAb3_75_76_77_NSP_G.txt
    │       ├── 00e8
    │       │   └── MICHE_p_TCGAb_428_429_NS.txt
    │       ├── 00f9
    │       │   └── MINAE_p_TCGA_200_202_203_S.txt
    │       └── 0a01
    │       │   └── KNELT_p_TCGA_b123_131_S.txt
    │   ├── methy
    │       ├── 0a0b4
    │       │   ├── jhu-usc.e.H.4.lvl-3.TCGA-13-1405-01A-01D-0460-05.g.txt
    │       │   └── logs
    │       │   │   └── file1.parcel
    │       ├── 0a6b
    │       │   ├── jhu-usc.e.H.10.lvl-3.TCGA-30-1880-01A-01D-0652-05.g.txt
    │       │   └── logs
    │       │   │   └── file2.parcel
    │       ├── 0ae7
    │       │   ├── jhu-usc.H.8.l.I.TCGA-30-1714-01A-02D-0563-05.g.txt
    │       │   └── logs
    │       │   │   └── file3.parcel
    │       └── 0b32
    │       │   ├── jhu-usc.e.H.5.l.TCGA-13-1510-01A-02D-0475-05.gdc_hg38.txt
    │       │   └── logs
    │       │       └── file4.parcel
    │   └── tcga_cli
    │       ├── at.orl.TCGA-2V-A95S.xml
    │       ├── ati.org.TCGA-2Y-A9GT.xml
    │       └── ats.org.TCGA-2Y-A9GS.xml
├── man
    ├── GSE66705_sample2.Rd
    ├── Merge_methy_tcga.Rd
    ├── SNP_QC.Rd
    ├── array_preprocess.Rd
    ├── cal_mean_module.Rd
    ├── cluster_array.Rd
    ├── combine_pvalue.Rd
    ├── countToFpkm.Rd
    ├── countToTpm.Rd
    ├── differential_RNA.Rd
    ├── differential_SNP.Rd
    ├── differential_SNP_GEO.Rd
    ├── differential_SNP_tcga.Rd
    ├── differential_array.Rd
    ├── differential_cnv.Rd
    ├── differential_limma.Rd
    ├── differential_methy.Rd
    ├── fpkmToTpm.Rd
    ├── geneExpress.Rd
    ├── gene_ave.Rd
    ├── gene_cov.Rd
    ├── get_geo_array.Rd
    ├── id_conversion_TCGA.Rd
    ├── kegg_liver.Rd
    ├── module.Rd
    ├── prepare_chi.Rd
    ├── profile.Rd
    ├── repAssign.Rd
    ├── repRemove.Rd
    └── ventricle.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   ├── fpkmToTpm.R
    │   ├── test_Merge_methy_tcga.R
    │   ├── test_arrayDiff.R
    │   ├── test_cal_mean_module.R
    │   ├── test_countToFpkm.R
    │   ├── test_countToTpm.R
    │   ├── test_diff_RNA_ucsc.R
    │   ├── test_differential_CNV.R
    │   ├── test_differential_RNA.R
    │   ├── test_differential_limma.R
    │   ├── test_gene_ave.R
    │   ├── test_prepare_chi.R
    │   ├── test_repAssign.R
    │   └── test_repRemove.R
└── vignettes
    ├── .gitignore
    └── GeoTcgaData.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^data-raw$
 4 | ^README\.Rmd$
 5 | ^cran-comments\.md$
 6 | ^NEWS\.md$
 7 | ^.*\.jpg$
 8 | ^\.github$
 9 | ^\.json$
10 | ^Thumbs\.db$
11 | ^build_site.R$
12 | ^pkgdown$
13 | ^\.DS_Store
14 | ^_drake\.R$
15 | ^\.drake$
16 | ^\.drake_history$
17 | ^\.future$
18 | ^\.git$
19 | ^\.github$
20 | ^\.gitignore$
21 | ^\.Rapp.history$
22 | ^\.RData$
23 | ^\.Rbuildignore$
24 | ^\.Rhistory$
25 | ^\.lintr$
26 | ^CODE_OF_CONDUCT.md$
27 | ^CONTRIBUTING.md$
28 | ^pkgdown\.R$
29 | ^pkgdown\.sh$
30 | ^paper\.bib$
31 | ^paper\.md$
32 | ^LICENSE$
33 | ^NOTICE$
34 | vignettes/.*\.html
35 | vignettes/.*\.md
36 | vignettes/.*\.log
37 | vignettes/.*_files
38 | ^.*\.css$
39 | ^.*\.gcda$
40 | ^.*\.gcno$
41 | ^.*\.js$
42 | ^.*\.log$
43 | ^.*\.out$
44 | ^.*\.nfs.*$
45 | ^.*\.svg$
46 | ^.*\.yaml$
47 | ^.*\.yml$
48 | ^docs$
49 | ^_pkgdown\.yml$
50 | ^codemeta\.json$
51 | ^\.httr-oauth$
52 | ^CRAN-RELEASE$
53 | CONDUCT.md
54 | 
55 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: GeoTcgaData
 2 | Type: Package
 3 | Title: Processing Various Types of Data on GEO and TCGA
 4 | Version: 1.99.2
 5 | Authors@R: person(given = "Erqiang", family = "Hu", email = "13766876214@163.com", role  = c("aut", "cre"), comment = c(ORCID = "0000-0002-1798-7513"))
 6 | Description: Gene Expression Omnibus (GEO) and The Cancer Genome Atlas (TCGA) 
 7 |     provide us with a wealth of data, such as RNA-seq, DNA Methylation, SNP
 8 |     and Copy number variation data. It's easy to download data from TCGA using the 
 9 |     gdc tool, but processing these data into a format suitable for bioinformatics 
10 |     analysis requires more work. This R package was developed to handle these data.
11 | Depends: R (>= 4.2.0)
12 | License: Artistic-2.0
13 | Encoding: UTF-8
14 | RoxygenNote: 7.2.3
15 | Suggests: 
16 |     knitr,
17 |     rmarkdown,
18 |     DESeq2,
19 |     S4Vectors,
20 |     ChAMP,
21 |     impute,
22 |     tidyr,
23 |     clusterProfiler,
24 |     org.Hs.eg.db,
25 |     edgeR,
26 |     limma,
27 |     quantreg,
28 |     minfi,
29 |     IlluminaHumanMethylation450kanno.ilmn12.hg19,
30 |     dearseq,
31 |     NOISeq,
32 |     testthat (>= 3.0.0),
33 |     CATT,
34 |     TCGAbiolinks,
35 |     enrichplot,
36 |     GEOquery,
37 |     BiocGenerics
38 | VignetteBuilder: knitr
39 | Imports: 
40 |     utils,
41 |     data.table,
42 |     plyr,
43 |     cqn,
44 |     topconfects,
45 |     stats,
46 |     SummarizedExperiment,
47 |     methods
48 | Language: en-US
49 | URL: https://github.com/YuLab-SMU/GeoTcgaData
50 | BugReports: https://github.com/YuLab-SMU/GeoTcgaData/issues
51 | biocViews: GeneExpression, DifferentialExpression, RNASeq, 
52 | 	CopyNumberVariation, Microarray, Software, DNAMethylation,
53 |     DifferentialMethylation, SNP, ATACSeq, MethylationArray
54 | Config/testthat/edition: 3
55 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(Merge_methy_tcga)
 4 | export(SNP_QC)
 5 | export(array_preprocess)
 6 | export(cal_mean_module)
 7 | export(cluster_array)
 8 | export(combine_pvalue)
 9 | export(countToFpkm)
10 | export(countToTpm)
11 | export(differential_CNV)
12 | export(differential_RNA)
13 | export(differential_SNP)
14 | export(differential_SNP_GEO)
15 | export(differential_SNP_tcga)
16 | export(differential_array)
17 | export(differential_limma)
18 | export(differential_methy)
19 | export(fpkmToTpm)
20 | export(gene_ave)
21 | export(get_geo_array)
22 | export(id_conversion_TCGA)
23 | export(prepare_chi)
24 | export(repAssign)
25 | export(repRemove)
26 | import(cqn)
27 | import(methods)
28 | importFrom(SummarizedExperiment,assays)
29 | importFrom(SummarizedExperiment,colData)
30 | importFrom(plyr,rename)
31 | importFrom(stats,as.dist)
32 | importFrom(stats,cor)
33 | importFrom(stats,cutree)
34 | importFrom(stats,hclust)
35 | importFrom(stats,na.fail)
36 | importFrom(stats,p.adjust)
37 | importFrom(stats,pchisq)
38 | importFrom(stats,pnorm)
39 | importFrom(stats,qnorm)
40 | importFrom(utils,methods)
41 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # GeoTcgaData 1.99.2
 2 | 
 3 | + use SummarizedExperiment input (2023-1-29, Sun)
 4 | + fix return value of `differential_array` (2022-10-8, Sat)
 5 | + fix gene length bug in `countToTpm()` and `countToFpkm()` (2022-9-22, Thu)
 6 | + fix a bug in `id_conversion` (2022-8-27, Sat)
 7 | + fix a bug in `differential_RNA(useTopconfects = TRUE)` (2022-8-12, Fri)
 8 | + add function `methydifferential_ucsc` and `methydifferential_limma`(2021-10-24, Sun)
 9 | + update hgnc_file data(2021-10-24, Sun)
10 | + add function `differential_RNA` to do difference analysis of RNA-seq data(2021-7-20, Tue)
11 | + add data hgnc_file
12 | + update function: id_ava()
13 | + add functions: ann_merge(), countToFpkm(), countToTpm()
14 | 
15 | 


--------------------------------------------------------------------------------
/R/CNV.R:
--------------------------------------------------------------------------------
 1 | #' Do difference analysis of gene level copy number variation data
 2 | #'
 3 | #' @param cnvData data.frame of CNV data, each column is a sample, 
 4 | #' and each row is a CNV. 
 5 | #' @param sampleGroup vector of sample group
 6 | #' @param method method to do differential analysis, 
 7 | #' one of "Chisquare", "fisher",
 8 | #' and "CATT"(Cochran-Armitage trend test)
 9 | #' @param adjust.method adjust.method, one of "holm", "hochberg", "hommel", 
10 | #' "bonferroni", "BH", "BY", "fdr", and "none". 
11 | #' @param ... parameters for "Chisquare", "fisher",
12 | #' and "CATT"(Cochran-Armitage trend test)
13 | #' @return data.frame with pvalue and estimate
14 | #' @export
15 | #' 
16 | #' @examples
17 | #' \donttest{
18 | #' # use TCGAbiolinks data as example
19 | #' library(TCGAbiolinks)
20 | #' query <- GDCquery(
21 | #'         project = "TCGA-ACC",
22 | #'         data.category = "Copy Number Variation",
23 | #'         data.type = "Gene Level Copy Number",
24 | #'         access = "open"
25 | #' )
26 | #' GDCdownload(query)
27 | #' cnvData <- GDCprepare(query)
28 | #' aa <- assays(cnvData)$copy_number
29 | #' bb <- aa
30 | #' aa[bb == 2] <- 0
31 | #' aa[bb < 2] <- -1
32 | #' aa[bb > 2] <- 1
33 | #' sampleGroup <- sample(c("A", "B"), ncol(cnvData), replace = TRUE)
34 | #' diffCnv <- differential_CNV(aa, sampleGroup)
35 | #'
36 | #' # Use sangerbox CNV data as example
37 | #' cnvData <- fread("Merge_GeneLevelCopyNumber.txt")
38 | #' class(cnvData) <- "data.frame"
39 | #' rownames(cnvData) <- cnvData[, 1]
40 | #' cnvData <- cnvData[, -c(1, 2, 3)]
41 | #' sampleGroup <- sample(c("A", "B"), ncol(cnvData), replace = TRUE)
42 | #' diffCnv <- differential_CNV(cnvData, sampleGroup)
43 | #' }
44 | #' # use random data as example
45 | #' aa <- matrix(sample(c(0, 1, -1), 200, replace = TRUE), 25, 8)
46 | #' rownames(aa) <- paste0("gene", 1:25)
47 | #' colnames(aa) <- paste0("sample", 1:8)
48 | #' sampleGroup <- sample(c("A", "B"), ncol(aa), replace = TRUE)
49 | #' diffCnv <- differential_CNV(aa, sampleGroup)
50 | differential_CNV <- function(cnvData, sampleGroup,
51 |                     method = "Chisquare",
52 |                     adjust.method = "BH", ...) {
53 |     type1 <- which(sampleGroup == unique(sampleGroup)[1])  # columns of group 1
54 |     type2 <- which(sampleGroup == unique(sampleGroup)[2])  # columns of group 2
55 |     pvalue <- rep(1, nrow(cnvData))    # untested rows keep p = 1
56 |     estimate <- rep(0, nrow(cnvData))  # untested rows keep estimate = 0
57 |     for (i in seq_len(nrow(cnvData))) {
58 |         type1_freq <- table(as.character(cnvData[i, type1]))
59 |         type2_freq <- table(as.character(cnvData[i, type2]))
60 |         df <- data.frame(
61 |             type1 = as.numeric(type1_freq[c("-1", "0", "1")]),
62 |             type2 = as.numeric(type2_freq[c("-1", "0", "1")])
63 |         )
64 |         df[is.na(df)] <- 0           # a state absent from a group counts as 0
65 |         df <- df[rowSums(df) > 0, ]  # drop states seen in neither group
66 |         if (nrow(df) > 1) {          # a test needs at least two observed states
67 |             if (method == "fisher") {
68 |                 fish <- stats::fisher.test(df, ...)
69 |                 pvalue[i] <- fish$p.value
70 |                 if (nrow(df) == 2) { # odds ratio is only taken for 2 x 2 tables
71 |                         estimate[i] <- fish$estimate
72 |                 }
73 |             }
74 | 
75 |             if (method == "Chisquare") {
76 |                     pvalue[i] <- stats::chisq.test(df, ...)$p.value
77 |             }
78 | 
79 |             if(method == "CATT") {
80 |                     pvalue[i] <- CATT::CATT(table = t(df), ...)$p.value
81 |             }
82 |         }
83 |     }
84 | 
85 |     adj.P.Val <- stats::p.adjust(pvalue, method = adjust.method)
86 |     gene <- gsub("\\..*", "", rownames(cnvData))  # cut names at the first "."
87 |     result <- data.frame(gene = gene, P.Value = pvalue,
88 |         adj.P.Val = adj.P.Val, estimate = estimate)
89 |     rownames(result) <- gene
90 |     result
91 | }
92 | 
93 | 


--------------------------------------------------------------------------------
/R/Merge_methylation.R:
--------------------------------------------------------------------------------
  1 | #' Get methylation difference gene
  2 | #'
  3 | #' @title differential_methy
  4 | #' @rdname differential_methy
  5 | #' @param cpgData data.frame of cpg beta value, or SummarizedExperiment object
  6 | #' @param sampleGroup vector of sample group
  7 | #' @param groupCol group column
  8 | #' @param combineMethod method to combine the cpg pvalues, 
  9 | #' a function or one of "stouffer", "fisher" and "rhoScores".
 10 | #' @param missing_value Method to impute missing expression data,
 11 | #' one of "zero" and "knn".
 12 | #' @param cpg2gene data.frame to annotate cpg locus to gene
 13 | #' @param normMethod Method to do normalization: "PBC" or "BMIQ".
 14 | #' @param region region of genes, one of "Body", "TSS1500", "TSS200",
 15 | #' "3'UTR", "1stExon", "5'UTR", and "IGR". Only used when cpg2gene is NULL.
 16 | #' @param model if "cpg", step1: calculate difference cpgs;
 17 | #' step2: calculate difference genes.
 18 | #' if "gene", step1: calculate the methylation level of genes;
 19 | #' step2: calculate difference genes.
 20 | #' @param adjust.method character string specifying the method
 21 | #' used to adjust p-values for multiple testing.
 22 | #' See \link{p.adjust} for possible values.
 23 | #' @param ucscData Logical, whether the data comes from UCSC Xena.
 24 | #' @param adjPvalCutoff adjusted pvalue cutoff
 25 | #' @importFrom stats p.adjust
 26 | #' @return data.frame
 27 | #' @export
 28 | #' @examples
 29 | #' \donttest{
 30 | #' # use TCGAbiolinks data
 31 | #' library(TCGAbiolinks)
 32 | #' query <- GDCquery(project = "TCGA-ACC",
 33 | #'     data.category = "DNA Methylation",
 34 | #'     data.type = "Methylation Beta Value",
 35 | #'     platform = "Illumina Human Methylation 450")
 36 | #' GDCdownload(query, method = "api", files.per.chunk = 5,
 37 | #'     directory = Your_Path)
 38 | #' merge_result <- Merge_methy_tcga(Your_Path_to_DNA_Methylation_data)
 39 | #' library(ChAMP) # To avoid reporting errors
 40 | #' differential_gene <- differential_methy(cpgData = merge_result,
 41 | #'     sampleGroup = sample(c("C","T"),
 42 | #'     ncol(merge_result[[1]]), replace = TRUE))
 43 | #' }
 44 | #' # use user defined data
 45 | #' library(ChAMP)
 46 | #' cpgData <- matrix(runif(2000), nrow = 200, ncol = 10)
 47 | #' rownames(cpgData) <- paste0("cpg", seq_len(200))
 48 | #' colnames(cpgData) <- paste0("sample", seq_len(10))
 49 | #' sampleGroup <- c(rep("group1", 5), rep("group2", 5))
 50 | #' names(sampleGroup) <- colnames(cpgData)
 51 | #' cpg2gene <- data.frame(cpg = rownames(cpgData), 
 52 | #'     gene = rep(paste0("gene", seq_len(20)), 10))
 53 | #' result <- differential_methy(cpgData, sampleGroup, 
 54 | #'     cpg2gene = cpg2gene, normMethod = NULL)
 55 | #' # use SummarizedExperiment object input
 56 | #' library(ChAMP)
 57 | #' cpgData <- matrix(runif(2000), nrow = 200, ncol = 10)
 58 | #' rownames(cpgData) <- paste0("cpg", seq_len(200))
 59 | #' colnames(cpgData) <- paste0("sample", seq_len(10))
 60 | #' sampleGroup <- c(rep("group1", 5), rep("group2", 5))
 61 | #' names(sampleGroup) <- colnames(cpgData)
 62 | #' cpg2gene <- data.frame(cpg = rownames(cpgData), 
 63 | #'     gene = rep(paste0("gene", seq_len(20)), 10))
 64 | #' colData <- S4Vectors::DataFrame(
 65 | #'     row.names = colnames(cpgData),
 66 | #'     group = sampleGroup
 67 | #' )
 68 | #' data <- SummarizedExperiment::SummarizedExperiment(
 69 | #'          assays=S4Vectors::SimpleList(counts=cpgData),
 70 | #'          colData = colData)
 71 | #' result <- differential_methy(cpgData = data, 
 72 | #'     groupCol = "group", normMethod = NULL, 
 73 | #'     cpg2gene = cpg2gene)  
 74 | differential_methy  <- function(cpgData, sampleGroup,
 75 |                     groupCol,
 76 |                     # combineMethod = RobustRankAggreg::rhoScores,
 77 |                     combineMethod = "stouffer",
 78 |                     missing_value = "knn", 
 79 |                     cpg2gene = NULL,
 80 |                     normMethod = "PBC", 
 81 |                     region = "TSS1500",
 82 |                     model = "gene",
 83 |                     adjust.method = "BH",
 84 |                     adjPvalCutoff = 0.05,
 85 |                     ucscData = FALSE) {
 86 |     region <- match.arg(region, c("Body", "TSS1500", "TSS200",
 87 |         "3'UTR", "1stExon", "5'UTR", "IGR"))
 88 |     model <- match.arg(model, c("cpg", "gene"))
 89 | 
 90 |     if (inherits(cpgData,  "SummarizedExperiment")) {
 91 |         cpgData2 <- cpgData
 92 |         cpgData <- assays(cpgData2)$counts
 93 |         sampleGroup <- colData(cpgData2)[, groupCol]
 94 |         names(sampleGroup) <- rownames(colData(cpgData2))
 95 |     } else if (inherits(cpgData, "list")) {
 96 |         # a list such as the Merge_methy_tcga() result: the beta-value
 97 |         # matrix is its first element
 98 |         cpgData <- cpgData[[1]]
 99 |     }
100 | 
101 | 
102 |     if (ucscData) {
103 |         # the first column is taken as the CpG ids and moved to the row names
104 |         cpgData <- as.data.frame(cpgData)
105 |         rownames(cpgData) <- cpgData[, 1]
106 |         cpgData <- cpgData[, -1]
107 |         if (missing(sampleGroup) || is.null(sampleGroup)) {
108 |             # no groups given: use the first character of the 4th
109 |             # "-"-separated field of each sample name
110 |             sampleGroup <- lapply(colnames(cpgData), function(x) {
111 |                 strsplit(x, "-")[[1]][4]
112 |             }) |> unlist()
113 |             sampleGroup <- substring(sampleGroup, 1, 1)
114 |         }
115 |     }
116 | 
117 | 
118 | 
119 |     cpgData <- as.matrix(cpgData)
120 |     # Fill in missing values: zeros for "zero", KNN imputation otherwise
121 |     if (missing_value == "zero") {
122 |         cpgData[is.na(cpgData)] <- 0
123 |         data.m <- cpgData
124 |     } else {
125 |         data.m <- impute::impute.knn(cpgData)$data
126 |     }
127 | 
128 |     # normalize data (skipped when normMethod is NULL)
129 |     myNorm <- data.m
130 |     if (!is.null(normMethod)) {
131 |         myNorm <- ChAMP::champ.norm(beta = data.m, rgSet = NULL, 
132 |             mset = NULL, method = normMethod)
133 |     }
134 |     if (!is.null(cpg2gene)) {
135 |         cpg_gene <- cpg2gene
136 |     } else {
137 |         cpg_gene <- get_cpg_annotation(region = region)
138 |     }
139 | 
140 | 
141 |     if (model == "gene") {
142 |         cpg_gene <-  split(cpg_gene[, 2], cpg_gene[, 1])   
143 |         genes <- unlist(lapply(cpg_gene, function(x) {paste(x,collapse = ";")}))
144 |         cpg_gene <- data.frame(cpg = names(cpg_gene), gene = genes)
145 |         rownames(cpg_gene) <- cpg_gene[, 1]
146 |         myNorm <- as.data.frame(myNorm)
147 |         myNorm$gene <- cpg_gene[rownames(myNorm), 2]
148 |         # move the gene column to the front
149 |         myNorm <- myNorm[, c(ncol(myNorm), seq_len(ncol(myNorm) - 1))]
150 |         myNorm <- myNorm[!is.na(myNorm$gene), ]
151 | 
152 | 
153 |         myNorm$gene <- as.character(myNorm$gene)
154 |         myNorm2 <- repAssign(myNorm, ";")
155 |         myNorm3 <- gene_ave(myNorm2)
156 | 
157 |         ## use limma to do differential expression analysis
158 |         gene_pvalue <- differential_limma(myNorm3, group = sampleGroup,
159 |             adjust.method = adjust.method)
160 |         gene_pvalue$gene <- rownames(gene_pvalue)
161 |     } else {
162 |         # Identify Differential Methylation Positions (DMP)
163 |         myDMP <- ChAMP::champ.DMP(beta = myNorm,
164 |             pheno = sampleGroup, adjPVal = 1)
165 |         myDMP <- as.data.frame(myDMP)
166 | 
167 |         # use cpg_gene to annotate CpGs
168 |         pvalues <- cpg_gene
169 |         pvalues$pvalue <- myDMP[cpg_gene[, 1], 4]
170 |         # rownames(pvalues) <- pvalues[, 1]
171 |         pvalues <- pvalues[!is.na(pvalues$pvalue), ]
172 |         
173 |         if (is.function(combineMethod)) {
174 |             gene_pvalue <- stats::aggregate(pvalues$pvalue,
175 |                 by = list(pvalues[, 2]),
176 |                 # select by name: a 2-column cpg2gene has no 4th column
177 |                 FUN = combineMethod
178 |             )
179 |             colnames(gene_pvalue) <- c("gene", "pvalue")
180 |         } else {
181 |             aa <- pvalues$pvalue
182 |             bb <- split(aa, pvalues$gene)
183 |             gene_pvalue <- data.frame(gene = names(bb), 
184 |                 pvalue = unlist(lapply(bb, function(x) x[1])))
185 |             if (combineMethod == "stouffer") {
186 |                 
187 |                 myBetas <- myNorm[pvalues$cpg, ]
188 |                 myBetas <- split(as.data.frame(myBetas), pvalues$gene)
189 |                 correl <- lapply(myBetas, function(x) cor(t(x)))
190 |                 weights <- lapply(correl, function(x) 1/apply(x^2,1,sum))
191 |                 
192 |                 for (i in seq_len(nrow(gene_pvalue))) {
193 |                     if (length(bb[[i]]) > 1) {
194 |                         gene_pvalue[i, 2] <- sumz(bb[[i]], weights[[i]])$p
195 |                     }       
196 |                 }
197 |             }
198 | 
199 |             if (combineMethod == "fisher") {
200 |                 for (i in seq_len(nrow(gene_pvalue))) {
201 |                     if (length(bb[[i]]) > 1) {
202 |                         gene_pvalue[i, 2] <- sumlog(bb[[i]])$p
203 |                     }       
204 |                 }
205 |             }
206 |         }
207 |         
208 | 
209 | 
210 |         # get logFC of genes
211 |         myNorm2 <- myNorm[pvalues[, 1], ]
212 |         myNorm2 <- stats::aggregate(myNorm2,
213 |             by = list(pvalues[, 2]), FUN = mean)
214 | 
215 |         myNorm2 <- myNorm2[myNorm2[, 1] != "", ]
216 |         rownames(myNorm2) <- myNorm2[, 1]
217 |         myNorm2 <- myNorm2[, -1]
218 |         groups <- sort(unique(sampleGroup))
219 |         mean1 <- rowMeans(myNorm2[, sampleGroup == groups[1]], na.rm = TRUE)
220 |         mean2 <- rowMeans(myNorm2[, sampleGroup == groups[2]], na.rm = TRUE)
221 |         logFC <- mean1 - mean2            
222 | 
223 |         gene_pvalue$logFC <- logFC[gene_pvalue[, 1]]
224 |         colnames(gene_pvalue) <- c("gene", "P.Value", "logFC")
225 |         gene_pvalue$gene <- as.character(gene_pvalue$gene)
226 |         gene_pvalue$adj.P.Val <- p.adjust(gene_pvalue$P.Value,
227 |             method = adjust.method)
228 |         rownames(gene_pvalue) <- gene_pvalue$gene
229 |     }
230 |     gene_pvalue <- gene_pvalue[gene_pvalue$adj.P.Val < adjPvalCutoff, ]
231 |     return(gene_pvalue)                        
232 | }
233 | 
234 | 
235 | 
236 | #' differential_limma
237 | #'
238 | #' @param df data.frame of the omic data
239 | #' @param group a vector, group of samples.
240 | #' @param adjust.method adjust.method.
241 | #' @return data.frame
242 | #' @export
243 | #' @examples
244 | #' df <- matrix(runif(200), 25, 8)
245 | #' df <- as.data.frame(df)
246 | #' rownames(df) <- paste0("gene", 1:25)
247 | #' colnames(df) <- paste0("sample", 1:8)
248 | #' group <- sample(c("group1", "group2"), 8, replace = TRUE)
249 | #' result <- differential_limma(df = df, group = group)
250 | differential_limma <- function(df, group, adjust.method = "BH") {
251 |     groups <- unique(group)
252 |     # Relabel the first two groups by exact match. Numeric-looking labels
253 |     # (even c("0", "1")) give errors, and gsub() read labels as regex patterns.
254 |     labels <- as.character(group)
255 |     labels[which(group == groups[1])] <- "nromal"
256 |     labels[which(group == groups[2])] <- "disease"
257 |     design <- stats::model.matrix(~ 0 + factor(labels))
258 |     colnames(design) <- levels(factor(labels))
259 |     contrast.matrix <- limma::makeContrasts(
260 |         contrasts = paste(colnames(design)[2:1], collapse = "-"),
261 |         levels = colnames(design))
262 |     fit <- limma::lmFit(df, design)
263 |     fit <- limma::contrasts.fit(fit, contrast.matrix)
264 |     fit <- limma::eBayes(fit)
265 |     limma::topTable(fit, adjust.method = adjust.method, number = Inf)
266 |     ## or limma::topTable(fit, coef = 1, adjust='BH', number=Inf)
267 |     ## contrasts.fit is not necessary
268 |     # groups <- unique(group)
269 |     # group <- gsub(groups[1], "nromal", group)
270 |     # group <- gsub(groups[2], "disease", group)
271 |     # design <- stats::model.matrix(~factor(group))
272 | 
273 |     # fit2 <- lmFit(df, design)
274 |     # fit2 <- eBayes(fit2)
275 |     # topTable(fit2,coef=2, adjust='BH', number=Inf)
276 | 
277 |     ## coef parameter is not necessary:
278 |     # topTable(fit2, adjust='BH', number=Inf)
279 | }
280 | 
281 | #' Merge methylation data downloaded from TCGA
282 | #'
283 | #' When the methylation data is downloaded from TCGA, 
284 | #' each sample is saved in a folder, which contains the methylation value file 
285 | #' and the descriptive file. This function can directly 
286 | #' extract and consolidate all folders.
287 | #' @param dirr a string for the directory of methylation data downloaded
288 | #' from TCGA using the gdc tool
289 | #' @return a list: `methyResult`, the combined methylation matrix, and `cpg_info`
290 | #' @export
291 | #'
292 | #' @examples
293 | #' merge_result <- Merge_methy_tcga(system.file(file.path("extdata", "methy"),
294 | #'     package = "GeoTcgaData"))
295 | Merge_methy_tcga <- function(dirr = NULL) {
296 |     oldOptions <- options(warn = -1)  # silence warnings for this call only
297 |     on.exit(options(oldOptions), add = TRUE)
298 |     if (is.null(dirr)) stop("please give your directory of methylation data!")
299 |     tcga_dir <- dir(dirr)
300 |     if (length(tcga_dir) == 0) stop("no sample folder found in: ", dirr)
301 |     methyFile <- get_methy_df(file.path(dirr, tcga_dir[1]))
302 |     methyResult <- matrix(0, nrow = nrow(methyFile), ncol = length(tcga_dir))
303 |     rownames(methyResult) <- methyFile[, "Composite Element REF"]
304 |     samples <- character(length(tcga_dir))
305 |     methyResult[, 1] <- methyFile[, 2]  # column 2 holds the sample's values
306 |     samples[1] <- colnames(methyFile)[2]
307 |     message("file", 1, " is over")
308 |     # seq_along()[-1] is empty for a single folder, unlike 2:length()
309 |     for (i in seq_along(tcga_dir)[-1]) {
310 |         methyFile <- get_methy_df(file.path(dirr, tcga_dir[i]))
311 |         methyResult[, i] <- methyFile[, 2]
312 |         samples[i] <- colnames(methyFile)[2]
313 |         message("file", i, " is over")
314 |         gc()
315 |     }
316 |     colnames(methyResult) <- samples
317 |     cpg_info <- methyFile[, -2]  # remaining columns of the last file read
318 |     return(list(methyResult = methyResult, cpg_info = cpg_info))
319 | }
320 | 
321 | #' Read methylated data file and turn it into data frame
322 | #'
323 | #' @param filePath Path of files
324 | #' @return data.frame
325 | #' @noRd
326 | get_methy_df <- function(filePath) {
327 |     methyDir <- dir(filePath)
328 |     hits <- grep("jhu-usc", methyDir, value = TRUE)
329 |     if (length(hits) == 0) {
330 |         stop("no 'jhu-usc' methylation file found in: ", filePath)
331 |     }
332 |     fileName <- hits[length(hits)]  # last match wins when several exist
333 |     sample <- unlist(strsplit(fileName, "\\."))[6]  # 6th "."-separated field
334 |     methyFile <- data.table::fread(file.path(filePath, fileName), header = TRUE)
335 |     class(methyFile) <- "data.frame"
336 |     colnames(methyFile)[2] <- sample  # name the value column after the sample
337 |     return(methyFile)
338 | }
339 | 
340 | 
341 | get_cpg_annotation <- function(region = "TSS1500") {
342 |     # Expand the 450k annotation into one cpg/gene/region row per entry and keep
343 |     # rows matching `region`. Original note: library() the package to avoid errors.
344 |     annoPkg <-
345 |         IlluminaHumanMethylation450kanno.ilmn12.hg19::IlluminaHumanMethylation450kanno.ilmn12.hg19
346 |     annoTable <- as.data.frame(minfi::getAnnotation(annoPkg))
347 |     keepCols <- c("Name", "UCSC_RefGene_Name", "UCSC_RefGene_Group")
348 |     annotated <- annoTable[, keepCols]
349 |     annotated <- annotated[annotated[, 2] != "", ]  # drop CpGs without a gene
350 |     # gene and region fields are ";"-separated lists: one output row per entry
351 |     geneSets <- strsplit(annotated[, 2], ";")
352 |     regionSets <- strsplit(annotated[, 3], ";")
353 |     longTable <- data.frame(
354 |         cpg = rep(annotated[, 1], times = lengths(geneSets)),
355 |         gene = unlist(geneSets), region = unlist(regionSets))
356 |     matched <- longTable[grepl(region, longTable$region), ]
357 |     return(unique(matched))
358 | }
359 | 


--------------------------------------------------------------------------------
/R/RNA_seq.R:
--------------------------------------------------------------------------------
  1 | #' Do difference analysis of RNA-seq data
  2 | #'
  3 | #' @title differential_RNA
  4 | #' @rdname differential_RNA
  5 | #' @param counts a dataframe or numeric matrix of raw counts data, 
  6 | #' or SummarizedExperiment object
  7 | #' @param group sample groups
  8 | #' @param groupCol group column
  9 | #' @param method one of "DESeq2", "edgeR" , "limma", "dearseq",
 10 | #' "NOISeq", "Wilcoxon", and "auto".
 11 | #' @param geneLength a vector of gene length.
 12 | #' @param gccontent a vector of gene GC content.
 13 | #' @param filter if TRUE, use filterByExpr to filter genes.
 14 | #' @param edgeRNorm if TRUE, use edgeR to do normalization for dearseq method.
 15 | #' @param adjust.method character string specifying the method used to
 16 | #' adjust p-values for multiple testing.
 17 | #' See \link{p.adjust} for possible values.
 18 | #' @param useTopconfects if TRUE, use topconfects to provide a
 19 | #'    more biologically useful ranked gene list.
 20 | #' @param ucscData Logical, whether the data comes from UCSC Xena.
 21 | #' @importFrom plyr rename
 22 | #' @importFrom SummarizedExperiment assays
 23 | #' @importFrom SummarizedExperiment colData
 24 | #' @importFrom utils methods
 25 | #' @import methods
 26 | #' @import cqn
 27 | #' @return data.frame
 28 | #' @export
 29 | #'
 30 | #' @examples
 31 | #' \donttest{
 32 | #' # use `TCGAbiolinks` to download TCGA data
 33 | #' library(TCGAbiolinks)
 34 | #'
 35 | #' query <- GDCquery(
 36 | #'     project = "TCGA-ACC",
 37 | #'     data.category = "Transcriptome Profiling",
 38 | #'     data.type = "Gene Expression Quantification",
 39 | #'     workflow.type = "STAR - Counts"
 40 | #' )
 41 | #'
 42 | #' GDCdownload(query,
 43 | #'     method = "api", files.per.chunk = 3,
 44 | #'     directory = Your_Path
 45 | #' )
 46 | #'
 47 | #' dataRNA <- GDCprepare(
 48 | #'     query = query, directory = Your_Path,
 49 | #'     save = TRUE, save.filename = "dataRNA.RData"
 50 | #' )
 51 | #' ## get raw count matrix
 52 | #' dataPrep <- TCGAanalyze_Preprocessing(
 53 | #'     object = dataRNA,
 54 | #'     cor.cut = 0.6,
 55 | #'     datatype = "STAR - Counts"
 56 | #' )
 57 | #'
 58 | #' # Use `differential_RNA` to do difference analysis.
 59 | #' # We provide the data of human gene length and GC content in `gene_cov`.
 60 | #' group <- sample(c("grp1", "grp2"), ncol(dataPrep), replace = TRUE)
 61 | #' library(cqn) # To avoid reporting errors: there is no function "rq"
 62 | #' ## get gene length and GC content
 63 | #' library(org.Hs.eg.db)
 64 | #' genes_bitr <- bitr(rownames(gene_cov),
 65 | #'     fromType = "ENTREZID", toType = "ENSEMBL",
 66 | #'     OrgDb = org.Hs.eg.db, drop = TRUE
 67 | #' )
 68 | #' genes_bitr <- genes_bitr[!duplicated(genes_bitr[, 2]), ]
 69 | #' gene_cov2 <- gene_cov[genes_bitr$ENTREZID, ]
 70 | #' rownames(gene_cov2) <- genes_bitr$ENSEMBL
 71 | #' genes <- intersect(rownames(dataPrep), rownames(gene_cov2))
 72 | #' dataPrep <- dataPrep[genes, ]
#' geneLength <- gene_cov2[genes, "length"]
#' gccontent <- gene_cov2[genes, "GC"]
 75 | #' names(geneLength) <- names(gccontent) <- genes
 76 | #' ##    Difference analysis
 77 | #' DEGAll <- differential_RNA(
 78 | #'     counts = dataPrep, group = group,
 79 | #'     geneLength = geneLength, gccontent = gccontent
 80 | #' )
 81 | #' # Use `clusterProfiler` to do enrichment analytics:
 82 | #' diffGenes <- DEGAll$logFC
 83 | #' names(diffGenes) <- rownames(DEGAll)
 84 | #' diffGenes <- sort(diffGenes, decreasing = TRUE)
 85 | #' library(clusterProfiler)
 86 | #' library(enrichplot)
 87 | #' library(org.Hs.eg.db)
 88 | #' gsego <- gseGO(gene = diffGenes, OrgDb = org.Hs.eg.db, keyType = "ENSEMBL")
 89 | #' dotplot(gsego)
 90 | #' }
 91 | #' # use user-defined data
 92 | #' df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
 93 | #' df <- as.data.frame(df)
 94 | #' rownames(df) <- paste0("gene", 1:25)
 95 | #' colnames(df) <- paste0("sample", 1:16)
 96 | #' group <- sample(c("group1", "group2"), 16, replace = TRUE)
#' result <- differential_RNA(counts = df, group = group,
#'     filter = FALSE, method = "Wilcoxon")
 99 | #' # use SummarizedExperiment object input
100 | #' df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
101 | #' rownames(df) <- paste0("gene", 1:25)
102 | #' colnames(df) <- paste0("sample", 1:16)
103 | #' group <- sample(c("group1", "group2"), 16, replace = TRUE)
104 | #' 
105 | #' nrows <- 200; ncols <- 20
106 | #'  counts <- matrix(
107 | #'    runif(nrows * ncols, 1, 1e4), nrows,
108 | #'    dimnames = list(paste0("cg",1:200),paste0("S",1:20))
109 | #' )
110 | #' 
111 | #' colData <- S4Vectors::DataFrame(
112 | #'   row.names = paste0("sample", 1:16),
113 | #'   group = group
114 | #' )
115 | #' data <- SummarizedExperiment::SummarizedExperiment(
116 | #'          assays=S4Vectors::SimpleList(counts=df),
117 | #'          colData = colData)
118 | #' 
#' result <- differential_RNA(counts = data, groupCol = "group",
#'     filter = FALSE, method = "Wilcoxon")
121 | #' @importFrom plyr rename
122 | #' @import cqn
differential_RNA <- function(counts, group, groupCol, method = "limma",
                    geneLength = NULL,
                    gccontent = NULL, filter = TRUE, edgeRNorm = TRUE,
                    adjust.method = "BH", useTopconfects = TRUE,
                    ucscData = FALSE) {
    # Differential expression analysis of RNA-seq counts between sample
    # groups.
    #
    # counts         count matrix/data.frame (genes x samples), or a
    #                SummarizedExperiment whose `counts` assay is used.
    # group          group label of every sample (ignored for a
    #                SummarizedExperiment, where `groupCol` names the
    #                colData column to use instead).
    # method         one of "DESeq2", "edgeR", "limma", "dearseq",
    #                "Wilcoxon", "NOISeq" or "auto".
    # geneLength,
    # gccontent      named per-gene covariates; when both are given, cqn
    #                is used to correct for them.
    # filter         if TRUE, genes are filtered with edgeR::filterByExpr.
    # edgeRNorm      dearseq only: test edgeR log-cpm values instead of
    #                the raw counts.
    # adjust.method  multiple-testing correction method.
    # useTopconfects if TRUE, topconfects columns are appended to the
    #                DESeq2 / edgeR / limma results.
    # ucscData       if TRUE, `counts` is treated as a UCSC Xena table.
    #
    # Returns a data.frame; the columns depend on the method. Rows with a
    # missing P.Value are removed.
    method <- match.arg(method, c("DESeq2", "edgeR", "limma",
        "dearseq", "Wilcoxon", "NOISeq", "auto"))

    if (ucscData) {
        # The first column holds the gene ids (version suffix stripped);
        # the values are back-transformed with round(2^x) - 1, i.e. the
        # inverse of log2(count + 1).
        class(counts) <- "data.frame"
        counts[, 1] <- gsub("\\..*", "", counts[, 1])
        rownames(counts) <- counts[, 1]
        counts <- counts[, -1]
        counts <- round(2^counts) - 1
    }

    if (inherits(counts, "SummarizedExperiment")) {
        se <- counts
        counts <- assays(se)$counts
        group <- colData(se)[, groupCol]
        names(group) <- rownames(colData(se))
    }

    # Keep the first occurrence of every sample name. A matrix without
    # column names is left untouched (duplicated(NULL) would otherwise
    # select zero columns).
    if (!is.null(colnames(counts))) {
        cols <- !duplicated(colnames(counts))
        counts <- counts[, cols, drop = FALSE]
        group <- group[cols]
    }

    # Only "auto" is resolved here; an explicitly requested method must
    # never be overwritten.
    if (method == "auto") {
        method <- if (min(table(group)) > 4) "Wilcoxon" else "limma"
    }

    ## use cqn to correct the bias
    correct <- TRUE
    uCovar <- NULL
    if (is.null(geneLength) || is.null(gccontent)) {
        correct <- FALSE
    } else {
        # Restrict to genes that have both covariates AND a count row.
        genes_gc <- intersect(names(geneLength), names(gccontent))
        genes_gc <- intersect(genes_gc, rownames(counts))
        uCovar <- data.frame(length = geneLength[genes_gc],
            gccontent = gccontent[genes_gc])
        rownames(uCovar) <- genes_gc
        counts <- counts[genes_gc, ]
    }
    d.mont <- edgeR::DGEList(counts = counts, group = group, genes = uCovar)
    if (filter) {
        keep <- edgeR::filterByExpr(d.mont)
        d.mont <- d.mont[keep, keep.lib.sizes = FALSE]
        counts <- counts[keep, ]
    }
    if (correct) {
        geneLength <- geneLength[rownames(counts)]
        gccontent <- gccontent[rownames(counts)]
        cqn.subset <- cqn::cqn(counts, lengths = geneLength, x = gccontent)
    }

    if (method == "DESeq2") {
        coldata <- data.frame(group)

        dds <- DESeq2::DESeqDataSetFromMatrix(
            countData = counts,
            colData = coldata, design = ~group
        )

        if (correct) {
            cqnOffset <- cqn.subset$glm.offset
            cqnNormFactors <- exp(cqnOffset)
            ## divide out the geometric mean
            ## https://support.bioconductor.org/p/89239/
            ## https://support.bioconductor.org/p/95683/
            normFactors <- cqnNormFactors / exp(rowMeans(log(cqnNormFactors)))
            DESeq2::normalizationFactors(dds) <- normFactors
        }
        DEGAll <- DESeq2::DESeq(dds)
        DEGAll_table <- NULL
        if (useTopconfects) {
            DEGAll_table <-
                topconfects::deseq2_confects(DEGAll, step = 0.05)$table
            rownames(DEGAll_table) <- DEGAll_table$name
        }
        DEGAll <- DEGAll |>
            DESeq2::results(pAdjustMethod = adjust.method) |>
            as.data.frame() |>
            rename(c("log2FoldChange" = "logFC")) |>
            rename(c("pvalue" = "P.Value")) |>
            rename(c("padj" = "adj.P.Val"))
        # Append the covariates that were supplied and move exactly those
        # new columns to the front; nothing is reordered when none exist.
        nBase <- ncol(DEGAll)
        if (!is.null(geneLength)) {
            DEGAll$length <- geneLength[rownames(DEGAll)]
        }
        if (!is.null(gccontent)) {
            DEGAll$gccontent <- gccontent[rownames(DEGAll)]
        }
        nAdded <- ncol(DEGAll) - nBase
        if (nAdded > 0) {
            DEGAll <- DEGAll[, c(nBase + seq_len(nAdded), seq_len(nBase))]
        }
        if (!is.null(DEGAll_table)) {
            genes <- intersect(rownames(DEGAll), rownames(DEGAll_table))
            DEGAll <- cbind(DEGAll[genes, ], DEGAll_table[genes, ])
            DEGAll <- DEGAll[order(DEGAll$P.Value), ]
        }
    } else {
        if (correct) {
            ## with cqn, there is no need to normalize
            ## using the normalization tools
            ## from edgeR, such as calcNormFactors.
            d.mont$offset <- cqn.subset$glm.offset
        } else {
            ## TMM Normalization
            d.mont <- edgeR::calcNormFactors(d.mont, method = "TMM")
        }
        if (method == "edgeR") {
            design <- stats::model.matrix(~ d.mont$samples$group)
            if (min(table(d.mont$samples$group)) > 1) {
                d.mont <- edgeR::estimateDisp(d.mont, design) |>
                    edgeR::estimateGLMCommonDisp(design = design)
                DEGAll <- edgeR::glmQLFit(d.mont, design = design)
                DEGAll_table <- NULL
                if (useTopconfects) {
                    DEGAll_table <- topconfects::edger_confects(DEGAll,
                        fdr = 0.05,
                        coef = ncol(DEGAll$design),
                        step = 0.05
                    )$table
                    rownames(DEGAll_table) <- DEGAll_table$name
                }
                DEGAll <- DEGAll |>
                    edgeR::glmQLFTest(coef = ncol(DEGAll$design)) |>
                    edgeR::topTags(n = Inf, adjust.method = adjust.method) |>
                    as.data.frame() |>
                    rename(c("FDR" = "adj.P.Val")) |>
                    rename(c("PValue" = "P.Value"))
                if (!is.null(DEGAll_table)) {
                    genes <- intersect(rownames(DEGAll), rownames(DEGAll_table))
                    DEGAll <- cbind(DEGAll[genes, ], DEGAll_table[genes, ])
                    DEGAll <- DEGAll[order(DEGAll$P.Value), ]
                }
            } else {
                # A group with a single sample: no dispersion can be
                # estimated, so the model is fitted with dispersion 0.
                DEGAll <- edgeR::glmFit(d.mont, dispersion = 0)
                DEGAll <- DEGAll |>
                    edgeR::glmLRT(coef = ncol(DEGAll$design)) |>
                    edgeR::topTags(n = Inf, adjust.method = adjust.method) |>
                    as.data.frame() |>
                    rename(c("FDR" = "adj.P.Val")) |>
                    rename(c("PValue" = "P.Value"))
            }
        }

        if (method == "limma") {
            comparison <- paste(unique(group), collapse = "-")
            group <- factor(group)
            design <- stats::model.matrix(~ 0 + group)
            colnames(design) <- levels(group)
            contrast.matrix <- limma::makeContrasts(
                contrasts = comparison,
                levels = design
            )
            fit <- limma::voom(d.mont, design = design, plot = FALSE) |>
                limma::lmFit(design) |>
                limma::contrasts.fit(contrast.matrix)
            DEGAll_table <- NULL
            if (useTopconfects) {
                # Confects are taken from the contrast fit so that they
                # describe the between-group difference; with the
                # `~ 0 + group` design, coefficient 1 of the raw fit is
                # only the mean of the first group.
                DEGAll_table <- topconfects::limma_confects(fit,
                    coef = 1,
                    fdr = 0.05
                )$table
                rownames(DEGAll_table) <- DEGAll_table$name
            }
            DEGAll <- fit |>
                limma::eBayes() |>
                limma::topTable(number = Inf, adjust.method = adjust.method)
            if (!is.null(DEGAll_table)) {
                genes <- intersect(rownames(DEGAll), rownames(DEGAll_table))
                DEGAll <- cbind(DEGAll[genes, ], DEGAll_table[genes, ])
                DEGAll <- DEGAll[order(DEGAll$P.Value), ]
            }
        }

        if (method == "dearseq") {
            # Encode the groups as 1, 2, ... in order of first appearance
            # (also works when `group` is a factor).
            groupLevels <- as.character(unique(group))
            groupCode <- as.numeric(factor(as.character(group),
                levels = groupLevels))
            conditions <- matrix(groupCode, ncol = 1)
            dearseqTest <- "asymptotic"
            if (edgeRNorm) {
                count_norm <- edgeR::cpm(d.mont, log = TRUE)
                DEGAll <- dearseq::dear_seq(
                    exprmat = count_norm, variables2test = conditions,
                    which_test = dearseqTest, parallel_comp = FALSE,
                    preprocessed = TRUE,
                    padjust_methods = adjust.method
                )
            } else {
                DEGAll <- dearseq::dear_seq(
                    exprmat = as.matrix(counts), variables2test = conditions,
                    which_test = dearseqTest, parallel_comp = FALSE,
                    preprocessed = FALSE,
                    padjust_methods = adjust.method
                )
            }
            DEGAll <- DEGAll$pvals |>
                rename(c("adjPval" = "adj.P.Val")) |>
                rename(c("rawPval" = "P.Value"))
        }

        if (method == "Wilcoxon") {
            # Gene-wise rank-sum test on log-cpm values between the first
            # two groups (in order of first appearance).
            count_norm <- edgeR::cpm(d.mont, log = TRUE) |> as.data.frame()
            pvalues <- rep(0, nrow(count_norm))

            count_disease <- as.matrix(count_norm[, group == unique(group)[1]])
            count_normal <- as.matrix(count_norm[, group == unique(group)[2]])
            for (i in seq_along(pvalues)) {
                pvalues[i] <- stats::wilcox.test(count_disease[i, ],
                    count_normal[i, ])$p.value
            }
            fdr <- stats::p.adjust(pvalues, method = adjust.method)
            DEGAll <- data.frame(P.Value = pvalues, adj.P.Val = fdr)
            rownames(DEGAll) <- rownames(count_norm)
        }

        if (method == "NOISeq") {
            conditions <- factor(group)
            data <- NOISeq::readData(data = counts,
                factors = as.data.frame(conditions))
            res <- NOISeq::noiseqbio(data,
                k = 0.5, norm = "tmm", factor = "conditions",
                random.seed = 12345, filter = 1, cv.cutoff = 100, cpm = 1
            )
            # NOTE(review): the NOISeq `prob` column is copied into both
            # P.Value and adj.P.Val; confirm callers interpret it as a
            # probability rather than a p-value.
            DEGAll <- NOISeq::degenes(res, q = 0, M = NULL) |>
                rename(c("prob" = "P.Value"))
            DEGAll$adj.P.Val <- DEGAll$P.Value
        }
    }
    if ("P.Value" %in% colnames(DEGAll)) {
        DEGAll <- DEGAll[!is.na(DEGAll[, "P.Value"]), ]
    }

    return(DEGAll)
}
362 | 


--------------------------------------------------------------------------------
/R/SNP.R:
--------------------------------------------------------------------------------
  1 | #' Do difference analysis of SNP data
  2 | #'
  3 | #' @param snpDf data.frame of SNP data, each column is a sample, 
  4 | #' and each row is a SNP. 
  5 | #' @param sampleGroup vector of sample group.
  6 | #' @param combineMethod Method of combining the
  7 | #' pvalue of multiple snp in a gene.
  8 | #' @return data.frame
  9 | #' @export
 10 | #'
 11 | #' @examples
 12 | #' \donttest{
 13 | #' library(TCGAbiolinks)
 14 | #' query <- GDCquery(
 15 | #'     project = "TCGA-CHOL",
 16 | #'     data.category = "Simple Nucleotide Variation",
 17 | #'     access = "open",
 18 | #'     legacy = FALSE,
 19 | #'     data.type = "Masked Somatic Mutation",
 20 | #'     workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
 21 | #' )
 22 | #' GDCdownload(query)
 23 | #' data_snp <- GDCprepare(query)
 24 | #' samples <- unique(data_snp$Tumor_Sample_Barcode)
 25 | #' sampleGroup <- sample(c("A", "B"), length(samples), replace = TRUE)
 26 | #' names(sampleGroup) <- samples
 27 | #' pvalue <- differential_SNP_tcga(snpData = data_snp, 
 28 | #'     sampleGroup = sampleGroup)
 29 | #' }
 30 | #' # use demo data
 31 | #' snpDf <- matrix(sample(c("mutation", NA), 100, replace = TRUE), 10, 10)
 32 | #' snpDf <- as.data.frame(snpDf)
 33 | #' sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
 34 | #' result <- differential_SNP(snpDf, sampleGroup)
 35 | differential_SNP <- function(snpDf, sampleGroup, combineMethod = min) {
 36 |     snpDf[!is.na(snpDf)] <- "mutation"
 37 |     snpDf[is.na(snpDf)] <- "wild"
 38 |     sampleGroup <- sampleGroup[!is.na(sampleGroup)]
 39 |     type1 <- which(sampleGroup == names(table(sampleGroup))[1])
 40 |     type2 <- which(sampleGroup == names(table(sampleGroup))[2])
 41 |     pvalue <- rep(0, nrow(snpDf))
 42 |     estimate <- rep(0, nrow(snpDf))
 43 |     for (i in seq_len(nrow(snpDf))) {
 44 |         type1_freq <- table(as.character(snpDf[i, type1]))
 45 |         type2_freq <- table(as.character(snpDf[i, type2]))
 46 |         df <- data.frame(
 47 |             type1 = as.numeric(type1_freq[c("wild", "mutation")]),
 48 |             type2 = as.numeric(type2_freq[c("wild", "mutation")])
 49 |         )
 50 |         df[is.na(df)] <- 0
 51 |         fish <- stats::fisher.test(df)
 52 |         pvalue[i] <- fish$p.value
 53 |         estimate[i] <- fish$estimate
 54 |     }
 55 |     names(pvalue) <- names(estimate) <- sub("_.*", "", rownames(snpDf))
 56 |     if (!is.null(combineMethod)) {
 57 |         pvalue <- stats::aggregate(pvalue, by = list(names(pvalue)),
 58 |             FUN = combineMethod)
 59 |         estimate <- stats::aggregate(estimate,
 60 |             by = list(names(estimate)), FUN = mean)
 61 |         return(data.frame(gene = pvalue[, 1], pvalue = pvalue[, 2],
 62 |             estimate = estimate[, 2]))
 63 |     } else {
 64 |         return(data.frame(pvalue = pvalue, estimate = estimate))
 65 |     }
 66 | }
 67 | 
 68 | #' combine pvalues of SNP difference analysis result
 69 | #'
 70 | #' @param snpResult data.frame of SNP difference analysis result.
 71 | #' @param snp2gene data frame of two column: snp and gene.
 72 | #' @param combineMethod Method of combining the
 73 | #' pvalue of multiple snp in a gene.
 74 | #' @return data.frame
 75 | #' @export
 76 | #' @examples
 77 | #' snpResult <- data.frame(pvalue = runif(100), estimate = runif(100))
 78 | #' rownames(snpResult) <- paste0("snp", seq_len(100))
 79 | #' snp2gene <- data.frame(snp = rownames(snpResult), 
 80 | #'     gene = rep(paste0("gene", seq_len(20)), 5))
 81 | #' result <- combine_pvalue(snpResult, snp2gene)
 82 | combine_pvalue <- function(snpResult, snp2gene, combineMethod = min) {
 83 |         pvalue <- snpResult$pvalue
 84 |         estimate <- snpResult$estimate
 85 |         genes <- snp2gene[, 2]
 86 |         names(genes) <- snp2gene[, 1]
 87 |         snps <- rownames(snpResult)
 88 |         names(pvalue) <- names(estimate) <- genes[snps]
 89 |         pvalue <- stats::aggregate(pvalue, by = list(names(pvalue)),
 90 |             FUN = combineMethod)
 91 |         estimate <- stats::aggregate(estimate, by = list(names(estimate)),
 92 |             FUN = mean)
 93 |         return(data.frame(gene = pvalue[, 1], pvalue = pvalue[, 2],
 94 |             estimate = estimate[, 2]))
 95 | 
 96 | }
 97 | 
 98 | #' Do difference analysis of SNP data downloaded from TCGAbiolinks
 99 | #'
100 | #' @param snpData data.frame of SNP data downloaded from TCGAbiolinks
101 | #' @param sampleGroup vector of sample group
102 | #' @param combineMethod Method of combining the pvalue of
103 | #' multiple snp in a gene.
104 | #' @return data.frame
105 | #' @export
106 | #'
107 | #' @examples
108 | #' \donttest{
109 | #' library(TCGAbiolinks)
110 | #' query <- GDCquery(
111 | #'     project = "TCGA-CHOL",
112 | #'     data.category = "Simple Nucleotide Variation",
113 | #'     access = "open",
114 | #'     legacy = FALSE,
115 | #'     data.type = "Masked Somatic Mutation",
116 | #'     workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
117 | #' )
118 | #' GDCdownload(query)
119 | #' data_snp <- GDCprepare(query)
120 | #' samples <- unique(data_snp$Tumor_Sample_Barcode)
121 | #' sampleGroup <- sample(c("A", "B"), length(samples), replace = TRUE)
122 | #' names(sampleGroup) <- samples
123 | #' pvalue <- differential_SNP_tcga(snpData = data_snp, 
124 | #'     sampleGroup = sampleGroup)
125 | #' }
126 | #' # use demo data
127 | #' snpDf <- matrix(sample(c("mutation", NA), 100, replace = TRUE), 10, 10)
128 | #' snpDf <- as.data.frame(snpDf)
129 | #' sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
130 | #' result <- differential_SNP(snpDf, sampleGroup)
131 | differential_SNP_tcga <- function(snpData, sampleGroup, combineMethod = NULL) {
132 |     Tumor_Sample_Barcode <- Variant_Classification <- NULL
133 |     snpName <- paste(snpData$Hugo_Symbol, snpData$Start_Position, sep = "_")
134 |     # snpData <- snpData[, c("Hugo_Symbol", "Start_Position", "Chromosome",
135 |     #         "Variant_Classification", "Tumor_Sample_Barcode",
136 |     #         "Variant_Type", "dbSNP_RS", "Mutation_Status",
137 |     #         # "MAX_AF",
138 |     #         "Reference_Allele", "Tumor_Seq_Allele1", "Tumor_Seq_Allele2",
139 |     #         "Match_Norm_Validation_Allele1", "Match_Norm_Validation_Allele2")]
140 |     snpData <- snpData[, c("Variant_Classification", "Tumor_Sample_Barcode")]
141 |     snpData$snp <- snpName
142 |     snpData <- tidyr::spread(snpData, Tumor_Sample_Barcode,
143 |         Variant_Classification)
144 |     snpData <- as.data.frame(snpData)
145 |     i <- match(colnames(snpData), names(sampleGroup))
146 |     sampleGroup <- sampleGroup[i]
147 |     rownames(snpData) <- snpData$snp
148 |     snpData <- snpData[, -1]
149 |     pvalue <- differential_SNP(snpDf = snpData, sampleGroup = sampleGroup,
150 |         combineMethod = combineMethod)
151 |     return(pvalue)
152 | }
153 | 
#' Do difference analysis of SNP data downloaded from GEO
#'
#' For every SNP (row) the genotype counts of the first two sample groups
#' are compared. SNPs with fewer than two observed genotypes are not
#' tested and get pvalue 1.
#'
#' @param snpData data.frame of SNP data downloaded from GEO, each column
#' is a sample and each row is a SNP; the values are genotypes.
#' @param sampleGroup vector of sample group, one entry per column of
#' snpData. Samples whose group is NA are excluded.
#' @param method one of "Chisquare", "fisher",
#' and "CATT"(Cochran-Armitage trend test)
#' @return data.frame with columns pvalue and estimate. estimate is the
#' odds ratio of fisher.test for SNPs with exactly two genotypes and
#' method "fisher", and 0 otherwise.
#' @export
#' @examples
#' \donttest{
#' file1 <- read.table("GSE66903_series_matrix.txt.gz",
#'     fill=TRUE, comment.char="!", header = TRUE)
#' rownames(file1) <- file1[, 1]
#' snpData <- file1[, -1]
#' snpData <- SNP_QC(snpData)
#' sampleGroup <- sample(c("A", "B"), ncol(snpData), replace = TRUE)
#' names(sampleGroup) <- colnames(snpData)
#' result1 <- differential_SNP_GEO(snpData = snpData,
#'     sampleGroup = sampleGroup, method = "Chisquare")
#' }
#' # use demo data
#' snpDf <- matrix(sample(c("AA", "Aa", "aa"), 100, replace = TRUE), 10, 10)
#' snpDf <- as.data.frame(snpDf)
#' sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
#' result <- differential_SNP_GEO(snpDf, sampleGroup, method = "fisher")
differential_SNP_GEO <- function(snpData, sampleGroup, method = "Chisquare") {
    method <- match.arg(method, c("Chisquare", "fisher", "CATT"))
    snpDf <- as.matrix(snpData)

    # Drop samples without a group label. When sampleGroup has one entry
    # per column, the same samples are removed from the data so that the
    # group indices below still point at the right columns.
    hasGroup <- !is.na(sampleGroup)
    if (length(sampleGroup) == ncol(snpDf)) {
        snpDf <- snpDf[, hasGroup, drop = FALSE]
    }
    sampleGroup <- sampleGroup[hasGroup]

    groupNames <- names(table(sampleGroup))
    type1 <- which(sampleGroup == groupNames[1])
    type2 <- which(sampleGroup == groupNames[2])
    pvalue <- rep(1, nrow(snpDf))
    estimate <- rep(0, nrow(snpDf))
    for (i in seq_len(nrow(snpDf))) {
        geno1 <- snpDf[i, type1]
        geno2 <- snpDf[i, type2]
        # Genotypes seen in the compared samples, in a fixed (sorted)
        # order; sort() also removes missing values.
        types <- sort(unique(c(geno1, geno2)))
        if (length(types) < 2) {
            # A single genotype carries no information: keep pvalue 1.
            next
        }
        counts <- cbind(
            type1_freq = as.numeric(table(factor(geno1, levels = types))),
            type2_freq = as.numeric(table(factor(geno2, levels = types)))
        )
        rownames(counts) <- types

        if (method == "fisher") {
            fish <- stats::fisher.test(counts)
            pvalue[i] <- fish$p.value
            # fisher.test reports an odds ratio for 2 x 2 tables only.
            if (nrow(counts) == 2) {
                estimate[i] <- fish$estimate
            }
        } else if (method == "Chisquare") {
            pvalue[i] <- stats::chisq.test(counts)$p.value
        } else if (nrow(counts) > 2) {
            # NOTE(review): the trend test is still limited to SNPs with
            # more than two genotypes, as before; confirm whether
            # CATT::CATT accepts 2 x 2 tables before relaxing this.
            pvalue[i] <- CATT::CATT(table = t(counts))$p.value
        }
    }
    names(pvalue) <- names(estimate) <- rownames(snpDf)

    return(data.frame(pvalue = pvalue, estimate = estimate))
}
219 | 
220 | 
# Minor allele frequency of one SNP.
#
# x: character vector of genotypes, one string per sample whose characters
#    are the alleles (e.g. "AA", "Aa"); "NoCall" and NA entries are ignored.
# Returns the frequency of the rarest observed allele, or 0 when fewer than
# two different alleles are observed (monomorphic SNP or no calls at all).
get_maf <- function(x) {
    x <- x[!is.na(x) & x != "NoCall"]
    freq <- strsplit(x, split = "") |> unlist() |> table()
    if (length(freq) < 2) {
        # Only one allele present: the minor allele has frequency 0, not
        # min(freq) / sum(freq) == 1.
        return(0)
    }
    min(freq) / sum(freq)
}
226 | 
# Hardy-Weinberg equilibrium test of one SNP.
#
# x: character vector of genotypes, one two-character string per sample
#    (one character per allele, e.g. "AA", "Aa"); "NoCall" and NA entries
#    are ignored, and "Aa" / "aA" are counted as the same genotype.
#    Assumes exactly two alleles per genotype string; only the first two
#    characters are read.
#
# The observed genotype counts are compared with the counts expected from
# the allele frequencies (n * p_i^2 for homozygotes, 2 * n * p_i * p_j for
# heterozygotes) using a chi-square goodness-of-fit statistic without
# continuity correction and k * (k - 1) / 2 degrees of freedom for k
# alleles. Genotype classes that were not observed enter with count 0.
#
# Returns the p-value; 1 when fewer than two alleles are observed, because
# no deviation can be assessed then.
get_hwe <- function(x) {
    x <- x[!is.na(x) & x != "NoCall"]
    alleles <- strsplit(x, split = "")
    allele1 <- vapply(alleles, `[`, character(1), 1)
    allele2 <- vapply(alleles, `[`, character(1), 2)

    # Keep genotypes for which both alleles could be read.
    complete <- !is.na(allele1) & !is.na(allele2)
    allele1 <- allele1[complete]
    allele2 <- allele2[complete]
    n <- length(allele1)

    alleleSet <- sort(unique(c(allele1, allele2)))
    k <- length(alleleSet)
    if (k < 2) {
        return(1)
    }

    # Encode every genotype as an unordered pair of allele indices.
    idx1 <- match(allele1, alleleSet)
    idx2 <- match(allele2, alleleSet)
    lower <- factor(pmin(idx1, idx2), levels = seq_len(k))
    upper <- factor(pmax(idx1, idx2), levels = seq_len(k))
    observed <- table(lower, upper)

    # Expected genotype counts under Hardy-Weinberg equilibrium.
    p <- tabulate(c(idx1, idx2), nbins = k) / (2 * n)
    expected <- 2 * n * outer(p, p)
    diag(expected) <- n * p^2

    cells <- upper.tri(expected, diag = TRUE)
    statistic <- sum((observed[cells] - expected[cells])^2 / expected[cells])
    stats::pchisq(statistic, df = k * (k - 1) / 2, lower.tail = FALSE)
}
256 | 
#' Do quality control of SNP data downloaded from TCGAbiolinks
#'
#' Samples (columns) and variants (rows) are first pre-filtered at a
#' missing call rate of 0.2, then filtered with the thresholds mind and
#' geon, and finally by minor allele frequency and Hardy-Weinberg
#' equilibrium p-value.
#'
#' @param snpData data.frame of SNP data downloaded from TCGAbiolinks,
#' each column is a sample and each row is a variant.
#' @param geon filters out all variants with missing call rates
#' exceeding the provided value (default 0.02) to be removed
#' @param mind filters out all samples with missing call rates exceeding
#' the provided value (default 0.02) to be removed
#' @param maf filters out all variants with minor allele frequency below
#' the provided threshold
#' @param hwe filters out all variants which have Hardy-Weinberg
#' equilibrium test p-value below the provided threshold
#' @param miss character of miss value
#' @return data.frame of the samples and variants that passed all filters
#' @export
#' @examples
#' # use demo data
#' snpDf <- matrix(sample(c("AA", "Aa", "aa"), 100, replace = TRUE), 10, 10)
#' snpDf <- as.data.frame(snpDf)
#' sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
#' result <- SNP_QC(snpDf)
SNP_QC <- function(snpData, geon = 0.02, mind = 0.02, maf = 0.05,
                    hwe = 1e-6, miss = "NoCall") {
    snpData_mat <- as.matrix(snpData)

    # Indices of the columns (margin 2) or rows (margin 1) whose fraction
    # of `miss` calls is below `cutoff`. NA entries are not counted as
    # missing calls but do count towards the denominator.
    passCallRate <- function(mat, margin, cutoff) {
        isMiss <- !is.na(mat) & mat == miss
        rate <- if (margin == 1) rowMeans(isMiss) else colMeans(isMiss)
        which(rate < cutoff)
    }

    # Apply a per-SNP statistic to the called genotypes of every row.
    rowStatistic <- function(mat, fun) {
        vapply(seq_len(nrow(mat)), function(i) {
            genotypes <- mat[i, ]
            fun(genotypes[!is.na(genotypes) & genotypes != miss])
        }, numeric(1))
    }

    ## pre-filter samples, then variants, at a missing call rate of 0.2
    snpData_mat <- snpData_mat[,
        passCallRate(snpData_mat, 2, 0.2), drop = FALSE]
    snpData_mat <- snpData_mat[
        passCallRate(snpData_mat, 1, 0.2), , drop = FALSE]

    ## filter by cutoff
    snpData_mat <- snpData_mat[,
        passCallRate(snpData_mat, 2, mind), drop = FALSE]
    snpData_mat <- snpData_mat[
        passCallRate(snpData_mat, 1, geon), , drop = FALSE]

    ## maf
    MAF <- rowStatistic(snpData_mat, get_maf)
    snpData_mat <- snpData_mat[which(MAF > maf), , drop = FALSE]

    ## hwe
    HWE <- rowStatistic(snpData_mat, get_hwe)
    snpData_mat <- snpData_mat[which(HWE > hwe), , drop = FALSE]

    return(as.data.frame(snpData_mat))
}
317 | 
318 | 
319 | 


--------------------------------------------------------------------------------
/R/TCGA_id_conversion.R:
--------------------------------------------------------------------------------
 1 | #' Convert ENSEMBL gene id to gene Symbol in TCGA
 2 | #'
 3 | #' @param profiles a data.frame of gene expression data, 
 4 | #' each column is a sample, 
 5 | #' and each row is a gene. 
 6 | #' @param toType one of 'keytypes(org.Hs.eg.db)'
 7 | #'
 8 | #' @return a data.frame, gene symbols and their expression value
 9 | #' @export
10 | #'
11 | #' @examples
12 | #' library(org.Hs.eg.db)
13 | #' data(profile)
14 | #' result <- id_conversion_TCGA(profile)
15 | id_conversion_TCGA <- function(profiles, toType = "SYMBOL") {
16 |     rownames(profiles) <- gsub("\\..*", "", rownames(profiles))
17 |     genes <- clusterProfiler::bitr(rownames(profiles),
18 |         fromType = "ENSEMBL",
19 |         toType = toType, OrgDb = org.Hs.eg.db::org.Hs.eg.db, drop = FALSE
20 |     )
21 | 
22 |     genes <- genes[!duplicated(genes[, 1]), ]
23 |     rownames(genes) <- genes[, 1]
24 |     profiles2 <- as.matrix(profiles)
25 |     rownames(profiles2) <- genes[rownames(profiles), 2]
26 |     return(profiles2)
27 | }
28 | 


--------------------------------------------------------------------------------
/R/arrayDiff.R:
--------------------------------------------------------------------------------
  1 | #' Differential analysis of Microarray data
  2 | #'
  3 | #' @param df data.frame of the omic data, each column is a sample, 
  4 | #' and each row is a gene. 
  5 | #' @param group a vector, group of samples.
  6 | #' @param method method to do differential analysis, 
  7 | #' one of "limma", "ttest", "wilcox".
  8 | #' @param adjust.method adjust.method, one of "holm", "hochberg", "hommel", 
  9 | #' "bonferroni", "BH", "BY", "fdr", and "none". 
 10 | #' @return data.frame
 11 | #' @export
 12 | #'
 13 | #' @examples
 14 | #' \donttest{
 15 | #' library(GeoTcgaData)
 16 | #' library(data.table)
 17 | #' # Use real GEO data as example
 18 | #' arrayData <- read.table("GSE54807_series_matrix.txt.gz",
 19 | #'     sep = "\t", header = TRUE,
 20 | #'         fill=TRUE, comment.char = "!", check.names=FALSE)
 21 | #' gpl <- fread("GPL6244-17930.txt", sep = "\t", header = TRUE)
 22 | #' gpl <- gpl[, c("ID", "gene_assignment")]
 23 | #' class(gpl) <- "data.frame"
 24 | #'
 25 | #' for (i in seq_len(nrow(gpl))) {
 26 | #'         aa <- strsplit(gpl[i, 2], " // ")[[1]][5]
 27 | #'         gpl[i, 2] <- as.character(strsplit(aa, " /// ")[[1]][1])
 28 | #' }
 29 | #' gpl[,1] <- as.character(gpl[,1])
 30 | #' arrayData[, 1] <- as.character(arrayData[, 1])
 31 | #' rownames(gpl) <- gpl[, 1]
 32 | #' arrayData[, 1] <- gpl[arrayData[, 1], 2]
 33 | #'
 34 | #'
 35 | #' arrayData <- repRemove(arrayData," /// ")
 36 | #'
 37 | #' # Remove rows that do not correspond to genes
 38 | #' arrayData <- arrayData[!is.na(arrayData[, 1]), ]
 39 | #' arrayData <- arrayData[!arrayData[, 1] == "", ]
 40 | #' arrayData <- arrayData[!arrayData[, 1] == "---", ]
 41 | #'
 42 | #'
 43 | #' arrayData <- arrayData[order(arrayData[, 1]), ]
 44 | #' arrayData <- gene_ave(arrayData, 1)
 45 | #'
 46 | #' keep <- apply(arrayData, 1, function(x) sum(x < 1) < (length(x)/2))
 47 | #' arrayData <- arrayData[keep, ]
 48 | #'
 49 | #' group <- c(rep("group1", 12), rep("group2", 12))
 50 | #' result <- differential_array(df = arrayData, group = group)
 51 | #' }
 52 | #' # Use random data as example
 53 | #' arrayData <- matrix(runif(200), 25, 8)
 54 | #' rownames(arrayData) <- paste0("gene", 1:25)
 55 | #' colnames(arrayData) <- paste0("sample", 1:8)
 56 | #' group <- c(rep("group1", 4), rep("group2", 4))
 57 | #' result <- differential_array(df = arrayData, group = group)
 58 | differential_array <- function(df, group, method = "limma", 
 59 |                                 adjust.method = "BH") {
 60 |     method <- match.arg(method, c("limma", "ttest", "wilcox"))
 61 |     if (method == "limma") {
 62 |         result <- differential_limma(df, group, adjust.method = adjust.method)
 63 |     } else {
 64 |         groups <- unique(group)
 65 |         which1 <- which(group == groups[1])
 66 |         which2 <- which(group == groups[2])
 67 |         P.Value <- rep(0, nrow(df))
 68 |         if (method == "ttest") {
 69 |             for (i in seq_len(length(P.Value))) {
 70 |                 P.Value[i] <- stats::t.test(df[i, which1],
 71 |                     df[i, which2])$p.value
 72 |             }
 73 |         } else {
 74 |             for (i in seq_len(length(P.Value))) {
 75 |                 P.Value[i] <- stats::wilcox.test(as.numeric(df[i, which1]),
 76 |                     as.numeric(df[i, which2]))$p.value
 77 |             }
 78 |         }
 79 |         adj.P.Val <- stats::p.adjust(P.Value, method = adjust.method)
 80 |         result <- data.frame(gene = rownames(df),
 81 |             P.Value = P.Value, adj.P.Val = adj.P.Val)
 82 |     }
 83 |     return(result)
 84 | }
 85 | 
 86 | 
 87 | #' Get Microarray matrix data from GEO
 88 | #'
 89 | #' @param gse GSE number, such as GSE781.
 90 | #'
 91 | #' @return a list of matrix
 92 | #' @export
 93 | #'
 94 | #' @examples
 95 | #' \donttest{
 96 | #' arraylist <- get_geo_array("GSE781")
 97 | #' }
 98 | get_geo_array <- function(gse) {
 99 |     gse <- GEOquery::getGEO(gse, GSEMatrix = FALSE, AnnotGPL = TRUE)
100 |     gselist <- vector("list", length(GEOquery::GPLList(gse)))
101 |     names(gselist) <- names(GEOquery::GPLList(gse))
102 |     gsmplatforms <- lapply(GEOquery::GSMList(gse),
103 |         function(x) {GEOquery::Meta(x)$platform_id})
104 |     for (i in seq_len(length(gselist))) {
105 |         gsmlist <- BiocGenerics::Filter(function(gsm) {
106 |             GEOquery::Meta(gsm)$platform_id==names(gselist)[i]},
107 |         GEOquery::GSMList(gse))
108 |         probesets <- GEOquery::Table(GEOquery::GPLList(gse)[[1]])$ID
109 |         data.matrix <- do.call('cbind',
110 |             lapply(gsmlist, function(x) {tab <- GEOquery::Table(x)
111 |                                         mymatch <- match(probesets,tab$ID_REF)
112 |                                         return(tab$VALUE[mymatch])
113 |                 }
114 |             )
115 |         )
116 |         data.matrix <- apply(data.matrix,2,
117 |             function(x) {as.numeric(as.character(x))})
118 |         gpl <- gse@gpls[names(gselist)[i]]
119 |         gpl <- gpl[[1]]@dataTable@table
120 |         genes <- gpl[match(probesets, gpl[, "ID"]), "Gene Symbol"]
121 |         rownames(data.matrix) <- genes
122 |         colnames(data.matrix) <- names(gsmlist)
123 |         gselist[[i]] <- data.matrix
124 |     }
125 | }
126 | 
#' Preprocess of Microarray data
#'
#' Removes probes without a gene name or with too many missing values,
#' imputes the remaining missing values, log2-transforms data that look
#' like they are on a linear scale, and finally assigns probes mapped to
#' several genes to each of those genes with \code{repAssign()}.
#'
#' @param x matrix of Microarray data, each column is a sample,
#' and each row is a gene. Row names are the gene names.
#' @param missing_value Method to  impute missing expression data,
#' one of "zero" and "knn". Any value other than "zero" uses
#' \code{impute::impute.knn()}.
#' @param string a string, sep of the gene
#'
#' @return a data.frame (the result of \code{repAssign()}): the first
#' column holds the gene names, the other columns the numeric expression
#' values of each sample.
#' @export
#'
#' @examples
#' \donttest{
#' arraylist <- get_geo_array("GSE781")
#' arraylist <- lapply(arraylist, array_preprocess)
#' }
array_preprocess <- function(x, missing_value = "knn", string = " /// ") {
    ## filter
    # drop = FALSE keeps `x` a matrix even when a single probe survives.
    x <- x[!is.na(rownames(x)), , drop = FALSE]
    x <- x[rownames(x) != "", , drop = FALSE]
    # Keep probes that are missing in fewer than half of the samples.
    na_per_probe <- rowSums(is.na(x))
    x <- x[na_per_probe < ncol(x) / 2, , drop = FALSE]

    ## impute missing
    if (missing_value == "zero") {
        x[is.na(x)] <- 0
    } else {
        x <- impute::impute.knn(x)$data
    }

    ## log
    # The data are log2-transformed when the 99% quantile exceeds 100, or
    # when the range exceeds 50 and the first quartile is positive.
    qx <- as.numeric(stats::quantile(x,
        c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm = TRUE))
    LogC <- (qx[5] > 100) || (qx[6] - qx[1] > 50 && qx[2] > 0)
    if (LogC) {
        # Non-positive values cannot be logged; replace them first.
        x[which(x <= 0)] <- 0.0001
        x <- log2(x)
    }

    ## gene id conversion
    x <- cbind(rownames(x), x)
    # Return visibly (the previous trailing assignment hid the result).
    repAssign(x, string)
}
171 | 
#' cluster probes of Microarray data
#'
#' Probes of the same gene are grouped by complete-linkage hierarchical
#' clustering on \code{1 - Pearson correlation}; the probes of every
#' resulting cluster are averaged into a single row.
#'
#' @param x data.frame of Microarray data, the first column is the name of
#' the gene, and the other columns are the expression values.
#' @param clusterCutoff height at which the hierarchical tree, built on
#' \code{1 - Pearson correlation}, is cut by \code{cutree()}.
#' @importFrom stats as.dist
#' @importFrom stats cor
#' @importFrom stats cutree
#' @importFrom stats hclust
#' @return data.frame with one row per gene, or one row per cluster
#' (named \code{gene_1}, \code{gene_2}, ...) when the probes of a gene
#' fall into more than one cluster.
#' @export
#'
#' @examples
#' \donttest{
#' arraylist <- get_geo_array("GSE781")
#' arraylist <- lapply(arraylist, array_preprocess)
#' arraylist_cluster <- lapply(arraylist, cluster_array)
#' }
cluster_array <- function(x, clusterCutoff = 0.7) {
    genes <- x[, 1]
    uniqueGenes <- unique(genes)
    # drop = FALSE keeps the expression part two-dimensional.
    x <- x[, -1, drop = FALSE]
    matlist <- vector("list", length(uniqueGenes))
    for (i in seq_along(uniqueGenes)) {
        gene <- uniqueGenes[i]
        probes <- which(genes == gene)
        mat <- x[probes, , drop = FALSE]
        if (length(probes) == 1) {
            # A single probe needs no clustering.
            rownames(mat) <- gene
            matlist[[i]] <- mat
            next
        }
        probeCorrelation <- cor(t(mat), method = "pearson")
        ClusterResults <- hclust(as.dist(1 - probeCorrelation),
            method = "complete", members = NULL)
        # NOTE(review): the cut is applied to the distance 1 - r, so the
        # default 0.7 merges probes down to r = 0.3 -- confirm whether
        # h = 1 - clusterCutoff was intended.
        Clusters <- cutree(ClusterResults, h = clusterCutoff)
        nClusters <- length(unique(Clusters))
        clusterDf <- as.data.frame(matrix(0, nClusters, ncol(mat)))
        for (j in seq_len(nClusters)) {
            # The mean of a one-probe cluster is the probe itself.
            clusterDf[j, ] <- colMeans(mat[Clusters == j, , drop = FALSE])
        }
        if (nrow(clusterDf) > 1) {
            rownames(clusterDf) <-
                paste(gene, seq(nrow(clusterDf)), sep = "_")
        } else {
            rownames(clusterDf) <- gene
        }
        colnames(clusterDf) <- colnames(mat)
        matlist[[i]] <- clusterDf
    }
    # Return visibly (the previous trailing assignment hid the result).
    do.call("rbind", matlist)
}


--------------------------------------------------------------------------------
/R/calculate_mean_module.R:
--------------------------------------------------------------------------------
 1 | #' Find the mean value of the gene in each module
 2 | #'
 3 | #' @param geneExpress a data.frame of gene expression data. 
 4 | #' Each column is a sample, and each row is a gene. 
 5 | #' @param module a data.frame of two column. The first column is module name, 
 6 | #' the second column are genes in this module.
 7 | #'
 8 | #' @return a data.frame, means the mean of gene expression value in
 9 | #' the same module
10 | #' @export
11 | #'
12 | #' @examples
13 | #' data(geneExpress)
14 | #' data(module)
15 | #' result <- cal_mean_module(geneExpress, module)
16 | cal_mean_module <- function(geneExpress, module) {
17 |     genes <- rownames(geneExpress)
18 |     output_module <- matrix(0, nrow(module), 2)
19 |     rownames(output_module) <- module[, 1]
20 |     for (i in seq_len(nrow(module))) {
21 |         modulen <- unlist(strsplit(module[i, 2], ","))
22 |         modulen <- intersect(modulen, genes)
23 |         modulenDf <- geneExpress[modulen, ]
24 |         output_module[i, ] <- colMeans(modulenDf)
25 |     }
26 |     as.data.frame(output_module)
27 | }
28 | 


--------------------------------------------------------------------------------
/R/calculate_mean_profile.R:
--------------------------------------------------------------------------------
 1 | #' Average the values of same genes in gene expression profile
 2 | #'
 3 | #' @param file_gene_ave a data.frame of gene expression data, 
 4 | #' each column is a sample, and each row is a gene. 
 5 | #' @param k a number, indicates which is the gene column.
 6 | #'
 7 | #' @return a data.frame, the values of same genes in gene expression profile
 8 | #' @export
 9 | #'
10 | #' @examples
11 | #' aa <- c("MARCH1", "MARC1", "MARCH1", "MARCH1", "MARCH1")
12 | #' bb <- c(2.969058399, 4.722410064, 8.165514853, 8.24243893, 8.60815086)
13 | #' cc <- c(3.969058399, 5.722410064, 7.165514853, 6.24243893, 7.60815086)
14 | #' file_gene_ave <- data.frame(aa = aa, bb = bb, cc = cc)
15 | #' colnames(file_gene_ave) <- c("Gene", "GSM1629982", "GSM1629983")
16 | #'
17 | #' result <- gene_ave(file_gene_ave, 1)
18 | gene_ave <- function(file_gene_ave, k = 1) {
19 |     x <- file_gene_ave[, -k]
20 |     file_gene_ave <- as.matrix(file_gene_ave)
21 |     rownames(file_gene_ave) <- file_gene_ave[, k]
22 |     # x <- file_gene_ave
23 |     ID <- rownames(file_gene_ave)
24 |     ID <- factor(ID, levels = unique(ID))
25 | 
26 |     y <- rowsum(x, ID, reorder = FALSE, na.rm = TRUE)
27 |     n <- rowsum(1L - is.na(x), ID, reorder = FALSE)
28 |     return(y / n)
29 | }
30 | 


--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' a data.frame of gene expression data
 3 | #'
 4 | #' It is a randomly generated expression data 
 5 | #' used as an example of functions in this package.
 6 | #' the rowname is gene symbols
 7 | #' the columns are gene expression values
 8 | #'
 9 | #' @format A data.frame with 10779 rows and 2 column
10 | #'
11 | "geneExpress"
12 | 
13 | # ' a matrix for Converting gene symbol to entrez_id or ensembl_gene_id
14 | # '
15 | # ' the hgnc data comes from HGNC website
16 | # ' the columns represent "symbol", "locus_group", "locus_type", 
17 | # ' "entrez_id" and "ensembl_gene_id"
18 | # '
19 | # ' @format A matrix with 37647 rows and 5 column
20 | # '
21 | "hgnc"
22 | 
23 | #' a matrix of gene expression data in TCGA
24 | #'
25 | #' It is a randomly generated expression data 
26 | #' used as an example of functions in this package.
27 | #' the first column represents the gene symbol
28 | #'
29 | #' the other columns represent the expression(FPKM) of genes
30 | #'
31 | #' @format A matrix with 10 rows and 10 column
32 | #'
33 | "profile"
34 | 
35 | #' a matrix of gene expression data in GEO
36 | #'
37 | #' It is a randomly generated expression data 
38 | #' used as an example of functions in this package.
39 | #' the first column represents the gene symbol
40 | #'
41 | #' the other columns represent the expression of genes
42 | #'
43 | #' @format A matrix with 32 rows and 20 column
44 | #'
45 | "ventricle"
46 | 
47 | #' a matrix of gene expression data in TCGA
48 | #'
49 | #' It is a randomly generated expression data 
50 | #' used as an example of functions in this package.
51 | #' the first column represents the gene symbol
52 | #'
53 | #' the other columns represent the expression(count) of genes
54 | #'
55 | #' @format A matrix with 100 rows and 150 column
56 | #'
57 | "kegg_liver"
58 | 
59 | #' a matrix of gene expression data in GEO
60 | #'
61 | #' the first column represents the gene symbol
62 | #'
63 | #' the other columns represent the expression of genes
64 | #'
65 | #' @format A matrix with 999 rows and 3 column
66 | #'
67 | "GSE66705_sample2"
68 | 
69 | #' a matrix of module name, gene symbols, and the number of gene symbols
70 | #'
71 | #' It is a randomly generated expression data 
72 | #' used as an example of functions in this package.
73 | #' @format A matrix with 176 rows and 3 column
74 | #'
75 | "module"
76 | 
77 | 
78 | # ' a matrix for Converting gene symbol.
79 | # ' 
80 | # ' the hgnc data comes from HGNC website
81 | # '
82 | # ' @format A matrix with 43547 rows and 52 column
83 | # '
84 | "hgnc_file"
85 | 
86 | 
87 | #' a data.frame of gene length and GC content
88 | #'
89 | #' the gene length and GC content data comes from 
90 | #' TxDb.Hsapiens.UCSC.hg38.knownGene and
91 | #' BSgenome.Hsapiens.UCSC.hg38
92 | #'
93 | #' @format A data.frame with 27341 rows and 2 column
94 | #'
95 | "gene_cov"
96 | 


--------------------------------------------------------------------------------
/R/fpkm_count_conversion.r:
--------------------------------------------------------------------------------
  1 | countToTpm_internal <- function(counts, effLen) {
  2 |     rate <- log(counts) - log(effLen)
  3 |     denom <- log(sum(exp(rate)))
  4 |     exp(rate - denom + log(1e6))
  5 | }
  6 | countToFpkm_internal <- function(counts, effLen) {
  7 |     N <- sum(counts)
  8 |     exp(log(counts) + log(1e9) - log(effLen) - log(N))
  9 | }
 10 | 
 11 | fpkmToTpm_internal <- function(fpkm) {
 12 |     exp(log(fpkm) - log(sum(fpkm)) + log(1e6))
 13 | }
 14 | 
 15 | countToEffCounts_internal <- function(counts, len, effLen) {
 16 |     counts * (len / effLen)
 17 | }
 18 | 
 19 | # if we have fpkm, then we can easily get the rate of counts/sum(counts).
 20 | # we can't get the real count value.
 21 | fpkmToCount_internal <- function(fpkm, effLen, N = 1e9) {
 22 |     # rate <- (fpkm * effLen)/10^9
 23 |     rate <- exp(log(fpkm) + log(effLen) - log(1e9))
 24 |     counts <- rate * N
 25 | }
 26 | 
 27 | 
 28 | 
 29 | 
 30 | #' Convert count to FPKM
 31 | #'
 32 | #' @param counts_matrix a matrix, colnames of counts_matrix are sample name,
 33 | #' rownames of counts_matrix are gene symbols
 34 | #' @param keyType keyType, one of keytypes(org.Hs.eg.db).
 35 | #' @param gene_cov data.frame of two column, the first column is gene length, 
 36 | #' the second column is gene GC content
 37 | #'
 38 | #' @return a matrix
 39 | #' @export
 40 | #'
 41 | #' @examples
 42 | #' data(gene_cov)
 43 | #' lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
 44 | #' rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
 45 | #' colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
 46 | #' result <- countToFpkm(lung_squ_count2,
 47 | #'     keyType = "SYMBOL",
 48 | #'     gene_cov = gene_cov
 49 | #' )
 50 | countToFpkm <- function(counts_matrix, keyType = "SYMBOL", gene_cov) {
 51 |     gene_cov2 <- gene_cov
 52 |     if (keyType != "ENTREZID") {
 53 |         genes_bitr <- clusterProfiler::bitr(rownames(gene_cov),
 54 |             fromType = "ENTREZID", toType = keyType,
 55 |             OrgDb = org.Hs.eg.db::org.Hs.eg.db, drop = TRUE
 56 |         )
 57 |         genes_bitr <- genes_bitr[!duplicated(genes_bitr[, 2]), ]
 58 |         gene_cov2 <- gene_cov[genes_bitr$ENTREZID, ]
 59 |         rownames(gene_cov2) <- genes_bitr[, 2]
 60 |     }
 61 |     genes_count <- intersect(rownames(counts_matrix), rownames(gene_cov2))
 62 |     counts_matrix_new <- counts_matrix[genes_count, ]
 63 |     gene_loc_len_new <- gene_cov2[genes_count, ]
 64 |     genes_length <- as.numeric(gene_loc_len_new[, 1])
 65 |     counts_matrix_new2 <- counts_matrix_new
 66 |     for (i in seq_len(dim(counts_matrix_new2)[2])) {
 67 |         counts_matrix_new2[, i] <- countToFpkm_internal(
 68 |             as.numeric(counts_matrix_new2[, i]),
 69 |             genes_length)
 70 |     }
 71 |     return(counts_matrix_new2)
 72 | }
 73 | 
 74 | 
 75 | #' Convert count to Tpm
 76 | #'
 77 | #' @param counts_matrix a matrix, colnames of counts_matrix are sample name,
 78 | #' rownames of counts_matrix are gene symbols
 79 | #' @param keyType keyType, one of keytypes(org.Hs.eg.db).
 80 | #' @param gene_cov data.frame of two column, the first column is gene length, 
 81 | #' the second column is gene GC content
 82 | #'
 83 | #' @return a matrix
 84 | #' @export
 85 | #'
 86 | #' @examples
 87 | #' data(gene_cov)
 88 | #' lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
 89 | #' rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
 90 | #' colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
 91 | #' result <- countToTpm(lung_squ_count2,
 92 | #'     keyType = "SYMBOL",
 93 | #'     gene_cov = gene_cov
 94 | #' )
 95 | countToTpm <- function(counts_matrix, keyType = "SYMBOL", gene_cov) {
 96 |     gene_cov2 <- gene_cov
 97 |     if (keyType != "ENTREZID") {
 98 |         genes_bitr <- clusterProfiler::bitr(rownames(gene_cov),
 99 |             fromType = "ENTREZID", toType = keyType,
100 |             OrgDb = org.Hs.eg.db::org.Hs.eg.db, drop = TRUE
101 |         )
102 |         genes_bitr <- genes_bitr[!duplicated(genes_bitr[, 2]), ]
103 |         gene_cov2 <- gene_cov[genes_bitr$ENTREZID, ]
104 |         rownames(gene_cov2) <- genes_bitr[, 2]
105 |     }
106 |     genes_count <- intersect(rownames(counts_matrix), rownames(gene_cov2))
107 |     counts_matrix_new <- counts_matrix[genes_count, ]
108 |     gene_loc_len_new <- gene_cov2[genes_count, ]
109 |     genes_length <- as.numeric(gene_loc_len_new[, 1])
110 |     counts_matrix_new2 <- counts_matrix_new
111 |     for (i in seq_len(dim(counts_matrix_new2)[2])) {
112 |         counts_matrix_new2[, i] <- countToTpm_internal(
113 |             as.numeric(counts_matrix_new2[, i]),
114 |             genes_length)
115 |     }
116 |     return(counts_matrix_new2)
117 | }
118 | 
119 | 
#' Convert fpkm to Tpm
#'
#' Every column (sample) is rescaled independently so that it sums to 1e6.
#'
#' @param fpkm_matrix a matrix, colnames of fpkm_matrix are sample name,
#' rownames of fpkm_matrix are genes
#'
#' @return a matrix with the same dimensions as \code{fpkm_matrix}
#' @export
#'
#' @examples
#' lung_squ_count2 <- matrix(c(0.11, 0.22, 0.43, 0.14, 0.875,
#'     0.66, 0.77, 0.18, 0.29), ncol = 3)
#' rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
#' colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
#' result <- fpkmToTpm(lung_squ_count2)
fpkmToTpm <- function(fpkm_matrix) {
    tpm_matrix <- apply(fpkm_matrix, 2, fpkmToTpm_internal)
    if (is.null(dim(tpm_matrix))) {
        # apply() collapses a one-row input to a vector; restore the shape.
        tpm_matrix <- matrix(tpm_matrix, nrow = nrow(fpkm_matrix),
            dimnames = dimnames(fpkm_matrix))
    }
    # Return visibly (the previous trailing assignment hid the result).
    tpm_matrix
}
137 | 


--------------------------------------------------------------------------------
/R/metap.R:
--------------------------------------------------------------------------------
 1 | # copy from metap package: https://CRAN.R-project.org/package=metap
 2 | #' @importFrom stats na.fail
 3 | #' @importFrom stats pchisq
 4 | #' @importFrom stats pnorm
 5 | #' @importFrom stats qnorm
 6 | sumz <- function(p, weights = NULL, data = NULL, subset = NULL,
 7 |    na.action = na.fail, log.p = FALSE, log.input = FALSE)  {
 8 |    if(is.null(data)) data <- sys.frame(sys.parent())
 9 |    mf <- match.call()
10 |    mf$data <- NULL
11 |    mf$subset <- NULL
12 |    mf$na.action <- NULL
13 |    mf[[1]] <- as.name("data.frame")
14 |    mf <- eval(mf, data)
15 |    if(!is.null(subset)) mf <- mf[subset,]
16 |    mf <- na.action(mf)
17 |    p <- as.numeric(mf$p)
18 |    weights <- mf$weights
19 |    noweights <- is.null(weights)
20 |    if(noweights) weights <- rep(1, length(p))
21 |    if(length(p) != length(weights)) warning("Length of p and weights differ")
22 |    if(log.input) {
23 |       keep <- p < 0
24 |    } else {
25 |       keep <- (p > 0) & (p < 1)
26 |    }
27 |    invalid <- sum(1L * keep) < 2
28 |    if(invalid) {
29 |       warning("Must have at least two valid p values")
30 |       res <- list(z = NA_real_, p = NA_real_,
31 |          validp = p[keep], weights = weights)
32 |    } else {
33 |       if(sum(1L * keep) != length(p)) {
34 |          warning("Some studies omitted")
35 |          omitw <- weights[!keep]
36 |          if((sum(1L * omitw) > 0) & !noweights)
37 |             warning("Weights omitted too")
38 |       }
39 |       zp <- (qnorm(p[keep], lower.tail = FALSE, log.p = log.input) %*%
40 |          weights[keep]) / sqrt(sum(weights[keep]^2))
41 |       res <- list(z = zp, p = pnorm(zp, lower.tail = FALSE,
42 |             log.p = log.p),
43 |          validp = p[keep], weights = weights)
44 |    }
45 |    class(res) <- c("sumz", "metap")
46 |    res
47 | }
48 | 
# copy from metap package: https://CRAN.R-project.org/package=metap
# Print the combined z statistic and p value of a "sumz" result; the
# object itself is returned invisibly.
print.sumz <- function(x, ...) {
   z_value <- x$z
   p_value <- x$p
   cat("sumz = ", z_value, "p = ", p_value, "\n")
   invisible(x)
}
54 | 
# copy from metap package: https://CRAN.R-project.org/package=metap
# Combine p values by the sum of their logs: -2 * sum(log(p)) is referred
# to a chi-squared distribution with 2 * (number of valid p) degrees of
# freedom. Values outside (0, 1] are dropped with a warning. With log.p
# the combined p is returned as a log. Returns a list of class
# c("sumlog", "metap") with elements chisq, df, p and validp.
sumlog <- function(p, log.p = FALSE) {
   valid <- (p > 0) & (p <= 1)
   used_p <- p[valid]
   if(sum(1L * valid) < 2) {
      warning("Must have at least two valid p values")
      res <- list(chisq = NA_real_, df = NA_integer_,
         p = NA_real_, validp = used_p)
   } else {
      log_p <- log(used_p)
      statistic <- (-2) * sum(log_p)
      dof <- 2 * length(log_p)
      if(length(log_p) != length(p)) {
         warning("Some studies omitted")
      }
      combined_p <- pchisq(statistic, dof, lower.tail = FALSE,
         log.p = log.p)
      res <- list(chisq = statistic, df = dof,
         p = combined_p, validp = used_p)
   }
   structure(res, class = c("sumlog", "metap"))
}
77 | 
# copy from metap package: https://CRAN.R-project.org/package=metap
# Print the chi-squared statistic, degrees of freedom and p value of a
# "sumlog" result; the object itself is returned invisibly.
print.sumlog <- function(x, ...) {
   statistic <- x$chisq
   dof <- x$df
   p_value <- x$p
   cat("chisq = ", statistic, " with df = ", dof, " p = ", p_value, "\n")
   invisible(x)
}
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/R/prepareChi.r:
--------------------------------------------------------------------------------
 1 | #' Preparer file for chi-square test
 2 | #'
 3 | #' @param cnv result of ann_merge()
 4 | #'
 5 | #' @return a matrix
 6 | #' @export
 7 | #'
 8 | #' @examples
 9 | #' cnv <- matrix(c(
10 | #'     -1.09150, -1.47120, -0.87050, -0.50880,
11 | #'     -0.50880, 2.0, 2.0, 2.0, 2.0, 2.0, 2.601962, 2.621332, 2.621332,
12 | #'     2.621332, 2.621332, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
13 | #'     2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
14 | #' ), nrow = 5)
15 | #' cnv <- as.data.frame(cnv)
16 | #' rownames(cnv) <- c("AJAP1", "FHAD1", "CLCNKB", "CROCCP2", "AL137798.3")
17 | #' colnames(cnv) <- c(
18 | #'     "TCGA-DD-A4NS-10A-01D-A30U-01", "TCGA-ED-A82E-01A-11D-A34Y-01",
19 | #'     "TCGA-WQ-A9G7-01A-11D-A36W-01", "TCGA-DD-AADN-01A-11D-A40Q-01",
20 | #'     "TCGA-ZS-A9CD-10A-01D-A36Z-01", "TCGA-DD-A1EB-11A-11D-A12Y-01"
21 | #' )
22 | #' cnv_chi_file <- prepare_chi(cnv)
23 | prepare_chi <- function(cnv) {
24 |     file1 <- cnv
25 |     sampless <- rep(0, ncol(file1))
26 |     for (i in seq_len(length(sampless)))
27 |     {
28 |         a <- unlist(strsplit(colnames(file1)[i], "-"))[4]
29 |         sampless[i] <- substring(a, 1, 1)
30 |     }
31 | 
32 |     cnv_chi <- matrix(0, nrow(cnv), 4)
33 |     rownames(cnv_chi) <- rownames(cnv)
34 |     colnames(cnv_chi) <- c("normalCNV", "normalWild", "tumorCNV", "tumorWild")
35 |     for (i in seq_len(nrow(file1)))
36 |     {
37 |         normalCNV <- 0
38 |         normalWild <- 0
39 |         tumorCNV <- 0
40 |         tumorWild <- 0
41 |         for (j in seq_len(ncol(file1)))
42 |         {
43 |             if ((sampless[j] == "1") &&
44 |                 (abs(as.numeric(file1[i, j]) - 2) > 0.5)) {
45 |                 normalCNV <- normalCNV + 1
46 |             }
47 |             if ((sampless[j] == "1") &&
48 |                 (abs(as.numeric(file1[i, j]) - 2) <= 0.5)) {
49 |                 normalWild <- normalWild + 1
50 |             }
51 |             if ((sampless[j] == "0") &&
52 |                 (abs(as.numeric(file1[i, j]) - 2) > 0.5)) {
53 |                 tumorCNV <- tumorCNV + 1
54 |             }
55 |             if ((sampless[j] == "0") &&
56 |                 (abs(as.numeric(file1[i, j]) - 2) <= 0.5)) {
57 |                 tumorWild <- tumorWild + 1
58 |             }
59 |         }
60 |         cnv_chi[i, ] <- c(normalCNV, normalWild, tumorCNV, tumorWild)
61 |     }
62 |     return(cnv_chi)
63 | }
64 | 
65 | 


--------------------------------------------------------------------------------
/R/rep.R:
--------------------------------------------------------------------------------
 1 | #' Handle the case where one id corresponds to multiple genes
 2 | #'
 3 | #' @param input_file input file, a data.frame or a matrix, 
 4 | #' the first column should be genes.
 5 | #' @param string a string, sep of the gene
 6 | #'
 7 | #' @return a data.frame, when an id corresponds to multiple genes,
 8 | #' the expression value is assigned to each gene
 9 | #' @export
10 | #'
11 | #' @examples
12 | #' aa <- c("MARCH1 /// MMA", "MARC1", "MARCH2 /// MARCH3", 
13 | #'     "MARCH3 /// MARCH4", "MARCH1")
14 | #' bb <- c("2.969058399", "4.722410064", "8.165514853",
15 | #'     "8.24243893", "8.60815086")
16 | #' cc <- c("3.969058399", "5.722410064", "7.165514853", 
17 | #'     "6.24243893", "7.60815086")
18 | #' input_file <- data.frame(aa = aa, bb = bb, cc = cc)
19 | #'
20 | #' repAssign_result <- repAssign(input_file, " /// ")
21 | #'
22 | repAssign <- function(input_file, string) {
23 |     name <- colnames(input_file)[1]
24 |     genelist <- strsplit(input_file[, 1], string)
25 |     geneLength <- unlist(lapply(genelist, length))
26 |     input_file <- input_file[, -1]
27 |     output <- apply(input_file, 2, rep, times = geneLength)
28 |     output2 <- matrix(as.numeric(output), nrow = nrow(output))
29 |     colnames(output2) <- colnames(output)
30 |     output2 <- data.frame(unlist(genelist), output2, check.names = FALSE)
31 |     colnames(output2)[1] <- name
32 |     output2
33 | }
34 | 
#' Handle the case where one id corresponds to multiple genes
#'
#' Rows whose gene entry contains the separator, that is rows assigned to
#' more than one gene, are dropped; all other rows are returned unchanged.
#'
#' @param input_file input file, a data.frame or a matrix,
#' the first column should be genes.
#' @param string a string,sep of the gene
#'
#' @return a data.frame, when an id corresponds to multiple genes,
#' the expression value is deleted
#' @export
#'
#' @examples
#' aa <- c("MARCH1 /// MMA", "MARC1", "MARCH2 /// MARCH3",
#'     "MARCH3 /// MARCH4", "MARCH1")
#' bb <- c("2.969058399", "4.722410064", "8.165514853",
#'     "8.24243893", "8.60815086")
#' cc <- c("3.969058399", "5.722410064", "7.165514853",
#'     "6.24243893", "7.60815086")
#' input_file <- data.frame(aa = aa, bb = bb, cc = cc)
#' repRemove_result <- repRemove(input_file, " /// ")
repRemove <- function(input_file, string) {
    # TRUE for rows whose gene entry matches the separator pattern.
    is_multi_gene <- grepl(string, input_file[, 1])
    if (any(is_multi_gene)) {
        input_file <- input_file[!is_multi_gene, ]
    }
    input_file
}
59 | 


--------------------------------------------------------------------------------
/R/sysdata.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/R/sysdata.rda


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, include = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "man/figures/",
 12 |   out.width = "100%"
 13 | )
 14 | ```
 15 | 
 16 | # GeoTcgaData
 17 | 
 18 | The goal of GeoTcgaData is to deal with RNA-seq, DNA Methylation, single nucleotide Variation and Copy number variation data in GEO and TCGA.
 19 | 
 20 | ## :writing_hand: Authors
 21 | Erqiang Hu
 22 | 
 23 |  Department of Bioinformatics, School of Basic Medical Sciences, Southern Medical University.
 24 | 
 25 | 
 26 | ## :arrow\_double\_down: Installation
 27 | 
 28 | 
 29 | ```{r eval=FALSE}
 30 | if(!requireNamespace("devtools", quietly = TRUE))
 31 |     install.packages("devtools")
 32 | devtools::install_github("YuLab-SMU/GeoTcgaData")
 33 | ```
 34 | 
 35 | ```{r}
 36 | library(GeoTcgaData)
 37 | ```
 38 | GEO and TCGA provide us with a wealth of data, such as RNA-seq, DNA Methylation, single nucleotide Variation and Copy number variation data. It's easy to download data from TCGA using the gdc tool or `TCGAbiolinks`, and some software provides organized TCGA data, such as [UCSC Xena](http://xena.ucsc.edu/), [UCSCXenaTools](https://cran.r-project.org/package=UCSCXenaTools), and [sangerbox](http://vip.sangerbox.com/), but processing these data into a format suitable for bioinformatics analysis requires more work. This R package was developed to handle these data.
 39 | 
 40 | ## Example
 41 | 
 42 | This is a basic example which shows you how to solve a common problem:
 43 | 
 44 | ## RNA-seq data differential expression analysis
 45 | It is convenient to use [`TCGAbiolinks`](http://www.bioconductor.org/packages/release/bioc/vignettes/TCGAbiolinks/inst/doc/analysis.html)  or [`GDCRNATools`](https://bioconductor.org/packages/GDCRNATools/) to download and analysis Gene expression data.  `TCGAbiolinks` use `edgeR` package to do differential expression analysis, while `GDCRNATools` can implement three most commonly used methods: limma, edgeR , and DESeq2 to identify differentially expressed  genes (DEGs).
 46 | 
 47 | Alicia Oshlack et al. claimed that, unlike the chip data, the RNA-seq data had one [bias](https://pubmed.ncbi.nlm.nih.gov/20132535/): the larger the transcript length / mean read count, the more likely it was to be identified as a differential gene, [while there was no such trend in the chip data](https://pubmed.ncbi.nlm.nih.gov/19371405/).
 48 | 
 49 | 
 50 |  However, when we use their chip data for difference analysis (using the limma package), we find that chip data has the same trend as RNA-seq data. We also found this trend in the difference analysis results given by the data [authors](https://genome.cshlp.org/content/18/9/1509.long).
 51 | 
 52 |  
 53 | 
 54 | 
 55 |  It is worth noting that [only technical replicate data, which has small gene dispersions, shows this bias](https://pubmed.ncbi.nlm.nih.gov/28545404/). This is because in technical replicate RNA-seq data a long gene has more reads mapping to it compared to a short gene of similar expression, and most of the statistical methods used to detect differential expression have stronger detection ability for genes with more reads. However, we have not deduced why there is such a bias in the current difference analysis algorithms.
 56 | 
 57 | Some software, such as [CQN](http://www.bioconductor.org/packages/cqn/), presents a [normalization algorithm](https://pubmed.ncbi.nlm.nih.gov/22285995/) to correct systematic biases (gene length bias and [GC-content bias](https://pubmed.ncbi.nlm.nih.gov/22177264/)). But they did not provide sufficient evidence to prove that the correction is effective. We use the [Marioni dataset](https://pubmed.ncbi.nlm.nih.gov/19371405/) to verify the correction effect of CQN and find that there is still a deviation after correction:
 58 | 
 59 | 
 60 | 
 61 | [GOseq](http://bioconductor.org/packages/goseq/) based on [Wallenius' noncentral hypergeometric distribution](https://en.wikipedia.org/wiki/Wallenius%27_noncentral_hypergeometric_distribution) can effectively correct the gene length deviation in enrichment analysis. However, the current RNA-seq data often have no gene length bias, but only the expression amount(read count) bias, GOseq may overcorrect these data, correcting originally unbiased data into reverse bias.
 62 | 
 63 | 
 64 | GOseq also fails to correct for expression bias; therefore, read count bias correction is still a challenge for us.
 65 | 
 66 | 
 67 | Use `TCGAbiolinks` to download TCGA data:
 68 | 
 69 | ```{r eval=FALSE}
 70 | # download RNA-seq data
 71 | library(TCGAbiolinks)
 72 |                 
 73 | query <- GDCquery(project = "TCGA-ACC",
 74 |                   data.category = "Transcriptome Profiling",
 75 |                   data.type = "Gene Expression Quantification", 
 76 |                   workflow.type = "STAR - Counts")
 77 |                   
 78 | GDCdownload(query, method = "api", files.per.chunk = 3, 
 79 |     directory = Your_Path)
 80 | 
 81 | dataRNA <- GDCprepare(query = query, directory = Your_Path,
 82 |                       save = TRUE, save.filename = "dataRNA.RData")
 83 | ## get raw count matrix                         
 84 | dataPrep <- TCGAanalyze_Preprocessing(object = dataRNA,
 85 |                                       cor.cut = 0.6,
 86 |                                       datatype = "STAR - Counts")
 87 | 
 88 | ```
 89 | 
 90 | Use `differential_RNA` to do difference analysis. We provide the data of human gene length and GC content in `gene_cov`.
 91 | 
 92 | ```{r eval=FALSE}
 93 | group <- sample(c("grp1", "grp2"), ncol(dataPrep), replace = TRUE)
 94 | library(cqn) # To avoid reporting errors: there is no function "rq"
 95 | ## get gene length and GC content
 96 | library(org.Hs.eg.db)
 97 | genes_bitr <- bitr(rownames(gene_cov), fromType = "ENTREZID", toType = "ENSEMBL", 
 98 |          OrgDb = org.Hs.eg.db, drop = TRUE)
 99 | genes_bitr <- genes_bitr[!duplicated(genes_bitr[,2]), ]
100 | gene_cov2 <- gene_cov[genes_bitr$ENTREZID, ]
101 | rownames(gene_cov2) <- genes_bitr$ENSEMBL
102 | genes <- intersect(rownames(dataPrep), rownames(gene_cov2))
103 | dataPrep <- dataPrep[genes, ]
104 | geneLength <- gene_cov2[genes, "length"]
105 | gccontent <- gene_cov2[genes, "GC"]
106 | names(geneLength) <- names(gccontent) <- genes
107 | ##  Difference analysis
108 | DEGAll <- differential_RNA(counts = dataPrep, group = group, 
109 |                    geneLength = geneLength, gccontent = gccontent)
110 | ```
111 | 
112 | Use `clusterProfiler` to do enrichment analysis:
113 | 
114 | ```{r eval=FALSE}
115 | diffGenes <- DEGAll$logFC
116 | names(diffGenes) <- rownames(DEGAll)
117 | diffGenes <- sort(diffGenes, decreasing = TRUE)
118 | library(clusterProfiler)
119 | library(enrichplot)
120 | library(org.Hs.eg.db)
121 | gsego <- gseGO(gene = diffGenes, OrgDb = org.Hs.eg.db, keyType = "ENSEMBL")
122 | dotplot(gsego)
123 | ```
124 | 
125 | 
126 | 
127 | ## DNA Methylation data integration 
128 | Use `TCGAbiolinks` to download TCGA data.
129 | 
130 | The code may need to be modified if `TCGAbiolinks` is updated, so please read its [documentation](https://www.bioconductor.org/packages/release/bioc/html/TCGAbiolinks.html).
131 | 
132 | ```{r eval=FALSE}
133 | library(TCGAbiolinks)
134 | query <- GDCquery(project = "TCGA-ACC",
135 |                   data.category = "DNA Methylation",
136 |                   data.type = "Methylation Beta Value",
137 |                   platform = "Illumina Human Methylation 450")
138 | GDCdownload(query, method = "api", files.per.chunk = 5, directory = Your_Path)
139 | ```
140 | 
141 | The function `Merge_methy_tcga` merges methylation data downloaded from the TCGA official website or via TCGAbiolinks. This makes it easier to extract differentially methylated genes in the downstream analysis. For example:
142 | 
143 | ```{r eval=FALSE}
144 | merge_result <- Merge_methy_tcga(Your_Path_to_DNA_Methylation_data)
145 | ```
146 | Then use `differential_methy()` to do difference analysis.
147 | 
148 | ```{r eval=FALSE}
149 | # if (!requireNamespace("ChAMP", quietly = TRUE))
150 | #     BiocManager::install("ChAMP")
151 | library(ChAMP) # To avoid reporting errors
152 | differential_gene <- differential_methy(cpgData = merge_result, sampleGroup = sample(c("C","T"), 
153 |     ncol(merge_result[[1]]), replace = TRUE))
154 | ```
155 | 
156 | **Note:** `ChAMP` has a large number of dependent packages. If you cannot install it successfully, you can download each dependent package separately (source or binary) and install it locally.
157 | 
158 | 
159 | 
160 | If your methylation data was downloaded from [UCSC Xena](http://xena.ucsc.edu/), you can use `differential_methy` with `ucscData = TRUE` to get differential genes.
161 | 
162 | ```{r eval=FALSE}
163 | methy_file <- "TCGA.THCA.sampleMap_HumanMethylation450.gz"
164 | methy <- fread(methy_file, sep = "\t", header = T)
165 | library(ChAMP)
166 | myImport <- champ.import(directory=system.file("extdata",package="ChAMPdata"))
167 | myfilter <- champ.filter(beta=myImport$beta,pd=myImport$pd,detP=myImport$detP,beadcount=myImport$beadcount)
168 | cpg_gene <- hm450.manifest.hg19[, c("probeID", "gene_HGNC")]
169 | ## or use IlluminaHumanMethylation450kanno.ilmn12.hg19 to get annotation data
170 | # library(IlluminaHumanMethylation450kanno.ilmn12.hg19)
171 | # ann <- getAnnotation(IlluminaHumanMethylation450kanno.ilmn12.hg19)
172 | # class(ann) <- "data.frame"
173 | # cpg_gene <- ann[,c("Name", "UCSC_RefGene_Name", "UCSC_RefGene_Group")]
174 | 
175 | methy_df <- differential_methy(methy, cpg_gene, ucscData = TRUE)
176 | ```
177 | 
178 | We provide two models to get methylation difference genes:
179 | 
180 | if model = "cpg", step1: calculate difference cpgs; step2: calculate difference genes; 
181 | 
182 | if model = "gene", step1: calculate the methylation level of genes; step2: calculate difference genes.
183 | 
184 | We find that only `model = "gene"` shows no bias related to the number of CpGs per gene.
185 | 
186 | 
187 | Use `clusterProfiler` to do enrichment analysis:
188 | 
189 | ```{r eval=FALSE}
190 | differential_gene$p.adj <- p.adjust(differential_gene$pvalue)
191 | genes <- differential_gene[differential_gene$p.adj < 0.05, "gene"]
192 | library(clusterProfiler)
193 | library(enrichplot)
194 | library(org.Hs.eg.db)
195 | ego <- enrichGO(gene = genes, OrgDb = org.Hs.eg.db, keyType = "SYMBOL")
196 | dotplot(ego)
197 | ```
198 | 
199 | 
200 | 
201 | ## Copy number variation data integration and differential gene extraction
202 | 
203 | Use `TCGAbiolinks` to download TCGA data (Gene Level Copy Number Scores):
204 | 
205 | ```{r eval=FALSE}
206 | library(TCGAbiolinks)
207 | query <- GDCquery(project = "TCGA-LGG",
208 |                   data.category = "Copy Number Variation",
209 |                   data.type = "Gene Level Copy Number Scores")
210 | 
211 | GDCdownload(query, method = "api", files.per.chunk = 5, directory = Your_Path)
212 | 
213 | data <- GDCprepare(query = query, 
214 |                    directory =  Your_Path) 
215 | ```
216 | 
217 | 
218 | 
219 | Do difference analysis of gene level copy number variation data using `differential_CNV`
220 | 
221 | ```{r eval=FALSE}
222 | class(data) <- "data.frame"
223 | cnvData <- data[, -c(1,2,3)]
224 | rownames(cnvData) <- data[, 1]
225 | sampleGroup  = sample(c("A","B"), ncol(cnvData), replace = TRUE)
226 | diffCnv <- differential_CNV(cnvData, sampleGroup)
227 | ```
228 | 
229 | Use `clusterProfiler` to do enrichment analysis:
230 | 
231 | ```{r eval=FALSE}
232 | pvalues <- diffCnv$pvalue * sign(diffCnv$odds)
233 | genes <- rownames(diffCnv)[diffCnv$pvalue < 0.05]
234 | library(clusterProfiler)
235 | library(enrichplot)
236 | library(org.Hs.eg.db)
237 | ego <- enrichGO(gene = genes, OrgDb = org.Hs.eg.db, keyType = "ENSEMBL")
238 | dotplot(ego)
239 | ```
240 | 
241 | 
242 | 
243 | ## Difference analysis of single nucleotide Variation data 
244 | 
245 | Use TCGAbiolinks to download TCGA data
246 | 
247 | ```{r eval=FALSE}
248 | library(TCGAbiolinks)
249 | query <- GDCquery(project = "TCGA-ACC",
250 |                   data.category = "Simple Nucleotide Variation",
251 |                   data.type = "Masked Somatic Mutation",
252 |                   workflow.type = "MuSE Variant Aggregation and Masking")
253 | 
254 | GDCdownload(query, method = "api", files.per.chunk = 5, directory = Your_Path)
255 | 
256 | data_snp <- GDCprepare(query = query, 
257 |                    directory =  Your_Path) 
258 | 
259 | ```
260 | 
261 | 
262 | 
263 | Use `differential_SNP_tcga` to do difference analysis
264 | 
265 | ```{r eval=FALSE}
266 | samples <- unique(data_snp$Tumor_Sample_Barcode)
267 | sampleType <- sample(c("A","B"), length(samples), replace = TRUE)
268 | names(sampleType) <- samples
269 | pvalue <- differential_SNP_tcga(snpData = data_snp, sampleType = sampleType)
270 | # merge pvalue
271 | 
272 | 
273 | ```
274 | 
275 | 
276 | 
277 | Use `clusterProfiler` to do enrichment analysis
278 | 
279 | ```{r eval=FALSE}
280 | pvalue2 <- sort(pvalue, decreasing = TRUE)
281 | library(clusterProfiler)
282 | library(enrichplot)
283 | library(org.Hs.eg.db)
284 | gsego <- gseGO(pvalue2, OrgDb = org.Hs.eg.db, keyType = "SYMBOL")
285 | dotplot(gsego)
286 | ```
287 | 
288 | 
289 | 
290 | 
291 | 
292 | 
293 | 
294 | ## GEO chip data processing
295 | 
296 | The function `gene_ave` averages the expression data of different ids for the same gene in the GEO chip data. For example:
297 | 
298 | ```{r eval=FALSE}
299 | aa <- c("MARCH1","MARC1","MARCH1","MARCH1","MARCH1")
300 | bb <- c(2.969058399,4.722410064,8.165514853,8.24243893,8.60815086)
301 | cc <- c(3.969058399,5.722410064,7.165514853,6.24243893,7.60815086)
302 | file_gene_ave <- data.frame(aa=aa,bb=bb,cc=cc)
303 | colnames(file_gene_ave) <- c("Gene", "GSM1629982", "GSM1629983")
304 | result <- gene_ave(file_gene_ave, 1)
305 | ```
306 | 
307 | Multiple gene symbols may correspond to the same chip id. The function `repAssign` assigns the expression of this id to each gene, while the function `repRemove` deletes the expression of this id. For example:
308 | 
309 | ```{r}
310 | aa <- c("MARCH1 /// MMA","MARC1","MARCH2 /// MARCH3","MARCH3 /// MARCH4","MARCH1")
311 | bb <- c("2.969058399","4.722410064","8.165514853","8.24243893","8.60815086")
312 | cc <- c("3.969058399","5.722410064","7.165514853","6.24243893","7.60815086")
313 | input_file <- data.frame(aa=aa,bb=bb,cc=cc)
314 | 
315 | repAssign_result <- repAssign(input_file," /// ")
316 | repRemove_result <- repRemove(input_file," /// ")
317 | ```
318 | 
319 | ## Other downstream analyses
320 | 1. The function `id_conversion_TCGA` converts ENSEMBL gene ids to gene symbols in TCGA data. For example:
321 | 
322 | ```{r}
323 | data(profile)
324 | result <- id_conversion_TCGA(profile)
325 | ```
326 | 
327 | The parameter profile is a data.frame or matrix of gene expression data in TCGA.
328 | 
329 | **Note:** In previous versions (< 1.0.0), `id_conversion` and `id_conversion_TCGA` used HGNC data to convert human gene ids. In future versions, we will use `clusterProfiler::bitr` for ID conversion.
330 | 
331 | ```{r}
332 | library(clusterProfiler)
333 | library(org.Hs.eg.db)
334 | bitr(c("A2ML1", "A2ML1-AS1", "A4GALT", "A12M1", "AAAS"), fromType = "SYMBOL", 
335 |      toType = "ENSEMBL", OrgDb = org.Hs.eg.db, drop = FALSE)
336 | ```
337 | 
338 | 
339 | 
340 | 2. The functions `countToFpkm` and `countToTpm` convert count data to FPKM or TPM data.
341 | 
342 | ```{r}
343 | data(gene_cov)
344 | lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
345 | rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
346 | colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
347 | result <- countToFpkm(lung_squ_count2,
348 |   keyType = "SYMBOL",
349 |   gene_cov = gene_cov
350 | )
351 | result
352 | ```
353 | 
354 | ```{r}
355 | lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
356 | rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
357 | colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
358 | result <- countToTpm(lung_squ_count2,
359 |   keyType = "SYMBOL",
360 |   gene_cov = gene_cov
361 | )
362 | result
363 | ```
364 | 
365 | **Note:** Now the combined clinical data can be downloaded directly from [TCGAbiolinks](http://www.bioconductor.org/packages/release/bioc/vignettes/TCGAbiolinks/inst/doc/clinical.html).
366 | 
367 | ```{r eval=FALSE}
368 | library(TCGAbiolinks)
369 | ## get BCR Biotab data
370 | query <- GDCquery(project = "TCGA-ACC", 
371 |                   data.category = "Clinical",
372 |                   data.type = "Clinical Supplement", 
373 |                   data.format = "BCR Biotab")
374 | GDCdownload(query)
375 | clinical.BCRtab.all <- GDCprepare(query)
376 | names(clinical.BCRtab.all)
377 | 
378 | ## get indexed data
379 | clinical <- GDCquery_clinic(project = "TCGA-ACC", type = "clinical")
380 | ```
381 | 


--------------------------------------------------------------------------------
/data/GSE66705_sample2.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/data/GSE66705_sample2.rda


--------------------------------------------------------------------------------
/data/geneExpress.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/data/geneExpress.rda


--------------------------------------------------------------------------------
/data/gene_cov.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/data/gene_cov.rda


--------------------------------------------------------------------------------
/data/kegg_liver.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/data/kegg_liver.rda


--------------------------------------------------------------------------------
/data/module.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/data/module.rda


--------------------------------------------------------------------------------
/data/profile.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/data/profile.rda


--------------------------------------------------------------------------------
/data/ventricle.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GeoTcgaData/544df0696c75c5a8bb378a8154d41a5269d7d38e/data/ventricle.rda


--------------------------------------------------------------------------------
/docs/news/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Changelog • GeoTcgaData</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css"><script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" 
integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet"><script src="../pkgdown.js"></script><meta property="og:title" content="Changelog"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
 3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
 6 |     
 7 | 
 8 |     <div class="container template-news">
 9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
10 |   <div class="container">
11 |     <div class="navbar-header">
12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
13 |         <span class="sr-only">Toggle navigation</span>
14 |         <span class="icon-bar"></span>
15 |         <span class="icon-bar"></span>
16 |         <span class="icon-bar"></span>
17 |       </button>
18 |       <span class="navbar-brand">
19 |         <a class="navbar-link" href="../index.html">GeoTcgaData</a>
20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">1.1.1.993</span>
21 |       </span>
22 |     </div>
23 | 
24 |     <div id="navbar" class="navbar-collapse collapse">
25 |       <ul class="nav navbar-nav"><li>
26 |   <a href="../articles/GeoTcgaData.html">Get started</a>
27 | </li>
28 | <li>
29 |   <a href="../reference/index.html">Reference</a>
30 | </li>
31 | <li>
32 |   <a href="../news/index.html">Changelog</a>
33 | </li>
34 |       </ul><ul class="nav navbar-nav navbar-right"></ul></div><!--/.nav-collapse -->
35 |   </div><!--/.container -->
36 | </div><!--/.navbar -->
37 | 
38 |       
39 | 
40 |       </header><div class="row">
41 |   <div class="col-md-9 contents">
42 |     <div class="page-header">
43 |       <h1 data-toc-skip>Changelog <small></small></h1>
44 |       
45 |     </div>
46 | 
47 |     <div class="section level2">
48 | <h2 class="page-header" data-toc-text="1.1.1.993" id="geotcgadata-111993">GeoTcgaData 1.1.1.993<a class="anchor" aria-label="anchor" href="#geotcgadata-111993"></a></h2>
49 | <ul><li>fix return value of <code>differential_array</code> (2022_10_8, Sat)</li>
50 | <li>fix gene length bug in <code><a href="../reference/countToTpm.html">countToTpm()</a></code> and <code><a href="../reference/countToFpkm.html">countToFpkm()</a></code>(2022_9_22, Tue)</li>
51 | <li>fix a bug in <code>id_conversion</code> (2022-8-27, Sat)</li>
52 | </ul></div>
53 |     <div class="section level2">
54 | <h2 class="page-header" data-toc-text="1.1.1" id="geotcgadata-111">GeoTcgaData 1.1.1<small>2022-08-12</small><a class="anchor" aria-label="anchor" href="#geotcgadata-111"></a></h2>
55 | <ul><li>fix a bug in <code>differential_RNA(useTopconfects = TRUE)</code> (2022-8-12, Fir)</li></ul></div>
56 |     <div class="section level2">
57 | <h2 class="page-header" data-toc-text="1.0.3" id="geotcgadata-103">GeoTcgaData 1.0.3<a class="anchor" aria-label="anchor" href="#geotcgadata-103"></a></h2>
58 | <ul><li>add function <code>methydifferential_ucsc</code> and <code>methydifferential_limma</code>(2021-10-24, Sun)</li></ul></div>
59 |     <div class="section level2">
60 | <h2 class="page-header" data-toc-text="1.0.2" id="geotcgadata-102">GeoTcgaData 1.0.2<small>2021-10-24</small><a class="anchor" aria-label="anchor" href="#geotcgadata-102"></a></h2>
61 | <ul><li>update hgnc_file data(2021-10-24, Sun)</li></ul></div>
62 |     <div class="section level2">
63 | <h2 class="page-header" data-toc-text="1.0.0.1" id="geotcgadata-1001">GeoTcgaData 1.0.0.1<small>2021-07-26</small><a class="anchor" aria-label="anchor" href="#geotcgadata-1001"></a></h2>
64 | <ul><li>add function <code>differential_RNA</code> to do difference analysis of RNA-seq data(2021-7-20, Tue)</li></ul></div>
65 |     <div class="section level2">
66 | <h2 class="page-header" data-toc-text="0.2.1" id="geotcgadata-021">GeoTcgaData 0.2.1<a class="anchor" aria-label="anchor" href="#geotcgadata-021"></a></h2>
67 | <ul><li>add data hgnc_file</li>
68 | <li>update function: id_ava()</li>
69 | </ul></div>
70 |     <div class="section level2">
71 | <h2 class="page-header" data-toc-text="0.2.0" id="geotcgadata-020">GeoTcgaData 0.2.0<small>2019-09-23</small><a class="anchor" aria-label="anchor" href="#geotcgadata-020"></a></h2>
72 | <ul><li>add functions: ann_merge(), countToFpkm(), countToTpm()</li></ul></div>
73 |   </div>
74 | 
75 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
76 |     <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
77 |     </nav></div>
78 | 
79 | </div>
80 | 
81 | 
82 |       <footer><div class="copyright">
83 |   <p></p><p>Developed by Erqiang Hu.</p>
84 | </div>
85 | 
86 | <div class="pkgdown">
87 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.6.9000.</p>
88 | </div>
89 | 
90 |       </footer></div>
91 | 
92 |   
93 | 
94 | 
95 |   
96 | 
97 |   </body></html>
98 | 
99 | 


--------------------------------------------------------------------------------
/docs/reference/index.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1.0"><title>Function reference • GeoTcgaData</title><!-- jquery --><script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script><!-- Bootstrap --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous"><script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script><!-- bootstrap-toc --><link rel="stylesheet" href="../bootstrap-toc.css"><script src="../bootstrap-toc.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" 
integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- pkgdown --><link href="../pkgdown.css" rel="stylesheet"><script src="../pkgdown.js"></script><meta property="og:title" content="Function reference"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
  3 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
  4 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
  5 | <![endif]--></head><body data-spy="scroll" data-target="#toc">
  6 |     
  7 | 
  8 |     <div class="container template-reference-index">
  9 |       <header><div class="navbar navbar-default navbar-fixed-top" role="navigation">
 10 |   <div class="container">
 11 |     <div class="navbar-header">
 12 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 13 |         <span class="sr-only">Toggle navigation</span>
 14 |         <span class="icon-bar"></span>
 15 |         <span class="icon-bar"></span>
 16 |         <span class="icon-bar"></span>
 17 |       </button>
 18 |       <span class="navbar-brand">
 19 |         <a class="navbar-link" href="../index.html">GeoTcgaData</a>
 20 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="">1.1.1.993</span>
 21 |       </span>
 22 |     </div>
 23 | 
 24 |     <div id="navbar" class="navbar-collapse collapse">
 25 |       <ul class="nav navbar-nav"><li>
 26 |   <a href="../articles/GeoTcgaData.html">Get started</a>
 27 | </li>
 28 | <li>
 29 |   <a href="../reference/index.html">Reference</a>
 30 | </li>
 31 | <li>
 32 |   <a href="../news/index.html">Changelog</a>
 33 | </li>
 34 |       </ul><ul class="nav navbar-nav navbar-right"></ul></div><!--/.nav-collapse -->
 35 |   </div><!--/.container -->
 36 | </div><!--/.navbar -->
 37 | 
 38 |       
 39 | 
 40 |       </header><div class="row">
 41 |   <div class="contents col-md-9">
 42 |     <div class="page-header">
 43 |       <h1>Reference</h1>
 44 |     </div>
 45 | 
 46 |     <table class="ref-index"><colgroup><col class="alias"><col class="title"></colgroup><tbody><tr><th colspan="2">
 47 |           <h2 id="all-functions">All functions <a href="#all-functions" class="anchor" aria-hidden="true"></a></h2>
 48 |           <p class="section-desc"></p>
 49 |         </th>
 50 |       </tr></tbody><tbody><tr><td>
 51 |           <p><code><a href="differential_array.html">differential_array()</a></code> </p>
 52 |         </td>
 53 |         <td><p>differential_array</p></td>
 54 |       </tr><tr><td>
 55 |           <p><code><a href="cal_mean_module.html">cal_mean_module()</a></code> </p>
 56 |         </td>
 57 |         <td><p>Find the mean value of the gene in each module</p></td>
 58 |       </tr><tr><td>
 59 |           <p><code><a href="countToFpkm.html">countToFpkm()</a></code> </p>
 60 |         </td>
 61 |         <td><p>Convert count to FPKM</p></td>
 62 |       </tr><tr><td>
 63 |           <p><code><a href="countToTpm.html">countToTpm()</a></code> </p>
 64 |         </td>
 65 |         <td><p>Convert count to Tpm</p></td>
 66 |       </tr><tr><td>
 67 |           <p><code><a href="differential_cnv.html">differential_cnv()</a></code> </p>
 68 |         </td>
 69 |         <td><p>Do chi-square test to find differential genes</p></td>
 70 |       </tr><tr><td>
 71 |           <p><code><a href="differential_CNV.html">differential_CNV()</a></code> </p>
 72 |         </td>
 73 |         <td><p>Do difference analysis of gene level copy number variation data</p></td>
 74 |       </tr><tr><td>
 75 |           <p><code><a href="differential_limma.html">differential_limma()</a></code> </p>
 76 |         </td>
 77 |         <td><p>differential_limma</p></td>
 78 |       </tr><tr><td>
 79 |           <p><code><a href="differential_RNA.html">differential_RNA()</a></code> </p>
 80 |         </td>
 81 |         <td><p>Do difference analysis of RNA-seq data</p></td>
 82 |       </tr><tr><td>
 83 |           <p><code><a href="differential_RNA_ucsc.html">differential_RNA_ucsc()</a></code> </p>
 84 |         </td>
 85 |         <td><p>Do difference analysis of RNA-seq data downloaded from ucsc</p></td>
 86 |       </tr><tr><td>
 87 |           <p><code><a href="differential_SNP.html">differential_SNP()</a></code> </p>
 88 |         </td>
 89 |         <td><p>Do difference analysis of SNP data</p></td>
 90 |       </tr><tr><td>
 91 |           <p><code><a href="differential_SNP_tcga.html">differential_SNP_tcga()</a></code> </p>
 92 |         </td>
 93 |         <td><p>Do difference analysis of SNP data downloaded from TCGAbiolinks</p></td>
 94 |       </tr><tr><td>
 95 |           <p><code><a href="fpkmToTpm_matrix.html">fpkmToTpm_matrix()</a></code> </p>
 96 |         </td>
 97 |         <td><p>Convert fpkm to Tpm</p></td>
 98 |       </tr><tr><td>
 99 |           <p><code><a href="geneExpress.html">geneExpress</a></code> </p>
100 |         </td>
101 |         <td><p>a data.frame of gene expression data</p></td>
102 |       </tr><tr><td>
103 |           <p><code><a href="gene_ave.html">gene_ave()</a></code> </p>
104 |         </td>
105 |         <td><p>Average the values of same genes in gene expression profile</p></td>
106 |       </tr><tr><td>
107 |           <p><code><a href="gene_cov.html">gene_cov</a></code> </p>
108 |         </td>
109 |         <td><p>a data.frame of gene length and GC content</p></td>
110 |       </tr><tr><td>
111 |           <p><code><a href="GSE66705_sample2.html">GSE66705_sample2</a></code> </p>
112 |         </td>
113 |         <td><p>a matrix of gene expression data in GEO</p></td>
114 |       </tr><tr><td>
115 |           <p><code><a href="id_ava.html">id_ava()</a></code> </p>
116 |         </td>
117 |         <td><p>Gene id conversion types</p></td>
118 |       </tr><tr><td>
119 |           <p><code><a href="id_conversion.html">id_conversion()</a></code> </p>
120 |         </td>
121 |         <td><p>Convert  ENSEMBL gene id to gene Symbol in TCGA</p></td>
122 |       </tr><tr><td>
123 |           <p><code><a href="id_conversion.html">id_conversion()</a></code> </p>
124 |         </td>
125 |         <td><p>Gene id conversion</p></td>
126 |       </tr><tr><td>
127 |           <p><code><a href="kegg_liver.html">kegg_liver</a></code> </p>
128 |         </td>
129 |         <td><p>a matrix of gene expression data in TCGA</p></td>
130 |       </tr><tr><td>
131 |           <p><code><a href="Merge_methy_tcga.html">Merge_methy_tcga()</a></code> </p>
132 |         </td>
133 |         <td><p>Merge methylation data downloaded from TCGA</p></td>
134 |       </tr><tr><td>
135 |           <p><code><a href="methyDiff.html">methyDiff()</a></code> </p>
136 |         </td>
137 |         <td><p>Get methylation difference gene</p></td>
138 |       </tr><tr><td>
139 |           <p><code><a href="methydifferential_ucsc.html">methydifferential_ucsc()</a></code> </p>
140 |         </td>
141 |         <td><p>Title</p></td>
142 |       </tr><tr><td>
143 |           <p><code><a href="module.html">module</a></code> </p>
144 |         </td>
145 |         <td><p>a matrix of module name, gene symbols, and the number of gene symbols</p></td>
146 |       </tr><tr><td>
147 |           <p><code><a href="prepare_chi.html">prepare_chi()</a></code> </p>
148 |         </td>
149 |         <td><p>Prepare file for chi-square test</p></td>
150 |       </tr><tr><td>
151 |           <p><code><a href="profile.html">profile</a></code> </p>
152 |         </td>
153 |         <td><p>a matrix of gene expression data in TCGA</p></td>
154 |       </tr><tr><td>
155 |           <p><code><a href="repAssign.html">repAssign()</a></code> </p>
156 |         </td>
157 |         <td><p>Handle the case where one id corresponds to multiple genes</p></td>
158 |       </tr><tr><td>
159 |           <p><code><a href="repRemove.html">repRemove()</a></code> </p>
160 |         </td>
161 |         <td><p>Handle the case where one id corresponds to multiple genes</p></td>
162 |       </tr><tr><td>
163 |           <p><code><a href="tcga_cli_deal.html">tcga_cli_deal()</a></code> </p>
164 |         </td>
165 |         <td><p>Combine clinical information obtained from TCGA and extract survival data</p></td>
166 |       </tr><tr><td>
167 |           <p><code><a href="ventricle.html">ventricle</a></code> </p>
168 |         </td>
169 |         <td><p>a matrix of gene expression data in GEO</p></td>
170 |       </tr></tbody></table></div>
171 | 
172 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
173 |     <nav id="toc" data-toggle="toc" class="sticky-top"><h2 data-toc-skip>Contents</h2>
174 |     </nav></div>
175 | </div>
176 | 
177 | 
178 |       <footer><div class="copyright">
179 |   <p></p><p>Developed by Erqiang Hu.</p>
180 | </div>
181 | 
182 | <div class="pkgdown">
183 |   <p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.6.9000.</p>
184 | </div>
185 | 
186 |       </footer></div>
187 | 
188 |   
189 | 
190 | 
191 |   
192 | 
193 |   </body></html>
194 | 
195 | 


--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
  3 |   <url>
  4 |     <loc>/404.html</loc>
  5 |   </url>
  6 |   <url>
  7 |     <loc>/articles/GeoTcgaData.html</loc>
  8 |   </url>
  9 |   <url>
 10 |     <loc>/articles/index.html</loc>
 11 |   </url>
 12 |   <url>
 13 |     <loc>/authors.html</loc>
 14 |   </url>
 15 |   <url>
 16 |     <loc>/CONDUCT.html</loc>
 17 |   </url>
 18 |   <url>
 19 |     <loc>/CONTRIBUTING.html</loc>
 20 |   </url>
 21 |   <url>
 22 |     <loc>/index.html</loc>
 23 |   </url>
 24 |   <url>
 25 |     <loc>/LICENSE-text.html</loc>
 26 |   </url>
 27 |   <url>
 28 |     <loc>/news/index.html</loc>
 29 |   </url>
 30 |   <url>
 31 |     <loc>/reference/differential_array.html</loc>
 32 |   </url>
 33 |   <url>
 34 |     <loc>/reference/cal_mean_module.html</loc>
 35 |   </url>
 36 |   <url>
 37 |     <loc>/reference/classify_sample.html</loc>
 38 |   </url>
 39 |   <url>
 40 |     <loc>/reference/countToFpkm.html</loc>
 41 |   </url>
 42 |   <url>
 43 |     <loc>/reference/countToTpm.html</loc>
 44 |   </url>
 45 |   <url>
 46 |     <loc>/reference/differential_cnv.html</loc>
 47 |   </url>
 48 |   <url>
 49 |     <loc>/reference/differential_CNV.html</loc>
 50 |   </url>
 51 |   <url>
 52 |     <loc>/reference/differential_gene.html</loc>
 53 |   </url>
 54 |   <url>
 55 |     <loc>/reference/differential_limma.html</loc>
 56 |   </url>
 57 |   <url>
 58 |     <loc>/reference/differential_RNA.html</loc>
 59 |   </url>
 60 |   <url>
 61 |     <loc>/reference/differential_RNA_ucsc.html</loc>
 62 |   </url>
 63 |   <url>
 64 |     <loc>/reference/differential_SNP.html</loc>
 65 |   </url>
 66 |   <url>
 67 |     <loc>/reference/differential_SNP_tcga.html</loc>
 68 |   </url>
 69 |   <url>
 70 |     <loc>/reference/fpkmToTpm_matrix.html</loc>
 71 |   </url>
 72 |   <url>
 73 |     <loc>/reference/geneExpress.html</loc>
 74 |   </url>
 75 |   <url>
 76 |     <loc>/reference/gene_ave.html</loc>
 77 |   </url>
 78 |   <url>
 79 |     <loc>/reference/gene_cov.html</loc>
 80 |   </url>
 81 |   <url>
 82 |     <loc>/reference/GSE66705_sample2.html</loc>
 83 |   </url>
 84 |   <url>
 85 |     <loc>/reference/id_ava.html</loc>
 86 |   </url>
 87 |   <url>
 88 |     <loc>/reference/id_conversion.html</loc>
 89 |   </url>
 90 |   <url>
 91 |     <loc>/reference/id_conversion.html</loc>
 92 |   </url>
 93 |   <url>
 94 |     <loc>/reference/index.html</loc>
 95 |   </url>
 96 |   <url>
 97 |     <loc>/reference/kegg_liver.html</loc>
 98 |   </url>
 99 |   <url>
100 |     <loc>/reference/Merge_methy_tcga.html</loc>
101 |   </url>
102 |   <url>
103 |     <loc>/reference/methyDiff.html</loc>
104 |   </url>
105 |   <url>
106 |     <loc>/reference/methydifferential_ucsc.html</loc>
107 |   </url>
108 |   <url>
109 |     <loc>/reference/module.html</loc>
110 |   </url>
111 |   <url>
112 |     <loc>/reference/prepare_chi.html</loc>
113 |   </url>
114 |   <url>
115 |     <loc>/reference/profile.html</loc>
116 |   </url>
117 |   <url>
118 |     <loc>/reference/repAssign.html</loc>
119 |   </url>
120 |   <url>
121 |     <loc>/reference/repRemove.html</loc>
122 |   </url>
123 |   <url>
124 |     <loc>/reference/tcga_cli_deal.html</loc>
125 |   </url>
126 |   <url>
127 |     <loc>/reference/ventricle.html</loc>
128 |   </url>
129 | </urlset>
130 | 


--------------------------------------------------------------------------------
/inst/extdata/build_data.R:
--------------------------------------------------------------------------------
 1 | # Developer script: rebuilds the objects saved through
 2 | # usethis::use_data(internal = TRUE) and computes the gene length / GC
 3 | # content table `gene_cov`.
 4 | #setwd("E:\\GeoTcgaData_work")
 5 | 
 6 | ## internal data
 7 | # Use the freshly downloaded HGNC table when it exists locally; otherwise
 8 | # fall back to the copy already shipped inside the package.
 9 | hgnc_path <- "E:\\GeoTcgaData_work\\hgnc_complete_set.txt"
10 | if (file.exists(hgnc_path)) {
11 |   # data.table = FALSE makes fread() return a plain data.frame
12 |   hgnc_file <- data.table::fread(hgnc_path, sep = "\t", header = TRUE,
13 |     data.table = FALSE)
14 |   hgnc_file <- dplyr::select(hgnc_file, -c("alias_symbol", "alias_name",
15 |     "prev_symbol", "lsdb", "agr"))
16 | } else {
17 |   hgnc_file <- GeoTcgaData:::hgnc_file
18 | }
19 | gene_loc_len <- GeoTcgaData:::gene_loc_len
20 | hgnc <- GeoTcgaData:::hgnc
21 | # NOTE(review): genePos is looked up but not passed to use_data() below, so
22 | # it is not among the objects saved here -- confirm this is intended.
23 | genePos <- GeoTcgaData:::genePos
24 | usethis::use_data(hgnc_file, hgnc, gene_loc_len,
25 |   internal = TRUE, compress = "xz", overwrite = TRUE)
26 | 
27 | 
28 | ## gene_cov
29 | library(TxDb.Hsapiens.UCSC.hg38.knownGene)
30 | library(BSgenome.Hsapiens.UCSC.hg38)
31 | hg38_TxDb <- TxDb.Hsapiens.UCSC.hg38.knownGene
32 | hg38 <- BSgenome.Hsapiens.UCSC.hg38
33 | 
34 | # Per-gene exonic length and GC fraction.
35 | #
36 | # TxDB:     annotation object handed to genes() and exons().
37 | # BSGENOME: genome object handed to BSgenome::getSeq().
38 | # Returns a data.frame with columns `length` and `GC`, one row per
39 | # element of genes(TxDB), row names taken from names() of that result.
40 | calc_gene_cov <- function(TxDB, BSGENOME){
41 |     Gene <- genes(TxDB, single.strand.genes.only = FALSE)
42 |     Exon <- exons(x = TxDB)
43 |     # keep one copy of each exon per gene it overlaps, tagged with gene id
44 |     Overlap <- findOverlaps(Exon, Gene)
45 |     Exon <- Exon[queryHits(Overlap)]
46 |     mcols(Exon)$gene_id <- mcols(Gene[subjectHits(Overlap)])$gene_id
47 |     # group by gene, then reduce() so overlapping exons are merged
48 |     Exon <- split(Exon, mcols(Exon)$gene_id)
49 |     Exon <- reduce(Exon)
50 |     # c(total exonic width, GC fraction) for one gene's ranges
51 |     calculate_cov <- function(x){
52 |         xlen <- sum(width(x))
53 |         xseq <- BSgenome::getSeq(BSGENOME, x)
54 |         xGC <- sum(Biostrings::letterFrequency(xseq, 'GC'))/xlen
55 |         c(xlen, xGC)
56 |     }
57 |     gene_cov <- lapply(Exon, calculate_cov)
58 |     gene_cov <- gene_cov[names(Gene)]
59 |     # row-bind the per-gene pairs into an n x 2 matrix
60 |     gene_cov <- do.call(rbind, gene_cov)
61 |     rownames(gene_cov) <- names(Gene)
62 |     colnames(gene_cov) <- c('length', 'GC')
63 |     as.data.frame(gene_cov)
64 | }
65 | gene_cov <- calc_gene_cov(TxDB = hg38_TxDb, BSGENOME = hg38)
66 | # NOTE(review): gene_cov is not written to disk by this script; presumably
67 | # it is saved separately -- confirm.
68 | 


--------------------------------------------------------------------------------
/inst/extdata/cnv/00de3/HONGS_p_TCGAb3_75_76_77_NSP_G.txt:
--------------------------------------------------------------------------------
 1 | GDC_Aliquot	Chromosome	Start	End	Num_Probes	Segment_Mean
 2 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	1	3301765	55785707	28262	0.0033
 3 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	1	55792696	55827469	30	0.5462
 4 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	1	55829348	87233893	20119	-0.0012
 5 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	1	87237285	87239700	8	-1.1169
 6 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	1	87243121	247650984	81057	0.0018
 7 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	2	480597	145900182	76793	0.0034
 8 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	2	145907622	145908689	2	-2.1771
 9 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	2	145908860	193722432	27110	0.0069
10 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	2	193724639	193733648	7	-1.0391
11 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	2	193737212	230912395	21841	0.0103
12 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	2	230912836	230913055	2	-1.5502
13 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	2	230913581	241537572	6229	0.0076
14 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	3	2170634	170801846	93068	0.0017
15 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	3	170804047	170833713	18	0.6177
16 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	3	170834686	181217584	5282	-0.0004
17 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	3	181220309	181225348	7	-0.8341
18 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	3	181239123	186849637	2727	0.002
19 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	3	186850494	186859338	15	0.5654
20 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	3	186860325	197812401	5891	0.0045
21 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	1059384	5815967	2403	0.0006
22 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	5817427	5817557	2	-1.9893
23 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	5817654	29171694	15280	0.001
24 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	29179931	29180476	2	-1.5391
25 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	29180581	52137599	10803	0.0037
26 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	52138506	52139008	2	-1.7386
27 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	52141824	110472535	32956	0.0009
28 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	110475372	110475792	2	-1.7461
29 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	4	110477052	187842528	41865	-0.0004
30 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	5	913983	95230802	50545	0.0016
31 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	5	95236087	95237226	3	-1.9218
32 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	5	95237881	180934240	50395	0.0013
33 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	6	1011760	4574891	2512	-0.0041
34 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	6	4575676	4581489	4	-1.5749
35 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	6	4581494	31204878	16832	0.0056
36 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	6	31208262	31208825	2	1.6481
37 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	6	31210138	170596889	77652	0.0046
38 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	7	664936	40174597	23004	-0.001
39 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	7	40175592	40219954	28	0.5835
40 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	7	40223821	158592540	58784	0.001
41 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	8	667625	144182542	82073	0.0016
42 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	9	789794	138044505	68363	-0.0004
43 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	10	366509	30944103	21620	0.0034
44 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	10	30957229	30978973	27	-0.7166
45 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	10	30979829	133411599	59493	0.0011
46 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	11	456012	26339913	16706	-0.003
47 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	11	26342749	26345172	2	-1.7145
48 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	11	26345232	123812391	53610	0.0006
49 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	11	123812566	123812839	2	-1.7801
50 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	11	123816297	134272740	7131	0.0028
51 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	12	780472	132605822	74211	0.002
52 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	13	18874255	114226675	56874	0.0028
53 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	14	20033191	27398580	3634	0.003
54 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	14	27399274	27507409	51	-0.8739
55 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	14	27507968	38840075	6666	-0.0016
56 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	14	38841818	38863301	12	-1.178
57 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	14	38865374	105533894	39335	0.001
58 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	15	23437561	101344124	44708	0
59 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	16	603333	53175083	18211	0.0032
60 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	16	53177717	53177940	2	-2.6236
61 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	16	53178250	89317317	22941	0.0007
62 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	17	1074619	38769230	14904	-0.0016
63 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	17	38769928	38772885	2	-1.8733
64 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	17	38772947	82959812	22329	0.0021
65 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	18	326691	5947124	2994	-0.0029
66 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	18	5947875	5948061	2	-1.9811
67 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	18	5948488	79349796	39875	0.0014
68 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	19	283868	58370362	24118	0.0013
69 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	20	472817	63588502	37516	-0.0009
70 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	21	13974127	46262057	20561	-0.0025
71 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	22	16934932	31381621	5794	-0.0031
72 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	22	31382595	31384583	2	-1.4397
73 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	22	31388932	48940621	11270	-0.0013
74 | 8dd570dd-2a44-4d3d-bcf2-d9c5c21d00f9	X	3236359	155677414	63397	0.0049
75 | 


--------------------------------------------------------------------------------
/inst/extdata/cnv/00e8/MICHE_p_TCGAb_428_429_NS.txt:
--------------------------------------------------------------------------------
 1 | GDC_Aliquot	Chromosome	Start	End	Num_Probes	Segment_Mean
 2 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	1	3301765	247650984	129760	0.0004
 3 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	480597	9808346	5787	0.0102
 4 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	9809447	9810242	3	-1.5053
 5 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	9813820	70029123	35550	0.0044
 6 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	70031855	70032363	3	-0.9245
 7 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	70036199	76542146	3636	0.0025
 8 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	76552672	76572000	7	-0.9465
 9 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	76580922	221887086	75389	0.0053
10 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	221889381	221894838	4	-1.11
11 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	2	221895559	241537572	11830	0.0033
12 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	2170634	10517554	4836	0.0046
13 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	10519258	10520849	2	-1.5264
14 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	10523441	65061928	31955	0.0024
15 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	65061958	65062362	2	-1.3193
16 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	65066087	65446355	334	-0.0061
17 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	65446789	65446808	2	-1.1371
18 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	65448972	80884445	9563	-0.0025
19 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	80886020	80887429	3	-1.267
20 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	3	80890988	197812401	60481	0.0033
21 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	1059384	36534634	21978	0.0017
22 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	36538381	36538411	2	-1.44
23 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	36544419	39205698	1886	-0.0025
24 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	39208967	39210064	2	-1.2461
25 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	39210390	57634986	8283	0.0016
26 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	57635142	57635246	3	-0.9563
27 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	57639758	60483928	1644	0.0017
28 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	60485425	60486464	2	-1.3006
29 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	60486811	111043051	28100	0.001
30 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	111043145	111043496	4	-0.8891
31 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	4	111044539	187842528	41595	0.0019
32 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	5	913983	17462220	10901	-0.002
33 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	5	17747964	17805789	62	-1.0687
34 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	5	17810843	135486593	62140	0.0009
35 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	5	135489795	135489993	2	-2.9358
36 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	5	135492523	173001547	23795	0.0018
37 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	5	173003677	173010638	5	-1.0247
38 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	5	173011703	180934240	4204	0.0039
39 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	6	1011760	145256409	82006	0.0011
40 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	6	145256510	145257320	2	-1.2561
41 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	6	145259422	148392567	2017	0.0148
42 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	6	148393488	148401973	2	-1.4044
43 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	6	148402609	170596889	13141	0.0051
44 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	664936	3762998	1146	-0.0044
45 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	3766579	3766898	2	-1.2036
46 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	3767918	7516194	1521	0.0115
47 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	7518409	7518634	2	-1.2823
48 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	7522447	75626698	35601	0.0044
49 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	75630440	75631353	3	-1.1212
50 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	75631392	102171122	13386	0.0017
51 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	102175051	102175356	2	-1.405
52 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	102176035	126079078	12799	0.0035
53 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	126079396	126079402	2	-1.2042
54 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	7	126081764	158592540	17468	0.0054
55 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	8	667625	144182542	82217	0.0013
56 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	9	789794	138044505	68474	0.0012
57 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	10	366509	17480443	12572	-0.0088
58 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	10	17480684	17480700	2	-1.3838
59 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	10	17481042	133411599	68725	0.0002
60 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	11	456012	134272740	77584	0.0027
61 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	12	780472	132605822	74347	0.0021
62 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	13	18874255	32367536	8277	-0.0004
63 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	13	32367551	32371795	2	-1.3959
64 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	13	32373678	68558328	19791	-0.0026
65 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	13	68565286	68565738	7	-0.6895
66 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	13	68568076	114226675	28917	0.0019
67 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	14	20033191	103895954	49346	-0.0001
68 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	14	103900849	103901391	2	-1.2198
69 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	14	103902012	105533894	433	-0.005
70 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	15	23437561	101344124	44793	0.0021
71 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	16	603333	20586231	9228	0.0011
72 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	16	20595007	20604353	6	0.791
73 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	16	20622023	83760390	29282	0.0026
74 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	16	83760392	83760401	2	-1.6412
75 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	16	83760419	89317317	2698	0.0049
76 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	17	1074619	82959812	37293	0.0006
77 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	18	326691	66976937	36698	0.0002
78 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	18	66978926	67010720	16	0.5568
79 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	18	67014755	79349796	6252	0.0046
80 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	19	283868	58370362	24154	0.0016
81 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	472817	14289450	8860	0.001
82 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	14291481	14296040	2	-1.1918
83 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	14299785	31449137	8201	0.0001
84 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	31452707	31461069	6	-0.6285
85 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	31465102	39405820	4137	0.0048
86 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	39410348	39413403	3	-1.1142
87 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	39417485	46267140	4547	0.0047
88 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	46269102	46269251	2	-1.921
89 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	20	46270491	63588502	11803	0.0052
90 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	21	13974127	24213071	6276	-0.0001
91 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	21	24215936	24216026	3	-1.3743
92 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	21	24216643	46262057	14320	0.0035
93 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	22	16934932	48940621	17094	0.0001
94 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	X	3236359	13853883	3996	-0.0026
95 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	X	13854003	13854349	3	-3.0405
96 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	X	13856180	79841790	26054	0.01
97 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	X	79842702	79844341	2	-5.3043
98 | 8b50b805-0ecd-46f5-b986-aa9d1ba0831f	X	79849126	155677414	33730	0.0124
99 | 


--------------------------------------------------------------------------------
/inst/extdata/methy/0a0b4/jhu-usc.e.H.4.lvl-3.TCGA-13-1405-01A-01D-0460-05.g.txt:
--------------------------------------------------------------------------------
 1 | Composite Element REF	Beta_value	Chromosome	Start	End	Gene_Symbol	Gene_Type	Transcript_ID	Position_to_TSS	CGI_Coordinate	Feature_Type
 2 | cg00000292	0.869906238572323	chr16	28878779	28878780	ATP2A1;ATP2A1;ATP2A1;ATP2A1;ATP2A1	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000357084.6;ENST00000395503.7;ENST00000536376.4;ENST00000562185.4;ENST00000563975.1	373;290;-1275;-465;-83	CGI:chr16:28879633-28880547	N_Shore
 3 | cg00002426	0.116136770722113	chr3	57757816	57757817	SLMAP;SLMAP;SLMAP;SLMAP;SLMAP;SLMAP	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000295951.6;ENST00000295952.6;ENST00000383718.6;ENST00000428312.4;ENST00000449503.5;ENST00000467901.1	1585;368;261;257;257;514	CGI:chr3:57756198-57757263	S_Shore
 4 | cg00003994	0.406591482378321	chr7	15686237	15686238	MEOX2	protein_coding	ENST00000262041.5	576	CGI:chr7:16399497-16399700	.
 5 | cg00005847	0.2148836012527	chr2	176164345	176164346	AC009336.19;HOXD3;HOXD3;HOXD3;RP11-387A1.5	protein_coding;protein_coding;protein_coding;protein_coding;antisense	ENST00000468418.4;ENST00000249440.4;ENST00000410016.4;ENST00000432796.2;ENST00000608941.1	13259;267;3453;27387;1372	CGI:chr2:176164685-176165509	N_Shore
 6 | cg00006414	0.2148836012527	chr7	149125745	149125746	RN7SL521P;ZNF398;ZNF425;ZNF425	misc_RNA;protein_coding;protein_coding;protein_coding	ENST00000488398.3;ENST00000426851.5;ENST00000378061.5;ENST00000483014.1	242;-672;602;562	CGI:chr7:149126122-149127136	N_Shore
 7 | cg00007981	0.0111896937323695	chr11	94129428	94129429	PANX1;PANX1	protein_coding;protein_coding	ENST00000227638.6;ENST00000436171.2	499;498	CGI:chr11:94128394-94129607	Island
 8 | cg00008493	0.994159645759505	chr14	93347431	93347432	COX8C;UNC79	protein_coding;protein_coding	ENST00000342144.2;ENST00000256339.7	239;14211	CGI:chr14:93347137-93347765	Island
 9 | cg00008713	0.00826646235457973	chr18	11980954	11980955	IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000269159.6;ENST00000383376.8;ENST00000588752.4;ENST00000588927.4;ENST00000589238.4;ENST00000590107.4;ENST00000590138.1;ENST00000625802.1	-475;-563;-72;-717;-84;-512;-658;-475	CGI:chr18:11980484-11982143	Island
10 | cg00009407	0.0108723860129016	chr14	88824577	88824578	TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000338104.9;ENST00000345383.8;ENST00000346301.7;ENST00000354441.9;ENST00000358622.8;ENST00000380656.5;ENST00000536576.4;ENST00000553718.1;ENST00000554686.4;ENST00000555057.4;ENST00000556077.4;ENST00000556567.4;ENST00000556651.4;ENST00000614125.3;ENST00000622513.3	-80;423;-80;-84;423;-86;423;-18;-163;-96;423;-75;-109;423;-48	CGI:chr14:88824574-88825011	Island
11 | cg00010193	0.71090039937439	chr4	1151428	1151429	AC092535.3;TMED11P	antisense;unitary_pseudogene	ENST00000417557.1;ENST00000479478.4	55;2299	CGI:chr4:1144210-1146033	.
12 | cg00011459	0.90483721870109	chr16	8796568	8796569	PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;TMEM186;TMEM186	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000268261.7;ENST00000562318.4;ENST00000562448.1;ENST00000564030.4;ENST00000564069.1;ENST00000565221.4;ENST00000565896.4;ENST00000566196.4;ENST00000566540.4;ENST00000566604.4;ENST00000566983.4;ENST00000568602.4;ENST00000569958.4;ENST00000570076.4;ENST00000570134.4;ENST00000333050.6;ENST00000564869.1	-1250;-1275;-1275;-1254;-1345;-1300;-1275;-1272;-1275;-1270;7744;-1275;-1300;-1283;-1271;1081;1075	CGI:chr16:8797465-8798071	N_Shore
13 | cg00012199	0.013190421994616	chr14	20682865	20682866	ANG;RNASE4	protein_coding;protein_coding	ENST00000336811.9;ENST00000555835.2	-1313;-1236	CGI:chr14:20682759-20683065	Island
14 | cg00012386	0.00562937717912839	chr1	227734811	227734812	JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000366758.6;ENST00000438896.2;ENST00000465251.1;ENST00000480590.1;ENST00000485807.1;ENST00000615711.3;ENST00000620518.3;ENST00000315781.8;ENST00000366759.7;ENST00000366760.4;ENST00000418653.4;ENST00000426344.4;ENST00000470038.1;ENST00000475930.1;ENST00000491439.1;ENST00000617596.3	601;601;-439;266;-433;601;473;-186;-186;6153;-621;-626;-1363;6192;6103;-643	CGI:chr1:227734942-227735730	N_Shore
15 | cg00012792	0.0130914072814042	chr6	8064260	8064261	BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5-TXNDC5;EEF1E1-BLOC1S5	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000244777.5;ENST00000397457.5;ENST00000543936.4;ENST00000627748.1;ENST00000439343.2;ENST00000397456.2	134;155;155;137;105;38271	CGI:chr6:8063813-8064532	Island


--------------------------------------------------------------------------------
/inst/extdata/methy/0a0b4/logs/file1.parcel:
--------------------------------------------------------------------------------
  1 | cintervaltree.intervaltree
  2 | IntervalTree
  3 | p0
  4 | ((lp1
  5 | cintervaltree.interval
  6 | Interval
  7 | p2
  8 | (I0
  9 | I1048576
 10 | (dp3
 11 | S'md5sum'
 12 | p4
 13 | S'007d09d1333971eb947569f6a4ca3b36'
 14 | p5
 15 | stp6
 16 | Rp7
 17 | ag2
 18 | (I1048576
 19 | I2097152
 20 | (dp8
 21 | g4
 22 | S'9dd013a350555afe41f63985987d2182'
 23 | p9
 24 | stp10
 25 | Rp11
 26 | ag2
 27 | (I2097152
 28 | I3145728
 29 | (dp12
 30 | g4
 31 | S'2d7aa1a88fbb9e6c47364461aac67ad4'
 32 | p13
 33 | stp14
 34 | Rp15
 35 | ag2
 36 | (I3145728
 37 | I4194304
 38 | (dp16
 39 | g4
 40 | S'2ab2df63fdb543e5a103cf3e6701e579'
 41 | p17
 42 | stp18
 43 | Rp19
 44 | ag2
 45 | (I4194304
 46 | I5242880
 47 | (dp20
 48 | g4
 49 | S'267a2c4eb24b1b3ec2776291c75e9cc4'
 50 | p21
 51 | stp22
 52 | Rp23
 53 | ag2
 54 | (I5242880
 55 | I6291456
 56 | (dp24
 57 | g4
 58 | S'cce6ce104a1cedca0335d622c43a4b3b'
 59 | p25
 60 | stp26
 61 | Rp27
 62 | ag2
 63 | (I6291456
 64 | I7340032
 65 | (dp28
 66 | g4
 67 | S'849ececa93632641aca3615754a8bac8'
 68 | p29
 69 | stp30
 70 | Rp31
 71 | ag2
 72 | (I7340032
 73 | I8388608
 74 | (dp32
 75 | g4
 76 | S'9296760f45ccc5105c12a4ca81dc4b0d'
 77 | p33
 78 | stp34
 79 | Rp35
 80 | ag2
 81 | (I8388608
 82 | I9437184
 83 | (dp36
 84 | g4
 85 | S'103e09b9b48151c4a7049220ae2ac54f'
 86 | p37
 87 | stp38
 88 | Rp39
 89 | ag2
 90 | (I9437184
 91 | I9956275
 92 | (dp40
 93 | g4
 94 | S'860d32161e9b8a1cff47d8126124bee0'
 95 | p41
 96 | stp42
 97 | Rp43
 98 | atp44
 99 | Rp45
100 | .


--------------------------------------------------------------------------------
/inst/extdata/methy/0a6b/jhu-usc.e.H.10.lvl-3.TCGA-30-1880-01A-01D-0652-05.g.txt:
--------------------------------------------------------------------------------
 1 | Composite Element REF	Beta_value	Chromosome	Start	End	Gene_Symbol	Gene_Type	Transcript_ID	Position_to_TSS	CGI_Coordinate	Feature_Type
 2 | cg00000292	0.796523939932179	chr16	28878779	28878780	ATP2A1;ATP2A1;ATP2A1;ATP2A1;ATP2A1	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000357084.6;ENST00000395503.7;ENST00000536376.4;ENST00000562185.4;ENST00000563975.1	373;290;-1275;-465;-83	CGI:chr16:28879633-28880547	N_Shore
 3 | cg00002426	0.0770646801597857	chr3	57757816	57757817	SLMAP;SLMAP;SLMAP;SLMAP;SLMAP;SLMAP	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000295951.6;ENST00000295952.6;ENST00000383718.6;ENST00000428312.4;ENST00000449503.5;ENST00000467901.1	1585;368;261;257;257;514	CGI:chr3:57756198-57757263	S_Shore
 4 | cg00003994	0.0832823123959163	chr7	15686237	15686238	MEOX2	protein_coding	ENST00000262041.5	576	CGI:chr7:16399497-16399700	.
 5 | cg00005847	0.504193627239696	chr2	176164345	176164346	AC009336.19;HOXD3;HOXD3;HOXD3;RP11-387A1.5	protein_coding;protein_coding;protein_coding;protein_coding;antisense	ENST00000468418.4;ENST00000249440.4;ENST00000410016.4;ENST00000432796.2;ENST00000608941.1	13259;267;3453;27387;1372	CGI:chr2:176164685-176165509	N_Shore
 6 | cg00006414	0.2148836012527	chr7	149125745	149125746	RN7SL521P;ZNF398;ZNF425;ZNF425	misc_RNA;protein_coding;protein_coding;protein_coding	ENST00000488398.3;ENST00000426851.5;ENST00000378061.5;ENST00000483014.1	242;-672;602;562	CGI:chr7:149126122-149127136	N_Shore
 7 | cg00007981	0.0228410194656724	chr11	94129428	94129429	PANX1;PANX1	protein_coding;protein_coding	ENST00000227638.6;ENST00000436171.2	499;498	CGI:chr11:94128394-94129607	Island
 8 | cg00008493	0.98921474501248	chr14	93347431	93347432	COX8C;UNC79	protein_coding;protein_coding	ENST00000342144.2;ENST00000256339.7	239;14211	CGI:chr14:93347137-93347765	Island
 9 | cg00008713	0.00849766793256932	chr18	11980954	11980955	IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000269159.6;ENST00000383376.8;ENST00000588752.4;ENST00000588927.4;ENST00000589238.4;ENST00000590107.4;ENST00000590138.1;ENST00000625802.1	-475;-563;-72;-717;-84;-512;-658;-475	CGI:chr18:11980484-11982143	Island
10 | cg00009407	0.00600256591763238	chr14	88824577	88824578	TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000338104.9;ENST00000345383.8;ENST00000346301.7;ENST00000354441.9;ENST00000358622.8;ENST00000380656.5;ENST00000536576.4;ENST00000553718.1;ENST00000554686.4;ENST00000555057.4;ENST00000556077.4;ENST00000556567.4;ENST00000556651.4;ENST00000614125.3;ENST00000622513.3	-80;423;-80;-84;423;-86;423;-18;-163;-96;423;-75;-109;423;-48	CGI:chr14:88824574-88825011	Island
11 | cg00010193	0.78874066422712	chr4	1151428	1151429	AC092535.3;TMED11P	antisense;unitary_pseudogene	ENST00000417557.1;ENST00000479478.4	55;2299	CGI:chr4:1144210-1146033	.
12 | cg00011459	0.950438041209892	chr16	8796568	8796569	PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;TMEM186;TMEM186	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000268261.7;ENST00000562318.4;ENST00000562448.1;ENST00000564030.4;ENST00000564069.1;ENST00000565221.4;ENST00000565896.4;ENST00000566196.4;ENST00000566540.4;ENST00000566604.4;ENST00000566983.4;ENST00000568602.4;ENST00000569958.4;ENST00000570076.4;ENST00000570134.4;ENST00000333050.6;ENST00000564869.1	-1250;-1275;-1275;-1254;-1345;-1300;-1275;-1272;-1275;-1270;7744;-1275;-1300;-1283;-1271;1081;1075	CGI:chr16:8797465-8798071	N_Shore
13 | cg00012199	0.0119642491262264	chr14	20682865	20682866	ANG;RNASE4	protein_coding;protein_coding	ENST00000336811.9;ENST00000555835.2	-1313;-1236	CGI:chr14:20682759-20683065	Island
14 | cg00012386	0.00622611896739758	chr1	227734811	227734812	JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000366758.6;ENST00000438896.2;ENST00000465251.1;ENST00000480590.1;ENST00000485807.1;ENST00000615711.3;ENST00000620518.3;ENST00000315781.8;ENST00000366759.7;ENST00000366760.4;ENST00000418653.4;ENST00000426344.4;ENST00000470038.1;ENST00000475930.1;ENST00000491439.1;ENST00000617596.3	601;601;-439;266;-433;601;473;-186;-186;6153;-621;-626;-1363;6192;6103;-643	CGI:chr1:227734942-227735730	N_Shore
15 | cg00012792	0.0125193760585237	chr6	8064260	8064261	BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5-TXNDC5;EEF1E1-BLOC1S5	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000244777.5;ENST00000397457.5;ENST00000543936.4;ENST00000627748.1;ENST00000439343.2;ENST00000397456.2	134;155;155;137;105;38271	CGI:chr6:8063813-8064532	Island


--------------------------------------------------------------------------------
/inst/extdata/methy/0a6b/logs/file2.parcel:
--------------------------------------------------------------------------------
  1 | cintervaltree.intervaltree
  2 | IntervalTree
  3 | p0
  4 | ((lp1
  5 | cintervaltree.interval
  6 | Interval
  7 | p2
  8 | (I0
  9 | I1048576
 10 | (dp3
 11 | S'md5sum'
 12 | p4
 13 | S'65301bc6a5c8f6838727b70bae72c2b4'
 14 | p5
 15 | stp6
 16 | Rp7
 17 | ag2
 18 | (I1048576
 19 | I2097152
 20 | (dp8
 21 | g4
 22 | S'd1f6b8a428f3244126f82be0a2b0935d'
 23 | p9
 24 | stp10
 25 | Rp11
 26 | ag2
 27 | (I2097152
 28 | I3145728
 29 | (dp12
 30 | g4
 31 | S'42d333e2cced5dc15faba0ca5ef122d5'
 32 | p13
 33 | stp14
 34 | Rp15
 35 | ag2
 36 | (I3145728
 37 | I4194304
 38 | (dp16
 39 | g4
 40 | S'e3551f0661c5c17354a9729d8662383a'
 41 | p17
 42 | stp18
 43 | Rp19
 44 | ag2
 45 | (I4194304
 46 | I5242880
 47 | (dp20
 48 | g4
 49 | S'e38664c435365f5a24408639e7418093'
 50 | p21
 51 | stp22
 52 | Rp23
 53 | ag2
 54 | (I5242880
 55 | I6291456
 56 | (dp24
 57 | g4
 58 | S'250eef3445b553a91902aee338ff4ac4'
 59 | p25
 60 | stp26
 61 | Rp27
 62 | ag2
 63 | (I6291456
 64 | I7340032
 65 | (dp28
 66 | g4
 67 | S'd029fdc9cf26c7f6483bb4d39a36d848'
 68 | p29
 69 | stp30
 70 | Rp31
 71 | ag2
 72 | (I7340032
 73 | I8388608
 74 | (dp32
 75 | g4
 76 | S'f2d9dc657140e23354beeff8a56a9052'
 77 | p33
 78 | stp34
 79 | Rp35
 80 | ag2
 81 | (I8388608
 82 | I9437184
 83 | (dp36
 84 | g4
 85 | S'21352044be69cbd84c4a724957f596d7'
 86 | p37
 87 | stp38
 88 | Rp39
 89 | ag2
 90 | (I9437184
 91 | I9956847
 92 | (dp40
 93 | g4
 94 | S'47b7380df258324a491e9cf284778f98'
 95 | p41
 96 | stp42
 97 | Rp43
 98 | atp44
 99 | Rp45
100 | .


--------------------------------------------------------------------------------
/inst/extdata/methy/0ae7/jhu-usc.H.8.l.I.TCGA-30-1714-01A-02D-0563-05.g.txt:
--------------------------------------------------------------------------------
 1 | Composite Element REF	Beta_value	Chromosome	Start	End	Gene_Symbol	Gene_Type	Transcript_ID	Position_to_TSS	CGI_Coordinate	Feature_Type
 2 | cg00000292	0.771395097495164	chr16	28878779	28878780	ATP2A1;ATP2A1;ATP2A1;ATP2A1;ATP2A1	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000357084.6;ENST00000395503.7;ENST00000536376.4;ENST00000562185.4;ENST00000563975.1	373;290;-1275;-465;-83	CGI:chr16:28879633-28880547	N_Shore
 3 | cg00002426	0.0233901750159108	chr3	57757816	57757817	SLMAP;SLMAP;SLMAP;SLMAP;SLMAP;SLMAP	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000295951.6;ENST00000295952.6;ENST00000383718.6;ENST00000428312.4;ENST00000449503.5;ENST00000467901.1	1585;368;261;257;257;514	CGI:chr3:57756198-57757263	S_Shore
 4 | cg00003994	0.028704838704695	chr7	15686237	15686238	MEOX2	protein_coding	ENST00000262041.5	576	CGI:chr7:16399497-16399700	.
 5 | cg00005847	0.903451857318154	chr2	176164345	176164346	AC009336.19;HOXD3;HOXD3;HOXD3;RP11-387A1.5	protein_coding;protein_coding;protein_coding;protein_coding;antisense	ENST00000468418.4;ENST00000249440.4;ENST00000410016.4;ENST00000432796.2;ENST00000608941.1	13259;267;3453;27387;1372	CGI:chr2:176164685-176165509	N_Shore
 6 | cg00006414	0.2148836012527	chr7	149125745	149125746	RN7SL521P;ZNF398;ZNF425;ZNF425	misc_RNA;protein_coding;protein_coding;protein_coding	ENST00000488398.3;ENST00000426851.5;ENST00000378061.5;ENST00000483014.1	242;-672;602;562	CGI:chr7:149126122-149127136	N_Shore
 7 | cg00007981	0.00887051985245265	chr11	94129428	94129429	PANX1;PANX1	protein_coding;protein_coding	ENST00000227638.6;ENST00000436171.2	499;498	CGI:chr11:94128394-94129607	Island
 8 | cg00008493	0.985460674026316	chr14	93347431	93347432	COX8C;UNC79	protein_coding;protein_coding	ENST00000342144.2;ENST00000256339.7	239;14211	CGI:chr14:93347137-93347765	Island
 9 | cg00008713	0.0102627571258565	chr18	11980954	11980955	IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000269159.6;ENST00000383376.8;ENST00000588752.4;ENST00000588927.4;ENST00000589238.4;ENST00000590107.4;ENST00000590138.1;ENST00000625802.1	-475;-563;-72;-717;-84;-512;-658;-475	CGI:chr18:11980484-11982143	Island
10 | cg00009407	0.00664922618318322	chr14	88824577	88824578	TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000338104.9;ENST00000345383.8;ENST00000346301.7;ENST00000354441.9;ENST00000358622.8;ENST00000380656.5;ENST00000536576.4;ENST00000553718.1;ENST00000554686.4;ENST00000555057.4;ENST00000556077.4;ENST00000556567.4;ENST00000556651.4;ENST00000614125.3;ENST00000622513.3	-80;423;-80;-84;423;-86;423;-18;-163;-96;423;-75;-109;423;-48	CGI:chr14:88824574-88825011	Island
11 | cg00010193	0.624378233369569	chr4	1151428	1151429	AC092535.3;TMED11P	antisense;unitary_pseudogene	ENST00000417557.1;ENST00000479478.4	55;2299	CGI:chr4:1144210-1146033	.
12 | cg00011459	0.916476198575747	chr16	8796568	8796569	PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;TMEM186;TMEM186	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000268261.7;ENST00000562318.4;ENST00000562448.1;ENST00000564030.4;ENST00000564069.1;ENST00000565221.4;ENST00000565896.4;ENST00000566196.4;ENST00000566540.4;ENST00000566604.4;ENST00000566983.4;ENST00000568602.4;ENST00000569958.4;ENST00000570076.4;ENST00000570134.4;ENST00000333050.6;ENST00000564869.1	-1250;-1275;-1275;-1254;-1345;-1300;-1275;-1272;-1275;-1270;7744;-1275;-1300;-1283;-1271;1081;1075	CGI:chr16:8797465-8798071	N_Shore
13 | cg00012199	0.00897505734686159	chr14	20682865	20682866	ANG;RNASE4	protein_coding;protein_coding	ENST00000336811.9;ENST00000555835.2	-1313;-1236	CGI:chr14:20682759-20683065	Island
14 | cg00012386	0.00783581050121143	chr1	227734811	227734812	JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000366758.6;ENST00000438896.2;ENST00000465251.1;ENST00000480590.1;ENST00000485807.1;ENST00000615711.3;ENST00000620518.3;ENST00000315781.8;ENST00000366759.7;ENST00000366760.4;ENST00000418653.4;ENST00000426344.4;ENST00000470038.1;ENST00000475930.1;ENST00000491439.1;ENST00000617596.3	601;601;-439;266;-433;601;473;-186;-186;6153;-621;-626;-1363;6192;6103;-643	CGI:chr1:227734942-227735730	N_Shore
15 | cg00012792	0.00972612443887198	chr6	8064260	8064261	BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5-TXNDC5;EEF1E1-BLOC1S5	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000244777.5;ENST00000397457.5;ENST00000543936.4;ENST00000627748.1;ENST00000439343.2;ENST00000397456.2	134;155;155;137;105;38271	CGI:chr6:8063813-8064532	Island


--------------------------------------------------------------------------------
/inst/extdata/methy/0ae7/logs/file3.parcel:
--------------------------------------------------------------------------------
  1 | cintervaltree.intervaltree
  2 | IntervalTree
  3 | p0
  4 | ((lp1
  5 | cintervaltree.interval
  6 | Interval
  7 | p2
  8 | (I0
  9 | I1048576
 10 | (dp3
 11 | S'md5sum'
 12 | p4
 13 | S'aead104e6ece37828f89eb77288fe918'
 14 | p5
 15 | stp6
 16 | Rp7
 17 | ag2
 18 | (I1048576
 19 | I2097152
 20 | (dp8
 21 | g4
 22 | S'0773e3100764340b3cc404950897a6fb'
 23 | p9
 24 | stp10
 25 | Rp11
 26 | ag2
 27 | (I2097152
 28 | I3145728
 29 | (dp12
 30 | g4
 31 | S'86ebefee3b2dfc27c2d0b5c9117c971d'
 32 | p13
 33 | stp14
 34 | Rp15
 35 | ag2
 36 | (I3145728
 37 | I4194304
 38 | (dp16
 39 | g4
 40 | S'c05863055e7581a14e6be321d08a2511'
 41 | p17
 42 | stp18
 43 | Rp19
 44 | ag2
 45 | (I4194304
 46 | I5242880
 47 | (dp20
 48 | g4
 49 | S'3fbdb5a920e9f2796cb6dbe7e5485bd7'
 50 | p21
 51 | stp22
 52 | Rp23
 53 | ag2
 54 | (I5242880
 55 | I6291456
 56 | (dp24
 57 | g4
 58 | S'2dfd8445618501c7100903d44dfe0ab5'
 59 | p25
 60 | stp26
 61 | Rp27
 62 | ag2
 63 | (I6291456
 64 | I7340032
 65 | (dp28
 66 | g4
 67 | S'38e743a0a65c8f211f9199cd85ea97db'
 68 | p29
 69 | stp30
 70 | Rp31
 71 | ag2
 72 | (I7340032
 73 | I8388608
 74 | (dp32
 75 | g4
 76 | S'c030bea01e6543263fdf62e8e3504edd'
 77 | p33
 78 | stp34
 79 | Rp35
 80 | ag2
 81 | (I8388608
 82 | I9437184
 83 | (dp36
 84 | g4
 85 | S'a43d34869fe5ba12c29279dd5bb4c9ba'
 86 | p37
 87 | stp38
 88 | Rp39
 89 | ag2
 90 | (I9437184
 91 | I9958646
 92 | (dp40
 93 | g4
 94 | S'1d1cc80e21505dd920dd473d6cda718f'
 95 | p41
 96 | stp42
 97 | Rp43
 98 | atp44
 99 | Rp45
100 | .


--------------------------------------------------------------------------------
/inst/extdata/methy/0b32/jhu-usc.e.H.5.l.TCGA-13-1510-01A-02D-0475-05.gdc_hg38.txt:
--------------------------------------------------------------------------------
 1 | Composite Element REF	Beta_value	Chromosome	Start	End	Gene_Symbol	Gene_Type	Transcript_ID	Position_to_TSS	CGI_Coordinate	Feature_Type
 2 | cg00000292	0.930908590439207	chr16	28878779	28878780	ATP2A1;ATP2A1;ATP2A1;ATP2A1;ATP2A1	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000357084.6;ENST00000395503.7;ENST00000536376.4;ENST00000562185.4;ENST00000563975.1	373;290;-1275;-465;-83	CGI:chr16:28879633-28880547	N_Shore
 3 | cg00002426	0.104268079362755	chr3	57757816	57757817	SLMAP;SLMAP;SLMAP;SLMAP;SLMAP;SLMAP	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000295951.6;ENST00000295952.6;ENST00000383718.6;ENST00000428312.4;ENST00000449503.5;ENST00000467901.1	1585;368;261;257;257;514	CGI:chr3:57756198-57757263	S_Shore
 4 | cg00003994	0.0401792073366475	chr7	15686237	15686238	MEOX2	protein_coding	ENST00000262041.5	576	CGI:chr7:16399497-16399700	.
 5 | cg00005847	0.780428197539084	chr2	176164345	176164346	AC009336.19;HOXD3;HOXD3;HOXD3;RP11-387A1.5	protein_coding;protein_coding;protein_coding;protein_coding;antisense	ENST00000468418.4;ENST00000249440.4;ENST00000410016.4;ENST00000432796.2;ENST00000608941.1	13259;267;3453;27387;1372	CGI:chr2:176164685-176165509	N_Shore
 6 | cg00006414	0.780428197539084	chr7	149125745	149125746	RN7SL521P;ZNF398;ZNF425;ZNF425	misc_RNA;protein_coding;protein_coding;protein_coding	ENST00000488398.3;ENST00000426851.5;ENST00000378061.5;ENST00000483014.1	242;-672;602;562	CGI:chr7:149126122-149127136	N_Shore
 7 | cg00007981	0.0220541697023661	chr11	94129428	94129429	PANX1;PANX1	protein_coding;protein_coding	ENST00000227638.6;ENST00000436171.2	499;498	CGI:chr11:94128394-94129607	Island
 8 | cg00008493	0.976977997665785	chr14	93347431	93347432	COX8C;UNC79	protein_coding;protein_coding	ENST00000342144.2;ENST00000256339.7	239;14211	CGI:chr14:93347137-93347765	Island
 9 | cg00008713	0.0150960841510609	chr18	11980954	11980955	IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2;IMPA2	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000269159.6;ENST00000383376.8;ENST00000588752.4;ENST00000588927.4;ENST00000589238.4;ENST00000590107.4;ENST00000590138.1;ENST00000625802.1	-475;-563;-72;-717;-84;-512;-658;-475	CGI:chr18:11980484-11982143	Island
10 | cg00009407	0.0881331292749216	chr14	88824577	88824578	TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8;TTC8	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000338104.9;ENST00000345383.8;ENST00000346301.7;ENST00000354441.9;ENST00000358622.8;ENST00000380656.5;ENST00000536576.4;ENST00000553718.1;ENST00000554686.4;ENST00000555057.4;ENST00000556077.4;ENST00000556567.4;ENST00000556651.4;ENST00000614125.3;ENST00000622513.3	-80;423;-80;-84;423;-86;423;-18;-163;-96;423;-75;-109;423;-48	CGI:chr14:88824574-88825011	Island
11 | cg00010193	0.630671624077345	chr4	1151428	1151429	AC092535.3;TMED11P	antisense;unitary_pseudogene	ENST00000417557.1;ENST00000479478.4	55;2299	CGI:chr4:1144210-1146033	.
12 | cg00011459	0.885521863234167	chr16	8796568	8796569	PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;PMM2;TMEM186;TMEM186	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000268261.7;ENST00000562318.4;ENST00000562448.1;ENST00000564030.4;ENST00000564069.1;ENST00000565221.4;ENST00000565896.4;ENST00000566196.4;ENST00000566540.4;ENST00000566604.4;ENST00000566983.4;ENST00000568602.4;ENST00000569958.4;ENST00000570076.4;ENST00000570134.4;ENST00000333050.6;ENST00000564869.1	-1250;-1275;-1275;-1254;-1345;-1300;-1275;-1272;-1275;-1270;7744;-1275;-1300;-1283;-1271;1081;1075	CGI:chr16:8797465-8798071	N_Shore
13 | cg00012199	0.0341206451263948	chr14	20682865	20682866	ANG;RNASE4	protein_coding;protein_coding	ENST00000336811.9;ENST00000555835.2	-1313;-1236	CGI:chr14:20682759-20683065	Island
14 | cg00012386	0.0116998743835203	chr1	227734811	227734812	JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;JMJD4;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47;SNAP47	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000366758.6;ENST00000438896.2;ENST00000465251.1;ENST00000480590.1;ENST00000485807.1;ENST00000615711.3;ENST00000620518.3;ENST00000315781.8;ENST00000366759.7;ENST00000366760.4;ENST00000418653.4;ENST00000426344.4;ENST00000470038.1;ENST00000475930.1;ENST00000491439.1;ENST00000617596.3	601;601;-439;266;-433;601;473;-186;-186;6153;-621;-626;-1363;6192;6103;-643	CGI:chr1:227734942-227735730	N_Shore
15 | cg00012792	0.0149194095384671	chr6	8064260	8064261	BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5;BLOC1S5-TXNDC5;EEF1E1-BLOC1S5	protein_coding;protein_coding;protein_coding;protein_coding;protein_coding;protein_coding	ENST00000244777.5;ENST00000397457.5;ENST00000543936.4;ENST00000627748.1;ENST00000439343.2;ENST00000397456.2	134;155;155;137;105;38271	CGI:chr6:8063813-8064532	Island


--------------------------------------------------------------------------------
/inst/extdata/methy/0b32/logs/file4.parcel:
--------------------------------------------------------------------------------
  1 | cintervaltree.intervaltree
  2 | IntervalTree
  3 | p0
  4 | ((lp1
  5 | cintervaltree.interval
  6 | Interval
  7 | p2
  8 | (I0
  9 | I1048576
 10 | (dp3
 11 | S'md5sum'
 12 | p4
 13 | S'efa0b12c00fcd025f9bb24b4bbafdc26'
 14 | p5
 15 | stp6
 16 | Rp7
 17 | ag2
 18 | (I1048576
 19 | I2097152
 20 | (dp8
 21 | g4
 22 | S'86c446ea359acb897b2b622d6d8ff3b8'
 23 | p9
 24 | stp10
 25 | Rp11
 26 | ag2
 27 | (I2097152
 28 | I3145728
 29 | (dp12
 30 | g4
 31 | S'6bae43a0c5c6de9d628114a4987eee28'
 32 | p13
 33 | stp14
 34 | Rp15
 35 | ag2
 36 | (I3145728
 37 | I4194304
 38 | (dp16
 39 | g4
 40 | S'0703a5f69cd342d85bc0dffe4daf0432'
 41 | p17
 42 | stp18
 43 | Rp19
 44 | ag2
 45 | (I4194304
 46 | I5242880
 47 | (dp20
 48 | g4
 49 | S'07b7d7041ec060fdfa4b35fb73b0324b'
 50 | p21
 51 | stp22
 52 | Rp23
 53 | ag2
 54 | (I5242880
 55 | I6291456
 56 | (dp24
 57 | g4
 58 | S'42c174280504a41536414c7c4119ebca'
 59 | p25
 60 | stp26
 61 | Rp27
 62 | ag2
 63 | (I6291456
 64 | I7340032
 65 | (dp28
 66 | g4
 67 | S'4643bfaad70e5fd1e50df13889207d86'
 68 | p29
 69 | stp30
 70 | Rp31
 71 | ag2
 72 | (I7340032
 73 | I8388608
 74 | (dp32
 75 | g4
 76 | S'803aeea62bad0d6a31e63f3e6647e9c4'
 77 | p33
 78 | stp34
 79 | Rp35
 80 | ag2
 81 | (I8388608
 82 | I9437184
 83 | (dp36
 84 | g4
 85 | S'86fe78bd9182ab0920b0e3d2882206f6'
 86 | p37
 87 | stp38
 88 | Rp39
 89 | ag2
 90 | (I9437184
 91 | I9944322
 92 | (dp40
 93 | g4
 94 | S'052e8964fbc18d41c883c15feceae565'
 95 | p41
 96 | stp42
 97 | Rp43
 98 | atp44
 99 | Rp45
100 | .


--------------------------------------------------------------------------------
/man/GSE66705_sample2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{GSE66705_sample2}
 5 | \alias{GSE66705_sample2}
 6 | \title{a matrix of gene expression data in GEO}
 7 | \format{
 8 | A matrix with 999 rows and 3 columns
 9 | }
10 | \usage{
11 | GSE66705_sample2
12 | }
13 | \description{
14 | the first column represents the gene symbol
15 | }
16 | \details{
17 | the other columns represent the expression of genes
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/Merge_methy_tcga.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Merge_methylation.R
 3 | \name{Merge_methy_tcga}
 4 | \alias{Merge_methy_tcga}
 5 | \title{Merge methylation data downloaded from TCGA}
 6 | \usage{
 7 | Merge_methy_tcga(dirr = NULL)
 8 | }
 9 | \arguments{
10 | \item{dirr}{a string for the directory of methylation data downloaded from TCGA
11 | using the gdc tool}
12 | }
13 | \value{
14 | a matrix, a combined methylation expression spectrum matrix
15 | }
16 | \description{
17 | When the methylation data is downloaded from TCGA, 
18 | each sample is saved in a folder, which contains the methylation value file 
19 | and the descriptive file. This function can directly 
20 | extract and consolidate all folders.
21 | }
22 | \examples{
23 | merge_result <- Merge_methy_tcga(system.file(file.path("extdata", "methy"),
24 |     package = "GeoTcgaData"))
25 | }
26 | 


--------------------------------------------------------------------------------
/man/SNP_QC.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SNP.R
 3 | \name{SNP_QC}
 4 | \alias{SNP_QC}
 5 | \title{Do quality control of SNP data downloaded from TCGAbiolinks}
 6 | \usage{
 7 | SNP_QC(
 8 |   snpData,
 9 |   geon = 0.02,
10 |   mind = 0.02,
11 |   maf = 0.05,
12 |   hwe = 1e-06,
13 |   miss = "NoCall"
14 | )
15 | }
16 | \arguments{
17 | \item{snpData}{data.frame of SNP data downloaded from TCGAbiolinks}
18 | 
19 | \item{geon}{filters out all variants with missing call rates
20 | exceeding the provided value (default 0.02)}
21 | 
22 | \item{mind}{filters out all samples with missing call rates exceeding
23 | the provided value (default 0.02)}
24 | 
25 | \item{maf}{filters out all variants with minor allele frequency below
26 | the provided threshold}
27 | 
28 | \item{hwe}{filters out all variants which have Hardy-Weinberg
29 | equilibrium exact test p-value below the provided threshold}
30 | 
31 | \item{miss}{character string used to denote a missing value}
32 | }
33 | \value{
34 | data.frame
35 | }
36 | \description{
37 | Do quality control of SNP data downloaded from TCGAbiolinks
38 | }
39 | \examples{
40 | # use demo data
41 | snpDf <- matrix(sample(c("AA", "Aa", "aa"), 100, replace = TRUE), 10, 10)
42 | snpDf <- as.data.frame(snpDf)
43 | sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
44 | result <- SNP_QC(snpDf)
45 | }
46 | 


--------------------------------------------------------------------------------
/man/array_preprocess.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/arrayDiff.R
 3 | \name{array_preprocess}
 4 | \alias{array_preprocess}
 5 | \title{Preprocess of Microarray data}
 6 | \usage{
 7 | array_preprocess(x, missing_value = "knn", string = " /// ")
 8 | }
 9 | \arguments{
10 | \item{x}{matrix of Microarray data, each column is a sample, 
11 | and each row is a gene.}
12 | 
13 | \item{missing_value}{Method to impute missing expression data,
14 | one of "zero" or "knn".}
15 | 
16 | \item{string}{a string, the separator between gene names}
17 | }
18 | \value{
19 | matrix
20 | }
21 | \description{
22 | Preprocess of Microarray data
23 | }
24 | \examples{
25 | \donttest{
26 | arraylist <- get_geo_array("GSE781")
27 | arraylist <- lapply(arraylist, array_preprocess)
28 | }
29 | }
30 | 


--------------------------------------------------------------------------------
/man/cal_mean_module.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_mean_module.R
 3 | \name{cal_mean_module}
 4 | \alias{cal_mean_module}
 5 | \title{Find the mean value of the gene in each module}
 6 | \usage{
 7 | cal_mean_module(geneExpress, module)
 8 | }
 9 | \arguments{
10 | \item{geneExpress}{a data.frame of gene expression data. 
11 | Each column is a sample, and each row is a gene.}
12 | 
13 | \item{module}{a data.frame of two columns. The first column is the module name,
14 | the second column contains the genes in this module.}
15 | }
16 | \value{
17 | a data.frame containing the mean gene expression value of
18 | the genes in the same module
19 | }
20 | \description{
21 | Find the mean value of the gene in each module
22 | }
23 | \examples{
24 | data(geneExpress)
25 | data(module)
26 | result <- cal_mean_module(geneExpress, module)
27 | }
28 | 


--------------------------------------------------------------------------------
/man/cluster_array.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/arrayDiff.R
 3 | \name{cluster_array}
 4 | \alias{cluster_array}
 5 | \title{cluster probes of Microarray data}
 6 | \usage{
 7 | cluster_array(x, clusterCutoff = 0.7)
 8 | }
 9 | \arguments{
10 | \item{x}{matrix of Microarray data, the first column is the name of the gene,
11 | and the other columns are the expression values.}
12 | 
13 | \item{clusterCutoff}{Pearson correlation threshold 
14 | to cut off the hierarchical tree.}
15 | }
16 | \value{
17 | data.frame
18 | }
19 | \description{
20 | cluster probes of Microarray data
21 | }
22 | \examples{
23 | \donttest{
24 | arraylist <- get_geo_array("GSE781")
25 | arraylist <- lapply(arraylist, array_preprocess)
26 | arraylist_cluster <- lapply(arraylist, cluster_array)
27 | }
28 | }
29 | 


--------------------------------------------------------------------------------
/man/combine_pvalue.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SNP.R
 3 | \name{combine_pvalue}
 4 | \alias{combine_pvalue}
 5 | \title{combine pvalues of SNP difference analysis result}
 6 | \usage{
 7 | combine_pvalue(snpResult, snp2gene, combineMethod = min)
 8 | }
 9 | \arguments{
10 | \item{snpResult}{data.frame of SNP difference analysis result.}
11 | 
12 | \item{snp2gene}{data frame of two columns: snp and gene.}
13 | 
14 | \item{combineMethod}{Method of combining the
15 | p-values of multiple SNPs in a gene.}
16 | }
17 | \value{
18 | data.frame
19 | }
20 | \description{
21 | combine pvalues of SNP difference analysis result
22 | }
23 | \examples{
24 | snpResult <- data.frame(pvalue = runif(100), estimate = runif(100))
25 | rownames(snpResult) <- paste0("snp", seq_len(100))
26 | snp2gene <- data.frame(snp = rownames(snpResult), 
27 |     gene = rep(paste0("gene", seq_len(20)), 5))
28 | result <- combine_pvalue(snpResult, snp2gene)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/countToFpkm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fpkm_count_conversion.r
 3 | \name{countToFpkm}
 4 | \alias{countToFpkm}
 5 | \title{Convert count to FPKM}
 6 | \usage{
 7 | countToFpkm(counts_matrix, keyType = "SYMBOL", gene_cov)
 8 | }
 9 | \arguments{
10 | \item{counts_matrix}{a matrix, colnames of counts_matrix are sample name,
11 | rownames of counts_matrix are gene symbols}
12 | 
13 | \item{keyType}{keyType, one of keytypes(org.Hs.eg.db).}
14 | 
15 | \item{gene_cov}{data.frame of two columns, the first column is gene length,
16 | the second column is gene GC content}
17 | }
18 | \value{
19 | a matrix
20 | }
21 | \description{
22 | Convert count to FPKM
23 | }
24 | \examples{
25 | data(gene_cov)
26 | lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
27 | rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
28 | colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
29 | result <- countToFpkm(lung_squ_count2,
30 |     keyType = "SYMBOL",
31 |     gene_cov = gene_cov
32 | )
33 | }
34 | 


--------------------------------------------------------------------------------
/man/countToTpm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fpkm_count_conversion.r
 3 | \name{countToTpm}
 4 | \alias{countToTpm}
 5 | \title{Convert count to Tpm}
 6 | \usage{
 7 | countToTpm(counts_matrix, keyType = "SYMBOL", gene_cov)
 8 | }
 9 | \arguments{
10 | \item{counts_matrix}{a matrix, colnames of counts_matrix are sample name,
11 | rownames of counts_matrix are gene symbols}
12 | 
13 | \item{keyType}{keyType, one of keytypes(org.Hs.eg.db).}
14 | 
15 | \item{gene_cov}{data.frame of two columns, the first column is gene length,
16 | the second column is gene GC content}
17 | }
18 | \value{
19 | a matrix
20 | }
21 | \description{
22 | Convert count to Tpm
23 | }
24 | \examples{
25 | data(gene_cov)
26 | lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
27 | rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
28 | colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
29 | result <- countToTpm(lung_squ_count2,
30 |     keyType = "SYMBOL",
31 |     gene_cov = gene_cov
32 | )
33 | }
34 | 


--------------------------------------------------------------------------------
/man/differential_RNA.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/RNA_seq.R
  3 | \name{differential_RNA}
  4 | \alias{differential_RNA}
  5 | \title{differential_RNA}
  6 | \usage{
  7 | differential_RNA(
  8 |   counts,
  9 |   group,
 10 |   groupCol,
 11 |   method = "limma",
 12 |   geneLength = NULL,
 13 |   gccontent = NULL,
 14 |   filter = TRUE,
 15 |   edgeRNorm = TRUE,
 16 |   adjust.method = "BH",
 17 |   useTopconfects = TRUE,
 18 |   ucscData = FALSE
 19 | )
 20 | }
 21 | \arguments{
 22 | \item{counts}{a dataframe or numeric matrix of raw counts data, 
 23 | or SummarizedExperiment object}
 24 | 
 25 | \item{group}{sample groups}
 26 | 
 27 | \item{groupCol}{group column}
 28 | 
 29 | \item{method}{one of "DESeq2", "edgeR" , "limma", "dearseq",
 30 | "NOISeq", "Wilcoxon", and "auto".}
 31 | 
 32 | \item{geneLength}{a vector of gene length.}
 33 | 
 34 | \item{gccontent}{a vector of gene GC content.}
 35 | 
 36 | \item{filter}{if TRUE, use filterByExpr to filter genes.}
 37 | 
 38 | \item{edgeRNorm}{if TRUE, use edgeR to do normalization for dearseq method.}
 39 | 
 40 | \item{adjust.method}{character string specifying the method used to
 41 | adjust p-values for multiple testing.
 42 | See \link{p.adjust} for possible values.}
 43 | 
 44 | \item{useTopconfects}{if TRUE, use topconfects to provide a
 45 | more biologically useful ranked gene list.}
 46 | 
 47 | \item{ucscData}{Logical, whether the data comes from UCSC Xena.}
 48 | }
 49 | \value{
 50 | data.frame
 51 | }
 52 | \description{
 53 | Do difference analysis of RNA-seq data
 54 | }
 55 | \examples{
 56 | \donttest{
 57 | library(TCGAbiolinks)
 58 | 
 59 | query <- GDCquery(
 60 |     project = "TCGA-ACC",
 61 |     data.category = "Transcriptome Profiling",
 62 |     data.type = "Gene Expression Quantification",
 63 |     workflow.type = "STAR - Counts"
 64 | )
 65 | 
 66 | GDCdownload(query,
 67 |     method = "api", files.per.chunk = 3,
 68 |     directory = Your_Path
 69 | )
 70 | 
 71 | dataRNA <- GDCprepare(
 72 |     query = query, directory = Your_Path,
 73 |     save = TRUE, save.filename = "dataRNA.RData"
 74 | )
 75 | ## get raw count matrix
 76 | dataPrep <- TCGAanalyze_Preprocessing(
 77 |     object = dataRNA,
 78 |     cor.cut = 0.6,
 79 |     datatype = "STAR - Counts"
 80 | )
 81 | 
 82 | # Use `differential_RNA` to do difference analysis.
 83 | # We provide the data of human gene length and GC content in `gene_cov`.
 84 | group <- sample(c("grp1", "grp2"), ncol(dataPrep), replace = TRUE)
 85 | library(cqn) # To avoid reporting errors: there is no function "rq"
 86 | ## get gene length and GC content
 87 | library(org.Hs.eg.db)
 88 | genes_bitr <- clusterProfiler::bitr(rownames(gene_cov),
 89 |     fromType = "ENTREZID", toType = "ENSEMBL",
 90 |     OrgDb = org.Hs.eg.db, drop = TRUE
 91 | )
 92 | genes_bitr <- genes_bitr[!duplicated(genes_bitr[, 2]), ]
 93 | gene_cov2 <- gene_cov[genes_bitr$ENTREZID, ]
 94 | rownames(gene_cov2) <- genes_bitr$ENSEMBL
 95 | genes <- intersect(rownames(dataPrep), rownames(gene_cov2))
 96 | dataPrep <- dataPrep[genes, ]
 97 | geneLength <- gene_cov2[genes, "length"]
 98 | gccontent <- gene_cov2[genes, "GC"]
 99 | names(geneLength) <- names(gccontent) <- genes
100 | ##    Difference analysis
101 | DEGAll <- differential_RNA(
102 |     counts = dataPrep, group = group,
103 |     geneLength = geneLength, gccontent = gccontent
104 | )
105 | # Use `clusterProfiler` to do enrichment analysis:
106 | diffGenes <- DEGAll$logFC
107 | names(diffGenes) <- rownames(DEGAll)
108 | diffGenes <- sort(diffGenes, decreasing = TRUE)
109 | library(clusterProfiler)
110 | library(enrichplot)
111 | library(org.Hs.eg.db)
112 | gsego <- gseGO(gene = diffGenes, OrgDb = org.Hs.eg.db, keyType = "ENSEMBL")
113 | dotplot(gsego)
114 | }
115 | # use user-defined data
116 | df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
117 | df <- as.data.frame(df)
118 | rownames(df) <- paste0("gene", 1:25)
119 | colnames(df) <- paste0("sample", 1:16)
120 | group <- sample(c("group1", "group2"), 16, replace = TRUE)
121 | result <- differential_RNA(counts = df, group = group,
122 |     filter = FALSE, method = "Wilcoxon")
123 | # use SummarizedExperiment object input
124 | df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
125 | rownames(df) <- paste0("gene", 1:25)
126 | colnames(df) <- paste0("sample", 1:16)
127 | group <- sample(c("group1", "group2"), 16, replace = TRUE)
128 | 
129 | nrows <- 200; ncols <- 20
130 |  counts <- matrix(
131 |    runif(nrows * ncols, 1, 1e4), nrows,
132 |    dimnames = list(paste0("cg",1:200),paste0("S",1:20))
133 | )
134 | 
135 | colData <- S4Vectors::DataFrame(
136 |   row.names = paste0("sample", 1:16),
137 |   group = group
138 | )
139 | data <- SummarizedExperiment::SummarizedExperiment(
140 |          assays=S4Vectors::SimpleList(counts=df),
141 |          colData = colData)
142 | 
143 | result <- differential_RNA(counts = data, groupCol = "group",
144 |     filter = FALSE, method = "Wilcoxon")
145 | }
146 | 


--------------------------------------------------------------------------------
/man/differential_SNP.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SNP.R
 3 | \name{differential_SNP}
 4 | \alias{differential_SNP}
 5 | \title{Do difference analysis of SNP data}
 6 | \usage{
 7 | differential_SNP(snpDf, sampleGroup, combineMethod = min)
 8 | }
 9 | \arguments{
10 | \item{snpDf}{data.frame of SNP data, each column is a sample, 
11 | and each row is a SNP.}
12 | 
13 | \item{sampleGroup}{vector of sample group.}
14 | 
15 | \item{combineMethod}{Method of combining the
16 | pvalue of multiple snp in a gene.}
17 | }
18 | \value{
19 | data.frame
20 | }
21 | \description{
22 | Do difference analysis of SNP data
23 | }
24 | \examples{
25 | \donttest{
26 | library(TCGAbiolinks)
27 | query <- GDCquery(
28 |     project = "TCGA-CHOL",
29 |     data.category = "Simple Nucleotide Variation",
30 |     access = "open",
31 |     legacy = FALSE,
32 |     data.type = "Masked Somatic Mutation",
33 |     workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
34 | )
35 | GDCdownload(query)
36 | data_snp <- GDCprepare(query)
37 | samples <- unique(data_snp$Tumor_Sample_Barcode)
38 | sampleGroup <- sample(c("A", "B"), length(samples), replace = TRUE)
39 | names(sampleGroup) <- samples
40 | pvalue <- differential_SNP_tcga(snpData = data_snp, 
41 |     sampleGroup = sampleGroup)
42 | }
43 | # use demo data
44 | snpDf <- matrix(sample(c("mutation", NA), 100, replace = TRUE), 10, 10)
45 | snpDf <- as.data.frame(snpDf)
46 | sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
47 | result <- differential_SNP(snpDf, sampleGroup)
48 | }
49 | 


--------------------------------------------------------------------------------
/man/differential_SNP_GEO.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SNP.R
 3 | \name{differential_SNP_GEO}
 4 | \alias{differential_SNP_GEO}
 5 | \title{Do difference analysis of SNP data downloaded from GEO}
 6 | \usage{
 7 | differential_SNP_GEO(snpData, sampleGroup, method = "Chisquare")
 8 | }
 9 | \arguments{
10 | \item{snpData}{data.frame of SNP data downloaded from GEO}
11 | 
12 | \item{sampleGroup}{vector of sample group}
13 | 
14 | \item{method}{one of "Chisquare", "fisher",
15 | and "CATT"(Cochran-Armitage trend test)}
16 | }
17 | \value{
18 | data.frame
19 | }
20 | \description{
21 | Do difference analysis of SNP data downloaded from GEO
22 | }
23 | \examples{
24 | \donttest{
25 | file1 <- read.table("GSE66903_series_matrix.txt.gz",
26 |     fill=TRUE, comment.char="!", header = TRUE)
27 | rownames(file1) <- file1[, 1]
28 | snpData <- file1[, -1]
29 | sampleGroup <- sample(c("A", "B"), ncol(snpData ), replace = TRUE)
30 | names(sampleGroup) <- colnames(snpData)
31 | snpData <- SNP_QC(snpData)
32 | sampleGroup <- sample(c("A", "B"), ncol(snpData ), replace = TRUE)
33 | result1 <- differential_SNP_GEO(snpData = snpData,
34 |     sampleGroup = sampleGroup, method = "Chisquare")
35 | }
36 | # use demo data
37 | snpDf <- matrix(sample(c("AA", "Aa", "aa"), 100, replace = TRUE), 10, 10)
38 | snpDf <- as.data.frame(snpDf)
39 | sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
40 | result <- differential_SNP_GEO(snpDf, sampleGroup, method = "fisher")
41 | }
42 | 


--------------------------------------------------------------------------------
/man/differential_SNP_tcga.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/SNP.R
 3 | \name{differential_SNP_tcga}
 4 | \alias{differential_SNP_tcga}
 5 | \title{Do difference analysis of SNP data downloaded from TCGAbiolinks}
 6 | \usage{
 7 | differential_SNP_tcga(snpData, sampleGroup, combineMethod = NULL)
 8 | }
 9 | \arguments{
10 | \item{snpData}{data.frame of SNP data downloaded from TCGAbiolinks}
11 | 
12 | \item{sampleGroup}{vector of sample group}
13 | 
14 | \item{combineMethod}{Method of combining the pvalue of
15 | multiple snp in a gene.}
16 | }
17 | \value{
18 | data.frame
19 | }
20 | \description{
21 | Do difference analysis of SNP data downloaded from TCGAbiolinks
22 | }
23 | \examples{
24 | \donttest{
25 | library(TCGAbiolinks)
26 | query <- GDCquery(
27 |     project = "TCGA-CHOL",
28 |     data.category = "Simple Nucleotide Variation",
29 |     access = "open",
30 |     legacy = FALSE,
31 |     data.type = "Masked Somatic Mutation",
32 |     workflow.type = "Aliquot Ensemble Somatic Variant Merging and Masking"
33 | )
34 | GDCdownload(query)
35 | data_snp <- GDCprepare(query)
36 | samples <- unique(data_snp$Tumor_Sample_Barcode)
37 | sampleGroup <- sample(c("A", "B"), length(samples), replace = TRUE)
38 | names(sampleGroup) <- samples
39 | pvalue <- differential_SNP_tcga(snpData = data_snp, 
40 |     sampleGroup = sampleGroup)
41 | }
42 | # use demo data
43 | snpDf <- matrix(sample(c("mutation", NA), 100, replace = TRUE), 10, 10)
44 | snpDf <- as.data.frame(snpDf)
45 | sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
46 | result <- differential_SNP(snpDf, sampleGroup)
47 | }
48 | 


--------------------------------------------------------------------------------
/man/differential_array.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/arrayDiff.R
 3 | \name{differential_array}
 4 | \alias{differential_array}
 5 | \title{Differential analysis of Microarray data}
 6 | \usage{
 7 | differential_array(df, group, method = "limma", adjust.method = "BH")
 8 | }
 9 | \arguments{
10 | \item{df}{data.frame of the omic data, each column is a sample, 
11 | and each row is a gene.}
12 | 
13 | \item{group}{a vector, group of samples.}
14 | 
15 | \item{method}{method to do differential analysis, 
16 | one of "limma", "ttest", "wilcox".}
17 | 
18 | \item{adjust.method}{adjust.method, one of "holm", "hochberg", "hommel", 
19 | "bonferroni", "BH", "BY", "fdr", and "none".}
20 | }
21 | \value{
22 | data.frame
23 | }
24 | \description{
25 | Differential analysis of Microarray data
26 | }
27 | \examples{
28 | \donttest{
29 | library(GeoTcgaData)
30 | library(data.table)
31 | # Use real GEO data as example
32 | arrayData <- read.table("GSE54807_series_matrix.txt.gz",
33 |     sep = "\t", header = TRUE,
34 |         fill=TRUE, comment.char = "!", check.names=FALSE)
35 | gpl <- fread("GPL6244-17930.txt", sep = "\t", header = TRUE)
36 | gpl <- gpl[, c("ID", "gene_assignment")]
37 | class(gpl) <- "data.frame"
38 | 
39 | for (i in seq_len(nrow(gpl))) {
40 |         aa <- strsplit(gpl[i, 2], " // ")[[1]][5]
41 |         gpl[i, 2] <- as.character(strsplit(aa, " /// ")[[1]][1])
42 | }
43 | gpl[,1] <- as.character(gpl[,1])
44 | arrayData[, 1] <- as.character(arrayData[, 1])
45 | rownames(gpl) <- gpl[, 1]
46 | arrayData[, 1] <- gpl[arrayData[, 1], 2]
47 | 
48 | 
49 | arrayData <- repRemove(arrayData," /// ")
50 | 
51 | # Remove rows that do not correspond to genes
52 | arrayData <- arrayData[!is.na(arrayData[, 1]), ]
53 | arrayData <- arrayData[!arrayData[, 1] == "", ]
54 | arrayData <- arrayData[!arrayData[, 1] == "---", ]
55 | 
56 | 
57 | arrayData <- arrayData[order(arrayData[, 1]), ]
58 | arrayData <- gene_ave(arrayData, 1)
59 | 
60 | keep <- apply(arrayData, 1, function(x) sum(x < 1) < (length(x)/2))
61 | arrayData <- arrayData[keep, ]
62 | 
63 | group <- c(rep("group1", 12), rep("group2", 12))
64 | result <- differential_array(df = arrayData, group = group)
65 | }
66 | # Use random data as example
67 | arrayData <- matrix(runif(200), 25, 8)
68 | rownames(arrayData) <- paste0("gene", 1:25)
69 | colnames(arrayData) <- paste0("sample", 1:8)
70 | group <- c(rep("group1", 4), rep("group2", 4))
71 | result <- differential_array(df = arrayData, group = group)
72 | }
73 | 


--------------------------------------------------------------------------------
/man/differential_cnv.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/CNV.R
 3 | \name{differential_CNV}
 4 | \alias{differential_CNV}
 5 | \title{Do difference analysis of gene level copy number variation data}
 6 | \usage{
 7 | differential_CNV(
 8 |   cnvData,
 9 |   sampleGroup,
10 |   method = "Chisquare",
11 |   adjust.method = "BH",
12 |   ...
13 | )
14 | }
15 | \arguments{
16 | \item{cnvData}{data.frame of CNV data, each column is a sample, 
17 | and each row is a CNV.}
18 | 
19 | \item{sampleGroup}{vector of sample group}
20 | 
21 | \item{method}{method to do differential analysis,
22 | one of "Chisquare", "fisher",
23 | and "CATT"(Cochran-Armitage trend test)}
24 | 
25 | \item{adjust.method}{adjust.method, one of "holm", "hochberg", "hommel", 
26 | "bonferroni", "BH", "BY", "fdr", and "none".}
27 | 
28 | \item{...}{parameters for "Chisquare", "fisher",
29 | and "CATT"(Cochran-Armitage trend test)}
30 | }
31 | \value{
32 | data.frame with pvalue and estimate
33 | }
34 | \description{
35 | Do difference analysis of gene level copy number variation data
36 | }
37 | \examples{
38 | \donttest{
39 | # use TCGAbiolinks data as example
40 | library(TCGAbiolinks)
41 | query <- GDCquery(
42 |         project = "TCGA-ACC",
43 |         data.category = "Copy Number Variation",
44 |         data.type = "Gene Level Copy Number",
45 |         access = "open"
46 | )
47 | GDCdownload(query)
48 | cnvData <- GDCprepare(query)
49 | aa <- SummarizedExperiment::assays(cnvData)$copy_number
50 | bb <- aa
51 | aa[bb == 2] <- 0
52 | aa[bb < 2] <- -1
53 | aa[bb > 2] <- 1
54 | sampleGroup <- sample(c("A", "B"), ncol(cnvData), replace = TRUE)
55 | diffCnv <- differential_CNV(aa, sampleGroup)
56 | 
57 | # Use sangerbox CNV data as example
58 | cnvData <- data.table::fread("Merge_GeneLevelCopyNumber.txt")
59 | class(cnvData) <- "data.frame"
60 | rownames(cnvData) <- cnvData[, 1]
61 | cnvData <- cnvData[, -c(1, 2, 3)]
62 | sampleGroup <- sample(c("A", "B"), ncol(cnvData), replace = TRUE)
63 | diffCnv <- differential_CNV(cnvData, sampleGroup)
64 | }
65 | # use random data as example
66 | aa <- matrix(sample(c(0, 1, -1), 200, replace = TRUE), 25, 8)
67 | rownames(aa) <- paste0("gene", 1:25)
68 | colnames(aa) <- paste0("sample", 1:8)
69 | sampleGroup <- sample(c("A", "B"), ncol(aa), replace = TRUE)
70 | diffCnv <- differential_CNV(aa, sampleGroup)
71 | }
72 | 


--------------------------------------------------------------------------------
/man/differential_limma.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Merge_methylation.R
 3 | \name{differential_limma}
 4 | \alias{differential_limma}
 5 | \title{differential_limma}
 6 | \usage{
 7 | differential_limma(df, group, adjust.method = "BH")
 8 | }
 9 | \arguments{
10 | \item{df}{data.frame of the omic data}
11 | 
12 | \item{group}{a vector, group of samples.}
13 | 
14 | \item{adjust.method}{adjust.method.}
15 | }
16 | \value{
17 | data.frame
18 | }
19 | \description{
20 | differential_limma
21 | }
22 | \examples{
23 | df <- matrix(runif(200), 25, 8)
24 | df <- as.data.frame(df)
25 | rownames(df) <- paste0("gene", 1:25)
26 | colnames(df) <- paste0("sample", 1:8)
27 | group <- sample(c("group1", "group2"), 8, replace = TRUE)
28 | result <- differential_limma(df = df, group = group)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/differential_methy.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/Merge_methylation.R
  3 | \name{differential_methy}
  4 | \alias{differential_methy}
  5 | \title{differential_methy}
  6 | \usage{
  7 | differential_methy(
  8 |   cpgData,
  9 |   sampleGroup,
 10 |   groupCol,
 11 |   combineMethod = "stouffer",
 12 |   missing_value = "knn",
 13 |   cpg2gene = NULL,
 14 |   normMethod = "PBC",
 15 |   region = "TSS1500",
 16 |   model = "gene",
 17 |   adjust.method = "BH",
 18 |   adjPvalCutoff = 0.05,
 19 |   ucscData = FALSE
 20 | )
 21 | }
 22 | \arguments{
 23 | \item{cpgData}{data.frame of cpg beta value, or SummarizedExperiment object}
 24 | 
 25 | \item{sampleGroup}{vector of sample group}
 26 | 
 27 | \item{groupCol}{group column}
 28 | 
 29 | \item{combineMethod}{method to combine the cpg pvalues, 
 30 | a function or one of "stouffer", "fisher" and "rhoScores".}
 31 | 
 32 | \item{missing_value}{Method to impute missing expression data,
 33 | one of "zero" and "knn".}
 34 | 
 35 | \item{cpg2gene}{data.frame to annotate cpg locus to gene}
 36 | 
 37 | \item{normMethod}{Method to do normalization: "PBC" or "BMIQ".}
 38 | 
 39 | \item{region}{region of genes, one of "Body", "TSS1500", "TSS200",
 40 | "3'UTR", "1stExon", "5'UTR", and "IGR". Only used when cpg2gene is NULL.}
 41 | 
 42 | \item{model}{if "cpg", step1: calculate difference cpgs;
 43 | step2: calculate difference genes.
 44 | if "gene", step1: calculate the methylation level of genes;
 45 | step2: calculate difference genes.}
 46 | 
 47 | \item{adjust.method}{character string specifying the method
 48 | used to adjust p-values for multiple testing.
 49 | See \link{p.adjust} for possible values.}
 50 | 
 51 | \item{adjPvalCutoff}{adjusted pvalue cutoff}
 52 | 
 53 | \item{ucscData}{Logical, whether the data comes from UCSC Xena.}
 54 | }
 55 | \value{
 56 | data.frame
 57 | }
 58 | \description{
 59 | Get methylation difference gene
 60 | }
 61 | \examples{
 62 | \donttest{
 63 | # use TCGAbiolinks data
 64 | library(TCGAbiolinks)
 65 | query <- GDCquery(project = "TCGA-ACC",
 66 |     data.category = "DNA Methylation",
 67 |     data.type = "Methylation Beta Value",
 68 |     platform = "Illumina Human Methylation 450")
 69 | GDCdownload(query, method = "api", files.per.chunk = 5,
 70 |     directory = Your_Path)
 71 | merge_result <- Merge_methy_tcga(Your_Path_to_DNA_Methylation_data)
 72 | library(ChAMP) # To avoid reporting errors
 73 | differential_gene <- differential_methy(cpgData = merge_result,
 74 |     sampleGroup = sample(c("C","T"),
 75 |     ncol(merge_result[[1]]), replace = TRUE))
 76 | }
 77 | # use user defined data
 78 | library(ChAMP)
 79 | cpgData <- matrix(runif(2000), nrow = 200, ncol = 10)
 80 | rownames(cpgData) <- paste0("cpg", seq_len(200))
 81 | colnames(cpgData) <- paste0("sample", seq_len(10))
 82 | sampleGroup <- c(rep("group1", 5), rep("group2", 5))
 83 | names(sampleGroup) <- colnames(cpgData)
 84 | cpg2gene <- data.frame(cpg = rownames(cpgData), 
 85 |     gene = rep(paste0("gene", seq_len(20)), 10))
 86 | result <- differential_methy(cpgData, sampleGroup, 
 87 |     cpg2gene = cpg2gene, normMethod = NULL)
 88 | # use SummarizedExperiment object input
 89 | library(ChAMP)
 90 | cpgData <- matrix(runif(2000), nrow = 200, ncol = 10)
 91 | rownames(cpgData) <- paste0("cpg", seq_len(200))
 92 | colnames(cpgData) <- paste0("sample", seq_len(10))
 93 | sampleGroup <- c(rep("group1", 5), rep("group2", 5))
 94 | names(sampleGroup) <- colnames(cpgData)
 95 | cpg2gene <- data.frame(cpg = rownames(cpgData), 
 96 |     gene = rep(paste0("gene", seq_len(20)), 10))
 97 | colData <- S4Vectors::DataFrame(
 98 |     row.names = colnames(cpgData),
 99 |     group = sampleGroup
100 | )
101 | data <- SummarizedExperiment::SummarizedExperiment(
102 |          assays=S4Vectors::SimpleList(counts=cpgData),
103 |          colData = colData)
104 | result <- differential_methy(cpgData = data, 
105 |     groupCol = "group", normMethod = NULL, 
106 |     cpg2gene = cpg2gene)  
107 | }
108 | 


--------------------------------------------------------------------------------
/man/fpkmToTpm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fpkm_count_conversion.r
 3 | \name{fpkmToTpm}
 4 | \alias{fpkmToTpm}
 5 | \title{Convert fpkm to Tpm}
 6 | \usage{
 7 | fpkmToTpm(fpkm_matrix)
 8 | }
 9 | \arguments{
10 | \item{fpkm_matrix}{a matrix, colnames of fpkm_matrix are sample name,
11 | rownames of fpkm_matrix are genes}
12 | }
13 | \value{
14 | a matrix
15 | }
16 | \description{
17 | Convert fpkm to Tpm
18 | }
19 | \examples{
20 | lung_squ_count2 <- matrix(c(0.11, 0.22, 0.43, 0.14, 0.875,
21 |     0.66, 0.77, 0.18, 0.29), ncol = 3)
22 | rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
23 | colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
24 | result <- fpkmToTpm(lung_squ_count2)
25 | }
26 | 


--------------------------------------------------------------------------------
/man/geneExpress.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{geneExpress}
 5 | \alias{geneExpress}
 6 | \title{a data.frame of gene expression data}
 7 | \format{
 8 | A data.frame with 10779 rows and 2 columns
 9 | }
10 | \usage{
11 | geneExpress
12 | }
13 | \description{
14 | It is randomly generated expression data
15 | used as an example of functions in this package.
16 | The row names are gene symbols and
17 | the columns are gene expression values.
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/gene_ave.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calculate_mean_profile.R
 3 | \name{gene_ave}
 4 | \alias{gene_ave}
 5 | \title{Average the values of same genes in gene expression profile}
 6 | \usage{
 7 | gene_ave(file_gene_ave, k = 1)
 8 | }
 9 | \arguments{
10 | \item{file_gene_ave}{a data.frame of gene expression data, 
11 | each column is a sample, and each row is a gene.}
12 | 
13 | \item{k}{a number, indicates which is the gene column.}
14 | }
15 | \value{
16 | a data.frame in which the values of the same gene are averaged
17 | }
18 | \description{
19 | Average the values of same genes in gene expression profile
20 | }
21 | \examples{
22 | aa <- c("MARCH1", "MARC1", "MARCH1", "MARCH1", "MARCH1")
23 | bb <- c(2.969058399, 4.722410064, 8.165514853, 8.24243893, 8.60815086)
24 | cc <- c(3.969058399, 5.722410064, 7.165514853, 6.24243893, 7.60815086)
25 | file_gene_ave <- data.frame(aa = aa, bb = bb, cc = cc)
26 | colnames(file_gene_ave) <- c("Gene", "GSM1629982", "GSM1629983")
27 | 
28 | result <- gene_ave(file_gene_ave, 1)
29 | }
30 | 


--------------------------------------------------------------------------------
/man/gene_cov.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{gene_cov}
 5 | \alias{gene_cov}
 6 | \title{a data.frame of gene length and GC content}
 7 | \format{
 8 | A data.frame with 27341 rows and 2 columns
 9 | }
10 | \usage{
11 | gene_cov
12 | }
13 | \description{
14 | the gene length and GC content data comes from 
15 | TxDb.Hsapiens.UCSC.hg38.knownGene and
16 | BSgenome.Hsapiens.UCSC.hg38
17 | }
18 | \keyword{datasets}
19 | 


--------------------------------------------------------------------------------
/man/get_geo_array.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/arrayDiff.R
 3 | \name{get_geo_array}
 4 | \alias{get_geo_array}
 5 | \title{Get Microarray matrix data from GEO}
 6 | \usage{
 7 | get_geo_array(gse)
 8 | }
 9 | \arguments{
10 | \item{gse}{GSE number, such as GSE781.}
11 | }
12 | \value{
13 | a list of matrix
14 | }
15 | \description{
16 | Get Microarray matrix data from GEO
17 | }
18 | \examples{
19 | \donttest{
20 | arraylist <- get_geo_array("GSE781")
21 | }
22 | }
23 | 


--------------------------------------------------------------------------------
/man/id_conversion_TCGA.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/TCGA_id_conversion.R
 3 | \name{id_conversion_TCGA}
 4 | \alias{id_conversion_TCGA}
 5 | \title{Convert ENSEMBL gene id to gene Symbol in TCGA}
 6 | \usage{
 7 | id_conversion_TCGA(profiles, toType = "SYMBOL")
 8 | }
 9 | \arguments{
10 | \item{profiles}{a data.frame of gene expression data, 
11 | each column is a sample, 
12 | and each row is a gene.}
13 | 
14 | \item{toType}{one of 'keytypes(org.Hs.eg.db)'}
15 | }
16 | \value{
17 | a data.frame, gene symbols and their expression value
18 | }
19 | \description{
20 | Convert ENSEMBL gene id to gene Symbol in TCGA
21 | }
22 | \examples{
23 | library(org.Hs.eg.db)
24 | data(profile)
25 | result <- id_conversion_TCGA(profile)
26 | }
27 | 


--------------------------------------------------------------------------------
/man/kegg_liver.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{kegg_liver}
 5 | \alias{kegg_liver}
 6 | \title{a matrix of gene expression data in TCGA}
 7 | \format{
 8 | A matrix with 100 rows and 150 columns
 9 | }
10 | \usage{
11 | kegg_liver
12 | }
13 | \description{
14 | It is a randomly generated expression data 
15 | used as an example of functions in this package.
16 | the first column represents the gene symbol
17 | }
18 | \details{
19 | the other columns represent the expression(count) of genes
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/man/module.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{module}
 5 | \alias{module}
 6 | \title{a matrix of module name, gene symbols, and the number of gene symbols}
 7 | \format{
 8 | A matrix with 176 rows and 3 columns
 9 | }
10 | \usage{
11 | module
12 | }
13 | \description{
14 | It is a randomly generated expression data 
15 | used as an example of functions in this package.
16 | }
17 | \keyword{datasets}
18 | 


--------------------------------------------------------------------------------
/man/prepare_chi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prepareChi.r
 3 | \name{prepare_chi}
 4 | \alias{prepare_chi}
 5 | \title{Prepare file for chi-square test}
 6 | \usage{
 7 | prepare_chi(cnv)
 8 | }
 9 | \arguments{
10 | \item{cnv}{result of ann_merge()}
11 | }
12 | \value{
13 | a matrix
14 | }
15 | \description{
16 | Prepare file for chi-square test
17 | }
18 | \examples{
19 | cnv <- matrix(c(
20 |     -1.09150, -1.47120, -0.87050, -0.50880,
21 |     -0.50880, 2.0, 2.0, 2.0, 2.0, 2.0, 2.601962, 2.621332, 2.621332,
22 |     2.621332, 2.621332, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
23 |     2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0
24 | ), nrow = 5)
25 | cnv <- as.data.frame(cnv)
26 | rownames(cnv) <- c("AJAP1", "FHAD1", "CLCNKB", "CROCCP2", "AL137798.3")
27 | colnames(cnv) <- c(
28 |     "TCGA-DD-A4NS-10A-01D-A30U-01", "TCGA-ED-A82E-01A-11D-A34Y-01",
29 |     "TCGA-WQ-A9G7-01A-11D-A36W-01", "TCGA-DD-AADN-01A-11D-A40Q-01",
30 |     "TCGA-ZS-A9CD-10A-01D-A36Z-01", "TCGA-DD-A1EB-11A-11D-A12Y-01"
31 | )
32 | cnv_chi_file <- prepare_chi(cnv)
33 | }
34 | 


--------------------------------------------------------------------------------
/man/profile.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{profile}
 5 | \alias{profile}
 6 | \title{a matrix of gene expression data in TCGA}
 7 | \format{
 8 | A matrix with 10 rows and 10 columns
 9 | }
10 | \usage{
11 | profile
12 | }
13 | \description{
14 | It is a randomly generated expression data 
15 | used as an example of functions in this package.
16 | the first column represents the gene symbol
17 | }
18 | \details{
19 | the other columns represent the expression(FPKM) of genes
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/man/repAssign.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rep.R
 3 | \name{repAssign}
 4 | \alias{repAssign}
 5 | \title{Handle the case where one id corresponds to multiple genes}
 6 | \usage{
 7 | repAssign(input_file, string)
 8 | }
 9 | \arguments{
10 | \item{input_file}{input file, a data.frame or a matrix, 
11 | the first column should be genes.}
12 | 
13 | \item{string}{a string, sep of the gene}
14 | }
15 | \value{
16 | a data.frame, when an id corresponds to multiple genes,
17 | the expression value is assigned to each gene
18 | }
19 | \description{
20 | Handle the case where one id corresponds to multiple genes
21 | }
22 | \examples{
23 | aa <- c("MARCH1 /// MMA", "MARC1", "MARCH2 /// MARCH3", 
24 |     "MARCH3 /// MARCH4", "MARCH1")
25 | bb <- c("2.969058399", "4.722410064", "8.165514853",
26 |     "8.24243893", "8.60815086")
27 | cc <- c("3.969058399", "5.722410064", "7.165514853", 
28 |     "6.24243893", "7.60815086")
29 | input_file <- data.frame(aa = aa, bb = bb, cc = cc)
30 | 
31 | repAssign_result <- repAssign(input_file, " /// ")
32 | 
33 | }
34 | 


--------------------------------------------------------------------------------
/man/repRemove.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rep.R
 3 | \name{repRemove}
 4 | \alias{repRemove}
 5 | \title{Handle the case where one id corresponds to multiple genes}
 6 | \usage{
 7 | repRemove(input_file, string)
 8 | }
 9 | \arguments{
10 | \item{input_file}{input file, a data.frame or a matrix, 
11 | the first column should be genes.}
12 | 
13 | \item{string}{a string, sep of the gene}
14 | }
15 | \value{
16 | a data.frame, when an id corresponds to multiple genes,
17 | the expression value is deleted
18 | }
19 | \description{
20 | Handle the case where one id corresponds to multiple genes
21 | }
22 | \examples{
23 | aa <- c("MARCH1 /// MMA", "MARC1", "MARCH2 /// MARCH3", 
24 |     "MARCH3 /// MARCH4", "MARCH1")
25 | bb <- c("2.969058399", "4.722410064", "8.165514853", 
26 |     "8.24243893", "8.60815086")
27 | cc <- c("3.969058399", "5.722410064", "7.165514853", 
28 |     "6.24243893", "7.60815086")
29 | input_file <- data.frame(aa = aa, bb = bb, cc = cc)
30 | repRemove_result <- repRemove(input_file, " /// ")
31 | }
32 | 


--------------------------------------------------------------------------------
/man/ventricle.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{ventricle}
 5 | \alias{ventricle}
 6 | \title{a matrix of gene expression data in GEO}
 7 | \format{
 8 | A matrix with 32 rows and 20 columns
 9 | }
10 | \usage{
11 | ventricle
12 | }
13 | \description{
14 | It is a randomly generated expression data 
15 | used as an example of functions in this package.
16 | the first column represents the gene symbol
17 | }
18 | \details{
19 | the other columns represent the expression of genes
20 | }
21 | \keyword{datasets}
22 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/tests.html
 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files
 8 | 
 9 | library(testthat)
10 | library(GeoTcgaData)
11 | 
12 | test_check("GeoTcgaData")
13 | 


--------------------------------------------------------------------------------
/tests/testthat/fpkmToTpm.R:
--------------------------------------------------------------------------------
1 | test_that("can parse example fpkmToTpm_matrix", {
2 |     # 3 genes x 3 samples of FPKM values, filled column by column.
3 |     fpkm_values <- c(0.11, 0.22, 0.43, 0.14, 0.875, 0.66, 0.77, 0.18, 0.29)
4 |     fpkm <- matrix(fpkm_values, ncol = 3, dimnames = list(
5 |         c("DISC1", "TCOF1", "SPPL3"), c("sample1", "sample2", "sample3")))
6 |     tpm <- fpkmToTpm(fpkm)
7 |     expect_equal(dim(fpkm), dim(tpm)) # the result must keep the input shape
8 | })
9 | 


--------------------------------------------------------------------------------
/tests/testthat/test_Merge_methy_tcga.R:
--------------------------------------------------------------------------------
1 | test_that("can parse example Merge_methy_tcga", {
2 |     # Locate the demo methylation directory (extdata/methy) of the package.
3 |     methy_dir <- system.file(file.path("extdata", "methy"),
4 |       package = "GeoTcgaData")
5 |     merged <- Merge_methy_tcga(methy_dir)
6 |     expect_equal(names(merged), c("methyResult", "cpg_info"))
7 | })


--------------------------------------------------------------------------------
/tests/testthat/test_arrayDiff.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example differential_array", {
 2 |     # skip_on_bioc()
 3 |     # 25 genes x 8 samples of uniform noise; first four samples are group1.
 4 |     arrayData <- matrix(runif(200), nrow = 25, ncol = 8,
 5 |         dimnames = list(paste0("gene", 1:25), paste0("sample", 1:8)))
 6 |     group <- rep(c("group1", "group2"), each = 4)
 7 |     result <- differential_array(df = arrayData, group = group)
 8 |     expect_true("P.Value" %in% colnames(result))
 9 | })
10 | 


--------------------------------------------------------------------------------
/tests/testthat/test_cal_mean_module.R:
--------------------------------------------------------------------------------
1 | test_that("can parse example cal_mean_module", {
2 |     # Load both bundled example datasets in one call.
3 |     data(geneExpress, module)
4 |     module_means <- cal_mean_module(geneExpress, module)
5 |     expect_equal(ncol(module_means), ncol(geneExpress))
6 | })


--------------------------------------------------------------------------------
/tests/testthat/test_countToFpkm.R:
--------------------------------------------------------------------------------
 1 | # countToFpkm() should preserve the dimensions of the count matrix.
 2 | test_that("can parse example countToFpkm_matrix", {
 3 |     data(gene_cov)
 4 |     genes <- c("DISC1", "TCOF1", "SPPL3")
 5 |     samples <- paste0("sample", 1:3)
 6 |     counts <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9),
 7 |         ncol = 3, dimnames = list(genes, samples)
 8 |     )
 9 |     fpkm <- countToFpkm(counts, keyType = "SYMBOL", gene_cov = gene_cov)
10 |     expect_equal(dim(counts), dim(fpkm))
11 | })
12 | 


--------------------------------------------------------------------------------
/tests/testthat/test_countToTpm.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example countToTpm_matrix", {
 2 |     data(gene_cov)
 3 |     # 3 genes x 3 samples; values 1..9 stored as doubles, filled by column.
 4 |     raw_counts <- matrix(as.numeric(1:9), ncol = 3)
 5 |     dimnames(raw_counts) <- list(
 6 |         c("DISC1", "TCOF1", "SPPL3"),
 7 |         c("sample1", "sample2", "sample3")
 8 |     )
 9 |     tpm <- countToTpm(raw_counts, keyType = "SYMBOL", gene_cov = gene_cov)
10 |     expect_equal(dim(raw_counts), dim(tpm))
11 | })
12 | 


--------------------------------------------------------------------------------
/tests/testthat/test_diff_RNA_ucsc.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example diff_RNA_ucsc", {
 2 |     skip_on_cran()
 3 |     df <- as.data.frame(matrix(rnbinom(400, mu = 4, size = 10), 25, 16))
 4 |     rownames(df) <- paste0("gene", 1:25)
 5 |     colnames(df) <- paste0("sample", 1:16)
 6 |     df <- log2(df + 1)
 7 |     # Fixed 8-vs-8 split; a random draw could leave one group (nearly) empty.
 8 |     group <- rep(c("group1", "group2"), each = 8)
 9 |     df <- cbind(rownames(df), df)
10 |     result <- differential_RNA(counts = df, group = group,
11 |         filte = FALSE, method = "limma", ucscData = TRUE)
12 |     expect_true("P.Value" %in% colnames(result))
13 | })
14 | 


--------------------------------------------------------------------------------
/tests/testthat/test_differential_CNV.R:
--------------------------------------------------------------------------------
1 | test_that("can parse example diff_CNV", {
2 |     aa <- matrix(sample(c(0, 1, -1), 200, replace = TRUE), 25, 8,
3 |         dimnames = list(paste0("gene", 1:25), paste0("sample", 1:8)))
4 |     # Alternate A/B so both groups always hold four samples (no flaky draw).
5 |     sampleGroup <- rep(c("A", "B"), length.out = ncol(aa))
6 |     diffCnv <- differential_CNV(aa, sampleGroup)
7 |     expect_true("P.Value" %in% colnames(diffCnv))
8 | })
9 | 


--------------------------------------------------------------------------------
/tests/testthat/test_differential_RNA.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example differential_RNA", {
 2 |     skip_on_cran()
 3 |     df <- as.data.frame(matrix(rnbinom(400, mu = 4, size = 10), 25, 16))
 4 |     rownames(df) <- paste0("gene", 1:25)
 5 |     colnames(df) <- paste0("sample", 1:16)
 6 |     # Fixed 8-vs-8 split; a random draw could leave one group (nearly) empty.
 7 |     group <- rep(c("group1", "group2"), each = 8)
 8 |     result <- differential_RNA(counts = df, group = group,
 9 |         filte = FALSE, method = "Wilcoxon")
10 |     expect_true("P.Value" %in% colnames(result))
11 | })
12 | 


--------------------------------------------------------------------------------
/tests/testthat/test_differential_limma.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example Diff_limma", {
 2 |     df <- as.data.frame(matrix(runif(200), 25, 8))
 3 |     rownames(df) <- paste0("gene", 1:25)
 4 |     colnames(df) <- paste0("sample", 1:8)
 5 |     # Fixed 4-vs-4 split; a random draw could put all samples in one group.
 6 |     group <- rep(c("group1", "group2"), each = 4)
 7 |     result <- differential_limma(df = df, group = group)
 8 |     expect_true("P.Value" %in% colnames(result))
 9 | })
10 | 


--------------------------------------------------------------------------------
/tests/testthat/test_gene_ave.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example gene_ave", {
 2 |     expr1 <- c(2.969058399, 4.722410064, 8.165514853, 8.24243893, 8.60815086)
 3 |     expr2 <- c(3.969058399, 5.722410064, 7.165514853, 6.24243893, 7.60815086)
 4 |     file_gene_ave <- data.frame(
 5 |         Gene = c("MARCH1", "MARC1", "MARCH1", "MARCH1", "MARCH1"),
 6 |         GSM1629982 = expr1, GSM1629983 = expr2)
 7 |     result <- gene_ave(file_gene_ave, 1)
 8 |     # Each distinct input gene should be a row name of the result.
 9 |     genes_in <- sort(unique(file_gene_ave$Gene))
10 |     expect_equal(genes_in, sort(unique(rownames(result))))
11 | })
12 | 


--------------------------------------------------------------------------------
/tests/testthat/test_prepare_chi.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example prepare_chi", {
 2 |     # 5 genes x 6 TCGA samples, filled column by column.
 3 |     cnv_values <- c(
 4 |         -1.09150, -1.47120, -0.87050, -0.50880, -0.50880,
 5 |         rep(2.0, 5),
 6 |         2.601962, rep(2.621332, 4),
 7 |         rep(2.0, 15)
 8 |     )
 9 |     cnv <- as.data.frame(matrix(cnv_values, nrow = 5))
10 |     dimnames(cnv) <- list(
11 |         c("AJAP1", "FHAD1", "CLCNKB", "CROCCP2", "AL137798.3"),
12 |         c("TCGA-DD-A4NS-10A-01D-A30U-01", "TCGA-ED-A82E-01A-11D-A34Y-01",
13 |             "TCGA-WQ-A9G7-01A-11D-A36W-01", "TCGA-DD-AADN-01A-11D-A40Q-01",
14 |             "TCGA-ZS-A9CD-10A-01D-A36Z-01", "TCGA-DD-A1EB-11A-11D-A12Y-01")
15 |     )
16 |     expect_true("normalCNV" %in% colnames(prepare_chi(cnv)))
17 | })
18 | 


--------------------------------------------------------------------------------
/tests/testthat/test_repAssign.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example repAssign", {
 2 |     aa <- c("MARCH1 /// MMA", "MARC1", "MARCH2 /// MARCH3",
 3 |         "MARCH3 /// MARCH4", "MARCH1")
 4 |     bb <- c("2.969058399", "4.722410064", "8.165514853",
 5 |         "8.24243893", "8.60815086")
 6 |     cc <- c("3.969058399", "5.722410064", "7.165514853",
 7 |         "6.24243893", "7.60815086")
 8 |     input_file <- data.frame(aa = aa, bb = bb, cc = cc)
 9 |     repAssign_result <- repAssign(input_file, " /// ")
10 |     # No gene label may contain the separator (`%in%` only tested equality).
11 |     expect_false(any(grepl(" /// ", repAssign_result[, 1], fixed = TRUE)))
12 | })
13 | 


--------------------------------------------------------------------------------
/tests/testthat/test_repRemove.R:
--------------------------------------------------------------------------------
 1 | test_that("can parse example repRemove", {
 2 |     aa <- c("MARCH1 /// MMA", "MARC1", "MARCH2 /// MARCH3",
 3 |         "MARCH3 /// MARCH4", "MARCH1")
 4 |     bb <- c("2.969058399", "4.722410064", "8.165514853",
 5 |         "8.24243893", "8.60815086")
 6 |     cc <- c("3.969058399", "5.722410064", "7.165514853",
 7 |         "6.24243893", "7.60815086")
 8 |     input_file <- data.frame(aa = aa, bb = bb, cc = cc)
 9 |     repRemove_result <- repRemove(input_file, " /// ")
10 |     # No gene label may contain the separator (`%in%` only tested equality).
11 |     expect_false(any(grepl(" /// ", repRemove_result[, 1], fixed = TRUE)))
12 | })


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/vignettes/GeoTcgaData.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "GeoTcgaData"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{GeoTcgaData}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | ---
  9 | 
 10 | ```{r, include = FALSE}
 11 | knitr::opts_chunk$set(
 12 |     collapse = TRUE,
 13 |     comment = "#>"
 14 | )
 15 | ```
 16 | --------
 17 | 
 18 | ## Authors
 19 | Erqiang Hu
 20 | 
 21 | Department of Bioinformatics, School of Basic Medical Sciences, 
 22 | Southern Medical University.
 23 | 
 24 | 
 25 | ## Introduction
 26 | GEO and TCGA provide us with a wealth of data, such as RNA-seq, DNA methylation,
 27 | single nucleotide variation and copy number variation data.
 28 | It's easy to download data from TCGA using the GDC tool or `TCGAbiolinks`,
 29 | and some software provides organized TCGA data, such as
 30 | [UCSC Xena](http://xena.ucsc.edu/), `UCSCXenaTools`, and
 31 | [sangerbox](http://vip.sangerbox.com/), but processing these data into a format
 32 | suitable for bioinformatics analysis requires more work. This R package was
 33 | developed to handle these data.
 34 | 
 35 | ```{r setup}
 36 | library(GeoTcgaData)
 37 | ```
 38 | 
 39 | ## Example
 40 | 
 41 | This is a basic example which shows you how to solve a common problem:
 42 | 
 43 | ### RNA-seq data differential expression analysis
 44 | 
 45 | It is convenient to use `TCGAbiolinks` or
 46 | [`GDCRNATools`](https://bioconductor.org/packages/GDCRNATools/) to download
 47 | and analyze gene expression data. `TCGAbiolinks` uses the `edgeR` package for
 48 | differential expression analysis, while `GDCRNATools` implements the three most
 49 | commonly used methods (limma, edgeR, and DESeq2) to identify differentially
 50 | expressed genes (DEGs).
 51 | 
 52 | Alicia Oshlack et al. claimed that, unlike chip data,
 53 | RNA-seq data has a [bias](https://pubmed.ncbi.nlm.nih.gov/20132535/)[1]:
 54 | the larger the transcript length / mean read count, the more likely a gene is
 55 | to be identified as a differential gene,
 56 | while there is no such trend in the
 57 | [chip data](https://pubmed.ncbi.nlm.nih.gov/19371405/)[2].
 58 | 
 59 | 
 60 |  However, when we use their chip data for difference analysis
 61 |  (using the limma package), we find that chip data has the same trend as 
 62 |  RNA-seq data. And we also found this trend in the difference analysis results 
 63 |  given by the data 
 64 |  [authors](https://genome.cshlp.org/content/18/9/1509.long)[3].
 65 | 
 66 |  
 67 | 
 68 |  It is worth noting that only technical replicate data, which has small gene
 69 |  dispersions, shows this [bias](https://pubmed.ncbi.nlm.nih.gov/28545404/)[4].
 70 |  This is because in technical replicate RNA-seq data a long gene has more
 71 |  reads mapping to it compared to a short gene of similar expression,
 72 |  and most of the statistical methods used to detect differential expression
 73 |  have stronger detection ability for genes with more reads. However, we have
 74 |  not deduced why there is such a bias in the current differential
 75 |  analysis algorithms.
 76 | 
 77 | Some software, such as [CQN](http://www.bioconductor.org/packages/cqn/),
 78 | presents a
 79 | [normalization algorithm](https://pubmed.ncbi.nlm.nih.gov/22285995/)[5]
 80 | to correct systematic biases (gene length bias and
 81 | [GC-content bias](https://pubmed.ncbi.nlm.nih.gov/22177264/)[6]).
 82 | But they did not provide sufficient evidence to prove that the correction is
 83 | effective. We use the
 84 | [Marioni dataset](https://pubmed.ncbi.nlm.nih.gov/19371405/)[2] to verify the
 85 | correction effect of CQN and find that there is still a deviation
 86 | after correction.
 87 | 
 88 | 
 89 | 
 90 | [GOseq](http://bioconductor.org/packages/goseq/)[1], based on
 91 | Wallenius' noncentral hypergeometric distribution, can effectively correct the
 92 | gene length deviation in enrichment analysis. However, current RNA-seq data
 93 | often have no gene length bias, but only an expression amount (read count)
 94 | bias, so GOseq may overcorrect these data, turning originally unbiased data
 95 | into data with a reverse bias.
 96 | 
 97 | GOseq also fails to correct for expression bias; therefore, read count bias
 98 | correction is still a challenge for us.
 99 | 
100 | ```{r, message=FALSE, warning=FALSE}
101 | # ---- Input as a plain count table ----
102 | # 25 genes x 16 samples of simulated negative-binomial counts.
103 | df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
104 | df <- as.data.frame(df)
105 | rownames(df) <- paste0("gene", 1:25)
106 | colnames(df) <- paste0("sample", 1:16)
107 | # Fixed 8-vs-8 grouping: a random draw could leave one group (nearly) empty
108 | # and make the vignette build fail intermittently.
109 | group <- rep(c("group1", "group2"), each = 8)
110 | result <- differential_RNA(counts = df, group = group,
111 |     filte = FALSE, method = "Wilcoxon")
112 | #
113 | # ---- Input as a SummarizedExperiment object ----
114 | # Same kind of simulated counts, kept as a matrix for use as the assay.
115 | df <- matrix(rnbinom(400, mu = 4, size = 10), 25, 16)
116 | rownames(df) <- paste0("gene", 1:25)
117 | colnames(df) <- paste0("sample", 1:16)
118 | group <- rep(c("group1", "group2"), each = 8)
119 | # colData row names are taken from the assay so the two always agree.
120 | colData <- S4Vectors::DataFrame(
121 |     row.names = colnames(df),
122 |     group = group
123 | )
124 | data <- SummarizedExperiment::SummarizedExperiment(
125 |     assays = S4Vectors::SimpleList(counts = df),
126 |     colData = colData
127 | )
128 | #
129 | result <- differential_RNA(counts = data, groupCol = "group",
130 |     filte = FALSE, method = "Wilcoxon")
131 | ```
132 | 
133 | 
134 | ### DNA Methylation data integration 
135 | 
136 | use `TCGAbiolinks` data. 
137 | 
138 | The code may need to be modified if `TCGAbiolinks` is updated,
139 | so please read its documentation.
140 | 
141 | ```{r, message=FALSE, warning=FALSE}
142 | # ---- Input as a plain matrix of CpG values ----
143 | library(ChAMP)
144 | cpgData <- matrix(runif(2000), nrow = 200, ncol = 10)
145 | dimnames(cpgData) <- list(
146 |     paste0("cpg", seq_len(200)), paste0("sample", seq_len(10))
147 | )
148 | sampleGroup <- rep(c("group1", "group2"), each = 5)
149 | names(sampleGroup) <- colnames(cpgData)
150 | # Assign the 200 CpGs to 20 genes by cycling gene1..gene20 ten times.
151 | cpg2gene <- data.frame(cpg = rownames(cpgData),
152 |     gene = rep(paste0("gene", seq_len(20)), times = 10))
153 | result <- differential_methy(cpgData, sampleGroup,
154 |     cpg2gene = cpg2gene, normMethod = NULL)
155 | # ---- Input as a SummarizedExperiment object ----
156 | library(ChAMP)
157 | cpgData <- matrix(runif(2000), nrow = 200, ncol = 10)
158 | dimnames(cpgData) <- list(
159 |     paste0("cpg", seq_len(200)), paste0("sample", seq_len(10))
160 | )
161 | sampleGroup <- rep(c("group1", "group2"), each = 5)
162 | names(sampleGroup) <- colnames(cpgData)
163 | cpg2gene <- data.frame(cpg = rownames(cpgData),
164 |     gene = rep(paste0("gene", seq_len(20)), times = 10))
165 | colData <- S4Vectors::DataFrame(row.names = colnames(cpgData),
166 |     group = sampleGroup)
167 | data <- SummarizedExperiment::SummarizedExperiment(
168 |     assays = S4Vectors::SimpleList(counts = cpgData), colData = colData
169 | )
170 | result <- differential_methy(cpgData = data, groupCol = "group",
171 |     normMethod = NULL, cpg2gene = cpg2gene)
172 | ```
173 | **Note:** `ChAMP` has a large number of dependent packages.
174 | If you cannot install it successfully, you can download each dependent package
175 | separately (source or binary) and install it locally.
176 | 
177 | We provide two models to get methylation difference genes:  
178 | 
179 | if model = "cpg", step1: calculate difference cpgs; 
180 | step2: calculate difference genes; 
181 | 
182 | if model = "gene", step1: calculate the methylation level of genes;
183 | step2: calculate difference genes.
184 | 
185 | We find that only model = "gene" has no deviation of CpG number. 
186 | 
187 | 
188 | ### Copy number variation data integration and differential gene extraction
189 | 
190 | Use `TCGAbiolinks` to download TCGA data (Gene Level Copy Number Scores).
191 | 
192 | ```{r, message=FALSE, warning=FALSE}
193 | # Random copy-number calls (-1, 0, 1) for 25 genes x 8 samples.
194 | aa <- matrix(sample(c(0, 1, -1), 200, replace = TRUE), 25, 8,
195 |     dimnames = list(paste0("gene", 1:25), paste0("sample", 1:8)))
196 | # Alternate A/B so each group always holds four samples.
197 | sampleGroup <- rep(c("A", "B"), length.out = ncol(aa))
198 | diffCnv <- differential_CNV(aa, sampleGroup)
199 | ```
200 | 
201 | 
202 | 
203 | ### Difference analysis of single nucleotide Variation data 
204 | We provide the `SNP_QC` function for quality control of SNP data.
205 | ```{r, message=FALSE, warning=FALSE}
206 | snpDf <- as.data.frame(
207 |     matrix(sample(c("AA", "Aa", "aa"), 100, replace = TRUE), 10, 10))
208 | sampleGroup <- sample(c("A", "B"), size = 10, replace = TRUE)
209 | result <- SNP_QC(snpDf)  # quality-control the random genotype table
210 | ```
211 | 
212 | Then use `differential_SNP` to do the differential analysis.
213 | ```{r, eval=FALSE}
214 | snpDf <- matrix(sample(c("mutation", NA), 100, replace = TRUE), 10, 10)
215 | snpDf <- as.data.frame(snpDf)
216 | sampleGroup <- sample(c("A", "B"), 10, replace = TRUE)
217 | result <- differential_SNP(snpDf, sampleGroup)  # displayed only, not run
218 | ```
219 | 
220 | 
221 | ### GEO chip data processing
222 | The function `gene_ave` can average the expression data of different
223 | ids for the same gene in the GEO chip data. For example:
224 | 
225 | ```{r, message=FALSE, warning=FALSE}
226 | aa <- c("MARCH1", "MARC1", "MARCH1", "MARCH1", "MARCH1")
227 | bb <- c(2.969058399, 4.722410064, 8.165514853, 8.24243893, 8.60815086)
228 | cc <- c(3.969058399, 5.722410064, 7.165514853, 6.24243893, 7.60815086)
229 | # Name the columns directly: gene symbol first, then the two GSM columns.
230 | file_gene_ave <- data.frame(Gene = aa, GSM1629982 = bb, GSM1629983 = cc)
231 | result <- gene_ave(file_gene_ave, 1)
232 | ```
233 | 
234 | Multiple gene symbols may correspond to the same chip id. The function
235 | `repAssign` assigns the expression of this id to each of the genes,
236 | while the function `repRemove` deletes the expression. For example:
237 | 
238 | ```{r}
239 | aa <- c("MARCH1 /// MMA", "MARC1", "MARCH2 /// MARCH3",
240 |     "MARCH3 /// MARCH4", "MARCH1")
241 | bb <- c("2.969058399", "4.722410064", "8.165514853", "8.24243893", "8.60815086")
242 | cc <- c("3.969058399", "5.722410064", "7.165514853", "6.24243893", "7.60815086")
243 | input_file <- data.frame(aa, bb, cc)  # columns are named aa, bb, cc
244 | repAssign_result <- repAssign(input_file, " /// ")
245 | repRemove_result <- repRemove(input_file, " /// ")
246 | ```
247 | 
248 | ### Other downstream analyses
249 | 
250 | 1. The function `id_conversion_TCGA` can convert ENSEMBL gene ids to
251 | gene symbols in TCGA data. For example:
252 | 
253 | ```{r, message=FALSE, warning=FALSE}
254 | data(profile)  # load the bundled example expression profile
255 | result <- id_conversion_TCGA(profile)  # convert its gene ids
256 | ```
257 | 
258 | The parameter `profile` is a data.frame or matrix of gene expression 
259 | data in TCGA.
260 | 
261 | **Note:** In previous versions (< 1.0.0), `id_conversion` and
262 | `id_conversion_TCGA` used HGNC data to convert human gene ids.  
263 | In future versions, we will use `clusterProfiler::bitr` for ID conversion. 
264 | 
265 | 
266 | 2. The functions `countToFpkm` and `countToTpm` can convert
267 | count data to FPKM or TPM data.
268 | 
269 | ```{r}
270 | data(gene_cov)
271 | # 3 genes x 3 samples of toy counts, filled column by column.
272 | lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3,
273 |     dimnames = list(c("DISC1", "TCOF1", "SPPL3"), paste0("sample", 1:3)))
274 | result <- countToFpkm(lung_squ_count2, keyType = "SYMBOL",
275 |     gene_cov = gene_cov)
276 | ```
277 | 
278 | ```{r, message=FALSE, warning=FALSE}
279 | data(gene_cov)
280 | # Toy read counts (whole numbers) for 3 genes x 3 samples.
281 | lung_squ_count2 <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
282 | rownames(lung_squ_count2) <- c("DISC1", "TCOF1", "SPPL3")
283 | colnames(lung_squ_count2) <- c("sample1", "sample2", "sample3")
284 | result <- countToTpm(lung_squ_count2, keyType = "SYMBOL",
285 |     gene_cov = gene_cov)
286 | ```
287 | 
288 | ```{r}
289 | sessionInfo()
290 | ```
291 | 
292 | ## References
293 | 1. Young MD, Wakefield MJ, Smyth GK, Oshlack A (2010) Gene ontology analysis 
294 | for RNA-seq: accounting for selection bias. Genome Biol 11: R14.
295 | 2. Oshlack A, Wakefield MJ (2009) Transcript length bias in RNA-seq data 
296 | confounds systems biology. Biol Direct 4: 14.
297 | 3. Marioni JC, Mason CE, Mane SM, Stephens M, Gilad Y (2008) RNA-seq: an 
298 | assessment of technical reproducibility and comparison with gene expression 
299 | arrays. Genome Res 18: 1509-1517.
300 | 4. Yoon S, Nam D (2017) Gene dispersion is the key determinant of the read 
301 | count bias in differential expression analysis of RNA-seq data. 
302 | BMC Genomics 18: 408.
303 | 5. Hansen KD, Irizarry RA, Wu Z (2012) Removing technical variability in 
304 | RNA-seq data using conditional quantile normalization. 
305 | Biostatistics 13: 204-216.
306 | 6. Risso D, Schwartz K, Sherlock G, Dudoit S (2011) GC-content normalization
307 | for RNA-Seq data. BMC Bioinformatics 12: 480.
308 | 
309 | 


--------------------------------------------------------------------------------