├── .Rbuildignore
├── .gitignore
├── .svnignore
├── CONDUCT.md
├── DESCRIPTION
├── Makefile
├── NAMESPACE
├── NEWS
├── NEWS.md
├── R
    ├── GOSemSim-package.R
    ├── ICMethods.R
    ├── OntDb.R
    ├── RcppExports.R
    ├── TCSSMethod.R
    ├── TCSScutoff.R
    ├── WangMethod.R
    ├── buildGOmap.R
    ├── clusterSim.R
    ├── combineMethods.R
    ├── computeIC.R
    ├── gene2GO.R
    ├── geneSim.R
    ├── goSim.R
    ├── godata.R
    ├── mclusterSim.R
    ├── mgeneSim.R
    ├── mgoSim.R
    ├── parseGAF.R
    ├── processTCSS.R
    ├── readBlast2go.R
    ├── termSim.R
    ├── utilities.R
    └── zzz.R
├── README.Rmd
├── README.md
├── appveyor.yml
├── data
    └── gotbl.rda
├── inst
    └── CITATION
├── man
    ├── GOSemSim-package.Rd
    ├── GOSemSimDATA-class.Rd
    ├── buildGOmap.Rd
    ├── clusterSim.Rd
    ├── combineScores.Rd
    ├── geneSim.Rd
    ├── goSim.Rd
    ├── go_term_table.Rd
    ├── godata.Rd
    ├── infoContentMethod.Rd
    ├── load_OrgDb.Rd
    ├── mclusterSim.Rd
    ├── mgeneSim.Rd
    ├── mgoSim.Rd
    ├── read-gaf.Rd
    ├── read.blast2go.Rd
    ├── tcss_cutoff.Rd
    ├── termSim.Rd
    └── wangMethod_internal.Rd
├── src
    ├── GOSemSim.dll
    ├── ICmethod.cpp
    └── RcppExports.cpp
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-GO.R
    │   ├── test-Wang.R
    │   └── test-load_OrgDb.R
└── vignettes
    └── GOSemSim.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | .svnignore
 2 | ^.*\.DS_Store
 3 | Makefile
 4 | README.Rmd
 5 | appveyor.yml
 6 | .travis.yml
 7 | 
 8 | docs
 9 | mkdocs
10 | .github
11 | site_src
12 | CONDUCT.md
13 | README.md
14 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | .svn
 3 | __pycache__
 4 | *~
 5 | *.Rhistory
 6 | __pycache__
 7 | __init__.py
 8 | __init__.pyc
 9 | src/*.o
10 | src/*.so
11 | 


--------------------------------------------------------------------------------
/.svnignore:
--------------------------------------------------------------------------------
1 | .git
2 | .travis.yml
3 | appveyor.yml
4 | docs
5 | mkdocs
6 | .github
7 | 


--------------------------------------------------------------------------------
/CONDUCT.md:
--------------------------------------------------------------------------------
 1 | # Contributor Code of Conduct
 2 | 
 3 | As contributors and maintainers of this project, we pledge to respect all people who 
 4 | contribute through reporting issues, posting feature requests, updating documentation,
 5 | submitting pull requests or patches, and other activities.
 6 | 
 7 | We are committed to making participation in this project a harassment-free experience for
 8 | everyone, regardless of level of experience, gender, gender identity and expression,
 9 | sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.
10 | 
11 | Examples of unacceptable behavior by participants include the use of sexual language or
12 | imagery, derogatory comments or personal attacks, trolling, public or private harassment,
13 | insults, or other unprofessional conduct.
14 | 
15 | Project maintainers have the right and responsibility to remove, edit, or reject comments,
16 | commits, code, wiki edits, issues, and other contributions that are not aligned to this 
17 | Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
18 | from the project team.
19 | 
20 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
21 | opening an issue or contacting one or more of the project maintainers.
22 | 
23 | This Code of Conduct is adapted from the Contributor Covenant 
24 | (http://contributor-covenant.org), version 1.0.0, available at 
25 | http://contributor-covenant.org/version/1/0/0/
26 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: GOSemSim
 2 | Type: Package
 3 | Title: GO-terms Semantic Similarity Measures
 4 | Version: 2.35.0
 5 | Authors@R: c( person(given = "Guangchuang", family = "Yu",             email = "guangchuangyu@gmail.com", role  = c("aut", "cre")),
 6 |               person(given = "Alexey",      family = "Stukalov",       email = "astukalov@gmail.com",     role  = "ctb"),
 7 |               person(given= "Pingfan",      family = "Guo",            email = "1178431277@qq.com",       role  = "ctb"),
 8 |               person(given = "Chuanle",     family = "Xiao",           email = "xiaochuanle@126.com",     role  = "ctb"),
 9 |               person(given = "Lluís",       family = "Revilla Sancho", email = "lluis.revilla@gmail.com", role  = "ctb")
10 |              )
11 | Maintainer: Guangchuang Yu <guangchuangyu@gmail.com>
12 | Description: The semantic comparisons of Gene Ontology (GO) annotations provide
13 |     quantitative ways to compute similarities between genes and gene groups,
14 |     and have become an important basis for many bioinformatics analysis approaches.
15 |     GOSemSim is an R package for semantic similarity computation among GO terms,
16 |     sets of GO terms, gene products and gene clusters. GOSemSim implemented five
17 |     methods proposed by Resnik, Schlicker, Jiang, Lin and Wang respectively.
18 | Depends:
19 |     R (>= 3.5.0)
20 | LinkingTo: Rcpp
21 | Imports:
22 |     AnnotationDbi,
23 |     DBI,
24 |     digest,
25 |     GO.db,
26 |     methods,
27 |     rlang,
28 |     R.utils,
29 |     stats,
30 |     utils,
31 |     yulab.utils (>= 0.1.6)
32 | Suggests:
33 |     AnnotationHub,
34 |     BiocManager,
35 |     clusterProfiler,
36 |     DOSE,
37 |     knitr,
38 |     org.Hs.eg.db,
39 |     prettydoc,
40 |     rappdirs,
41 |     readr,
42 |     rmarkdown,
43 |     testthat,
44 |     tidyr,
45 |     tidyselect,
46 |     ROCR
47 | VignetteBuilder: knitr
48 | ByteCompile: true
49 | License: Artistic-2.0
50 | Encoding: UTF-8
51 | URL: https://yulab-smu.top/biomedical-knowledge-mining-book/
52 | BugReports: https://github.com/YuLab-SMU/GOSemSim/issues
53 | Packaged: 2012-08-29 06:44:50 UTC; root
54 | biocViews: Annotation, GO, Clustering, Pathways, Network, Software
55 | RoxygenNote: 7.3.2
56 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | PKGNAME := $(shell sed -n "s/Package: *\([^ ]*\)/\1/p" DESCRIPTION)
 2 | PKGVERS := $(shell sed -n "s/Version: *\([^ ]*\)/\1/p" DESCRIPTION)
 3 | PKGSRC  := $(shell basename `pwd`)
 4 | BIOCVER := RELEASE_3_21
 5 | 
 6 | all: rd check clean
 7 | 
 8 | alldocs: rd readme
 9 | 
10 | rd:
11 | 	Rscript -e 'roxygen2::roxygenise(".")'
12 | 
13 | readme:
14 | 	Rscript -e 'rmarkdown::render("README.Rmd")'
15 | 
16 | build:
17 | 	# cd ..;\
18 | 	# R CMD build $(PKGSRC)
19 | 	Rscript -e 'devtools::build()'
20 | 	
21 | install:
22 | 	cd ..;\
23 | 	R CMD INSTALL $(PKGNAME)_$(PKGVERS).tar.gz
24 | 
25 | check: 
26 | 	# cd ..;\
27 | 	# Rscript -e 'rcmdcheck::rcmdcheck("$(PKGNAME)_$(PKGVERS).tar.gz")'
28 | 	Rscript -e 'devtools::check()'
29 | 
30 | check2: build
31 | 	cd ..;\
32 | 	R CMD check $(PKGNAME)_$(PKGVERS).tar.gz;\
33 | 
34 | bioccheck:
35 | 	cd ..;\
36 | 	Rscript -e 'BiocCheck::BiocCheck("$(PKGNAME)_$(PKGVERS).tar.gz")'
37 | 
38 | clean:
39 | 	cd ..;\
40 | 	$(RM) -r $(PKGNAME).Rcheck/
41 | 
42 | 
43 | gitmaintain:
44 | 	git gc --auto;\
45 | 	git prune -v;\
46 | 	git fsck --full
47 | 
48 | 
49 | update:
50 | 	git fetch --all;\
51 | 	git checkout devel;\
52 | 	git merge upstream/devel
53 | 
54 | rmrelease:
55 | 	git branch -D $(BIOCVER)
56 | 
57 | release:
58 | 	git checkout $(BIOCVER);\
59 | 	git fetch --all
60 | 
61 | biocinit:
62 | 	git remote add upstream git@git.bioconductor.org:packages/$(PKGNAME).git;\
63 | 	git fetch --all
64 | 
65 | push:
66 | 	git push upstream devel;\
67 | 	git push origin devel
68 | 	#git checkout github;\
69 | 	#git merge -m 'merge from bioc repo' upstream/devel;\
70 | 	#git push -f origin HEAD:devel;\
71 | 	#git checkout devel
72 | 
73 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(buildGOmap)
 4 | export(clusterSim)
 5 | export(combineScores)
 6 | export(geneSim)
 7 | export(goSim)
 8 | export(godata)
 9 | export(load_OrgDb)
10 | export(mclusterSim)
11 | export(mgeneSim)
12 | export(mgoSim)
13 | export(parse_gff)
14 | export(read.blast2go)
15 | export(read.gaf)
16 | export(tcss_cutoff)
17 | export(termSim)
18 | exportClasses(GOSemSimDATA)
19 | importClassesFrom(AnnotationDbi,AnnotationDb)
20 | importFrom(AnnotationDbi,columns)
21 | importFrom(AnnotationDbi,keys)
22 | importFrom(AnnotationDbi,loadDb)
23 | importFrom(AnnotationDbi,metadata)
24 | importFrom(AnnotationDbi,select)
25 | importFrom(AnnotationDbi,toTable)
26 | importFrom(DBI,dbReadTable)
27 | importFrom(GO.db,GO.db)
28 | importFrom(GO.db,GOBPANCESTOR)
29 | importFrom(GO.db,GOBPOFFSPRING)
30 | importFrom(GO.db,GOBPPARENTS)
31 | importFrom(GO.db,GOCCANCESTOR)
32 | importFrom(GO.db,GOCCOFFSPRING)
33 | importFrom(GO.db,GOCCPARENTS)
34 | importFrom(GO.db,GOMFANCESTOR)
35 | importFrom(GO.db,GOMFOFFSPRING)
36 | importFrom(GO.db,GOMFPARENTS)
37 | importFrom(GO.db,GOTERM)
38 | importFrom(R.utils,gunzip)
39 | importFrom(digest,digest)
40 | importFrom(methods,is)
41 | importFrom(methods,new)
42 | importFrom(methods,setClass)
43 | importFrom(methods,setMethod)
44 | importFrom(methods,setRefClass)
45 | importFrom(methods,slot)
46 | importFrom(rlang,check_installed)
47 | importFrom(stats,na.omit)
48 | importFrom(stats,setNames)
49 | importFrom(utils,getFromNamespace)
50 | importFrom(utils,read.delim)
51 | importFrom(utils,setTxtProgressBar)
52 | importFrom(utils,stack)
53 | importFrom(utils,txtProgressBar)
54 | importFrom(yulab.utils,ls2df)
55 | importFrom(yulab.utils,yulab_msg)
56 | importMethodsFrom(AnnotationDbi,as.list)
57 | importMethodsFrom(AnnotationDbi,columns)
58 | importMethodsFrom(AnnotationDbi,dbconn)
59 | importMethodsFrom(AnnotationDbi,keys)
60 | importMethodsFrom(AnnotationDbi,keytypes)
61 | importMethodsFrom(AnnotationDbi,mget)
62 | importMethodsFrom(AnnotationDbi,select)
63 | importMethodsFrom(AnnotationDbi,toTable)
64 | useDynLib(GOSemSim)
65 | 


--------------------------------------------------------------------------------
/NEWS:
--------------------------------------------------------------------------------
  1 | CHANGES IN VERSION 2.3.1
  2 | ------------------------
  3 |  o new project site using blogdown <2017-09-28, Thu>
  4 |  o speed up by pre-calculating GO similarities <2017-05-22, Mon>
  5 |    + contributed by Lluís Revilla Sancho
  6 |    + https://github.com/GuangchuangYu/GOSemSim/pull/13
  7 | 
  8 | CHANGES IN VERSION 2.2.0
  9 | ------------------------
 10 |  o BioC 3.5 release <2017-04-26, Wed>
 11 | 
 12 | CHANGES IN VERSION 2.1.3
 13 | ------------------------
 14 |  o friendly error message for using IC method without IC computed <2017-02-17, Fri>
 15 |    + https://github.com/GuangchuangYu/GOSemSim/issues/11
 16 |  o fixed https://github.com/GuangchuangYu/GOSemSim/issues/9 <2016-12-20, Tue>
 17 | 
 18 | CHANGES IN VERSION 2.1.2
 19 | ------------------------
 20 |  o use prettydoc for vignette <2016-11-30, Wed>
 21 |  o remove using BiocStyle <2016-11-23, Wed>
 22 | 
 23 | CHANGES IN VERSION 2.1.1
 24 | ------------------------
 25 |  o update startup message <2016-11-09, Wed>
 26 | 
 27 | CHANGES IN VERSION 2.0.0
 28 | ------------------------
 29 |  o BioC 3.4 released <2016-10-18, Tue>
 30 | 
 31 | CHANGES IN VERSION 1.99.4
 32 | ------------------------
 33 |  o fixed NOTE in R check <2016-08-12, Fri>
 34 |  o add unit test using testthat <2016-08-11, Thu>
 35 | 
 36 | CHANGES IN VERSION 1.99.3
 37 | ------------------------
 38 |  o changes to satisfy meshes package <2016-08-05, Fri>
 39 | 
 40 | CHANGES IN VERSION 1.99.2
 41 | ------------------------
 42 |  o fixed Rcpp issue <2016-07-19, Tue>
 43 |    + https://github.com/GuangchuangYu/GOSemSim/issues/6
 44 | 
 45 | CHANGES IN VERSION 1.99.1
 46 | ------------------------
 47 |  o update vignette <2016-07-14, Thu>
 48 | 
 49 | CHANGES IN VERSION 1.99.0
 50 | ------------------------
 51 |  o support all organisms that have OrgDb object <2016-07-05, Tue>
 52 |  o optimize Wang method <2016-07-04, Mon>
 53 | 
 54 | CHANGES IN VERSION 1.31.2
 55 | ------------------------
 56 |  o use byte compiler <2016-05-18, Wed>
 57 |  o https://github.com/Bioconductor-mirror/GOSemSim/commit/71c29280c560e0293569121aeeecb0ed7b37055a
 58 | 
 59 | CHANGES IN VERSION 1.31.1
 60 | ------------------------
 61 |  o https://github.com/Bioconductor-mirror/GOSemSim/commit/a829a50a017b90f08c41b5955df176dfad333d06
 62 | 
 63 | CHANGES IN VERSION 1.30.0
 64 | ------------------------
 65 |  o BioC 3.3 released <2016-05-05, Thu>
 66 | 
 67 | CHANGES IN VERSION 1.29.2
 68 | ------------------------
 69 |  o update IC data <2016-04-21, Mon>
 70 | 
 71 | CHANGES IN VERSION 1.29.1
 72 | ------------------------
 73 |  o fixed R check <2016-03-05, Sat>
 74 | 
 75 | CHANGES IN VERSION 1.27.4
 76 | ------------------------
 77 |  o update information content data <2015-10-08, Thu>
 78 |  o convert vignette from Rnw to Rmd <2015-06-23, Tue>
 79 | 
 80 | CHANGES IN VERSION 1.27.3
 81 | ------------------------
 82 |  o bug fixed in getSupported_Org <2015-05-31, Sun>
 83 | 
 84 | CHANGES IN VERSION 1.27.2
 85 | ------------------------
 86 |  o deprecate 'worm' and use 'celegans' instead.
 87 | 
 88 | CHANGES IN VERSION 1.27.1
 89 | ------------------------
 90 |  o add external documents <2015-05-07, Thu>
 91 | 
 92 | CHANGES IN VERSION 1.25.5
 93 | ------------------------
 94 |  o update information content files <2015-03-12, Thu>
 95 | 
 96 | CHANGES IN VERSION 1.25.4
 97 | ------------------------
 98 |  o update vignette and add DOSE citation <2015-02-13, Fri>
 99 | 
100 | CHANGES IN VERSION 1.25.3
101 | ------------------------
102 |  o add doi in CITATION <2015-01-28, Wed>
103 |  o update vignette using BiocStyle <2015-01-26, Mon>
104 | 
105 | CHANGES IN VERSION 1.25.2
106 | ------------------------
107 |  o add BugReports URL <2014-12-17, Wed>
108 | 
109 | CHANGES IN VERSION 1.25.1
110 | ------------------------
111 |  o import Rcpp <2014-10-23, Thu>
112 | 
113 | CHANGES IN VERSION 1.23.2
114 | ------------------------
115 |  o fast IC-method implemented, contributed by Alexey Stukalov <2014-09-27, Sat>
116 | 
117 | CHANGES IN VERSION 1.23.1
118 | ------------------------
119 |  o add support of species coelicolor and gondii <2014-09-03, Wed>
120 | 
121 | CHANGES IN VERSION 1.21.3
122 | ------------------------
123 |  o fixed minor bug in combineMethods <2013-12-16, Mon>
124 | 
125 | CHANGES IN VERSION 1.19.3
126 | ------------------------
127 |  o add getSupported_Org function for accessing all the names of supported organisms <2013-07-09, Mon>
128 | 
129 | CHANGES IN VERSION 1.19.2
130 | ------------------------
131 |  o export getDb and loadGOMap <2013-07-09, Mon>
132 |  o update vignettes <2013-07-9, Mon>
133 | 
134 | CHANGES IN VERSION 1.19.1
135 | ------------------------
136 |  o update vignettes <2013-06-13, Thu>
137 | 
138 | CHANGES IN VERSION 1.17.1
139 | ------------------------
140 |  o update IC data for next release <2013-03-08, Fri>
141 |  o after removing NA row/col of similarity matrix, if only one row/col remains,
142 |    R will turn it to be a vector, and combineScore function will not work properly. This bug was fixed <2013-01-11, Fri>
143 |  o bug fixed of infoContentMethod, now return NA when ID is not belong to the ontology <2012-10-11, Thu>
144 | 
145 | CHANGES IN VERSION 1.15.3
146 | ------------------------
147 |  o remove all the S4 classes and methods <2012-09-12, Wed>
148 |  o add progress bar for mgeneSim <2012-09-12, Wed>
149 |  o re-implement calculating semantic values in Wang's method <2012-09-12, Wed>
150 |  o update IC data for next release <2012-09-12, Wed>
151 |  o bug fixed in getSV <2012-09-13, Thu>
152 | 
153 | CHANGES IN VERSION 1.15.2
154 | ------------------------
155 |  o re-implement gene2GO <2012-09-7, Fri>
156 |  o information content based methods implemented in c++ <2012-09-5, Wed>
157 | 
158 | CHANGES IN VERSION 1.15.1
159 | ------------------------
160 |  o export termSim, which can be used in
161 |    other ontological semantic similarity measurement <2012-06-14, Thu>
162 |  o update vignette. <2012-06-14, Thu>
163 | 
164 | CHANGES IN VERSION 1.13.6
165 | ------------------------
166 |  o remove dependency of organism annotation packages. <2012-03-09, Fri>
167 |    User not need to install all these annotation packages for using GOSemSim.
168 |    User only need to install the specific organism annotation package they want to calculate.
169 |  o update IC data sets for 1.14 release. <2012-03-30, Fri>
170 | 
171 | 
172 | CHANGES IN VERSION 1.13.5
173 | ------------------------
174 |  o fixed BibTeX database file .bib.
175 |    month = , must be month = someMonth,
176 |    leave it blank will cause texi2dvi failed. <2012-03-01, Thu>
177 | 
178 | CHANGES IN VERSION 1.13.4
179 | ------------------------
180 |  o update vignette. <2012-02-28, Tue>
181 | 
182 | CHANGES IN VERSION 1.13.3
183 | ------------------------
184 |  o bug fixed for multiple annotation. <2012-02-01, Fri>
185 | 
186 | CHANGES IN VERSION 1.13.2
187 | ------------------------
188 |  o update infoContentMethod to make it consistent between DOSE and GOSemSim. <2011-12-31, Sat>
189 | 
190 | CHANGES IN VERSION 1.13.1
191 | ------------------------
192 |  o remove dependency of DOSE
193 |  o remove Streptomyces coelicolor support, as the genome wide annotation package contributor no longer supports it.
194 | 
195 | CHANGES IN VERSION 1.11.2
196 | ------------------------
197 |  o Using methods implemented in DOSE for semantic similarity calculation.
198 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
  1 | # GOSemSim 2.34.0
  2 | 
  3 | + Bioconductor RELEASE_3_21 (2025-04-17, Thu)
  4 | 
  5 | # GOSemSim 2.32.0
  6 | 
  7 | + Bioconductor RELEASE_3_20 (2024-10-30, Wed)
  8 | 
  9 | # GOSemSim 2.31.1
 10 | 
 11 | + `get_rel_df()` to access ontology relation data frame required by the Wang method (2024-08-13, Tue)
 12 | + a set of 'OntDb' methods for accessing sqlite data generated by the 'obolite' package (2024-08-13, Tue)
 13 | + use `yulab.utils::yulab_msg()` for startup message (2024-07-26, Fri)
 14 | 
 15 | # GOSemSim 2.30.0
 16 | 
 17 | + Bioconductor RELEASE_3_19 (2024-05-15, Wed)
 18 | 
 19 | # GOSemSim 2.29.2
 20 | 
 21 | + update `buildGOmap()` parameter to be consistent with `enricher()` and `GSEA()` (2024-02-06, Tue, #47)
 22 | 
 23 | # GOSemSim 2.29.1
 24 | 
 25 | + extend `godata()` to support passing a data.frame (can be output of `read.gaf()` or `read.blast2go()`) to 'annoDb' (2024-01-16, Tue)
 26 | + deprecate 'OrgDb' and introduce new parameter 'annoDb' in `godata()` 
 27 | + standardize the output of `read.gaf()` and `read.blast2go()`
 28 | + optimize `buildGOmap()`
 29 | 
 30 | # GOSemSim 2.28.0
 31 | 
 32 | + Bioconductor RELEASE_3_18 (2023-10-25, Wed)
 33 | 
 34 | # GOSemSim 2.27.3
 35 | 
 36 | + use `check_installed()` to check package dependency (2023-09-12, Tue, #43)
 37 | 
 38 | # GOSemSim 2.27.2
 39 | 
 40 | + `read.blast2go()` to parse 'blast2go' result (2023-07-10, Mon, #41, #42)
 41 | + move `buildGOmap()` and `read.gaf()` from 'clusterProfiler' (2023-07-10, Mon)
 42 | 
 43 | # GOSemSim 2.27.1
 44 | 
 45 | + semantic similarity measurement support for MPO (2023-04-06, Thu)
 46 | + TCSS semantic similarity measurement support for DO and MPO (2023-04-06, Thu)
 47 | 
 48 | # GOSemSim 2.26.0
 49 | 
 50 | + Bioconductor RELEASE_3_17 (2023-05-03, Wed)
 51 | 
 52 | # GOSemSim 2.24.0
 53 | 
 54 | + Bioconductor RELEASE_3_16 (2022-11-02, Wed)
 55 | 
 56 | # GOSemSim 2.23.1
 57 | 
 58 | + Replacing DO.db with HDO.db (2022-07-29, Mon)
 59 | 
 60 | # GOSemSim 2.22.0
 61 | 
 62 | + Bioconductor 3.15 release
 63 | 
 64 | # GOSemSim 2.21.1
 65 | 
 66 | + Avoid eval-parse in `load_OrgDb()` (2022-01-10, Mon)
 67 | 
 68 | # GOSemSim 2.20
 69 | 
 70 | + Bioconductor 3.14 release
 71 | 
 72 | # GOSemSim 2.19.1
 73 | 
 74 | + TCSS method (@qibaiqi, #35; 2021-08-02, Mon)
 75 | 
 76 | # GOSemSim 2.18.0
 77 | 
 78 | + Bioconductor 3.13 release
 79 | 
 80 | # GOSemSim 2.17.1
 81 | 
 82 | + bug fixed according to the update of GO.db (2020-10-29, Thu)
 83 |   - <https://github.com/YuLab-SMU/GOSemSim/issues/32>
 84 |   
 85 | # GOSemSim 2.16.0
 86 | 
 87 | + Bioconductor 3.12 release (2020-10-28, Wed)
 88 | 
 89 | # GOSemSim 2.15.2
 90 | 
 91 | + new site, <https://yulab-smu.top/biomedical-knowledge-mining-book/> for documentation (2020-09-04, Fri)
 92 | + update vignette
 93 | + update `data/gotbl`
 94 | 
 95 | # GOSemSim 2.15.1
 96 | 
 97 | + bug fixed of IC method when input IDs contain invalid terms. (2020-07-25, Sat)
 98 | 
 99 | # GOSemSim 2.14.0
100 | 
101 | + Bioconductor 3.11 release
102 | 
103 | 
104 | # GOSemSim 2.13.1
105 | 
106 | + add new citation (2020-03-19, Thu)
107 | + fixed compiling error due to the change of Rcpp 
108 |   - <https://github.com/YuLab-SMU/GOSemSim/issues/27>
109 | 
110 | # GOSemSim 2.7.1
111 | 
112 | + `mgeneSim` and `mclusterSim` now always return matrix (2018-08-08, Wed)
113 |     - <https://www.biostars.org/p/330642/#331598>
114 | 
115 | # GOSemSim 2.5.1
116 | 
117 | + return NA for deprecated IDs (2018-01-09, Fri)
118 |     - <https://support.bioconductor.org/p/105822/#105840>
119 | 


--------------------------------------------------------------------------------
/R/GOSemSim-package.R:
--------------------------------------------------------------------------------
 1 | #' @keywords internal
 2 | "_PACKAGE"
 3 | 
 4 | 
 5 | 
 6 | ##'Information content of GO terms
 7 | ##'
 8 | ##'These datasets are the information contents of GO terms.
 9 | ##'
10 | ##'
11 | ##'@name go_term_table
12 | ##'@aliases GO gotbl
13 | ##'@docType data
14 | ##'@references Yu et al. (2010) GOSemSim: an R package for measuring semantic
15 | ##'similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
16 | ##'England), 26:7 976--978, April 2010. ISSN 1367-4803
17 | ##'\url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
18 | ##'PMID: 20179076
19 | ##'@keywords datasets
20 | NULL
21 | 
22 | 
23 | 
24 | 


--------------------------------------------------------------------------------
/R/ICMethods.R:
--------------------------------------------------------------------------------
  1 | ##' Information Content Based Methods for semantic similarity measuring
  2 | ##'
  3 | ##' implemented for methods proposed by Resnik, Jiang, Lin and Schlicker.
  4 | ##' @title information content based methods
  5 | ##' @param ID1 Ontology Term
  6 | ##' @param ID2 Ontology Term
  7 | ##' @param method one of "Resnik", "Jiang", "Lin", "Rel" and "TCSS".
  8 | ##' @param godata GOSemSimDATA object
  9 | ##' @return semantic similarity score
 10 | ##' @useDynLib GOSemSim
 11 | ##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
 12 | infoContentMethod <- function(ID1,
 13 |                               ID2,
 14 |                               method,
 15 |                               godata) {
 16 |     ## IC is biased
 17 |     ## because the IC of a term depends on its children but not on its parents.
 18 |     ont <- godata@ont
 19 |     IC <- godata@IC
 20 | 
 21 |     if (length(IC) == 0) {
 22 |         stop("IC data not found, please re-generate your `semData` with `computeIC=TRUE`...")
 23 |     }
 24 |     allid <- union(ID1, ID2)  ## every term whose ancestors are needed, computed once
 25 |     if (ont %in% c("MF", "BP", "CC", "HDO", "MPO", "HPO")) {
 26 |         ## .anc <- tryCatch(getAncestors(ont)[union(ID1,ID2)], error=function(e) NULL)
 27 |         ## if (is.null(.anc)) {
 28 |         ##     ## https://support.bioconductor.org/p/105822/
 29 |         ##     return(NA)
 30 |         ## }
 31 |         ## .anc <- AnnotationDbi::as.list(.anc)
 32 | 
 33 |         ## if some IDs are not valid, the code above would return NA directly.
 34 |         .anc <- getAncestors(ont)
 35 |         ## keep the queried terms only, then drop those without any ancestor entry
 36 |         .anc <- .anc[allid]
 37 |         .anc <- .anc[!vapply(.anc, is.empty, logical(1))]
 38 | 
 39 |         ## invalid_ids <- c(ID1[!ID1 %in% names(.anc)], ID2[!ID2 %in% names(.anc)])
 40 |         ## if (length(invalid_ids) > 0) {
 41 |         ##     message("The following IDs are not valid and will be removed:", paste(invalid_ids, collapse=","))
 42 |         ##     # allid <- allid[!allid %in% invalid_ids]
 43 |         ## }
 44 |     } else {
 45 |         mesh_getAnc <- getFromNamespace("getAncestors", "meshes")  ## internal helper of 'meshes', resolved without eval(parse())
 46 |         .anc <- lapply(allid, mesh_getAnc)
 47 |         names(.anc) <- allid
 48 |     }
 49 |     return ( infoContentMethod_cpp( ID1, ID2,
 50 |                  .anc, IC,
 51 |                  method, ont ) )
 52 | }
 53 | 
 54 | is.empty <- function(x) {
 55 |     ## TRUE when x is NULL or holds nothing but NA values, FALSE otherwise
 56 |     no_value <- is.null(x) || all(is.na(x))
 57 |     no_value
 58 | }
 59 | 
 60 | 
 61 | ## infoContentMethod <- function(ID1,
 62 | ##                               ID2,
 63 | ##                               ont="DO",
 64 | ##                               method,
 65 | ##                               organism="human") {
 66 | ##     IC <- getIC(organism, ont)
 67 | 
 68 | ##     ## more specific term, larger IC value.
 69 | ##     ## Normalized, all divide the most informative IC.
 70 | ##     ## all IC values range from 0(root node) to 1(most specific node)
 71 | ##     mic <- max(IC[IC!=Inf])
 72 | 
 73 | ##     if (ont == "DO") {
 74 | ##         topNode <- "DOID:4"
 75 | ##     } else {
 76 | ##         topNode <- "all"
 77 | ##     }
 78 | 
 79 | ##     IC[topNode] = 0
 80 | 
 81 | ##     ic1 <- IC[ID1]/mic
 82 | ##     ic2 <- IC[ID2]/mic
 83 | 
 84 | ##     if (ic1 == 0 || ic2 == 0)
 85 | ##         return (NA)
 86 | 
 87 | ##     ancestor1 <- getAncestors(ont)[[ID1]]
 88 | ##     ancestor2 <- getAncestors(ont)[[ID2]]
 89 | ##     if (ID1 == ID2) {
 90 | ##         commonAncestor <- ID1
 91 | ##     } else if (ID1 %in% ancestor2) {
 92 | ##         commonAncestor <- ID1
 93 | ##     } else if (ID2 %in% ancestor1) {
 94 | ##         commonAncestor <- ID2
 95 | ##     } else {
 96 | ##         commonAncestor <- intersect(ancestor1, ancestor2)
 97 | ##     }
 98 | ##     if (length(commonAncestor) == 0) return (NA)
 99 | 
100 | ##     ##Information Content of the most informative common ancestor (MICA)
101 | ##     mica <- max(IC[commonAncestor])/mic
102 | 
103 | ##     ## IC is biased
104 | ##     ## because the IC of a term is dependent of its children but not on its parents.
105 | ##     sim <- switch(method,
106 | ##                   Resnik = mica, ## Resnik does not consider how distant the terms are from their common ancestor.
107 | ##                   ## Lin and Jiang take that distance into account.
108 | ##                   Lin = 2*mica/(ic1+ic2),
109 | ##                   Jiang = 1 - min(1, -2*mica + ic1 + ic2),
110 | ##                   Rel = 2*mica/(ic1+ic2)*(1-exp(-mica*mic))  ## mica*mic equals to the original IC value. and exp(-mica*mic) equals to the probability of the term's occurence.
111 | ##                   )
112 | ##     return (sim)
113 | ## }
114 | 


--------------------------------------------------------------------------------
/R/OntDb.R:
--------------------------------------------------------------------------------
  1 | ##' @importClassesFrom AnnotationDbi AnnotationDb
  2 | ##' @importFrom methods setRefClass
  3 | setRefClass("OntDb", contains="AnnotationDb")  # OntDb: reference class extending AnnotationDbi's AnnotationDb
  4 | 
  5 | #' @importMethodsFrom AnnotationDbi keys
  6 | #' @importMethodsFrom AnnotationDbi toTable
  7 | setMethod("keys", "OntDb",
  8 |     function(x, keytype, ...){
  9 |         # fall back to the identifier column when no key type is given
 10 |         column <- if (missing(keytype)) "id" else keytype
 11 |         toTable(x)[, column]
 12 |     }
 13 | )
 14 | 
 15 | #' @importMethodsFrom AnnotationDbi keytypes
 16 | setMethod("keytypes", "OntDb",
 17 |     function(x) {
 18 |         # the key types that the select() method of OntDb accepts
 19 |         c("id", "term")
 20 |     }
 21 | )
 22 | 
 23 | 
 24 | #' @importFrom DBI dbReadTable
 25 | setMethod("toTable", "OntDb",
 26 |     function(x) {
 27 |         # read the `term` table; nested call rather than `|>`, which needs R >= 4.1.0
 28 |         setNames(dbReadTable(dbconn(x), 'term'), c("id", "term"))
 29 |     }
 30 | )
 31 | 
 32 | 
 33 | 
 34 | #' @importMethodsFrom AnnotationDbi select
 35 | #' @importMethodsFrom AnnotationDbi dbconn
 36 | setMethod("select", "OntDb",
 37 |     function(x, keys, columns, keytype, ...){
 38 |         # Fetch `columns` for the entries matching `keys`, given as ids (default) or term names.
 39 |         if (missing(keytype)) keytype <- "id"
 40 |         keytype <- match.arg(keytype, c("id","term"))
 41 |         strKeys <- paste0("\"", keys, "\"", collapse = ",")
 42 |         if (keytype == "term") {
 43 |             # map term names to ids through the same `term` table / `id` column as the main query
 44 |             sql_key <- paste("SELECT id FROM term WHERE term in (", strKeys, ")")
 45 |             ids <- dbQuery(dbconn(x), sql_key)[, 1]
 46 |             strKeys <- paste0("\"", ids, "\"", collapse = ",")
 47 |         }
 48 |         # `id` is always returned; columns other than id/term are LEFT JOINed from the table of the same name
 49 |         columns <- unique(c("id", columns))
 50 |         sqls <- paste("SELECT ", paste(columns, collapse = ","),
 51 |             " FROM term")
 52 |         columns2 <- setdiff(columns, c("id", "term"))
 53 |         for (col in columns2) {
 54 |             leftJoin <- paste0("LEFT JOIN  ", col, " USING (id)")
 55 |             sqls <- c(sqls, leftJoin)
 56 |         }
 57 |         sqls <- c(sqls, paste0("WHERE term.id in (", strKeys, ")"))
 58 |         sqls <- paste(sqls, collapse = " ")
 59 |         dbQuery(dbconn(x), sqls)
 60 |     }
 61 | )
 62 | 
 63 | dbQuery <- getFromNamespace("dbQuery", "AnnotationDbi")  # looked up by name; presumably not exported by AnnotationDbi -- TODO confirm
 64 | 
 65 | #' @importMethodsFrom AnnotationDbi columns
 66 | setMethod("columns", "OntDb",
 67 |     function(x) {
 68 |         core <- c("id", "term")  # the two columns select() reads without a join
 69 |         c(core, "alias", "synonym", "parent", "children", "ancestor", "offspring")
 70 |     }
 71 | )
 72 | 
 73 | 
 74 | get_onto_data <- function(ont = "HDO", output='list', table="offspring") {
 75 |     # Read one table of ontology `ont`, returned as a data.frame or as a named list.
 76 |     db <- load_onto(ont)
 77 |     output <- match.arg(output, c("data.frame", "list"))
 78 |     tbl <- dbReadTable(dbconn(db), table)
 79 |     as_list <- output == "list"
 80 |     # first column holds the ID, second column the related term
 81 |     if (as_list) split(tbl[, 2], tbl[, 1]) else tbl
 82 | }
 83 | 
 84 | #' @importFrom digest digest
 85 | #' @importFrom AnnotationDbi loadDb
 86 | #' @importFrom R.utils gunzip
 87 | load_onto <- function(onto = "HDO") {
 88 |     # Return the database for ontology `onto`: cached per session in the package
 89 |     # environment, stored on disk as <onto>.sqlite in the user data directory.
 90 |     .env <- get_gosemsim_env()
 91 |     .onto <- sprintf(".onto_%s", onto)
 92 |     if (exists(.onto, envir=.env)) return(get(.onto, envir=.env))
 93 | 
 94 |     dir <- rappdirs::user_data_dir("GOSemSim", appauthor=NULL)
 95 |     # recursive: the parent directories may not exist yet either
 96 |     if (!dir.exists(dir)) dir.create(dir, recursive = TRUE)
 97 | 
 98 |     dbfile0 <- sprintf("%s.sqlite", onto)
 99 |     dbfile <- file.path(dir, dbfile0)
100 | 
101 |     if (file.exists(dbfile)) {
102 |         # An unreachable checksum list, or one without an entry for this file,
103 |         # must not make an existing local copy unusable: keep it in that case.
104 |         md5 <- tryCatch(read.delim('https://yulab-smu.top/DOSE/md5.txt', header=FALSE),
105 |                         error = function(e) NULL)
106 |         md5_remote <- if (is.null(md5)) character(0) else md5[md5[,1] == dbfile0, 2]
107 |         md5_local <- digest::digest(dbfile, algo='md5', file=TRUE)
108 |         need_dl <- length(md5_remote) > 0 && !isTRUE(any(md5_remote == md5_local))
109 |         if (need_dl) {
110 |             msg <- sprintf("%s is outdated, download the latest version...\n", dbfile0)
111 |             cat(msg)
112 |         }
113 |     } else {
114 |         msg <- sprintf("%s is not found, download it online...\n", dbfile0)
115 |         cat(msg)
116 |         need_dl <- TRUE
117 |     }
118 | 
119 |     if (need_dl) {
120 |         url <- sprintf('https://yulab-smu.top/DOSE/%s.gz', dbfile0)
121 |         gzdbfile <- sprintf("%s.gz", dbfile)
122 |         yulab.utils:::mydownload(url, gzdbfile)
123 |         R.utils::gunzip(gzdbfile, overwrite = TRUE)
124 |     } 
125 | 
126 |     db <- loadDb(dbfile)
127 |     assign(.onto, db, envir = .env)
128 |     return(db)
129 | }
130 | 
131 | 


--------------------------------------------------------------------------------
/R/RcppExports.R:
--------------------------------------------------------------------------------
1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
3 | 
4 | infoContentMethod_cpp <- function(id1_, id2_, anc_, ic_, method_, ont_) { # R-side wrapper written by Rcpp::compileAttributes(); regenerate rather than edit
5 |     .Call('_GOSemSim_infoContentMethod_cpp', PACKAGE = 'GOSemSim', id1_, id2_, anc_, ic_, method_, ont_) # passes all six arguments, in order, to the compiled routine
6 | }
7 | 
8 | 


--------------------------------------------------------------------------------
/R/TCSSMethod.R:
--------------------------------------------------------------------------------
  1 | #' Method TCSS for semantic similarity measuring
  2 | #'
  3 | #' @param t1 term vector
  4 | #' @param t2 term vector
  5 | #' @param semData GOSemSimDATA object
  6 | #'
  7 | #' @return vector, similarity score for t1 and t2
  8 | #' @noRd
  9 | #'
 10 | #' @examples
 11 | #' library(org.Hs.eg.db)
 12 | #' semdata <- godata(org.Hs.eg.db,
 13 | #'   keytype = "ENTREZID", ont = "BP",
 14 | #'   computeIC = TRUE, processTCSS = TRUE, cutoff = NULL
 15 | #' )
 16 | #' termSim("GO:0000012", "GO:0009987", semdata, method = "TCSS")
 17 | tcssMethod <- function(t1, t2, semData) {
 18 |     n_row <- length(t1)
 19 |     n_col <- length(t2)
 20 | 
 21 |     # every (t1, t2) combination; t1 varies fastest, so the flat result
 22 |     # fills the matrix column by column (rows = t1, columns = t2)
 23 |     row_terms <- rep(t1, n_col)
 24 |     col_terms <- rep(t2, each = n_row)
 25 | 
 26 |     scores <- mapply(tcssMethod_internal, row_terms, col_terms,
 27 |                      MoreArgs = list(semData = semData))
 28 | 
 29 |     matrix(scores, ncol = n_col, dimnames = list(t1, t2))
 30 | }
 25 | 
 26 | #' process one term with one term
 27 | #'
 28 | #' @param ID1 term
 29 | #' @param ID2 term
 30 | #' @param semData GOSemSimDATA object
 31 | #'
 32 | #' @return numeric, similarity score for ID1 and ID2
 33 | #' @noRd
 34 | #'
 35 | tcssMethod_internal <- function(ID1, ID2, semData) {
 36 |     tcssdata <- semData@tcssdata
 37 |     ont <- semData@ont
 38 | 
 39 |     # TCSS needs the pre-processed cluster data; the godata() argument that
 40 |     # requests it is `processTCSS` (the message used to say `tcssprocess`)
 41 |     if (length(tcssdata) == 0) {
 42 |         stop("tcssdata not found, please re-generate your `semData` with `processTCSS = TRUE`...")
 43 |     }
 44 | 
 45 |     GOs <- names(tcssdata$clusid)
 46 | 
 47 |     # terms without an entry in `clusid` cannot be scored
 48 |     if ((!ID1 %in% GOs) || (!ID2 %in% GOs)) return(NA)
 49 | 
 50 |     # get common ancestors, keeping only those that have an entry in `clusid`
 51 |     com_anc <- ancestors_in_common(ID1 = ID1, ID2 = ID2, ont = ont)
 52 |     com_anc <- com_anc[com_anc %in% GOs]
 53 | 
 54 |     if (length(com_anc) == 0) {
 55 |         return(NA)
 56 |     }
 57 | 
 58 |     # for each common ancestor, read its value from the `ica` vector of every
 59 |     # cluster listed for it in `clusid`
 60 |     sim_value <- lapply(com_anc, function(x) {
 61 |         vapply(tcssdata$ica[tcssdata$clusid[[x]]], function(y) y[x], numeric(1))
 62 |     })
 63 |     sim_value <- unlist(sim_value)
 64 | 
 65 |     if (is.null(sim_value) || length(sim_value) == 0) {
 66 |         return(NA)
 67 |     }
 68 |     # here max value means lowest common ancestor
 69 |     max(sim_value)
 70 | }
 80 | 
 81 | ## #' calculate lowest common ancestors' value
 82 | ## #'
 83 | ## #' @param clus1 character, cluster-id for ID1
 84 | ## #' @param clus2 character, cluster-id for ID2
 85 | ## #' @param ID1 term
 86 | ## #' @param ID2 term
 87 | ## #' @param tcssdata list, belonged clusters and its elements for all nodes
 88 | ## #' @param com_anc character, common ancestors
 89 | ## #' @param ont ontology
 90 | ## #'
 91 | ## #' @return numeric/NULL, similarity value for ID1 with clus1 and ID2 with clus2
 92 | ## #' @noRd
 93 | ## #'
 94 | ## calc_lca <- function(clus1, clus2, ID1, ID2, tcssdata, com_anc, ont) {
 95 | ##     # "meta" only represents meta-graph, do not have relations
 96 | ##     if (clus1 == "meta" || clus2 == "meta") {
 97 | ##         return(NULL)
 98 | ##     }
 99 | ##     # if the two clusters are the same one
100 | ##     if (identical(clus1, clus2)) {
101 | ##         # all cluster-nodes inside cluster
102 | ##         clus_content <- tcssdata[[clus1]]
103 | ##     } else {
104 | ##         # all cluster-nodes inside "meta" cluster
105 | ##         # common ancestors are from clus1 and clus2
106 | ##         clus_content <- tcssdata[["meta"]]
107 | ##         com_anc <- ancestors_in_common(ID1 = clus1, ID2 = clus2, ont = ont)
108 | ##     }
109 | 
110 | ##     # get common ancestors' position, nomatch helps not to introduce NA
111 | ##     com_anc_pos <- match(com_anc, clus_content[["GO"]], nomatch = 0)
112 | ##     # common ancestors' ica value is all the possible sim value
113 | ##     sim_value <- clus_content[["ica"]][com_anc_pos]
114 | 
115 | ##     if (is.null(sim_value) || length(sim_value) == 0) {
116 | ##         return(NULL)
117 | ##     }
118 | ##     # here max similarity value means one of lowest common ancestors
119 | ##     max(sim_value)
120 | ## }
121 | 
122 | #' collect common ancestors
123 | #'
124 | #' @param ID1 term
125 | #' @param ID2 term
126 | #' @param ont ontology
127 | #'
128 | #' @return character, common ancestors for ID1 and ID2
129 | #' @noRd
130 | #'
131 | ancestors_in_common <- function(ID1, ID2, ont) {
132 |     anc_of_first <- ancestors_envir(ID1, ont)
133 |     anc_of_second <- ancestors_envir(ID2, ont)
134 | 
135 |     # same term, or ID1 is an ancestor of ID2: ID1 alone is returned
136 |     first_covers_second <- ID1 == ID2 || ID1 %in% anc_of_second
137 |     if (first_covers_second) return(ID1)
138 | 
139 |     # ID2 is an ancestor of ID1: ID2 alone is returned
140 |     if (ID2 %in% anc_of_first) return(ID2)
141 | 
142 |     # otherwise every shared ancestor except the "all" entry
143 |     shared <- intersect(anc_of_first, anc_of_second)
144 |     setdiff(shared, "all")
145 | }
145 | 
146 | #' get ancestors from environment
147 | #'
148 | #' @param ID term
149 | #' @param ont ontology
150 | #'
151 | #' @return ancestors for ID
152 | #' @noRd
153 | ancestors_envir <- function(ID, ont) {
154 |     if (!exists(".ancCache")) .initial()
155 |     .ancCache <- get(".ancCache", envir = .GlobalEnv)
156 | 
157 |     # inherits = FALSE: look in the cache only. With the default, an ID that
158 |     # names an object visible from the cache's enclosing environments (for
159 |     # an ordinary environment "all" matches base::all) counted as a hit and
160 |     # that unrelated object was returned.
161 |     if (exists(ID, envir = .ancCache, inherits = FALSE)) {
162 |         return(get(ID, envir = .ancCache, inherits = FALSE))
163 |     }
164 |     # cache miss: read the ancestors of ID for this ontology and remember them
165 |     ancestors <- getAncestors(ont)[[ID]]
166 |     assign(ID, ancestors, envir = .ancCache)
167 |     return(ancestors)
168 | }
164 | 


--------------------------------------------------------------------------------
/R/TCSScutoff.R:
--------------------------------------------------------------------------------
  1 | #' determine the topological cutoff for TCSS method
  2 | #'
  3 | #' @param OrgDb OrgDb object
  4 | #' @param keytype keytype
  5 | #' @param ont ontology : "BP", "MF", "CC"
  6 | #' @param combine_method "max", "BMA", "avg", "rcmax", "rcmax.avg"
  7 | #' @param ppidata A data.frame contains positive set and negative set.
  8 | #' Positive set is PPI pairs that already verified.
  9 | #' ppidata has three columns, column 1 and 2 are character, column 3
 10 | #' must be logical value:TRUE/FALSE.
 11 | #'
 12 | #' @return numeric, topological cutoff for given parameters
 13 | #' @export
 14 | #'
 15 | #' @examples
 16 | #' \dontrun{
 17 | #'     library(org.Hs.eg.db)
 18 | #'     library(STRINGdb)
 19 | #'
 20 | #'     string_db <- STRINGdb$new(version = "11.0", species = 9606,
 21 | #'     score_threshold = 700)
 22 | #'     string_proteins <- string_db$get_proteins()
 23 | #'
 24 | #'     #get relationship
 25 | #'     ppi <- string_db$get_interactions(string_proteins$protein_external_id)
 26 | #'
 27 | #'     ppi$from <- vapply(ppi$from, function(e)
 28 | #'                        strsplit(e, "9606.")[[1]][2], character(1))
 29 | #'     ppi$to <- vapply(ppi$to, function(e)
 30 | #'                        strsplit(e, "9606.")[[1]][2], character(1))
 31 | #'     len <- nrow(ppi)
 32 | #'
 33 | #'     #select length
 34 | #'     s_len <- 100
 35 | #'     pos_1 <- sample(len, s_len, replace = T)
 36 | #'     #negative set
 37 | #'     pos_2 <- sample(len, s_len, replace = T)
 38 | #'     pos_3 <- sample(len, s_len, replace = T)
 39 | #'     #union as ppidata
 40 | #'     ppidata <- data.frame(pro1 = c(ppi$from[pos_1], ppi$from[pos_2]),
 41 | #'      pro2 = c(ppi$to[pos_1], ppi$to[pos_3]),
 42 | #'      label = c(rep(TRUE, s_len), rep(FALSE, s_len)),
 43 | #'      stringsAsFactors = FALSE)
 44 | #'
 45 | #'     cutoff <- tcss_cutoff(OrgDb = org.Hs.eg.db, keytype = "ENSEMBLPROT",
 46 | #'     ont = "BP", combine_method = "max", ppidata)
 47 | #' }
 48 | tcss_cutoff <- function(OrgDb = NULL, keytype = "ENTREZID", ont,
 49 |                         combine_method = "max", ppidata) {
 50 |   #annotation and IC only; TCSS data is rebuilt per candidate cutoff later on
 51 |   sem <- godata(OrgDb, keytype = keytype, ont = ont, computeIC = TRUE,
 52 |                 processTCSS = FALSE, cutoff = NULL)
 53 | 
 54 |   #terms with a finite IC are the ones that get an ICT value
 55 |   ic <- sem@IC
 56 |   finite_terms <- names(ic[!is.infinite(ic)])
 57 |   offspring_map <- getOffsprings(ont)
 58 |   ict <- computeICT(finite_terms, offspring_map)
 59 | 
 60 |   #candidate cutoffs: 0.1, 0.2, ... up to just past the largest ICT
 61 |   candidates <- seq(0.1, max(ict) + 0.1, by = 0.1)
 62 | 
 63 |   #only pairs whose two members both appear in the annotation are usable
 64 |   annotated <- unique(sem@geneAnno[, keytype])
 65 |   pairs <- create_filtered_ppidata(annotated, ppidata = ppidata)
 66 | 
 67 |   #similarity of every pair, once per candidate cutoff
 68 |   predictions <- lapply(candidates, function(one_cutoff) {
 69 |     computePre(one_cutoff,
 70 |                filtered_ppidata = pairs,
 71 |                semdata = sem,
 72 |                combine_method = combine_method)
 73 |   })
 74 | 
 75 |   #auc and F1-score per cutoff, then pick the most appropriate cutoff
 76 |   scores <- calc_auc_F1_score(predictions, filtered_ppidata = pairs)
 77 |   decide_cutoff(scores, cutoffs = candidates)
 78 | }
 78 | 
 79 | #' keep the proteins with non-zero annotations
 80 | #'
 81 | #' @param all_pro all proteins that have non-zero annotations
 82 | #' @param ppidata data.frame, already verified PPI data
 83 | #' @importFrom stats na.omit
 84 | #'
 85 | #' @return data.frame, annotated protein pairs and their labels
 86 | #' @noRd
 87 | create_filtered_ppidata <- function(all_pro, ppidata) {
 88 |   #check shape and data type; testing the shape first means a malformed
 89 |   #input gets the message below instead of "undefined columns selected"
 90 |   well_formed <- is.data.frame(ppidata) && ncol(ppidata) >= 3 &&
 91 |     is.character(ppidata[, 1]) && is.character(ppidata[, 2]) &&
 92 |     is.logical(ppidata[, 3])
 93 |   if (!well_formed) {
 94 |     stop("ppidata must be a data.frame with three columns:character, character, logical")
 95 |   }
 96 | 
 97 |   ppidata <- na.omit(ppidata)
 98 | 
 99 |   #remove pairs in which either protein has zero annotations, then duplicates
100 |   both_annotated <- ppidata[, 1] %in% all_pro & ppidata[, 2] %in% all_pro
101 |   filtered_ppidata <- unique(ppidata[both_annotated, ])
102 | 
103 |   len <- nrow(filtered_ppidata)
104 | 
105 |   if (len == 0) {
106 |     stop("filtered ppidata is empty, none items have GO annotation. Please input more data.")
107 |   }
108 | 
109 |   #both labels have to survive the filtering
110 |   nTrue <- sum(filtered_ppidata[, 3])
111 |   nFalse <- len - nTrue
112 | 
113 |   if (nTrue == len || nFalse == len) {
114 |     stop("The filtered ppidata lacks the necessary label:TRUE and FALSE. Please input more data.")
115 |   }
116 | 
117 |   message(paste("positive set has", nTrue,
118 |                 "PPI pairs, negative set has", nFalse, "PPI pairs"))
119 | 
120 |   return(filtered_ppidata)
121 | }
120 | 
121 | #' compute prediction value on filtered ppidata
122 | #'
123 | #' @param cutoff numeric, topological cutoff
124 | #' @param filtered_ppidata data.frame, annotated protein pairs and their labels
125 | #' @param semdata GOSemSimDATA object
126 | #' @param combine_method "max" "BMA", "avg", "rcmax", "rcmax.avg"
127 | #' @return list, the prediction value for the input cutoff
128 | #' @noRd
129 | #'
130 | computePre <- function(cutoff, filtered_ppidata, semdata,
131 |                        combine_method) {
132 |   #rebuild the TCSS data for this cutoff and attach it to semdata
133 |   semdata@tcssdata <- process_tcss(semdata@ont, semdata@IC, cutoff = cutoff)
134 | 
135 |   #arguments shared by every geneSim() call
136 |   fixed_args <- list(semData = semdata,
137 |                      measure = "TCSS",
138 |                      combine = combine_method,
139 |                      drop = FALSE)
140 | 
141 |   #one geneSim() call per row: column 1 against column 2
142 |   first_member <- filtered_ppidata[, 1]
143 |   second_member <- filtered_ppidata[, 2]
144 |   mapply(geneSim, first_member, second_member, MoreArgs = fixed_args)
145 | }
143 | 
144 | #' calculate auc and F1-score
145 | #'
146 | #' @param predict_result list, prediction value for all cutoffs
147 | #' @param filtered_ppidata data.frame, annotated protein pairs and their labels
148 | #' @return data.frame, auc and F1-score value for different cutoffs
149 | #' @importFrom methods slot
150 | #' @importFrom rlang check_installed
151 | #' @noRd
152 | #'
153 | calc_auc_F1_score <- function(predict_result, filtered_ppidata) {
154 |   # checking whether package 'ROCR' being installed
155 |   check_installed('ROCR', 'for`calc_auc_F1_score()`.')
156 | 
157 |   # the label for PPIs, TRUE/FALSE
158 |   truth <- filtered_ppidata[, 3]
159 | 
160 |   # geneSim returns one value and two characters per pair, so the
161 |   # similarity values sit at positions 1, 4, 7, ...
162 |   score_idx <- seq(from = 1, to = length(truth) * 3, by = 3)
163 |   scores <- lapply(predict_result, function(p) as.numeric(p[score_idx]))
164 | 
165 |   # pairs whose value is NA for the first cutoff are dropped for all cutoffs
166 |   keep <- !is.na(scores[[1]])
167 |   truth <- truth[keep]
168 | 
169 |   # first element of the y.values slot of a ROCR performance object
170 |   first_y <- function(perf) slot(perf, "y.values")[[1]]
171 | 
172 |   # one prediction object per cutoff
173 |   pred <- lapply(scores, function(s) {
174 |     ROCR::prediction(s[keep], truth, label.ordering = c(FALSE, TRUE))
175 |   })
176 | 
177 |   # auc value per cutoff
178 |   auc <- unlist(lapply(pred, function(pr) {
179 |     first_y(ROCR::performance(pr, measure = "auc"))
180 |   }))
181 | 
182 |   # F1-score per cutoff, averaged over the semantic similarity cutoffs
183 |   F1_score <- unlist(lapply(pred, function(pr) {
184 |     mean(first_y(ROCR::performance(pr, measure = "f")), na.rm = TRUE)
185 |   }))
186 | 
187 |   data.frame(auc = auc, F1_score = F1_score, stringsAsFactors = FALSE)
188 | }
188 | 
189 | #' select the most appropriate cutoff
190 | #'
191 | #' @param auc_F1_score data.frame, auc and F1-score value for different cutoffs
192 | #' @param cutoffs vector, all possible cutoff values
193 | #' @return vector, topological cutoff for given parameters
194 | #' @noRd
195 | #'
196 | decide_cutoff <- function(auc_F1_score, cutoffs) {
197 |   auc <- auc_F1_score[, "auc"]
198 |   #product value satisfies the "both maximized" requirement
199 |   auc_mutiply_F1 <- auc * auc_F1_score[, "F1_score"]
200 | 
201 |   #a cutoff whose auc or F1-score is NA/NaN cannot be ranked; without this
202 |   #handling a single such row turned max() into NA and the result into NA
203 |   if (all(is.na(auc_mutiply_F1))) {
204 |     warning("auc/F1-score could not be computed for any cutoff, returning NA")
205 |     return(NA_real_)
206 |   }
207 | 
208 |   #positions holding the max product value (which() skips NA entries)
209 |   pos <- which(auc_mutiply_F1 == max(auc_mutiply_F1, na.rm = TRUE))
210 | 
211 |   #if not only one pair of auc and F1-score have same product
212 |   #take the one with larger auc
213 |   if (length(pos) > 1) {
214 |     pos <- pos[auc[pos] == max(auc[pos])]
215 |   }
216 | 
217 |   #if still tied, take the earliest one (the smaller cutoff when `cutoffs`
218 |   #is increasing) for time saving
219 |   cutoffs[pos[1]]
220 | }
216 | 


--------------------------------------------------------------------------------
/R/WangMethod.R:
--------------------------------------------------------------------------------
  1 | # Pairwise Wang similarity of two term vectors, returned as a
  2 | # length(t1) x length(t2) matrix with the terms as dimnames.
  3 | wangMethod <- function(t1, t2, ont) {
  4 |     n_row <- length(t1)
  5 |     n_col <- length(t2)
  6 | 
  7 |     # t1 varies fastest, so the flat result fills the matrix column by column
  8 |     pairwise <- mapply(wangMethod_internal,
  9 |                        rep(t1, n_col),
 10 |                        rep(t2, each = n_row),
 11 |                        MoreArgs = list(ont = ont))
 12 | 
 13 |     matrix(pairwise, ncol = n_col, dimnames = list(t1, t2))
 14 | }
  8 | 
  9 | 
 10 | ##' Method Wang for semantic similarity measuring
 11 | ##'
 12 | ##'
 13 | ##' @title wangMethod
 14 | ##' @param ID1 Ontology Term
 15 | ##' @param ID2 Ontology Term
 16 | ##' @param ont Ontology
 17 | ##' @return semantic similarity score
 18 | ##' @author Guangchuang Yu \url{https://yulab-smu.top}
 19 | wangMethod_internal <- function(ID1, ID2, ont="BP") {
 20 |     # identical terms score 1 without any lookup
 21 |     if (ID1 == ID2) return(1)
 22 | 
 23 |     # pick the relationship table that goes with the ontology label
 24 |     go_onts <- c("BP", "CC", "MF")
 25 |     onto_db_onts <- c("HDO", "HPO", "MPO")
 26 |     rel_df <- if (ont %in% go_onts) {
 27 |         get("gotbl", envir = get_gosemsim_env())
 28 |     } else if (ont %in% onto_db_onts) {
 29 |         get_rel_df(ont)
 30 |     } else {
 31 |         # any other label uses the `meshtbl` table kept in the global `.meshesEnv`
 32 |         get("meshtbl", envir = get(".meshesEnv", envir = .GlobalEnv))
 33 |     }
 34 | 
 35 |     sv_first <- getSV(ID1, ont, rel_df)
 36 |     sv_second <- getSV(ID2, ont, rel_df)
 37 | 
 38 |     if (all(is.na(sv_first)) || all(is.na(sv_second))) return(NA)
 39 | 
 40 |     # semantic values of the terms both inputs have in common
 41 |     shared <- intersect(names(sv_first), names(sv_second))
 42 |     common_first <- sv_first[shared]
 43 |     common_second <- sv_second[shared]
 44 | 
 45 |     nothing_shared <- is.null(common_first) ||
 46 |         is.null(common_second) ||
 47 |         length(common_first) == 0 ||
 48 |         length(common_second) == 0
 49 |     if (nothing_shared) return(NA)
 50 | 
 51 |     # shared semantic value relative to the total semantic value of both terms
 52 |     sum(common_first, common_second) / sum(sv_first, sv_second)
 53 | }
 53 | 
 54 | # Build (once) and cache the term/parent relationship table of an ontology
 55 | # database; the table is stored in the GOSemSim environment as "<ont>tbl".
 56 | get_rel_df <- function(ont) {
 57 |     cache_key <- sprintf("%stbl", ont)
 58 |     cache <- get_gosemsim_env()
 59 | 
 60 |     if (exists(cache_key, envir = cache)) {
 61 |         return(get(cache_key, envir = cache))
 62 |     }
 63 | 
 64 |     # distinct values of the first column of the ontology table (the term ids)
 65 |     terms <- unique(toTable(load_onto(ont))[, 1, drop = FALSE])
 66 | 
 67 |     # one row per (term, parent) pair; every relationship is labelled 'other'
 68 |     parents_of <- getParents(ont)[terms$id]
 69 |     child_ids <- rep(names(parents_of), times = sapply(parents_of, length))
 70 |     edges <- data.frame(id = child_ids,
 71 |                         relationship = 'other',
 72 |                         parent = unlist(parents_of),
 73 |                         Ontology = ont,
 74 |                         stringsAsFactors = FALSE)
 75 | 
 76 |     # join with the term ids and drop rows lacking an id or a parent
 77 |     rel_df <- merge(terms, edges, by = "id")
 78 |     rel_df <- rel_df[!is.na(rel_df$id), ]
 79 |     rel_df <- rel_df[!is.na(rel_df$parent), ]
 80 | 
 81 |     assign(cache_key, rel_df, envir = cache)
 82 |     rel_df
 83 | }
 86 | 
 87 | 
 88 | # Semantic values used by the Wang method: `ID` itself gets 1 and every term
 89 | # reached by walking up the parent links of `rel_df` gets the value of the
 90 | # term below it times the weight of the connecting relationship (the first
 91 | # value found for a term is kept). Results are cached per `ID` in the global
 92 | # `.SemSimCache`.
 93 | getSV <- function(ID, ont, rel_df, weight=NULL) {
 94 |     if (!exists(".SemSimCache")) .initial()
 95 |     .SemSimCache <- get(".SemSimCache", envir=.GlobalEnv)
 96 | 
 97 |     # inherits = FALSE: look in the cache only. With the default, an ID that
 98 |     # names an object visible from the cache's enclosing environments (for
 99 |     # an ordinary environment the root term "all" matches base::all) was
100 |     # returned as if it were a cached semantic value.
101 |     if( exists(ID, envir=.SemSimCache, inherits=FALSE) ) {
102 |         sv <- get(ID, envir=.SemSimCache, inherits=FALSE)
103 |         return(sv)
104 |     }
105 | 
106 |     if (ont == "HDO") {
107 |         topNode <- "DOID:4"
108 |     } else if (ont == "MPO") {
109 |        topNode <- "MP:0000001"
110 |     } else {
111 |         topNode <- "all"
112 |     }
113 | 
114 |     # the top node has no parents: its only semantic value is itself
115 |     if (ID == topNode) {
116 |         sv <- 1
117 |         names(sv) <- topNode
118 |         return (sv)
119 |     }
120 | 
121 |     if (is.null(weight)) {
122 |         weight <- c(0.8, 0.6, 0.7)
123 |         names(weight) <- c("is_a", "part_of", "other")
124 |     }
125 | 
126 |     rel_df <- rel_df[rel_df$Ontology == ont,]
127 |     if (! 'relationship' %in% colnames(rel_df))
128 |         rel_df$relationship <- "other"
129 | 
130 |     # every relationship without a dedicated weight is treated as "other"
131 |     rel_df$relationship[!rel_df$relationship %in% c("is_a", "part_of")] <- "other"
132 | 
133 |     sv <- 1
134 |     names(sv) <- ID
135 |     allid <- ID
136 | 
137 |     # climb one level per iteration until no row has the current terms as id
138 |     idx <- which(rel_df[,1] %in% ID)
139 |     while (length(idx) != 0) {
140 |         p <- rel_df[idx,]
141 |         pid <- p$parent
142 |         allid <- c(allid, pid)
143 | 
144 |         sv <- c(sv, weight[p$relationship]*sv[p[,1]])
145 |         names(sv) <- allid
146 |         idx <- which(rel_df[,1] %in% pid)
147 |     }
148 | 
149 |     sv <- sv[!is.na(names(sv))]
150 |     sv <- sv[!duplicated(names(sv))]
151 | 
152 |     # Only ontologies whose top node is "all" get that node zeroed. The
153 |     # label selecting "DOID:4" above is "HDO", so it must be exempt here
154 |     # too ("DO" is kept for backward compatibility).
155 |     if (!(ont %in% c("DO", "HDO", "MPO")))
156 |         sv[topNode] <- 0
157 | 
158 |     # not cached yet (checked on entry), so store the result
159 |     assign(ID,
160 |            sv,
161 |            envir=.SemSimCache)
162 | 
163 |     return(sv)
164 | }
152 | 
153 | 


--------------------------------------------------------------------------------
/R/buildGOmap.R:
--------------------------------------------------------------------------------
 1 | ##' Adding indirect GO annotation
 2 | ##'
 3 | ##' provided by a data.frame of GO TERM (column 1), GENE (column 2) and ONTOLOGY (optional) that
 4 | ##' describes GO direct annotation, 
 5 | ##' this function will add indirect GO annotation of genes.
 6 | ##' @title buildGOmap
 7 | ##' @param TERM2GENE data.frame with two or three columns of GO TERM, GENE and ONTOLOGY (optional)
 8 | ##' @return data.frame, GO annotation with direct and indirect annotation
 9 | ##' @importMethodsFrom AnnotationDbi as.list
10 | ##' @importFrom GO.db GOMFANCESTOR
11 | ##' @importFrom GO.db GOBPANCESTOR
12 | ##' @importFrom GO.db GOCCANCESTOR
13 | ##' @export
14 | ##' @author Yu Guangchuang
15 | buildGOmap <- function(TERM2GENE) {
16 |     # ancestor lists of the three GO ontologies
17 |     mfanc <- as.list(GOMFANCESTOR)
18 |     ccanc <- as.list(GOCCANCESTOR)
19 |     bpanc <- as.list(GOBPANCESTOR)
20 | 
21 |     # without an ONTOLOGY column the three lists are searched as one
22 |     has_ontology <- 'ONTOLOGY' %in% names(TERM2GENE)
23 |     if (!has_ontology) {
24 |         return(buildGOmap_internal(TERM2GENE, c(mfanc, ccanc, bpanc)))
25 |     }
26 | 
27 |     # otherwise each ontology is expanded with its own list and tagged
28 |     anc_by_ont <- list(MF=mfanc, CC=ccanc, BP=bpanc)
29 |     per_ont <- split(TERM2GENE, TERM2GENE$ONTOLOGY)
30 | 
31 |     expanded <- lapply(names(per_ont), function(ont_name) {
32 |         d <- buildGOmap_internal(per_ont[[ont_name]], anc_by_ont[[ont_name]])
33 |         d$ONTOLOGY <- ont_name
34 |         d
35 |     })
36 | 
37 |     do.call('rbind', args = expanded)
38 | }
37 | 
38 | ##' @importFrom stats setNames
39 | ##' @importFrom yulab.utils ls2df
40 | buildGOmap_internal <- function(TERM2GENE, anc) {
41 |     # Adds, for every (term, gene) row, one row per ancestor of the term
42 |     # found in `anc`, then appends them to the direct annotation.
43 |     #
44 |     # `anc` has to be indexed by term *name*: a factor column would be used
45 |     # as its integer codes by `[` and silently select the wrong entries.
46 |     terms <- as.character(TERM2GENE[,1])
47 |     genes <- as.character(TERM2GENE[,2])
48 | 
49 |     res <- setNames(anc[terms], genes) |>
50 |         ls2df() |>
51 |         unique()
52 | 
53 |     # swap the two columns into (term, gene) order and reuse the input's column names
54 |     res <- setNames(res[, c(2,1)], names(TERM2GENE)[1:2])
55 |     res <- res[res[,1] != "all", ]
56 |     res <- rbind(TERM2GENE[,1:2], res)
57 |     return(res)
58 | }
50 | 
51 | # old and slow version
52 | ##' @importMethodsFrom AnnotationDbi mget
53 | ##' @importFrom utils stack
54 | buildGOmap2 <- function(gomap) {
55 | 
56 |     ## remove empty GO annotation
57 |     has_go <- gomap[,1] != ""
58 |     gomap <- gomap[has_go, ]
59 | 
60 |     ## GO terms (column 1) grouped by gene (column 2)
61 |     gene_terms <- split(as.character(gomap[,1]), as.character(gomap[,2]))
62 | 
63 |     ## ancestors of a set of terms in one ancestor map, NA where not found
64 |     lookup <- function(terms, anc_map) {
65 |         unlist(mget(terms, anc_map, ifnotfound=NA))
66 |     }
67 | 
68 |     ## direct terms plus their ancestors from all three maps, without "all"
69 |     expand_terms <- function(terms) {
70 |         inherited <- c(lookup(terms, GOMFANCESTOR),
71 |                        lookup(terms, GOBPANCESTOR),
72 |                        lookup(terms, GOCCANCESTOR))
73 |         inherited <- inherited[ !is.na(inherited) ]
74 |         all_terms <- unique(c(terms, inherited))
75 |         all_terms[all_terms != "all"]
76 |     }
77 | 
78 |     go2gene <- stack(lapply(gene_terms, expand_terms))
79 |     colnames(go2gene) <- c("GO", "Gene")
80 | 
81 |     go2gene
82 | }
95 | 


--------------------------------------------------------------------------------
/R/clusterSim.R:
--------------------------------------------------------------------------------
 1 | ##'Semantic Similarity Between Two Gene Clusters
 2 | ##'
 3 | ##'Given two gene clusters, this function calculates semantic similarity between
 4 | ##'them.
 5 | ##'
 6 | ##'
 7 | ##'@param cluster1 A set of gene IDs.
 8 | ##'@param cluster2 Another set of gene IDs.
 9 | ##'@param semData GOSemSimDATA object
10 | ##'@param measure One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.
11 | ##'@param drop A set of evidence codes based on which certain annotations are
12 | ##'dropped. Use NULL to keep all GO annotations.
13 | ##'@param combine One of "max", "avg", "rcmax", "BMA" methods, for combining
14 | ##'semantic similarity scores of multiple GO terms associated with protein or
15 | ##'multiple proteins associated with protein cluster.
16 | ##'@return similarity
17 | ##'@seealso \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{geneSim}}
18 | ##'\code{\link{mgeneSim}} \code{\link{mclusterSim}}
19 | ##'@references Yu et al. (2010) GOSemSim: an R package for measuring semantic
20 | ##'similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
21 | ##'England), 26:7 976--978, April 2010. ISSN 1367-4803
22 | ##'\url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
23 | ##'PMID: 20179076
24 | ##'
25 | ##'@keywords manip
26 | ##' @export
27 | ##'@examples
28 | ##'
29 | ##'     d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
30 | ##'     cluster1 <- c("835", "5261","241", "994")
31 | ##'	cluster2 <- c("307", "308", "317", "321", "506", "540", "378", "388", "396")
32 | ##'	clusterSim(cluster1, cluster2, semData=d, measure="Wang")
33 | ##'
34 | clusterSim <- function(cluster1, cluster2, semData, measure="Wang", drop="IEA", combine="BMA"){
35 |     # GO terms of every gene in a cluster, flattened with unlist()
36 |     pooled_terms <- function(genes) {
37 |         unlist(sapply(genes, gene2GO, semData, dropCodes=drop))
38 |     }
39 | 
40 |     terms1 <- pooled_terms(cluster1)
41 |     terms2 <- pooled_terms(cluster2)
42 | 
43 |     # the cluster similarity is the combined similarity of the two term sets
44 |     mgoSim(terms1, terms2, semData, measure=measure, combine=combine)
45 | }
42 | 


--------------------------------------------------------------------------------
/R/combineMethods.R:
--------------------------------------------------------------------------------
 1 | ##'combining similarity matrix to similarity score
 2 | ##'
 3 | ##'Functions for combining similarity matrix to similarity score
 4 | ##'
 5 | ##'
 6 | ##'@param SimScores similarity matrix
 7 | ##'@param combine combine method
 8 | ##'@return similarity value
 9 | ##'@export
10 | ##'@author Guangchuang Yu \url{http://guangchuangyu.github.io}
11 | combineScores <- function(SimScores, combine) {
12 | 
13 |     if (length(combine) == 0) {  #if not define combine
14 |         return(round(SimScores, digits=3))
15 |     }
16 | 
17 |     ## if combine was defined...
18 |     if(!sum(!is.na(SimScores))) return (NA)
19 | 
20 |     ## vector-like input (plain vector, single row or single column):
21 |     ## "avg" gives the mean, every other method reduces to the maximum
22 |     is_flat <- function(x) is.vector(x) || nrow(x)==1 || ncol(x)==1
23 |     combine_flat <- function(x) {
24 |         score <- if (combine == "avg") mean(x, na.rm=TRUE) else max(x, na.rm=TRUE)
25 |         round(score, digits=3)
26 |     }
27 | 
28 |     if (is_flat(SimScores)) return(combine_flat(SimScores))
29 | 
30 |     ## drop rows, then columns, that hold no score at all; dropping may
31 |     ## collapse the matrix to a vector, hence the dim() check and the
32 |     ## second flat test
33 |     row.na.idx <- apply(SimScores, 1, function(i) all(is.na(i)))
34 |     if (any(row.na.idx)) {
35 |         SimScores <- SimScores[-which(row.na.idx), ]
36 |     }
37 | 
38 |     if (! is.null(dim(SimScores)) ) {
39 |         col.na.idx <- apply(SimScores, 2, function(i) all(is.na(i)))
40 |         if (any(col.na.idx)) {
41 |             SimScores <- SimScores[ , -which(col.na.idx)]
42 |         }
43 |     }
44 | 
45 |     if (is_flat(SimScores)) return(combine_flat(SimScores))
46 | 
47 |     result <- switch(combine,
48 |         avg = mean(SimScores, na.rm=TRUE),
49 |         max = max(SimScores, na.rm=TRUE),
50 |         rcmax = {
51 |             rowScore <- mean(apply(SimScores, 1, max, na.rm=TRUE))
52 |             colScore <- mean(apply(SimScores, 2, max, na.rm=TRUE))
53 |             max(rowScore, colScore)
54 |         },
55 |         rcmax.avg = ,
56 |         BMA = sum(apply(SimScores, 1, max, na.rm=TRUE),
57 |                   apply(SimScores, 2, max, na.rm=TRUE)) / sum(dim(SimScores)),
58 |         ## an unknown method used to fail with "object 'result' not found"
59 |         stop(sprintf("unsupported combine method '%s' for a score matrix", combine))
60 |     )
61 | 
62 |     return (round(result, digits=3))
63 | }
64 | 


--------------------------------------------------------------------------------
/R/computeIC.R:
--------------------------------------------------------------------------------
 1 | ## Information content of every GO term of ontology `ont`, computed from
 2 | ## how often each term (or one of its offspring) occurs in goAnno$GO.
 3 | computeIC <- function(goAnno, ont) {
 4 |     ## goAnno, see godata function
 5 |     if (!exists(".GOSemSimEnv")) .initial()
 6 |     sem_env <- get(".GOSemSimEnv", envir=.GlobalEnv)
 7 |     go_table <- get("gotbl", envir=sem_env)
 8 | 
 9 |     ## all GO terms appearing in the given ontology
10 |     onto_ids <- unique(go_table[go_table$Ontology == ont, "go_id"])
11 | 
12 |     ## how often each term occurs in the annotation
13 |     tab <- table(goAnno$GO)
14 |     annotated_ids <- names(tab)
15 |     direct <- as.vector(tab)
16 |     names(direct) <- annotated_ids
17 | 
18 |     ## ensure goterms not appearing in the specific annotation have 0 frequency..
19 |     missing_ids <- setdiff(onto_ids, annotated_ids)
20 |     zero_counts <- double(length(missing_ids))
21 |     names(zero_counts) <- missing_ids
22 |     counts <- c(direct, zero_counts)
23 | 
24 |     ## a term's count is its own plus that of all its offspring
25 |     offspring_of <- getOffsprings(ont)
26 |     inherited <- sapply(onto_ids, function(id) {
27 |         sum(counts[offspring_of[[id]]], na.rm=TRUE)
28 |     })
29 |     total <- counts[onto_ids] + inherited
30 |     names(total) <- onto_ids
31 | 
32 |     ## probability of occurrence in this corpus; IC is its negative log
33 |     -log(total / sum(counts))
34 | }
33 | 
34 | 
35 | 


--------------------------------------------------------------------------------
/R/gene2GO.R:
--------------------------------------------------------------------------------
 1 | ## GO terms annotated to `gene` in godata@geneAnno, after removing the
 2 | ## annotations whose evidence code is listed in `dropCodes`.
 3 | gene2GO <- function(gene, godata, dropCodes) {
 4 |     anno <- godata@geneAnno
 5 |     has_evidence <- "EVIDENCE" %in% colnames(anno)
 6 |     if (has_evidence) {
 7 |         anno <- anno[!anno$EVIDENCE %in% dropCodes,]
 8 |     } else {
 9 |         warning("Evidence codes not found, 'drop' parameter will be ignored...")
10 |     }
11 |     ## the gene id is matched against column 1; NA terms are discarded
12 |     terms <- as.character(unique(anno[anno[,1] == gene, "GO"]))
13 |     terms[!is.na(terms)]
14 | }
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/R/geneSim.R:
--------------------------------------------------------------------------------
 1 | ##'Semantic Similarity Between two Genes
 2 | ##'
 3 | ##'Given two genes, this function will calculate the semantic similarity between
 4 | ##'them, and return their semantic similarity and the corresponding GO terms
 5 | ##'
 6 | ##'
 7 | ##'@param gene1 Entrez gene id.
 8 | ##'@param gene2 Another entrez gene id.
 9 | ##'@param semData GOSemSimDATA object
10 | ##'@param measure One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.
11 | ##'@param drop A set of evidence codes based on which certain annotations are
12 | ##'dropped. Use NULL to keep all GO annotations.
13 | ##'@param combine One of "max", "avg", "rcmax", "BMA" methods, for combining
14 | ##'semantic similarity scores of multiple GO terms associated with protein or
15 | ##'multiple proteins associated with protein cluster.
16 | ##'@return list of similarity value and corresponding GO.
17 | ##'@seealso \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{mgeneSim}}
18 | ##'\code{\link{clusterSim}} \code{\link{mclusterSim}}
19 | ##'@references Yu et al. (2010) GOSemSim: an R package for measuring semantic
20 | ##'similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
21 | ##'England), 26:7 976--978, April 2010. ISSN 1367-4803
22 | ##'\url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
23 | ##'PMID: 20179076
24 | ##'
25 | ##'@keywords manip
26 | ##' @export
27 | ##'@examples
28 | ##'
29 | ##' d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
30 | ##'	geneSim("241", "251", semData=d, measure="Wang")
31 | ##'
32 | geneSim <- function(gene1, gene2, semData, measure="Wang", drop="IEA", combine="BMA"){
33 |     ## GO terms annotated to each gene after evidence-code filtering
34 |     terms <- lapply(list(gene1, gene2), gene2GO, godata=semData, dropCodes=drop)
35 |     if (any(lengths(terms) == 0)) return (NA)
36 |     ## collapse the term-by-term scores into a single gene-level similarity
37 |     sim <- mgoSim(terms[[1]], terms[[2]], semData=semData, measure=measure, combine=combine)
38 |     return (list(geneSim=sim, GO1=terms[[1]], GO2=terms[[2]]))
39 | }
40 | 
41 | 


--------------------------------------------------------------------------------
/R/goSim.R:
--------------------------------------------------------------------------------
 1 | ##' Semantic Similarity Between Two GO Terms
 2 | ##'
 3 | ##' Given two GO IDs, this function calculates their semantic similarity.
 4 | ##'
 5 | ##'
 6 | ##' @param GOID1 GO ID 1.
 7 | ##' @param GOID2 GO ID 2.
 8 | ##' @param semData GOSemSimDATA object
 9 | ##' @param measure One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.
10 | ##' @return similarity
11 | ##' @seealso \code{\link{mgoSim}}
12 | ##' \code{\link{geneSim}}
13 | ##' \code{\link{mgeneSim}}
14 | ##' \code{\link{clusterSim}}
15 | ##' \code{\link{mclusterSim}}
16 | ##' @references Yu et al. (2010) GOSemSim: an R package for measuring semantic
17 | ##' similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
18 | ##' England), 26:7 976--978, April 2010. ISSN 1367-4803
19 | ##' \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
20 | ##' PMID: 20179076
21 | ##' @keywords manip
22 | ##' @export
23 | ##' @examples
24 | ##' 
25 | ##'     d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
26 | ##'	goSim("GO:0004022", "GO:0005515", semData=d, measure="Wang")
27 | ##' 
28 | goSim <- function(GOID1, GOID2, semData, measure="Wang") {
29 |     ## termSim() yields a score matrix; flatten it to plain numbers
30 |     score <- as.numeric(termSim(GOID1, GOID2, semData, method=measure))
31 |     return(round(score, digits=3))
32 | }
33 | 
34 | 


--------------------------------------------------------------------------------
/R/godata.R:
--------------------------------------------------------------------------------
  1 | ##' prepare GO DATA for measuring semantic similarity
  2 | ##'
  3 | ##'
  4 | ##' @title godata
  5 | ##' @param OrgDb OrgDb object (will be removed in future, please use annoDb instead)
  6 | ##' @param annoDb GO annotation database, 
  7 | ##' can be OrgDb or a data.frame contains three columns of 'GENE', 'GO' and 'ONTOLOGY'.
  8 | ##' @param keytype keytype
  9 | ##' @param ont one of 'BP', 'MF', 'CC'
 10 | ##' @param computeIC logical, whether to compute IC
 11 | ##' @param processTCSS logical, whether to process TCSS
 12 | ##' @param cutoff cutoff of TCSS
 13 | ##' @return GOSemSimDATA object
 14 | ##' @importFrom AnnotationDbi keys
 15 | ##' @importFrom AnnotationDbi select
 16 | ##' @importFrom AnnotationDbi metadata
 17 | ##' @importFrom methods new
 18 | ##' @export
 19 | ##' @author Guangchuang Yu
 20 | godata <- function(OrgDb = NULL, annoDb = NULL, keytype = "ENTREZID",
 21 |                    ont, computeIC = TRUE,
 22 |                    processTCSS = FALSE, cutoff = NULL) {
 23 |     if (processTCSS) computeIC <- TRUE
 24 |     ## (TCSS data are derived from IC, hence computeIC is forced on above)
 25 |     ont <- toupper(ont)
 26 |     ont <- match.arg(ont, c("BP", "CC", "MF"))
 27 |     ## without any annotation source only the ontology is recorded
 28 |     if (is.null(OrgDb) && is.null(annoDb)) {
 29 |         return(new("GOSemSimDATA",
 30 |         ont = ont
 31 |         ))
 32 |     }
 33 |     ## 'OrgDb' is the deprecated spelling of 'annoDb' and takes precedence
 34 |     if (!is.null(OrgDb)) {
 35 |       warning("use 'annoDb' instead of 'OrgDb'")
 36 |       annoDb <- OrgDb
 37 |     }
 38 |     if (is.character(annoDb)) {
 39 |       annoDb <- load_OrgDb(annoDb)
 40 |     }
 41 |     ## normalise the annotation source into a gene/GO/ONTOLOGY table
 42 |     md <- data.frame()
 43 |     if (inherits(annoDb, 'OrgDb')) {
 44 |       goAnno <- parse_orgDb(annoDb, keytype)
 45 |       md <- metadata(annoDb)
 46 |     } else if (inherits(annoDb, 'gson')) {
 47 |       stop("'gson' annotation is not supported yet, please use an OrgDb or a data.frame.")
 48 |     } else { # for data.frame
 49 |       goAnno <- check_goAnno(annoDb)
 50 |     }
 51 |     ## only the annotation of the selected ontology is kept
 52 |     goAnno <- goAnno[goAnno$ONTOLOGY == ont, ]
 53 |     if (computeIC) {
 54 |         message("preparing IC data...")
 55 |         IC <- computeIC(goAnno, ont)
 56 |         if (processTCSS) {
 57 |             message("preparing TCSS data...")
 58 |             tcssdata <- process_tcss(ont, IC = IC, cutoff = cutoff)
 59 |         }
 60 |     }
 61 |     ## the 'keys' slot is character, so factor or numeric gene IDs are coerced
 62 |     res <- new("GOSemSimDATA",
 63 |       keys = as.character(unique(goAnno[[1]])),
 64 |       ont = ont,
 65 |       geneAnno = goAnno,
 66 |       metadata = md
 67 |     )
 68 |     if (computeIC) {
 69 |         res@IC <- IC
 70 |         if (processTCSS) {
 71 |             res@tcssdata <- tcssdata
 72 |         }
 73 |     }
 74 | 
 75 |     return(res)
 76 | }
 77 | 
 78 | check_goAnno <- function(goAnno) {
 79 |   ## Validate a user-supplied annotation table before it is used as geneAnno.
 80 |   ## The first column is assumed to hold gene IDs (its name is not enforced);
 81 |   ## only the presence of the 'GO' and 'ONTOLOGY' columns is verified here.
 82 |   required <- c("GO", "ONTOLOGY")
 83 |   absent <- setdiff(required, names(goAnno))
 84 |   if (length(absent) > 0) {
 85 |     stop("annoDb as a data.frame should contains 'GO' and 'ONTOLOGY' columns.")
 86 |   }
 87 |   return(goAnno)
 88 | }
 89 | 
 90 | parse_orgDb <- function(OrgDb, keytype) {
 91 |     ## collect every key of the requested type before announcing the query
 92 |     all_keys <- keys(OrgDb, keytype = keytype)
 93 |     message("preparing gene to GO mapping data...")
 94 | 
 95 |     ## informational messages emitted by select() are silenced
 96 |     mapping <- suppressMessages(select(
 97 |         OrgDb, keys = all_keys, keytype = keytype,
 98 |         columns = c("GO", "ONTOLOGY")
 99 |     ))
100 | 
101 |     ## keys without any GO annotation are dropped
102 |     return(mapping[!is.na(mapping$GO), ])
103 | }
103 | 
104 | ##' Class "GOSemSimDATA"
105 | ##' This class stores IC and gene to go mapping for semantic similarity measurement
106 | ##'
107 | ##'
108 | ##' @name GOSemSimDATA-class
109 | ##' @aliases GOSemSimDATA-class
110 | ##'   show,GOSemSimDATA-method
111 | ##'
112 | ##' @docType class
113 | ##' @slot keys gene ID
114 | ##' @slot ont ontology
115 | ##' @slot IC IC data
116 | ##' @slot geneAnno gene to GO mapping
117 | ##' @slot tcssdata tcssdata
118 | ##' @slot metadata metadata
119 | ##' @exportClass GOSemSimDATA
120 | ##' @keywords classes
121 | ##' @importFrom methods setClass
122 | setClass("GOSemSimDATA",
123 |   representation = representation(
124 |     keys = "character",       # gene IDs (first column of geneAnno)
125 |     ont = "character",        # ontology, "BP", "CC" or "MF" when built by godata()
126 |     IC = "numeric",           # information content, filled when computeIC = TRUE
127 |     geneAnno = "data.frame",  # gene to GO mapping
128 |     tcssdata = "list",        # result of process_tcss(), filled when processTCSS = TRUE
129 |     metadata = "data.frame"   # metadata() of the OrgDb; empty for other sources
130 |   )
131 | )
132 | 
133 | ##' @importFrom methods setMethod
134 | setMethod(
135 |   "show", signature(object = "GOSemSimDATA"),
136 |   function(object) {  # prints a fixed banner only; no slot content is shown
137 |     cat("#\n# DATA for Semantic Similarity calculation ...\n#\n")
138 |   }
139 | )
140 | 


--------------------------------------------------------------------------------
/R/mclusterSim.R:
--------------------------------------------------------------------------------
 1 | ##'Pairwise Semantic Similarities for a List of Gene Clusters
 2 | ##'
 3 | ##'Given a list of gene clusters, this function calculates pairwise semantic
 4 | ##'similarities.
 5 | ##'
 6 | ##'
 7 | ##'@param clusters A list of gene clusters.
 8 | ##'@param semData GOSemSimDATA object
 9 | ##'@param measure One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.
10 | ##'@param drop A set of evidence codes based on which certain annotations are
11 | ##'dropped. Use NULL to keep all GO annotations.
12 | ##'@param combine One of "max", "avg", "rcmax", "BMA" methods, for combining
13 | ##'semantic similarity scores of multiple GO terms associated with protein or
14 | ##'multiple proteins associated with protein cluster.
15 | ##'@return similarity matrix
16 | ##'@seealso \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{geneSim}}
17 | ##'\code{\link{mgeneSim}} \code{\link{clusterSim}}
18 | ##'@references Yu et al. (2010) GOSemSim: an R package for measuring semantic
19 | ##'similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
20 | ##'England), 26:7 976--978, April 2010. ISSN 1367-4803
21 | ##'\url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
22 | ##'PMID: 20179076
23 | ##'
24 | ##'@keywords manip
25 | ##' @export
26 | ##'@examples
27 | ##'
28 | ##'  d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
29 | ##'  cluster1 <- c("835", "5261","241")
30 | ##'  cluster2 <- c("578","582")
31 | ##'  cluster3 <- c("307", "308", "317")
32 | ##'  clusters <- list(a=cluster1, b=cluster2, c=cluster3)
33 | ##'  mclusterSim(clusters, semData=d, measure="Wang")
34 | ##'
35 | mclusterSim <- function(clusters, semData, measure="Wang", drop="IEA", combine="BMA") {
36 |     n <- length(clusters)
37 |     ## GO terms per gene, grouped by cluster; seq_len() copes with an empty input
38 |     cluster_gos <- lapply(seq_len(n), function(i) {
39 |         lapply(clusters[[i]], gene2GO, semData, dropCodes=drop)
40 |     })
41 | 
42 |     uniqueGO <-  unique(unlist(cluster_gos))
43 |     go_matrix <- mgoSim(uniqueGO, uniqueGO, semData, measure = measure, combine = NULL)
44 | 
45 |     scores <- matrix(NA, nrow=n, ncol=n)
46 |     dimnames(scores) <- list(names(clusters), names(clusters))
47 | 
48 |     for (i in seq_along(clusters)) {
49 |         gos1 <- unlist(cluster_gos[[i]])
50 |         gos1 <- gos1[!is.na(gos1)]
51 |         for (j in seq_len(i)) {
52 |             gos2 <- unlist(cluster_gos[[j]])
53 |             gos2 <- gos2[!is.na(gos2)]
54 |             ## both assignments belong to the guard; clusters without GO terms stay NA
55 |             if (length(gos1) != 0 && length(gos2) != 0) {
56 |                 scores[i,j] <- combineScores(go_matrix[gos1, gos2, drop=FALSE], combine=combine)
57 |                 scores[j,i] <- scores[i,j]
58 |             }
59 |         }
60 |     }
61 |     removeRowNA <- apply(!is.na(scores), 1, sum)>0
62 |     removeColNA <- apply(!is.na(scores), 2, sum)>0
63 |     return(scores[removeRowNA, removeColNA, drop=FALSE])
64 | }


--------------------------------------------------------------------------------
/R/mgeneSim.R:
--------------------------------------------------------------------------------
 1 | ##'Pairwise Semantic Similarity for a List of Genes
 2 | ##'
 3 | ##'Given a list of genes, this function calculates pairwise semantic
 4 | ##'similarities.
 5 | ##'
 6 | ##'
 7 | ##'@param genes A list of entrez gene IDs.
 8 | ##'@param semData GOSemSimDATA object
 9 | ##'@param measure One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.
10 | ##'@param drop A set of evidence codes based on which certain annotations are
11 | ##'dropped. Use NULL to keep all GO annotations.
12 | ##'@param combine One of "max", "avg", "rcmax", "BMA" methods, for combining
13 | ##'semantic similarity scores of multiple GO terms associated with protein or
14 | ##'multiple proteins associated with protein cluster.
15 | ##' @param verbose show progress bar or not.
16 | ##'@return similarity matrix
17 | ##'@seealso \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{geneSim}}
18 | ##'\code{\link{clusterSim}} \code{\link{mclusterSim}}
19 | ##'@references Yu et al. (2010) GOSemSim: an R package for measuring semantic
20 | ##'similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
21 | ##'England), 26:7 976--978, April 2010. ISSN 1367-4803
22 | ##'\url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
23 | ##'PMID: 20179076
24 | ##'
25 | ##'@keywords manip
26 | ##' @importFrom utils setTxtProgressBar
27 | ##' @importFrom utils txtProgressBar
28 | ##'@export
29 | ##'@examples
30 | ##'
31 | ##' d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
32 | ##'	mgeneSim(c("835", "5261","241"), semData=d, measure="Wang")
33 | ##'
34 | mgeneSim <- function (genes, semData, measure="Wang", drop="IEA", combine="BMA", verbose=TRUE) {
35 |     genes <- unique(as.character(genes))
36 |     n <- length(genes)
37 |     ## symmetric gene-by-gene matrix; cells stay NA until a score is stored
38 |     scores <- matrix(NA, nrow=n, ncol=n)
39 |     dimnames(scores) <- list(genes, genes)
40 |     ## score every pair of GO terms once, then look the pairs up per gene
41 |     gos <- lapply(genes, gene2GO, godata=semData, dropCodes=drop)
42 |     uniqueGO <-  unique(unlist(gos))
43 |     go_matrix <- mgoSim(uniqueGO, uniqueGO, semData, measure = measure, combine = NULL)
44 |     if (verbose) {
45 |       step <- 1
46 |       pb <- txtProgressBar(min=0, max=sum(1:n), style=3)
47 |     }
48 |     for (i in seq_along(genes)) {
49 |         for (j in seq_len(i)){
50 |             if (verbose) {
51 |                 setTxtProgressBar(pb, step)
52 |                 step <- step + 1
53 |             }
54 |             ## only the lower triangle is computed; the value is mirrored
55 |             scores[i,j] <- combineScores(go_matrix[gos[[i]], gos[[j]]], combine = combine)
56 |             scores[j,i] <- scores[i,j]
57 |         }
58 |     }
59 |     if (verbose) close(pb)
60 |     ## genes whose row / column holds no score at all are dropped
61 |     keepRow <- apply(!is.na(scores), 1, sum) > 0
62 |     keepCol <- apply(!is.na(scores), 2, sum) > 0
63 |     return(scores[keepRow, keepCol, drop=FALSE]) }
64 | 
65 | 


--------------------------------------------------------------------------------
/R/mgoSim.R:
--------------------------------------------------------------------------------
 1 | ##'Semantic Similarity Between two GO terms lists
 2 | ##'
 3 | ##'Given two GO term sets, this function will calculate the semantic similarity
 4 | ##'between them, and return their semantic similarity
 5 | ##'
 6 | ##'
 7 | ##'@param GO1 A set of go terms.
 8 | ##'@param GO2 Another set of go terms.
 9 | ##'@param semData GOSemSimDATA object
10 | ##'@param measure One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.
11 | ##'@param combine One of "max", "avg", "rcmax", "BMA" methods, for combining
12 | ##'semantic similarity scores of multiple GO terms associated with protein or
13 | ##'multiple proteins associated with protein cluster.
14 | ##'@return similarity
15 | ##'@seealso \code{\link{goSim}} \code{\link{geneSim}} \code{\link{mgeneSim}}
16 | ##'\code{\link{clusterSim}} \code{\link{mclusterSim}}
17 | ##'@references Yu et al. (2010) GOSemSim: an R package for measuring semantic
18 | ##'similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
19 | ##'England), 26:7 976--978, April 2010. ISSN 1367-4803
20 | ##'\url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
21 | ##'PMID: 20179076
22 | ##'
23 | ##'@keywords manip
24 | ##' @export
25 | ##'@examples
26 | ##'
27 | ##'     d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
28 | ##'	go1 <- c("GO:0004022", "GO:0004024", "GO:0004023")
29 | ##'	go2 <- c("GO:0009055", "GO:0020037")
30 | ##'	mgoSim("GO:0003824", go2, semData=d, measure="Wang")
31 | ##'	mgoSim(go1, go2, semData=d, measure="Wang")
32 | ##'
33 | mgoSim <- function(GO1, GO2, semData, measure="Wang", combine="BMA"){
34 |     ## pairwise term scores first, then collapse them with the chosen strategy
35 |     combined <- combineScores(termSim(GO1, GO2, semData, method=measure), combine)
36 |     return(round(combined, digits=3))
37 | }
38 | 


--------------------------------------------------------------------------------
/R/parseGAF.R:
--------------------------------------------------------------------------------
 1 | ##' parse GAF files
 2 | ##'
 3 | ##' given a GAF file, this function extracts the information from it
 4 | ##' @title read.gaf
 5 | ##' @rdname read-gaf
 6 | ##' @param file GAF file
 7 | ##' @param asis logical, whether to output the original contents of the file; only works if 'add_indirect_GO = FALSE'
 8 | ##' @param add_indirect_GO whether to add indirect GO annotation 
 9 | ##' @return A data.frame. Original table if 'asis' works, otherwise contains 3 columns of 'GENE', 'GO' and 'ONTOLOGY'
10 | ##' @export
11 | read.gaf <- function(file, asis = FALSE, add_indirect_GO = FALSE) {
12 |   gaf <- read.gaf2(file)
13 |   ## 'asis' is honoured only when no indirect annotation is requested
14 |   if (asis && !add_indirect_GO) return(gaf)
15 |   ## keep gene / GO / aspect and translate the single-letter aspect codes
16 |   anno <- gaf[, c("DB_Object_ID", "GOID", "Aspect")]
17 |   colnames(anno) <- c("GENE", "GO", "ONTOLOGY")
18 |   aspect2ont <- c(F = "MF", C = "CC", P = "BP")
19 |   anno$ONTOLOGY <- aspect2ont[anno$ONTOLOGY]
20 | 
21 |   ## optionally let buildGOmap() append the indirect annotation
22 |   if (add_indirect_GO) return(buildGOmap(anno))
23 |   anno
24 | }
25 | 
26 | ##' @importFrom GO.db GO.db
27 | ##' @importFrom AnnotationDbi columns
28 | goid2term <- function(simplify = TRUE) {
29 |   ## query every column of GO.db for all of its keys
30 |   tbl <- AnnotationDbi::select(GO.db, keys(GO.db), columns(GO.db))
31 |   ## the full table is handed back unless the simplified form is wanted
32 |   if (!simplify) return(tbl)
33 | 
34 |   return(tbl[, c("GOID", "TERM")])
35 | }
36 | 
37 | ##' @rdname read-gaf
38 | ##' @export
39 | parse_gff <- read.gaf  # exported alias of read.gaf(), documented on the same help page
40 | 
41 | # only read the file with selected columns
42 | ##' @importFrom utils read.delim
43 | read.gaf2 <- function(GafFile, nrows = -1) {
44 |   ## Reads a GAF file and keeps six of its columns (2, 3, 5, 7, 9 and 10).
45 |   cat("Reading ", GafFile, ": ", sep = "")
46 |   ## Read every field as text: type guessing would turn an Aspect column
47 |   ## holding only "F" into logical FALSE and strip leading zeros from
48 |   ## numeric-looking IDs.
49 |   GafFile <- read.delim(
50 |       GafFile,
51 |       sep = "\t", quote = "\"", comment.char = "!",
52 |       header = FALSE, fill = TRUE, nrows = nrows,
53 |       as.is = TRUE, colClasses = "character"
54 |     )
55 |   ## positions refer to the tab-separated fields of each annotation line
56 |   GafFile <- GafFile[, c(2, 3, 5, 7, 9, 10)]
57 |   names(GafFile) <- c(
58 |     "DB_Object_ID",
59 |     "DB_Object_Symbol",
60 |     "GOID",
61 |     "Evidence_Code",
62 |     "Aspect",
63 |     "DB_Object_Name"
64 |   )
65 |   cat("found", nrow(GafFile), "rows in this GAF file")
66 |   return(GafFile)
67 | }
68 | 


--------------------------------------------------------------------------------
/R/processTCSS.R:
--------------------------------------------------------------------------------
  1 | #' prepare tcss data for TCSS to calculate semantic similarity
  2 | #'
  3 | #' @param ont ontology
  4 | #' @param IC information content
  5 | #' @param cutoff the topology cutoff, users can use tcss_cutoff() function to calculate cutoff value  
  6 | #'
  7 | #' @return list, belonged clusters and its elements for all nodes
  8 | #' @noRd
  9 | process_tcss <- function(ont, IC, cutoff = NULL) {
 10 |     ## if (length(IC) == 0) {
 11 |     ##     stop("IC data not found, please re-generate your `semData` with `computeIC = TRUE`...")
 12 |     ## }
 13 | 
 14 |     if (is.null(cutoff)) {
 15 |         message("As cutoff value is not provided, default value based on human will be used")
 16 |         cutoff <- switch(ont,
 17 |                          CC = 3.2,
 18 |                          MF = ,
 19 |                          BP = ,
 20 |                          DO = ,
 21 |                          MPO = ,
 22 |                          HPO = 3.5
 23 |                          )
 24 |     } else if (cutoff <= 0) {
 25 |         stop("cutoff value must be positive")
 26 |     }
 27 | 
 28 |     ## terms with an infinite IC are left out of the topology
 29 |     GO <- names(IC)[!is.infinite(IC)]
 30 |     offspring <- getOffsprings(ont)
 31 | 
 32 |     ## topological IC of every remaining term
 33 |     ICT <- computeICT(GO, offspring = offspring)
 34 |     ## terms whose ICT does not exceed the cutoff become meta-terms ...
 35 |     meta_terms <- create_meta_terms(ICT = ICT, cutoff = cutoff)
 36 |     ## ... unless their ICT is too close to that of a parent meta-term
 37 |     meta_terms <- remove_close(meta_terms, ont = ont, ICT = ICT)
 38 |     ## members of each sub-graph, named by its meta-term
 39 |     meta_graph <- create_sub_terms(meta_terms, offspring = offspring)
 40 | 
 41 |     ## scale the IC of the members by the largest IC found in their sub-graph
 42 |     meta_maxIC <- calc_maxIC(meta_graph, IC = IC)
 43 |     ica <- lapply(seq_along(meta_graph), function(k) {
 44 |         IC[meta_graph[[k]]] / meta_maxIC[k]
 45 |     })
 46 |     names(ica) <- meta_terms
 47 | 
 48 |     ## invert the membership: for every term, the sub-graphs containing it
 49 |     membership <- utils::stack(meta_graph)
 50 |     by_term <- split(as.character(membership$ind), membership$values)
 51 |     clusid <- by_term[GO]
 52 | 
 53 |     return(list(
 54 |         ## meta_terms == names(meta_graph)
 55 |         meta_graph = meta_graph,
 56 |         ica = ica,
 57 |         clusid = clusid
 58 |     ))
 59 | }
 60 | 
 61 | #' compute ICT (Topology Information Content) for each term
 62 | #'
 63 | #' @param GO character, all go terms, species specific
 64 | #' @param offspring list, offspring nodes
 65 | #'
 66 | #' @return numeric, ICT value
 67 | #' @noRd
 68 | #'
 69 | computeICT <- function(GO, offspring) {
 70 |     total <- length(GO)
 71 |     ## number of terms at or below each term, the term itself included
 72 |     n_covered <- vapply(offspring[GO], function(kids) {
 73 |         if (any(is.na(kids))) {
 74 |             ## NA marks a term without offspring: only the term itself
 75 |             1
 76 |         } else {
 77 |             sum(kids %in% GO) + 1
 78 |         }
 79 |     }, numeric(1))
 80 | 
 81 |     ## ICT is the negative log10 of the covered share of all terms
 82 |     res <- -log10(n_covered / total)
 83 |     names(res) <- GO
 84 |     return(res)
 85 | }
 86 | 
 87 | 
 88 | #' all nodes with ICT value under cutoff are meta_terms
 89 | #'
 90 | #' @param ICT numeric, ICT value with corresponding GO term as name attributes
 91 | #' @param cutoff numeric, topological cutoff
 92 | #'
 93 | #' @return character, sub-graph-root nodes
 94 | #' @noRd
 95 | create_meta_terms <- function(ICT, cutoff) {
 96 |     ## names of the terms whose topological IC does not exceed the cutoff
 97 |     names(ICT[ICT <= cutoff])
 98 | }
 99 | 
100 | #' calculate every graph's max IC value
101 | #'
102 | #' @param meta_graph list, the elements of every sub-graph, as returned by
103 | #'   create_sub_terms()
104 | #' @param IC numeric, IC value of each term
105 | #'
106 | #' @return numeric, max IC in different graphs
107 | #' @noRd
108 | #'
109 | calc_maxIC <- function(meta_graph, IC) {
110 |     ## fallback value: the largest finite IC over all terms
111 |     global_max <- max(IC[is.finite(IC)])
112 | 
113 |     per_graph_max <- function(members) {
114 |         ## IC of the graph's members, ignoring NA, NaN and infinite values
115 |         member_ic <- IC[members]
116 |         member_ic <- member_ic[is.finite(member_ic)]
117 |         ## a graph without any usable IC value gets the global maximum
118 |         if (length(member_ic) > 0) {
119 |             return(max(member_ic))
120 |         }
121 |         global_max
122 |     }
123 |     return(vapply(meta_graph, per_graph_max, numeric(1)))
124 | }
125 | 
126 | #' get the relationship between clusters and elements
127 | #'
128 | #' @param meta_terms character, all cluster ids but "meta"
129 | #' @param offspring list
130 | #'
131 | #' @return list, the elements of each cluster, named by the cluster's meta-term
132 | #' @noRd
133 | #'
134 | create_sub_terms <- function(meta_terms, offspring) {
135 |     ## one entry per meta-term: the terms forming its sub-graph
136 |     element <- vector("list", length(meta_terms))
137 |     for (k in seq_along(meta_terms)) {
138 |         root <- meta_terms[[k]]
139 |         ## offspring of the root (NA when it has none)
140 |         below <- offspring[[root]]
141 |         ## meta-terms among them are roots of sub-graphs of their own ...
142 |         nested_roots <- intersect(below, meta_terms)
143 |         ## ... so they and everything beneath them are excluded here
144 |         nested <- c(unlist(offspring[nested_roots]), nested_roots)
145 |         ## the root itself always belongs to its sub-graph
146 |         element[[k]] <- c(setdiff(below, nested), root)
147 |     }
148 | 
149 |     ## Historical note: an earlier variant added a "meta" cluster made of
150 |     ## meta_terms and unfolded the result into a data.frame:
151 |     ##   len <- lapply(element, length)
152 |     ##   res <- data.frame(cluster = rep(c(meta_terms, "meta"), times = len),
153 |     ##                     element = unlist(element))
154 |     ##   return(na.omit(res))
155 |     ## The named list built above is what is returned now.
156 | 
157 |     names(element) <- meta_terms
158 |     return(element)
159 | }
160 | 
161 | #' remove close relation in meta_terms
162 | #'
163 | #' @param meta_terms character, all cluster ids but "meta"
164 | #' @param ont ontology
165 | #' @param ICT numeric, topological information content value
166 | #'
167 | #' @return character, meta_terms with fewer nodes
168 | #' @noRd
169 | #'
170 | remove_close <- function(meta_terms, ont, ICT) {
171 |     parents <- getParents(ont)
172 |     ## candidates are always compared against the full, original set of meta-terms
173 |     redundant <- character(0)
174 |     for (child in meta_terms) {
175 |         for (parent in intersect(parents[[child]], meta_terms)) {
176 |             ## flag a child whose ICT is at most 1.2 times that of a parent meta-term
177 |             if (ICT[parent] != 0 && ICT[child] / ICT[parent] <= 1.2) {
178 |                 redundant <- c(redundant, child)
179 |                 break
180 |             }
181 |         }
182 |     }
183 |     ## the input is handed back untouched when nothing was flagged
184 |     if (length(redundant) == 0) {
185 |         return(meta_terms)
186 |     }
187 |     return(setdiff(meta_terms, redundant))
188 | }
189 | 
190 | 


--------------------------------------------------------------------------------
/R/readBlast2go.R:
--------------------------------------------------------------------------------
 1 | ##'
 2 | ##' given a BLAST2GO file, this function extracts the information from it and make it use for TERM2GENE.
 3 | ##' @title read.blast2go
 4 | ##' @param file BLAST2GO file
 5 | ##' @param add_indirect_GO whether add indirect GO annotation 
 6 | ##' @importFrom rlang check_installed
 7 | ##' @return a data frame with three columns: GENE, GO and ONTOLOGY
 8 | ##' @export
 9 | read.blast2go <- function(file, add_indirect_GO = FALSE) {
10 |     check_installed("readr", 'for `read.blast2go()`.')
11 |     check_installed('tidyr', 'for `read.blast2go()`.')
12 |     check_installed('tidyselect', 'for `read.blast2go()`.')
13 | 
14 |     # blast2go <- utils::read.table(file, header = TRUE, sep = "\t", stringsAsFactors = FALSE, fill = TRUE) # has bugs
15 |     blast2go <- yulab.utils::yread(file, readr::read_delim)
16 |     x <- blast2go[, c("Sequence Name", "GO Accession", "GO Domains")]
17 |     names(x) <- c("GENE", "GO", "ONTOLOGY")
18 |     x <- x[!is.na(x[["GO"]]), ]
19 |     ## one row per GO term: GO and ONTOLOGY hold ", "-separated lists per gene
20 |     y <- tidyr::separate_rows(x, tidyselect::all_of(c("GO", "ONTOLOGY")), sep = ", ")
21 |     y$GO <- sub("^\\s+", "", y$GO)
22 |     y$ONTOLOGY <- sub("^\\s+", "", y$ONTOLOGY)
23 |     ## translate the single-letter domain codes into ontology names
24 |     ont <- setNames(c("MF", "CC", "BP"), c("F", "C", "P"))
25 |     y$ONTOLOGY <- ont[y$ONTOLOGY]
26 |     y <- as.data.frame(y)
27 | 
28 |     if (!add_indirect_GO) {
29 |         return(y)
30 |     }
31 |     ## use buildGOmap() to append the indirect annotation
32 |     buildGOmap(y)
33 | }
34 | 
35 | 
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/R/termSim.R:
--------------------------------------------------------------------------------
 1 | ##'termSim
 2 | ##'
 3 | ##'measuring similarities between two term vectors.
 4 | ##'
 5 | ##'provide two term vectors, this function will calculate their similarities.
 6 | ##'
 7 | ##'@param t1 term vector
 8 | ##'@param t2 term vector
 9 | ##'@param method one of "Wang", "Resnik", "Rel", "Jiang", and "Lin", "TCSS".
10 | ##'@param semData GOSemSimDATA object
11 | ##'@return score matrix
12 | ##'@export
13 | ##'@author Guangchuang Yu \url{http://guangchuangyu.github.io}
14 | termSim <- function(t1,
15 |                     t2,
16 |                     semData,
17 |                     method=c("Wang","Resnik","Rel","Jiang","Lin", "TCSS")
18 |                     ) {
19 | 
20 |     method <- match.arg(method)
21 | 
22 |     ## nothing to compare when either side holds no usable term
23 |     if (all(is.na(t1)) || all(is.na(t2))) {
24 |         return(NA)
25 |     }
26 | 
27 |     ## Only missing terms are removed; duplicates are kept on purpose:
28 |     ## genes in cluster may share go terms,
29 |     ## and should affect similarity of clusters in clusterSim and mclusterSim.
30 |     t1 <- t1[!is.na(t1)]
31 |     t2 <- t2[!is.na(t2)]
32 |     ## dispatch on the measure; the IC-based ones share one implementation
33 |     switch(method,
34 |            Resnik = ,
35 |            Jiang = ,
36 |            Lin = ,
37 |            Rel = infoContentMethod(t1, t2, method = method, semData),
38 |            Wang = wangMethod(t1, t2, semData@ont),
39 |            TCSS = tcssMethod(t1, t2, semData)
40 |            )
41 | }
42 | 


--------------------------------------------------------------------------------
/R/utilities.R:
--------------------------------------------------------------------------------
  1 | .initial <- function() {  # create the package's working environments and cache 'gotbl'
  2 |     pos <- 1
  3 |     envir <- as.environment(pos)  # position 1 of the search path
  4 |     assign(".GOSemSimEnv", new.env(), envir = envir)
  5 |     assign(".SemSimCache", new.env(), envir = envir)
  6 |     assign(".ancCache", new.env(), envir = envir)
  7 |     .GOSemSimEnv <- get(".GOSemSimEnv", envir=.GlobalEnv)
  8 |     ## tryCatch() has no handlers here, so a failure of data() still propagates
  9 |     tryCatch(utils::data(list="gotbl",
 10 |                          package="GOSemSim"))
 11 |     gotbl <- get("gotbl")
 12 |     assign("gotbl", gotbl, envir = .GOSemSimEnv)  # keep the table in the package environment
 13 |     rm(gotbl, envir = .GlobalEnv)  # NOTE(review): assumes data() left 'gotbl' in the global environment -- confirm
 14 | }
 15 | 
 16 | get_gosemsim_env <- function() {
 17 |     ## create the environment on first use, then hand it back
 18 |     if (!exists(".GOSemSimEnv")) .initial()
 19 | 
 20 |     return(get(".GOSemSimEnv"))
 21 | }
 22 | 
 23 | ##' load OrgDb
 24 | ##'
 25 | ##' 
 26 | ##' @title load_OrgDb
 27 | ##' @param OrgDb OrgDb object or OrgDb name
 28 | ##' @return OrgDb object
 29 | ##' @importFrom methods is
 30 | ##' @importFrom utils getFromNamespace 
 31 | ##' @export
 32 | ##' @author Guangchuang Yu \url{https://yulab-smu.top}
 33 | load_OrgDb <- function(OrgDb) {
 34 |     ## anything that is not a character name is handed back untouched
 35 |     if (!is(OrgDb, "character")) {
 36 |         return(OrgDb)
 37 |     }
 38 | 
 39 |     ## the name is used twice: as the namespace to search and as the
 40 |     ## object to fetch from it (no require()/eval(parse()) involved)
 41 |     db <- utils::getFromNamespace(OrgDb, OrgDb)
 42 |     return(db)
 43 | }
 44 | 
 45 | ##' @importFrom GO.db GOMFANCESTOR
 46 | ##' @importFrom GO.db GOBPANCESTOR
 47 | ##' @importFrom GO.db GOCCANCESTOR
 48 | getAncestors <- function(ont) {
 49 |     ## anything but the three GO branches is looked up via get_onto_data()
 50 |     if (!(ont %in% c("MF", "BP", "CC"))) {
 51 |         return(get_onto_data(ont, output = 'list', 'ancestor'))
 52 |     }
 53 |     ## GO.db map of the requested branch, converted to a plain list
 54 |     ancestor_map <- switch(ont,
 55 |                            MF = GOMFANCESTOR,
 56 |                            BP = GOBPANCESTOR,
 57 |                            CC = GOCCANCESTOR
 58 |                            )
 59 |     return(AnnotationDbi::as.list(ancestor_map))
 60 | }
 61 | 
 62 | ##' @importFrom GO.db GOMFPARENTS
 63 | ##' @importFrom GO.db GOBPPARENTS
 64 | ##' @importFrom GO.db GOCCPARENTS
 65 | getParents <- function(ont) {
 66 |     ## anything but the three GO branches is looked up via get_onto_data()
 67 |     if (!(ont %in% c("MF", "BP", "CC"))) {
 68 |         return(get_onto_data(ont, output = 'list', 'parent'))
 69 |     }
 70 |     ## GO.db map of the requested branch, converted to a plain list
 71 |     parent_map <- switch(ont,
 72 |                          MF = GOMFPARENTS,
 73 |                          BP = GOBPPARENTS,
 74 |                          CC = GOCCPARENTS
 75 |                          )
 76 |     return(AnnotationDbi::as.list(parent_map))
 77 | }
 78 | 
 79 | ##' @importFrom GO.db GOMFOFFSPRING
 80 | ##' @importFrom GO.db GOBPOFFSPRING
 81 | ##' @importFrom GO.db GOCCOFFSPRING
 82 | getOffsprings <- function(ont) {
 83 |     ## Anything other than MF/BP/CC is delegated to get_onto_data().
 84 |     if (!(ont %in% c("MF", "BP", "CC"))) {
 85 |         return(get_onto_data(ont, output = 'list', 'offspring'))
 86 |     }
 87 | 
 88 |     ## MF/BP/CC: convert the matching GO.db offspring map with as.list().
 89 |     go_map <- switch(ont,
 90 |                      MF = GOMFOFFSPRING,
 91 |                      BP = GOBPOFFSPRING,
 92 |                      CC = GOCCOFFSPRING)
 93 |     AnnotationDbi::as.list(go_map)
 94 | }
 95 | 
 96 | ##' @importFrom GO.db GOTERM
 97 | ##' @importFrom AnnotationDbi toTable
 98 | prepare_relation_df <- function() {
 99 |     ## Keep columns 2-4 of the GOTERM table, dropping repeated rows.
100 |     gtb <- unique(toTable(GOTERM)[, c(2:4)])
101 | 
102 |     ## Expand the parent map of one ontology into long format: one row
103 |     ## per (term, relationship, parent) triple.
104 |     parents_long <- function(ont) {
105 |         id <- gtb[["go_id"]][gtb[["Ontology"]] == ont]
106 |         pid <- getParents(ont)[id]
107 |         n_parent <- sapply(pid, length)
108 | 
109 |         data.frame(id = rep(names(pid), times = n_parent),
110 |                    relationship = unlist(lapply(pid, names)),
111 |                    parent = unlist(pid),
112 |                    stringsAsFactors = FALSE)
113 |     }
114 |     per_ontology <- lapply(c("BP", "MF", "CC"), parents_long)
115 |     ptb <- do.call('rbind', per_ontology)
116 | 
117 |     ## Attach the term columns to every parent record, then write the
118 |     ## result to gotbl.rda (a relative path, so it lands in the current
119 |     ## working directory) before returning it invisibly.
120 |     gotbl <- merge(gtb, ptb, by.x = "go_id", by.y = "id")
121 |     save(gotbl, file = "gotbl.rda", compress = "xz")
122 |     invisible(gotbl)
123 | }
124 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
1 | ##' @importFrom yulab.utils yulab_msg
2 | .onAttach <- function(libname, pkgname) {
3 |   ## Emit the yulab_msg() text for this package as a startup message.
4 |   startup_text <- yulab_msg(pkgname)
5 |   packageStartupMessage(startup_text)
6 | }


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output:
 3 |   md_document:
 4 |     variant: gfm
 5 | html_preview: false
 6 | ---
 7 | 
 8 | 
 9 | ```{r echo=FALSE, results="hide", message=FALSE}
10 | library("badger")
11 | library("yulab.utils")
12 | ```
13 | 
14 | #  GOSemSim: GO semantic similarity measurement
15 | 
16 | 
17 | [![Bioc](http://www.bioconductor.org/shields/years-in-bioc/GOSemSim.svg)](https://www.bioconductor.org/packages/devel/bioc/html/GOSemSim.html#since)
18 | [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
19 | [![platform](http://www.bioconductor.org/shields/availability/devel/GOSemSim.svg)](https://www.bioconductor.org/packages/devel/bioc/html/GOSemSim.html#archives)
20 | [![codecov](https://codecov.io/gh/GuangchuangYu/GOSemSim/branch/master/graph/badge.svg)](https://codecov.io/gh/GuangchuangYu/GOSemSim/)
21 | 
22 | 
23 | `r badge_bioc_release("GOSemSim", "green")`
24 | `r badge_devel("guangchuangyu/GOSemSim", "green")`
25 | `r badge_bioc_download("GOSemSim", "total", "blue")`
26 | `r badge_bioc_download("GOSemSim", "month", "blue")`
27 | 
28 | ```{r comment="", echo=FALSE, results='asis'}
29 | cat(packageDescription('GOSemSim')$Description)
30 | ```
31 | 
32 | 
33 | 
34 | ## :writing_hand: Authors
35 | 
36 | Guangchuang YU <https://yulab-smu.top>
37 | 
38 | School of Basic Medical Sciences, Southern Medical University
39 | 
40 | 
41 | Learn more at <https://yulab-smu.top/contribution-knowledge-mining/>.
42 | 
43 | 
44 | If you use `r Biocpkg('GOSemSim')` in published research, please cite:
45 | 
46 | + **Yu G**. [Gene Ontology Semantic Similarity Analysis Using GOSemSim](http://dx.doi.org/10.1007/978-1-0716-0301-7_11). In: Kidder B. (eds) Stem Cell Transcriptional Networks. **_Methods in Molecular Biology_**, 2020, 2117:207-215. Humana, New York, NY. 
47 | + **Yu G**^#^, Li F^#^, Qin Y, Bo X^\*^, Wu Y and Wang S^\*^. [GOSemSim: an R package for measuring semantic similarity among GO terms and gene products](http://dx.doi.org/10.1093/bioinformatics/btq064). **_Bioinformatics_**. 2010, 26(7):976-978. 
48 | 
49 | 
50 | ## :arrow_double_down: Installation
51 | 
52 | Get the released version from Bioconductor:
53 | 
54 | ```r
55 | ## try http:// if https:// URLs are not supported
56 | if (!requireNamespace("BiocManager", quietly=TRUE))
57 |     install.packages("BiocManager")
58 | ## BiocManager::install("BiocUpgrade") ## you may need this
59 | BiocManager::install("GOSemSim")
60 | ```
61 | 
62 | Or the development version from GitHub:
63 | 
64 | ```r
65 | ## install.packages("remotes")
66 | remotes::install_github("GuangchuangYu/GOSemSim")
67 | ```
68 | 
69 | 
70 | 
71 | ## :sparkling_heart: Contributing
72 | 
73 | We welcome any contributions! By participating in this project you agree to
74 | abide by the terms outlined in the [Contributor Code of Conduct](CONDUCT.md).
75 | 
76 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GOSemSim: GO semantic similarity measurement
 2 | 
 3 | [![Bioc](http://www.bioconductor.org/shields/years-in-bioc/GOSemSim.svg)](https://www.bioconductor.org/packages/devel/bioc/html/GOSemSim.html#since)
 4 | [![Project Status: Active - The project has reached a stable, usable
 5 | state and is being actively
 6 | developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
 7 | [![platform](http://www.bioconductor.org/shields/availability/devel/GOSemSim.svg)](https://www.bioconductor.org/packages/devel/bioc/html/GOSemSim.html#archives)
 8 | [![codecov](https://codecov.io/gh/GuangchuangYu/GOSemSim/branch/master/graph/badge.svg)](https://codecov.io/gh/GuangchuangYu/GOSemSim/)
 9 | 
10 | [![](https://img.shields.io/badge/release%20version-2.30.0-green.svg)](https://www.bioconductor.org/packages/GOSemSim)
11 | [![](https://img.shields.io/badge/devel%20version-2.31.1-green.svg)](https://github.com/guangchuangyu/GOSemSim)
12 | [![](https://img.shields.io/badge/download-817806/total-blue.svg)](https://bioconductor.org/packages/stats/bioc/GOSemSim)
13 | [![](https://img.shields.io/badge/download-17918/month-blue.svg)](https://bioconductor.org/packages/stats/bioc/GOSemSim)
14 | 
15 | The semantic comparisons of Gene Ontology (GO) annotations provide
16 | quantitative ways to compute similarities between genes and gene groups,
17 | and have become an important basis for many bioinformatics analysis
18 | approaches. GOSemSim is an R package for semantic similarity computation
19 | among GO terms, sets of GO terms, gene products and gene clusters.
20 | GOSemSim implemented five methods proposed by Resnik, Schlicker, Jiang,
21 | Lin and Wang respectively.
22 | 
23 | ## :writing_hand: Authors
24 | 
25 | Guangchuang YU <https://yulab-smu.top>
26 | 
27 | School of Basic Medical Sciences, Southern Medical University
28 | 
29 | Learn more at <https://yulab-smu.top/contribution-knowledge-mining/>.
30 | 
31 | If you use [GOSemSim](http://bioconductor.org/packages/GOSemSim) in
32 | published research, please cite:
33 | 
34 | - **Yu G**. [Gene Ontology Semantic Similarity Analysis Using
35 |   GOSemSim](http://dx.doi.org/10.1007/978-1-0716-0301-7_11). In:
36 |   Kidder B. (eds) Stem Cell Transcriptional Networks. ***Methods in
37 |   Molecular Biology***, 2020, 2117:207-215. Humana, New York, NY.
38 | - **Yu G**<sup>\#</sup>, Li F<sup>\#</sup>, Qin Y, Bo X<sup>\*</sup>, Wu
39 |   Y and Wang S<sup>\*</sup>. [GOSemSim: an R package for measuring
40 |   semantic similarity among GO terms and gene
41 |   products](http://dx.doi.org/10.1093/bioinformatics/btq064).
42 |   ***Bioinformatics***. 2010, 26(7):976-978.
43 | 
44 | ## :arrow_double_down: Installation
45 | 
46 | Get the released version from Bioconductor:
47 | 
48 | ``` r
49 | ## try http:// if https:// URLs are not supported
50 | if (!requireNamespace("BiocManager", quietly=TRUE))
51 |     install.packages("BiocManager")
52 | ## BiocManager::install("BiocUpgrade") ## you may need this
53 | BiocManager::install("GOSemSim")
54 | ```
55 | 
56 | Or the development version from GitHub:
57 | 
58 | ``` r
59 | ## install.packages("remotes")
60 | remotes::install_github("GuangchuangYu/GOSemSim")
61 | ```
62 | 
63 | ## :sparkling_heart: Contributing
64 | 
65 | We welcome any contributions! By participating in this project you agree
66 | to abide by the terms outlined in the [Contributor Code of
67 | Conduct](CONDUCT.md).
68 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | # DO NOT CHANGE the "init" and "install" sections below
 2 | 
 3 | # Download script file from GitHub
 4 | init:
 5 |   ps: |
 6 |         $ErrorActionPreference = "Stop"
 7 |         Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
 8 |         Import-Module '..\appveyor-tool.ps1'
 9 | 
10 | install:
11 |   ps: Bootstrap
12 | 
13 | # Adapt as necessary starting from here
14 | 
15 | build_script:
16 |   - travis-tool.sh install_bioc BiocStyle GO.db org.Hs.eg.db  
17 |   - travis-tool.sh install_deps
18 | 
19 | test_script:
20 |   - travis-tool.sh run_tests
21 | 
22 | on_failure:
23 |   - 7z a failure.zip *.Rcheck\*
24 |   - appveyor PushArtifact failure.zip
25 | 
26 | artifacts:
27 |   - path: '*.Rcheck\**\*.log'
28 |     name: Logs
29 | 
30 |   - path: '*.Rcheck\**\*.out'
31 |     name: Logs
32 | 
33 |   - path: '*.Rcheck\**\*.fail'
34 |     name: Logs
35 | 
36 |   - path: '*.Rcheck\**\*.Rout'
37 |     name: Logs
38 | 
39 |   - path: '\*_*.tar.gz'
40 |     name: Linux Package
41 | 
42 |   - path: '\*_*.zip'
43 |     name: Windows Package
44 | 
45 | notifications:
46 | - provider: Email
47 |   to:
48 |   - gcyu@connect.hku.hk
49 |   on_build_success: false
50 | 
51 | 


--------------------------------------------------------------------------------
/data/gotbl.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GOSemSim/507194f48737a687d34a566e136aa0cf340038e5/data/gotbl.rda


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("To cite GOSemSim in publications use:")
 2 | 
 3 | ## bibentry() is the current replacement for the old-style citEntry().
 4 | bibentry(bibtype = "Article",
 5 |          title = "Gene Ontology Semantic Similarity Analysis Using GOSemSim",
 6 |          author = person("Guangchuang", "Yu"),
 7 |          journal = "Methods in Molecular Biology",
 8 |          shortjournal = "Methods Mol. Biol.",
 9 |          year = "2020",
10 |          volume = "2117",
11 |          pages = "207-215",
12 |          PMID = "31960380",
13 |          issn = "1940-6029",
14 |          doi = "10.1007/978-1-0716-0301-7_11",
15 |          textVersion = paste("Guangchuang Yu.",
16 |                              "Gene Ontology Semantic Similarity Analysis Using GOSemSim.",
17 |                              "In: Kidder B. (eds) Stem Cell Transcriptional Networks.",
18 |                              "Methods in Molecular Biology, 2020, 2117:207-215.",
19 |                              "Humana, New York, NY."))
20 | 
21 | ## bibentry() with c() on person objects replaces citEntry()/personList().
22 | bibentry(bibtype = "Article",
23 |          title  = "GOSemSim: an R package for measuring semantic similarity among GO terms and gene products",
24 |          author = c(
25 |              as.person("Guangchuang Yu"),
26 |              as.person("Fei Li"),
27 |              as.person("Yide Qin"),
28 |              as.person("Xiaochen Bo"),
29 |              as.person("Yibo Wu"),
30 |              as.person("Shengqi Wang")),
31 |          journal = "Bioinformatics",
32 |          year    = "2010",
33 |          volume  = "26",
34 |          number  = "7",
35 |          pages   = "976-978",
36 |          PMID    = "20179076",
37 |          doi     = "10.1093/bioinformatics/btq064",
38 |          textVersion = paste("Guangchuang Yu, Fei Li, Yide Qin, Xiaochen Bo, Yibo Wu and Shengqi Wang.",
39 |              "GOSemSim: an R package for measuring semantic similarity among GO terms and gene products.",
40 |              "Bioinformatics 2010 26(7):976-978")
41 |          )
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/man/GOSemSim-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/GOSemSim-package.R
 3 | \docType{package}
 4 | \name{GOSemSim-package}
 5 | \alias{GOSemSim}
 6 | \alias{GOSemSim-package}
 7 | \title{GOSemSim: GO-terms Semantic Similarity Measures}
 8 | \description{
 9 | The semantic comparisons of Gene Ontology (GO) annotations provide quantitative ways to compute similarities between genes and gene groups, and have become an important basis for many bioinformatics analysis approaches. GOSemSim is an R package for semantic similarity computation among GO terms, sets of GO terms, gene products and gene clusters. GOSemSim implemented five methods proposed by Resnik, Schlicker, Jiang, Lin and Wang respectively.
10 | }
11 | \seealso{
12 | Useful links:
13 | \itemize{
14 |   \item \url{https://yulab-smu.top/biomedical-knowledge-mining-book/}
15 |   \item Report bugs at \url{https://github.com/YuLab-SMU/GOSemSim/issues}
16 | }
17 | 
18 | }
19 | \author{
20 | \strong{Maintainer}: Guangchuang Yu \email{guangchuangyu@gmail.com}
21 | 
22 | Other contributors:
23 | \itemize{
24 |   \item Alexey Stukalov \email{astukalov@gmail.com} [contributor]
25 |   \item Pingfan Guo \email{1178431277@qq.com} [contributor]
26 |   \item Chuanle Xiao \email{xiaochuanle@126.com} [contributor]
27 |   \item Lluís Revilla Sancho \email{lluis.revilla@gmail.com} [contributor]
28 | }
29 | 
30 | }
31 | \keyword{internal}
32 | 


--------------------------------------------------------------------------------
/man/GOSemSimDATA-class.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/godata.R
 3 | \docType{class}
 4 | \name{GOSemSimDATA-class}
 5 | \alias{GOSemSimDATA-class}
 6 | \alias{show,GOSemSimDATA-method}
 7 | \title{Class "GOSemSimDATA"
 8 | This class stores IC and gene to go mapping for semantic similarity measurement}
 9 | \description{
10 | Class "GOSemSimDATA"
11 | This class stores IC and gene to go mapping for semantic similarity measurement
12 | }
13 | \section{Slots}{
14 | 
15 | \describe{
16 | \item{\code{keys}}{gene ID}
17 | 
18 | \item{\code{ont}}{ontology}
19 | 
20 | \item{\code{IC}}{IC data}
21 | 
22 | \item{\code{geneAnno}}{gene to GO mapping}
23 | 
24 | \item{\code{tcssdata}}{tcssdata}
25 | 
26 | \item{\code{metadata}}{metadata}
27 | }}
28 | 
29 | \keyword{classes}
30 | 


--------------------------------------------------------------------------------
/man/buildGOmap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/buildGOmap.R
 3 | \name{buildGOmap}
 4 | \alias{buildGOmap}
 5 | \title{buildGOmap}
 6 | \usage{
 7 | buildGOmap(TERM2GENE)
 8 | }
 9 | \arguments{
10 | \item{TERM2GENE}{data.frame with two or three columns of GO TERM, GENE and ONTOLOGY (optional)}
11 | }
12 | \value{
13 | data.frame, GO annotation with direct and indirect annotation
14 | }
15 | \description{
16 | Adding indirect GO annotation
17 | }
18 | \details{
19 | provided by a data.frame of GO TERM (column 1), GENE (column 2) and ONTOLOGY (optional) that
20 | describes GO direct annotation, 
21 | this function will add indirect GO annotation of genes.
22 | }
23 | \author{
24 | Yu Guangchuang
25 | }
26 | 


--------------------------------------------------------------------------------
/man/clusterSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/clusterSim.R
 3 | \name{clusterSim}
 4 | \alias{clusterSim}
 5 | \title{Semantic Similarity Between Two Gene Clusters}
 6 | \usage{
 7 | clusterSim(
 8 |   cluster1,
 9 |   cluster2,
10 |   semData,
11 |   measure = "Wang",
12 |   drop = "IEA",
13 |   combine = "BMA"
14 | )
15 | }
16 | \arguments{
17 | \item{cluster1}{A set of gene IDs.}
18 | 
19 | \item{cluster2}{Another set of gene IDs.}
20 | 
21 | \item{semData}{GOSemSimDATA object}
22 | 
23 | \item{measure}{One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.}
24 | 
25 | \item{drop}{A set of evidence codes based on which certain annotations are
26 | dropped. Use NULL to keep all GO annotations.}
27 | 
28 | \item{combine}{One of "max", "avg", "rcmax", "BMA" methods, for combining
29 | semantic similarity scores of multiple GO terms associated with protein or
30 | multiple proteins associated with protein cluster.}
31 | }
32 | \value{
33 | similarity
34 | }
35 | \description{
36 | Given two gene clusters, this function calculates semantic similarity between
37 | them.
38 | }
39 | \examples{
40 | 
41 |     d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
42 |     cluster1 <- c("835", "5261","241", "994")
43 | cluster2 <- c("307", "308", "317", "321", "506", "540", "378", "388", "396")
44 | clusterSim(cluster1, cluster2, semData=d, measure="Wang")
45 | 
46 | }
47 | \references{
48 | Yu et al. (2010) GOSemSim: an R package for measuring semantic
49 | similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
50 | England), 26:7 976--978, April 2010. ISSN 1367-4803
51 | \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
52 | PMID: 20179076
53 | }
54 | \seealso{
55 | \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{geneSim}}
56 | \code{\link{mgeneSim}} \code{\link{mclusterSim}}
57 | }
58 | \keyword{manip}
59 | 


--------------------------------------------------------------------------------
/man/combineScores.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/combineMethods.R
 3 | \name{combineScores}
 4 | \alias{combineScores}
 5 | \title{combining similarity matrix to similarity score}
 6 | \usage{
 7 | combineScores(SimScores, combine)
 8 | }
 9 | \arguments{
10 | \item{SimScores}{similarity matrix}
11 | 
12 | \item{combine}{combine method}
13 | }
14 | \value{
15 | similarity value
16 | }
17 | \description{
18 | Functions for combining similarity matrix to similarity score
19 | }
20 | \author{
21 | Guangchuang Yu \url{http://guangchuangyu.github.io}
22 | }
23 | 


--------------------------------------------------------------------------------
/man/geneSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/geneSim.R
 3 | \name{geneSim}
 4 | \alias{geneSim}
 5 | \title{Semantic Similarity Between two Genes}
 6 | \usage{
 7 | geneSim(gene1, gene2, semData, measure = "Wang", drop = "IEA", combine = "BMA")
 8 | }
 9 | \arguments{
10 | \item{gene1}{Entrez gene id.}
11 | 
12 | \item{gene2}{Another entrez gene id.}
13 | 
14 | \item{semData}{GOSemSimDATA object}
15 | 
16 | \item{measure}{One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.}
17 | 
18 | \item{drop}{A set of evidence codes based on which certain annotations are
19 | dropped. Use NULL to keep all GO annotations.}
20 | 
21 | \item{combine}{One of "max", "avg", "rcmax", "BMA" methods, for combining
22 | semantic similarity scores of multiple GO terms associated with protein or
23 | multiple proteins associated with protein cluster.}
24 | }
25 | \value{
26 | list of similarity value and corresponding GO.
27 | }
28 | \description{
29 | Given two genes, this function will calculate the semantic similarity between
30 | them, and return their semantic similarity and the corresponding GO terms
31 | }
32 | \examples{
33 | 
34 | d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
35 | geneSim("241", "251", semData=d, measure="Wang")
36 | 
37 | }
38 | \references{
39 | Yu et al. (2010) GOSemSim: an R package for measuring semantic
40 | similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
41 | England), 26:7 976--978, April 2010. ISSN 1367-4803
42 | \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
43 | PMID: 20179076
44 | }
45 | \seealso{
46 | \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{mgeneSim}}
47 | \code{\link{clusterSim}} \code{\link{mclusterSim}}
48 | }
49 | \keyword{manip}
50 | 


--------------------------------------------------------------------------------
/man/goSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/goSim.R
 3 | \name{goSim}
 4 | \alias{goSim}
 5 | \title{Semantic Similarity Between Two GO Terms}
 6 | \usage{
 7 | goSim(GOID1, GOID2, semData, measure = "Wang")
 8 | }
 9 | \arguments{
10 | \item{GOID1}{GO ID 1.}
11 | 
12 | \item{GOID2}{GO ID 2.}
13 | 
14 | \item{semData}{GOSemSimDATA object}
15 | 
16 | \item{measure}{One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.}
17 | }
18 | \value{
19 | similarity
20 | }
21 | \description{
22 | Given two GO IDs, this function calculates their semantic similarity.
23 | }
24 | \examples{
25 | 
26 |     d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
27 | goSim("GO:0004022", "GO:0005515", semData=d, measure="Wang")
28 | 
29 | }
30 | \references{
31 | Yu et al. (2010) GOSemSim: an R package for measuring semantic
32 | similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
33 | England), 26:7 976--978, April 2010. ISSN 1367-4803
34 | \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
35 | PMID: 20179076
36 | }
37 | \seealso{
38 | \code{\link{mgoSim}}
39 | \code{\link{geneSim}}
40 | \code{\link{mgeneSim}}
41 | \code{\link{clusterSim}}
42 | \code{\link{mclusterSim}}
43 | }
44 | \keyword{manip}
45 | 


--------------------------------------------------------------------------------
/man/go_term_table.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/GOSemSim-package.R
 3 | \docType{data}
 4 | \name{go_term_table}
 5 | \alias{go_term_table}
 6 | \alias{GO}
 7 | \alias{gotbl}
 8 | \title{Information content of GO terms}
 9 | \description{
10 | These datasets are the information contents of GO terms.
11 | }
12 | \references{
13 | Yu et al. (2010) GOSemSim: an R package for measuring semantic
14 | similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
15 | England), 26:7 976--978, April 2010. ISSN 1367-4803
16 | \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
17 | PMID: 20179076
18 | }
19 | \keyword{datasets}
20 | 


--------------------------------------------------------------------------------
/man/godata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/godata.R
 3 | \name{godata}
 4 | \alias{godata}
 5 | \title{godata}
 6 | \usage{
 7 | godata(
 8 |   OrgDb = NULL,
 9 |   annoDb = NULL,
10 |   keytype = "ENTREZID",
11 |   ont,
12 |   computeIC = TRUE,
13 |   processTCSS = FALSE,
14 |   cutoff = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{OrgDb}{OrgDb object (will be removed in future, please use annoDb instead)}
19 | 
20 | \item{annoDb}{GO annotation database, 
21 | can be OrgDb or a data.frame containing three columns of 'GENE', 'GO' and 'ONTOLOGY'.}
22 | 
23 | \item{keytype}{keytype}
24 | 
25 | \item{ont}{one of 'BP', 'MF', 'CC'}
26 | 
27 | \item{computeIC}{logical, whether to compute IC}
28 | 
29 | \item{processTCSS}{logical, whether to process TCSS}
30 | 
31 | \item{cutoff}{cutoff of TCSS}
32 | }
33 | \value{
34 | GOSemSimDATA object
35 | }
36 | \description{
37 | prepare GO DATA for measuring semantic similarity
38 | }
39 | \author{
40 | Guangchuang Yu
41 | }
42 | 


--------------------------------------------------------------------------------
/man/infoContentMethod.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ICMethods.R
 3 | \name{infoContentMethod}
 4 | \alias{infoContentMethod}
 5 | \title{information content based methods}
 6 | \usage{
 7 | infoContentMethod(ID1, ID2, method, godata)
 8 | }
 9 | \arguments{
10 | \item{ID1}{Ontology Term}
11 | 
12 | \item{ID2}{Ontology Term}
13 | 
14 | \item{method}{one of "Resnik", "Jiang", "Lin", "Rel" and "TCSS".}
15 | 
16 | \item{godata}{GOSemSimDATA object}
17 | }
18 | \value{
19 | semantic similarity score
20 | }
21 | \description{
22 | Information Content Based Methods for semantic similarity measuring
23 | }
24 | \details{
25 | implemented for methods proposed by Resnik, Jiang, Lin and Schlicker.
26 | }
27 | \author{
28 | Guangchuang Yu \url{https://guangchuangyu.github.io}
29 | }
30 | 


--------------------------------------------------------------------------------
/man/load_OrgDb.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utilities.R
 3 | \name{load_OrgDb}
 4 | \alias{load_OrgDb}
 5 | \title{load_OrgDb}
 6 | \usage{
 7 | load_OrgDb(OrgDb)
 8 | }
 9 | \arguments{
10 | \item{OrgDb}{OrgDb object or OrgDb name}
11 | }
12 | \value{
13 | OrgDb object
14 | }
15 | \description{
16 | load OrgDb
17 | }
18 | \author{
19 | Guangchuang Yu \url{https://yulab-smu.top}
20 | }
21 | 


--------------------------------------------------------------------------------
/man/mclusterSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mclusterSim.R
 3 | \name{mclusterSim}
 4 | \alias{mclusterSim}
 5 | \title{Pairwise Semantic Similarities for a List of Gene Clusters}
 6 | \usage{
 7 | mclusterSim(clusters, semData, measure = "Wang", drop = "IEA", combine = "BMA")
 8 | }
 9 | \arguments{
10 | \item{clusters}{A list of gene clusters.}
11 | 
12 | \item{semData}{GOSemSimDATA object}
13 | 
14 | \item{measure}{One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.}
15 | 
16 | \item{drop}{A set of evidence codes based on which certain annotations are
17 | dropped. Use NULL to keep all GO annotations.}
18 | 
19 | \item{combine}{One of "max", "avg", "rcmax", "BMA" methods, for combining
20 | semantic similarity scores of multiple GO terms associated with protein or
21 | multiple proteins associated with protein cluster.}
22 | }
23 | \value{
24 | similarity matrix
25 | }
26 | \description{
27 | Given a list of gene clusters, this function calculates pairwise semantic
28 | similarities.
29 | }
30 | \examples{
31 | 
32 |  d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
33 |  cluster1 <- c("835", "5261","241")
34 |  cluster2 <- c("578","582")
35 |  cluster3 <- c("307", "308", "317")
36 |  clusters <- list(a=cluster1, b=cluster2, c=cluster3)
37 |  mclusterSim(clusters, semData=d, measure="Wang")
38 | 
39 | }
40 | \references{
41 | Yu et al. (2010) GOSemSim: an R package for measuring semantic
42 | similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
43 | England), 26:7 976--978, April 2010. ISSN 1367-4803
44 | \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
45 | PMID: 20179076
46 | }
47 | \seealso{
48 | \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{geneSim}}
49 | \code{\link{mgeneSim}} \code{\link{clusterSim}}
50 | }
51 | \keyword{manip}
52 | 


--------------------------------------------------------------------------------
/man/mgeneSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mgeneSim.R
 3 | \name{mgeneSim}
 4 | \alias{mgeneSim}
 5 | \title{Pairwise Semantic Similarity for a List of Genes}
 6 | \usage{
 7 | mgeneSim(
 8 |   genes,
 9 |   semData,
10 |   measure = "Wang",
11 |   drop = "IEA",
12 |   combine = "BMA",
13 |   verbose = TRUE
14 | )
15 | }
16 | \arguments{
17 | \item{genes}{A list of entrez gene IDs.}
18 | 
19 | \item{semData}{GOSemSimDATA object}
20 | 
21 | \item{measure}{One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.}
22 | 
23 | \item{drop}{A set of evidence codes based on which certain annotations are
24 | dropped. Use NULL to keep all GO annotations.}
25 | 
26 | \item{combine}{One of "max", "avg", "rcmax", "BMA" methods, for combining
27 | semantic similarity scores of multiple GO terms associated with protein or
28 | multiple proteins associated with protein cluster.}
29 | 
30 | \item{verbose}{show progress bar or not.}
31 | }
32 | \value{
33 | similarity matrix
34 | }
35 | \description{
36 | Given a list of genes, this function calculates pairwise semantic
37 | similarities.
38 | }
39 | \examples{
40 | 
41 | d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
42 | mgeneSim(c("835", "5261","241"), semData=d, measure="Wang")
43 | 
44 | }
45 | \references{
46 | Yu et al. (2010) GOSemSim: an R package for measuring semantic
47 | similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
48 | England), 26:7 976--978, April 2010. ISSN 1367-4803
49 | \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
50 | PMID: 20179076
51 | }
52 | \seealso{
53 | \code{\link{goSim}} \code{\link{mgoSim}} \code{\link{geneSim}}
54 | \code{\link{clusterSim}} \code{\link{mclusterSim}}
55 | }
56 | \keyword{manip}
57 | 


--------------------------------------------------------------------------------
/man/mgoSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mgoSim.R
 3 | \name{mgoSim}
 4 | \alias{mgoSim}
 5 | \title{Semantic Similarity Between two GO terms lists}
 6 | \usage{
 7 | mgoSim(GO1, GO2, semData, measure = "Wang", combine = "BMA")
 8 | }
 9 | \arguments{
10 | \item{GO1}{A set of go terms.}
11 | 
12 | \item{GO2}{Another set of go terms.}
13 | 
14 | \item{semData}{GOSemSimDATA object}
15 | 
16 | \item{measure}{One of "Resnik", "Lin", "Rel", "Jiang", "TCSS" and "Wang" methods.}
17 | 
18 | \item{combine}{One of "max", "avg", "rcmax", "BMA" methods, for combining
19 | semantic similarity scores of multiple GO terms associated with protein or
20 | multiple proteins associated with protein cluster.}
21 | }
22 | \value{
23 | similarity
24 | }
25 | \description{
26 | Given two GO term sets, this function will calculate the semantic similarity
27 | between them, and return their semantic similarity
28 | }
29 | \examples{
30 | 
31 |     d <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
32 | go1 <- c("GO:0004022", "GO:0004024", "GO:0004023")
33 | go2 <- c("GO:0009055", "GO:0020037")
34 | mgoSim("GO:0003824", go2, semData=d, measure="Wang")
35 | mgoSim(go1, go2, semData=d, measure="Wang")
36 | 
37 | }
38 | \references{
39 | Yu et al. (2010) GOSemSim: an R package for measuring semantic
40 | similarity among GO terms and gene products \emph{Bioinformatics} (Oxford,
41 | England), 26:7 976--978, April 2010. ISSN 1367-4803
42 | \url{http://bioinformatics.oxfordjournals.org/cgi/content/abstract/26/7/976}
43 | PMID: 20179076
44 | }
45 | \seealso{
46 | \code{\link{goSim}} \code{\link{geneSim}} \code{\link{mgeneSim}}
47 | \code{\link{clusterSim}} \code{\link{mclusterSim}}
48 | }
49 | \keyword{manip}
50 | 


--------------------------------------------------------------------------------
/man/read-gaf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/parseGAF.R
 3 | \name{read.gaf}
 4 | \alias{read.gaf}
 5 | \alias{parse_gff}
 6 | \title{read.gaf}
 7 | \usage{
 8 | read.gaf(file, asis = FALSE, add_indirect_GO = FALSE)
 9 | 
10 | parse_gff(file, asis = FALSE, add_indirect_GO = FALSE)
11 | }
12 | \arguments{
13 | \item{file}{GAF file}
14 | 
15 | \item{asis}{logical, whether to output the original contents of the file; only takes effect if 'add_indirect_GO = FALSE'}
16 | 
17 | \item{add_indirect_GO}{whether to add indirect GO annotation}
18 | }
19 | \value{
20 | A data.frame. The original table if 'asis' takes effect, otherwise a table with 3 columns: 'GENE', 'GO' and 'ONTOLOGY'
21 | }
22 | \description{
23 | parse GAF files
24 | }
25 | \details{
26 | given a GAF file, this function extracts the information from it
27 | }
28 | 


--------------------------------------------------------------------------------
/man/read.blast2go.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/readBlast2go.R
 3 | \name{read.blast2go}
 4 | \alias{read.blast2go}
 5 | \title{read.blast2go}
 6 | \usage{
 7 | read.blast2go(file, add_indirect_GO = FALSE)
 8 | }
 9 | \arguments{
10 | \item{file}{BLAST2GO file}
11 | 
12 | \item{add_indirect_GO}{whether to add indirect GO annotation}
13 | }
14 | \value{
15 | a data frame with three columns: GENE, GO and ONTOLOGY
16 | }
17 | \description{
18 | Given a BLAST2GO file, this function extracts the information from it and makes it usable for TERM2GENE.
19 | }
20 | 


--------------------------------------------------------------------------------
/man/tcss_cutoff.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/TCSScutoff.R
 3 | \name{tcss_cutoff}
 4 | \alias{tcss_cutoff}
 5 | \title{determine the topological cutoff for TCSS method}
 6 | \usage{
 7 | tcss_cutoff(
 8 |   OrgDb = NULL,
 9 |   keytype = "ENTREZID",
10 |   ont,
11 |   combine_method = "max",
12 |   ppidata
13 | )
14 | }
15 | \arguments{
16 | \item{OrgDb}{OrgDb object}
17 | 
18 | \item{keytype}{keytype}
19 | 
20 | \item{ont}{ontology : "BP", "MF", "CC"}
21 | 
22 | \item{combine_method}{"max", "BMA", "avg", "rcmax", "rcmax.avg"}
23 | 
24 | \item{ppidata}{A data.frame containing a positive set and a negative set.
25 | The positive set consists of PPI pairs that have already been verified.
26 | ppidata has three columns: columns 1 and 2 are character, and column 3
27 | must be a logical value (TRUE/FALSE).}
28 | }
29 | \value{
30 | numeric, topological cutoff for given parameters
31 | }
32 | \description{
33 | determine the topological cutoff for TCSS method
34 | }
35 | \examples{
36 | \dontrun{
37 |     library(org.Hs.eg.db)
38 |     library(STRINGdb)
39 | 
40 |     string_db <- STRINGdb$new(version = "11.0", species = 9606,
41 |     score_threshold = 700)
42 |     string_proteins <- string_db$get_proteins()
43 | 
44 |     #get relationship
45 |     ppi <- string_db$get_interactions(string_proteins$protein_external_id)
46 | 
47 |     ppi$from <- vapply(ppi$from, function(e)
48 |                        strsplit(e, "9606.")[[1]][2], character(1))
49 |     ppi$to <- vapply(ppi$to, function(e)
50 |                        strsplit(e, "9606.")[[1]][2], character(1))
51 |     len <- nrow(ppi)
52 | 
53 |     #select length
54 |     s_len <- 100
55 |     pos_1 <- sample(len, s_len, replace = T)
56 |     #negative set
57 |     pos_2 <- sample(len, s_len, replace = T)
58 |     pos_3 <- sample(len, s_len, replace = T)
59 |     #union as ppidata
60 |     ppidata <- data.frame(pro1 = c(ppi$from[pos_1], ppi$from[pos_2]),
61 |      pro2 = c(ppi$to[pos_1], ppi$to[pos_3]),
62 |      label = c(rep(TRUE, s_len), rep(FALSE, s_len)),
63 |      stringsAsFactors = FALSE)
64 | 
65 |     cutoff <- tcss_cutoff(OrgDb = org.Hs.eg.db, keytype = "ENSEMBLPROT",
66 |     ont = "BP", combine_method = "max", ppidata)
67 | }
68 | }
69 | 


--------------------------------------------------------------------------------
/man/termSim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/termSim.R
 3 | \name{termSim}
 4 | \alias{termSim}
 5 | \title{termSim}
 6 | \usage{
 7 | termSim(
 8 |   t1,
 9 |   t2,
10 |   semData,
11 |   method = c("Wang", "Resnik", "Rel", "Jiang", "Lin", "TCSS")
12 | )
13 | }
14 | \arguments{
15 | \item{t1}{term vector}
16 | 
17 | \item{t2}{term vector}
18 | 
19 | \item{semData}{GOSemSimDATA object}
20 | 
21 | \item{method}{one of "Wang", "Resnik", "Rel", "Jiang", "Lin" and "TCSS".}
22 | }
23 | \value{
24 | score matrix
25 | }
26 | \description{
27 | measuring similarities between two term vectors.
28 | }
29 | \details{
30 | Given two term vectors, this function calculates their similarities.
31 | }
32 | \author{
33 | Guangchuang Yu \url{http://guangchuangyu.github.io}
34 | }
35 | 


--------------------------------------------------------------------------------
/man/wangMethod_internal.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/WangMethod.R
 3 | \name{wangMethod_internal}
 4 | \alias{wangMethod_internal}
 5 | \title{wangMethod}
 6 | \usage{
 7 | wangMethod_internal(ID1, ID2, ont = "BP")
 8 | }
 9 | \arguments{
10 | \item{ID1}{Ontology Term}
11 | 
12 | \item{ID2}{Ontology Term}
13 | 
14 | \item{ont}{Ontology}
15 | }
16 | \value{
17 | semantic similarity score
18 | }
19 | \description{
20 | Method Wang for semantic similarity measuring
21 | }
22 | \author{
23 | Guangchuang Yu \url{https://yulab-smu.top}
24 | }
25 | 


--------------------------------------------------------------------------------
/src/GOSemSim.dll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YuLab-SMU/GOSemSim/507194f48737a687d34a566e136aa0cf340038e5/src/GOSemSim.dll


--------------------------------------------------------------------------------
/src/ICmethod.cpp:
--------------------------------------------------------------------------------
  1 | #include <Rcpp.h>
  2 | 
  3 | #define LOG_LEVEL 0
  4 | 
  5 | #if LOG_LEVEL >= 2
  6 | 
  7 | #define LOG_DEBUG( msg ) Rcpp::Rcout << msg << "\n";
  8 | 
  9 | #else
 10 | 
 11 | #define LOG_DEBUG( msg )
 12 | 
 13 | #endif
 14 | 
 15 | typedef double go_dist_func_t( double mica, double ic1, double ic2, double mic ); // shared signature of the go_dist_* scorers below, so one of them can be selected through a go_dist_func_t* pointer
 16 | 
 17 | double go_dist_Resnik( double mica, double ic1, double ic2, double mic ) // Resnik score; ic1, ic2 and mic are unused and exist only to match go_dist_func_t
 18 | {
 19 |   // Resnik does not consider how distant the terms are from their common ancestor:
 20 |   return mica; // the score is just the IC value passed in for the most informative common ancestor
 21 | }
 22 | 
 23 | double go_dist_Lin( double mica, double ic1, double ic2, double mic ) // Lin score; mic is unused and exists only to match go_dist_func_t
 24 | {
 25 |   // Lin takes common ancestor distances into account: the MICA's IC is related to the ICs of both terms.
 26 |   return 2 * mica / (ic1+ic2); // no zero check here; the caller in this file skips terms whose normalized IC is 0
 27 | }
 28 | 
 29 | double go_dist_Jiang( double mica, double ic1, double ic2, double mic ) // Jiang score; mic is unused and exists only to match go_dist_func_t
 30 | {
 31 |   // Jiang takes common ancestor distances into account: (ic1 + ic2 - 2*mica) is subtracted from 1.
 32 |   return std::max( 0.0, 1.0 - ( -2 * mica + ic1 + ic2 ) ); // std::max clamps the result so it never drops below 0
 33 | }
 34 | 
 35 | double go_dist_Rel( double mica, double ic1, double ic2, double mic ) // Rel score: the Lin ratio multiplied by (1 - exp(-mica*mic))
 36 | {
 37 |   // The caller passes ICs already divided by mic, so mica*mic equals the original (unnormalized) IC value,
 38 |   // and exp(-mica*mic) equals the probability of the term's occurrence.
 39 |   return 2 * mica/(ic1+ic2) * (1-exp(-mica*mic));
 40 | }
 41 | 
 42 | // [[Rcpp::export]]
 43 | Rcpp::NumericMatrix infoContentMethod_cpp( // IC-based similarity of every id1_ term against every id2_ term
 44 |   Rcpp::StringVector&  id1_, // term IDs; become the row names of the result
 45 |   Rcpp::StringVector&  id2_, // term IDs; become the column names of the result
 46 |   Rcpp::List&          anc_, // named list: term ID -> vector of its ancestor term IDs
 47 |   Rcpp::NumericVector& ic_, // named vector: term ID -> information content (NA and +Inf entries are ignored)
 48 |   const std::string&   method_, // "Resnik", "Lin", "Jiang" or "Rel"; anything else throws std::runtime_error
 49 |   const std::string&   ont_ // ontology code; only used to choose which root term gets its IC forced to 0
 50 | ){ // returns an id1_.size() x id2_.size() matrix; a cell is NA when either term has no usable IC or a normalized IC of 0
 51 |   go_dist_func_t* go_dist;
 52 |   // Select the scoring function once, up front, rather than comparing method_ for every cell.
 53 |   // Resnik does not consider how distant the terms are from their common ancestor; Lin and Jiang take that distance into account.
 54 |   if (method_ == "Resnik") {
 55 |     go_dist = &go_dist_Resnik;
 56 |   }
 57 |   else if (method_ == "Lin") {
 58 |     go_dist = &go_dist_Lin;
 59 |   }
 60 |   else if (method_ == "Jiang") {
 61 |     go_dist = &go_dist_Jiang;
 62 |   }
 63 |   else if (method_ == "Rel") {
 64 |     go_dist = &go_dist_Rel;
 65 |   }
 66 |   else {
 67 |     throw std::runtime_error( "Unknown GO distance method" );
 68 |   }
 69 |   
 70 |   typedef std::string term_id_t;
 71 |   typedef std::set<term_id_t> term_set_t;
 72 | 
 73 |   // Find the maximum IC (mic) and build the map of normalized IC values.
 74 |   typedef std::map<term_id_t, double> ic_map_t;
 75 |   ic_map_t normIcMap;
 76 |   // The more specific a term, the larger its IC value.
 77 |   // Every IC is normalized by dividing it by the largest usable IC (mic),
 78 |   // so normalized IC values range from 0 (root node) to 1 (most specific node).
 79 |   double mic = NA_REAL; // stays NA if ic_ holds no usable value
 80 |   {
 81 |     Rcpp::StringVector icNames( ic_.names() );
 82 |     for (std::size_t i=0; i < ic_.size(); i++ ) {
 83 |       const double cic = ic_[i];
 84 |       if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; // skip NA and +Inf entries
 85 |       if ( Rcpp::NumericVector::is_na( mic ) || mic < cic ) mic = cic; // running maximum; the is_na test accepts the first usable value
 86 |     }
 87 |     LOG_DEBUG( "mic=" << mic ); // expands to nothing unless LOG_LEVEL >= 2
 88 |     for (std::size_t i=0; i < ic_.size(); i++ ) {
 89 |       const double cic = ic_[i];
 90 |       if ( Rcpp::NumericVector::is_na( cic ) || cic == R_PosInf ) continue; // same filter as above, so such terms never get a map entry
 91 |       normIcMap.insert( std::make_pair( (std::string) icNames[i], cic / mic ) );
 92 |     }
 93 |   }
 94 | 
 95 |   // Set the root node IC to 0 (operator[] creates the entry when the root had none); terms with IC 0 yield NA cells below.
 96 |   if(ont_ == "DO") {
 97 |     normIcMap["DOID:4"] = 0;
 98 |   } else if(ont_ == "BP") {
 99 |     normIcMap["GO:0008150"] = 0;
100 |   } else if(ont_ == "CC") {
101 |     normIcMap["GO:0005575"] = 0;
102 |   } else if(ont_ == "MF") {
103 |     normIcMap["GO:0003674"] = 0;
104 |   } else if(ont_ == "MPO") {
105 |     normIcMap["MP:0000001"] = 0;
106 |   } else {
107 |     // the "all" term seems to have been removed in GO.db 3.12.0
108 |     normIcMap["all"] = 0; 
109 |   }
110 | 
111 |   normIcMap["all"] = 0; // applied for every ont_, which makes the assignment in the else branch above redundant
112 | 
113 |   // Convert anc_ into a map from term ID to the set of its ancestors.
114 |   typedef std::map<term_id_t, term_set_t> anc_map_t;
115 |   anc_map_t ancMap;
116 |   {
117 |     Rcpp::StringVector goTerms( anc_.names() );
118 |     for (std::size_t i=0; i < anc_.size(); i++ ) {
119 |       const std::vector<std::string> ancVec = Rcpp::as<std::vector<std::string> >( anc_[i] );
120 |       term_set_t ancestors( ancVec.begin(), ancVec.end() );
121 |       // The term itself is also considered an ancestor.
122 |       ancestors.insert( (std::string)goTerms[i] );
123 |       ancMap.insert( std::make_pair( (std::string) goTerms[i], ancestors ) );
124 |     }
125 |   }
126 | 
127 |   Rcpp::NumericMatrix res( id1_.size(), id2_.size() );
128 |   // res.attr("dimnames") = Rcpp::Rcpp_list2( id1_, id2_ );
129 |   rownames(res) = id1_;
130 |   colnames(res) = id2_;
131 |   for ( std::size_t i = 0; i < id1_.size(); i++ ) {
132 |     const std::string id1_term = (std::string)id1_[i];
133 |     const ic_map_t::const_iterator iIcIt = normIcMap.find( id1_term );
134 |     if ( iIcIt != normIcMap.end() && iIcIt->second != 0 ) { // row term needs a non-zero normalized IC, otherwise the whole row is NA
135 |       const double iIc = iIcIt->second;
136 |       LOG_DEBUG( "ic[" << id1_term << "]=" << iIc );
137 |       const anc_map_t::const_iterator iAncsIt = ancMap.find( id1_term );
138 |       for ( std::size_t j = 0; j < id2_.size(); j++ ) {
139 |         const std::string id2_term = (std::string)id2_[j];
140 |         const ic_map_t::const_iterator jIcIt = normIcMap.find( id2_term );
141 |         if ( jIcIt != normIcMap.end() && jIcIt->second != 0 ) { // same requirement for the column term
142 |           const anc_map_t::const_iterator jAncsIt = ancMap.find( id2_term );
143 |           // Find the common ancestors; the set stays empty when either term has no entry in ancMap.
144 |           term_set_t commonAncs;
145 |           if ( iAncsIt != ancMap.end() && jAncsIt != ancMap.end() ) {
146 |             std::set_intersection( iAncsIt->second.begin(), iAncsIt->second.end(),
147 |                             jAncsIt->second.begin(), jAncsIt->second.end(),
148 |                             std::inserter( commonAncs, commonAncs.end() ) );
149 |           }
150 |           LOG_DEBUG( "n(commonAncs(" << id1_term << "," << id2_term << "))=" << commonAncs.size() );
151 | 
152 |           // Information Content of the most informative common ancestor (MICA); 0 when there is no common ancestor with an IC entry.
153 |           double mica = 0;
154 |           for ( term_set_t::const_iterator termIt = commonAncs.begin(); termIt != commonAncs.end(); ++termIt ) {
155 |             ic_map_t::const_iterator ancIcIt = normIcMap.find( *termIt );
156 |             if ( ancIcIt != normIcMap.end() && mica < ancIcIt->second ) mica = ancIcIt->second; 
157 |           }
158 |           LOG_DEBUG( "mica(" << id1_term << "," << id2_term << ")=" << mica );
159 |           res(i,j) = go_dist( mica, iIc, jIcIt->second, mic ); // mica and both term ICs are normalized; mic is the raw maximum
160 |         } else {
161 |           res(i,j) = NA_REAL;
162 |         }
163 |       }
164 |     } else {
165 |       for ( std::size_t j = 0; j < id2_.size(); j++ ) {
166 |         res(i,j) = NA_REAL;
167 |       }
168 |     }
169 |   }
170 |   return ( res );
171 | }
172 | 


--------------------------------------------------------------------------------
/src/RcppExports.cpp:
--------------------------------------------------------------------------------
 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | #include <Rcpp.h>
 5 | 
 6 | using namespace Rcpp;
 7 | 
 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
 9 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
10 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
11 | #endif
12 | 
13 | // infoContentMethod_cpp
14 | Rcpp::NumericMatrix infoContentMethod_cpp(Rcpp::StringVector& id1_, Rcpp::StringVector& id2_, Rcpp::List& anc_, Rcpp::NumericVector& ic_, const std::string& method_, const std::string& ont_);
15 | RcppExport SEXP _GOSemSim_infoContentMethod_cpp(SEXP id1_SEXP, SEXP id2_SEXP, SEXP anc_SEXP, SEXP ic_SEXP, SEXP method_SEXP, SEXP ont_SEXP) {
16 | BEGIN_RCPP
17 |     Rcpp::RObject rcpp_result_gen;
18 |     Rcpp::RNGScope rcpp_rngScope_gen;
19 |     Rcpp::traits::input_parameter< Rcpp::StringVector& >::type id1_(id1_SEXP);
20 |     Rcpp::traits::input_parameter< Rcpp::StringVector& >::type id2_(id2_SEXP);
21 |     Rcpp::traits::input_parameter< Rcpp::List& >::type anc_(anc_SEXP);
22 |     Rcpp::traits::input_parameter< Rcpp::NumericVector& >::type ic_(ic_SEXP);
23 |     Rcpp::traits::input_parameter< const std::string& >::type method_(method_SEXP);
24 |     Rcpp::traits::input_parameter< const std::string& >::type ont_(ont_SEXP);
25 |     rcpp_result_gen = Rcpp::wrap(infoContentMethod_cpp(id1_, id2_, anc_, ic_, method_, ont_));
26 |     return rcpp_result_gen;
27 | END_RCPP
28 | }
29 | 
30 | static const R_CallMethodDef CallEntries[] = {
31 |     {"_GOSemSim_infoContentMethod_cpp", (DL_FUNC) &_GOSemSim_infoContentMethod_cpp, 6},
32 |     {NULL, NULL, 0}
33 | };
34 | 
35 | RcppExport void R_init_GOSemSim(DllInfo *dll) {
36 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
37 |     R_useDynamicSymbols(dll, FALSE);
38 | }
39 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(GOSemSim)
3 | 
4 | test_check("GOSemSim")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-GO.R:
--------------------------------------------------------------------------------
 1 | library(GOSemSim)
 2 | library(GO.db)
 3 | 
 4 | context("GO")
 5 | 
 6 | test_that("parent node", {
 7 |     term_id <- 'GO:0004022'
 8 |     parent_map <- as.list(GOSemSim:::getParents('MF')[term_id])
 9 |     expect_equal(names(parent_map), term_id)
10 |     # cross-check against GO.db: the term must appear in the first children entry looked up from its parent(s)
11 |     parent_id <- parent_map[[1]]
12 |     expect_true(term_id %in% as.list(GOMFCHILDREN[parent_id])[[1]])
13 | })
14 | 
15 | 


--------------------------------------------------------------------------------
/tests/testthat/test-Wang.R:
--------------------------------------------------------------------------------
 1 | library(GOSemSim)
 2 | 
 3 | context("Wang")
 4 | 
 5 | test_that("Wang's method", {
 6 |     mf_data <- godata('org.Hs.eg.db', ont="MF", computeIC=FALSE)
 7 |     wang_score <- goSim("GO:0004022", "GO:0005515", semData=mf_data, measure="Wang")
 8 |     expect_true(wang_score >= 0 && wang_score <= 1)  # the score must lie within [0, 1]
 9 | })
10 | 


--------------------------------------------------------------------------------
/tests/testthat/test-load_OrgDb.R:
--------------------------------------------------------------------------------
 1 | library(GOSemSim)
 2 | 
 3 | context("load_OrgDb")
 4 | 
 5 | test_that("load OrgDb with package name", {
 6 |     org_db <- load_OrgDb("org.Hs.eg.db")  # pass the annotation package by name
 7 |     expect_true(is(org_db, "OrgDb"))
 8 | })
 9 | 
10 | 


--------------------------------------------------------------------------------
/vignettes/GOSemSim.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "GO Semantic Similarity Analysis"
 3 | author: 
 4 | - name: Guangchuang Yu
 5 |   email: guangchuangyu@gmail.com
 6 |   affiliation: Department of Bioinformatics, School of Basic Medical Sciences, Southern Medical University
 7 | date: "`r Sys.Date()`"
 8 | output:
 9 |   prettydoc::html_pretty:
10 |     toc: true
11 |     theme: cayman
12 |     highlight: github
13 |   pdf_document:
14 |     toc: true
15 | vignette: >
16 |   % \VignetteIndexEntry{GOSemSim}
17 |   % \VignetteEngine{knitr::rmarkdown}
18 |   % \usepackage[utf8]{inputenc}
19 |   %\VignetteEncoding{UTF-8}
20 | ---
21 | 
22 | ```{r style, echo=FALSE, results="asis", message=FALSE}
23 | knitr::opts_chunk$set(tidy = FALSE,
24 |                       warning = FALSE,
25 |                       message = FALSE)
26 | ```
27 | 
28 | ```{r echo=FALSE, results="hide", message=FALSE}
29 | Biocpkg <- function (pkg) {  # build a Markdown link to the Bioconductor page of `pkg`
30 |     sprintf("[%s](http://bioconductor.org/packages/%s)", pkg, pkg)  # pkg is used twice: as the link text and as the URL tail
31 | }
32 | ```
33 | 
34 | # Vignette
35 | 
36 | Please go to <https://yulab-smu.top/biomedical-knowledge-mining-book/> for the full vignette.
37 | 
38 | 
39 | # Citation
40 | 
41 | 
42 | If you use `r Biocpkg('GOSemSim')` in published research, please cite the most appropriate paper(s) from this list:
43 | 
44 | 1. **Yu G**. [Gene Ontology Semantic Similarity Analysis Using GOSemSim](http://dx.doi.org/10.1007/978-1-0716-0301-7_11). In: Kidder B. (eds) Stem Cell Transcriptional Networks. **_Methods in Molecular Biology_**, 2020, 2117:207-215. Humana, New York, NY. 
45 | 2. **Yu G**^#^, Li F^#^, Qin Y, Bo X^\*^, Wu Y and Wang S^\*^. [GOSemSim: an R package for measuring semantic similarity among GO terms and gene products](http://dx.doi.org/10.1093/bioinformatics/btq064). **_Bioinformatics_**. 2010, 26(7):976-978. 
46 | 
47 | 
48 | 
49 | # Need help?
50 | 
51 | 
52 | For questions, please post to [Bioconductor support site](https://support.bioconductor.org/) and tag your post with *GOSemSim*.
53 | 
54 | 


--------------------------------------------------------------------------------