├── src ├── Makevars ├── Makevars.win ├── simona.dylib ├── term.h ├── dist.h ├── common_ancestor.h ├── utils.h ├── intersectToList_logical.cpp ├── permutation.cpp ├── partition.cpp ├── tree.cpp ├── term.cpp ├── traverse.h ├── annotation.cpp └── utils.cpp ├── LICENSE ├── inst ├── extdata │ ├── go_ic.rds │ ├── go_sim.rds │ ├── LCA_depth.rds │ ├── sig_go_tb.rds │ ├── sig_go_ids.rds │ └── onto_gene.R ├── CITATION └── scripts │ ├── download.R │ └── parse_ttl.pl ├── docs ├── reference │ ├── Rplot001.png │ ├── Rplot002.png │ └── dag_viz-1.png ├── articles │ ├── v1_dag_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ ├── unnamed-chunk-16-1.png │ │ │ ├── unnamed-chunk-17-1.png │ │ │ ├── unnamed-chunk-18-1.png │ │ │ ├── unnamed-chunk-20-1.png │ │ │ └── unnamed-chunk-21-1.png │ ├── v01_dag_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-18-1.png │ │ │ └── unnamed-chunk-21-1.png │ ├── v8_random_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ └── unnamed-chunk-7-1.png │ ├── v08_random_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ └── unnamed-chunk-7-1.png │ ├── v10_enrichmenr_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-7-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ └── unnamed-chunk-9-1.png │ ├── v10_enrichment_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-7-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ ├── unnamed-chunk-9-1.png │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ └── unnamed-chunk-15-1.png │ ├── v07_dag_visualization_files │ │ ├── figure-html │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-12-1.png │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ └── unnamed-chunk-19-1.png │ │ ├── DiagrammeR-styles-0.2 │ │ │ └── styles.css │ │ ├── 
htmltools-fill-0.5.8.1 │ │ │ └── fill.css │ │ ├── grViz-binding-1.0.10 │ │ │ └── grViz.js │ │ └── grViz-binding-1.0.11 │ │ │ └── grViz.js │ ├── v4_information_content_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-4-1.png │ │ │ └── unnamed-chunk-5-1.png │ ├── v7_dag_visualization_files │ │ ├── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-11-1.png │ │ │ ├── unnamed-chunk-12-1.png │ │ │ ├── unnamed-chunk-13-1.png │ │ │ ├── unnamed-chunk-14-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ └── unnamed-chunk-9-1.png │ │ ├── DiagrammeR-styles-0.2 │ │ │ └── styles.css │ │ └── grViz-binding-1.0.10 │ │ │ └── grViz.js │ ├── v04_information_content_files │ │ └── figure-html │ │ │ └── unnamed-chunk-5-1.png │ └── main.css ├── deps │ └── data-deps.txt ├── pkgdown.yml ├── link.svg ├── bootstrap-toc.css ├── docsearch.js ├── pkgdown.js └── bootstrap-toc.js ├── tests ├── test-all.R └── testthat │ ├── tests_utils.R │ ├── tests_group_sim.R │ ├── tests_tree.R │ ├── tests_partition.R │ ├── tests_constructor.R │ ├── tests_dist.R │ ├── tests_reorder.R │ ├── tests_term.R │ └── tests_common_ancestor.R ├── vignettes ├── simona.Rmd ├── v09_shiny.Rmd ├── main.css └── v02_GO.Rmd ├── man ├── dag_shiny.Rd ├── print.print_source.Rd ├── show-ontology_DAG-method.Rd ├── dag_has_terms.Rd ├── add_annotation.Rd ├── method_param.Rd ├── create_ontology_DAG_from_igraph.Rd ├── dag_all_terms.Rd ├── dag_as_igraph.Rd ├── dag_root.Rd ├── random_terms.Rd ├── mcols_ontology_DAG.Rd ├── all_methods.Rd ├── dag_distinct_ancestors.Rd ├── annotation.Rd ├── dag_depth.Rd ├── dag_terms.Rd ├── n_terms.Rd ├── dag_treelize.Rd ├── simona_opt.Rd ├── dag_filter.Rd ├── dag_reorder.Rd ├── partition_by_level.Rd ├── dag_longest_dist_to_offspring.Rd ├── subset.Rd ├── distance.Rd ├── dag_enrich_on_offsprings.Rd ├── create_ontology_DAG_from_GO_db.Rd ├── dag_random.Rd ├── ontology.Rd ├── n_annotations.Rd ├── dag_enrich_on_offsprings_by_permutation.Rd ├── dag_enrich_on_items.Rd ├── ontology_DAG-class.Rd ├── 
import_obo.Rd ├── create_ontology_DAG.Rd ├── common_ancestor.Rd └── dag_viz.Rd ├── .Rbuildignore ├── .gitignore ├── NEWS ├── _pkgdown.yml ├── DESCRIPTION ├── R ├── zzz.R ├── partition.R ├── dist.R └── utils.R └── NAMESPACE /src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | 3 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2024 2 | COPYRIGHT HOLDER: Zuguang Gu 3 | -------------------------------------------------------------------------------- /src/simona.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/src/simona.dylib -------------------------------------------------------------------------------- /inst/extdata/go_ic.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/inst/extdata/go_ic.rds -------------------------------------------------------------------------------- /inst/extdata/go_sim.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/inst/extdata/go_sim.rds -------------------------------------------------------------------------------- /inst/extdata/LCA_depth.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/inst/extdata/LCA_depth.rds -------------------------------------------------------------------------------- /inst/extdata/sig_go_tb.rds: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/inst/extdata/sig_go_tb.rds -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/Rplot002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/reference/Rplot002.png -------------------------------------------------------------------------------- /docs/reference/dag_viz-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/reference/dag_viz-1.png -------------------------------------------------------------------------------- /inst/extdata/sig_go_ids.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/inst/extdata/sig_go_ids.rds -------------------------------------------------------------------------------- /tests/test-all.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | suppressWarnings(suppressPackageStartupMessages(library(simona))) 3 | 4 | test_check("simona") 5 | -------------------------------------------------------------------------------- /docs/articles/v1_dag_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v1_dag_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- 
/docs/articles/v1_dag_files/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v1_dag_files/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/articles/v1_dag_files/figure-html/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v1_dag_files/figure-html/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /docs/articles/v1_dag_files/figure-html/unnamed-chunk-17-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v1_dag_files/figure-html/unnamed-chunk-17-1.png -------------------------------------------------------------------------------- /docs/articles/v1_dag_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v1_dag_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/articles/v1_dag_files/figure-html/unnamed-chunk-20-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v1_dag_files/figure-html/unnamed-chunk-20-1.png -------------------------------------------------------------------------------- /docs/articles/v1_dag_files/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v1_dag_files/figure-html/unnamed-chunk-21-1.png 
-------------------------------------------------------------------------------- /docs/articles/v01_dag_files/figure-html/unnamed-chunk-18-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v01_dag_files/figure-html/unnamed-chunk-18-1.png -------------------------------------------------------------------------------- /docs/articles/v01_dag_files/figure-html/unnamed-chunk-21-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v01_dag_files/figure-html/unnamed-chunk-21-1.png -------------------------------------------------------------------------------- /docs/articles/v8_random_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v8_random_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/v8_random_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v8_random_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/v8_random_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v8_random_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/v8_random_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v8_random_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/articles/v8_random_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v8_random_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/v08_random_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v08_random_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/v08_random_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v08_random_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/v08_random_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v08_random_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/v08_random_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v08_random_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- 
/docs/articles/v08_random_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v08_random_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichmenr_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichmenr_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichmenr_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichmenr_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichmenr_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichmenr_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v10_enrichment_files/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-11-1.png 
-------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-19-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v07_dag_visualization_files/figure-html/unnamed-chunk-19-1.png -------------------------------------------------------------------------------- /docs/articles/v4_information_content_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v4_information_content_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- 
/docs/articles/v4_information_content_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v4_information_content_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-11-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-11-1.png -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-12-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-12-1.png -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-13-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-13-1.png -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-14-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v7_dag_visualization_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/v04_information_content_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jokergoo/simona/HEAD/docs/articles/v04_information_content_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/deps/data-deps.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /vignettes/simona.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "The simona package" 3 | author: "Zuguang Gu (z.gu@dkfz.de)" 4 | date: '`r Sys.Date()`' 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{The simona package} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | 
--- 11 | 12 | Please go to the package's website: https://jokergoo.github.io/simona. 13 | -------------------------------------------------------------------------------- /src/term.h: -------------------------------------------------------------------------------- 1 | #ifndef __TERMS__ 2 | #define __TERMS__ 3 | 4 | 5 | double _calc_wang_s(List lt_children, List lt_children_relations, NumericVector contribution, int i_node, int i_end, LogicalVector l_background, bool correct = false, double c = 0.66667); 6 | NumericVector cpp_ic_wang(S4 dag, NumericVector contribution); 7 | IntegerVector cpp_max_leaves_id(S4 dag, IntegerVector nodes, NumericVector v); 8 | 9 | #endif 10 | 11 | -------------------------------------------------------------------------------- /man/dag_shiny.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shiny.R 3 | \name{dag_shiny} 4 | \alias{dag_shiny} 5 | \title{A shiny app for the DAG} 6 | \usage{ 7 | dag_shiny(dag) 8 | } 9 | \arguments{ 10 | \item{dag}{An \code{ontology_DAG} object.} 11 | } 12 | \description{ 13 | A shiny app for the DAG 14 | } 15 | \examples{ 16 | if(FALSE) { 17 | dag = create_ontology_DAG_from_GO_db() 18 | dag_shiny(dag) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /man/print.print_source.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.R 3 | \name{print.print_source} 4 | \alias{print.print_source} 5 | \title{Print the source} 6 | \usage{ 7 | \method{print}{print_source}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An object in the \code{print_source} class.} 11 | 12 | \item{...}{Other arguments.} 13 | } 14 | \description{ 15 | Print the source 16 | } 17 | \details{ 18 | Internally used. 
19 | } 20 | -------------------------------------------------------------------------------- /man/show-ontology_DAG-method.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{show,ontology_DAG-method} 4 | \alias{show,ontology_DAG-method} 5 | \title{Print the ontology_DAG object} 6 | \usage{ 7 | \S4method{show}{ontology_DAG}(object) 8 | } 9 | \arguments{ 10 | \item{object}{An \code{ontology_DAG} object.} 11 | } 12 | \value{ 13 | No value is returned. 14 | } 15 | \description{ 16 | Print the ontology_DAG object 17 | } 18 | -------------------------------------------------------------------------------- /man/dag_has_terms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{dag_has_terms} 4 | \alias{dag_has_terms} 5 | \title{Whether the terms exist in the DAG} 6 | \usage{ 7 | dag_has_terms(dag, terms) 8 | } 9 | \arguments{ 10 | \item{dag}{An \code{ontology_DAG} object.} 11 | 12 | \item{terms}{A vector of term IDs.} 13 | } 14 | \value{ 15 | A logical vector. 
16 | } 17 | \description{ 18 | Whether the terms exist in the DAG 19 | } 20 | -------------------------------------------------------------------------------- /docs/articles/v7_dag_visualization_files/DiagrammeR-styles-0.2/styles.css: -------------------------------------------------------------------------------- 1 | .DiagrammeR,.grViz pre { 2 | white-space: pre-wrap; /* CSS 3 */ 3 | white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ 4 | white-space: -pre-wrap; /* Opera 4-6 */ 5 | white-space: -o-pre-wrap; /* Opera 7 */ 6 | word-wrap: break-word; /* Internet Explorer 5.5+ */ 7 | } 8 | 9 | .DiagrammeR g .label { 10 | font-family: Helvetica; 11 | font-size: 14px; 12 | color: #333333; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/DiagrammeR-styles-0.2/styles.css: -------------------------------------------------------------------------------- 1 | .DiagrammeR,.grViz pre { 2 | white-space: pre-wrap; /* CSS 3 */ 3 | white-space: -moz-pre-wrap; /* Mozilla, since 1999 */ 4 | white-space: -pre-wrap; /* Opera 4-6 */ 5 | white-space: -o-pre-wrap; /* Opera 7 */ 6 | word-wrap: break-word; /* Internet Explorer 5.5+ */ 7 | } 8 | 9 | .DiagrammeR g .label { 10 | font-family: Helvetica; 11 | font-size: 14px; 12 | color: #333333; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/dist.h: -------------------------------------------------------------------------------- 1 | #ifndef __DIST__ 2 | #define __DIST__ 3 | 4 | int cpp_tpl_shortest_path_length(S4 dag, int from, int to); 5 | int cpp_tpl_longest_path_length(S4 dag, int from, int to); 6 | IntegerVector cpp_tpl_shortest_path(S4 dag, int from, int to); 7 | IntegerVector cpp_tpl_longest_path(S4 dag, int from, int to); 8 | 9 | double cpp_tpl_shortest_path_sum_value(S4 dag, int from, int to, NumericVector value); 10 | double cpp_tpl_longest_path_sum_value(S4 dag, int from, int to, 
NumericVector value); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /tests/testthat/tests_utils.R: -------------------------------------------------------------------------------- 1 | 2 | x = runif(10) 3 | 4 | test_that("test cross_*", { 5 | expect_equal( 6 | simona:::cross_sum(x), 7 | outer(x, x, "+") 8 | ) 9 | 10 | expect_equal( 11 | simona:::cross_multiply(x), 12 | outer(x, x, "*") 13 | ) 14 | 15 | expect_equal( 16 | simona:::cross_minus(x), 17 | outer(x, x, "-") 18 | ) 19 | 20 | expect_equal( 21 | simona:::cross_min(x), 22 | outer(x, x, pmin) 23 | ) 24 | 25 | expect_equal( 26 | simona:::cross_max(x), 27 | outer(x, x, pmax) 28 | ) 29 | 30 | }) 31 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.11.4 2 | pkgdown: 2.0.9 3 | pkgdown_sha: ~ 4 | articles: 5 | simona: simona.html 6 | v01_dag: v01_dag.html 7 | v02_GO: v02_GO.html 8 | v03_import: v03_import.html 9 | v04_information_content: v04_information_content.html 10 | v05_term_similarity: v05_term_similarity.html 11 | v06_group_similarity: v06_group_similarity.html 12 | v07_dag_visualization: v07_dag_visualization.html 13 | v08_random: v08_random.html 14 | v09_shiny: v09_shiny.html 15 | v10_enrichment: v10_enrichment.html 16 | last_built: 2024-09-13T10:36Z 17 | 18 | -------------------------------------------------------------------------------- /vignettes/v09_shiny.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Shiny app" 3 | author: "Zuguang Gu ( z.gu@dkfz.de )" 4 | date: '`r Sys.Date()`' 5 | output: html_vignette 6 | --- 7 | 8 | 9 | The function `dag_shiny()` generates a Shiny application that allows interactive manipulation of the ontology. 
10 | 11 | ```{r, eval = FALSE} 12 | library(simona) 13 | dag = create_ontology_DAG_from_GO_db() 14 | dag_shiny(dag) 15 | ``` 16 | 17 | 18 |

19 | 20 | 21 | 24 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("The methods within the code package can be cited as:") 2 | 3 | citEntry( 4 | entry = "article", 5 | title = "simona: a comprehensive R package for semantic similarity analysis on bio-ontologies", 6 | author = "Zuguang Gu", 7 | journal = "BMC Genomics", 8 | year = 2024, 9 | doi = "10.1186/s12864-024-10759-4", 10 | textVersion = "Gu, Z. (2024) simona: a comprehensive R package for semantic similarity analysis on bio-ontologies. BMC Genomics" 11 | ) 12 | 13 | citFooter("This free open-source software implements academic 14 | research by the authors and co-workers. If you use it, 15 | please support the project by citing the appropriate 16 | journal articles.") 17 | 18 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\..* 2 | README.* 3 | .travis.yml 4 | ^_pkgdown\.yml$ 5 | ^docs$ 6 | ^pkgdown$ 7 | build_pkg_site.R 8 | ^\.github$ 9 | ^.*\.Rproj$ 10 | ^\.Rproj\.user$ 11 | .DS_Store 12 | 13 | src/*.o$ 14 | src/*.so$ 15 | man/temp__* 16 | ^doc$ 17 | ^Meta$ 18 | vignettes/dag.svg 19 | vignettes/example.svg 20 | vignettes/jquery.min.js 21 | vignettes/jquery.sticky.js 22 | vignettes/main.css 23 | vignettes/v01_dag.Rmd 24 | vignettes/v02_GO.Rmd 25 | vignettes/v03_import.Rmd 26 | vignettes/v04_information_content.Rmd 27 | vignettes/v05_term_similarity.Rmd 28 | vignettes/v06_group_similarity.Rmd 29 | vignettes/v07_dag_visualization.Rmd 30 | vignettes/v08_random.Rmd 31 | vignettes/v09_shiny.Rmd 32 | vignettes/v10_enrichment.Rmd 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | 
.Rhistory 3 | 4 | # Example code in package build process 5 | *-Ex.R 6 | 7 | # R data files from past sessions 8 | .Rdata 9 | 10 | # RStudio files 11 | .Rproj.user/ 12 | 13 | .DS_Store 14 | 15 | 16 | src/*.o 17 | src/*.so 18 | man/temp__* 19 | /doc/ 20 | ./docs/* 21 | /Meta/ 22 | 23 | 24 | .renviron 25 | .rprofile 26 | .rproj 27 | .rproj.user 28 | .rhistory 29 | .rapp.history 30 | .o 31 | .sl 32 | .so 33 | .dylib 34 | .a 35 | .dll 36 | .def 37 | .ds_store 38 | unsrturl.bst 39 | .log 40 | .aux 41 | .backups 42 | .cproject 43 | .directory 44 | .dropbox 45 | .exrc 46 | .gdb.history 47 | .gitattributes 48 | .gitmodules 49 | .hgtags 50 | .project 51 | .seed 52 | .settings 53 | .tm_properties 54 | -------------------------------------------------------------------------------- /man/add_annotation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{add_annotation} 4 | \alias{add_annotation} 5 | \title{Add annotations to the DAG object} 6 | \usage{ 7 | add_annotation(dag, annotation) 8 | } 9 | \arguments{ 10 | \item{dag}{An \code{ontology_DAG} object.} 11 | 12 | \item{annotation}{A list of character vectors which contain items annotated to the terms. Names of the list should be the term names. In the DAG, items 13 | annotated to a term will also be annotated to its parents. Such merging 14 | is applied automatically in the package.} 15 | } 16 | \value{ 17 | An \code{ontology_DAG} object. 
18 | } 19 | \description{ 20 | Add annotations to the DAG object 21 | } 22 | -------------------------------------------------------------------------------- /src/common_ancestor.h: -------------------------------------------------------------------------------- 1 | #ifndef __ANCESTOR__ 2 | #define __ANCESTOR__ 3 | 4 | 5 | NumericMatrix cpp_max_ancestor_v(S4 dag, IntegerVector nodes, NumericVector v); 6 | IntegerMatrix cpp_max_ancestor_id(S4 dag, IntegerVector nodes, NumericVector v); 7 | IntegerMatrix cpp_distances(S4 dag, IntegerVector nodes, int type = 1); 8 | IntegerMatrix cpp_shortest_distances_via_NCA(S4 dag, IntegerVector nodes); 9 | IntegerMatrix cpp_distances_directed(S4 dag, IntegerVector nodes, int type = 1); 10 | IntegerMatrix cpp_longest_distances_directed(S4 dag, IntegerVector nodes); 11 | IntegerMatrix cpp_shortest_distances_directed(S4 dag, IntegerVector nodes); 12 | IntegerMatrix cpp_nearest_common_ancestor(S4 dag, IntegerVector nodes); 13 | 14 | #endif 15 | 16 | -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/htmltools-fill-0.5.8.1/fill.css: -------------------------------------------------------------------------------- 1 | @layer htmltools { 2 | .html-fill-container { 3 | display: flex; 4 | flex-direction: column; 5 | /* Prevent the container from expanding vertically or horizontally beyond its 6 | parent's constraints. 
*/ 7 | min-height: 0; 8 | min-width: 0; 9 | } 10 | .html-fill-container > .html-fill-item { 11 | /* Fill items can grow and shrink freely within 12 | available vertical space in fillable container */ 13 | flex: 1 1 auto; 14 | min-height: 0; 15 | min-width: 0; 16 | } 17 | .html-fill-container > :not(.html-fill-item) { 18 | /* Prevent shrinking or growing of non-fill items */ 19 | flex: 0 0 auto; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /man/method_param.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/00_global.R 3 | \name{method_param} 4 | \alias{method_param} 5 | \title{All Parameters of a given method} 6 | \usage{ 7 | method_param(IC_method = NULL, term_sim_method = NULL, group_sim_method = NULL) 8 | } 9 | \arguments{ 10 | \item{IC_method}{A single IC method name.} 11 | 12 | \item{term_sim_method}{A single term similarity method name.} 13 | 14 | \item{group_sim_method}{A single group similarity method name.} 15 | } 16 | \value{ 17 | A vector of parameter names. 
18 | } 19 | \description{ 20 | All Parameters of a given method 21 | } 22 | \examples{ 23 | method_param(IC_method = "IC_annotation") 24 | method_param(term_sim_method = "Sim_Wang_2007") 25 | } 26 | -------------------------------------------------------------------------------- /tests/testthat/tests_group_sim.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | 4 | #### test a small dag 5 | 6 | # b--d--f 7 | # / \ 8 | # a---c--e 9 | # upstream -> downstream 10 | annotation = list( 11 | "a" = 1:3, 12 | "b" = 3:4, 13 | "c" = 5, 14 | "d" = 7, 15 | "e" = 4:7, 16 | "f" = 8 17 | ) 18 | 19 | parents = c("a", "a", "b", "b", "c", "d") 20 | children = c("b", "c", "c", "d", "e", "f") 21 | 22 | dag = create_ontology_DAG(parents, children, relations = c("is_a", "part_of", "is_a", "part_of", "is_a", "part_of"), 23 | annotation = annotation) 24 | 25 | 26 | group1 = c("c", "e", "d") 27 | group2 = c("b", "d", "f") 28 | 29 | 30 | for(method in all_group_sim_methods()) { 31 | cat(method, "\n") 32 | print(group_sim(dag, group1, group2, method = method)) 33 | } 34 | -------------------------------------------------------------------------------- /tests/testthat/tests_tree.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | dag = create_ontology_DAG(c("a-h", "a-b", "a-c", "a-d", "b-e", "b-f", "c-g", "h-g", "d-e")) 5 | 6 | 7 | 8 | lt_children2 = cpp_mark_tree_links(dag) 9 | lt_children2 = lapply(lt_children2, function(x) { 10 | abs(x[x < 0]) 11 | }) 12 | 13 | n = length(lt_children2) 14 | parents = rep(seq_len(n), times = vapply(lt_children2, length, FUN.VALUE = integer(1))) 15 | children = unlist(lt_children2) 16 | 17 | tree1 = create_ontology_DAG(dag@terms[parents], dag@terms[children]) 18 | 19 | tree2 = dag_treelize(dag) 20 | 21 | 22 | test_that("test dag_treelize", { 23 | expect_identical(tree1@lt_children, tree2@lt_children) 24 | expect_identical(tree1@lt_parents, 
tree2@lt_parents) 25 | expect_identical(dag@terms, tree1@terms) 26 | expect_identical(dag@terms, tree2@terms) 27 | }) 28 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef __UTILS__ 2 | #define __UTILS__ 3 | 4 | void reset_logical_vector_to_false(LogicalVector& x); 5 | void reset_logical_vector_to_true(LogicalVector& x); 6 | void reset_numeric_vector_to_zero(NumericVector& x); 7 | void reset_integer_vector_to_zero(IntegerVector& x); 8 | IntegerVector _dag_depth(S4 dag); 9 | IntegerVector _which(LogicalVector l); 10 | LogicalVector integer_to_logical_vector(IntegerVector i, int n); 11 | IntegerVector cpp_match_index(IntegerVector ind1, IntegerVector ind2); 12 | void message(String s, bool appendLF = true); 13 | LogicalVector merge_two_logical_vectors_by_or(LogicalVector l1, LogicalVector l2); 14 | LogicalVector merge_two_logical_vectors_by_and(LogicalVector l1, LogicalVector l2); 15 | IntegerVector _order(NumericVector x); 16 | IntegerVector _order(IntegerVector x); 17 | 18 | #endif 19 | 20 | -------------------------------------------------------------------------------- /man/create_ontology_DAG_from_igraph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{create_ontology_DAG_from_igraph} 4 | \alias{create_ontology_DAG_from_igraph} 5 | \title{Create the ontology_DAG object from the igraph object} 6 | \usage{ 7 | create_ontology_DAG_from_igraph( 8 | g, 9 | relations = NULL, 10 | verbose = simona_opt$verbose 11 | ) 12 | } 13 | \arguments{ 14 | \item{g}{An \code{\link[igraph:aaa-igraph-package]{igraph::igraph}} object.} 15 | 16 | \item{relations}{A vector of relation types. 
The length of the vector should be the same as the number of edges in \code{g}.} 17 | 18 | \item{verbose}{Whether to print messages.} 19 | } 20 | \value{ 21 | An \code{ontology_DAG} object. 22 | } 23 | \description{ 24 | Create the ontology_DAG object from the igraph object 25 | } 26 | -------------------------------------------------------------------------------- /src/intersectToList_logical.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | using namespace Rcpp; 4 | #include 5 | 6 | 7 | // [[Rcpp::plugins(cpp11)]] 8 | // [[Rcpp::export]] 9 | List intersectToList_logical(List lt, StringVector x) { 10 | 11 | int n = lt.size(); 12 | List out(n); 13 | 14 | std::unordered_set seen; 15 | seen.insert(x.begin(), x.end()); 16 | 17 | for(int i = 0; i < n; i++) { 18 | 19 | StringVector v = as(lt[i]); 20 | LogicalVector l(v.size()); 21 | 22 | if(v.size()) { 23 | std::unordered_set seen2; 24 | 25 | for(int j = 0; j < v.size(); j ++) { 26 | l[j] = seen.find(v[j]) != seen.end() && seen2.insert(v[j]).second; 27 | } 28 | } 29 | 30 | out[i] = l; 31 | } 32 | 33 | return out; 34 | } 35 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /man/dag_all_terms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{dag_all_terms} 4 | \alias{dag_all_terms} 5 | \alias{dag_n_terms} 6 | \alias{dag_n_relations} 7 | \alias{dag_n_leaves} 8 | \title{Names of all terms} 9 | \usage{ 10 | dag_all_terms(dag) 11 | 12 | dag_n_terms(dag) 13 | 14 | dag_n_relations(dag) 15 | 16 | dag_n_leaves(dag) 17 | } 18 | \arguments{ 19 | \item{dag}{An \code{ontology_DAG} object.} 20 | } 21 
| \value{ 22 | \code{dag_all_terms()} returns a vector of term names. \code{dag_n_terms()} returns 23 | a single integer. 24 | } 25 | \description{ 26 | Names of all terms 27 | } 28 | \examples{ 29 | parents = c("a", "a", "b", "b", "c", "d") 30 | children = c("b", "c", "c", "d", "e", "f") 31 | dag = create_ontology_DAG(parents, children) 32 | dag_all_terms(dag) 33 | dag_n_terms(dag) 34 | } 35 | -------------------------------------------------------------------------------- /man/dag_as_igraph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{dag_as_igraph} 4 | \alias{dag_as_igraph} 5 | \title{Convert to an igraph object} 6 | \usage{ 7 | dag_as_igraph(dag) 8 | } 9 | \arguments{ 10 | \item{dag}{An \code{ontology_DAG} object.} 11 | } 12 | \value{ 13 | An \code{\link[igraph:aaa-igraph-package]{igraph::igraph}} object. 14 | } 15 | \description{ 16 | Convert to an igraph object 17 | } 18 | \details{ 19 | If \code{relations} is already set in \code{\link[=create_ontology_DAG]{create_ontology_DAG()}}, relations are also set as an edge attribute in the \code{\link[igraph:aaa-igraph-package]{igraph::igraph}} object. 
20 | } 21 | \examples{ 22 | parents = c("a", "a", "b", "b", "c", "d") 23 | children = c("b", "c", "c", "d", "e", "f") 24 | dag = create_ontology_DAG(parents, children) 25 | dag_as_igraph(dag) 26 | } 27 | -------------------------------------------------------------------------------- /man/dag_root.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{dag_root} 4 | \alias{dag_root} 5 | \alias{dag_leaves} 6 | \alias{dag_is_leaf} 7 | \title{Root or leaves of the DAG} 8 | \usage{ 9 | dag_root(dag, in_labels = TRUE) 10 | 11 | dag_leaves(dag, in_labels = TRUE) 12 | 13 | dag_is_leaf(dag, terms) 14 | } 15 | \arguments{ 16 | \item{dag}{An \code{ontology_DAG} object.} 17 | 18 | \item{in_labels}{Whether the terms are represented in their names or as integer indices?} 19 | 20 | \item{terms}{A vector of term names.} 21 | } 22 | \value{ 23 | A character or an integer vector. 24 | } 25 | \description{ 26 | Root or leaves of the DAG 27 | } 28 | \examples{ 29 | parents = c("a", "a", "b", "b", "c", "d") 30 | children = c("b", "c", "c", "d", "e", "f") 31 | dag = create_ontology_DAG(parents, children) 32 | dag_root(dag) 33 | dag_leaves(dag) 34 | } 35 | -------------------------------------------------------------------------------- /man/random_terms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/enrich.R 3 | \name{random_terms} 4 | \alias{random_terms} 5 | \alias{random_items} 6 | \title{Randomly sample terms/items} 7 | \usage{ 8 | random_terms(dag, n) 9 | 10 | random_items(dag, n) 11 | } 12 | \arguments{ 13 | \item{dag}{An \code{ontology_DAG} object.} 14 | 15 | \item{n}{Number of terms or items.} 16 | } 17 | \value{ 18 | A character vector of terms or items. 
19 | } 20 | \description{ 21 | Randomly sample terms/items 22 | } 23 | \examples{ 24 | parents = c("a", "a", "b", "b", "c", "d") 25 | children = c("b", "c", "c", "d", "e", "f") 26 | annotation = list( 27 | "a" = c("t1", "t2", "t3"), 28 | "b" = c("t3", "t4"), 29 | "c" = "t5", 30 | "d" = "t7", 31 | "e" = c("t4", "t5", "t6", "t7"), 32 | "f" = "t8" 33 | ) 34 | dag = create_ontology_DAG(parents, children, annotation = annotation) 35 | random_terms(dag, 3) 36 | random_items(dag, 3) 37 | } 38 | -------------------------------------------------------------------------------- /src/permutation.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | #include "utils.h" 4 | 5 | 6 | // [[Rcpp::export]] 7 | NumericMatrix cpp_random_aggregatioin(IntegerVector size, NumericVector value, int perm) { 8 | int n = size.size(); 9 | 10 | NumericMatrix m(n, perm); 11 | NumericVector v2; 12 | 13 | for(int i = 0; i < perm; i ++) { 14 | 15 | if(i % 10 == 0) { 16 | message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false); 17 | message("going through " + std::to_string(i) + " / " + std::to_string(perm) + " permutations ...", false); 18 | } 19 | 20 | for(int j = 0; j < n; j ++) { 21 | v2 = sample(value, size[j]); 22 | m(j, i) = mean(v2); 23 | } 24 | } 25 | 26 | message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false); 27 | message("going through " + std::to_string(perm) + " / " + std::to_string(perm) + " permutations ... 
Done.", true); 28 | 29 | 30 | return m; 31 | } 32 | -------------------------------------------------------------------------------- /man/mcols_ontology_DAG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{mcols,ontology_DAG-method} 4 | \alias{mcols,ontology_DAG-method} 5 | \alias{mcols<-,ontology_DAG-method} 6 | \title{Get or set meta columns on DAG} 7 | \usage{ 8 | \S4method{mcols}{ontology_DAG}(x, use.names = TRUE, ...) 9 | 10 | \S4method{mcols}{ontology_DAG}(x, ...) <- value 11 | } 12 | \arguments{ 13 | \item{x}{An \code{ontology_DAG} object.} 14 | 15 | \item{use.names}{Please ignore.} 16 | 17 | \item{...}{Other argument. For \code{mcols()}, it can be a vector of column names in the meta data frame.} 18 | 19 | \item{value}{A data frame or a matrix where rows should correspond to terms in \code{x@terms}.} 20 | } 21 | \value{ 22 | A data frame. 
23 | } 24 | \description{ 25 | Get or set meta columns on DAG 26 | } 27 | \examples{ 28 | parents = c("a", "a", "b", "b", "c", "d") 29 | children = c("b", "c", "c", "d", "e", "f") 30 | dag = create_ontology_DAG(parents, children) 31 | mcols(dag) = data.frame(id = letters[1:6], v = 1:6) 32 | mcols(dag) 33 | mcols(dag, "id") 34 | dag 35 | } 36 | -------------------------------------------------------------------------------- /tests/testthat/tests_partition.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | 4 | dag = create_ontology_DAG( 5 | c("a - b", 6 | "a - c", 7 | "a - d", 8 | "b - e", 9 | "b - f", 10 | "c - g", 11 | "d - h", 12 | "g - i", 13 | "g - j", 14 | "h - k", 15 | "h - l", 16 | "i - m", 17 | "j - n", 18 | "k - o", 19 | "l - p") 20 | ) 21 | 22 | test_that("test partition_by_level", { 23 | expect_equal( 24 | partition_by_level(dag, level = 0), 25 | rep("a", 16) 26 | ) 27 | expect_equal( 28 | partition_by_level(dag, level = 1), 29 | c(NA, "b", "c", "d", "b", "b", "c", "d", "c", "c", "d", "d", "c", "c", "d", "d") 30 | ) 31 | expect_equal( 32 | partition_by_level(dag, level = 2), 33 | c(NA, NA, NA, NA, "e", "f", "g", "h", "g", "g", "h", "h", "g", "g", "h", "h") 34 | ) 35 | 36 | }) 37 | 38 | test_that("test partition_by_size", { 39 | expect_equal( 40 | partition_by_size(dag, size = 3), 41 | c(NA, "b", NA, NA, "b", "b", "g", "h", "g", "g", "h", "h", "g", "g", "h", "h") 42 | ) 43 | 44 | expect_equal( 45 | partition_by_size(dag, size = 6), 46 | c(NA, "b", "c", "d", "b", "b", "c", "d", "c", "c", "d", "d", "c", "c", "d", "d") 47 | ) 48 | }) 49 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | ## Version 1.7.1 2 | 3 | - add `ontology_reactome()` 4 | - add `rankdir` argument to `dag_as_DOT()`. 5 | - add `dag_is_offspring()`/`dag_is_ancestor()`. 
6 | 7 | ---------------- 8 | 9 | ## Version 1.3.1 10 | 11 | - `cpp_check_cyclic_node()`: a term is marked as "checked" if it is traversed. 12 | - `import_obo()`: ignore is_a that has gci_filter. 13 | - add `CA_terms()`. 14 | 15 | ----------------- 16 | 17 | ## Version 1.1.12 18 | 19 | - add `alternative_terms` argument in `create_ontology_DAG()` 20 | 21 | ---------------- 22 | 23 | ## Version 1.1.5 24 | 25 | - add `singleton_ontology()`. 26 | 27 | ----------------- 28 | 29 | ## Version 1.1.3 30 | 31 | - add `dag_shiny()` 32 | - add `dag_annotated_with_genes_*()` 33 | 34 | ------------------ 35 | 36 | ## Version 1.1.1 37 | 38 | - add functions to generate random DAGs 39 | - add dag_reorder() 40 | 41 | ------------------ 42 | 43 | ## Version 0.99.6 44 | 45 | - optimized Shen and SSDD methods 46 | 47 | ------------------ 48 | 49 | ## Version 0.99.4 50 | 51 | - Revised according to https://github.com/Bioconductor/Contributions/issues/3108 52 | 53 | ------------------ 54 | 55 | ## Version 0.99.0 56 | 57 | - First release. 58 | -------------------------------------------------------------------------------- /man/all_methods.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/00_global.R 3 | \name{all_term_IC_methods} 4 | \alias{all_term_IC_methods} 5 | \alias{all_term_sim_methods} 6 | \alias{all_group_sim_methods} 7 | \title{Supported methods} 8 | \usage{ 9 | all_term_IC_methods(require_anno = NULL) 10 | 11 | all_term_sim_methods(require_anno = NULL) 12 | 13 | all_group_sim_methods(require_anno = NULL) 14 | } 15 | \arguments{ 16 | \item{require_anno}{If it is set to \code{TRUE}, methods that require external annotations are only returned. If 17 | it is set to \code{FALSE}, methods that do not require annotations are returned. A value of \code{NULL} returns both.} 18 | } 19 | \value{ 20 | A character vector of all supported methods. 
21 | } 22 | \description{ 23 | Supported methods 24 | } 25 | \details{ 26 | \itemize{ 27 | \item \code{all_term_IC_methods()}: A vector of all supported IC methods. 28 | \item \code{all_term_sim_methods()}: A vector of all supported term similarity methods. 29 | \item \code{all_group_sim_methods()}: A vector of all supported group similarity methods. 30 | } 31 | } 32 | \examples{ 33 | all_term_IC_methods() 34 | all_term_sim_methods() 35 | all_group_sim_methods() 36 | } 37 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs 2 | 3 | navbar: 4 | title: "simona" 5 | left: 6 | - text: "Articles" 7 | menu: 8 | - text: "1.ontology_DAG: a class for ontology data" 9 | href: articles/v01_dag.html 10 | - text: "2.Gene Ontology" 11 | href: articles/v02_GO.html 12 | - text: "3.Import ontology files" 13 | href: articles/v03_import.html 14 | - text: "4.Information content" 15 | href: articles/v04_information_content.html 16 | - text: "5.Term similarity" 17 | href: articles/v05_term_similarity.html 18 | - text: "6.Group similarity" 19 | href: articles/v06_group_similarity.html 20 | - text: "7.Visualize DAGs" 21 | href: articles/v07_dag_visualization.html 22 | - text: "8.Random DAGs" 23 | href: articles/v08_random.html 24 | - text: "9.Shiny app" 25 | href: articles/v09_shiny.html 26 | - text: "10.Functional enrichment" 27 | href: articles/v10_enrichment.html 28 | - text: "Reference" 29 | href: reference/index.html 30 | - text: "Supplementary" 31 | href: https://jokergoo.github.io/simona_supplementary/ 32 | - text: "OBO Foundry gallery" 33 | href: https://jokergoo.github.io/simona_supplementary/suppl6_OBOFoundry_gallery/OBOFoundry_viz.html 34 | 35 | 36 | -------------------------------------------------------------------------------- /man/dag_distinct_ancestors.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transverse.R 3 | \name{dag_distinct_ancestors} 4 | \alias{dag_distinct_ancestors} 5 | \title{Distinct ancestors of a list of terms} 6 | \usage{ 7 | dag_distinct_ancestors( 8 | dag, 9 | terms, 10 | in_labels = TRUE, 11 | verbose = simona_opt$verbose 12 | ) 13 | } 14 | \arguments{ 15 | \item{dag}{An \code{ontology_DAG} object.} 16 | 17 | \item{terms}{A vector of term names.} 18 | 19 | \item{in_labels}{Whether the terms are represented in their names or as integer indices?} 20 | 21 | \item{verbose}{Whether to print messages. 22 | 23 | Consider a subgraph that contains \code{terms} and their offspring terms, induced from the complete DAG. 24 | the returned subset of terms are those with zero in-degree, or have no finite directional distance 25 | from others in the subgraph.} 26 | } 27 | \value{ 28 | An integer vector or a character vector depending on the value of \code{in_labels}. 29 | } 30 | \description{ 31 | For a given list of terms, it returns a subset of terms which have 32 | no ancestor relations to each other. 
33 | } 34 | \examples{ 35 | parents = c("a", "a", "b", "b", "c", "d") 36 | children = c("b", "c", "c", "d", "e", "f") 37 | dag = create_ontology_DAG(parents, children) 38 | dag_distinct_ancestors(dag, c("c", "d", "e", "f")) 39 | } 40 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: simona 2 | Type: Package 3 | Title: Semantic Similarity on Bio-Ontologies 4 | Version: 1.7.1 5 | Date: 2025-06-11 6 | Authors@R: person("Zuguang", "Gu", email = "z.gu@dkfz.de", role = c("aut", "cre"), 7 | comment = c('ORCID'="0000-0002-7395-8709")) 8 | Depends: R (>= 4.1.0) 9 | Imports: methods, Rcpp, matrixStats, GetoptLong, grid, GlobalOptions, 10 | igraph, Polychrome, S4Vectors, xml2 (>= 1.3.3), circlize, ComplexHeatmap, 11 | grDevices, stats, utils, shiny, fastmatch 12 | Suggests: knitr, testthat, BiocManager, GO.db, org.Hs.eg.db, proxyC, AnnotationDbi, 13 | Matrix, DiagrammeR, ragg, png, InteractiveComplexHeatmap, UniProtKeywords, 14 | simplifyEnrichment, AnnotationHub, jsonlite 15 | LinkingTo: Rcpp 16 | VignetteBuilder: knitr 17 | Description: This package implements infrastructures for ontology analysis by offering 18 | efficient data structures, fast ontology traversal methods, and elegant visualizations. 19 | It provides a robust toolbox supporting over 70 methods for semantic similarity analysis. 
20 | biocViews: Software, Annotation, GO, BiomedicalInformatics 21 | URL: https://github.com/jokergoo/simona 22 | BugReports: https://github.com/jokergoo/simona/issues 23 | SystemRequirements: Perl, Java 24 | License: MIT + file LICENSE 25 | RoxygenNote: 7.3.2 26 | Encoding: UTF-8 27 | Roxygen: list(markdown = TRUE) 28 | -------------------------------------------------------------------------------- /man/annotation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/annotation.R 3 | \name{term_annotations} 4 | \alias{term_annotations} 5 | \alias{annotated_terms} 6 | \title{Term-item associations} 7 | \usage{ 8 | term_annotations(dag, terms, return = "list") 9 | 10 | annotated_terms(dag, anno, return = "list") 11 | } 12 | \arguments{ 13 | \item{dag}{An \code{ontology_DAG} object.} 14 | 15 | \item{terms}{A vector of term names.} 16 | 17 | \item{return}{Whether the returned object is a list or a matrix?} 18 | 19 | \item{anno}{A vector of annotated item names.} 20 | } 21 | \value{ 22 | A list or a binary matrix showing annotation relations between terms and items. 23 | } 24 | \description{ 25 | Term-item associations 26 | } 27 | \details{ 28 | If an item is annotated to a term, all this term's ancestor terms are also annotated. 
29 | } 30 | \examples{ 31 | parents = c("a", "a", "b", "b", "c", "d") 32 | children = c("b", "c", "c", "d", "e", "f") 33 | annotation = list( 34 | "a" = c("t1", "t2", "t3"), 35 | "b" = c("t3", "t4"), 36 | "c" = "t5", 37 | "d" = "t7", 38 | "e" = c("t4", "t5", "t6", "t7"), 39 | "f" = "t8" 40 | ) 41 | dag = create_ontology_DAG(parents, children, annotation = annotation) 42 | term_annotations(dag, letters[1:6]) 43 | term_annotations(dag, letters[1:6], return = "matrix") 44 | annotated_terms(dag, c("t1", "t2", "t3")) 45 | annotated_terms(dag, c("t1", "t2", "t3"), return = "matrix") 46 | } 47 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | 2 | #' @importFrom utils packageDescription 3 | .onAttach = function(libname, pkgname) { 4 | version = packageDescription(pkgname, fields = "Version") 5 | 6 | msg = paste0("======================================== 7 | ", pkgname, " version ", version, " 8 | Bioconductor page: http://bioconductor.org/packages/simona/ 9 | Github page: https://github.com/jokergoo/simona 10 | Documentation: https://jokergoo.github.io/simona/ 11 | 12 | If you use it in published research, please cite: 13 | Gu, Z. simona: a Comprehensive R package for Semantic Similarity 14 | Analysis on Bio-Ontologies. BMC Genomics, 2024. 
15 | 16 | This message can be suppressed by: 17 | suppressPackageStartupMessages(library(simona)) 18 | ======================================== 19 | ") 20 | 21 | packageStartupMessage(msg) 22 | } 23 | 24 | 25 | finalize = function(env) { 26 | if(!is.null(simona_opt$robot_jar)) { 27 | if(grepl("robot_temp_", simona_opt$robot_jar)) { 28 | if(file.exists(simona_opt$robot_jar)) { 29 | file.remove(simona_opt$robot_jar) 30 | simona_opt$robot_jar = NULL 31 | } 32 | } 33 | } 34 | } 35 | 36 | .onLoad = function(libname, pkgname) { 37 | parent = parent.env(environment()) 38 | reg.finalizer(parent, finalize, onexit = TRUE) 39 | } 40 | 41 | .onUnload = function(libpath) { 42 | if(!is.null(simona_opt$robot_jar)) { 43 | if(grepl("robot_temp_", simona_opt$robot_jar)) { 44 | if(file.exists(simona_opt$robot_jar)) { 45 | file.remove(simona_opt$robot_jar) 46 | simona_opt$robot_jar = NULL 47 | } 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /man/dag_depth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transverse.R 3 | \name{dag_depth} 4 | \alias{dag_depth} 5 | \alias{dag_height} 6 | \alias{dag_shortest_dist_from_root} 7 | \alias{dag_shortest_dist_to_leaves} 8 | \title{Depth and height in the DAG} 9 | \usage{ 10 | dag_depth(dag, terms = NULL, use_cache = TRUE) 11 | 12 | dag_height(dag, terms = NULL, use_cache = TRUE) 13 | 14 | dag_shortest_dist_from_root(dag, terms = NULL, use_cache = TRUE) 15 | 16 | dag_shortest_dist_to_leaves(dag, terms = NULL, use_cache = TRUE) 17 | } 18 | \arguments{ 19 | \item{dag}{An \code{ontology_DAG} object.} 20 | 21 | \item{terms}{A vector of term names. 
If it is set, the returned vector will be subsetted to the terms that have been set here.} 22 | 23 | \item{use_cache}{Internally used.} 24 | } 25 | \value{ 26 | An integer vector with length the same as the number of total terms in the DAG. 27 | } 28 | \description{ 29 | Depth and height in the DAG 30 | } 31 | \details{ 32 | The depth of a term in the DAG is defined as the maximal distance from the root. The height 33 | of a term in the DAG is the maximal finite distance to all leaf terms. 34 | 35 | \code{dag_shortest_dist_from_root()} and \code{dag_shortest_dist_to_leaves()} calculate the minimal distance from the root or to the leaves. 36 | The words "from" and "to" emphasize that the distance is directional. 37 | } 38 | \examples{ 39 | parents = c("a", "a", "b", "b", "c", "d") 40 | children = c("b", "c", "c", "d", "e", "f") 41 | dag = create_ontology_DAG(parents, children) 42 | dag_depth(dag) 43 | dag_height(dag) 44 | dag_shortest_dist_from_root(dag) 45 | dag_shortest_dist_to_leaves(dag) 46 | } 47 | -------------------------------------------------------------------------------- /man/dag_terms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transverse.R 3 | \name{dag_parents} 4 | \alias{dag_parents} 5 | \alias{dag_children} 6 | \alias{dag_siblings} 7 | \alias{dag_ancestors} 8 | \alias{dag_offspring} 9 | \title{Parent/child/ancestor/offspring terms} 10 | \usage{ 11 | dag_parents(dag, term, in_labels = TRUE) 12 | 13 | dag_children(dag, term, in_labels = TRUE) 14 | 15 | dag_siblings(dag, term, in_labels = TRUE) 16 | 17 | dag_ancestors(dag, term, in_labels = TRUE, include_self = FALSE) 18 | 19 | dag_offspring(dag, term, in_labels = TRUE, include_self = FALSE) 20 | } 21 | \arguments{ 22 | \item{dag}{An \code{ontology_DAG} object.} 23 | 24 | \item{term}{The value can be a vector of multiple term names. 
If it is a vector, it returns 25 | union of the upstream/downstream terms of the selected set of terms. For \code{dag_siblings()}, 26 | the value can only be a single term.} 27 | 28 | \item{in_labels}{Whether the terms are represented in their names or as integer indices?} 29 | 30 | \item{include_self}{For \code{dag_offspring()} and \code{dag_ancestors()}, this controls whether to also include the query term itself.} 31 | } 32 | \value{ 33 | An integer vector or a character vector depending on the value of \code{in_labels}. 34 | } 35 | \description{ 36 | Parent/child/ancestor/offspring terms 37 | } 38 | \examples{ 39 | parents = c("a", "a", "b", "b", "c", "d") 40 | children = c("b", "c", "c", "d", "e", "f") 41 | dag = create_ontology_DAG(parents, children) 42 | dag_parents(dag, "b") 43 | dag_parents(dag, "c", in_labels = FALSE) 44 | dag_children(dag, "b") 45 | dag_siblings(dag, "c") 46 | dag_ancestors(dag, "e") 47 | dag_ancestors(dag, "b") 48 | } 49 | -------------------------------------------------------------------------------- /man/n_terms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transverse.R 3 | \name{n_offspring} 4 | \alias{n_offspring} 5 | \alias{n_ancestors} 6 | \alias{n_connected_leaves} 7 | \alias{n_parents} 8 | \alias{n_children} 9 | \alias{avg_parents} 10 | \alias{avg_children} 11 | \title{Number of parent/child/ancestor/offspring/leaf terms} 12 | \usage{ 13 | n_offspring(dag, terms = NULL, use_cache = TRUE, include_self = FALSE) 14 | 15 | n_ancestors(dag, terms = NULL, use_cache = TRUE, include_self = FALSE) 16 | 17 | n_connected_leaves(dag, terms = NULL, use_cache = TRUE) 18 | 19 | n_parents(dag, terms = NULL) 20 | 21 | n_children(dag, terms = NULL) 22 | 23 | avg_parents(dag) 24 | 25 | avg_children(dag) 26 | } 27 | \arguments{ 28 | \item{dag}{An \code{ontology_DAG} object.} 29 | 30 | \item{terms}{A vector of term names. 
If the value is \code{NULL}, it returns for all terms in the DAG.} 31 | 32 | \item{use_cache}{Internally used.} 33 | 34 | \item{include_self}{For \code{n_offspring()} and \code{n_ancestors()}, this controls whether to also include the query term itself.} 35 | } 36 | \value{ 37 | An integer vector. 38 | } 39 | \description{ 40 | Number of parent/child/ancestor/offspring/leaf terms 41 | } 42 | \details{ 43 | For \code{n_connected_leaves()}, leaf nodes have value of 1. 44 | 45 | In \code{avg_parents()}, root term is removed. 46 | 47 | In \code{avg_children()}, leaf term is removed. 48 | } 49 | \examples{ 50 | parents = c("a", "a", "b", "b", "c", "d") 51 | children = c("b", "c", "c", "d", "e", "f") 52 | dag = create_ontology_DAG(parents, children) 53 | n_parents(dag) 54 | n_children(dag) 55 | n_offspring(dag) 56 | n_ancestors(dag) 57 | n_connected_leaves(dag) 58 | } 59 | -------------------------------------------------------------------------------- /man/dag_treelize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tree.R 3 | \name{dag_treelize} 4 | \alias{dag_treelize} 5 | \alias{dag_as_dendrogram} 6 | \alias{print.ontology_tree} 7 | \title{Reduce the DAG to a tree} 8 | \usage{ 9 | dag_treelize(dag, verbose = simona_opt$verbose) 10 | 11 | dag_as_dendrogram(dag) 12 | 13 | \method{print}{ontology_tree}(x, ...) 14 | } 15 | \arguments{ 16 | \item{dag}{An \code{ontology_DAG} object.} 17 | 18 | \item{verbose}{Whether to print messages.} 19 | 20 | \item{x}{An \code{ontology_DAG} object.} 21 | 22 | \item{...}{Ignored.} 23 | } 24 | \value{ 25 | A tree is also an \code{ontology_DAG} object. 26 | } 27 | \description{ 28 | Reduce the DAG to a tree 29 | } 30 | \details{ 31 | A tree is a reduced DAG where a child only has one parent. 
The reduction is applied by a breadth-first search. 32 | 33 | Starting from the root and on a certain depth (the depth is the maximal distance to root), for every term \code{t} on this depth, 34 | its child term \code{c} and parent-child relation are kept only when \code{depth(c) == depth(t) + 1}. If \code{c} is selected, it is 35 | marked as visited and will not be checked again. 36 | 37 | In this way, depths of all terms in the original DAG are still identical to the depths in the tree (see the Examples section). 38 | 39 | \code{dag_as_dendrogram()} converts the tree to a \code{dendrogram} object. 40 | } 41 | \examples{ 42 | parents = c("a", "a", "b", "b", "c", "d") 43 | children = c("b", "c", "c", "d", "e", "f") 44 | dag = create_ontology_DAG(parents, children) 45 | tree = dag_treelize(dag) 46 | d1 = dag_depth(dag) 47 | d2 = dag_depth(tree) 48 | identical(d1, d2) 49 | 50 | dend = dag_as_dendrogram(tree) 51 | dend 52 | } 53 | -------------------------------------------------------------------------------- /man/simona_opt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/00_global.R 3 | \name{simona_opt} 4 | \alias{simona_opt} 5 | \title{Global options} 6 | \usage{ 7 | simona_opt(..., RESET = FALSE, READ.ONLY = NULL, LOCAL = FALSE, ADD = FALSE) 8 | } 9 | \arguments{ 10 | \item{...}{Name-value pairs for options.} 11 | 12 | \item{RESET}{Reset to default option values.} 13 | 14 | \item{READ.ONLY}{Only return read only options.} 15 | 16 | \item{LOCAL}{Only return local options.} 17 | 18 | \item{ADD}{Add new options.} 19 | } 20 | \value{ 21 | A single option value. 22 | } 23 | \description{ 24 | Global options 25 | } 26 | \details{ 27 | There are the following global options: 28 | \itemize{ 29 | \item \code{use_cache}: By default, information content of all terms is cached and reused. 
If \code{use_cache} is set to \code{FALSE}, IC will be re-calculated. 30 | \item \code{verbose}: Whether to print messages? 31 | \item \code{anno_uniquify}: In the annotation-based IC method, the union of items annotated to the term as well as all its offspring terms is used, which means 32 | the set of annotated items for the term is uniquified. If \code{anno_uniquify} is set to \code{FALSE}, the uniquification is not applied, we simply add the number 33 | of items annotated to the term and the numbers of items annotated to each of its offspring terms. 34 | \item \code{robot_jar}: Path of the \code{robot.jar} file. The file can be found from \url{https://github.com/ontodev/robot/releases}. 35 | } 36 | 37 | To set an option, you can use \code{$}: 38 | 39 | \if{html}{\out{
}}\preformatted{simona_opt$verbose = FALSE 40 | }\if{html}{\out{
}} 41 | 42 | or use it as a function: 43 | 44 | \if{html}{\out{
}}\preformatted{simona_opt(verbose = FALSE) 45 | }\if{html}{\out{
}} 46 | } 47 | \examples{ 48 | simona_opt 49 | } 50 | -------------------------------------------------------------------------------- /src/partition.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #include "traverse.h" 5 | 6 | // [[Rcpp::export]] 7 | IntegerVector cpp_partition_by_size(S4 tree, int size) { 8 | List lt_children = tree.slot("lt_children"); 9 | int n = tree.slot("n_terms"); 10 | int root = tree.slot("root"); 11 | 12 | IntegerVector n_offspring = cpp_n_offspring_on_tree(tree, true); 13 | IntegerVector pa(n, -1); 14 | 15 | // breadth-first search 16 | IntegerVector current_nodes = {root}; 17 | while(current_nodes.size()) { 18 | IntegerVector current_nodes2; 19 | for(int i = 0; i < current_nodes.size(); i ++) { 20 | int i_node = current_nodes[i] - 1; 21 | IntegerVector children = lt_children[i_node]; 22 | 23 | if(children.size() == 0) { // leaf 24 | pa[i_node] = current_nodes[i]; 25 | } else { 26 | 27 | if(n_offspring[i_node] <= size) { 28 | Rcout << "getting offsprings for " << current_nodes[i] << "\n"; 29 | IntegerVector offspring = cpp_offspring(tree, current_nodes[i], true); 30 | pa[offspring-1] = current_nodes[i]; 31 | continue; 32 | } 33 | 34 | // check i_node's children 35 | bool all_small_children = true; 36 | for(int j = 0; j < children.size(); j ++) { 37 | if(n_offspring[children[j]-1] >= size) { 38 | all_small_children = false; 39 | } 40 | } 41 | if(all_small_children) { 42 | IntegerVector offspring = cpp_offspring(tree, current_nodes[i], true); 43 | pa[offspring-1] = current_nodes[i]; 44 | } else { 45 | for(int j = 0; j < children.size(); j ++) { 46 | if(n_offspring[children[j]-1] <= size) { 47 | IntegerVector offspring = cpp_offspring(tree, children[j], true); 48 | pa[offspring-1] = children[j]; 49 | } else { 50 | current_nodes2.push_back(children[j]); 51 | } 52 | } 53 | } 54 | } 55 | } 56 | current_nodes = current_nodes2; 57 | } 58 | 59 | return pa; 60 | } 
61 | -------------------------------------------------------------------------------- /man/dag_filter.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter.R 3 | \name{dag_filter} 4 | \alias{dag_filter} 5 | \title{Filter the DAG} 6 | \usage{ 7 | dag_filter( 8 | dag, 9 | terms = NULL, 10 | relations = NULL, 11 | root = NULL, 12 | leaves = NULL, 13 | mcols_filter = NULL, 14 | namespace = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{dag}{An \code{ontology_DAG} object.} 19 | 20 | \item{terms}{A vector of term names. The sub-DAG will only contain these terms.} 21 | 22 | \item{relations}{A vector of relations. The sub-DAG will only contain these relations. 23 | Valid values of "relations" should correspond to the values set in the 24 | \code{relations} argument in the \code{\link[=create_ontology_DAG]{create_ontology_DAG()}}. If \code{relations_DAG} is 25 | already provided, offspring relation types will all be selected. Note "is_a" 26 | is always included.} 27 | 28 | \item{root}{A vector of term names which will be used as roots of the sub-DAG. Only 29 | these with their offspring terms will be kept. If there are multiple root terms, 30 | a super root will be automatically added.} 31 | 32 | \item{leaves}{A vector of leaf terms. Only these with their ancestor terms will be kept.} 33 | 34 | \item{mcols_filter}{Filtering on columns in the meta data frame.} 35 | 36 | \item{namespace}{The prefix before ":" of the term IDs.} 37 | } 38 | \value{ 39 | An \code{ontology_DAG} object. 40 | } 41 | \description{ 42 | Filter the DAG 43 | } 44 | \details{ 45 | If the DAG is reduced into several disconnected parts after the filtering, a 46 | super root is automatically added. 
47 | } 48 | \examples{ 49 | parents = c("a", "a", "b", "b", "c", "d") 50 | children = c("b", "c", "c", "d", "e", "f") 51 | dag = create_ontology_DAG(parents, children) 52 | dag_filter(dag, terms = c("b", "d", "f")) 53 | dag_filter(dag, root = "b") 54 | dag_filter(dag, leaves = c("c", "b")) 55 | dag_filter(dag, root = "b", leaves = "e") 56 | 57 | \donttest{ 58 | dag = create_ontology_DAG_from_GO_db() 59 | dag_filter(dag, relations = "is_a") 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /man/dag_reorder.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/reorder.R 3 | \name{dag_reorder} 4 | \alias{dag_reorder} 5 | \alias{dag_permutate_children} 6 | \title{Reorder the DAG} 7 | \usage{ 8 | dag_reorder(dag, value, verbose = simona_opt$verbose) 9 | 10 | dag_permutate_children(dag, verbose = simona_opt$verbose) 11 | } 12 | \arguments{ 13 | \item{dag}{An \code{ontology_DAG} object.} 14 | 15 | \item{value}{A vector of numeric values. See the \strong{Details} section.} 16 | 17 | \item{verbose}{Whether to print messages.} 18 | } 19 | \value{ 20 | An \code{ontology_DAG} object. 21 | } 22 | \description{ 23 | Reorder the DAG 24 | } 25 | \details{ 26 | In \code{dag_reorder()}, there are two ways to set the \code{value} argument. It can be a vector corresponding 27 | to all terms (in the same order as in \code{\link[=dag_all_terms]{dag_all_terms()}}) or a vector corresponding 28 | to all leaf terms (in the same order as in \code{\link[=dag_leaves]{dag_leaves()}}). If \code{value} corresponds 29 | to all terms, the score associated with each term is the average value of all its offspring terms. 30 | And if \code{value} corresponds to all leaf terms, the score for each term is the average of all its 31 | connectable leaves. 32 | 33 | The reordering is simply applied on each term to reorder its child terms. 
34 | 35 | \code{dag_permutate_children()} randomly permutes child terms under a term. 36 | } 37 | \examples{ 38 | parents = c("a", "a", "b", "b", "c", "d") 39 | children = c("b", "c", "c", "d", "e", "f") 40 | # by default, c and e locate on the left side, d and f locate on the right side 41 | dag = create_ontology_DAG(parents, children) 42 | dag_children(dag, "b") 43 | 44 | # move c and e to the right side of the diagram 45 | dag2 = dag_reorder(dag, value = c(1, 1, 10, 1, 10, 1)) 46 | dag_children(dag2, "b") 47 | 48 | # we can also only set values for leaf terms 49 | # there are two leaf terms c and e 50 | # we let v(c) > v(e) to move c to the right side of the diagram 51 | dag3 = dag_reorder(dag, value = c(10, 1)) 52 | dag_children(dag3, "b") 53 | } 54 | -------------------------------------------------------------------------------- /man/partition_by_level.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/partition.R 3 | \name{partition_by_level} 4 | \alias{partition_by_level} 5 | \alias{partition_by_size} 6 | \title{Partition the DAG} 7 | \usage{ 8 | partition_by_level(dag, level = 1, from = NULL, term_pos = NULL) 9 | 10 | partition_by_size(dag, size = round(dag_n_terms(dag)/5)) 11 | } 12 | \arguments{ 13 | \item{dag}{An \code{ontology_DAG} object.} 14 | 15 | \item{level}{Depth in the DAG to cut. The DAG is cut below terms (or cut the links to their child terms) with \code{depth == level}.} 16 | 17 | \item{from}{A list of terms to cut. If it is set, \code{level} is ignored.} 18 | 19 | \item{term_pos}{Internally used.} 20 | 21 | \item{size}{Number of terms in a cluster. The splitting stops on a term if all its child-trees are smaller than \code{size}.} 22 | } 23 | \value{ 24 | A character vector of top terms in each partition. 
25 | } 26 | \description{ 27 | Partition the DAG 28 | } 29 | \details{ 30 | Let's call the terms below the \code{from} term as "top terms" because they will be on top of the sub-DAGs after the partitioning. 31 | It is possible that a term in the middle of the DAG can be traced back to more than one top term. 32 | To partition all terms exclusively, a term is partitioned to the sub-DAG from the top term with the largest distance to the term. 33 | If a term has the same largest distances to several top terms, a random top term is selected. 34 | 35 | In \code{partition_by_size()}, the DAG is first reduced to a tree where a child term only has one parent. 36 | The partition is done recursively by cutting into its child-trees. 37 | The splitting stops when all the child-trees have size less than \code{size}. 38 | 39 | \code{NA} is assigned to the \code{from} terms, their ancestor terms, and terms having infinite directed distance to \code{from} terms. 40 | } 41 | \examples{ 42 | \donttest{ 43 | dag = create_ontology_DAG_from_GO_db() 44 | pa = partition_by_level(dag) 45 | table(pa) 46 | pa = partition_by_size(dag, size = 1000) 47 | table(pa) 48 | } 49 | 1 50 | } 51 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | 
nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /man/dag_longest_dist_to_offspring.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transverse.R 3 | \name{dag_longest_dist_to_offspring} 4 | \alias{dag_longest_dist_to_offspring} 5 | \alias{dag_shortest_dist_to_offspring} 6 | \alias{dag_longest_dist_from_ancestors} 7 | \alias{dag_shortest_dist_from_ancestors} 8 | \title{Distance from all ancestors/to all offspring in the DAG} 9 | \usage{ 10 | dag_longest_dist_to_offspring(dag, from, terms = NULL, background = NULL) 11 | 12 | dag_shortest_dist_to_offspring(dag, from, terms = NULL, background = NULL) 13 | 14 | dag_longest_dist_from_ancestors(dag, to, terms = NULL, background = NULL) 15 | 16 | dag_shortest_dist_from_ancestors(dag, to, terms = NULL, background = NULL) 17 | } 18 | \arguments{ 19 | \item{dag}{An \code{ontology_DAG} object.} 20 | 21 | \item{from}{A single term name or a vector of term names.} 22 | 23 | \item{terms}{A vector of term names. If it is set, the returned vector will be subsetted to the terms that have been set here.} 24 | 25 | \item{background}{A vector of terms. Then the lookup will only be applied in this set of terms.} 26 | 27 | \item{to}{Same format as the \code{from} argument.} 28 | } 29 | \value{ 30 | An integer vector having length the same as the number of terms in the DAG. If terms are not 31 | reachable to the \code{from} or \code{to} terms, the corresponding value is -1. 32 | } 33 | \description{ 34 | Distance from all ancestors/to all offspring in the DAG 35 | } 36 | \details{ 37 | If \code{from} or \code{to} is a vector, for a specific term, the longest/shortest distance among all \code{from}/\code{to} terms is taken. 
38 | 39 | As a special case, when \code{from} is the root term, \code{dag_longest_dist_to_offspring()} is the same as \code{dag_depth()}, 40 | and when \code{to} are all leaf terms, \code{dag_longest_dist_from_ancestors()} is the same as \code{dag_height()}. 41 | } 42 | \examples{ 43 | parents = c("a", "a", "b", "b", "c", "d") 44 | children = c("b", "c", "c", "d", "e", "f") 45 | dag = create_ontology_DAG(parents, children) 46 | dag_longest_dist_from_ancestors(dag, "e") 47 | dag_shortest_dist_from_ancestors(dag, "e") 48 | dag_longest_dist_to_offspring(dag, "b") 49 | } 50 | -------------------------------------------------------------------------------- /man/subset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/filter.R 3 | \name{[,ontology_DAG,ANY,ANY,missing-method} 4 | \alias{[,ontology_DAG,ANY,ANY,missing-method} 5 | \alias{[,ontology_DAG,ANY,ANY,ANY-method} 6 | \alias{[,ontology_DAG,ANY,missing,missing-method} 7 | \alias{[,ontology_DAG,ANY,missing,ANY-method} 8 | \alias{[,ontology_DAG,missing,ANY,missing-method} 9 | \alias{[,ontology_DAG,missing,ANY,ANY-method} 10 | \alias{[,ontology_DAG,missing,missing,missing-method} 11 | \alias{[,ontology_DAG,missing,missing,ANY-method} 12 | \alias{[[,ontology_DAG,character,missing-method} 13 | \title{Create sub-DAGs} 14 | \usage{ 15 | \S4method{[}{ontology_DAG,ANY,ANY,missing}(x, i, j, ..., drop = FALSE) 16 | 17 | \S4method{[}{ontology_DAG,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE) 18 | 19 | \S4method{[}{ontology_DAG,ANY,missing,missing}(x, i, j, ..., drop = FALSE) 20 | 21 | \S4method{[}{ontology_DAG,ANY,missing,ANY}(x, i, j, ..., drop = FALSE) 22 | 23 | \S4method{[}{ontology_DAG,missing,ANY,missing}(x, i, j, ..., drop = FALSE) 24 | 25 | \S4method{[}{ontology_DAG,missing,ANY,ANY}(x, i, j, ..., drop = FALSE) 26 | 27 | \S4method{[}{ontology_DAG,missing,missing,missing}(x, i, j, ..., drop = FALSE) 28 | 29 | 
\S4method{[}{ontology_DAG,missing,missing,ANY}(x, i, j, ..., drop = FALSE) 30 | 31 | \S4method{[[}{ontology_DAG,character,missing}(x, i, j, ...) 32 | } 33 | \arguments{ 34 | \item{x}{An \code{ontology_DAG} object.} 35 | 36 | \item{i}{A single term name. The value should be a character vector. It corresponds to the roots.} 37 | 38 | \item{j}{A single term name. The value should be a character vector. It corresponds to the leaves.} 39 | 40 | \item{...}{Ignored.} 41 | 42 | \item{drop}{Ignored.} 43 | } 44 | \value{ 45 | An \code{ontology_DAG} object. 46 | } 47 | \description{ 48 | Create sub-DAGs 49 | } 50 | \details{ 51 | It returns a sub-DAG taking node \code{i} as the root and \code{j} as the leaves. If \code{i} is a vector, a super root will be added. 52 | } 53 | \examples{ 54 | parents = c("a", "a", "b", "b", "c", "d") 55 | children = c("b", "c", "c", "d", "e", "f") 56 | dag = create_ontology_DAG(parents, children) 57 | dag["b"] 58 | dag[["b"]] 59 | dag["b", "f"] 60 | dag[, "f"] 61 | } 62 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. 
"?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /inst/extdata/onto_gene.R: -------------------------------------------------------------------------------- 1 | ## uniprot keywords 2 | 3 | 4 | library(UniProtKeywords) 5 | 6 | data(kw_parents) 7 | 8 | parents = unlist(kw_parents) 9 | children = rep(names(kw_parents), times = sapply(kw_parents, length)) 10 | 11 | dag = create_ontology_DAG(parents, children) 12 | 13 | data(kw_terms) 14 | 15 | meta = data.frame( 16 | id = sapply(kw_terms, function(x) x$Identifier), 17 | accession = sapply(kw_terms, function(x) x$Accession), 18 | name = sapply(kw_terms, function(x) x$Identifier), 19 | description = sapply(kw_terms, function(x) x$Description), 20 | category = sapply(kw_terms, function(x) paste(x$Category, collapse = "; ")) 21 | ) 22 | rownames(meta) = meta$id 23 | meta = meta[dag@terms, ] 24 | rownames(meta)[nrow(meta)] = simona:::SUPER_ROOT 25 | 26 | mcols(dag) = meta 27 | 28 | annotation = load_keyword_genesets("9606") 29 | dag = add_annotation(dag, annotation) 30 | 31 | 32 | ## The Human Phenotype Ontology 33 | 34 | # https://hpo.jax.org/app/data/annotations 35 | 36 | dag = import_obo("https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-10-09/hp-base.obo") 37 | 38 | tb = 
read.table(url("https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-10-09/genes_to_phenotype.txt"), sep = "\t", header = TRUE) 39 | annotation = split(tb$ncbi_gene_id, tb$hpo_id) 40 | 41 | dag = add_annotation(dag, annotation) 42 | 43 | 44 | ## Pathway ontology and many 45 | 46 | # https://download.rgd.mcw.edu/ontology/ 47 | 48 | library(rvest) 49 | 50 | html = read_html("https://download.rgd.mcw.edu/ontology/annotated_rgd_objects_by_ontology/") 51 | tb = html %>% html_element("table") %>% html_table() 52 | fn = tb[grep("_genes_", tb[[2]]), ][[2]] 53 | tb = data.frame(org = gsub("_.*$", "", fn), 54 | onto = gsub("^.*_", "", fn)) 55 | tb = tb[!tb$onto %in% c("go", "nbo", "mp", "cmo"), ] 56 | tb = tb[order(tb$onto), ] 57 | 58 | tb$anno_url = paste0("https://download.rgd.mcw.edu/ontology/annotated_rgd_objects_by_ontology/", tb$org, "_genes_", tb$onto) 59 | 60 | onto = c("chebi" = "https://purl.obolibrary.org/obo/chebi.obo", 61 | "pw" = "https://download.rgd.mcw.edu/ontology/pathway/pathway.obo", 62 | "rdo" = "https://download.rgd.mcw.edu/ontology/disease/RDO.obo", 63 | "vt" = "https://purl.obolibrary.org/obo/vt.owl", 64 | "hp" = "https://purl.obolibrary.org/obo/hp.obo") 65 | 66 | tb$onto_url = onto[tb$onto] 67 | ## MeSH 68 | 69 | dag = import_ttl("https://data.bioontology.org/ontologies/MESH/submissions/26/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb") 70 | 71 | 72 | -------------------------------------------------------------------------------- /man/distance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dist.R 3 | \name{shortest_distances_via_NCA} 4 | \alias{shortest_distances_via_NCA} 5 | \alias{longest_distances_via_LCA} 6 | \alias{shortest_distances_directed} 7 | \alias{longest_distances_directed} 8 | \title{Distance on the DAG} 9 | \usage{ 10 | shortest_distances_via_NCA(dag, terms, verbose = 
simona_opt$verbose) 11 | 12 | longest_distances_via_LCA(dag, terms, verbose = simona_opt$verbose) 13 | 14 | shortest_distances_directed(dag, terms, verbose = simona_opt$verbose) 15 | 16 | longest_distances_directed(dag, terms, verbose = simona_opt$verbose) 17 | } 18 | \arguments{ 19 | \item{dag}{An \code{ontology_DAG} object.} 20 | 21 | \item{terms}{A vector of term names.} 22 | 23 | \item{verbose}{Whether to print messages.} 24 | } 25 | \value{ 26 | A numeric distance matrix. 27 | } 28 | \description{ 29 | Distance on the DAG 30 | } 31 | \details{ 32 | Denote two terms as \code{a} and \code{b}, a common ancestor as \code{c}, and the distance function \code{d()} calculates the longest 33 | distance or the shortest distance depending on the function. 34 | \itemize{ 35 | \item \code{shortest_distances_via_NCA()}: It calculates the smallest \code{d(c, a) + d(c, b)} where \code{d()} calculates the shortest distance between two terms. In this case, 36 | \code{c} is the NCA (nearest common ancestor) of \code{a} and \code{b}. 37 | \item \code{longest_distances_via_LCA()}: It calculates the largest \code{d(c, a) + d(c, b)} where \code{d()} calculates the longest distance between two terms \emph{via the LCA (lowest common ancestor) term}. In this case, 38 | \code{c} is the LCA of \code{a} and \code{b}. 39 | \item \code{shortest_distances_directed()}: It calculates \code{d(a, b)} where \code{d()} calculates the shortest distance between two terms. The distance is only calculated when \code{a} is an ancestor of \code{b}, otherwise the distance value is -1. 40 | \item \code{longest_distances_directed()}: It calculates \code{d(a, b)} where \code{d()} calculates the longest distance between two terms. The distance is only calculated when \code{a} is an ancestor of \code{b}, otherwise the distance value is -1. 
41 | } 42 | } 43 | \examples{ 44 | parents = c("a", "a", "b", "b", "c", "d") 45 | children = c("b", "c", "c", "d", "e", "f") 46 | dag = create_ontology_DAG(parents, children) 47 | shortest_distances_via_NCA(dag, letters[1:6]) 48 | longest_distances_via_LCA(dag, letters[1:6]) 49 | shortest_distances_directed(dag, letters[1:6]) 50 | longest_distances_directed(dag, letters[1:6]) 51 | } 52 | -------------------------------------------------------------------------------- /man/dag_enrich_on_offsprings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/enrich.R 3 | \name{dag_enrich_on_offsprings} 4 | \alias{dag_enrich_on_offsprings} 5 | \title{Enrichment analysis on offspring terms} 6 | \usage{ 7 | dag_enrich_on_offsprings(dag, terms, min_hits = 3, min_offspring = 10) 8 | } 9 | \arguments{ 10 | \item{dag}{An \code{ontology_DAG} object.} 11 | 12 | \item{terms}{A vector of term names.} 13 | 14 | \item{min_hits}{Minimal number of terms in an offspring set.} 15 | 16 | \item{min_offspring}{Minimal size of the offspring set.} 17 | } 18 | \value{ 19 | A data frame with the following columns: 20 | \itemize{ 21 | \item \code{term}: Term names. 22 | \item \code{n_hits}: Number of terms in \code{terms} intersecting to \code{t}'s offspring terms. 23 | \item \code{n_offspring}: Number of offspring terms of \code{t} (including \code{t} itself). 24 | \item \code{n_terms}: Number of terms in \code{term} intersecting to all terms in the DAG. 25 | \item \code{n_all}: Number of all terms in the DAG. 26 | \item \code{log2_fold_enrichment}: Defined as log2(observation/expected). 27 | \item \code{z_score}: Defined as (observed-expected)/sd. 28 | \item \code{p_value}: P-values from hypergeometric test. 29 | \item \code{p_adjust}: Adjusted p-values from the BH method. 30 | } 31 | 32 | The number of rows in the data frame is the same as the number of terms in the DAG. 
33 | } 34 | \description{ 35 | The analysis task is to evaluate how significantly a term includes \code{terms}. 36 | } 37 | \details{ 38 | Given a list of terms in \code{terms}, the function tests whether they are enriched in a term's offspring terms. 39 | The test is based on the hypergeometric distribution. In the following 2x2 contingency table, \code{S} is the set of \code{terms}, 40 | for a term \code{t} in the DAG, \code{T} is the set of its offspring plus the \code{t} itself, the aim is to test whether \code{S} is over-represented 41 | in \code{T}. 42 | 43 | If there is a significant p-value, we can say the term \code{t} preferably includes terms in \code{terms}. 44 | 45 | \if{html}{\out{
}}\preformatted{+----------+------+----------+-----+ 46 | | | in S | not in S | all | 47 | +----------+------+----------+-----+ 48 | | in T | x11 | x12 | x10 | 49 | | not in T | x21 | x22 | x20 | 50 | +----------+------+----------+-----+ 51 | | all | x01 | x02 | x | 52 | +----------+------+----------+-----+ 53 | }\if{html}{\out{
}} 54 | } 55 | \examples{ 56 | \dontrun{ 57 | dag = create_ontology_DAG_from_GO_db() 58 | terms = random_terms(dag, 100) 59 | df = dag_enrich_on_offsprings(dag, terms) 60 | } 61 | 1 62 | } 63 | -------------------------------------------------------------------------------- /man/create_ontology_DAG_from_GO_db.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/go.R 3 | \name{create_ontology_DAG_from_GO_db} 4 | \alias{create_ontology_DAG_from_GO_db} 5 | \alias{alternative_GO_terms} 6 | \title{Create the ontology_DAG object from the GO.db package} 7 | \usage{ 8 | create_ontology_DAG_from_GO_db( 9 | namespace = "BP", 10 | relations = "part of", 11 | org_db = NULL, 12 | evidence_code = NULL, 13 | retrieve_alternative = FALSE, 14 | verbose = simona_opt$verbose 15 | ) 16 | 17 | alternative_GO_terms( 18 | tag = c("replaced_by", "alt_id", "consider"), 19 | version = NULL, 20 | verbose = TRUE 21 | ) 22 | } 23 | \arguments{ 24 | \item{namespace}{One of "BP", "CC" and "MF".} 25 | 26 | \item{relations}{Types of the GO term relations. In the \strong{GO.db} package, the GO term relations can be "is_a", "part_of", 27 | "regulates", "negatively regulates", "positively regulates". Note since "regulates" is a parent relation 28 | of "negatively regulates", "positively regulates", if "regulates" is selected, "negatively regulates" and "positively regulates" 29 | are also selected. Note "is_a" is always included.} 30 | 31 | \item{org_db}{The name of the organism package or the corresponding database object, e.g. \code{"org.Hs.eg.db"} or 32 | directly the \code{\link[org.Hs.eg.db:org.Hs.egBASE]{org.Hs.eg.db::org.Hs.eg.db}} object for human, then the gene annotation to GO terms will be added 33 | to the object. 
For other non-model organisms, consider to use the \strong{AnnotationHub} package to find one.} 34 | 35 | \item{evidence_code}{A vector of evidence codes for gene annotation to GO terms. See \url{https://geneontology.org/docs/guide-go-evidence-codes/}.} 36 | 37 | \item{retrieve_alternative}{Whether to retrieve alternative/obsolete GO terms from geneontology.org?} 38 | 39 | \item{verbose}{Whether to print messages.} 40 | 41 | \item{tag}{In the \code{go-basic.obo} file, there are three tags which define alternative GO terms: \code{replaced_by}, \code{alt_id} and \code{consider}. 42 | See https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.2.2.1} 43 | 44 | \item{version}{Version of the \code{go-basic.obo} file. By default it is the version for building \strong{GO.db} package. The value is a string in the format of "2024-01-17".} 45 | } 46 | \value{ 47 | An \code{ontology_DAG} object. 48 | 49 | A list of named vectors where names are alternative GO IDs and value vectors are current GO IDs in use. 
50 | } 51 | \description{ 52 | Create the ontology_DAG object from the GO.db package 53 | 54 | Mappings between alternative GO terms to official GO terms 55 | } 56 | \examples{ 57 | dag = create_ontology_DAG_from_GO_db() 58 | dag 59 | } 60 | -------------------------------------------------------------------------------- /tests/testthat/tests_constructor.R: --------------------------------------------------------------------------------

library(testthat)

# Construction-time validation of create_ontology_DAG(): cycles, multiple roots, missing root.
test_that("test cyclic_node", {
    # a -> b -> c -> d -> b: the last link points back to `b`, closing a cycle below root `a`.
    parents = c("a", "b", "c", "d")
    children = c("b", "c", "d", "b")
    expect_error(
        create_ontology_DAG(parents, children),
        "Found cyclic nodes"
    )

    # Two disconnected chains (a -> b -> c -> d and g -> h -> i), i.e. two terms without parents.
    parents = c("a", "b", "c", "g", "h")
    children = c("b", "c", "d", "h", "i")
    expect_message(
        dag <- create_ontology_DAG(parents, children),
        "more than one root"
    )
    # One more term than supplied is expected in the result
    # (presumably a super root joining the two roots -- confirm in the constructor).
    expect_equal(
        length(dag@terms),
        length(unique(c(parents, children))) + 1
    )

    # a -> b -> c -> d -> a: every term has a parent, so there is no term that could serve as root.
    parents = c("a", "b", "c", "d")
    children = c("b", "c", "d", "a")
    expect_error(
        create_ontology_DAG(parents, children),
        "There might exist a cycle"
    )
})


# Shared fixture for the sub-DAG and filter tests below
# (links: a->b, a->c, b->c, b->d, c->e, d->f):
#
#   b--d--f
#  / \
# a---c--e
# upstream -> downstream

parents = c("a", "a", "b", "b", "c", "d")
children = c("b", "c", "c", "d", "e", "f")

dag = create_ontology_DAG(parents, children)

# Sub-setting with `[[` / `[` by a term name.
test_that("test sub-DAG", {
    expect_identical(
        dag[["c"]]@terms,
        c("c", "e")
    )
    expect_identical(
        dag[["b"]]@terms,
        c("b", "c", "d", "e", "f")
    )
    # "l" is not a term in the DAG
    expect_error(
        dag["l"]
    )

    # "f" is a leaf; the test expects the placeholder term "~~singleton~~" for it
    expect_identical(
        dag["f"]@terms,
        "~~singleton~~"
    )
})


# dag_filter() restricted by explicit terms, by root(s), by leaves, and by both.
test_that("test DAG filter", {
    expect_identical(
        dag_filter(dag, terms = c("b", "d", "f"))@terms,
        c("b", "d", "f")
    )
    expect_identical(
        dag_filter(dag, root = "b")@terms,
        c("b", "c", "d", "e", "f")
    )
    # "c" is already below "b", so adding it as a second root gives the same terms as root = "b"
    expect_identical(
        dag_filter(dag, root = c("b", "c"))@terms,
        c("b", "c", "d", "e", "f")
    )
    expect_identical(
        dag_filter(dag, leaves = c("c", "d"))@terms,
        c("a", "b", "c", "d")
    )
    expect_identical(
        dag_filter(dag, leaves = c("b", "c"))@terms,
        c("a", "b", "c")
    )
    expect_identical(
        dag_filter(dag, root = "b", leaves = "e")@terms,
        c("b", "c", "e")
    )
})


# a -> b -> c -> d -> e -> b: a cycle (b, c, d, e) that hangs below the root `a`.
parents = c("a", "b", "c", "d", "e")
children = c("b", "c", "d", "e", "b")

test_that("test cyclic path", {
    expect_error(
        create_ontology_DAG(parents, children),
        "Found cyclic nodes"
    )

    # With `remove_cyclic_paths = TRUE` a message starting with "Remove" is expected instead of an error.
    expect_message(
        create_ontology_DAG(parents, children, remove_cyclic_paths = TRUE),
        "Remove"
    )
})

# Chain a -> b -> c -> d -> e plus a separate ring f -> g -> h -> f that is not connected to the chain.
parents = c("a", "b", "c", "d", "f", "g", "h")
children = c("b", "c", "d", "e", "g", "h", "f")

test_that("test isolated rings", {
    expect_error(
        create_ontology_DAG(parents, children),
        "Found isolated rings"
    )

    # With `remove_rings = TRUE` a message starting with "Remove" is expected instead of an error.
    expect_message(
        create_ontology_DAG(parents, children, remove_rings = TRUE),
        "Remove"
    )
})
-------------------------------------------------------------------------------- /man/dag_random.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/random.R 3 | \name{dag_random_tree} 4 | \alias{dag_random_tree} 5 | \alias{dag_add_random_children} 6 | \alias{dag_random} 7 | \title{Generate a random DAG} 8 | \usage{ 9 | dag_random_tree( 10 | n_children = 2, 11 | p_stop = 0, 12 | max = 2^10 - 1, 13 | verbose = simona_opt$verbose 14 | ) 15 | 16 | dag_add_random_children( 17 | dag, 18 | p_add = 0.1, 19 | new_children = c(1, 4), 20 | add_random_children_fun = NULL, 21 | verbose = simona_opt$verbose 22 | ) 23 | 24 | dag_random( 25 | n_children = 2, 26 | p_stop = 0, 27 | max = 2^10 - 1, 28 | p_add = 0.1, 29 | 
new_children = c(1, 4), 30 | verbose = simona_opt$verbose 31 | ) 32 | } 33 | \arguments{ 34 | \item{n_children}{Number of children of a term. The value can also be a vector of 35 | length two representing the range of the number of child terms.} 36 | 37 | \item{p_stop}{The probability of a term to stop growing.} 38 | 39 | \item{max}{Maximal number of terms.} 40 | 41 | \item{verbose}{Whether to print messages.} 42 | 43 | \item{dag}{An \code{ontology_DAG} object.} 44 | 45 | \item{p_add}{The probability to add children on each term.} 46 | 47 | \item{new_children}{The number or range of numbers of new children if a term is selected to add more children.} 48 | 49 | \item{add_random_children_fun}{A function to randomly add children from the DAG.} 50 | } 51 | \value{ 52 | An \code{ontology_DAG} object. 53 | } 54 | \description{ 55 | Generate a random DAG 56 | } 57 | \details{ 58 | \code{dag_random_tree()} generates a random DAG tree from the root term. In a certain step of 59 | the growing, let's denote the set of all leaf terms as L, then in the next round of growing, 60 | \code{floor(length(L)*p_stop)} leaf terms stop growing, and for the remaining leaf terms that 61 | continue to grow, each term will add child terms with the number uniformly sampled within \verb{[ n_children[1], n_children[2] ]}. 62 | The growing stops when the total number of terms in the DAG exceeds \code{max}. 63 | 64 | \code{dag_add_random_children()} adds more links in a DAG. Each term is associated with a probability \code{p_add} 65 | to add new links where the term, if it is selected, acts as a parent term, linking to other terms in the DAG. 66 | The number of new child terms is controlled by \code{new_children} which can be a single number or a range. By default, 67 | new child terms of a term \code{t} are randomly selected from other terms that are lower than the term \code{t} 68 | (check the function \code{simona:::add_random_children}). 
The way how to randomly select new child terms for \code{t} 69 | can be controlled by a self-defined function for the \code{add_random_children_fun} argument. 70 | 71 | \code{dag_random()}: it simply wraps \code{dag_random_tree()} and \code{dag_add_random_children()}. 72 | } 73 | \examples{ 74 | tree = dag_random_tree() 75 | dag = dag_random() 76 | } 77 | -------------------------------------------------------------------------------- /man/ontology.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anno_gene.R 3 | \name{ontology_kw} 4 | \alias{ontology_kw} 5 | \alias{ontology_chebi} 6 | \alias{ontology_hp} 7 | \alias{ontology_pw} 8 | \alias{ontology_rdo} 9 | \alias{ontology_vt} 10 | \alias{ontology_go} 11 | \alias{ontology_reactome} 12 | \title{Import ontologies already having gene annotations} 13 | \usage{ 14 | ontology_kw( 15 | organism = "human", 16 | gene_annotation = TRUE, 17 | verbose = simona_opt$verbose, 18 | ... 19 | ) 20 | 21 | ontology_chebi( 22 | organism = c("human", "mouse", "rat", "pig", "dog"), 23 | gene_annotation = TRUE, 24 | verbose = simona_opt$verbose, 25 | ... 26 | ) 27 | 28 | ontology_hp( 29 | organism = c("human", "mouse"), 30 | gene_annotation = TRUE, 31 | verbose = simona_opt$verbose, 32 | ... 33 | ) 34 | 35 | ontology_pw( 36 | organism = c("human", "mouse", "rat", "pig", "dog", "chimpanzee"), 37 | gene_annotation = TRUE, 38 | verbose = simona_opt$verbose, 39 | ... 40 | ) 41 | 42 | ontology_rdo( 43 | organism = c("human", "mouse", "rat", "pig", "dog", "chimpanzee"), 44 | gene_annotation = TRUE, 45 | verbose = simona_opt$verbose, 46 | ... 47 | ) 48 | 49 | ontology_vt( 50 | organism = c("human", "mouse", "rat", "pig", "dog", "chimpanzee"), 51 | gene_annotation = TRUE, 52 | verbose = simona_opt$verbose, 53 | ... 54 | ) 55 | 56 | ontology_go(...) 
57 | 58 | ontology_reactome( 59 | organism = "HSA", 60 | gene_annotation = TRUE, 61 | verbose = simona_opt$verbose, 62 | ... 63 | ) 64 | } 65 | \arguments{ 66 | \item{organism}{Organism.} 67 | 68 | \item{gene_annotation}{Whether to add gene annotations to the DAG.} 69 | 70 | \item{verbose}{Whether to print messages?} 71 | 72 | \item{...}{Pass to \code{\link[=create_ontology_DAG]{create_ontology_DAG()}}.} 73 | } 74 | \description{ 75 | Import ontologies already having gene annotations 76 | } 77 | \details{ 78 | There are the following ontologies: 79 | \itemize{ 80 | \item \code{ontology_kw()}: UniProt Keywords. The list of supported organisms can be found in \code{\link[UniProtKeywords:load_keyword_genesets]{UniProtKeywords::load_keyword_genesets()}}. 81 | \item \code{ontology_chebi()}: Chemical Entities of Biological Interest. 82 | \item \code{ontology_hp()}: The Human Phenotype Ontology. 83 | \item \code{ontology_pw()}: Pathway Ontology. 84 | \item \code{ontology_rdo()}: RGD Disease Ontology. 85 | \item \code{ontology_vt()}: Vertebrate Trait Ontology. 86 | } 87 | 88 | The source of the original files can be found with \code{simona:::RGD_TB}. 89 | 90 | \code{ontology_go()} is an alias of \code{\link[=create_ontology_DAG_from_GO_db]{create_ontology_DAG_from_GO_db()}}. All arguments go there. 91 | 92 | Valid values for \code{organism} argument in \code{ontology_reactome()} are 93 | 94 | \if{html}{\out{
}}\preformatted{c("BTA", "CEL", "CFA", "DRE", "DDI", "DME", "GGA", "HSA", "MMU", 95 | "MTU", "PFA", "RNO", "SCE", "SPO", "SSC", "XTR") 96 | }\if{html}{\out{
}} 97 | } 98 | -------------------------------------------------------------------------------- /src/tree.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #include "utils.h" 5 | #include "traverse.h" 6 | 7 | // [[Rcpp::export]] 8 | List cpp_mark_tree_links(S4 dag) { 9 | // links that will be used in the tree is marked as negativer ids 10 | 11 | List lt_children = dag.slot("lt_children"); 12 | IntegerVector depth = _dag_depth(dag); 13 | int n = lt_children.size(); 14 | 15 | List lt_children2 = clone(lt_children); 16 | 17 | int current_depth = 0; 18 | IntegerVector current = dag.slot("root"); 19 | LogicalVector l_current = integer_to_logical_vector(current-1, n); 20 | 21 | LogicalVector l_visited(n); 22 | l_visited[current[0]-1] = true; 23 | int i_visited = 1; 24 | 25 | while(i_visited < n) { 26 | 27 | current_depth ++; 28 | LogicalVector l_current2(n); 29 | for(int i = 0; i < n; i ++) { 30 | if(l_current[i]) { 31 | IntegerVector children = lt_children[i]; 32 | IntegerVector children2 = lt_children2[i]; 33 | 34 | if(children.size()) { 35 | for(int j = 0; j < children.size(); j ++) { 36 | if(depth[children[j]-1] == current_depth && !l_visited[children[j]-1]) { 37 | l_visited[children[j]-1] = true; 38 | l_current2[children[j]-1] = true; 39 | 40 | children2[j] = - children2[j]; 41 | 42 | i_visited ++; 43 | if(i_visited % 1000 == 0) { 44 | message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false); 45 | message("going through " + std::to_string(i_visited) + " / " + std::to_string(n) + " nodes ...", false); 46 | } 47 | } 48 | } 49 | } 50 | } 51 | } 52 | 53 | l_current = l_current2; 54 | } 55 | 56 | message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false); 57 | message("going through " + std::to_string(n) + " / " + std::to_string(n) + " nodes ... 
Done.", true); 58 | 59 | return lt_children2; 60 | } 61 | 62 | // lt_children: returned by cpp_mark_tree_links() 63 | // [[Rcpp::export]] 64 | List cpp_tree_lt_parents_from_children(List lt_children) { 65 | int n = lt_children.size(); 66 | 67 | IntegerVector n_parents(n); 68 | int ic; 69 | for(int i = 0; i < n; i ++) { 70 | IntegerVector children = lt_children[i]; 71 | for(int j = 0; j < children.size(); j ++) { 72 | if(children[j] < 0) { 73 | ic = -children[j]-1; 74 | n_parents[ic] ++; 75 | } 76 | } 77 | } 78 | 79 | List lt_parents(n); 80 | for(int i = 0; i < n; i ++) { 81 | IntegerVector parents(n_parents[i]); 82 | lt_parents[i] = parents; 83 | } 84 | 85 | IntegerVector ip(n); 86 | for(int i = 0; i < n; i ++) { 87 | IntegerVector children = lt_children[i]; 88 | for(int j = 0; j < children.size(); j ++) { 89 | if(children[j] < 0) { 90 | ic = -children[j] - 1; 91 | IntegerVector parents = lt_parents[ic]; 92 | parents[ ip[ic] ] = i + 1; 93 | ip[ic] ++; 94 | } 95 | } 96 | } 97 | 98 | return lt_parents; 99 | } 100 | -------------------------------------------------------------------------------- /man/n_annotations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/annotation.R 3 | \name{n_annotations} 4 | \alias{n_annotations} 5 | \alias{has_annotation} 6 | \title{Number of annotated items} 7 | \usage{ 8 | n_annotations( 9 | dag, 10 | terms = NULL, 11 | uniquify = simona_opt$anno_uniquify, 12 | use_cache = simona_opt$use_cache 13 | ) 14 | 15 | has_annotation(dag) 16 | } 17 | \arguments{ 18 | \item{dag}{An \code{ontology_DAG} object.} 19 | 20 | \item{terms}{A vector of term names. If it is set, the returned vector will be subsetted to the terms that have been set here.} 21 | 22 | \item{uniquify}{Whether to uniquify items that are annotated to the term? See \strong{Details}. 
It is suggested to always be \code{TRUE}.} 23 | 24 | \item{use_cache}{Internally used.} 25 | } 26 | \value{ 27 | \code{n_annotations()} returns an integer vector. 28 | 29 | \code{has_annotation()} returns a logical scalar. 30 | } 31 | \description{ 32 | Number of annotated items 33 | } 34 | \details{ 35 | Due to the nature of the DAG, a parent term includes all annotated items of its child terms, and an ancestor term includes 36 | all annotated items from its offspring recursively. In current tools, there are two different implementations to deal with 37 | such recursive merging. 38 | 39 | For a term \code{t}, denote \code{S_1}, \code{S_2}, ... as the sets of annotated items for its child 1, 2, ..., also denote \code{S_t} as the set 40 | of items that are \strong{directly} annotated to \code{t}. The first method takes the union of annotated items on \code{t} and all its child terms: 41 | 42 | \if{html}{\out{
}}\preformatted{n = length(union(S_t, S_1, S_2, ...)) 43 | }\if{html}{\out{
}} 44 | 45 | And the second method takes the sum of numbers of items on \code{t} and on all its child terms: 46 | 47 | \if{html}{\out{
}}\preformatted{n = sum(length(S_t) + length(S_1) + length(S_2) + ...) 48 | }\if{html}{\out{
}} 49 | 50 | In \code{n_annotations()}, when \code{uniquify = TRUE}, the first method is used; and when \code{uniquify = FALSE}, the second method is used. 51 | 52 | For some annotation sources, it is possible that an item is annotated to multiple terms, thus, the second method which simply 53 | adds numbers of all its child terms may not be proper because an item may be counted duplicatedly, thus over-estimating \code{n}. The two methods 54 | are identical only if an item is annotated to a unique term in the DAG. 55 | 56 | We suggest to always set \code{uniquify = TRUE} (the default), and the scenario of \code{uniquify = FALSE} is only for the testing or benchmarking purpose. 57 | } 58 | \examples{ 59 | parents = c("a", "a", "b", "b", "c", "d") 60 | children = c("b", "c", "c", "d", "e", "f") 61 | annotation = list( 62 | "a" = c("t1", "t2", "t3"), 63 | "b" = c("t3", "t4"), 64 | "c" = "t5", 65 | "d" = "t7", 66 | "e" = c("t4", "t5", "t6", "t7"), 67 | "f" = "t8" 68 | ) 69 | dag = create_ontology_DAG(parents, children, annotation = annotation) 70 | n_annotations(dag) 71 | } 72 | -------------------------------------------------------------------------------- /man/dag_enrich_on_offsprings_by_permutation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/enrich.R 3 | \name{dag_enrich_on_offsprings_by_permutation} 4 | \alias{dag_enrich_on_offsprings_by_permutation} 5 | \title{Enrichment analysis on offspring terms by permutation test} 6 | \usage{ 7 | dag_enrich_on_offsprings_by_permutation( 8 | dag, 9 | value, 10 | perm = 1000, 11 | min_offspring = 10, 12 | verbose = simona_opt$verbose 13 | ) 14 | } 15 | \arguments{ 16 | \item{dag}{An \code{ontology_DAG} object.} 17 | 18 | \item{value}{A numeric value. 
The value should correspond to terms in \code{dag@terms}.} 19 | 20 | \item{perm}{Number of permutations.} 21 | 22 | \item{min_offspring}{Minimal size of the offspring set.} 23 | 24 | \item{verbose}{Whether to print messages.} 25 | } 26 | \value{ 27 | A data frame with the following columns: 28 | \itemize{ 29 | \item \code{term}: Term names. 30 | \item \code{stats}: The statistics of terms. 31 | \item \code{n_offspring}: Number of offspring terms of \code{t} (including \code{t} itself). 32 | \item \code{log2_fold_enrichment}: Defined as \code{log2(s/mean)} where \code{mean} is calculated from random permutation. 33 | \item \code{z_score}: Defined as \code{(s - mean)/sd} where \code{mean} and \code{sd} are calculated from random permutation. 34 | \item \code{p_value}: P-values from permutation test. 35 | \item \code{p_adjust}: Adjusted p-values from the BH method. 36 | } 37 | 38 | The number of rows in the data frame is the same as the number of terms in the DAG. 39 | } 40 | \description{ 41 | Enrichment analysis on offspring terms by permutation test 42 | } 43 | \details{ 44 | In the function \code{\link[=dag_enrich_on_offsprings]{dag_enrich_on_offsprings()}}, the statistic for testing is the number of terms in each category. Here 45 | this function makes the testing procedure more general. 46 | 47 | The function tests whether a term \code{t}'s offspring terms have an over-represented pattern on values in \code{value}. 48 | Denote \code{T} as the set of \code{t}'s offspring terms plus \code{t} itself, and \code{v} as the numeric vector of \code{value}, we first 49 | calculate a score \code{s} based on values in \code{T}: 50 | 51 | \if{html}{\out{
}}\preformatted{s = mean_\{terms in T\}(v) 52 | }\if{html}{\out{
}} 53 | 54 | To construct a random version of \code{s}, we randomly sample \code{n_T} terms from the DAG where \code{n_T} is the size of set \code{T}: 55 | 56 | \if{html}{\out{
}}\preformatted{sr_i = mean_\{n_T randomly sampled terms\}(v) 57 | }\if{html}{\out{
}} 58 | 59 | where index \code{i} represents the i^th sampling. If we sample \code{k} times, the p-value is calculated as: 60 | 61 | \if{html}{\out{
}}\preformatted{p = sum_\{i in 1..k\}(I(sr_i > s))/k 62 | }\if{html}{\out{
}} 63 | } 64 | \examples{ 65 | \dontrun{ 66 | dag = create_ontology_DAG_from_GO_db() 67 | value = runif(dag_n_terms(dag)) # a set of random values 68 | df = dag_enrich_on_offsprings_by_permutation(dag, value) 69 | } 70 | 1 71 | } 72 | -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/grViz-binding-1.0.10/grViz.js: -------------------------------------------------------------------------------- 1 | HTMLWidgets.widget({ 2 | 3 | name: 'grViz', 4 | 5 | type: 'output', 6 | 7 | initialize: function(el, width, height) { 8 | 9 | return { 10 | // TODO: add instance fields as required 11 | }; 12 | }, 13 | 14 | renderValue: function(el, x, instance) { 15 | // Use this to sort of make our diagram responsive 16 | // or at a minimum fit within the bounds set by htmlwidgets 17 | // for the parent container 18 | function makeResponsive(el){ 19 | var svg = el.getElementsByTagName("svg")[0]; 20 | if (svg) { 21 | if (svg.width) {svg.removeAttribute("width")} 22 | if (svg.height) {svg.removeAttribute("height")} 23 | svg.style.width = "100%"; 24 | svg.style.height = "100%"; 25 | } 26 | } 27 | 28 | if (x.diagram !== "") { 29 | 30 | if (typeof x.config === "undefined"){ 31 | x.config = {}; 32 | x.config.engine = "dot"; 33 | x.config.options = {}; 34 | } 35 | 36 | try { 37 | 38 | el.innerHTML = Viz(x.diagram, format="svg", engine=x.config.engine, options=x.config.options); 39 | 40 | makeResponsive(el); 41 | 42 | if (HTMLWidgets.shinyMode) { 43 | // Get widget id 44 | var id = el.id; 45 | 46 | $("#" + id + " .node").click(function(e) { 47 | // Get node id 48 | var nodeid = e.currentTarget.id; 49 | // Get node text object and make an array 50 | var node_texts = $("#" + id + " #" + nodeid + " text"); 51 | //var node_path = $("#" + nodeid + " path")[0]; 52 | var text_array = node_texts.map(function() {return $(this).text(); }).toArray(); 53 | // Build return object *obj* with node-id, node text values and node fill 54 | 
var obj = { 55 | id: nodeid, 56 | //fill: node_path.attributes.fill.nodeValue, 57 | //outerHMTL: node_path.outerHTML, 58 | nodeValues: text_array 59 | }; 60 | // Send *obj* to Shiny's inputs (input$[id]+_click e.g.: input$vtree_click)) 61 | Shiny.setInputValue(id + "_click", obj, {priority: "event"}); 62 | }); 63 | } 64 | 65 | // set up a container for tasks to perform after completion 66 | // one example would be add callbacks for event handling 67 | // styling 68 | if (typeof x.tasks !== "undefined") { 69 | if ((typeof x.tasks.length === "undefined") || 70 | (typeof x.tasks === "function")) { 71 | // handle a function not enclosed in array 72 | // should be able to remove once using jsonlite 73 | x.tasks = [x.tasks]; 74 | } 75 | x.tasks.map(function(t){ 76 | // for each tasks add it to the mermaid.tasks with el 77 | t.call(el); 78 | }); 79 | } 80 | } catch(e){ 81 | var p = document.createElement("pre"); 82 | p.innerText = e; 83 | el.appendChild(p); 84 | } 85 | } 86 | 87 | }, 88 | 89 | resize: function(el, width, height, instance) { 90 | } 91 | }); 92 | -------------------------------------------------------------------------------- /docs/articles/v07_dag_visualization_files/grViz-binding-1.0.11/grViz.js: -------------------------------------------------------------------------------- 1 | HTMLWidgets.widget({ 2 | 3 | name: 'grViz', 4 | 5 | type: 'output', 6 | 7 | initialize: function(el, width, height) { 8 | 9 | return { 10 | // TODO: add instance fields as required 11 | }; 12 | }, 13 | 14 | renderValue: function(el, x, instance) { 15 | // Use this to sort of make our diagram responsive 16 | // or at a minimum fit within the bounds set by htmlwidgets 17 | // for the parent container 18 | function makeResponsive(el){ 19 | var svg = el.getElementsByTagName("svg")[0]; 20 | if (svg) { 21 | if (svg.width) {svg.removeAttribute("width")} 22 | if (svg.height) {svg.removeAttribute("height")} 23 | svg.style.width = "100%"; 24 | svg.style.height = "100%"; 25 | } 26 | } 27 | 
28 | if (x.diagram !== "") { 29 | 30 | if (typeof x.config === "undefined"){ 31 | x.config = {}; 32 | x.config.engine = "dot"; 33 | x.config.options = {}; 34 | } 35 | 36 | try { 37 | 38 | el.innerHTML = Viz(x.diagram, format="svg", engine=x.config.engine, options=x.config.options); 39 | 40 | makeResponsive(el); 41 | 42 | if (HTMLWidgets.shinyMode) { 43 | // Get widget id 44 | var id = el.id; 45 | 46 | $("#" + id + " .node").click(function(e) { 47 | // Get node id 48 | var nodeid = e.currentTarget.id; 49 | // Get node text object and make an array 50 | var node_texts = $("#" + id + " #" + nodeid + " text"); 51 | //var node_path = $("#" + nodeid + " path")[0]; 52 | var text_array = node_texts.map(function() {return $(this).text(); }).toArray(); 53 | // Build return object *obj* with node-id, node text values and node fill 54 | var obj = { 55 | id: nodeid, 56 | //fill: node_path.attributes.fill.nodeValue, 57 | //outerHMTL: node_path.outerHTML, 58 | nodeValues: text_array 59 | }; 60 | // Send *obj* to Shiny's inputs (input$[id]+_click e.g.: input$vtree_click)) 61 | Shiny.setInputValue(id + "_click", obj, {priority: "event"}); 62 | }); 63 | } 64 | 65 | // set up a container for tasks to perform after completion 66 | // one example would be add callbacks for event handling 67 | // styling 68 | if (typeof x.tasks !== "undefined") { 69 | if ((typeof x.tasks.length === "undefined") || 70 | (typeof x.tasks === "function")) { 71 | // handle a function not enclosed in array 72 | // should be able to remove once using jsonlite 73 | x.tasks = [x.tasks]; 74 | } 75 | x.tasks.map(function(t){ 76 | // for each tasks add it to the mermaid.tasks with el 77 | t.call(el); 78 | }); 79 | } 80 | } catch(e){ 81 | var p = document.createElement("pre"); 82 | p.innerText = e; 83 | el.appendChild(p); 84 | } 85 | } 86 | 87 | }, 88 | 89 | resize: function(el, width, height, instance) { 90 | } 91 | }); 92 | -------------------------------------------------------------------------------- 
/docs/articles/v7_dag_visualization_files/grViz-binding-1.0.10/grViz.js: -------------------------------------------------------------------------------- 1 | HTMLWidgets.widget({ 2 | 3 | name: 'grViz', 4 | 5 | type: 'output', 6 | 7 | initialize: function(el, width, height) { 8 | 9 | return { 10 | // TODO: add instance fields as required 11 | }; 12 | }, 13 | 14 | renderValue: function(el, x, instance) { 15 | // Use this to sort of make our diagram responsive 16 | // or at a minimum fit within the bounds set by htmlwidgets 17 | // for the parent container 18 | function makeResponsive(el){ 19 | var svg = el.getElementsByTagName("svg")[0]; 20 | if (svg) { 21 | if (svg.width) {svg.removeAttribute("width")} 22 | if (svg.height) {svg.removeAttribute("height")} 23 | svg.style.width = "100%"; 24 | svg.style.height = "100%"; 25 | } 26 | } 27 | 28 | if (x.diagram !== "") { 29 | 30 | if (typeof x.config === "undefined"){ 31 | x.config = {}; 32 | x.config.engine = "dot"; 33 | x.config.options = {}; 34 | } 35 | 36 | try { 37 | 38 | el.innerHTML = Viz(x.diagram, format="svg", engine=x.config.engine, options=x.config.options); 39 | 40 | makeResponsive(el); 41 | 42 | if (HTMLWidgets.shinyMode) { 43 | // Get widget id 44 | var id = el.id; 45 | 46 | $("#" + id + " .node").click(function(e) { 47 | // Get node id 48 | var nodeid = e.currentTarget.id; 49 | // Get node text object and make an array 50 | var node_texts = $("#" + id + " #" + nodeid + " text"); 51 | //var node_path = $("#" + nodeid + " path")[0]; 52 | var text_array = node_texts.map(function() {return $(this).text(); }).toArray(); 53 | // Build return object *obj* with node-id, node text values and node fill 54 | var obj = { 55 | id: nodeid, 56 | //fill: node_path.attributes.fill.nodeValue, 57 | //outerHMTL: node_path.outerHTML, 58 | nodeValues: text_array 59 | }; 60 | // Send *obj* to Shiny's inputs (input$[id]+_click e.g.: input$vtree_click)) 61 | Shiny.setInputValue(id + "_click", obj, {priority: "event"}); 62 | }); 63 | 
} 64 | 65 | // set up a container for tasks to perform after completion 66 | // one example would be add callbacks for event handling 67 | // styling 68 | if (typeof x.tasks !== "undefined") { 69 | if ((typeof x.tasks.length === "undefined") || 70 | (typeof x.tasks === "function")) { 71 | // handle a function not enclosed in array 72 | // should be able to remove once using jsonlite 73 | x.tasks = [x.tasks]; 74 | } 75 | x.tasks.map(function(t){ 76 | // for each tasks add it to the mermaid.tasks with el 77 | t.call(el); 78 | }); 79 | } 80 | } catch(e){ 81 | var p = document.createElement("pre"); 82 | p.innerText = e; 83 | el.appendChild(p); 84 | } 85 | } 86 | 87 | }, 88 | 89 | resize: function(el, width, height, instance) { 90 | } 91 | }); 92 | -------------------------------------------------------------------------------- /man/dag_enrich_on_items.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/enrich.R 3 | \name{dag_enrich_on_items} 4 | \alias{dag_enrich_on_items} 5 | \alias{dag_enrich_on_genes} 6 | \title{Enrichment analysis on the number of annotated items} 7 | \usage{ 8 | dag_enrich_on_items(dag, items, min_hits = 5, min_items = 10) 9 | 10 | dag_enrich_on_genes(dag, genes, min_hits = 5, min_genes = 10) 11 | } 12 | \arguments{ 13 | \item{dag}{An \code{ontology_DAG} object.} 14 | 15 | \item{items}{A vector of item names.} 16 | 17 | \item{min_hits}{Minimal number of items in the term set.} 18 | 19 | \item{min_items}{Minimal size of the term set.} 20 | 21 | \item{genes}{A vector of gene IDs. The gene ID type can be found by directly printing the \code{ontology_DAG} object.} 22 | 23 | \item{min_genes}{Minimal number of genes.} 24 | } 25 | \value{ 26 | A data frame with the following columns: 27 | \itemize{ 28 | \item \code{term}: Term names. 29 | \item \code{n_hits}: Number of items in \code{items} intersecting to \code{t}'s annotated items. 
30 | \item \code{n_anno}: Number of annotated items of \code{t}. Specifically for \code{dag_enrich_on_genes()}, this column 31 | is renamed to \code{n_gs}. 32 | \item \code{n_items}: Number of items in \code{items} intersecting to all annotated items in the DAG. Specifically 33 | for \code{dag_enrich_on_genes()}, this column is renamed to \code{n_genes}. 34 | \item \code{n_all}: Number of all annotated items in the DAG. 35 | \item \code{log2_fold_enrichment}: Defined as log2(observed/expected). 36 | \item \code{z_score}: Defined as (observed-expected)/sd. 37 | \item \code{p_value}: P-values from hypergeometric test. 38 | \item \code{p_adjust}: Adjusted p-values from the BH method. 39 | } 40 | 41 | The number of rows in the data frame is the same as the number of terms in the DAG. 42 | } 43 | \description{ 44 | The analysis task is to evaluate which terms the given items are enriched to. 45 | } 46 | \details{ 47 | The function tests whether the list of items is enriched in terms on the DAG. 48 | The test is based on the hypergeometric distribution. In the following 2x2 contingency table, \code{S} is the set of \code{items}, 49 | for a term \code{t} in the DAG, \code{T} is the set of items annotated to \code{t} (by automatically merging from its offspring terms), 50 | the aim is to test whether \code{S} is over-represented in \code{T}. 51 | 52 | The universal set \code{all} corresponds to the full set of items annotated to the DAG. 53 | 54 | \if{html}{\out{
}}\preformatted{+----------+------+----------+-----+ 55 | | | in S | not in S | all | 56 | +----------+------+----------+-----+ 57 | | in T | x11 | x12 | x10 | 58 | | not in T | x21 | x22 | x20 | 59 | +----------+------+----------+-----+ 60 | | all | x01 | x02 | x | 61 | +----------+------+----------+-----+ 62 | }\if{html}{\out{
}} 63 | 64 | \code{dag_enrich_on_genes()} is the same as \code{dag_enrich_on_items()}; it only renames the arguments \code{items} and \code{min_items} to \code{genes} and \code{min_genes}. 65 | } 66 | \examples{ 67 | \dontrun{ 68 | dag = create_ontology_DAG_from_GO_db(org_db = "org.Hs.eg.db") 69 | items = random_items(dag, 1000) 70 | df = dag_enrich_on_items(dag, items) 71 | } 72 | 1 73 | } 74 | -------------------------------------------------------------------------------- /R/partition.R: -------------------------------------------------------------------------------- 1 | 2 | #' Partition the DAG 3 | #' 4 | #' @param dag An `ontology_DAG` object. 5 | #' @param level Depth in the DAG to cut. The DAG is cut below terms (or cut the links to their child terms) with `depth == level`. 6 | #' @param from A list of terms to cut. If it is set, `level` is ignored. 7 | #' @param term_pos Internally used. 8 | #' 9 | #' @details 10 | #' Let's call the terms below the `from` term as "top terms" because they will be on top of the sub-DAGs after the partitioning. 11 | #' It is possible that a term in the middle of the DAG can be traced back to more than one top term. 12 | #' To partition all terms exclusively, a term is partitioned to the sub-DAG from the top term with the largest distance to the term. 13 | #' If a term has the same largest distances to several top terms, a random top term is selected. 14 | #' 15 | #' In `partition_by_size()`, the DAG is first reduced to a tree where a child term only has one parent. 16 | #' The partition is done recursively by cutting into its child-trees. 17 | #' The splitting stops when all the child-trees have size less than `size`. 18 | #' 19 | #' `NA` is assigned to the `from` terms, their ancestor terms, and terms having infinite directed distance to `from` terms. 20 | #' 21 | #' @export 22 | #' @returns A character vector of top terms in each partition. 
#' @examples
#' \donttest{
#' dag = create_ontology_DAG_from_GO_db()
#' pa = partition_by_level(dag)
#' table(pa)
#' pa = partition_by_size(dag, size = 1000)
#' table(pa)
#' }
#' 1
partition_by_level = function(dag, level = 1, from = NULL, term_pos = NULL) {

	# Resolve the terms to cut at: either every term at depth `level`, or the
	# user-supplied `from` terms converted to integer node ids.
	if(is.null(from)) {
		depth = dag_depth(dag)
		max_depth = max(depth)
		# `level` must lie in [0, max_depth). The two bounds are alternatives, so they
		# are joined with `||`; joined with `&&` the check could never be triggered
		# because no value is both negative and >= max_depth.
		if(level < 0 || level >= max_depth) {
			stop("wrong value of `level`.")
		}
		from = which(depth == level)
	} else {
		from = term_to_node_id(dag, from, strict = FALSE)
	}

	# Lay the terms out on a tree (the DAG itself if it already is one) unless the
	# caller supplied precomputed positions.
	if(is.null(term_pos)) {
		if(dag_is_tree(dag)) {
			tree = dag
		} else {
			tree = dag_treelize(dag)
		}
		term_pos = cpp_node_pos_in_tree(tree, n_offspring(dag), 1, 360) ## in polar coordinate
	}

	# Process `from` in increasing order of the "h" column, so that in the loop
	# below a `from` term with a larger "h" overwrites labels set by a smaller one.
	from = from[order(term_pos[from, "h"])]
	# Interval [x - width/2, x + width/2] occupied by each `from` term.
	range = data.frame(left = term_pos[from, "x"] - term_pos[from, "width"]/2,
	                   right = term_pos[from, "x"] + term_pos[from, "width"]/2)

	partition = rep(NA_character_, dag@n_terms)
	# Only terms that are not returned by dag_ancestors() for `from` can receive a
	# label; everything else keeps NA.
	all_offspring = setdiff(seq_len(dag@n_terms), dag_ancestors(dag, from, in_labels = FALSE))
	l_offspring = rep(FALSE, dag@n_terms)
	l_offspring[all_offspring] = TRUE
	for(i in seq_along(from)) {
		# A term belongs to from[i] when its x position falls inside that term's interval.
		l = term_pos$x >= range$left[i] & term_pos$x <= range$right[i] & l_offspring
		partition[l] = dag@terms[ from[i] ]
	}

	partition
}

#' @param size Number of terms in a cluster. The splitting stops on a term if all its child-trees are smaller than `size`.
71 | #' @rdname partition_by_level 72 | #' @importFrom stats dendrapply 73 | #' @export 74 | partition_by_size = function(dag, size = round(dag_n_terms(dag)/5)) { 75 | 76 | tree = dag_treelize(dag) 77 | 78 | pa = cpp_partition_by_size(tree, as.integer(size)) 79 | pa[pa < 0] = NA 80 | 81 | tree@terms[pa] 82 | } 83 | 84 | 85 | -------------------------------------------------------------------------------- /man/ontology_DAG-class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \docType{class} 4 | \name{ontology_DAG-class} 5 | \alias{ontology_DAG-class} 6 | \alias{ontology_DAG} 7 | \title{The ontology_DAG class} 8 | \value{ 9 | An \code{ontology_DAG} object. 10 | } 11 | \description{ 12 | This class defines the DAG structure of an ontology. 13 | } 14 | \section{Slots}{ 15 | 16 | \describe{ 17 | \item{\code{terms}}{A character vector of length \code{n} of all term names. Other slots that store term-level information use the integer indices of terms.} 18 | 19 | \item{\code{n_terms}}{An integer scalar of the total number of terms in the DAG.} 20 | 21 | \item{\code{n_relations}}{An integer scalar of the total number of relations in the DAG.} 22 | 23 | \item{\code{lt_parents}}{A list of length \code{n}. Each element in the list is an integer index vector of the parent terms of the i^th term.} 24 | 25 | \item{\code{lt_children}}{A list of length \code{n}. Each element in the list is an integer index vector of the child terms of the i^th term.} 26 | 27 | \item{\code{lt_children_relations}}{A list of length \code{n}. Each element is a vector of the semantic relations between the i^th term and its child terms, e.g. a child "is_a" parent. 28 | The relations are represented as integers. 
The character name of the relations is in \code{attr(dag@lt_children_relations, "levels")}.} 29 | 30 | \item{\code{relations_DAG}}{A simple \code{ontology_DAG} object but constructed for relation types.} 31 | 32 | \item{\code{source}}{The source of the ontology. A character scalar only used as a mark of the returned object.} 33 | 34 | \item{\code{root}}{An integer scalar of the root term.} 35 | 36 | \item{\code{leaves}}{An integer vector of the indices of leaf terms.} 37 | 38 | \item{\code{alternative_terms}}{A named character vector of mappings from alternative terms to DAG terms.} 39 | 40 | \item{\code{tpl_sorted}}{An integer vector of reordered term indices which has been topologically sorted in the DAG. Terms are sorted first by the depth (maximal 41 | distance from root), then the number of child terms, then the number of parent terms, and last the term names.} 42 | 43 | \item{\code{tpl_pos}}{The position of the original term in the topologically sorted path (similar to the rank), e.g. the value of the first element in the vector 44 | is the position of term 1 in the topologically sorted path.} 45 | 46 | \item{\code{annotation}}{A list of two elements: \code{list} and \code{names}. The \code{dag@annotation$list} element contains a list of length \code{n} and each element 47 | is a vector of integer indices of annotated items. The full list of annotated items is in \code{dag@annotation$names}.} 48 | 49 | \item{\code{term_env}}{An environment which contains various term-level statistics. It is mainly for caching purposes.} 50 | 51 | \item{\code{aspect_ratio}}{A numeric vector of length two. The aspect ratio is calculated as \code{w/h}. For each term, there is a distance to root, 52 | \code{h} is the maximal distance of all terms, \code{w} is the maximal number of items with the same distance.
The two values in the \code{aspect_ratio} slot 53 | use maximal distance to root (the height) and the shortest distance to root as the distance measure.} 54 | 55 | \item{\code{elementMetadata}}{An additional data frame with the same number of rows as the number of terms in DAG. Order of rows should be the same as order of terms in \code{dag@terms}.} 56 | }} 57 | 58 | \examples{ 59 | 1 60 | # This function should not be used directly. 61 | } 62 | -------------------------------------------------------------------------------- /inst/scripts/download.R: -------------------------------------------------------------------------------- 1 | 2 | setwd("~/workspace/ontology") 3 | 4 | 5 | # http://obofoundry.org/ 6 | 7 | 8 | dir.create("OBOFoundry", showWarnings = FALSE) 9 | setwd("OBOFoundry") 10 | 11 | library(jsonlite) 12 | 13 | lt = fromJSON("http://obofoundry.org/registry/ontologies.jsonld") 14 | tb = lt$ontologies 15 | saveRDS(tb, file = "OBOFoundry_meta_table.rds") 16 | 17 | library(rvest) 18 | options(timeout = 9999999) 19 | 20 | for(i in seq_len(nrow(tb))) { 21 | qqcat("============ @{tb[i, 'id']} [@{i}/@{nrow(tb)}] ============\n") 22 | 23 | html = read_html(qq("http://obofoundry.org/ontology/@{tb[i, 'id']}.html")) 24 | nodes = html %>% html_elements(xpath = "//div[contains (text(), 'Products')]/following-sibling::table/*/tr/td[1]/a[@href]") 25 | file = nodes %>% html_text() 26 | url = nodes %>% html_attr("href") 27 | 28 | dir.create(tb$id[i], showWarnings = FALSE) 29 | 30 | for(j in seq_along(file)) { 31 | dest = qq("@{tb$id[i]}/@{basename(file[j])}") 32 | 33 | oe = try(header <- curlGetHeaders(url[j])) 34 | if(!inherits(oe, "try-error")) { 35 | ln2 = header[grepl("Content-Length", header, ignore.case = TRUE)] 36 | 37 | filesize = max(as.numeric(gsub('^Content-Length: (\\d+)\\s*$', "\\1", ln2, ignore.case = TRUE))) 38 | 39 | if(file.exists(dest)) { 40 | if(file.info(dest)[1, "size"] == filesize) { 41 | qqcat("already downloaded, skip.\n") 42 | next 43 | } 44 | } 
45 | } 46 | oe = try(download.file(url[j], dest = dest)) 47 | if(inherits(oe, "try-error")) { 48 | file.remove(dest) 49 | } 50 | } 51 | } 52 | 53 | 54 | 55 | ########################## 56 | 57 | setwd("~/workspace/ontology") 58 | 59 | dir.create("BioPortal", showWarnings = FALSE) 60 | setwd("BioPortal") 61 | 62 | options(timeout = 9999999) 63 | 64 | ## https://bioportal.bioontology.org/ 65 | apikey = 66 | js = fromJSON(qq("https://data.bioontology.org/ontologies?apikey=@{apikey}")) 67 | 68 | saveRDS(js, file = "BioPortal_meta_table.rds") 69 | 70 | 71 | for(i in seq_len(nrow(js))) { 72 | acronym = js$acronym[i] 73 | submissions = js$links$submissions[i] 74 | 75 | qqcat("============ @{acronym} [@{i}/@{nrow(js)}] ============\n") 76 | 77 | oe = try(sub <- fromJSON(qq("@{submissions}?apikey=@{apikey}"))) 78 | 79 | if(inherits(oe, "try-error")) { 80 | next 81 | } 82 | 83 | if(length(sub) == 0) { 84 | next 85 | } 86 | 87 | hasOntologyLanguage = sub$hasOntologyLanguage[1] 88 | submission_id = sub[1, "@id"] 89 | 90 | url = qq("@{submission_id}/download?apikey=@{apikey}") 91 | header = curlGetHeaders(url) 92 | ln = header[grepl("Content-Disposition: attachment; filename", header, ignore.case = TRUE)] 93 | ln2 = header[grepl("Content-Length", header, ignore.case = TRUE)] 94 | 95 | if(length(ln)) { 96 | dest = gsub('^Content-Disposition: attachment; filename="(.*)".*$', "\\1", ln, ignore.case = TRUE) 97 | filesize = as.numeric(gsub('^Content-Length: (\\d+)\\s*$', "\\1", ln2, ignore.case = TRUE)) 98 | } else { 99 | next 100 | } 101 | 102 | dest = qq("@{acronym}/@{dest}") 103 | 104 | dir.create(acronym, showWarnings = FALSE) 105 | 106 | if(file.exists(dest)) { 107 | qqcat("already downloaded, skip.\n") 108 | if(file.info(dest)[1, "size"] == filesize) { 109 | next 110 | } 111 | } 112 | 113 | qqcat(" hasOntologyLanguage: @{hasOntologyLanguage}\n") 114 | qqcat(" download: @{submission_id}/download\n") 115 | qqcat(" local: @{dest}\n") 116 | cat("\n") 117 | 118 | 
download.file(qq("@{submission_id}/download?apikey=@{apikey}"), dest = dest) 119 | } 120 | 121 | -------------------------------------------------------------------------------- /tests/testthat/tests_dist.R: -------------------------------------------------------------------------------- 1 | 2 | library(testthat) 3 | 4 | 5 | ## export all functions 6 | if(!identical(topenv(), .GlobalEnv)) { 7 | pkg_env = asNamespace("simona") 8 | all_objs = ls(envir = pkg_env) 9 | for(obj in all_objs) { 10 | assign(obj, get(obj, envir = pkg_env, inherits = FALSE)) 11 | } 12 | } 13 | 14 | #### test a small dag 15 | 16 | # b--d--f 17 | # / \ 18 | # a---c--e 19 | # upstream -> downstream 20 | 21 | parents = c("a", "a", "b", "b", "c", "d") 22 | children = c("b", "c", "c", "d", "e", "f") 23 | 24 | dag = create_ontology_DAG(parents, children) 25 | 26 | test_that("test tpl paths", { 27 | expect_equal( 28 | cpp_tpl_shortest_path_length(dag, 1, 3), 29 | 1 30 | ) 31 | expect_equal( 32 | cpp_tpl_shortest_path_length(dag, 1, 5), 33 | 2 34 | ) 35 | expect_equal( 36 | cpp_tpl_longest_path_length(dag, 1, 3), 37 | 2 38 | ) 39 | expect_equal( 40 | cpp_tpl_longest_path_length(dag, 1, 5), 41 | 3 42 | ) 43 | expect_equal( 44 | cpp_tpl_shortest_path_length(dag, 1, 4), 45 | cpp_tpl_longest_path_length(dag, 1, 4) 46 | ) 47 | 48 | ## path 49 | expect_equal( 50 | cpp_tpl_shortest_path(dag, 1, 3), 51 | c(1, 3) 52 | ) 53 | expect_equal( 54 | cpp_tpl_shortest_path_sum_value(dag, 1, 3, 1:6), 55 | 4 56 | ) 57 | expect_equal( 58 | cpp_tpl_shortest_path(dag, 1, 5), 59 | c(1, 3, 5) 60 | ) 61 | expect_equal( 62 | cpp_tpl_shortest_path_sum_value(dag, 1, 5, 1:6), 63 | 9 64 | ) 65 | expect_equal( 66 | cpp_tpl_longest_path(dag, 1, 3), 67 | c(1, 2, 3) 68 | ) 69 | expect_equal( 70 | cpp_tpl_longest_path_sum_value(dag, 1, 3, 1:6), 71 | 6 72 | ) 73 | expect_equal( 74 | cpp_tpl_longest_path(dag, 1, 5), 75 | c(1, 2, 3, 5) 76 | ) 77 | expect_equal( 78 | cpp_tpl_longest_path_sum_value(dag, 1, 5, 1:6), 79 | 11 80 | ) 81 | 
expect_equal( 82 | cpp_tpl_shortest_path(dag, 1, 4), 83 | cpp_tpl_longest_path(dag, 1, 4) 84 | ) 85 | 86 | ## test the other distance method 87 | m = cpp_longest_distances_directed(dag, 1:6) 88 | for(i in 1:6) { 89 | for(j in 1:6) { 90 | expect_equal( 91 | m[i, j], 92 | cpp_tpl_longest_path_length(dag, i, j) 93 | ) 94 | } 95 | } 96 | 97 | m = cpp_shortest_distances_directed(dag, 1:6) 98 | for(i in 1:6) { 99 | for(j in 1:6) { 100 | expect_equal( 101 | m[i, j], 102 | cpp_tpl_shortest_path_length(dag, i, j) 103 | ) 104 | } 105 | } 106 | }) 107 | 108 | ### test on GO BP 109 | 110 | dag = create_ontology_DAG_from_GO_db() 111 | depth = dag_depth(dag) 112 | 113 | test_that("test two dist methods with GO BP", { 114 | for(i in 1:10) { 115 | go_id_1 = sample(dag@terms[depth > 5], 1) 116 | go_id_2 = sample(dag_ancestors(dag, go_id_1), 1) 117 | j = which(dag@terms == go_id_1) 118 | i = which(dag@terms == go_id_2) 119 | 120 | expect_equal( 121 | cpp_tpl_shortest_path_length(dag, i, j), 122 | cpp_shortest_distances_directed(dag, c(i, j))[1, 2] 123 | ) 124 | 125 | expect_equal( 126 | cpp_tpl_longest_path_length(dag, i, j), 127 | cpp_longest_distances_directed(dag, c(i, j))[1, 2] 128 | ) 129 | } 130 | }) 131 | 132 | 133 | 134 | if(FALSE) { 135 | 136 | dag = create_ontology_DAG_from_GO_db() 137 | system.time(d1 <- shortest_distances_via_NCA(dag, dag@terms[1:1000])); rm(d1); gc(); 138 | system.time(d2 <- longest_distances_via_LCA(dag, dag@terms[1:1000])); rm(d2); gc(); 139 | system.time(d3 <- shortest_distances_directed(dag, dag@terms[1:1000])); rm(d3); gc(); 140 | system.time(d4 <- longest_distances_directed(dag, dag@terms[1:1000])); rm(d4); gc(); 141 | 142 | } 143 | 144 | -------------------------------------------------------------------------------- /inst/scripts/parse_ttl.pl: -------------------------------------------------------------------------------- 1 | use strict; 2 | 3 | my $file = shift(@ARGV); 4 | my @relation_types = @ARGV; 5 | if($file =~/\.gz$/) { 6 | open FILE, 
"gzip -d -c $file |" or die "cannot open $file.";
} else {
	open FILE, $file or die "cannot open $file.";
}

# $section maps each class id to a hash holding prefLabel, notation, definition,
# parent and relation_type. The last two are hashes keyed by the parent id.
my $section = {};
my $id;
my $i_record = 0;

# Read the Turtle file record by record. Every read goes through the FILE handle
# opened above; a record ends on a line whose last character is ".".
while(my $line = <FILE>) {
	if($line =~/^\@prefix /) {
		next;
	}

	if($line =~/^\s*$/) {
		next;
	} else {
		if($line =~/owl:Class/ and $line !~/\/STY\//) {
			# First line of an owl:Class record: the id is the first <...> on the line.
			$line =~/<(.*?)>/;
			$id = $1;
			$section->{$id} = {};
			$i_record ++;

			# if($i_record % 10000 == 0) {
			# 	print "$i_record finished...\n";
			# }

			# Consume the remaining lines of this record.
			while($line = <FILE>) {
				if($line =~/skos:prefLabel/) {
					$line =~/"""(.*?)"""/;
					$section->{$id}->{prefLabel} = $1;
					# Double quotes would break the quoted CSV fields printed below.
					$section->{$id}->{prefLabel} =~s/"/``/g;
				}
				if($line =~/skos:notation/) {
					$line =~/"""(.*?)"""/;
					$section->{$id}->{notation} = $1;
				}
				if($line =~/skos:definition/) {
					$line =~/"""(.*?)"""/;
					$section->{$id}->{definition} = $1;
					$section->{$id}->{definition} =~s/"/``/g;
				}
				# "is_a" links: rdfs:subClassOf or a predicate containing /isa or /is_a.
				if($line =~/rdfs:subClassOf/ or $line =~/\/is_?a/i) {
					if($line =~/<([^<]+?)> ;/) {
						if(!defined($section->{$id}->{parent})) {
							$section->{$id}->{parent} = {};
							$section->{$id}->{parent}->{$1} = 1;
							$section->{$id}->{relation_type} = {};
							$section->{$id}->{relation_type}->{$1} = "is_a";

						} else {
							$section->{$id}->{parent}->{$1} = 1;
							$section->{$id}->{relation_type}->{$1} = "is_a";
						}
					}
				}
				# Additional relation types requested on the command line.
				foreach my $type (@relation_types) {
					if($line =~/\/$type/i) {
						if($line =~/<([^<]+?)> ;/) {
							if(!defined($section->{$id}->{parent})) {
								$section->{$id}->{parent} = {};
								$section->{$id}->{parent}->{$1} = 1;
								$section->{$id}->{relation_type} = {};
								$section->{$id}->{relation_type}->{$1} = $type;
							} else {
								$section->{$id}->{parent}->{$1} = 1;
								$section->{$id}->{relation_type}->{$1} = $type;
							}
						}
					}
				}

				if($line =~/\.$/) {
					last;
				}
			}
		} else {
			# Not an owl:Class record: skip to the end of the record.
			while($line = <FILE>) {
				if($line =~/\.$/) {
					last;
				}
			}
		}
	}
}

if($i_record == 0) {
	die "cannot find any object of 'owl:Class'.";
}

# Emit one quoted CSV row per class, sorted by id.
print "\"id\",\"prefLabel\",\"notation\",\"definition\",\"parent\",\"relation_type\"\n";

foreach $id (sort keys %$section) {
	print "\"$id\"";
	if(!defined($section->{$id}->{prefLabel})) {
		print ",\"\"";
	} else {
		print ",\"$section->{$id}->{prefLabel}\"";
	}
	if(!defined($section->{$id}->{notation})) {
		print ",\"\"";
	} else {
		print ",\"$section->{$id}->{notation}\"";
	}
	if(!defined($section->{$id}->{definition})) {
		print ",\"\"";
	} else {
		print ",\"$section->{$id}->{definition}\"";
	}
	# The parent ids are taken from the keys of relation_type (which is keyed by
	# parent id) so that their order matches the values() printed in the next column.
	if(!defined($section->{$id}->{parent})) {
		print ",\"\"";
	} else {
		print ",\"".join(",", keys %{$section->{$id}->{relation_type}})."\"";
	}
	if(!defined($section->{$id}->{relation_type})) {
		print ",\"\"";
	} else {
		print ",\"".join(",", values %{$section->{$id}->{relation_type}})."\"";
	}
	print "\n";
}

--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
/* http://gregfranko.com/blog/jquery-best-practices/ */
(function($) {
  $(function() {

    $('.navbar-fixed-top').headroom();

    $('body').css('padding-top', $('.navbar').height() + 10);
    $(window).resize(function(){
      $('body').css('padding-top', $('.navbar').height() + 10);
    });

    $('[data-toggle="tooltip"]').tooltip();

    var cur_path = paths(location.pathname);
    var links = $("#navbar ul li a");
    var max_length = -1;
    var pos = -1;
    for (var i = 0; i < links.length; i++) {
      if (links[i].getAttribute("href") === "#")
        continue;
      // Ignore external links
      if
(links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | 
clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /vignettes/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #fff; 3 | margin: 1em auto; 4 | max-width: 1000px; 5 | overflow: visible; 6 | padding-left: 2em; 7 | padding-right: 2em; 8 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 9 | font-size: 14px; 10 | line-height: 1.5; 11 | } 12 | #TOC { 13 | clear: both; 14 | margin: 0 0 10px 10px; 15 | padding: 4px; 16 | width: 600px; 17 | border: 1px solid #CCCCCC; 18 | border-radius: 5px; 19 | background-color: #f6f6f6; 20 | font-size: 13px; 21 | line-height: 1.3; 22 | } 23 | #TOC .toctitle { 24 | font-weight: bold; 25 | font-size: 15px; 26 | margin-left: 5px; 27 | } 28 | #TOC ul { 29 | padding-left: 40px; 30 | margin-left: -1.5em; 31 | margin-top: 5px; 32 | margin-bottom: 5px; 33 | } 34 | #TOC ul ul { 35 | margin-left: -2em; 36 | } 37 | #TOC li { 38 | line-height: 16px; 39 | } 40 | table { 41 | margin: 1em auto; 42 | border-width: 1px; 43 | border-color: #DDDDDD; 44 | border-style: outset; 45 | border-collapse: collapse; 46 | } 47 | table th { 48 | border-width: 2px; 49 | padding: 5px; 50 | border-style: inset; 51 | } 52 | table td { 53 | border-width: 1px; 54 | border-style: inset; 55 | line-height: 18px; 56 | padding: 5px 5px; 57 | } 58 | table, table th, table td { 59 | border-left-style: none; 60 | border-right-style: none; 61 | } 62 | table thead, table tr.even { 63 | background-color: #f7f7f7; 64 | } 65 | p { 66 | margin: 0.5em 0; 67 | } 68 | blockquote { 69 | background-color: #f6f6f6; 70 | padding: 0.25em 0.75em; 71 | } 72 | hr { 73 | border-style: solid; 74 | border: none; 75 | border-top: 1px solid #777; 76 | margin: 28px 0; 77 | } 78 | dl { 79 | 
margin-left: 0; 80 | } 81 | dl dd { 82 | margin-bottom: 13px; 83 | margin-left: 13px; 84 | } 85 | dl dt { 86 | font-weight: bold; 87 | } 88 | ul { 89 | margin-top: 0; 90 | } 91 | ul li { 92 | list-style: circle outside; 93 | } 94 | ul ul { 95 | margin-bottom: 0; 96 | } 97 | pre, code { 98 | background-color: #f7f7f7; 99 | border-radius: 3px; 100 | color: #333; 101 | white-space: pre-wrap; 102 | } 103 | pre { 104 | border-radius: 3px; 105 | margin: 5px 0px 10px 0px; 106 | padding: 10px; 107 | } 108 | pre:not([class]) { 109 | background-color: #f7f7f7; 110 | } 111 | code { 112 | font-family: Consolas, Monaco, 'Courier New', monospace; 113 | font-size: 95%; 114 | } 115 | p > code, li > code { 116 | padding: 2px 0px; 117 | } 118 | div.figure { 119 | text-align: center; 120 | } 121 | img { 122 | max-width: 100%; 123 | } 124 | h1 { 125 | margin-top: 0; 126 | font-size: 35px; 127 | line-height: 40px; 128 | } 129 | h2 { 130 | border-bottom: 4px solid #f7f7f7; 131 | padding-top: 10px; 132 | padding-bottom: 2px; 133 | font-size: 145%; 134 | } 135 | h3 { 136 | border-bottom: 2px solid #f7f7f7; 137 | padding-top: 10px; 138 | font-size: 120%; 139 | } 140 | h4 { 141 | border-bottom: 1px solid #f7f7f7; 142 | margin-left: 8px; 143 | font-size: 105%; 144 | } 145 | h5, h6 { 146 | border-bottom: 1px solid #ccc; 147 | font-size: 105%; 148 | } 149 | a { 150 | color: #0033dd; 151 | text-decoration: none; 152 | } 153 | a:hover { 154 | color: #6666ff; } 155 | a:visited { 156 | color: #800080; } 157 | a:visited:hover { 158 | color: #BB00BB; } 159 | a[href^="http:"] { 160 | text-decoration: underline; } 161 | a[href^="https:"] { 162 | text-decoration: underline; } 163 | 164 | code > span.kw { color: #555; font-weight: bold; } 165 | code > span.dt { color: #902000; } 166 | code > span.dv { color: #40a070; } 167 | code > span.bn { color: #d14; } 168 | code > span.fl { color: #d14; } 169 | code > span.ch { color: #d14; } 170 | code > span.st { color: #d14; } 171 | code > span.co { color: 
#888888; font-style: italic; } 172 | code > span.ot { color: #007020; } 173 | code > span.al { color: #ff0000; font-weight: bold; } 174 | code > span.fu { color: #900; font-weight: bold; } 175 | code > span.er { color: #a61717; background-color: #e3d2d2; } 176 | 177 | 178 | .caption { 179 | font-style: italic; 180 | color: grey; 181 | } 182 | -------------------------------------------------------------------------------- /docs/articles/main.css: -------------------------------------------------------------------------------- 1 | body { 2 | background-color: #fff; 3 | margin: 1em auto; 4 | max-width: 1000px; 5 | overflow: visible; 6 | padding-left: 2em; 7 | padding-right: 2em; 8 | font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif; 9 | font-size: 14px; 10 | line-height: 1.5; 11 | } 12 | #TOC { 13 | clear: both; 14 | margin: 0 0 10px 10px; 15 | padding: 4px; 16 | width: 600px; 17 | border: 1px solid #CCCCCC; 18 | border-radius: 5px; 19 | background-color: #f6f6f6; 20 | font-size: 13px; 21 | line-height: 1.3; 22 | } 23 | #TOC .toctitle { 24 | font-weight: bold; 25 | font-size: 15px; 26 | margin-left: 5px; 27 | } 28 | #TOC ul { 29 | padding-left: 40px; 30 | margin-left: -1.5em; 31 | margin-top: 5px; 32 | margin-bottom: 5px; 33 | } 34 | #TOC ul ul { 35 | margin-left: -2em; 36 | } 37 | #TOC li { 38 | line-height: 16px; 39 | } 40 | table { 41 | margin: 1em auto; 42 | border-width: 1px; 43 | border-color: #DDDDDD; 44 | border-style: outset; 45 | border-collapse: collapse; 46 | } 47 | table th { 48 | border-width: 2px; 49 | padding: 5px; 50 | border-style: inset; 51 | } 52 | table td { 53 | border-width: 1px; 54 | border-style: inset; 55 | line-height: 18px; 56 | padding: 5px 5px; 57 | } 58 | table, table th, table td { 59 | border-left-style: none; 60 | border-right-style: none; 61 | } 62 | table thead, table tr.even { 63 | background-color: #f7f7f7; 64 | } 65 | p { 66 | margin: 0.5em 0; 67 | } 68 | blockquote { 69 | background-color: #f6f6f6; 70 | 
padding: 0.25em 0.75em; 71 | } 72 | hr { 73 | border-style: solid; 74 | border: none; 75 | border-top: 1px solid #777; 76 | margin: 28px 0; 77 | } 78 | dl { 79 | margin-left: 0; 80 | } 81 | dl dd { 82 | margin-bottom: 13px; 83 | margin-left: 13px; 84 | } 85 | dl dt { 86 | font-weight: bold; 87 | } 88 | ul { 89 | margin-top: 0; 90 | } 91 | ul li { 92 | list-style: circle outside; 93 | } 94 | ul ul { 95 | margin-bottom: 0; 96 | } 97 | pre, code { 98 | background-color: #f7f7f7; 99 | border-radius: 3px; 100 | color: #333; 101 | white-space: pre-wrap; 102 | } 103 | pre { 104 | border-radius: 3px; 105 | margin: 5px 0px 10px 0px; 106 | padding: 10px; 107 | } 108 | pre:not([class]) { 109 | background-color: #f7f7f7; 110 | } 111 | code { 112 | font-family: Consolas, Monaco, 'Courier New', monospace; 113 | font-size: 95%; 114 | } 115 | p > code, li > code { 116 | padding: 2px 0px; 117 | } 118 | div.figure { 119 | text-align: center; 120 | } 121 | img { 122 | max-width: 100%; 123 | } 124 | h1 { 125 | margin-top: 0; 126 | font-size: 35px; 127 | line-height: 40px; 128 | } 129 | h2 { 130 | border-bottom: 4px solid #f7f7f7; 131 | padding-top: 10px; 132 | padding-bottom: 2px; 133 | font-size: 145%; 134 | } 135 | h3 { 136 | border-bottom: 2px solid #f7f7f7; 137 | padding-top: 10px; 138 | font-size: 120%; 139 | } 140 | h4 { 141 | border-bottom: 1px solid #f7f7f7; 142 | margin-left: 8px; 143 | font-size: 105%; 144 | } 145 | h5, h6 { 146 | border-bottom: 1px solid #ccc; 147 | font-size: 105%; 148 | } 149 | a { 150 | color: #0033dd; 151 | text-decoration: none; 152 | } 153 | a:hover { 154 | color: #6666ff; } 155 | a:visited { 156 | color: #800080; } 157 | a:visited:hover { 158 | color: #BB00BB; } 159 | a[href^="http:"] { 160 | text-decoration: underline; } 161 | a[href^="https:"] { 162 | text-decoration: underline; } 163 | 164 | code > span.kw { color: #555; font-weight: bold; } 165 | code > span.dt { color: #902000; } 166 | code > span.dv { color: #40a070; } 167 | code > span.bn { 
color: #d14; } 168 | code > span.fl { color: #d14; } 169 | code > span.ch { color: #d14; } 170 | code > span.st { color: #d14; } 171 | code > span.co { color: #888888; font-style: italic; } 172 | code > span.ot { color: #007020; } 173 | code > span.al { color: #ff0000; font-weight: bold; } 174 | code > span.fu { color: #900; font-weight: bold; } 175 | code > span.er { color: #a61717; background-color: #e3d2d2; } 176 | 177 | 178 | .caption { 179 | font-style: italic; 180 | color: grey; 181 | } 182 | -------------------------------------------------------------------------------- /man/import_obo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/import.R 3 | \name{import_obo} 4 | \alias{import_obo} 5 | \alias{import_owl} 6 | \alias{import_ontology} 7 | \alias{import_ttl} 8 | \title{Import ontology file to an ontology_DAG object} 9 | \usage{ 10 | import_obo( 11 | file, 12 | relation_type = character(0), 13 | inherit_relations = TRUE, 14 | verbose = simona_opt$verbose, 15 | ... 16 | ) 17 | 18 | import_owl( 19 | file, 20 | relation_type = character(0), 21 | inherit_relations = TRUE, 22 | verbose = simona_opt$verbose, 23 | ... 24 | ) 25 | 26 | import_ontology( 27 | file, 28 | robot_jar = simona_opt$robot_jar, 29 | JAVA_ARGS = "", 30 | verbose = simona_opt$verbose, 31 | ... 32 | ) 33 | 34 | import_ttl(file, relation_type = "part_of", verbose = simona_opt$verbose, ...) 35 | } 36 | \arguments{ 37 | \item{file}{Path of the ontology file or an URL.} 38 | 39 | \item{relation_type}{Semantic relation types to include. Note \code{is_a} relation is always included.} 40 | 41 | \item{inherit_relations}{Relations may also be structured as a DAG. 
It controls whether to merge with a relation's offspring relations.} 42 | 43 | \item{verbose}{Whether to print messages.} 44 | 45 | \item{...}{Pass to \code{\link[=create_ontology_DAG]{create_ontology_DAG()}}.} 46 | 47 | \item{robot_jar}{The path of the \code{robot.jar} file. It can be downloaded from https://github.com/ontodev/robot/releases. 48 | Internally, the file is converted to the obo format and parsed by \code{import_obo()}. The value of \code{robot_jar} can be 49 | set as a global option \code{simona_opt$robot_jar = ...}.} 50 | 51 | \item{JAVA_ARGS}{Options for \code{java}. For example you can set \code{-Xmx20G} if you want to increase the memory to 20G for java.} 52 | } 53 | \value{ 54 | An \code{ontology_DAG} object. 55 | } 56 | \description{ 57 | Import ontology file to an ontology_DAG object 58 | } 59 | \details{ 60 | Public bio-ontologies can be obtained from \href{http://obofoundry.org/}{OBO Foundry} or \href{https://bioportal.bioontology.org/}{BioPortal}. 61 | 62 | The \code{import_obo()} function parses the ontology file in \code{.obo} format. To parse other formats, the external tool \code{robot.jar} is required. 63 | 64 | \code{import_owl()} only recognizes \verb{<owl:Class>} and \verb{<owl:ObjectProperty>}. If the .owl file does not contain these tags, 65 | please use \code{import_ontology()} directly. 66 | 67 | \code{robot.jar} can automatically recognize the following formats: 68 | \itemize{ 69 | \item \code{json}: OBO Graphs JSON 70 | \item \code{obo}: OBO Format 71 | \item \code{ofn}: OWL Functional 72 | \item \code{omn}: Manchester 73 | \item \code{owl}: RDF/XML 74 | \item \code{owx}: OWL/XML 75 | \item \code{ttl}: Turtle 76 | } 77 | 78 | The description of the ROBOT tool is at \url{http://robot.obolibrary.org/convert}. 79 | 80 | \code{import_ttl()} is a simple parser for the \code{.ttl} format files. It only recognizes 81 | terms that have the \code{owl:Class} object.
The "is_a" relation is recognized by the predicate \code{rdfs:subClassOf} 82 | or an ontology-specific predicate that contains \verb{.*/isa}. Other relation types are defined with 83 | the predicate \code{owl:ObjectProperty}. The format is parsed by a Perl script \code{system.file("scripts", "parse_ttl.pl", package = "simona")}. 84 | } 85 | \examples{ 86 | \donttest{ 87 | # The plant ontology: http://obofoundry.org/ontology/po.html 88 | import_obo("https://raw.githubusercontent.com/Planteome/plant-ontology/master/po.obo") 89 | } 90 | \donttest{ 91 | import_owl("http://purl.obolibrary.org/obo/po.owl") 92 | } 93 | \dontrun{ 94 | # The plant ontology: http://obofoundry.org/ontology/po.html 95 | dag = import_ontology("http://purl.obolibrary.org/obo/po.owl", robot_jar = ...) 96 | } 97 | \donttest{ 98 | # file is from https://bioportal.bioontology.org/ontologies/MSTDE 99 | import_ttl("https://jokergoo.github.io/simona/MSTDE.ttl") 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /man/create_ontology_DAG.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/constructor.R 3 | \name{create_ontology_DAG} 4 | \alias{create_ontology_DAG} 5 | \title{Create the ontology_DAG object} 6 | \usage{ 7 | create_ontology_DAG( 8 | parents, 9 | children, 10 | relations = NULL, 11 | relations_DAG = NULL, 12 | source = "Ontology", 13 | annotation = NULL, 14 | remove_cyclic_paths = FALSE, 15 | remove_rings = FALSE, 16 | alternative_terms = list(), 17 | verbose = simona_opt$verbose 18 | ) 19 | } 20 | \arguments{ 21 | \item{parents}{A character vector of parent terms. You can also construct the \code{ontology_DAG} object by a list of parent-child links. See \strong{Examples}.} 22 | 23 | \item{children}{A character vector of child terms.} 24 | 25 | \item{relations}{A character vector of parent-child relations, e.g. 
"is_a", "part_of", or self-defined semantic relations. 26 | If it is set, it should have the same length as \code{parents} and \code{children}.} 27 | 28 | \item{relations_DAG}{If the relation types have hierarchical relations, it can also be constructed by \code{create_ontology_DAG()} first. See \strong{Examples}. 29 | When the DAG for relation types is provided, the ancestor/offspring relationship of relation types will be taken into consideration automatically.} 30 | 31 | \item{source}{Source of the ontology. It is only used as a label of the object.} 32 | 33 | \item{annotation}{A list of character vectors which contain items annotated to the terms. Names of the list should be the term names. In the DAG, items 34 | annotated to a term will also be annotated to its parents. Such merging 35 | is applied automatically in the package.} 36 | 37 | \item{remove_cyclic_paths}{Whether to remove cyclic paths. If a cyclic path is represented as \verb{[a, b, ..., z, a]}, 38 | the last link (i.e. \code{z->a}) is simply removed. If the value is set to \code{FALSE} and if there are cyclic paths, there 39 | will be an error that lists all cyclic paths.} 40 | 41 | \item{remove_rings}{There might be rings that are isolated from the main DAG where there are no roots on the rings, thus they cannot be attached to the main DAG. If the value 42 | of \code{remove_rings} is set to \code{TRUE}, such rings are removed.} 43 | 44 | \item{alternative_terms}{A named list or vector that contains mappings from alternative term IDs to terms used in the DAG. In an ontology, there 45 | might be old term IDs marked as "replaced_by", "consider" or "alt_id" in the ".obo" file. You can provide mappings from old term IDs to current term IDs with this argument. 46 | If it is a one-to-one mapping, the mapping 47 | can be a named vector where alternative term IDs are names and DAG term IDs are values. 
If it is a one-to-many mapping, the variable 48 | should be a named list where each member vector will first be matched to the DAG terms. If the mapping is still one-to-many, the first one is selected.} 49 | 50 | \item{verbose}{Whether to print messages.} 51 | } 52 | \value{ 53 | An \code{ontology_DAG} object. 54 | } 55 | \description{ 56 | Create the ontology_DAG object 57 | } 58 | \examples{ 59 | parents = c("a", "a", "b", "b", "c", "d") 60 | children = c("b", "c", "c", "d", "e", "f") 61 | dag = create_ontology_DAG(parents, children) 62 | 63 | # with annotations 64 | annotation = list( 65 | "a" = c("t1", "t2", "t3"), 66 | "b" = c("t3", "t4"), 67 | "c" = "t5", 68 | "d" = "t7", 69 | "e" = c("t4", "t5", "t6", "t7"), 70 | "f" = "t8" 71 | ) 72 | dag = create_ontology_DAG(parents, children, annotation = annotation) 73 | 74 | # with relations 75 | dag = create_ontology_DAG(parents, children, 76 | relations = c("is_a", "part_of", "is_a", "part_of", "is_a", "part_of")) 77 | 78 | # with relations_DAG 79 | relations_DAG = create_ontology_DAG(c("r2", "r2"), c("r3", "r4")) 80 | dag = create_ontology_DAG(parents, children, 81 | relations = c("r1", "r2", "r1", "r3", "r1", "r4"), 82 | relations_DAG = relations_DAG) 83 | 84 | # with a list of parent-child relations 85 | dag = create_ontology_DAG(c("a-b", "a-c", "b-c", "b-d", "c-e", "e-f")) 86 | } 87 | -------------------------------------------------------------------------------- /R/dist.R: -------------------------------------------------------------------------------- 1 | 2 | #' Distance on the DAG 3 | #' 4 | #' @param dag An `ontology_DAG` object. 5 | #' @param terms A vector of term names. 6 | #' @param verbose Whether to print messages. 7 | #' 8 | #' @details 9 | #' Denote two terms as `a` and `b`, a common ancestor as `c`, and the distance function `d()` calculates the longest 10 | #' distance or the shortest distance depending on the function. 
11 | #' 12 | #' - `shortest_distances_via_NCA()`: It calculates the smallest `d(c, a) + d(c, b)` where `d()` calculates the shortest distance between two terms. In this case, 13 | #' `c` is the NCA (nearest common ancestor) of `a` and `b`. 14 | #' - `longest_distances_via_LCA()`: It calculates the largest `d(c, a) + d(c, b)` where `d()` calculates the longest distance between two terms *via the LCA (lowest common ancestor) term*. In this case, 15 | #' `c` is the LCA of `a` and `b`. 16 | #' - `shortest_distances_directed()`: It calculates `d(a, b)` where `d()` calculates the shortest distance between two terms. The distance is only calculated when `a` is an ancestor of `b`, otherwise the distance value is -1. 17 | #' - `longest_distances_directed()`: It calculates `d(a, b)` where `d()` calculates the longest distance between two terms. The distance is only calculated when `a` is an ancestor of `b`, otherwise the distance value is -1. 18 | #' @rdname distance 19 | #' @export 20 | #' @returns A numeric distance matrix. 
21 | #' @examples 22 | #' parents = c("a", "a", "b", "b", "c", "d") 23 | #' children = c("b", "c", "c", "d", "e", "f") 24 | #' dag = create_ontology_DAG(parents, children) 25 | #' shortest_distances_via_NCA(dag, letters[1:6]) 26 | #' longest_distances_via_LCA(dag, letters[1:6]) 27 | #' shortest_distances_directed(dag, letters[1:6]) 28 | #' longest_distances_directed(dag, letters[1:6]) 29 | shortest_distances_via_NCA = function(dag, terms, verbose = simona_opt$verbose) { 30 | if(is.character(terms)) { 31 | id = term_to_node_id(dag, terms, strict = FALSE) 32 | } else { 33 | id = terms 34 | } 35 | if(any(duplicated(id))) { 36 | stop("`term` should not be duplicated.") 37 | } 38 | d = exec_under_message_condition({ 39 | cpp_shortest_distances_via_NCA(dag, id) 40 | }, verbose = verbose) 41 | 42 | dimnames(d) = list(dag@terms[id], dag@terms[id]) 43 | d 44 | } 45 | 46 | #' @rdname distance 47 | #' @export 48 | longest_distances_via_LCA = function(dag, terms, verbose = simona_opt$verbose) { 49 | if(is.character(terms)) { 50 | id = term_to_node_id(dag, terms, strict = FALSE) 51 | } else { 52 | id = terms 53 | } 54 | if(any(duplicated(id))) { 55 | stop("`term` should not be duplicated.") 56 | } 57 | d = exec_under_message_condition({ 58 | cpp_max_ancestor_path_sum_value(dag, id, dag_depth(dag), rep(1, dag@n_terms)) - 1 59 | }, verbose = verbose) 60 | 61 | dimnames(d) = list(dag@terms[id], dag@terms[id]) 62 | d 63 | } 64 | 65 | #' @rdname distance 66 | #' @export 67 | shortest_distances_directed = function(dag, terms, verbose = simona_opt$verbose) { 68 | if(is.character(terms)) { 69 | id = term_to_node_id(dag, terms, strict = FALSE) 70 | } else { 71 | id = terms 72 | } 73 | if(any(duplicated(id))) { 74 | stop("`term` should not be duplicated.") 75 | } 76 | d = exec_under_message_condition({ 77 | cpp_shortest_distances_directed(dag, id) 78 | }, verbose = verbose) 79 | 80 | dimnames(d) = list(dag@terms[id], dag@terms[id]) 81 | d 82 | } 83 | 84 | #' @rdname distance 85 | #' 
@export 86 | longest_distances_directed = function(dag, terms, verbose = simona_opt$verbose) { 87 | if(is.character(terms)) { 88 | id = term_to_node_id(dag, terms, strict = FALSE) 89 | } else { 90 | id = terms 91 | } 92 | if(any(duplicated(id))) { 93 | stop("`term` should not be duplicated.") 94 | } 95 | d = exec_under_message_condition({ 96 | cpp_longest_distances_directed(dag, id) 97 | }, verbose = verbose) 98 | 99 | dimnames(d) = list(dag@terms[id], dag@terms[id]) 100 | d 101 | } 102 | 103 | 104 | longest_distances_from_LCA = function(dag, id, verbose = simona_opt$verbose) { 105 | exec_under_message_condition({ 106 | cpp_longest_distances_from_LCA(dag, id) 107 | }, verbose = verbose) 108 | } 109 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,ontology_tree) 4 | export(CA_terms) 5 | export(LCA_depth) 6 | export(LCA_term) 7 | export(MICA_IC) 8 | export(MICA_term) 9 | export(NCA_term) 10 | export(add_annotation) 11 | export(all_group_sim_methods) 12 | export(all_term_IC_methods) 13 | export(all_term_sim_methods) 14 | export(alternative_GO_terms) 15 | export(annotated_terms) 16 | export(avg_children) 17 | export(avg_parents) 18 | export(create_ontology_DAG) 19 | export(create_ontology_DAG_from_GO_db) 20 | export(create_ontology_DAG_from_igraph) 21 | export(dag_add_random_children) 22 | export(dag_all_terms) 23 | export(dag_ancestors) 24 | export(dag_as_DOT) 25 | export(dag_as_dendrogram) 26 | export(dag_as_igraph) 27 | export(dag_children) 28 | export(dag_circular_viz) 29 | export(dag_depth) 30 | export(dag_distinct_ancestors) 31 | export(dag_enrich_on_genes) 32 | export(dag_enrich_on_items) 33 | export(dag_enrich_on_offsprings) 34 | export(dag_filter) 35 | export(dag_graphviz) 36 | export(dag_has_terms) 37 | export(dag_height) 38 | export(dag_is_leaf) 39 | 
export(dag_leaves) 40 | export(dag_longest_dist_from_ancestors) 41 | export(dag_longest_dist_to_offspring) 42 | export(dag_n_leaves) 43 | export(dag_n_relations) 44 | export(dag_n_terms) 45 | export(dag_offspring) 46 | export(dag_parents) 47 | export(dag_permutate_children) 48 | export(dag_random) 49 | export(dag_random_tree) 50 | export(dag_reorder) 51 | export(dag_root) 52 | export(dag_shiny) 53 | export(dag_shortest_dist_from_ancestors) 54 | export(dag_shortest_dist_from_root) 55 | export(dag_shortest_dist_to_leaves) 56 | export(dag_shortest_dist_to_offspring) 57 | export(dag_siblings) 58 | export(dag_treelize) 59 | export(group_sim) 60 | export(import_obo) 61 | export(import_ontology) 62 | export(import_owl) 63 | export(import_ttl) 64 | export(longest_distances_directed) 65 | export(longest_distances_via_LCA) 66 | export(max_ancestor_id) 67 | export(max_ancestor_path_sum) 68 | export(max_ancestor_v) 69 | export(method_param) 70 | export(n_ancestors) 71 | export(n_annotations) 72 | export(n_children) 73 | export(n_connected_leaves) 74 | export(n_offspring) 75 | export(n_parents) 76 | export(ontology_DAG) 77 | export(ontology_chebi) 78 | export(ontology_go) 79 | export(ontology_hp) 80 | export(ontology_kw) 81 | export(ontology_pw) 82 | export(ontology_rdo) 83 | export(ontology_reactome) 84 | export(ontology_vt) 85 | export(partition_by_level) 86 | export(partition_by_size) 87 | export(random_items) 88 | export(random_terms) 89 | export(shortest_distances_directed) 90 | export(shortest_distances_via_NCA) 91 | export(simona_opt) 92 | export(term_IC) 93 | export(term_annotations) 94 | export(term_sim) 95 | exportClasses(ontology_DAG) 96 | exportMethods("[") 97 | exportMethods("[[") 98 | exportMethods('mcols<-') 99 | exportMethods(mcols) 100 | exportMethods(show) 101 | import(ComplexHeatmap) 102 | import(GlobalOptions) 103 | import(Rcpp) 104 | import(grid) 105 | import(igraph) 106 | import(shiny) 107 | import(fastmatch) 108 | importFrom(GetoptLong,qq) 109 | 
importFrom(Polychrome,alphabet.colors) 110 | importFrom(circlize,rand_color) 111 | importFrom(grDevices,col2rgb) 112 | importFrom(grDevices,dev.off) 113 | importFrom(grDevices,dev.size) 114 | importFrom(grDevices,png) 115 | importFrom(grDevices,rgb) 116 | importFrom(graphics,barplot) 117 | importFrom(graphics,par) 118 | importFrom(matrixStats,colMaxs) 119 | importFrom(matrixStats,rowMaxs) 120 | importFrom(methods,as) 121 | importFrom(methods,new) 122 | importFrom(stats,dendrapply) 123 | importFrom(stats,p.adjust) 124 | importFrom(stats,phyper) 125 | importFrom(stats,quantile) 126 | importFrom(stats,runif) 127 | importFrom(stats,sd) 128 | importFrom(utils,data) 129 | importFrom(utils,download.file) 130 | importFrom(utils,getFromNamespace) 131 | importFrom(utils,packageDescription) 132 | importFrom(utils,read.csv) 133 | importFrom(utils,read.table) 134 | importFrom(xml2,read_xml) 135 | importFrom(xml2,xml_attr) 136 | importFrom(xml2,xml_find_all) 137 | importFrom(xml2,xml_ns) 138 | importFrom(xml2,xml_text) 139 | importMethodsFrom(S4Vectors,'mcols<-') 140 | importMethodsFrom(S4Vectors,mcols) 141 | useDynLib(simona, .registration = TRUE) 142 | -------------------------------------------------------------------------------- /tests/testthat/tests_reorder.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | 3 | 4 | 5 | 6 | parents = c("a", "a", "b", "b", "c", "d") 7 | children = c("b", "c", "c", "d", "e", "f") 8 | 9 | dag = create_ontology_DAG(parents, children) 10 | 11 | test_that("test dag_reorder", { 12 | dag2 = dag_reorder(dag, value = c(1, 1, 10, 1, 10, 1)) 13 | expect_equal(dag2@lt_children[[2]], rev(dag@lt_children[[2]])) 14 | 15 | dag3 = dag_reorder(dag, value = c(10, 1)) 16 | expect_equal(dag3@lt_children[[2]], rev(dag@lt_children[[2]])) 17 | }) 18 | 19 | 20 | dag = create_ontology_DAG(c("a-b", "a-c", "a-d", "a-e", "a-f", "b-g", "b-h", "b-i", "b-j")) 21 | dag2 = dag_permutate_children(dag) 22 | 
dag_children(dag2, "a") 23 | dag_children(dag2, "b") 24 | 25 | dag = create_ontology_DAG(c("a-h", "a-b", "a-c", "a-d", "b-e", "b-f", "c-g", "h-g", "d-e")) 26 | tree = dag_treelize(dag) 27 | lt = cpp_get_force_counterpart(dag@lt_children, dag@lt_parents, tree@lt_children, tree@lt_parents, dag@root) 28 | 29 | test_that("test cpp_get_force_counterpart", { 30 | expect_equal(lt[[1]], integer(0)) 31 | expect_equal(lt[[2]], 4) 32 | expect_equal(lt[[3]], 8) 33 | expect_equal(lt[[4]], 5) 34 | expect_equal(lt[[5]], 4) 35 | expect_equal(lt[[6]], integer(0)) 36 | expect_equal(lt[[7]], 8) 37 | expect_equal(lt[[8]], 7) 38 | }) 39 | 40 | test_that("test move_index", { 41 | x = c(2, 1, 5, 4, 3) 42 | od = order(-abs(x)) 43 | expect_equal(x[move_index(x, od-1, 1) + 1], c(5, 2, 1, 4, 3)) 44 | expect_equal(x[move_index(x, od-1, 2) + 1], c(5, 4, 2, 1, 3)) 45 | expect_equal(x[move_index(x, od-1, 3) + 1], c(5, 4, 3, 2, 1)) 46 | expect_equal(x[move_index(x, od-1, 4) + 1], c(5, 4, 3, 2, 1)) 47 | expect_equal(x[move_index(x, od-1, 5) + 1], c(5, 4, 3, 2, 1)) 48 | 49 | x = c(-2, -1, -5, -4, -3) 50 | od = order(-abs(x)) 51 | expect_equal(x[move_index(x, od-1, 1) + 1], c(-2, -1, -4, -3, -5)) 52 | expect_equal(x[move_index(x, od-1, 2) + 1], c(-2, -1, -3, -4, -5)) 53 | expect_equal(x[move_index(x, od-1, 3) + 1], c(-2, -1, -3, -4, -5)) 54 | expect_equal(x[move_index(x, od-1, 4) + 1], c(-1, -2, -3, -4, -5)) 55 | expect_equal(x[move_index(x, od-1, 5) + 1], c(-1, -2, -3, -4, -5)) 56 | 57 | x = c(-2, 1, 5, -4, 3) 58 | od = order(-abs(x)) 59 | expect_equal(x[move_index(x, od-1, 1) + 1], c(5, -2, 1, -4, 3)) 60 | expect_equal(x[move_index(x, od-1, 2) + 1], c(5, -2, 1, 3, -4)) 61 | expect_equal(x[move_index(x, od-1, 3) + 1], c(5, 3, -2, 1, -4)) 62 | expect_equal(x[move_index(x, od-1, 4) + 1], c(5, 3, 1, -2, -4)) 63 | expect_equal(x[move_index(x, od-1, 5) + 1], c(5, 3, 1, -2, -4)) 64 | 65 | x = c(3, 1, 5, -4, -2) 66 | od = order(-abs(x)) 67 | expect_equal(x[move_index(x, od-1, 1, FALSE) + 1], c(3, 1, -4, 
-2, 5)) 68 | expect_equal(x[move_index(x, od-1, 2, FALSE) + 1], c(-4, 3, 1, -2, 5)) 69 | expect_equal(x[move_index(x, od-1, 3, FALSE) + 1], c(-4, 1, -2, 3, 5)) 70 | expect_equal(x[move_index(x, od-1, 4, FALSE) + 1], c(-4, -2, 1, 3, 5)) 71 | expect_equal(x[move_index(x, od-1, 5, FALSE) + 1], c(-4, -2, 1, 3, 5)) 72 | }) 73 | 74 | 75 | test_that("test calc_x_offset", { 76 | prev_od = 1:5 77 | new_od = 1:5 78 | expect_equal(calc_x_offset(1:5, prev_od - 1, new_od - 1, 1:5), 79 | c(0, 0, 0, 0, 0)) 80 | 81 | prev_od = 1:5 82 | new_od = c(2, 1, 3, 4, 5) 83 | expect_equal(calc_x_offset(1:5, prev_od - 1, new_od - 1, 1:5), 84 | c(2, -1, 0, 0, 0)) 85 | 86 | prev_od = 1:5 87 | new_od = 5:1 88 | expect_equal(calc_x_offset(1:5, prev_od - 1, new_od - 1, 1:5), 89 | c(14, 11, 6, -1, -10)) 90 | }) 91 | 92 | 93 | # pos = cpp_node_pos_in_tree(tree, n_connected_leaves(tree)) 94 | # lt_counterpart = cpp_get_force_counterpart(dag@lt_children, dag@lt_parents, tree@lt_children, tree@lt_parents, dag@root) 95 | 96 | 97 | # force = cpp_get_force(lt_counterpart, pos$x, dag_depth(tree)) 98 | # test_that("test cpp_get_force", { 99 | # expect_equal(sign(force), c(0, 1, -1, -1, 1, 0, -1, 1)) 100 | # }) 101 | 102 | 103 | # n_cp = sapply(lt_counterpart, length) 104 | # x = pos$x 105 | # test_that("test reorder_children", { 106 | # expect_equal(reorder_children(tree@lt_children[[1]], n_cp, force, pos$width, dag_depth(tree), x, tree@lt_children), 107 | # c(3, 4, 2, 8)) 108 | # expect_equal(order(x[c(3, 4, 2, 8)]), 1:4) 109 | # }) 110 | 111 | 112 | # pos = cpp_node_pos_in_tree(tree, n_connected_leaves(tree)) 113 | # cpp_reorder_tree_x(tree, lt_counterpart, pos$x, pos$width) 114 | 115 | 116 | -------------------------------------------------------------------------------- /src/term.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #include "traverse.h" 5 | #include "utils.h" 6 | 7 | 8 | // [[Rcpp::export]] 9 | 
NumericVector cpp_ic_meng(S4 dag, bool correct) { 10 | List lt_children = dag.slot("lt_children"); 11 | IntegerVector depth = _dag_depth(dag); 12 | int n_terms = dag.slot("n_terms"); 13 | 14 | int max_depth = max(depth); 15 | int n = lt_children.size(); 16 | 17 | NumericVector ic(n); 18 | for(int i = 0; i < n; i ++) { 19 | if(depth[i] == 0 || (!correct && depth[i] == 1)) { 20 | ic[i] = 0; 21 | } else { 22 | LogicalVector l_offspring(n); 23 | _find_offspring(lt_children, i, l_offspring); 24 | 25 | double x = 0.0; 26 | for(int j = 0; j < n; j ++) { 27 | if(l_offspring[j]) { 28 | x = x + 1.0/depth[j]; 29 | } 30 | } 31 | 32 | if(correct) { 33 | ic[i] = log(depth[i]+1)/log(max_depth+1)*(1 - log(x + 1)/log(n_terms)); 34 | } else { 35 | ic[i] = log(depth[i])/log(max_depth)*(1 - log(x + 1)/log(n_terms)); 36 | } 37 | } 38 | } 39 | 40 | return ic; 41 | } 42 | 43 | 44 | // it calculates S_a(t) 45 | double _calc_wang_s(List lt_children, List lt_children_relations, NumericVector contribution, 46 | int i_node, int i_end, LogicalVector l_background, bool correct = false, double c = 0.66667) { 47 | 48 | if(i_node == i_end) { 49 | return 1; 50 | } else { 51 | IntegerVector children = lt_children[i_node]; 52 | IntegerVector relations = lt_children_relations[i_node]; 53 | LogicalVector l_children_included(children.size(), false); 54 | 55 | for(int i = 0; i < children.size(); i ++) { 56 | if(l_background[ children[i] - 1 ]) { 57 | l_children_included[i] = true; 58 | } 59 | } 60 | 61 | NumericVector s(sum(l_children_included), 0); 62 | int nc = sum(l_children_included); 63 | int si = 0; 64 | for(int i = 0; i < children.size(); i ++) { 65 | if(l_children_included[i]) { 66 | if(correct) { 67 | s[si] = _calc_wang_s(lt_children, lt_children_relations, contribution, 68 | children[i] - 1, i_end, l_background, correct, c) * (1/(c+nc) + contribution[relations[i] - 1]); 69 | } else { 70 | s[si] = _calc_wang_s(lt_children, lt_children_relations, contribution, 71 | children[i] - 1, i_end, 
l_background, correct, c) * contribution[relations[i] - 1]; 72 | } 73 | si ++; 74 | } 75 | 76 | } 77 | return max(s); 78 | } 79 | } 80 | 81 | // [[Rcpp::export]] 82 | NumericVector cpp_ic_wang(S4 dag, NumericVector contribution) { 83 | 84 | List lt_parents = dag.slot("lt_parents"); 85 | List lt_children = dag.slot("lt_children"); 86 | List lt_children_relations = dag.slot("lt_children_relations"); 87 | 88 | int n = lt_parents.size(); 89 | NumericVector ic(n); 90 | 91 | for(int i = 0; i < n; i ++) { 92 | 93 | if(i % 1000 == 0) { 94 | message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false); 95 | message("going through " + std::to_string(i) + " / " + std::to_string(n) + " nodes ...", false); 96 | } 97 | 98 | LogicalVector l_ancestors(n); 99 | _find_ancestors(lt_parents, i, l_ancestors, true); 100 | 101 | for(int j = 0; j < n; j ++) { 102 | if(l_ancestors[j]) { 103 | ic[i] += _calc_wang_s(lt_children, lt_children_relations, contribution, j, i, l_ancestors); 104 | } 105 | } 106 | 107 | reset_logical_vector_to_false(l_ancestors); 108 | } 109 | 110 | message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false); 111 | message("going through " + std::to_string(n) + " / " + std::to_string(n) + " nodes ... 
Done.", true); 112 | 113 | return ic; 114 | } 115 | 116 | 117 | 118 | 119 | // a leaf's most informative leaf is itself 120 | // [[Rcpp::export]] 121 | IntegerVector cpp_max_leaves_id(S4 dag, IntegerVector nodes, NumericVector v) { 122 | 123 | List lt_children = dag.slot("lt_children"); 124 | int n = lt_children.size(); 125 | 126 | int m = nodes.size(); 127 | IntegerVector cl(m); 128 | LogicalVector is_leaf(n); 129 | for(int i = 0; i < m; i ++) { 130 | cl[i] = nodes[i]; 131 | _find_connected_leaves(lt_children, nodes[i]-1, is_leaf); 132 | 133 | double max_v = 0; 134 | for(int j = 0; j < n; j ++) { 135 | if(is_leaf[j]) { 136 | if(v[j] > max_v) { 137 | max_v = v[j]; 138 | cl[i] = j+1; 139 | } 140 | } 141 | } 142 | 143 | reset_logical_vector_to_false(is_leaf); 144 | } 145 | 146 | return cl; 147 | } 148 | 149 | -------------------------------------------------------------------------------- /tests/testthat/tests_term.R: -------------------------------------------------------------------------------- 1 | 2 | library(testthat) 3 | 4 | 5 | ## export all functions 6 | if(!identical(topenv(), .GlobalEnv)) { 7 | pkg_env = asNamespace("simona") 8 | all_objs = ls(envir = pkg_env) 9 | for(obj in all_objs) { 10 | assign(obj, get(obj, envir = pkg_env, inherits = FALSE)) 11 | } 12 | } 13 | 14 | #### test a small dag 15 | 16 | # b--d--f 17 | # / \ 18 | # a---c--e 19 | # upstream -> downstream 20 | 21 | parents = c("a", "a", "b", "b", "c", "d") 22 | children = c("b", "c", "c", "d", "e", "f") 23 | 24 | dag = create_ontology_DAG(parents, children) 25 | 26 | 27 | test_that("test IC_universal", { 28 | expect_equal( 29 | IC_universal(dag, use_cache = FALSE), 30 | -log(c(1, 1/2, 1/8, 1/4, 1/8, 1/4)) 31 | ) 32 | }) 33 | 34 | test_that("test reachability", { 35 | expect_equal( 36 | reachability(dag, use_cache = FALSE), 37 | c(3, 2, 1, 1, 1, 1) 38 | ) 39 | }) 40 | 41 | test_that("test totipotency", { 42 | expect_equal( 43 | totipotency(dag, use_cache = FALSE), 44 | c(1, 5/6, 1/3, 1/3, 1/6, 
1/6) 45 | ) 46 | }) 47 | 48 | test_that("test IC_Meng_2012", { 49 | expect_equal( 50 | IC_Meng_2012(dag, correct = FALSE, FALSE), 51 | c(0, 0, log(2)/log(3)*(1-log(4/3)/log(6)), log(2)/log(3)*(1-log(4/3)/log(6)), 1, 1) 52 | ) 53 | expect_equal( 54 | IC_Meng_2012(dag, correct = TRUE, FALSE), 55 | c(0, log(1+1)/log(3+1)*(1-log(8/3)/log(6)), log(2+1)/log(3+1)*(1-log(4/3)/log(6)), log(2+1)/log(3+1)*(1-log(4/3)/log(6)), 1, 1) 56 | ) 57 | }) 58 | IC_Zhou_2008(dag, use_cache = FALSE) 59 | IC_Seco_2004(dag, use_cache = FALSE) 60 | IC_Zhang_2006(dag, use_cache = FALSE) 61 | # IC_Seddiqui_2010(dag, use_cache = FALSE) 62 | IC_Sanchez_2011(dag, use_cache = FALSE) 63 | 64 | test_that("test IC_Wang_2007", { 65 | expect_error( 66 | IC_Wang_2007(dag, use_cache = FALSE), 67 | "not set" 68 | ) 69 | }) 70 | 71 | 72 | 73 | parents = c("a", "a", "b", "b", "c", "d") 74 | children = c("b", "c", "c", "d", "e", "f") 75 | 76 | annotation = list( 77 | "a" = 1:3, 78 | "b" = 3:4, 79 | "c" = 5, 80 | "d" = 7, 81 | "e" = 4:7, 82 | "f" = 8 83 | ) 84 | 85 | annotation2 = list( 86 | "a" = c(1, 2, 3, 4, 5, 7, 6, 8), 87 | "b" = c(3, 4, 5, 7, 6, 8), 88 | "c" = c(5, 4, 6, 7), 89 | "d" = 7:8, 90 | "e" = 4:7, 91 | "f" = 8 92 | ) 93 | 94 | dag1 = create_ontology_DAG(parents, children, annotation = annotation) 95 | dag2 = create_ontology_DAG(parents, children, annotation = annotation2) 96 | 97 | remove_attr = function(x) { 98 | attributes(x) = NULL 99 | x 100 | } 101 | 102 | test_that("test IC_annotation", { 103 | expect_equal( 104 | n_annotations(dag1, use_cache = FALSE), 105 | n_annotations(dag2, use_cache = FALSE) 106 | ) 107 | expect_equal( 108 | remove_attr(IC_annotation(dag1, use_cache = FALSE)), 109 | -c(log(8/8), log(6/8), log(4/8), log(2/8), log(4/8), log(1/8)) 110 | ) 111 | }) 112 | 113 | ##################### 114 | # b--d--f 115 | # / \ 116 | # a---c--e 117 | # upstream -> downstream 118 | 119 | parents = c("a", "a", "b", "b", "c", "d") 120 | children = c("b", "c", "c", "d", "e", "f") 121 | 122 | 
123 | dag = create_ontology_DAG(parents, children, relations = c("isa", "part of", "isa", "part of", "isa", "part of"), 124 | annotation = annotation) 125 | test_that("test IC_Wang_2007", { 126 | expect_equal( 127 | IC_Wang_2007(dag, c("is_a" = 0.7, "part of" = 0.6), use_cache = FALSE), 128 | c(1, 1.7, 2.3, 2.02, 2.61, 2.212) 129 | ) 130 | }) 131 | 132 | library(igraph) 133 | g = dag_as_igraph(dag) 134 | E(g)$weight = c("is_a" = 0.7, "part_of" = 0.6)[E(g)$relation] 135 | d = distances(g, mode = "out", weights = -log(E(g)$weight)) 136 | s = exp(-d) 137 | test_that("test IC_Wang_2007 and shortest path weighted by 1/w", { 138 | expect_equal( 139 | IC_Wang_2007(dag, c("is_a" = 0.7, "part of" = 0.6), use_cache = FALSE), 140 | unname(colSums(s)) 141 | ) 142 | }) 143 | 144 | ### test annotation 145 | dag = create_ontology_DAG_from_GO_db("BP", org_db = "org.Hs.eg.db") 146 | n = n_annotations(dag) 147 | test_that("test n_annotations", { 148 | for(i in 1:10) { 149 | x = sample(dag@terms, 1) 150 | an = dag_ancestors(dag, x) 151 | expect_true( 152 | all(n[an] >= n[x]) 153 | ) 154 | } 155 | }) 156 | 157 | if(FALSE) { 158 | 159 | dag = create_ontology_DAG_from_GO_db("BP", org_db = "org.Hs.eg.db") 160 | lt = lapply(all_ic_methods(), function(method) { 161 | cat("=====", method, "=====\n") 162 | term_IC(dag, method) 163 | }) 164 | names(lt) = all_ic_methods() 165 | 166 | df = as.data.frame(lt) 167 | pairs(df, pch = ".", col = dag_depth(dag)) 168 | 169 | } 170 | -------------------------------------------------------------------------------- /tests/testthat/tests_common_ancestor.R: -------------------------------------------------------------------------------- 1 | 2 | library(testthat) 3 | 4 | 5 | ## export all functions 6 | if(!identical(topenv(), .GlobalEnv)) { 7 | pkg_env = asNamespace("simona") 8 | all_objs = ls(envir = pkg_env) 9 | for(obj in all_objs) { 10 | assign(obj, get(obj, envir = pkg_env, inherits = FALSE)) 11 | } 12 | } 13 | 14 | #### test a small dag 15 | 16 | # 
b--d--f 17 | # / \ 18 | # a---c--e 19 | # upstream -> downstream 20 | 21 | parents = c("a", "a", "b", "b", "c", "d") 22 | children = c("b", "c", "c", "d", "e", "f") 23 | 24 | dag = create_ontology_DAG(parents, children) 25 | 26 | test_that("test cpp_max_ancestor_v", { 27 | m = max_ancestor_v(dag, 1:6, dag_depth(dag)) 28 | expect_equal( 29 | m[upper.tri(m, diag = TRUE)], 30 | c(0, 0, 1, 0, 1, 2, 0, 1, 1, 2, 0, 1, 2, 1, 3, 0, 1, 1, 2, 1, 3) 31 | ) 32 | 33 | m = max_ancestor_v(dag, c(2, 4, 5, 3), dag_depth(dag)) 34 | expect_equal( 35 | m[upper.tri(m, diag = TRUE)], 36 | c(1, 1, 2, 1, 1, 3, 1, 1, 2, 2) 37 | ) 38 | }) 39 | 40 | test_that("test cpp_max_ancestor_id", { 41 | m = max_ancestor_id(dag, 1:6, dag_depth(dag)) 42 | expect_equal( 43 | m[upper.tri(m, diag = TRUE)], 44 | c(1, 1, 2, 1, 2, 3, 1, 2, 2, 4, 1, 2, 3, 2, 5, 1, 2, 2, 4, 2, 6) 45 | ) 46 | 47 | m = max_ancestor_id(dag, c(2, 4, 5, 3), dag_depth(dag)) 48 | expect_equal( 49 | m[upper.tri(m, diag = TRUE)], 50 | c(2, 2,4, 2, 2, 5, 2, 2, 3, 3) 51 | ) 52 | 53 | m = max_ancestor_id(dag, 1:6, rep(0, 6)) 54 | expect_equal( 55 | m[upper.tri(m, diag = TRUE)], 56 | c(1, 1, 2, 1, 2, 3, 1, 2, 2, 4, 1, 2, 3, 2, 5, 1, 2, 2, 4, 2, 6) 57 | ) 58 | 59 | }) 60 | 61 | test_that("test cpp_distances", { 62 | m = shortest_distances_via_NCA(dag, 1:6) 63 | expect_equal( 64 | m[upper.tri(m, diag = TRUE)], 65 | c(0, 1, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 3, 0, 3, 2, 3, 1, 4, 0) 66 | ) 67 | 68 | m2 = shortest_distances_via_NCA(dag, c(2, 4, 5, 3)) 69 | m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)] 70 | expect_equal(m2, m3) 71 | 72 | m = longest_distances_via_LCA(dag, 1:6) 73 | expect_equal( 74 | m[upper.tri(m, diag = TRUE)], 75 | c(0, 1, 0, 2, 1, 0, 2, 1, 2, 0, 3, 2, 1, 3, 0, 3, 2, 3, 1, 4, 0) 76 | ) 77 | 78 | m2 = longest_distances_via_LCA(dag, c(2, 4, 5, 3)) 79 | m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)] 80 | expect_equal(m2, m3) 81 | 82 | lt = cpp_longest_distances_from_LCA(dag, 1:6) 83 | m = longest_distances_via_LCA(dag, 1:6) 84 | dimnames(m) = NULL 85 
| 86 | expect_equal(m, lt$left + lt$right) 87 | }) 88 | 89 | test_that("test distance_directed", { 90 | m = longest_distances_directed(dag, 1:6) 91 | expect_equal( 92 | m[upper.tri(m, diag = TRUE)], 93 | c(0, 1, 0, 2, 1, 0, 2, 1, -1, 0, 3, 2, 1, -1, 0, 3, 2, -1, 1, -1, 0) 94 | ) 95 | 96 | m2 = longest_distances_directed(dag, c(2, 4, 5, 3)) 97 | m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)] 98 | expect_equal(m2, m3) 99 | 100 | expect_equal( 101 | m[lower.tri(m, diag = FALSE)], 102 | rep(-1, 15) 103 | ) 104 | 105 | m = shortest_distances_directed(dag, 1:6) 106 | expect_equal( 107 | m[upper.tri(m, diag = TRUE)], 108 | c(0, 1, 0, 1, 1, 0, 2, 1, -1, 0, 2, 2, 1, -1, 0, 3, 2, -1, 1, -1, 0) 109 | ) 110 | 111 | m2 = shortest_distances_directed(dag, c(2, 4, 5, 3)) 112 | m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)] 113 | expect_equal(m2, m3) 114 | 115 | expect_equal( 116 | m[lower.tri(m, diag = FALSE)], 117 | rep(-1, 15) 118 | ) 119 | }) 120 | 121 | 122 | test_that("test cpp_nearest_common_ancestor", { 123 | m = cpp_nearest_common_ancestor(dag, 1:6) 124 | expect_equal( 125 | m[upper.tri(m, diag = TRUE)], 126 | c(1, 1, 2, 1, 2, 3, 1, 2, 2, 4, 1, 2, 3, 2, 5, 1, 2, 2, 4, 2, 6) 127 | ) 128 | 129 | m2 = cpp_nearest_common_ancestor(dag, c(2, 4, 5, 3)) 130 | m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)] 131 | expect_equal(m2, m3) 132 | }) 133 | 134 | 135 | test_that("compare cpp_nearest_common_ancestor and cpp_max_ancestor_v", { 136 | parents = c("a", "b", "c", "c", "d", "f", "b", "b") 137 | children = c("b", "c", "d", "f", "e", "g", "e", "g") 138 | dag = create_ontology_DAG(parents, children) 139 | depth = dag_depth(dag) 140 | 141 | m1 = max_ancestor_id(dag, 1:7, depth) 142 | m2 = cpp_nearest_common_ancestor(dag, 1:7) 143 | 144 | expect_equal(m1[5, 7], 3) 145 | expect_equal(m2[5, 7], 2) 146 | 147 | expect_equal(m1[4, 6], 3) 148 | expect_equal(m2[4, 6], 3) 149 | }) 150 | 151 | 152 | 153 | if(FALSE) { 154 | 155 | dag = create_ontology_DAG_from_GO_db() 156 | 157 | system.time(d <- LCA_term(dag, dag@terms)); 
rm(d); gc(); 158 | system.time(d <- LCA_depth(dag, dag@terms)); rm(d); gc(); 159 | system.time(d <- NCA_term(dag, dag@terms)); rm(d); gc(); 160 | system.time(d <- MICA_term(dag, dag@terms, "IC_universal")); rm(d); gc(); 161 | system.time(d <- MICA_IC(dag, dag@terms, "IC_universal")); rm(d); gc(); 162 | 163 | } 164 | 165 | -------------------------------------------------------------------------------- /man/common_ancestor.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/common_ancestor.R 3 | \name{MICA_term} 4 | \alias{MICA_term} 5 | \alias{MICA_IC} 6 | \alias{LCA_term} 7 | \alias{LCA_depth} 8 | \alias{NCA_term} 9 | \alias{max_ancestor_v} 10 | \alias{max_ancestor_id} 11 | \alias{max_ancestor_path_sum} 12 | \alias{CA_terms} 13 | \title{Various types of common ancestors} 14 | \usage{ 15 | MICA_term( 16 | dag, 17 | terms, 18 | IC_method, 19 | in_labels = TRUE, 20 | distance = "longest", 21 | verbose = simona_opt$verbose 22 | ) 23 | 24 | MICA_IC(dag, terms, IC_method, verbose = simona_opt$verbose) 25 | 26 | LCA_term( 27 | dag, 28 | terms, 29 | in_labels = TRUE, 30 | distance = "longest", 31 | verbose = simona_opt$verbose 32 | ) 33 | 34 | LCA_depth(dag, terms, verbose = simona_opt$verbose) 35 | 36 | NCA_term(dag, terms, in_labels = TRUE, verbose = simona_opt$verbose) 37 | 38 | max_ancestor_v(dag, terms, value, verbose = simona_opt$verbose) 39 | 40 | max_ancestor_id( 41 | dag, 42 | terms, 43 | value, 44 | in_labels = FALSE, 45 | distance = "longest", 46 | verbose = simona_opt$verbose 47 | ) 48 | 49 | max_ancestor_path_sum( 50 | dag, 51 | terms, 52 | value, 53 | add_v, 54 | distance = "longest", 55 | verbose = simona_opt$verbose 56 | ) 57 | 58 | CA_terms(dag, term1, term2, in_labels = TRUE) 59 | } 60 | \arguments{ 61 | \item{dag}{An \code{ontology_DAG} object.} 62 | 63 | \item{terms}{A vector of term names.} 64 | 65 | \item{IC_method}{An IC method. 
Valid values are in \code{\link[=all_term_IC_methods]{all_term_IC_methods()}}.} 66 | 67 | \item{in_labels}{Whether the terms are represented in their names or as integer indices?} 68 | 69 | \item{distance}{If there are multiple LCA or MICA of two terms, whether to take the one with 70 | the longest distance or the shortest distance to the two terms. Possible values are "longest" and "shortest".} 71 | 72 | \item{verbose}{Whether to print messages.} 73 | 74 | \item{value}{A numeric vector. The elements should correspond to terms in \code{dag_all_terms()} (should have the same length as the number of terms in the DAG).} 75 | 76 | \item{add_v}{Values to be added along the path to the MICA or LCA. The same format as \code{value}.} 77 | 78 | \item{term1}{A single term ID.} 79 | 80 | \item{term2}{A single term ID.} 81 | } 82 | \value{ 83 | \itemize{ 84 | \item \code{MICA_term()} returns an integer or a character matrix of the MICA terms depending on the value of \code{in_labels}. 85 | \item \code{MICA_IC()} returns a numeric matrix of the IC of the MICA terms. 86 | \item \code{LCA_term()} returns an integer or a character matrix of the LCA term depending on the value of \code{in_labels}. 87 | \item \code{LCA_depth()} returns an integer matrix of the depth of the LCA terms. 88 | \item \code{NCA_term()} returns an integer or a character matrix of the NCA term depending on the value of \code{in_labels}. The shortest distance from NCA terms can be calculated by \code{\link[=shortest_distances_via_NCA]{shortest_distances_via_NCA()}}. 89 | \item \code{max_ancestor_v()} returns a numeric matrix. 90 | \item \code{max_ancestor_id()} returns an integer or a character matrix. 91 | \item \code{CA_terms()} returns a vector of term IDs. 
92 | } 93 | } 94 | \description{ 95 | Various types of common ancestors 96 | } 97 | \details{ 98 | There are the following three types of common ancestors: 99 | \itemize{ 100 | \item MICA (most informative common ancestor): The common ancestor with the highest IC value. 101 | \item LCA (lowest common ancestor): The common ancestor with the largest depth (The depth of a term is the maximal distance from the root term). If there are multiple ancestors having 102 | the same max depth, the ancestor with the smallest distance to the two terms is used. 103 | \item NCA (nearest common ancestor): The common ancestor with the smallest distance to the two terms. If there are multiple 104 | ancestors with the same smallest distance, the ancestor with the largest depth is used. 105 | } 106 | 107 | \code{max_ancestor_v()} and \code{max_ancestor_id()} are more general functions which return common ancestors with 108 | the highest value in \code{value}. 109 | 110 | Given a path connecting two terms and their MICA/LCA, \code{max_ancestor_path_sum()} calculates the sum of the values of the terms along the path. The values 111 | to be added are specified in the \code{add_v} argument. 
112 | } 113 | \examples{ 114 | parents = c("a", "a", "b", "b", "c", "d") 115 | children = c("b", "c", "c", "d", "e", "f") 116 | dag = create_ontology_DAG(parents, children) 117 | MICA_term(dag, letters[1:6], "IC_universal") 118 | MICA_IC(dag, letters[1:6], "IC_universal") 119 | LCA_term(dag, letters[1:6]) 120 | LCA_depth(dag, letters[1:6]) 121 | NCA_term(dag, letters[1:6]) 122 | CA_terms(dag, "c", "d") 123 | } 124 | -------------------------------------------------------------------------------- /src/traverse.h: -------------------------------------------------------------------------------- 1 | #ifndef __TRAVERSE__ 2 | #define __TRAVERSE__ 3 | 4 | extern const int SET_UNION; 5 | extern const int SET_INTERSECT; 6 | extern const int SET_UNIQU_IN_1; 7 | extern const int SET_UNIQU_IN_2; 8 | 9 | void _add_parents(List lt_parents, int i_node, LogicalVector& l_ancestors); 10 | void _add_parents_within_background(List lt_parents, int i_node, LogicalVector& l_ancestors, LogicalVector l_background); 11 | void _find_ancestors(List lt_parents, int i_node, LogicalVector& l_ancestors, bool include_self = false); 12 | void _find_ancestors_with_background(List lt_parents, int i_node, LogicalVector& l_ancestors, LogicalVector l_background, bool include_self = false); 13 | IntegerVector cpp_ancestors(S4 dag, int node, bool include_self = false); 14 | IntegerVector cpp_ancestors_within_background(S4 dag, int node, IntegerVector background, bool include_self = false); 15 | void _add_children(List lt_children, int i_node, LogicalVector& l_offspring); 16 | void _add_children_within_background(List lt_children, int i_node, LogicalVector& l_offspring, LogicalVector l_background); 17 | void _find_offspring(List lt_children, int i_node, LogicalVector& l_offspring, bool include_self = false); 18 | void _find_offspring_within_background(List lt_children, int i_node, LogicalVector& l_offspring, LogicalVector l_background, bool include_self = false); 19 | IntegerVector cpp_offspring(S4 dag, 
int node, bool include_self = false); 20 | IntegerVector cpp_offspring_within_background(S4 dag, int node, IntegerVector background, bool include_self = false); 21 | void _add_leaves(List lt_children, int i_node, LogicalVector& l_offspring); 22 | void _find_connected_leaves(List lt_children, int i_node, LogicalVector& l_offspring); 23 | 24 | IntegerVector cpp_n_ancestors(S4 dag, bool include_self = false); 25 | IntegerVector cpp_n_ancestors_on_tree(S4 dag, bool include_self = false); 26 | IntegerVector cpp_n_offspring(S4 dag, bool include_self = false); 27 | IntegerVector cpp_n_offspring_on_tree(S4 dag, bool include_self = false); 28 | IntegerVector cpp_n_leaves(S4 dag); 29 | IntegerVector cpp_ancestors_of_a_group(S4 dag, IntegerVector nodes, int type = 1, bool include_self = false); 30 | IntegerVector cpp_ancestors_of_a_group_within_background(S4 dag, IntegerVector nodes, IntegerVector background, int type = 1, bool include_self = false); 31 | IntegerVector cpp_ancestors_of_two_groups(S4 dag, IntegerVector nodes1, IntegerVector nodes2, int type, bool include_self = false); 32 | IntegerVector cpp_offspring_of_a_group(S4 dag, IntegerVector nodes, bool include_self = false); 33 | LogicalMatrix cpp_is_reachable(S4 dag, IntegerVector nodes, bool directed = false); 34 | 35 | IntegerVector cpp_dag_depth(S4 dag); 36 | IntegerVector cpp_dag_longest_dist_to_offspring(S4 dag, IntegerVector from_node, LogicalVector l_background = LogicalVector(0)); 37 | IntegerVector cpp_dag_longest_dist_to_offspring(S4 dag, int from_node, LogicalVector l_background = LogicalVector(0)); 38 | IntegerVector cpp_dag_shortest_dist_to_offspring(S4 dag, IntegerVector from_node, LogicalVector l_background = LogicalVector(0)); 39 | IntegerVector cpp_dag_shortest_dist_to_offspring(S4 dag, int from_node, LogicalVector l_background = LogicalVector(0)); 40 | IntegerVector cpp_dag_height(S4 dag); 41 | IntegerVector cpp_dag_longest_dist_from_ancestors(S4 dag, IntegerVector to_node, LogicalVector 
l_background = LogicalVector(0)); 42 | IntegerVector cpp_dag_longest_dist_from_ancestors(S4 dag, int to_node, LogicalVector l_background = LogicalVector(0)); 43 | IntegerVector cpp_dag_shortest_dist_from_ancestors(S4 dag, IntegerVector to_node, LogicalVector l_background = LogicalVector(0)); 44 | IntegerVector cpp_dag_shortest_dist_from_ancestors(S4 dag, int to_node, LogicalVector l_background = LogicalVector(0)); 45 | 46 | 47 | NumericVector cpp_dag_longest_path_to_offspring_sum_value(S4 dag, IntegerVector from_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 48 | NumericVector cpp_dag_shortest_path_to_offspring_sum_value(S4 dag, IntegerVector from_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 49 | NumericVector cpp_dag_longest_path_from_ancestors_sum_value(S4 dag, IntegerVector to_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 50 | NumericVector cpp_dag_shortest_path_from_ancestors_sum_value(S4 dag, IntegerVector to_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 51 | NumericVector cpp_dag_longest_path_to_offspring_sum_value(S4 dag, int from_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 52 | NumericVector cpp_dag_shortest_path_to_offspring_sum_value(S4 dag, int from_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 53 | NumericVector cpp_dag_longest_path_from_ancestors_sum_value(S4 dag, int to_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 54 | NumericVector cpp_dag_shortest_path_from_ancestors_sum_value(S4 dag, int to_node, NumericVector value, LogicalVector l_background = LogicalVector(0)); 55 | 56 | #endif 57 | -------------------------------------------------------------------------------- /src/annotation.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | #include "traverse.h" 5 | #include 
"utils.h" 6 | 7 | // [[Rcpp::export]] 8 | IntegerVector cpp_n_annotations(S4 dag, bool unify = true) { 9 | 10 | List lt_children = dag.slot("lt_children"); 11 | List annotation = dag.slot("annotation"); 12 | List lt_annotation = annotation["list"]; 13 | CharacterVector anno_names = annotation["names"]; 14 | int n_all_anno = anno_names.size(); 15 | 16 | int n = lt_children.size(); 17 | IntegerVector n_anno(n, 0); 18 | 19 | IntegerVector anno_size(n); 20 | if(!unify) { 21 | for(int i = 0; i < n; i ++) { 22 | IntegerVector anno = lt_annotation[i]; 23 | anno_size[i] = anno.size(); 24 | } 25 | } 26 | 27 | LogicalVector l_offspring(n, false); 28 | for(int i = 0; i < n; i ++) { 29 | _find_offspring(lt_children, i, l_offspring, true); //include self 30 | 31 | if(unify) { 32 | LogicalVector l_anno(n_all_anno, false); 33 | for(int j = 0; j < n; j ++) { 34 | if(l_offspring[j]) { 35 | IntegerVector anno = lt_annotation[j]; 36 | for(int k = 0; k < anno.size(); k ++) { 37 | l_anno[anno[k]-1] = true; 38 | } 39 | } 40 | } 41 | n_anno[i] = sum(l_anno); 42 | } else { 43 | IntegerVector anno_sub = anno_size[l_offspring]; 44 | if(anno_sub.size()) { 45 | n_anno[i] = sum(anno_sub); 46 | } else { 47 | n_anno[i] = 0; 48 | } 49 | } 50 | 51 | reset_logical_vector_to_false(l_offspring); 52 | } 53 | 54 | return n_anno; 55 | } 56 | 57 | 58 | // [[Rcpp::export]] 59 | IntegerVector cpp_n_annotations_with_intersect(S4 dag, IntegerVector anno_id) { 60 | 61 | List lt_children = dag.slot("lt_children"); 62 | List annotation = dag.slot("annotation"); 63 | List lt_annotation = annotation["list"]; 64 | CharacterVector anno_names = annotation["names"]; 65 | int n_all_anno = anno_names.size(); 66 | 67 | int n = lt_children.size(); 68 | IntegerVector n_anno(n, 0); 69 | 70 | int m = anno_id.size(); 71 | 72 | if(m == 0) { 73 | return n_anno; 74 | } 75 | 76 | LogicalVector l_offspring(n, false); 77 | for(int i = 0; i < n; i ++) { 78 | _find_offspring(lt_children, i, l_offspring, true); //include self 79 | 80 
| LogicalVector l_anno(n_all_anno, false); 81 | for(int j = 0; j < n; j ++) { 82 | if(l_offspring[j]) { 83 | IntegerVector anno = lt_annotation[j]; 84 | for(int k = 0; k < anno.size(); k ++) { 85 | l_anno[anno[k]-1] = true; 86 | } 87 | } 88 | } 89 | 90 | for(int k = 0; k < m; k ++) { 91 | if(l_anno[ anno_id[k]-1 ]) { 92 | n_anno[i] ++; 93 | } 94 | } 95 | 96 | reset_logical_vector_to_false(l_offspring); 97 | } 98 | 99 | return n_anno; 100 | } 101 | 102 | 103 | // [[Rcpp::export]] 104 | IntegerMatrix cpp_get_term_annotations(S4 dag, IntegerVector nodes) { 105 | List lt_children = dag.slot("lt_children"); 106 | List annotation = dag.slot("annotation"); 107 | List lt_annotation = annotation["list"]; 108 | CharacterVector anno_names = annotation["names"]; 109 | int n_all_anno = anno_names.size(); 110 | int n = lt_children.size(); 111 | int m = nodes.size(); 112 | 113 | IntegerMatrix mat(m, n_all_anno); 114 | 115 | LogicalVector l_offspring(n, false); 116 | for(int i = 0; i < m; i ++) { 117 | _find_offspring(lt_children, nodes[i]-1, l_offspring, true); //include self 118 | 119 | // LogicalVector l_anno(n_all_anno, false); 120 | for(int j = 0; j < n; j ++) { 121 | if(l_offspring[j]) { 122 | IntegerVector anno = lt_annotation[j]; 123 | for(int k = 0; k < anno.size(); k ++) { 124 | // l_anno[anno[k]-1] = true; 125 | mat(i, anno[k]-1) = 1; 126 | } 127 | } 128 | } 129 | reset_logical_vector_to_false(l_offspring); 130 | } 131 | 132 | return mat; 133 | } 134 | 135 | 136 | // given an item id, return the terms also the ancestors annotated with this item 137 | IntegerVector cpp_get_annotated_terms(S4 dag, int anno_id) { 138 | List lt_children = dag.slot("lt_children"); 139 | List lt_parents = dag.slot("lt_parents"); 140 | 141 | List annotation = dag.slot("annotation"); 142 | List lt_annotation = annotation["list"]; 143 | 144 | int n = lt_parents.size(); 145 | 146 | IntegerVector x(n); 147 | LogicalVector l_ancestors(n, false); 148 | for(int i = 0; i < n; i ++) { 149 | 
IntegerVector anno = lt_annotation[i]; 150 | if(anno.size() || x[i] == 0) { 151 | for(int ia = 0; ia < anno.size(); ia ++) { 152 | if(anno[ia] == anno_id) { 153 | _find_ancestors(lt_parents, i, l_ancestors, true); 154 | for(int j = 0; j < n; j ++) { 155 | if(l_ancestors[j]) { 156 | x[j] = 1; 157 | } 158 | } 159 | break; 160 | } 161 | } 162 | } 163 | 164 | reset_logical_vector_to_false(l_ancestors); 165 | } 166 | 167 | return x; 168 | } 169 | 170 | // [[Rcpp::export]] 171 | IntegerMatrix cpp_get_annotated_terms(S4 dag, IntegerVector anno_id) { 172 | int m = anno_id.size(); 173 | int n = dag.slot("n_terms"); 174 | IntegerMatrix x(m, n); 175 | 176 | for(int i = 0; i < m; i ++) { 177 | x(i, _) = cpp_get_annotated_terms(dag, anno_id[i]); 178 | } 179 | 180 | return x; 181 | 182 | } 183 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | 2 | term_to_node_id = function(dag, term, strict = TRUE, add_name = FALSE) { 3 | if(is.numeric(term)) { 4 | id = term 5 | } else if(length(term) == 1) { 6 | i = which(dag@terms == term) 7 | 8 | if(length(i) == 0 && length(dag@alternative_terms)) { 9 | term2 = dag@alternative_terms[term] 10 | if(is.na(term2)) { 11 | stop("Cannot find term: ", term) 12 | } else { 13 | i = which(dag@terms == term2) 14 | } 15 | } 16 | 17 | if(length(i) == 0) { 18 | stop("Cannot find term: ", term) 19 | } 20 | 21 | id = i 22 | } else if(length(term) > 1) { 23 | unique_term = unique(term) 24 | l = dag@terms %in% unique_term 25 | if(sum(l) < length(unique_term) && length(dag@alternative_terms)) { 26 | unique_term2 = dag@alternative_terms[setdiff(unique_term, dag@terms[l])] 27 | unique_term2 = unique_term2[!is.na(unique_term2)] 28 | l2 = dag@terms %in% unique_term2 29 | l = l | l2 30 | } 31 | i = which(l) 32 | 33 | if(length(i) == 0) { 34 | stop("Cannot find all these terms.") 35 | } 36 | 37 | if(length(i) != length(unique_term)) { 38 | 
if(strict) { 39 | stop("Cannot find some of the terms in the DAG.") 40 | } else { 41 | message("removed ", length(unique_term) - length(i), " terms that cannot be found in the DAG.") 42 | } 43 | } 44 | 45 | id = unname(structure(i, names = dag@terms[i])[intersect(term, dag@terms[i])]) 46 | } 47 | if(add_name) { 48 | structure(id, names = dag@terms[id]) 49 | } else { 50 | id 51 | } 52 | } 53 | 54 | 55 | #' @importFrom GetoptLong qq 56 | check_pkg = function(pkg, bioc = FALSE, github = NULL) { 57 | if(requireNamespace(pkg, quietly = TRUE)) { 58 | return(NULL) 59 | } else { 60 | 61 | if(!interactive()) { 62 | if(bioc) { 63 | stop_wrap(qq("You need to manually install package '@{pkg}' from Bioconductor.")) 64 | } else { 65 | stop_wrap(qq("You need to manually install package '@{pkg}' from CRAN.")) 66 | } 67 | } 68 | } 69 | } 70 | 71 | 72 | stop_wrap = function (...) { 73 | x = paste0(...) 74 | x = paste(strwrap(x), collapse = "\n") 75 | stop(x, call. = FALSE) 76 | } 77 | 78 | warning_wrap = function (...) { 79 | x = paste0(...) 80 | x = paste(strwrap(x), collapse = "\n") 81 | warning(x, call. = FALSE) 82 | } 83 | 84 | message_wrap = function (...) { 85 | x = paste0(...) 
86 | x = paste(strwrap(x), collapse = "\n") 87 | message(x) 88 | } 89 | 90 | #' @importFrom grDevices col2rgb rgb 91 | add_transparency = function (col, transparency = 0, alpha = TRUE) { 92 | if(alpha) { 93 | rgb(t(col2rgb(col)/255), alpha = 1 - transparency) 94 | } else { 95 | 96 | m = col2rgb(col) 97 | m = 255 - (255-m)*(1-transparency) 98 | rgb(t(m), maxColorValue = 255) 99 | 100 | } 101 | } 102 | 103 | # invert a list of child indices (one element per node) into a list of parent indices of the same length 104 | lt_children_to_lt_parents = function(lt_children) { 105 | n = length(lt_children) 106 | parents = rep(seq_len(n), times = vapply(lt_children, length, FUN.VALUE = integer(1))) 107 | children = unlist(lt_children) 108 | 109 | lt = split(parents, children) 110 | 111 | lt_parents = rep(list(integer(0)), n) # one empty slot per node, so nodes without parents stay integer(0) and the length is always n 112 | lt_parents[ as.integer(names(lt)) ] = lt 113 | 114 | lt_parents 115 | } 116 | # invert a list of parent indices (one element per node) into a list of child indices of the same length 117 | lt_parents_to_lt_children = function(lt_parents) { 118 | n = length(lt_parents) 119 | children = rep(seq_len(n), times = vapply(lt_parents, length, FUN.VALUE = integer(1))) 120 | parents = unlist(lt_parents) 121 | 122 | lt = split(children, parents) 123 | 124 | lt_children = rep(list(integer(0)), n) # one empty slot per node, so leaves stay integer(0) and the length is always n 125 | lt_children[ as.integer(names(lt)) ] = lt 126 | 127 | lt_children 128 | } 129 | 130 | dag_is_tree = function(dag) { 131 | n_terms = dag@n_terms 132 | n_relations = sum(vapply(dag@lt_children, length, FUN.VALUE = integer(1))) 133 | 134 | n_terms == n_relations + 1 135 | } 136 | 137 | 138 | merge_offspring_relation_types = function(relations_DAG, relations) { 139 | if(length(relations) == 0) { 140 | return(relations) 141 | } 142 | 143 | r1 = relations_DAG@terms 144 | rc = intersect(r1, relations) 145 | 146 | if(length(rc)) { 147 | unique(c(setdiff(relations, rc), dag_offspring(relations_DAG, rc, include_self = TRUE))) 148 | } else { 149 | relations 150 | } 151 | } 152 | 153 | 154 | # all offspring types are assigned to the same value 155 | extend_contribution_factor = function(relations_DAG, contribution_factor) { 156 | 157 | cf = contribution_factor 158 | 159 | 
if(is.null(relations_DAG)) { 160 | return(cf) 161 | } 162 | 163 | for(nm in names(contribution_factor)) { 164 | if(nm %in% relations_DAG@terms) { 165 | offspring = dag_offspring(relations_DAG, nm) 166 | if(length(offspring)) { 167 | cf[offspring] = contribution_factor[nm] 168 | } 169 | } 170 | } 171 | 172 | cf 173 | } 174 | 175 | 176 | normalize_relation_type = function(x) { 177 | 178 | x = tolower(x) 179 | x = gsub("[- ~]", "_", x) 180 | x[x == "isa"] = "is_a" 181 | x[x == "part_a"] = "part_of" 182 | 183 | x 184 | } 185 | 186 | exec_under_message_condition = function(code, verbose = TRUE, envir = parent.frame()) { 187 | if(verbose) { 188 | eval(code, envir = envir) 189 | } else { 190 | suppressMessages(eval(code, envir = envir)) 191 | } 192 | } 193 | 194 | -------------------------------------------------------------------------------- /vignettes/v02_GO.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Gene Ontology" 3 | author: "Zuguang Gu ( z.gu@dkfz.de )" 4 | date: '`r Sys.Date()`' 5 | output: html_vignette 6 | --- 7 | 8 | ```{r, echo = FALSE, message = FALSE} 9 | library(knitr) 10 | knitr::opts_chunk$set( 11 | error = FALSE, 12 | tidy = FALSE, 13 | message = FALSE, 14 | warning = FALSE, 15 | fig.align = "center") 16 | ``` 17 | 18 | 19 | Gene Ontology is the most widely used bio-ontology. On Bioconductor, there 20 | are standard packages for GO (**GO.db**) and organism-specific GO annotation packages 21 | (**org.\*.db**). In **simona**, there is a helper function 22 | `create_ontology_DAG_from_GO_db()` which makes use of the Bioconductor standard 23 | GO packages and constructs a DAG object automatically. 24 | 25 | ## Create the GO DAG object 26 | 27 | GO has three namespaces (or ontologies): biological process (BP), molecular 28 | function (MF) and cellular component (CC). 
The three GO namespaces are 29 | mutually exclusive, so the first argument of 30 | `create_ontology_DAG_from_GO_db()` is the GO namespace. 31 | 32 | ```{r} 33 | library(simona) 34 | dag = create_ontology_DAG_from_GO_db("BP") 35 | dag 36 | ``` 37 | 38 | There are three main GO relations: "is_a", "part_of" and "regulates". 39 | "regulates" has two child relation types in GO: "negatively_regulates" and 40 | "positively_regulates". So when "regulates" is selected, the two child 41 | relation types are automatically selected. By default only "is_a" and "part_of" are selected. 42 | 43 | You can set a subset of relation types with the argument `relations`. 44 | 45 | ```{r} 46 | create_ontology_DAG_from_GO_db("BP", relations = c("part of", "regulates")) # "part_of" is also OK 47 | ``` 48 | 49 | "is_a" is always selected because this is primary semantic relation type. So 50 | if you only want to include "is_a" relation, you can assign an empty vector to 51 | `relations`: 52 | 53 |
     54 | create_ontology_DAG_from_GO_db("BP", relations = character(0)) # or NULL, NA
     55 | 
    56 | 57 | Or you can apply `dag_filter()` after DAG is generated. 58 | 59 |
     60 | dag = create_ontology_DAG_from_GO_db("BP")
     61 | dag_filter(dag, relations = "is_a")
     62 | 
    63 | 64 | ## Add gene annotation 65 | 66 | Gene annotation can be set with the argument `org_db`. The value is an 67 | `OrgDb` object of the corresponding organism. The primary gene ID type in the __org.*.db__ 68 | package is internally used (which is normally the Entrez ID type). 69 | 70 | ```{r} 71 | library(org.Hs.eg.db) 72 | dag = create_ontology_DAG_from_GO_db("BP", org_db = org.Hs.eg.db) 73 | dag 74 | ``` 75 | 76 | For standard organism packages on Bioconductor, the `OrgDb` object always has 77 | the same name as the package, so the name of the organism package can also be 78 | set to `org_db`: 79 | 80 |
     81 | create_ontology_DAG_from_GO_db("BP", org_db = "org.Hs.eg.db")
     82 | 
    83 | 84 | Similarly, if the analysis is applied on mouse, the mouse organism package can 85 | be set to `org_db`. If the mouse organism package is not installed yet, it 86 | will be installed automatically. 87 | 88 |
     89 | create_ontology_DAG_from_GO_db("BP", org_db = "org.Mm.eg.db")
     90 | 
    91 | 92 | Genes that are annotated to GO terms can be obtained by `term_annotations()`. Note 93 | the genes are automatically merged from offspring terms. 94 | 95 | ```{r} 96 | term_annotations(dag, c("GO:0000002", "GO:0000012")) 97 | ``` 98 | 99 | ## Meta data frame 100 | 101 | There are additional meta columns attached to the DAG object. They can be accessed by `mcols()`. 102 | 103 | ```{r} 104 | head(mcols(dag)) 105 | ``` 106 | 107 | The additional information of GO terms is from the **GO.db** package. The row 108 | order of the meta data frame is the same as in `dag_all_terms(dag)`. 109 | 110 | 111 | ## Session info 112 | 113 | ```{r} 114 | sessionInfo() 115 | ``` 116 | 117 | 118 | 119 | 120 | 121 | 151 | 152 | 153 | -------------------------------------------------------------------------------- /man/dag_viz.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.R 3 | \name{dag_circular_viz} 4 | \alias{dag_circular_viz} 5 | \alias{dag_as_DOT} 6 | \alias{dag_graphviz} 7 | \title{Visualize the DAG} 8 | \usage{ 9 | dag_circular_viz( 10 | dag, 11 | highlight = NULL, 12 | start = 0, 13 | end = 360, 14 | partition_by_level = 1, 15 | partition_by_size = NULL, 16 | node_col = NULL, 17 | node_transparency = 0.4, 18 | node_size = NULL, 19 | edge_col = NULL, 20 | edge_transparency = default_edge_transparency(dag), 21 | legend_labels_from = NULL, 22 | legend_labels_max_width = 50, 23 | other_legends = list(), 24 | use_raster = dag_n_terms(dag) > 10000, 25 | newpage = TRUE, 26 | verbose = simona_opt$verbose 27 | ) 28 | 29 | dag_as_DOT( 30 | dag, 31 | node_param = default_node_param, 32 | edge_param = default_edge_param, 33 | rankdir = c("TB", "LR", "BT", "RL") 34 | ) 35 | 36 | dag_graphviz( 37 | dag, 38 | node_param = default_node_param, 39 | edge_param = default_edge_param, 40 | rankdir = "TB", 41 | ... 
42 | ) 43 | } 44 | \arguments{ 45 | \item{dag}{An \code{ontology_Dag} object.} 46 | 47 | \item{highlight}{A vector of terms to be highlighted on the DAG.} 48 | 49 | \item{start}{Start of the circle, measured in degree.} 50 | 51 | \item{end}{End of the circle, measured in degree.} 52 | 53 | \item{partition_by_level}{If \code{node_col} is not set, users can cut the DAG into clusters with different node colors. The partitioning is applied by \code{\link[=partition_by_level]{partition_by_level()}}.} 54 | 55 | \item{partition_by_size}{Similar as \code{partition_by_level}, but the partitioning is applied by \code{\link[=partition_by_size]{partition_by_size()}}.} 56 | 57 | \item{node_col}{Colors of nodes. If the value is a vector, the order should correspond to terms in \code{\link[=dag_all_terms]{dag_all_terms()}}.} 58 | 59 | \item{node_transparency}{Transparency of nodes. The same format as \code{node_col}.} 60 | 61 | \item{node_size}{Size of nodes. The same format as \code{node_col}.} 62 | 63 | \item{edge_col}{A named vector where names correspond to relation types.} 64 | 65 | \item{edge_transparency}{A named vector where names correspond to relation types.} 66 | 67 | \item{legend_labels_from}{If partitioning is applied on the DAG, a legend is generated showing different top 68 | terms. By default, the legend labels are the term IDs. If there are additionally column stored 69 | in the meta data frame of the DAG object, the column name can be set here to replace the term IDs as 70 | legend labels.} 71 | 72 | \item{legend_labels_max_width}{Maximal width of legend labels measured by the number of characters per line. 
Labels are wrapped into 73 | multiple lines if the widths exceed it.} 74 | 75 | \item{other_legends}{A list of legends generated by \code{\link[ComplexHeatmap:Legend]{ComplexHeatmap::Legend()}}.} 76 | 77 | \item{use_raster}{Whether to first write the circular image into a temporary png file, then add to the plot 78 | as a raster object?} 79 | 80 | \item{newpage}{Whether call \code{\link[grid:grid.newpage]{grid::grid.newpage()}} to create a new plot?} 81 | 82 | \item{verbose}{Whether to print messages.} 83 | 84 | \item{node_param}{A list of parameters. Each parameter has the same format. The value can be 85 | a single scalar, a full length vector with the same order as in \code{\link[=dag_all_terms]{dag_all_terms()}}, 86 | or a named vector that contains a subset of terms that need to be customized. 87 | The full set of parameters can be found at \url{https://graphviz.org/docs/nodes/}.} 88 | 89 | \item{edge_param}{A list of parameters. Each parameter has the same format. The value can be a single 90 | scalar, or a named vector that contains a subset of terms that need to be customized. 91 | The full set of parameters can be found at \url{https://graphviz.org/docs/edges/}. 92 | If the parameter is set to a named vector, it can be named by relation types \code{c("is_a" = ...)}, 93 | or directly relations \code{c("a -> b" = ...)}. Please see the vignette for details.} 94 | 95 | \item{rankdir}{The direction of the layout. Only four values are allowed: \code{"TB"}, \code{"LR"}, \code{"BT"} and \code{"RL"}.} 96 | 97 | \item{...}{Pass to \code{\link[DiagrammeR:grViz]{DiagrammeR::grViz()}}.} 98 | } 99 | \value{ 100 | \code{dag_as_DOT()} returns a vector of DOT code. 101 | } 102 | \description{ 103 | Visualize the DAG 104 | } 105 | \details{ 106 | \code{dag_circular_viz()} uses a circular layout for visualizing large DAGs. \code{dag_graphviz()} 107 | uses a hierarchical layout for visualizing small DAGs. 108 | 109 | \code{dag_as_DOT()} generates the DOT code of the DAG. 
110 | 111 | \code{dag_graphviz()} visualizes the DAG with the \strong{DiagrammeR} package. 112 | } 113 | \examples{ 114 | \donttest{ 115 | dag = create_ontology_DAG_from_GO_db() 116 | dag_circular_viz(dag) 117 | } 118 | 1 119 | if(interactive()) { 120 | dag = create_ontology_DAG_from_GO_db() 121 | dag_graphviz(dag[, "GO:0010228"]) 122 | dag_graphviz(dag[, "GO:0010228"], 123 | edge_param = list(color = c("is_a" = "purple", "part_of" = "darkgreen"), 124 | style = c("is_a" = "solid", "part_of" = "dashed")), 125 | width = 800, height = 800) 126 | 127 | # the DOT code for graphviz 128 | dag_as_DOT(dag[, "GO:0010228"]) 129 | } 130 | } 131 | \seealso{ 132 | \url{http://magjac.com/graphviz-visual-editor/} is nice place to try the DOT code. 133 | } 134 | -------------------------------------------------------------------------------- /src/utils.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | using namespace Rcpp; 4 | 5 | 6 | void reset_logical_vector_to_false(LogicalVector& x) { 7 | int n = x.size(); 8 | for(int i = 0; i < n; i ++) { 9 | x[i] = false; 10 | } 11 | } 12 | 13 | void reset_logical_vector_to_true(LogicalVector& x) { 14 | int n = x.size(); 15 | for(int i = 0; i < n; i ++) { 16 | x[i] = true; 17 | } 18 | } 19 | 20 | void fill_logical_vector_to_true(LogicalVector&x, IntegerVector ind) { 21 | for(int i = 0; i < ind.size(); i ++) { 22 | x[ind[i]] = true; 23 | } 24 | } 25 | 26 | void reset_numeric_vector_to_zero(NumericVector& x) { 27 | for(int i = 0; i < x.size(); i ++) { 28 | x[i] = 0; 29 | } 30 | } 31 | 32 | void reset_integer_vector_to_zero(IntegerVector& x) { 33 | for(int i = 0; i < x.size(); i ++) { 34 | x[i] = 0; 35 | } 36 | } 37 | 38 | IntegerVector _dag_depth(S4 dag) { 39 | Environment term_env = dag.slot("term_env"); 40 | IntegerVector depth = term_env["dag_depth"]; 41 | return depth; 42 | } 43 | 44 | 45 | // return the indices where elements in `l` is true 46 | IntegerVector _which(LogicalVector l) 
{ 47 | int n = l.size(); 48 | int n2 = sum(l); 49 | IntegerVector ind(n2); 50 | 51 | if(n2 == 0) { 52 | return(ind); 53 | } 54 | 55 | int i2 = 0; 56 | for(int i = 0; i < n; i ++) { 57 | if(l[i]) { 58 | ind[i2] = i; 59 | i2 ++; 60 | } 61 | } 62 | 63 | return ind; 64 | } 65 | 66 | LogicalVector integer_to_logical_vector(IntegerVector i, int n) { 67 | LogicalVector l(n); 68 | for(int k = 0; k < i.size(); k ++) { 69 | l[i[k]] = true; 70 | } 71 | return l; 72 | } 73 | 74 | 75 | // [[Rcpp::export]] 76 | IntegerVector cpp_match_index(IntegerVector ind1, IntegerVector ind2) { 77 | 78 | // ind1 and ind2 are all sorted, for each i in ind1, assign the value in ind2 which is the nearest larger value to i (NA if ind2 has no larger value) 79 | int n1 = ind1.size(); int n2 = ind2.size(); // n2 bounds the scan over ind2 80 | 81 | IntegerVector ind(n1, NA_INTEGER); // stays NA for elements of ind1 without a larger value in ind2 82 | 83 | int i2 = 0; 84 | for(int i1 = 0; i1 < n1; i1 ++) { 85 | while( i2 < n2 ) { // stop at the end of ind2 instead of reading past it 86 | if(ind1[i1] < ind2[i2]) { 87 | ind[i1] = ind2[i2]; 88 | break; 89 | } else { 90 | i2 ++; 91 | } 92 | } 93 | } 94 | 95 | return ind; 96 | } 97 | 98 | 99 | void message(String s, bool appendLF = true) { 100 | Function _message("message"); 101 | _message(s, Named("appendLF", appendLF)); 102 | return; 103 | } 104 | 105 | 106 | LogicalVector merge_two_logical_vectors_by_or(LogicalVector l1, LogicalVector l2) { 107 | int n = l1.size(); 108 | LogicalVector l(n); 109 | for(int i = 0; i < n; i ++) { 110 | l[i] = l1[i] || l2[i]; 111 | } 112 | 113 | return l; 114 | } 115 | 116 | LogicalVector merge_two_logical_vectors_by_and(LogicalVector l1, LogicalVector l2) { 117 | int n = l1.size(); 118 | LogicalVector l(n); 119 | for(int i = 0; i < n; i ++) { 120 | l[i] = l1[i] && l2[i]; 121 | } 122 | 123 | return l; 124 | } 125 | 126 | // https://stackoverflow.com/questions/21609934/ordering-permutation-in-rcpp-i-e-baseorder 127 | IntegerVector _order(NumericVector x) { 128 | NumericVector x2 = clone(x); 129 | for(int i = 0; i < x2.size(); i ++) { 130 | x2[i] += (i+1)*1e-8; // to get rid of duplicated values 131 | } 132 | NumericVector sorted = 
clone(x2).sort(); 133 | return match(sorted, x2) - 1; 134 | } 135 | 136 | IntegerVector _order(IntegerVector x) { 137 | IntegerVector sorted = clone(x).sort(); 138 | return match(sorted, x) - 1; 139 | } 140 | 141 | 142 | // [[Rcpp::export]] 143 | NumericMatrix cross_sum(NumericVector x) { 144 | int n = x.size(); 145 | NumericMatrix m(n, n); 146 | 147 | for(int i = 0; i < n; i ++) { 148 | m(i, i) = x[i] + x[i]; 149 | } 150 | 151 | if(n > 1) { 152 | for(int i = 0; i < n-1; i ++) { 153 | for(int j = i+1; j < n; j ++) { 154 | m(j, i) = m(i, j) = x[i] + x[j]; 155 | } 156 | } 157 | } 158 | 159 | return m; 160 | } 161 | 162 | // [[Rcpp::export]] 163 | NumericMatrix cross_multiply(NumericVector x) { 164 | int n = x.size(); 165 | NumericMatrix m(n, n); 166 | 167 | for(int i = 0; i < n; i ++) { 168 | m(i, i) = x[i] * x[i]; 169 | } 170 | 171 | if(n > 1) { 172 | for(int i = 0; i < n-1; i ++) { 173 | for(int j = i+1; j < n; j ++) { 174 | m(i, j) = m(j, i) = x[i] * x[j]; 175 | } 176 | } 177 | } 178 | 179 | return m; 180 | } 181 | 182 | // [[Rcpp::export]] 183 | NumericMatrix cross_minus(NumericVector x) { 184 | int n = x.size(); 185 | NumericMatrix m(n, n); 186 | 187 | for(int i = 0; i < n; i ++) { 188 | m(i, i) = 0; 189 | } 190 | 191 | if(n > 1) { 192 | for(int i = 0; i < n-1; i ++) { 193 | for(int j = i+1; j < n; j ++) { 194 | m(i, j) = x[i] - x[j]; 195 | m(j, i) = -m(i, j); 196 | } 197 | } 198 | } 199 | 200 | return m; 201 | } 202 | 203 | // [[Rcpp::export]] 204 | NumericMatrix cross_max(NumericVector x) { 205 | int n = x.size(); 206 | NumericMatrix m(n, n); 207 | 208 | for(int i = 0; i < n; i ++) { 209 | m(i, i) = x[i]; 210 | } 211 | 212 | if(n > 1) { 213 | for(int i = 0; i < n-1; i ++) { 214 | for(int j = i+1; j < n; j ++) { 215 | m(j, i) = m(i, j) = x[i] > x[j] ? 
x[i] : x[j]; 216 | } 217 | } 218 | } 219 | 220 | return m; 221 | } 222 | 223 | // [[Rcpp::export]] 224 | NumericMatrix cross_min(NumericVector x) { 225 | int n = x.size(); 226 | NumericMatrix m(n, n); 227 | 228 | for(int i = 0; i < n; i ++) { 229 | m(i, i) = x[i]; 230 | } 231 | 232 | if(n > 1) { 233 | for(int i = 0; i < n-1; i ++) { 234 | for(int j = i+1; j < n; j ++) { 235 | m(j, i) = m(i, j) = x[i] < x[j] ? x[i] : x[j]; 236 | } 237 | } 238 | } 239 | 240 | return m; 241 | } 242 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | 
} else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = 
this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | --------------------------------------------------------------------------------