}}
46 | }
47 | \examples{
48 | simona_opt
49 | }
50 |
--------------------------------------------------------------------------------
/src/partition.cpp:
--------------------------------------------------------------------------------
#include <Rcpp.h>
using namespace Rcpp;

#include "traverse.h"
6 | // [[Rcpp::export]]
7 | IntegerVector cpp_partition_by_size(S4 tree, int size) {
8 | List lt_children = tree.slot("lt_children");
9 | int n = tree.slot("n_terms");
10 | int root = tree.slot("root");
11 |
12 | IntegerVector n_offspring = cpp_n_offspring_on_tree(tree, true);
13 | IntegerVector pa(n, -1);
14 |
15 | // breadth-first search
16 | IntegerVector current_nodes = {root};
17 | while(current_nodes.size()) {
18 | IntegerVector current_nodes2;
19 | for(int i = 0; i < current_nodes.size(); i ++) {
20 | int i_node = current_nodes[i] - 1;
21 | IntegerVector children = lt_children[i_node];
22 |
23 | if(children.size() == 0) { // leaf
24 | pa[i_node] = current_nodes[i];
25 | } else {
26 |
27 | if(n_offspring[i_node] <= size) {
28 | Rcout << "getting offsprings for " << current_nodes[i] << "\n";
29 | IntegerVector offspring = cpp_offspring(tree, current_nodes[i], true);
30 | pa[offspring-1] = current_nodes[i];
31 | continue;
32 | }
33 |
34 | // check i_node's children
35 | bool all_small_children = true;
36 | for(int j = 0; j < children.size(); j ++) {
37 | if(n_offspring[children[j]-1] >= size) {
38 | all_small_children = false;
39 | }
40 | }
41 | if(all_small_children) {
42 | IntegerVector offspring = cpp_offspring(tree, current_nodes[i], true);
43 | pa[offspring-1] = current_nodes[i];
44 | } else {
45 | for(int j = 0; j < children.size(); j ++) {
46 | if(n_offspring[children[j]-1] <= size) {
47 | IntegerVector offspring = cpp_offspring(tree, children[j], true);
48 | pa[offspring-1] = children[j];
49 | } else {
50 | current_nodes2.push_back(children[j]);
51 | }
52 | }
53 | }
54 | }
55 | }
56 | current_nodes = current_nodes2;
57 | }
58 |
59 | return pa;
60 | }
61 |
--------------------------------------------------------------------------------
/man/dag_filter.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/filter.R
3 | \name{dag_filter}
4 | \alias{dag_filter}
5 | \title{Filter the DAG}
6 | \usage{
7 | dag_filter(
8 | dag,
9 | terms = NULL,
10 | relations = NULL,
11 | root = NULL,
12 | leaves = NULL,
13 | mcols_filter = NULL,
14 | namespace = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{dag}{An \code{ontology_DAG} object.}
19 |
20 | \item{terms}{A vector of term names. The sub-DAG will only contain these terms.}
21 |
22 | \item{relations}{A vector of relations. The sub-DAG will only contain these relations.
23 | Valid values of "relations" should correspond to the values set in the
24 | \code{relations} argument in the \code{\link[=create_ontology_DAG]{create_ontology_DAG()}}. If \code{relations_DAG} is
25 | already provided, offspring relation types will all be selected. Note "is_a"
26 | is always included.}
27 |
28 | \item{root}{A vector of term names which will be used as roots of the sub-DAG. Only
29 | these with their offspring terms will be kept. If there are multiple root terms,
30 | a super root will be automatically added.}
31 |
32 | \item{leaves}{A vector of leaf terms. Only these with their ancestor terms will be kept.}
33 |
34 | \item{mcols_filter}{Filtering on columns in the meta data frame.}
35 |
36 | \item{namespace}{The prefix before ":" of the term IDs.}
37 | }
38 | \value{
39 | An \code{ontology_DAG} object.
40 | }
41 | \description{
42 | Filter the DAG
43 | }
44 | \details{
45 | If the DAG is reduced into several disconnected parts after the filtering, a
46 | super root is automatically added.
47 | }
48 | \examples{
49 | parents = c("a", "a", "b", "b", "c", "d")
50 | children = c("b", "c", "c", "d", "e", "f")
51 | dag = create_ontology_DAG(parents, children)
52 | dag_filter(dag, terms = c("b", "d", "f"))
53 | dag_filter(dag, root = "b")
54 | dag_filter(dag, leaves = c("c", "b"))
55 | dag_filter(dag, root = "b", leaves = "e")
56 |
57 | \donttest{
58 | dag = create_ontology_DAG_from_GO_db()
59 | dag_filter(dag, relations = "is_a")
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/man/dag_reorder.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/reorder.R
3 | \name{dag_reorder}
4 | \alias{dag_reorder}
5 | \alias{dag_permutate_children}
6 | \title{Reorder the DAG}
7 | \usage{
8 | dag_reorder(dag, value, verbose = simona_opt$verbose)
9 |
10 | dag_permutate_children(dag, verbose = simona_opt$verbose)
11 | }
12 | \arguments{
13 | \item{dag}{An \code{ontology_DAG} object.}
14 |
15 | \item{value}{A vector of numeric values. See the \strong{Details} section.}
16 |
17 | \item{verbose}{Whether to print messages.}
18 | }
19 | \value{
20 | An \code{ontology_DAG} object.
21 | }
22 | \description{
23 | Reorder the DAG
24 | }
25 | \details{
26 | In \code{dag_reorder()}, there are two ways to set the \code{value} argument. It can be a vector corresponding
27 | to all terms (in the same order as in \code{\link[=dag_all_terms]{dag_all_terms()}}) or a vector corresponding
28 | to all leaf terms (in the same order as in \code{\link[=dag_leaves]{dag_leaves()}}). If \code{value} corresponds
29 | to all terms, the score associates to each term is the average value of all its offspring terms.
30 | And if \code{value} corresponds to all leaf terms, the score for each term is the average of all its
31 | connectable leaves.
32 |
33 | The reordering is simply applied on each term to reorder its child terms.
34 |
35 | \code{dag_permutate_children()} randomly permutes child terms under a term.
36 | }
37 | \examples{
38 | parents = c("a", "a", "b", "b", "c", "d")
39 | children = c("b", "c", "c", "d", "e", "f")
40 | # by default, c and e locate on the left side, d and f locate on the right side
41 | dag = create_ontology_DAG(parents, children)
42 | dag_children(dag, "b")
43 |
44 | # move c and e to the right side of the diagram
45 | dag2 = dag_reorder(dag, value = c(1, 1, 10, 1, 10, 1))
46 | dag_children(dag2, "b")
47 |
48 | # we can also only set values for leaf terms
49 | # there are two leaf terms c and e
50 | # we let v(c) > v(e) to move c to the right side of the diagram
51 | dag3 = dag_reorder(dag, value = c(10, 1))
52 | dag_children(dag3, "b")
53 | }
54 |
--------------------------------------------------------------------------------
/man/partition_by_level.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/partition.R
3 | \name{partition_by_level}
4 | \alias{partition_by_level}
5 | \alias{partition_by_size}
6 | \title{Partition the DAG}
7 | \usage{
8 | partition_by_level(dag, level = 1, from = NULL, term_pos = NULL)
9 |
10 | partition_by_size(dag, size = round(dag_n_terms(dag)/5))
11 | }
12 | \arguments{
13 | \item{dag}{An \code{ontology_DAG} object.}
14 |
15 | \item{level}{Depth in the DAG to cut. The DAG is cut below terms (or cut the links to their child terms) with \code{depth == level}.}
16 |
17 | \item{from}{A list of terms to cut. If it is set, \code{level} is ignored.}
18 |
19 | \item{term_pos}{Internally used.}
20 |
21 | \item{size}{Number of terms in a cluster. The splitting stops on a term if all its child-trees are smaller than \code{size}.}
22 | }
23 | \value{
24 | A character vector of top terms in each partition.
25 | }
26 | \description{
27 | Partition the DAG
28 | }
29 | \details{
30 | Let's call the terms below the \code{from} term as "top terms" because they will be on top of the sub-DAGs after the partitioning.
31 | It is possible that a term in the middle of the DAG can be traced back to more than one top terms.
32 | To partition all terms exclusively, a term is partitioned to the sub-DAG from the top term with the largest distance to the term.
33 | If a term has the same largest distances to several top terms, a random top term is selected.
34 |
35 | In \code{partition_by_size()}, the DAG is first reduced to a tree where a child term only has one parent.
36 | The partition is done recursively by cutting into its child-trees.
37 | The splitting stops when all the child-trees have size less than \code{size}.
38 |
39 | \code{NA} is assigned to the \code{from} terms, their ancestor terms, and terms having infinite directed distance to \code{from} terms.
40 | }
41 | \examples{
42 | \donttest{
43 | dag = create_ontology_DAG_from_GO_db()
44 | pa = partition_by_level(dag)
45 | table(pa)
46 | pa = partition_by_size(dag, size = 1000)
47 | table(pa)
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/docs/bootstrap-toc.css:
--------------------------------------------------------------------------------
1 | /*!
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
3 | * Copyright 2015 Aidan Feldman
4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
5 |
6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */
7 |
8 | /* All levels of nav */
9 | nav[data-toggle='toc'] .nav > li > a {
10 | display: block;
11 | padding: 4px 20px;
12 | font-size: 13px;
13 | font-weight: 500;
14 | color: #767676;
15 | }
16 | nav[data-toggle='toc'] .nav > li > a:hover,
17 | nav[data-toggle='toc'] .nav > li > a:focus {
18 | padding-left: 19px;
19 | color: #563d7c;
20 | text-decoration: none;
21 | background-color: transparent;
22 | border-left: 1px solid #563d7c;
23 | }
24 | nav[data-toggle='toc'] .nav > .active > a,
25 | nav[data-toggle='toc'] .nav > .active:hover > a,
26 | nav[data-toggle='toc'] .nav > .active:focus > a {
27 | padding-left: 18px;
28 | font-weight: bold;
29 | color: #563d7c;
30 | background-color: transparent;
31 | border-left: 2px solid #563d7c;
32 | }
33 |
34 | /* Nav: second level (shown on .active) */
35 | nav[data-toggle='toc'] .nav .nav {
36 | display: none; /* Hide by default, but at >768px, show it */
37 | padding-bottom: 10px;
38 | }
39 | nav[data-toggle='toc'] .nav .nav > li > a {
40 | padding-top: 1px;
41 | padding-bottom: 1px;
42 | padding-left: 30px;
43 | font-size: 12px;
44 | font-weight: normal;
45 | }
46 | nav[data-toggle='toc'] .nav .nav > li > a:hover,
47 | nav[data-toggle='toc'] .nav .nav > li > a:focus {
48 | padding-left: 29px;
49 | }
50 | nav[data-toggle='toc'] .nav .nav > .active > a,
51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a,
52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a {
53 | padding-left: 28px;
54 | font-weight: 500;
55 | }
56 |
57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
58 | nav[data-toggle='toc'] .nav > .active > ul {
59 | display: block;
60 | }
61 |
--------------------------------------------------------------------------------
/man/dag_longest_dist_to_offspring.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/transverse.R
3 | \name{dag_longest_dist_to_offspring}
4 | \alias{dag_longest_dist_to_offspring}
5 | \alias{dag_shortest_dist_to_offspring}
6 | \alias{dag_longest_dist_from_ancestors}
7 | \alias{dag_shortest_dist_from_ancestors}
8 | \title{Distance from all ancestors/to all offspring in the DAG}
9 | \usage{
10 | dag_longest_dist_to_offspring(dag, from, terms = NULL, background = NULL)
11 |
12 | dag_shortest_dist_to_offspring(dag, from, terms = NULL, background = NULL)
13 |
14 | dag_longest_dist_from_ancestors(dag, to, terms = NULL, background = NULL)
15 |
16 | dag_shortest_dist_from_ancestors(dag, to, terms = NULL, background = NULL)
17 | }
18 | \arguments{
19 | \item{dag}{An \code{ontology_DAG} object.}
20 |
21 | \item{from}{A single term name or a vector of term names.}
22 |
23 | \item{terms}{A vector of term names. If it is set, the returned vector will be subsetted to the terms that have been set here.}
24 |
25 | \item{background}{A vector of terms. Then the lookup will only be applied in this set of terms.}
26 |
27 | \item{to}{Same format as the \code{from} argument.}
28 | }
29 | \value{
30 | An integer vector having length the same as the number of terms in the DAG. If terms are not
31 | reachable to the \code{from} or \code{to} terms, the corresponding value is -1.
32 | }
33 | \description{
34 | Distance from all ancestors/to all offspring in the DAG
35 | }
36 | \details{
37 | If \code{from} or \code{to} is a vector, for a specific, the longest/shortest distance among all \code{from}/\code{to} terms is taken.
38 |
39 | As a special case, when \code{from} is the root term, \code{dag_longest_dist_to_offspring()} is the same as \code{dag_depth()},
40 | and when \code{to} are all leaf terms, \code{dag_longest_dist_from_ancestors()} is the same as \code{dag_height()}.
41 | }
42 | \examples{
43 | parents = c("a", "a", "b", "b", "c", "d")
44 | children = c("b", "c", "c", "d", "e", "f")
45 | dag = create_ontology_DAG(parents, children)
46 | dag_longest_dist_from_ancestors(dag, "e")
47 | dag_shortest_dist_from_ancestors(dag, "e")
48 | dag_longest_dist_to_offspring(dag, "b")
49 | }
50 |
--------------------------------------------------------------------------------
/man/subset.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/filter.R
3 | \name{[,ontology_DAG,ANY,ANY,missing-method}
4 | \alias{[,ontology_DAG,ANY,ANY,missing-method}
5 | \alias{[,ontology_DAG,ANY,ANY,ANY-method}
6 | \alias{[,ontology_DAG,ANY,missing,missing-method}
7 | \alias{[,ontology_DAG,ANY,missing,ANY-method}
8 | \alias{[,ontology_DAG,missing,ANY,missing-method}
9 | \alias{[,ontology_DAG,missing,ANY,ANY-method}
10 | \alias{[,ontology_DAG,missing,missing,missing-method}
11 | \alias{[,ontology_DAG,missing,missing,ANY-method}
12 | \alias{[[,ontology_DAG,character,missing-method}
13 | \title{Create sub-DAGs}
14 | \usage{
15 | \S4method{[}{ontology_DAG,ANY,ANY,missing}(x, i, j, ..., drop = FALSE)
16 |
17 | \S4method{[}{ontology_DAG,ANY,ANY,ANY}(x, i, j, ..., drop = FALSE)
18 |
19 | \S4method{[}{ontology_DAG,ANY,missing,missing}(x, i, j, ..., drop = FALSE)
20 |
21 | \S4method{[}{ontology_DAG,ANY,missing,ANY}(x, i, j, ..., drop = FALSE)
22 |
23 | \S4method{[}{ontology_DAG,missing,ANY,missing}(x, i, j, ..., drop = FALSE)
24 |
25 | \S4method{[}{ontology_DAG,missing,ANY,ANY}(x, i, j, ..., drop = FALSE)
26 |
27 | \S4method{[}{ontology_DAG,missing,missing,missing}(x, i, j, ..., drop = FALSE)
28 |
29 | \S4method{[}{ontology_DAG,missing,missing,ANY}(x, i, j, ..., drop = FALSE)
30 |
31 | \S4method{[[}{ontology_DAG,character,missing}(x, i, j, ...)
32 | }
33 | \arguments{
34 | \item{x}{An \code{ontology_DAG} object.}
35 |
36 | \item{i}{A character vector of term names. They are used as the roots of the sub-DAG.}
37 |
38 | \item{j}{A character vector of term names. They are used as the leaves of the sub-DAG.}
39 |
40 | \item{...}{Ignored.}
41 |
42 | \item{drop}{Ignored.}
43 | }
44 | \value{
45 | An \code{ontology_DAG} object.
46 | }
47 | \description{
48 | Create sub-DAGs
49 | }
50 | \details{
51 | It returns a sub-DAG taking node \code{i} as the root and \code{j} as the leaves. If \code{i} is a vector, a super root will be added.
52 | }
53 | \examples{
54 | parents = c("a", "a", "b", "b", "c", "d")
55 | children = c("b", "c", "c", "d", "e", "f")
56 | dag = create_ontology_DAG(parents, children)
57 | dag["b"]
58 | dag[["b"]]
59 | dag["b", "f"]
60 | dag[, "f"]
61 | }
62 |
--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
$(function() {

  // focus the search bar when the user presses shift + "/" (i.e. "?")
  $(document).on('keydown', function(e) {
    if (e.shiftKey && e.keyCode == 191) {
      e.preventDefault();
      $("#search-input").focus();
    }
  });

  $(document).ready(function() {
    // highlight the keyword passed via the "?q=" URL parameter
    /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
    var mark = function() {

      var referrer = document.URL;
      var paramKey = "q";

      // nothing to do when the URL carries no query string
      if (referrer.indexOf("?") === -1) {
        return;
      }

      var query = referrer.substr(referrer.indexOf('?') + 1);
      var queryNoAnchor = query.split('#')[0];
      var params = queryNoAnchor.split('&');
      var keyword = "";

      for (var i = 0; i < params.length; i++) {
        var pair = params[i].split('=');

        if (pair.length !== 2) {
          continue;
        }

        if (pair[0] == paramKey) {
          keyword = decodeURIComponent(pair[1].replace(/\+/g, "%20"));
        }
      }

      if (keyword !== "") {
        // clear any previous marks, then highlight the new keyword
        $(".contents").unmark({
          done: function() {
            $(".contents").mark(keyword);
          }
        });
      }
    };

    mark();
  });
});
51 |
52 | /* Search term highlighting ------------------------------*/
53 |
/* Collect the unique set of highlighted words for a search hit. */
function matchedWords(hit) {
  var collected = [];

  // gather words from every hierarchy level (lvl0, lvl1, ...)
  var hierarchy = hit._highlightResult.hierarchy;
  for (var level in hierarchy) {
    collected = collected.concat(hierarchy[level].matchedWords);
  }

  // the page content, when highlighted, contributes additional words
  var content = hit._highlightResult.content;
  if (content) {
    collected = collected.concat(content.matchedWords);
  }

  // de-duplicate while preserving first-seen order
  return [...new Set(collected)];
}
72 |
/* Build the URL for a search hit, embedding its matched words as a ?q= parameter. */
function updateHitURL(hit) {

  var q = escape(matchedWords(hit).join(" "));

  // keep the anchor (when present) after the query string
  if (hit.anchor) {
    return hit.url_without_anchor + '?q=' + q + '#' + hit.anchor;
  }
  return hit.url + '?q=' + q;
}
86 |
--------------------------------------------------------------------------------
/inst/extdata/onto_gene.R:
--------------------------------------------------------------------------------
1 | ## uniprot keywords
2 |
3 |
4 | library(UniProtKeywords)
5 |
6 | data(kw_parents)
7 |
8 | parents = unlist(kw_parents)
9 | children = rep(names(kw_parents), times = sapply(kw_parents, length))
10 |
11 | dag = create_ontology_DAG(parents, children)
12 |
13 | data(kw_terms)
14 |
15 | meta = data.frame(
16 | id = sapply(kw_terms, function(x) x$Identifier),
17 | accession = sapply(kw_terms, function(x) x$Accession),
18 | name = sapply(kw_terms, function(x) x$Identifier),  # NOTE(review): duplicates 'id' (both use $Identifier) -- was x$Name intended? confirm
19 | description = sapply(kw_terms, function(x) x$Description),
20 | category = sapply(kw_terms, function(x) paste(x$Category, collapse = "; "))
21 | )
22 | rownames(meta) = meta$id
23 | meta = meta[dag@terms, ]
24 | rownames(meta)[nrow(meta)] = simona:::SUPER_ROOT  # last term has no kw_terms entry; presumably the auto-added super root -- verify
25 |
26 | mcols(dag) = meta
27 |
28 | annotation = load_keyword_genesets("9606")
29 | dag = add_annotation(dag, annotation)
30 |
31 |
32 | ## The Human Phenotype Ontology
33 |
34 | # https://hpo.jax.org/app/data/annotations
35 |
36 | dag = import_obo("https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-10-09/hp-base.obo")
37 |
38 | tb = read.table(url("https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2023-10-09/genes_to_phenotype.txt"), sep = "\t", header = TRUE)
39 | annotation = split(tb$ncbi_gene_id, tb$hpo_id)
40 |
41 | dag = add_annotation(dag, annotation)
42 |
43 |
44 | ## Pathway ontology and many
45 |
46 | # https://download.rgd.mcw.edu/ontology/
47 |
48 | library(rvest)
49 |
50 | html = read_html("https://download.rgd.mcw.edu/ontology/annotated_rgd_objects_by_ontology/")
51 | tb = html %>% html_element("table") %>% html_table()
52 | fn = tb[grep("_genes_", tb[[2]]), ][[2]]
53 | tb = data.frame(org = gsub("_.*$", "", fn),
54 | onto = gsub("^.*_", "", fn))
55 | tb = tb[!tb$onto %in% c("go", "nbo", "mp", "cmo"), ]
56 | tb = tb[order(tb$onto), ]
57 |
58 | tb$anno_url = paste0("https://download.rgd.mcw.edu/ontology/annotated_rgd_objects_by_ontology/", tb$org, "_genes_", tb$onto)
59 |
60 | onto = c("chebi" = "https://purl.obolibrary.org/obo/chebi.obo",
61 | "pw" = "https://download.rgd.mcw.edu/ontology/pathway/pathway.obo",
62 | "rdo" = "https://download.rgd.mcw.edu/ontology/disease/RDO.obo",
63 | "vt" = "https://purl.obolibrary.org/obo/vt.owl",
64 | "hp" = "https://purl.obolibrary.org/obo/hp.obo")
65 |
66 | tb$onto_url = onto[tb$onto]
67 | ## MeSH
68 |
69 | dag = import_ttl("https://data.bioontology.org/ontologies/MESH/submissions/26/download?apikey=8b5b7825-538d-40e0-9e9e-5ab9274a9aeb")  # NOTE(review): hard-coded API key in URL -- move to an environment variable
70 |
71 |
72 |
--------------------------------------------------------------------------------
/man/distance.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/dist.R
3 | \name{shortest_distances_via_NCA}
4 | \alias{shortest_distances_via_NCA}
5 | \alias{longest_distances_via_LCA}
6 | \alias{shortest_distances_directed}
7 | \alias{longest_distances_directed}
8 | \title{Distance on the DAG}
9 | \usage{
10 | shortest_distances_via_NCA(dag, terms, verbose = simona_opt$verbose)
11 |
12 | longest_distances_via_LCA(dag, terms, verbose = simona_opt$verbose)
13 |
14 | shortest_distances_directed(dag, terms, verbose = simona_opt$verbose)
15 |
16 | longest_distances_directed(dag, terms, verbose = simona_opt$verbose)
17 | }
18 | \arguments{
19 | \item{dag}{An \code{ontology_DAG} object.}
20 |
21 | \item{terms}{A vector of term names.}
22 |
23 | \item{verbose}{Whether to print messages.}
24 | }
25 | \value{
26 | A numeric distance matrix.
27 | }
28 | \description{
29 | Distance on the DAG
30 | }
31 | \details{
32 | Denote two terms as \code{a} and \code{b}, a common ancestor as \code{c}, and the distance function \code{d()} calculates the longest
33 | distance or the shortest distance depending on the function.
34 | \itemize{
35 | \item \code{shortest_distances_via_NCA()}: It calculates the smallest \code{d(c, a) + d(c, b)} where \code{d()} calculates the shortest distance between two terms. In this case,
36 | \code{c} is the NCA (nearest common ancestor) of \code{a} and \code{b}.
37 | \item \code{longest_distances_via_LCA()}: It calculates the largest \code{d(c, a) + d(c, b)} where \code{d()} calculates the longest distance between two terms \emph{via the LCA (lowest common ancestor) term}. In this case,
38 | \code{c} is the LCA of \code{a} and \code{b}.
39 | \item \code{shortest_distances_directed()}: It calculates \code{d(a, b)} where \code{d()} calculates the shortest distance between two terms. The distance is only calculated when \code{a} is an ancestor of \code{b}, otherwise the distance value is -1.
40 | \item \code{longest_distances_directed()}: It calculates \code{d(a, b)} where \code{d()} calculates the longest distance between two terms. The distance is only calculated when \code{a} is an ancestor of \code{b}, otherwise the distance value is -1.
41 | }
42 | }
43 | \examples{
44 | parents = c("a", "a", "b", "b", "c", "d")
45 | children = c("b", "c", "c", "d", "e", "f")
46 | dag = create_ontology_DAG(parents, children)
47 | shortest_distances_via_NCA(dag, letters[1:6])
48 | longest_distances_via_LCA(dag, letters[1:6])
49 | shortest_distances_directed(dag, letters[1:6])
50 | longest_distances_directed(dag, letters[1:6])
51 | }
52 |
--------------------------------------------------------------------------------
/man/dag_enrich_on_offsprings.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/enrich.R
3 | \name{dag_enrich_on_offsprings}
4 | \alias{dag_enrich_on_offsprings}
5 | \title{Enrichment analysis on offspring terms}
6 | \usage{
7 | dag_enrich_on_offsprings(dag, terms, min_hits = 3, min_offspring = 10)
8 | }
9 | \arguments{
10 | \item{dag}{An \code{ontology_DAG} object.}
11 |
12 | \item{terms}{A vector of term names.}
13 |
14 | \item{min_hits}{Minimal number of terms in an offspring set.}
15 |
16 | \item{min_offspring}{Minimal size of the offspring set.}
17 | }
18 | \value{
19 | A data frame with the following columns:
20 | \itemize{
21 | \item \code{term}: Term names.
22 | \item \code{n_hits}: Number of terms in \code{terms} intersecting to \code{t}'s offspring terms.
23 | \item \code{n_offspring}: Number of offspring terms of \code{t} (including \code{t} itself).
24 | \item \code{n_terms}: Number of terms in \code{terms} intersecting to all terms in the DAG.
25 | \item \code{n_all}: Number of all terms in the DAG.
26 | \item \code{log2_fold_enrichment}: Defined as log2(observed/expected).
27 | \item \code{z_score}: Defined as (observed-expected)/sd.
28 | \item \code{p_value}: P-values from hypergeometric test.
29 | \item \code{p_adjust}: Adjusted p-values from the BH method.
30 | }
31 |
32 | The number of rows in the data frame is the same as the number of terms in the DAG.
33 | }
34 | \description{
35 | The analysis task is to evaluate how significant a term includes \code{terms}.
36 | }
37 | \details{
38 | Given a list of terms in \code{terms}, the function tests whether they are enriched in a term's offspring terms.
39 | The test is based on the hypergeometric distribution. In the following 2x2 contingency table, \code{S} is the set of \code{terms},
40 | for a term \code{t} in the DAG, \code{T} is the set of its offspring plus the \code{t} itself, the aim is to test whether \code{S} is over-represented
41 | in \code{T}.
42 |
43 | If there is a significant p-value, we can say the term \code{t} preferably includes terms in \code{terms}.
44 |
45 | \if{html}{\out{
}}\preformatted{+----------+------+----------+-----+
46 | | | in S | not in S | all |
47 | +----------+------+----------+-----+
48 | | in T | x11 | x12 | x10 |
49 | | not in T | x21 | x22 | x20 |
50 | +----------+------+----------+-----+
51 | | all | x01 | x02 | x |
52 | +----------+------+----------+-----+
53 | }\if{html}{\out{
}}
54 | }
55 | \examples{
56 | \dontrun{
57 | dag = create_ontology_DAG_from_GO_db()
58 | terms = random_terms(dag, 100)
59 | df = dag_enrich_on_offsprings(dag, terms)
60 | }
61 |
62 | }
63 |
--------------------------------------------------------------------------------
/man/create_ontology_DAG_from_GO_db.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/go.R
3 | \name{create_ontology_DAG_from_GO_db}
4 | \alias{create_ontology_DAG_from_GO_db}
5 | \alias{alternative_GO_terms}
6 | \title{Create the ontology_DAG object from the GO.db package}
7 | \usage{
8 | create_ontology_DAG_from_GO_db(
9 | namespace = "BP",
10 | relations = "part of",
11 | org_db = NULL,
12 | evidence_code = NULL,
13 | retrieve_alternative = FALSE,
14 | verbose = simona_opt$verbose
15 | )
16 |
17 | alternative_GO_terms(
18 | tag = c("replaced_by", "alt_id", "consider"),
19 | version = NULL,
20 | verbose = TRUE
21 | )
22 | }
23 | \arguments{
24 | \item{namespace}{One of "BP", "CC" and "MF".}
25 |
26 | \item{relations}{Types of the GO term relations. In the \strong{GO.db} package, the GO term relations can be "is_a", "part_of",
27 | "regulates", "negatively regulates", "positively regulates". Note since "regulates" is a parent relation
28 | of "negatively regulates", "positively regulates", if "regulates" is selected, "negatively regulates" and "positively regulates"
29 | are also selected. Note "is_a" is always included.}
30 |
31 | \item{org_db}{The name of the organism package or the corresponding database object, e.g. \code{"org.Hs.eg.db"} or
32 | directly the \code{\link[org.Hs.eg.db:org.Hs.egBASE]{org.Hs.eg.db::org.Hs.eg.db}} object for human, then the gene annotation to GO terms will be added
33 | to the object. For other non-model organisms, consider to use the \strong{AnnotationHub} package to find one.}
34 |
35 | \item{evidence_code}{A vector of evidence codes for gene annotation to GO terms. See \url{https://geneontology.org/docs/guide-go-evidence-codes/}.}
36 |
37 | \item{retrieve_alternative}{Whether to retrieve alternative/obsolete GO terms from geneontology.org?}
38 |
39 | \item{verbose}{Whether to print messages.}
40 |
41 | \item{tag}{In the \code{go-basic.obo} file, there are three tags which define alternative GO terms: \code{replaced_by}, \code{alt_id} and \code{consider}.
42 | See \url{https://owlcollab.github.io/oboformat/doc/GO.format.obo-1_4.html#S.2.2.1}}
43 |
44 | \item{version}{Version of the \code{go-basic.obo} file. By default it is the version for building \strong{GO.db} package. The value is a string in the format of "2024-01-17".}
45 | }
46 | \value{
47 | An \code{ontology_DAG} object.
48 |
49 | A list of named vectors where names are alternative GO IDs and value vectors are current GO IDs in use.
50 | }
51 | \description{
52 | Create the ontology_DAG object from the GO.db package
53 |
54 | Mappings between alternative GO terms to official GO terms
55 | }
56 | \examples{
57 | dag = create_ontology_DAG_from_GO_db()
58 | dag
59 | }
60 |
--------------------------------------------------------------------------------
/tests/testthat/tests_constructor.R:
--------------------------------------------------------------------------------
1 |
2 | library(testthat)
3 |
4 | test_that("test cyclic_node", {
5 | parents = c("a", "b", "c", "d")
6 | children = c("b", "c", "d", "b")
7 | expect_error(
8 | create_ontology_DAG(parents, children),
9 | "Found cyclic nodes"
10 | )
11 |
12 | parents = c("a", "b", "c", "g", "h")
13 | children = c("b", "c", "d", "h", "i")
14 | expect_message(
15 | dag <- create_ontology_DAG(parents, children),
16 | "more than one root"
17 | )
18 | expect_equal(
19 | length(dag@terms),
20 | length(unique(c(parents, children))) + 1
21 | )
22 |
23 | parents = c("a", "b", "c", "d")
24 | children = c("b", "c", "d", "a")
25 | expect_error(
26 | create_ontology_DAG(parents, children),
27 | "There might exist a cycle"
28 | )
29 | })
30 |
31 |
32 | # b--d--f
33 | # / \
34 | # a---c--e
35 | # upstream -> downstream
36 |
37 | parents = c("a", "a", "b", "b", "c", "d")
38 | children = c("b", "c", "c", "d", "e", "f")
39 |
40 | dag = create_ontology_DAG(parents, children)
41 |
42 | test_that("test sub-DAG", {
43 | expect_identical(
44 | dag[["c"]]@terms,
45 | c("c", "e")
46 | )
47 | expect_identical(
48 | dag[["b"]]@terms,
49 | c("b", "c", "d", "e", "f")
50 | )
51 | expect_error(
52 | dag["l"]
53 | )
54 |
55 | expect_identical(
56 | dag["f"]@terms,
57 | "~~singleton~~"
58 | )
59 | })
60 |
61 |
62 | test_that("test DAG filter", {
63 | expect_identical(
64 | dag_filter(dag, terms = c("b", "d", "f"))@terms,
65 | c("b", "d", "f")
66 | )
67 | expect_identical(
68 | dag_filter(dag, root = "b")@terms,
69 | c("b", "c", "d", "e", "f")
70 | )
71 | expect_identical(
72 | dag_filter(dag, root = c("b", "c"))@terms,
73 | c("b", "c", "d", "e", "f")
74 | )
75 | expect_identical(
76 | dag_filter(dag, leaves = c("c", "d"))@terms,
77 | c("a", "b", "c", "d")
78 | )
79 | expect_identical(
80 | dag_filter(dag, leaves = c("b", "c"))@terms,
81 | c("a", "b", "c")
82 | )
83 | expect_identical(
84 | dag_filter(dag, root = "b", leaves = "e")@terms,
85 | c("b", "c", "e")
86 | )
87 | })
88 |
89 |
90 | parents = c("a", "b", "c", "d", "e")
91 | children = c("b", "c", "d", "e", "b")
92 |
93 | test_that("test cyclic path", {
94 | expect_error(
95 | create_ontology_DAG(parents, children),
96 | "Found cyclic nodes"
97 | )
98 |
99 | expect_message(
100 | create_ontology_DAG(parents, children, remove_cyclic_paths = TRUE),
101 | "Remove"
102 | )
103 | })
104 |
105 | parents = c("a", "b", "c", "d", "f", "g", "h")
106 | children = c("b", "c", "d", "e", "g", "h", "f") # f->g->h->f forms a ring disconnected from the a->...->e chain
107 | 
108 | test_that("test isolated rings", {
109 | expect_error(
110 | create_ontology_DAG(parents, children),
111 | "Found isolated rings"
112 | )
113 | 
114 | expect_message(
115 | create_ontology_DAG(parents, children, remove_rings = TRUE), # the ring terms are removed with a message instead of an error
116 | "Remove"
117 | )
118 | })
119 |
--------------------------------------------------------------------------------
/man/dag_random.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/random.R
3 | \name{dag_random_tree}
4 | \alias{dag_random_tree}
5 | \alias{dag_add_random_children}
6 | \alias{dag_random}
7 | \title{Generate a random DAG}
8 | \usage{
9 | dag_random_tree(
10 | n_children = 2,
11 | p_stop = 0,
12 | max = 2^10 - 1,
13 | verbose = simona_opt$verbose
14 | )
15 |
16 | dag_add_random_children(
17 | dag,
18 | p_add = 0.1,
19 | new_children = c(1, 4),
20 | add_random_children_fun = NULL,
21 | verbose = simona_opt$verbose
22 | )
23 |
24 | dag_random(
25 | n_children = 2,
26 | p_stop = 0,
27 | max = 2^10 - 1,
28 | p_add = 0.1,
29 | new_children = c(1, 4),
30 | verbose = simona_opt$verbose
31 | )
32 | }
33 | \arguments{
34 | \item{n_children}{Number of children of a term. The value can also be a vector of
35 | length two representing the range of the number of child terms.}
36 |
37 | \item{p_stop}{The probability of a term to stop growing.}
38 |
39 | \item{max}{Maximal number of terms.}
40 |
41 | \item{verbose}{Whether to print messages.}
42 |
43 | \item{dag}{An \code{ontology_DAG} object.}
44 |
45 | \item{p_add}{The probability to add children on each term.}
46 |
47 | \item{new_children}{The number or range of numbers of new children if a term is selected to add more children.}
48 |
49 | \item{add_random_children_fun}{A function to randomly add children from the DAG.}
50 | }
51 | \value{
52 | An \code{ontology_DAG} object.
53 | }
54 | \description{
55 | Generate a random DAG
56 | }
57 | \details{
58 | \code{dag_random_tree()} generates a random DAG tree from the root term. In a certain step of
59 | the growing, let's denote the set of all leaf terms as L, then in the next round of growing,
60 | \code{floor(length(L)*p_stop)} leaf terms stop growing, and for the remaining leaf terms that
61 | continue to grow, each term will add child terms whose number is uniformly sampled within \verb{[ n_children[1], n_children[2] ]}.
62 | The growing stops when the total number of terms in the DAG exceeds \code{max}.
63 |
64 | \code{dag_add_random_children()} adds more links in a DAG. Each term is associated with a probability \code{p_add}
65 | to add new links where the term, if it is selected, acts as a parent term, linking to other terms in the DAG.
66 | The number of new child terms is controlled by \code{new_children} which can be a single number or a range. By default,
67 | new child terms of a term \code{t} are randomly selected from other terms that are lower than the term \code{t}
68 | (check the function \code{simona:::add_random_children}). The way how to randomly select new child terms for \code{t}
69 | can be controlled by a self-defined function for the \code{add_random_children_fun} argument.
70 |
71 | \code{dag_random()}: it simply wraps \code{dag_random_tree()} and \code{dag_add_random_children()}.
72 | }
73 | \examples{
74 | tree = dag_random_tree()
75 | dag = dag_random()
76 | }
77 |
--------------------------------------------------------------------------------
/man/ontology.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/anno_gene.R
3 | \name{ontology_kw}
4 | \alias{ontology_kw}
5 | \alias{ontology_chebi}
6 | \alias{ontology_hp}
7 | \alias{ontology_pw}
8 | \alias{ontology_rdo}
9 | \alias{ontology_vt}
10 | \alias{ontology_go}
11 | \alias{ontology_reactome}
12 | \title{Import ontologies already having gene annotations}
13 | \usage{
14 | ontology_kw(
15 | organism = "human",
16 | gene_annotation = TRUE,
17 | verbose = simona_opt$verbose,
18 | ...
19 | )
20 |
21 | ontology_chebi(
22 | organism = c("human", "mouse", "rat", "pig", "dog"),
23 | gene_annotation = TRUE,
24 | verbose = simona_opt$verbose,
25 | ...
26 | )
27 |
28 | ontology_hp(
29 | organism = c("human", "mouse"),
30 | gene_annotation = TRUE,
31 | verbose = simona_opt$verbose,
32 | ...
33 | )
34 |
35 | ontology_pw(
36 | organism = c("human", "mouse", "rat", "pig", "dog", "chimpanzee"),
37 | gene_annotation = TRUE,
38 | verbose = simona_opt$verbose,
39 | ...
40 | )
41 |
42 | ontology_rdo(
43 | organism = c("human", "mouse", "rat", "pig", "dog", "chimpanzee"),
44 | gene_annotation = TRUE,
45 | verbose = simona_opt$verbose,
46 | ...
47 | )
48 |
49 | ontology_vt(
50 | organism = c("human", "mouse", "rat", "pig", "dog", "chimpanzee"),
51 | gene_annotation = TRUE,
52 | verbose = simona_opt$verbose,
53 | ...
54 | )
55 |
56 | ontology_go(...)
57 |
58 | ontology_reactome(
59 | organism = "HSA",
60 | gene_annotation = TRUE,
61 | verbose = simona_opt$verbose,
62 | ...
63 | )
64 | }
65 | \arguments{
66 | \item{organism}{Organism.}
67 |
68 | \item{gene_annotation}{Whether to add gene annotations to the DAG.}
69 |
70 | \item{verbose}{Whether to print messages?}
71 |
72 | \item{...}{Pass to \code{\link[=create_ontology_DAG]{create_ontology_DAG()}}.}
73 | }
74 | \description{
75 | Import ontologies already having gene annotations
76 | }
77 | \details{
78 | There are the following ontologies:
79 | \itemize{
80 | \item \code{ontology_kw()}: UniProt Keywords. The list of supported organisms can be found in \code{\link[UniProtKeywords:load_keyword_genesets]{UniProtKeywords::load_keyword_genesets()}}.
81 | \item \code{ontology_chebi()}: Chemical Entities of Biological Interest.
82 | \item \code{ontology_hp()}: The Human Phenotype Ontology.
83 | \item \code{ontology_pw()}: Pathway Ontology.
84 | \item \code{ontology_rdo()}: RGD Disease Ontology.
85 | \item \code{ontology_vt()}: Vertebrate Trait Ontology.
86 | }
87 |
88 | The source of the original files can be found with \code{simona:::RGD_TB}.
89 |
90 | \code{ontology_go()} is an alias of \code{\link[=create_ontology_DAG_from_GO_db]{create_ontology_DAG_from_GO_db()}}. All arguments go there.
91 |
92 | Valid values for \code{organism} argument in \code{ontology_reactome()} are
93 |
94 | \if{html}{\out{
}}
49 |
50 | In \code{n_annotations()}, when \code{uniquify = TRUE}, the first method is used; and when \code{uniquify = FALSE}, the second method is used.
51 |
52 | For some annotation sources, it is possible that an item is annotated to multiple terms, thus, the second method which simply
53 | adds numbers of all its child terms may not be proper because an item may be counted duplicatedly, thus over-estimating \code{n}. The two methods
54 | are identical only if an item is annotated to a unique term in the DAG.
55 |
56 | We suggest to always set \code{uniquify = TRUE} (the default), and the scenario of \code{uniquify = FALSE} is only for the testing or benchmarking purpose.
57 | }
58 | \examples{
59 | parents = c("a", "a", "b", "b", "c", "d")
60 | children = c("b", "c", "c", "d", "e", "f")
61 | annotation = list(
62 | "a" = c("t1", "t2", "t3"),
63 | "b" = c("t3", "t4"),
64 | "c" = "t5",
65 | "d" = "t7",
66 | "e" = c("t4", "t5", "t6", "t7"),
67 | "f" = "t8"
68 | )
69 | dag = create_ontology_DAG(parents, children, annotation = annotation)
70 | n_annotations(dag)
71 | }
72 |
--------------------------------------------------------------------------------
/man/dag_enrich_on_offsprings_by_permutation.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/enrich.R
3 | \name{dag_enrich_on_offsprings_by_permutation}
4 | \alias{dag_enrich_on_offsprings_by_permutation}
5 | \title{Enrichment analysis on offspring terms by permutation test}
6 | \usage{
7 | dag_enrich_on_offsprings_by_permutation(
8 | dag,
9 | value,
10 | perm = 1000,
11 | min_offspring = 10,
12 | verbose = simona_opt$verbose
13 | )
14 | }
15 | \arguments{
16 | \item{dag}{An \code{ontology_DAG} object.}
17 |
18 | \item{value}{A numeric value. The value should correspond to terms in \code{dag@terms}.}
19 |
20 | \item{perm}{Number of permutations.}
21 |
22 | \item{min_offspring}{Minimal size of the offspring set.}
23 |
24 | \item{verbose}{Whether to print messages.}
25 | }
26 | \value{
27 | A data frame with the following columns:
28 | \itemize{
29 | \item \code{term}: Term names.
30 | \item \code{stats}: The statistics of terms.
31 | \item \code{n_offspring}: Number of offspring terms of \code{t} (including \code{t} itself).
32 | \item \code{log2_fold_enrichment}: defined as \code{log2(s/mean)} where \code{mean} is calculated from random permutation.
33 | \item \code{z_score}: Defined as \code{(s - mean)/sd} where \code{mean} and \code{sd} are calculated from random permutation.
34 | \item \code{p_value}: P-values from permutation test.
35 | \item \code{p_adjust}: Adjusted p-values from the BH method.
36 | }
37 |
38 | The number of rows in the data frame is the same as the number of terms in the DAG.
39 | }
40 | \description{
41 | Enrichment analysis on offspring terms by permutation test
42 | }
43 | \details{
44 | In the function \code{\link[=dag_enrich_on_offsprings]{dag_enrich_on_offsprings()}}, the statistic for testing is the number of terms in each category. Here
45 | this function makes the testing procedure more general.
46 |
47 | The function tests whether a term \code{t}'s offspring terms have an over-represented pattern on values in \code{value}.
48 | Denote \code{T} as the set of \code{t}'s offspring terms plus \code{t} itself, and \code{v} as the numeric vector of \code{value}, we first
49 | calculate a score \code{s} based on values in \code{T}:
50 |
51 | \if{html}{\out{
}}\preformatted{s = mean_\{terms in T\}(v)
52 | }\if{html}{\out{
}}
53 |
54 | To construct a random version of \code{s}, we randomly sample \code{n_T} terms from the DAG where \code{n_T} is the size of set \code{T}:
55 |
56 | \if{html}{\out{
}}
58 |
59 | where index \code{i} represents the i^th sampling. If we sample \code{k} times, the p-value is calculated as:
60 |
61 | \if{html}{\out{
}}\preformatted{p = sum_\{i in 1..k\}(I(sr_i > s))/k
62 | }\if{html}{\out{
}}
63 | }
64 | \examples{
65 | \dontrun{
66 | dag = create_ontology_DAG_from_GO_db()
67 | value = runif(dag_n_terms(dag)) # a set of random values
68 | df = dag_enrich_on_offsprings_by_permutation(dag, value)
69 | }
70 | 1
71 | }
72 |
--------------------------------------------------------------------------------
/docs/articles/v07_dag_visualization_files/grViz-binding-1.0.10/grViz.js:
--------------------------------------------------------------------------------
1 | HTMLWidgets.widget({
2 |
3 | name: 'grViz',
4 |
5 | type: 'output',
6 |
7 | initialize: function(el, width, height) {
8 |
9 | return {
10 | // TODO: add instance fields as required
11 | };
12 | },
13 |
14 | renderValue: function(el, x, instance) {
15 | // Use this to sort of make our diagram responsive
16 | // or at a minimum fit within the bounds set by htmlwidgets
17 | // for the parent container
18 | function makeResponsive(el){
19 | var svg = el.getElementsByTagName("svg")[0];
20 | if (svg) {
21 | if (svg.width) {svg.removeAttribute("width")}
22 | if (svg.height) {svg.removeAttribute("height")}
23 | svg.style.width = "100%";
24 | svg.style.height = "100%";
25 | }
26 | }
27 |
28 | if (x.diagram !== "") {
29 |
30 | if (typeof x.config === "undefined"){
31 | x.config = {};
32 | x.config.engine = "dot";
33 | x.config.options = {};
34 | }
35 |
36 | try {
37 |
38 | el.innerHTML = Viz(x.diagram, format="svg", engine=x.config.engine, options=x.config.options);
39 |
40 | makeResponsive(el);
41 |
42 | if (HTMLWidgets.shinyMode) {
43 | // Get widget id
44 | var id = el.id;
45 |
46 | $("#" + id + " .node").click(function(e) {
47 | // Get node id
48 | var nodeid = e.currentTarget.id;
49 | // Get node text object and make an array
50 | var node_texts = $("#" + id + " #" + nodeid + " text");
51 | //var node_path = $("#" + nodeid + " path")[0];
52 | var text_array = node_texts.map(function() {return $(this).text(); }).toArray();
53 | // Build return object *obj* with node-id, node text values and node fill
54 | var obj = {
55 | id: nodeid,
56 | //fill: node_path.attributes.fill.nodeValue,
57 | //outerHMTL: node_path.outerHTML,
58 | nodeValues: text_array
59 | };
60 | // Send *obj* to Shiny's inputs (input$[id]+_click e.g.: input$vtree_click))
61 | Shiny.setInputValue(id + "_click", obj, {priority: "event"});
62 | });
63 | }
64 |
65 | // set up a container for tasks to perform after completion
66 | // one example would be add callbacks for event handling
67 | // styling
68 | if (typeof x.tasks !== "undefined") {
69 | if ((typeof x.tasks.length === "undefined") ||
70 | (typeof x.tasks === "function")) {
71 | // handle a function not enclosed in array
72 | // should be able to remove once using jsonlite
73 | x.tasks = [x.tasks];
74 | }
75 | x.tasks.map(function(t){
76 | // for each tasks add it to the mermaid.tasks with el
77 | t.call(el);
78 | });
79 | }
80 | } catch(e){
81 | var p = document.createElement("pre");
82 | p.innerText = e;
83 | el.appendChild(p);
84 | }
85 | }
86 |
87 | },
88 |
89 | resize: function(el, width, height, instance) {
90 | }
91 | });
92 |
--------------------------------------------------------------------------------
/docs/articles/v07_dag_visualization_files/grViz-binding-1.0.11/grViz.js:
--------------------------------------------------------------------------------
1 | HTMLWidgets.widget({
2 |
3 | name: 'grViz',
4 |
5 | type: 'output',
6 |
7 | initialize: function(el, width, height) {
8 |
9 | return {
10 | // TODO: add instance fields as required
11 | };
12 | },
13 |
14 | renderValue: function(el, x, instance) {
15 | // Use this to sort of make our diagram responsive
16 | // or at a minimum fit within the bounds set by htmlwidgets
17 | // for the parent container
18 | function makeResponsive(el){
19 | var svg = el.getElementsByTagName("svg")[0];
20 | if (svg) {
21 | if (svg.width) {svg.removeAttribute("width")}
22 | if (svg.height) {svg.removeAttribute("height")}
23 | svg.style.width = "100%";
24 | svg.style.height = "100%";
25 | }
26 | }
27 |
28 | if (x.diagram !== "") {
29 |
30 | if (typeof x.config === "undefined"){
31 | x.config = {};
32 | x.config.engine = "dot";
33 | x.config.options = {};
34 | }
35 |
36 | try {
37 |
38 | el.innerHTML = Viz(x.diagram, format="svg", engine=x.config.engine, options=x.config.options);
39 |
40 | makeResponsive(el);
41 |
42 | if (HTMLWidgets.shinyMode) {
43 | // Get widget id
44 | var id = el.id;
45 |
46 | $("#" + id + " .node").click(function(e) {
47 | // Get node id
48 | var nodeid = e.currentTarget.id;
49 | // Get node text object and make an array
50 | var node_texts = $("#" + id + " #" + nodeid + " text");
51 | //var node_path = $("#" + nodeid + " path")[0];
52 | var text_array = node_texts.map(function() {return $(this).text(); }).toArray();
53 | // Build return object *obj* with node-id, node text values and node fill
54 | var obj = {
55 | id: nodeid,
56 | //fill: node_path.attributes.fill.nodeValue,
57 | //outerHMTL: node_path.outerHTML,
58 | nodeValues: text_array
59 | };
60 | // Send *obj* to Shiny's inputs (input$[id]+_click e.g.: input$vtree_click))
61 | Shiny.setInputValue(id + "_click", obj, {priority: "event"});
62 | });
63 | }
64 |
65 | // set up a container for tasks to perform after completion
66 | // one example would be add callbacks for event handling
67 | // styling
68 | if (typeof x.tasks !== "undefined") {
69 | if ((typeof x.tasks.length === "undefined") ||
70 | (typeof x.tasks === "function")) {
71 | // handle a function not enclosed in array
72 | // should be able to remove once using jsonlite
73 | x.tasks = [x.tasks];
74 | }
75 | x.tasks.map(function(t){
76 | // for each tasks add it to the mermaid.tasks with el
77 | t.call(el);
78 | });
79 | }
80 | } catch(e){
81 | var p = document.createElement("pre");
82 | p.innerText = e;
83 | el.appendChild(p);
84 | }
85 | }
86 |
87 | },
88 |
89 | resize: function(el, width, height, instance) {
90 | }
91 | });
92 |
--------------------------------------------------------------------------------
/docs/articles/v7_dag_visualization_files/grViz-binding-1.0.10/grViz.js:
--------------------------------------------------------------------------------
1 | HTMLWidgets.widget({
2 |
3 | name: 'grViz',
4 |
5 | type: 'output',
6 |
7 | initialize: function(el, width, height) {
8 |
9 | return {
10 | // TODO: add instance fields as required
11 | };
12 | },
13 |
14 | renderValue: function(el, x, instance) {
15 | // Use this to sort of make our diagram responsive
16 | // or at a minimum fit within the bounds set by htmlwidgets
17 | // for the parent container
18 | function makeResponsive(el){
19 | var svg = el.getElementsByTagName("svg")[0];
20 | if (svg) {
21 | if (svg.width) {svg.removeAttribute("width")}
22 | if (svg.height) {svg.removeAttribute("height")}
23 | svg.style.width = "100%";
24 | svg.style.height = "100%";
25 | }
26 | }
27 |
28 | if (x.diagram !== "") {
29 |
30 | if (typeof x.config === "undefined"){
31 | x.config = {};
32 | x.config.engine = "dot";
33 | x.config.options = {};
34 | }
35 |
36 | try {
37 |
38 | el.innerHTML = Viz(x.diagram, format="svg", engine=x.config.engine, options=x.config.options);
39 |
40 | makeResponsive(el);
41 |
42 | if (HTMLWidgets.shinyMode) {
43 | // Get widget id
44 | var id = el.id;
45 |
46 | $("#" + id + " .node").click(function(e) {
47 | // Get node id
48 | var nodeid = e.currentTarget.id;
49 | // Get node text object and make an array
50 | var node_texts = $("#" + id + " #" + nodeid + " text");
51 | //var node_path = $("#" + nodeid + " path")[0];
52 | var text_array = node_texts.map(function() {return $(this).text(); }).toArray();
53 | // Build return object *obj* with node-id, node text values and node fill
54 | var obj = {
55 | id: nodeid,
56 | //fill: node_path.attributes.fill.nodeValue,
57 | //outerHMTL: node_path.outerHTML,
58 | nodeValues: text_array
59 | };
60 | // Send *obj* to Shiny's inputs (input$[id]+_click e.g.: input$vtree_click))
61 | Shiny.setInputValue(id + "_click", obj, {priority: "event"});
62 | });
63 | }
64 |
65 | // set up a container for tasks to perform after completion
66 | // one example would be add callbacks for event handling
67 | // styling
68 | if (typeof x.tasks !== "undefined") {
69 | if ((typeof x.tasks.length === "undefined") ||
70 | (typeof x.tasks === "function")) {
71 | // handle a function not enclosed in array
72 | // should be able to remove once using jsonlite
73 | x.tasks = [x.tasks];
74 | }
75 | x.tasks.map(function(t){
76 | // for each tasks add it to the mermaid.tasks with el
77 | t.call(el);
78 | });
79 | }
80 | } catch(e){
81 | var p = document.createElement("pre");
82 | p.innerText = e;
83 | el.appendChild(p);
84 | }
85 | }
86 |
87 | },
88 |
89 | resize: function(el, width, height, instance) {
90 | }
91 | });
92 |
--------------------------------------------------------------------------------
/man/dag_enrich_on_items.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/enrich.R
3 | \name{dag_enrich_on_items}
4 | \alias{dag_enrich_on_items}
5 | \alias{dag_enrich_on_genes}
6 | \title{Enrichment analysis on the number of annotated items}
7 | \usage{
8 | dag_enrich_on_items(dag, items, min_hits = 5, min_items = 10)
9 |
10 | dag_enrich_on_genes(dag, genes, min_hits = 5, min_genes = 10)
11 | }
12 | \arguments{
13 | \item{dag}{An \code{ontology_DAG} object.}
14 |
15 | \item{items}{A vector of item names.}
16 |
17 | \item{min_hits}{Minimal number of items in the term set.}
18 |
19 | \item{min_items}{Minimal size of the term set.}
20 |
21 | \item{genes}{A vector of gene IDs. The gene ID type can be found by directly printing the \code{ontology_DAG} object.}
22 |
23 | \item{min_genes}{Minimal number of genes.}
24 | }
25 | \value{
26 | A data frame with the following columns:
27 | \itemize{
28 | \item \code{term}: Term names.
29 | \item \code{n_hits}: Number of items in \code{items} intersecting to \code{t}'s annotated items.
30 | \item \code{n_anno}: Number of annotated items of \code{t}. Specifically for \code{dag_enrich_on_genes()}, this column
31 | is renamed to \code{n_gs}.
32 | \item \code{n_items}: Number of items in \code{items} intersecting to all annotated items in the DAG. Specifically
33 | for \code{dag_enrich_on_genes()}, this column is renamed to \code{n_genes}.
34 | \item \code{n_all}: Number of all annotated items in the DAG.
35 | \item \code{log2_fold_enrichment}: Defined as log2(observation/expected).
36 | \item \code{z_score}: Defined as (observed-expected)/sd.
37 | \item \code{p_value}: P-values from hypergeometric test.
38 | \item \code{p_adjust}: Adjusted p-values from the BH method.
39 | }
40 |
41 | The number of rows in the data frame is the same as the number of terms in the DAG.
42 | }
43 | \description{
44 | The analysis task is to evaluate which terms the given items are enriched to.
45 | }
46 | \details{
47 | The function tests whether the list of items are enriched in terms on the DAG.
48 | The test is based on the hypergeometric distribution. In the following 2x2 contingency table, \code{S} is the set of \code{items},
49 | for a term \code{t} in the DAG, \code{T} is the set of items annotated to \code{t} (by automatically merging from its offspring terms),
50 | the aim is to test whether \code{S} is over-represented in \code{T}.
51 |
52 | The universal set \code{all} corresponds to the full set of items annotated to the DAG.
53 |
54 | \if{html}{\out{
}}\preformatted{+----------+------+----------+-----+
55 | | | in S | not in S | all |
56 | +----------+------+----------+-----+
57 | | in T | x11 | x12 | x10 |
58 | | not in T | x21 | x22 | x20 |
59 | +----------+------+----------+-----+
60 | | all | x01 | x02 | x |
61 | +----------+------+----------+-----+
62 | }\if{html}{\out{
}}
63 |
64 | \code{dag_enrich_on_genes()} is the same as \code{dag_enrich_on_items()} which only changes the argument \code{item} to \code{gene}.
65 | }
66 | \examples{
67 | \dontrun{
68 | dag = create_ontology_DAG_from_GO_db(org_db = "org.Hs.eg.db")
69 | items = random_items(dag, 1000)
70 | df = dag_enrich_on_items(dag, items)
71 | }
72 | 1
73 | }
74 |
--------------------------------------------------------------------------------
/R/partition.R:
--------------------------------------------------------------------------------
1 |
2 | #' Partition the DAG
3 | #'
4 | #' @param dag An `ontology_DAG` object.
5 | #' @param level Depth in the DAG to cut. The DAG is cut below terms (or cut the links to their child terms) with `depth == level`.
6 | #' @param from A list of terms to cut. If it is set, `level` is ignored.
7 | #' @param term_pos Internally used.
8 | #'
9 | #' @details
10 | #' Let's call the terms below the `from` term as "top terms" because they will be on top of the sub-DAGs after the partitioning.
11 | #' It is possible that a term in the middle of the DAG can be traced back to more than one top terms.
12 | #' To partition all terms exclusively, a term is partitioned to the sub-DAG from the top term with the largest distance to the term.
13 | #' If a term has the same largest distances to several top terms, a random top term is selected.
14 | #'
15 | #' In `partition_by_size()`, the DAG is first reduced to a tree where a child term only has one parent.
16 | #' The partition is done recursively by cutting into its child-trees.
17 | #' The splitting stops when all the child-trees have size less than `size`.
18 | #'
19 | #' `NA` is assigned to the `from` terms, their ancestor terms, and terms having infinite directed distance to `from` terms.
20 | #'
21 | #' @export
22 | #' @returns A character vector of top terms in each partition.
23 | #' @examples
24 | #' \donttest{
25 | #' dag = create_ontology_DAG_from_GO_db()
26 | #' pa = partition_by_level(dag)
27 | #' table(pa)
28 | #' pa = partition_by_size(dag, size = 1000)
29 | #' table(pa)
30 | #' }
31 | #' 1
32 | partition_by_level = function(dag, level = 1, from = NULL, term_pos = NULL) {
33 | 
34 | if(is.null(from)) {
35 | depth = dag_depth(dag)
36 | max_depth = max(depth)
37 | if(level < 0 || level >= max_depth) { # fix: was `&&`, which can never be TRUE (level cannot be both negative and >= max_depth), so invalid levels were silently accepted
38 | stop("wrong value of `level`.")
39 | }
40 | from = which(depth == level)
41 | } else {
42 | from = term_to_node_id(dag, from, strict = FALSE)
43 | }
44 | 
45 | if(is.null(term_pos)) {
46 | if(dag_is_tree(dag)) {
47 | tree = dag
48 | } else {
49 | tree = dag_treelize(dag) # reduce the DAG to a tree so every term gets a single layout position
50 | }
51 | term_pos = cpp_node_pos_in_tree(tree, n_offspring(dag), 1, 360) ## in polar coordinate
52 | }
53 | 
54 | from = from[order(term_pos[from, "h"])] # process `from` terms ordered by height "h"; later (presumably deeper) assignments overwrite earlier ones -- see Details
55 | range = data.frame(left = term_pos[from, "x"] - term_pos[from, "width"]/2,
56 | right = term_pos[from, "x"] + term_pos[from, "width"]/2)
57 | 
58 | partition = rep(NA_character_, dag@n_terms)
59 | all_offspring = setdiff(seq_len(dag@n_terms), dag_ancestors(dag, from, in_labels = FALSE)) # ancestors of `from` keep NA, as documented in Details
60 | l_offspring = rep(FALSE, dag@n_terms)
61 | l_offspring[all_offspring] = TRUE
62 | for(i in seq_along(from)) {
63 | l = term_pos$x >= range$left[i] & term_pos$x <= range$right[i] & l_offspring # terms whose angular position falls inside the sector of from[i]
64 | partition[l] = dag@terms[ from[i] ]
65 | }
66 | 
67 | partition
68 | }
69 |
70 | #' @param size Number of terms in a cluster. The splitting stops on a term if all its child-trees are smaller than `size`.
71 | #' @rdname partition_by_level
72 | #' @importFrom stats dendrapply
73 | #' @export
74 | partition_by_size = function(dag, size = round(dag_n_terms(dag)/5)) {
75 | 
76 | tree = dag_treelize(dag) # reduce the DAG to a tree where each child keeps a single parent (see Details)
77 | 
78 | pa = cpp_partition_by_size(tree, as.integer(size)) # C++ recursion: returns, for each term, the index of its partition's top term
79 | pa[pa < 0] = NA # the C++ side initializes assignments to -1; unassigned terms become NA
80 | 
81 | tree@terms[pa] # map indices back to term names
82 | }
83 |
84 |
85 |
--------------------------------------------------------------------------------
/man/ontology_DAG-class.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/constructor.R
3 | \docType{class}
4 | \name{ontology_DAG-class}
5 | \alias{ontology_DAG-class}
6 | \alias{ontology_DAG}
7 | \title{The ontology_DAG class}
8 | \value{
9 | An \code{ontology_DAG} object.
10 | }
11 | \description{
12 | This class defines the DAG structure of an ontology.
13 | }
14 | \section{Slots}{
15 |
16 | \describe{
17 | \item{\code{terms}}{A character vector of length \code{n} of all term names. Other slots that store term-level information use the integer indices of terms.}
18 |
19 | \item{\code{n_terms}}{An integer scalar of the total number of terms in the DAG.}
20 |
21 | \item{\code{n_relations}}{An integer scalar of the total number of relations in the DAG.}
22 |
23 | \item{\code{lt_parents}}{A list of length \code{n}. Each element in the list is an integer index vector of the parent terms of the i^th term.}
24 |
25 | \item{\code{lt_children}}{A list of length \code{n}. Each element in the list is an integer index vector of the child terms of the i^th term.}
26 |
27 | \item{\code{lt_children_relations}}{A list of length \code{n}. Each element is a vector of the semantic relations between the i^th term and its child terms, e.g. a child "is_a" parent.
28 | The relations are represented as integers. The character name of the relations is in \code{attr(dag@lt_children_relations, "levels")}.}
29 |
30 | \item{\code{relations_DAG}}{A simple \code{ontology_DAG} object but constructed for relation types.}
31 |
32 | \item{\code{source}}{The source of the ontology. A character scalar only used as a mark of the returned object.}
33 |
34 | \item{\code{root}}{An integer scalar of the root term.}
35 |
36 | \item{\code{leaves}}{An integer vector of the indices of leaf terms.}
37 |
38 | \item{\code{alternative_terms}}{A named character vector of mappings between alternative terms to DAG terms.}
39 |
40 | \item{\code{tpl_sorted}}{An integer vector of reordered term indices which has been topologically sorted in the DAG. Terms are sorted first by the depth (maximal
41 | distance from root), then the number of child terms, then the number of parent terms, and last the term names.}
42 |
43 | \item{\code{tpl_pos}}{The position of the original term in the topologically sorted path (similar as the rank), e.g. the value of the first element in the vector
44 | is the position of term 1 in the topologically sorted path.}
45 |
46 | \item{\code{annotation}}{A list of two elements: \code{list} and \code{names}. The \code{dag@annotation$list} element contains a list of length \code{n} and each element
47 | is a vector of integer indices of annotated items. The full list of annotated items is in \code{dag@annotation$names}.}
48 |
49 | \item{\code{term_env}}{An environment which contains various term-level statistics. It is mainly for cache purpose.}
50 |
51 | \item{\code{aspect_ratio}}{A numeric vector of length two. The aspect ratio is calculated as \code{w/h}. For each term, there is a distance to root,
52 | \code{h} is the maximal distance of all terms, \code{w} is the maximal number of items with the same distance. The two values in the \code{aspect_ratio} slot
53 | use maximal distance to root (the height) and the shortest distance to root as the distance measure.}
54 |
55 | \item{\code{elementMetadata}}{An additional data frame with the same number of rows as the number of terms in DAG. Order of rows should be the same as order of terms in \code{dag@terms}.}
56 | }}
57 |
58 | \examples{
59 | 1
60 | # This function should not be used directly.
61 | }
62 |
--------------------------------------------------------------------------------
/inst/scripts/download.R:
--------------------------------------------------------------------------------
1 |
2 | setwd("~/workspace/ontology")
3 | 
4 | 
5 | # http://obofoundry.org/
6 | 
7 | 
8 | dir.create("OBOFoundry", showWarnings = FALSE)
9 | setwd("OBOFoundry")
10 | 
11 | library(jsonlite)
12 | 
13 | lt = fromJSON("http://obofoundry.org/registry/ontologies.jsonld") # full OBO Foundry registry
14 | tb = lt$ontologies
15 | saveRDS(tb, file = "OBOFoundry_meta_table.rds")
16 | 
17 | library(rvest) # NOTE(review): qq()/qqcat() used below come from GetoptLong -- confirm it is attached when this script runs
18 | options(timeout = 9999999) # ontology files can be huge; effectively disable the download timeout
19 | 
20 | for(i in seq_len(nrow(tb))) {
21 | qqcat("============ @{tb[i, 'id']} [@{i}/@{nrow(tb)}] ============\n")
22 | 
23 | html = read_html(qq("http://obofoundry.org/ontology/@{tb[i, 'id']}.html"))
24 | nodes = html %>% html_elements(xpath = "//div[contains (text(), 'Products')]/following-sibling::table/*/tr/td[1]/a[@href]") # download links in the "Products" table of each ontology page
25 | file = nodes %>% html_text()
26 | url = nodes %>% html_attr("href")
27 | 
28 | dir.create(tb$id[i], showWarnings = FALSE)
29 | 
30 | for(j in seq_along(file)) {
31 | dest = qq("@{tb$id[i]}/@{basename(file[j])}")
32 | 
33 | oe = try(header <- curlGetHeaders(url[j])) # fetch headers first to learn the remote file size
34 | if(!inherits(oe, "try-error")) {
35 | ln2 = header[grepl("Content-Length", header, ignore.case = TRUE)]
36 | 
37 | filesize = max(as.numeric(gsub('^Content-Length: (\\d+)\\s*$', "\\1", ln2, ignore.case = TRUE)))
38 | 
39 | if(file.exists(dest)) {
40 | if(file.info(dest)[1, "size"] == filesize) { # same size as remote -> assume already complete
41 | qqcat("already downloaded, skip.\n")
42 | next
43 | }
44 | }
45 | }
46 | oe = try(download.file(url[j], dest = dest))
47 | if(inherits(oe, "try-error")) {
48 | file.remove(dest) # remove the partial file left behind by a failed download
49 | }
50 | }
51 | }
52 |
53 |
54 |
55 | ##########################
56 | 
57 | setwd("~/workspace/ontology")
58 | 
59 | dir.create("BioPortal", showWarnings = FALSE)
60 | setwd("BioPortal")
61 | 
62 | options(timeout = 9999999)
63 | 
64 | ## https://bioportal.bioontology.org/
65 | apikey = # NOTE(review): API key intentionally blank; with an empty RHS this line continues and captures the next assignment (apikey = js = ...). Fill in a BioPortal key before running.
66 | js = fromJSON(qq("https://data.bioontology.org/ontologies?apikey=@{apikey}"))
67 | 
68 | saveRDS(js, file = "BioPortal_meta_table.rds")
69 | 
70 | 
71 | for(i in seq_len(nrow(js))) {
72 | acronym = js$acronym[i]
73 | submissions = js$links$submissions[i]
74 | 
75 | qqcat("============ @{acronym} [@{i}/@{nrow(js)}] ============\n")
76 | 
77 | oe = try(sub <- fromJSON(qq("@{submissions}?apikey=@{apikey}")))
78 | 
79 | if(inherits(oe, "try-error")) {
80 | next
81 | }
82 | 
83 | if(length(sub) == 0) {
84 | next
85 | }
86 | 
87 | hasOntologyLanguage = sub$hasOntologyLanguage[1]
88 | submission_id = sub[1, "@id"] # first submission in the list -- NOTE(review): presumably the latest; confirm API ordering
89 | 
90 | url = qq("@{submission_id}/download?apikey=@{apikey}")
91 | header = curlGetHeaders(url)
92 | ln = header[grepl("Content-Disposition: attachment; filename", header, ignore.case = TRUE)]
93 | ln2 = header[grepl("Content-Length", header, ignore.case = TRUE)]
94 | 
95 | if(length(ln)) {
96 | dest = gsub('^Content-Disposition: attachment; filename="(.*)".*$', "\\1", ln, ignore.case = TRUE) # file name from the Content-Disposition header
97 | filesize = as.numeric(gsub('^Content-Length: (\\d+)\\s*$', "\\1", ln2, ignore.case = TRUE))
98 | } else {
99 | next
100 | }
101 | 
102 | dest = qq("@{acronym}/@{dest}")
103 | 
104 | dir.create(acronym, showWarnings = FALSE)
105 | 
106 | if(file.exists(dest)) {
107 | qqcat("already downloaded, skip.\n") # NOTE(review): "skip" is printed before the size check; if sizes differ the file is still re-downloaded below
108 | if(file.info(dest)[1, "size"] == filesize) {
109 | next
110 | }
111 | }
112 | 
113 | qqcat(" hasOntologyLanguage: @{hasOntologyLanguage}\n")
114 | qqcat(" download: @{submission_id}/download\n")
115 | qqcat(" local: @{dest}\n")
116 | cat("\n")
117 | 
118 | download.file(qq("@{submission_id}/download?apikey=@{apikey}"), dest = dest)
119 | }
120 |
121 |
--------------------------------------------------------------------------------
/tests/testthat/tests_dist.R:
--------------------------------------------------------------------------------
1 |
library(testthat)


## export all functions
## When this file is sourced in a non-global environment (e.g. during
## `R CMD check`), copy every object of the installed "simona" namespace into
## the current environment so that unexported functions (the cpp_* wrappers
## tested below) can be called directly.
if(!identical(topenv(), .GlobalEnv)) {
	pkg_env = asNamespace("simona")
	all_objs = ls(envir = pkg_env)
	for(obj in all_objs) {
		assign(obj, get(obj, envir = pkg_env, inherits = FALSE))
	}
}

#### test a small dag

# Edges (upstream -> downstream):
#   a -> b, a -> c, b -> c, b -> d, c -> e, d -> f
# Term indices used in the tests below follow alphabetical order:
#   1=a, 2=b, 3=c, 4=d, 5=e, 6=f

parents = c("a", "a", "b", "b", "c", "d")
children = c("b", "c", "c", "d", "e", "f")

dag = create_ontology_DAG(parents, children)
25 |
26 | test_that("test tpl paths", {
27 | expect_equal(
28 | cpp_tpl_shortest_path_length(dag, 1, 3),
29 | 1
30 | )
31 | expect_equal(
32 | cpp_tpl_shortest_path_length(dag, 1, 5),
33 | 2
34 | )
35 | expect_equal(
36 | cpp_tpl_longest_path_length(dag, 1, 3),
37 | 2
38 | )
39 | expect_equal(
40 | cpp_tpl_longest_path_length(dag, 1, 5),
41 | 3
42 | )
43 | expect_equal(
44 | cpp_tpl_shortest_path_length(dag, 1, 4),
45 | cpp_tpl_longest_path_length(dag, 1, 4)
46 | )
47 |
48 | ## path
49 | expect_equal(
50 | cpp_tpl_shortest_path(dag, 1, 3),
51 | c(1, 3)
52 | )
53 | expect_equal(
54 | cpp_tpl_shortest_path_sum_value(dag, 1, 3, 1:6),
55 | 4
56 | )
57 | expect_equal(
58 | cpp_tpl_shortest_path(dag, 1, 5),
59 | c(1, 3, 5)
60 | )
61 | expect_equal(
62 | cpp_tpl_shortest_path_sum_value(dag, 1, 5, 1:6),
63 | 9
64 | )
65 | expect_equal(
66 | cpp_tpl_longest_path(dag, 1, 3),
67 | c(1, 2, 3)
68 | )
69 | expect_equal(
70 | cpp_tpl_longest_path_sum_value(dag, 1, 3, 1:6),
71 | 6
72 | )
73 | expect_equal(
74 | cpp_tpl_longest_path(dag, 1, 5),
75 | c(1, 2, 3, 5)
76 | )
77 | expect_equal(
78 | cpp_tpl_longest_path_sum_value(dag, 1, 5, 1:6),
79 | 11
80 | )
81 | expect_equal(
82 | cpp_tpl_shortest_path(dag, 1, 4),
83 | cpp_tpl_longest_path(dag, 1, 4)
84 | )
85 |
86 | ## test the other distance method
87 | m = cpp_longest_distances_directed(dag, 1:6)
88 | for(i in 1:6) {
89 | for(j in 1:6) {
90 | expect_equal(
91 | m[i, j],
92 | cpp_tpl_longest_path_length(dag, i, j)
93 | )
94 | }
95 | }
96 |
97 | m = cpp_shortest_distances_directed(dag, 1:6)
98 | for(i in 1:6) {
99 | for(j in 1:6) {
100 | expect_equal(
101 | m[i, j],
102 | cpp_tpl_shortest_path_length(dag, i, j)
103 | )
104 | }
105 | }
106 | })
107 |
### test on GO BP

dag = create_ontology_DAG_from_GO_db()
depth = dag_depth(dag)

# Randomly sample ancestor/offspring pairs from GO BP and check that the
# tpl-based and the matrix-based implementations agree on both distance types.
test_that("test two dist methods with GO BP", {
	for(i in 1:10) {
		# pick a reasonably deep term and one of its ancestors, so a directed
		# path i -> j is guaranteed to exist
		go_id_1 = sample(dag@terms[depth > 5], 1)
		go_id_2 = sample(dag_ancestors(dag, go_id_1), 1)
		j = which(dag@terms == go_id_1)
		# note: this overwrites the loop variable `i`; harmless in R because
		# `for` resets it at the start of each iteration
		i = which(dag@terms == go_id_2)

		expect_equal(
			cpp_tpl_shortest_path_length(dag, i, j),
			cpp_shortest_distances_directed(dag, c(i, j))[1, 2]
		)

		expect_equal(
			cpp_tpl_longest_path_length(dag, i, j),
			cpp_longest_distances_directed(dag, c(i, j))[1, 2]
		)
	}
})



# Manual benchmark of the four exported distance functions; never executed
# during automated testing.
if(FALSE) {

dag = create_ontology_DAG_from_GO_db()
system.time(d1 <- shortest_distances_via_NCA(dag, dag@terms[1:1000])); rm(d1); gc();
system.time(d2 <- longest_distances_via_LCA(dag, dag@terms[1:1000])); rm(d2); gc();
system.time(d3 <- shortest_distances_directed(dag, dag@terms[1:1000])); rm(d3); gc();
system.time(d4 <- longest_distances_directed(dag, dag@terms[1:1000])); rm(d4); gc();

}
144 |
--------------------------------------------------------------------------------
/inst/scripts/parse_ttl.pl:
--------------------------------------------------------------------------------
use strict;

# Parse an ontology file in Turtle (.ttl) format and print a CSV table to
# stdout with columns: id, prefLabel, notation, definition, parent,
# relation_type.
#
# Usage: perl parse_ttl.pl input.ttl[.gz] [relation_type ...]
#
# Only subjects typed as `owl:Class` are treated as terms. The "is_a"
# relation is taken from the `rdfs:subClassOf` predicate (or a predicate
# matching /is_?a/i); additional relation types can be passed as extra
# command-line arguments.

my $file = shift(@ARGV);
my @relation_types = @ARGV;

# transparently support gzip-compressed input
if($file =~/\.gz$/) {
	open FILE, "gzip -d -c $file |" or die "cannot open $file.";
} else {
	open FILE, $file or die "cannot open $file.";
}

# id -> { prefLabel, notation, definition, parent => {}, relation_type => {} }
my $section = {};
my $id;
my $i_record = 0;

# NOTE(review): the readline operators `<FILE>` below had been stripped to
# bare `()` in an earlier copy of this script (angle brackets eaten by
# markup); they are restored here. The shadowed, unused top-level
# `my $line;` was removed.
while(my $line = <FILE>) {
	# skip @prefix declarations
	if($line =~/^\@prefix /) {
		next;
	}

	if($line =~/^\s*$/) {
		next;
	} else {
		# a term record starts at a line containing `owl:Class`;
		# records under /STY/ (semantic types) are skipped
		if($line =~/owl:Class/ and $line !~/\/STY\//) {
			$line =~/<(.*?)>/;
			$id = $1;
			$section->{$id} = {};
			$i_record ++;

			# if($i_record % 10000 == 0) {
			# 	print "$i_record finished...\n";
			# }

			# consume the rest of the record; a record ends at a line
			# terminated by "."
			while($line = <FILE>) {
				if($line =~/skos:prefLabel/) {
					$line =~/"""(.*?)"""/;
					$section->{$id}->{prefLabel} = $1;
					# double quotes would break the CSV output
					$section->{$id}->{prefLabel} =~s/"/``/g;
				}
				if($line =~/skos:notation/) {
					$line =~/"""(.*?)"""/;
					$section->{$id}->{notation} = $1;
				}
				if($line =~/skos:definition/) {
					$line =~/"""(.*?)"""/;
					$section->{$id}->{definition} = $1;
					$section->{$id}->{definition} =~s/"/``/g;
				}
				# "is_a" parents from rdfs:subClassOf or an is_a-like predicate
				if($line =~/rdfs:subClassOf/ or $line =~/\/is_?a/i) {
					if($line =~/<([^<]+?)> ;/) {
						if(!defined($section->{$id}->{parent})) {
							$section->{$id}->{parent} = {};
							$section->{$id}->{parent}->{$1} = 1;
							$section->{$id}->{relation_type} = {};
							$section->{$id}->{relation_type}->{$1} = "is_a";

						} else {
							$section->{$id}->{parent}->{$1} = 1;
							$section->{$id}->{relation_type}->{$1} = "is_a";
						}
					}
				}
				# user-requested additional relation types
				foreach my $type (@relation_types) {
					if($line =~/\/$type/i) {
						if($line =~/<([^<]+?)> ;/) {
							if(!defined($section->{$id}->{parent})) {
								$section->{$id}->{parent} = {};
								$section->{$id}->{parent}->{$1} = 1;
								$section->{$id}->{relation_type} = {};
								$section->{$id}->{relation_type}->{$1} = $type;
							} else {
								$section->{$id}->{parent}->{$1} = 1;
								$section->{$id}->{relation_type}->{$1} = $type;
							}
						}
					}
				}

				if($line =~/\.$/) {
					last;
				}
			}
		} else {
			# not an owl:Class record: skip until the record terminator
			while($line = <FILE>) {
				if($line =~/\.$/) {
					last;
				}
			}
		}
	}
}

if($i_record == 0) {
	die "cannot find any object of 'owl:Class'.";
}

print "\"id\",\"prefLabel\",\"notation\",\"definition\",\"parent\",\"relation_type\"\n";

foreach $id (sort keys %$section) {
	print "\"$id\"";
	if(!defined($section->{$id}->{prefLabel})) {
		print ",\"\"";
	} else {
		print ",\"$section->{$id}->{prefLabel}\"";
	}
	if(!defined($section->{$id}->{notation})) {
		print ",\"\"";
	} else {
		print ",\"$section->{$id}->{notation}\"";
	}
	if(!defined($section->{$id}->{definition})) {
		print ",\"\"";
	} else {
		print ",\"$section->{$id}->{definition}\"";
	}
	if(!defined($section->{$id}->{parent})) {
		print ",\"\"";
	} else {
		# note: keys of {relation_type} equal keys of {parent} by construction
		print ",\"".join(",", keys %{$section->{$id}->{relation_type}})."\"";
	}
	if(!defined($section->{$id}->{relation_type})) {
		print ",\"\"";
	} else {
		print ",\"".join(",", values %{$section->{$id}->{relation_type}})."\"";
	}
	print "\n";
}
127 |
128 |
--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
/* http://gregfranko.com/blog/jquery-best-practices/ */
(function($) {
  $(function() {

    // sticky navbar that hides on scroll-down
    $('.navbar-fixed-top').headroom();

    // keep body content below the fixed navbar, also on window resize
    $('body').css('padding-top', $('.navbar').height() + 10);
    $(window).resize(function(){
      $('body').css('padding-top', $('.navbar').height() + 10);
    });

    $('[data-toggle="tooltip"]').tooltip();

    // Highlight the navbar entry whose path shares the longest prefix with
    // the current page's path.
    var cur_path = paths(location.pathname);
    var links = $("#navbar ul li a");
    var max_length = -1;
    var pos = -1;
    for (var i = 0; i < links.length; i++) {
      if (links[i].getAttribute("href") === "#")
        continue;
      // Ignore external links
      if (links[i].host !== location.host)
        continue;

      var nav_path = paths(links[i].pathname);

      var length = prefix_length(nav_path, cur_path);
      if (length > max_length) {
        max_length = length;
        pos = i;
      }
    }

    // Add class to parent <li>, and enclosing <li> if in dropdown
    // (NOTE(review): the <li> tags of this comment had been eaten by markup
    // in an earlier copy; restored.)
    if (pos >= 0) {
      var menu_anchor = $(links[pos]);
      menu_anchor.parent().addClass("active");
      menu_anchor.closest("li.dropdown").addClass("active");
    }
  });

  // Split a pathname into its components, dropping the leading empty piece
  // and a trailing "index.html" / empty piece.
  function paths(pathname) {
    var pieces = pathname.split("/");
    pieces.shift(); // always starts with /

    var end = pieces[pieces.length - 1];
    if (end === "index.html" || end === "")
      pieces.pop();
    return(pieces);
  }

  // Length of the common prefix of two path-component arrays.
  // Returns -1 if not found
  function prefix_length(needle, haystack) {
    if (needle.length > haystack.length)
      return(-1);

    // Special case for length-0 haystack, since for loop won't run
    if (haystack.length === 0) {
      return(needle.length === 0 ? 0 : -1);
    }

    for (var i = 0; i < haystack.length; i++) {
      if (needle[i] != haystack[i])
        return(i);
    }

    return(haystack.length);
  }

  /* Clipboard --------------------------*/

  // Temporarily replace an element's tooltip text with `msg`, showing it once.
  function changeTooltipMessage(element, msg) {
    var tooltipOriginalTitle=element.getAttribute('data-original-title');
    element.setAttribute('data-original-title', msg);
    $(element).tooltip('show');
    element.setAttribute('data-original-title', tooltipOriginalTitle);
  }

  if(ClipboardJS.isSupported()) {
    $(document).ready(function() {
      // NOTE(review): the button markup had been stripped to "" in an
      // earlier copy of this file; restored from the standard pkgdown
      // template -- verify against the pkgdown version in use.
      var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' type='submit' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>";

      $("div.sourceCode").addClass("hasCopyButton");

      // Insert copy buttons:
      $(copyButton).prependTo(".hasCopyButton");

      // Initialize tooltips:
      $('.btn-copy-ex').tooltip({container: 'body'});

      // Initialize clipboard; strip "#>" output lines from the copied text:
      var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
        text: function(trigger) {
          return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, "");
        }
      });

      clipboardBtnCopies.on('success', function(e) {
        changeTooltipMessage(e.trigger, 'Copied!');
        e.clearSelection();
      });

      // BUG FIX: `e` was referenced without being declared as a parameter,
      // which would throw a ReferenceError whenever copying failed.
      clipboardBtnCopies.on('error', function(e) {
        changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
      });
    });
  }
})(window.jQuery || window.$)
109 |
--------------------------------------------------------------------------------
/vignettes/main.css:
--------------------------------------------------------------------------------
/* Stylesheet for the package vignettes rendered to HTML. */
body {
background-color: #fff;
margin: 1em auto;
max-width: 1000px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.5;
}
/* table of contents box */
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 600px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
/* tables: horizontal rules only, striped header/even rows */
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
/* general text elements */
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
hr {
border-style: solid;
border: none;
border-top: 1px solid #777;
margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
/* code blocks and inline code */
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 95%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
img {
max-width: 100%;
}
/* headings */
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
/* links */
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }

/* syntax-highlighting token colors (pandoc highlighting classes) */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }


.caption {
font-style: italic;
color: grey;
}
182 |
--------------------------------------------------------------------------------
/docs/articles/main.css:
--------------------------------------------------------------------------------
/* Stylesheet for rendered HTML articles (identical to vignettes/main.css). */
body {
background-color: #fff;
margin: 1em auto;
max-width: 1000px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.5;
}
/* table of contents box */
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 600px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
/* tables: horizontal rules only, striped header/even rows */
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
table thead, table tr.even {
background-color: #f7f7f7;
}
/* general text elements */
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
hr {
border-style: solid;
border: none;
border-top: 1px solid #777;
margin: 28px 0;
}
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
/* code blocks and inline code */
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 95%;
}
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
img {
max-width: 100%;
}
/* headings */
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
/* links */
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }

/* syntax-highlighting token colors (pandoc highlighting classes) */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }


.caption {
font-style: italic;
color: grey;
}
182 |
--------------------------------------------------------------------------------
/man/import_obo.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/import.R
3 | \name{import_obo}
4 | \alias{import_obo}
5 | \alias{import_owl}
6 | \alias{import_ontology}
7 | \alias{import_ttl}
8 | \title{Import ontology file to an ontology_DAG object}
9 | \usage{
10 | import_obo(
11 | file,
12 | relation_type = character(0),
13 | inherit_relations = TRUE,
14 | verbose = simona_opt$verbose,
15 | ...
16 | )
17 |
18 | import_owl(
19 | file,
20 | relation_type = character(0),
21 | inherit_relations = TRUE,
22 | verbose = simona_opt$verbose,
23 | ...
24 | )
25 |
26 | import_ontology(
27 | file,
28 | robot_jar = simona_opt$robot_jar,
29 | JAVA_ARGS = "",
30 | verbose = simona_opt$verbose,
31 | ...
32 | )
33 |
34 | import_ttl(file, relation_type = "part_of", verbose = simona_opt$verbose, ...)
35 | }
36 | \arguments{
37 | \item{file}{Path of the ontology file or an URL.}
38 |
39 | \item{relation_type}{Semantic relation types to include. Note \code{is_a} relation is always included.}
40 |
\item{inherit_relations}{Relations may also be structured as a DAG. It controls whether to merge with a relation's offspring relations.}
42 |
43 | \item{verbose}{Whether to print messages.}
44 |
45 | \item{...}{Pass to \code{\link[=create_ontology_DAG]{create_ontology_DAG()}}.}
46 |
47 | \item{robot_jar}{The path of the \code{robot.jar} file. It can be downloaded from https://github.com/ontodev/robot/releases.
48 | Internally, the file is converted to the obo format and parsed by \code{import_obo()}. The value of \code{robot_jar} can be
49 | set as a global option \code{simona_opt$robot_jar = ...}.}
50 |
51 | \item{JAVA_ARGS}{Options for \code{java}. For example you can set \code{-Xmx20G} if you want to increase the memory to 20G for java.}
52 | }
53 | \value{
54 | An \code{ontology_DAG} object.
55 | }
56 | \description{
57 | Import ontology file to an ontology_DAG object
58 | }
59 | \details{
60 | Public bio-ontologies can be obtained from \href{http://obofoundry.org/}{Ontology Foundry} or \href{https://bioportal.bioontology.org/}{BioPortal}.
61 |
62 | The \code{import_obo()} function parses the ontology file in \code{.obo} format. To parse other formats, external tool \code{robot.jar} is required.
63 |
\code{import_owl()} only recognizes \verb{<owl:Class>} and \verb{<owl:ObjectProperty>}. If the .owl file does not contain these tags,
please use \code{import_ontology()} directly.
66 |
67 | \code{robot.jar} can automatically recognize the following formats:
68 | \itemize{
69 | \item \code{json}: OBO Graphs JSON
70 | \item \code{obo}: OBO Format
71 | \item \code{ofn}: OWL Functional
72 | \item \code{omn}: Manchester
73 | \item \code{owl}: RDF/XML
74 | \item \code{owx}: OWL/XML
75 | \item \code{ttl}: Turtle
76 | }
77 |
78 | The description of the ROBOT tool is at \url{http://robot.obolibrary.org/convert}.
79 |
80 | \code{import_ttl()} is a simple parser for the \code{.ttl} format files. It only recognizes
81 | terms that have the \code{owl:Class} object. The "is_a" relation is recognized by the predicate \code{rdfs:subClassOf}
82 | or an ontology-specific predicate that contains \verb{.*/isa}. Other relation types are defined with
83 | the predicate \code{owl:ObjectProperty}. The format is parsed by a Perl script \code{system.file("scripts", "parse_ttl.pl", package = "simona")}.
84 | }
85 | \examples{
86 | \donttest{
87 | # The plant ontology: http://obofoundry.org/ontology/po.html
88 | import_obo("https://raw.githubusercontent.com/Planteome/plant-ontology/master/po.obo")
89 | }
90 | \donttest{
91 | import_owl("http://purl.obolibrary.org/obo/po.owl")
92 | }
93 | \dontrun{
94 | # The plant ontology: http://obofoundry.org/ontology/po.html
95 | dag = import_ontology("http://purl.obolibrary.org/obo/po.owl", robot_jar = ...)
96 | }
97 | \donttest{
98 | # file is from https://bioportal.bioontology.org/ontologies/MSTDE
99 | import_ttl("https://jokergoo.github.io/simona/MSTDE.ttl")
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/man/create_ontology_DAG.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/constructor.R
3 | \name{create_ontology_DAG}
4 | \alias{create_ontology_DAG}
5 | \title{Create the ontology_DAG object}
6 | \usage{
7 | create_ontology_DAG(
8 | parents,
9 | children,
10 | relations = NULL,
11 | relations_DAG = NULL,
12 | source = "Ontology",
13 | annotation = NULL,
14 | remove_cyclic_paths = FALSE,
15 | remove_rings = FALSE,
16 | alternative_terms = list(),
17 | verbose = simona_opt$verbose
18 | )
19 | }
20 | \arguments{
21 | \item{parents}{A character vector of parent terms. You can also construct the \code{ontology_DAG} object by a list of parent-child links. See \strong{Examples}.}
22 |
23 | \item{children}{A character vector of child terms.}
24 |
25 | \item{relations}{A character vector of parent-child relations, e.g. "is_a", "part_of", or self-defined semantic relations.
26 | If it is set, it should have the same length as \code{parents} and \code{children}.}
27 |
28 | \item{relations_DAG}{If the relation types have hierarchical relations, it can also be constructed by \code{create_ontology_DAG()} first. See \strong{Examples}.
29 | When the DAG for relation types is provided, the ancestor/offspring relationship of relation types will be taken into consideration automatically.}
30 |
31 | \item{source}{Source of the ontology. It is only used as a label of the object.}
32 |
33 | \item{annotation}{A list of character vectors which contain items annotated to the terms. Names of the list should be the term names. In the DAG, items
34 | annotated to a term will also be annotated to its parents. Such merging
35 | is applied automatically in the package.}
36 |
\item{remove_cyclic_paths}{Whether to remove cyclic paths. If a cyclic path is represented as \verb{[a, b, ..., z, a]},
38 | the last link (i.e. \code{z->a}) is simply removed. If the value is set to \code{FALSE} and if there are cyclic paths, there
39 | will be an error that lists all cyclic paths.}
40 |
41 | \item{remove_rings}{There might be rings that are isolated to the main DAG where there are no roots on the rings, thus they cannot be attached to the main DAG. If the value
42 | of \code{remove_rings} is set to \code{TRUE}, such rings are removed.}
43 |
44 | \item{alternative_terms}{A named list or vector that contains mappings from alternative term IDs to terms used in the DAG. In an ontology, there
might be old term IDs marked as "replaced_by", "consider" or "alt_id" in the ".obo" file. You can provide mappings from old term IDs to current term IDs with this argument.
If it is a one-to-one mapping, the mapping
can be a named vector where alternative term IDs are names and DAG term IDs are values. If it is a one-to-many mapping, the variable
48 | should be a named list where each member vector will first be matched to the DAG terms. If the mapping is still one-to-many, the first one is selected.}
49 |
50 | \item{verbose}{Whether to print messages.}
51 | }
52 | \value{
53 | An \code{ontology_DAG} object.
54 | }
55 | \description{
56 | Create the ontology_DAG object
57 | }
58 | \examples{
59 | parents = c("a", "a", "b", "b", "c", "d")
60 | children = c("b", "c", "c", "d", "e", "f")
61 | dag = create_ontology_DAG(parents, children)
62 |
63 | # with annotations
64 | annotation = list(
65 | "a" = c("t1", "t2", "t3"),
66 | "b" = c("t3", "t4"),
67 | "c" = "t5",
68 | "d" = "t7",
69 | "e" = c("t4", "t5", "t6", "t7"),
70 | "f" = "t8"
71 | )
72 | dag = create_ontology_DAG(parents, children, annotation = annotation)
73 |
74 | # with relations
75 | dag = create_ontology_DAG(parents, children,
76 | relations = c("is_a", "part_of", "is_a", "part_of", "is_a", "part_of"))
77 |
78 | # with relations_DAG
79 | relations_DAG = create_ontology_DAG(c("r2", "r2"), c("r3", "r4"))
80 | dag = create_ontology_DAG(parents, children,
81 | relations = c("r1", "r2", "r1", "r3", "r1", "r4"),
82 | relations_DAG = relations_DAG)
83 |
84 | # with a list of parent-child relations
85 | dag = create_ontology_DAG(c("a-b", "a-c", "b-c", "b-d", "c-e", "e-f"))
86 | }
87 |
--------------------------------------------------------------------------------
/R/dist.R:
--------------------------------------------------------------------------------
1 |
#' Distance on the DAG
#'
#' @param dag An `ontology_DAG` object.
#' @param terms A vector of term names.
#' @param verbose Whether to print messages.
#'
#' @details
#' Denote two terms as `a` and `b`, a common ancestor as `c`, and the distance function `d()` calculates the longest
#' distance or the shortest distance depending on the function.
#'
#' - `shortest_distances_via_NCA()`: It calculates the smallest `d(c, a) + d(c, b)` where `d()` calculates the shortest distance between two terms. In this case,
#'   `c` is the NCA (nearest common ancestor) of `a` and `b`.
#' - `longest_distances_via_LCA()`: It calculates the largest `d(c, a) + d(c, b)` where `d()` calculates the longest distance between two terms *via the LCA (lowest common ancestor) term*. In this case,
#'   `c` is the LCA of `a` and `b`.
#' - `shortest_distances_directed()`: It calculates `d(a, b)` where `d()` calculates the shortest distance between two terms. The distance is only calculated when `a` is an ancestor of `b`, otherwise the distance value is -1.
#' - `longest_distances_directed()`: It calculates `d(a, b)` where `d()` calculates the longest distance between two terms. The distance is only calculated when `a` is an ancestor of `b`, otherwise the distance value is -1.
#' @rdname distance
#' @export
#' @returns A numeric distance matrix.
#' @examples
#' parents = c("a", "a", "b", "b", "c", "d")
#' children = c("b", "c", "c", "d", "e", "f")
#' dag = create_ontology_DAG(parents, children)
#' shortest_distances_via_NCA(dag, letters[1:6])
#' longest_distances_via_LCA(dag, letters[1:6])
#' shortest_distances_directed(dag, letters[1:6])
#' longest_distances_directed(dag, letters[1:6])
shortest_distances_via_NCA = function(dag, terms, verbose = simona_opt$verbose) {
	# `terms` may be term names or integer term indices
	if(is.character(terms)) {
		id = term_to_node_id(dag, terms, strict = FALSE)
	} else {
		id = terms
	}
	if(any(duplicated(id))) {
		# BUG FIX: the message previously referred to a non-existent `term` argument
		stop("`terms` should not be duplicated.")
	}
	d = exec_under_message_condition({
		cpp_shortest_distances_via_NCA(dag, id)
	}, verbose = verbose)

	# label rows/columns with the corresponding term names
	dimnames(d) = list(dag@terms[id], dag@terms[id])
	d
}
45 |
#' @rdname distance
#' @export
longest_distances_via_LCA = function(dag, terms, verbose = simona_opt$verbose) {
	# `terms` may be term names or integer term indices
	if(is.character(terms)) {
		id = term_to_node_id(dag, terms, strict = FALSE)
	} else {
		id = terms
	}
	if(any(duplicated(id))) {
		# BUG FIX: the message previously referred to a non-existent `term` argument
		stop("`terms` should not be duplicated.")
	}
	# the path-sum with a per-node weight of 1 counts nodes on the path through
	# the LCA (picked by maximal depth), so subtract 1 to obtain the edge count
	d = exec_under_message_condition({
		cpp_max_ancestor_path_sum_value(dag, id, dag_depth(dag), rep(1, dag@n_terms)) - 1
	}, verbose = verbose)

	dimnames(d) = list(dag@terms[id], dag@terms[id])
	d
}
64 |
#' @rdname distance
#' @export
shortest_distances_directed = function(dag, terms, verbose = simona_opt$verbose) {
	# `terms` may be term names or integer term indices
	if(is.character(terms)) {
		id = term_to_node_id(dag, terms, strict = FALSE)
	} else {
		id = terms
	}
	if(any(duplicated(id))) {
		# BUG FIX: the message previously referred to a non-existent `term` argument
		stop("`terms` should not be duplicated.")
	}
	d = exec_under_message_condition({
		cpp_shortest_distances_directed(dag, id)
	}, verbose = verbose)

	# label rows/columns with the corresponding term names
	dimnames(d) = list(dag@terms[id], dag@terms[id])
	d
}
83 |
84 | #' @rdname distance
85 | #' @export
longest_distances_directed = function(dag, terms, verbose = simona_opt$verbose) {

	# `terms` can be term names or integer term indices; normalize to indices
	if(is.character(terms)) {
		id = term_to_node_id(dag, terms, strict = FALSE)
	} else {
		id = terms
	}

	# duplicated terms would produce duplicated rows/columns in the distance matrix
	# (message fixed: the argument is `terms`, not `term`)
	if(any(duplicated(id))) {
		stop("`terms` should not be duplicated.")
	}

	# directed: distances only follow parent->child links;
	# unreachable pairs are presumably encoded as -1 -- TODO confirm in C++
	d = exec_under_message_condition({
		cpp_longest_distances_directed(dag, id)
	}, verbose = verbose)

	dimnames(d) = list(dag@terms[id], dag@terms[id])
	d
}
102 |
103 |
# Internal helper: per-pair longest distances split into the two legs from
# the LCA to each term (see the C++ function for the returned structure).
longest_distances_from_LCA = function(dag, id, verbose = simona_opt$verbose) {
	res = exec_under_message_condition(
		{
			cpp_longest_distances_from_LCA(dag, id)
		},
		verbose = verbose
	)
	res
}
109 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | S3method(print,ontology_tree)
4 | export(CA_terms)
5 | export(LCA_depth)
6 | export(LCA_term)
7 | export(MICA_IC)
8 | export(MICA_term)
9 | export(NCA_term)
10 | export(add_annotation)
11 | export(all_group_sim_methods)
12 | export(all_term_IC_methods)
13 | export(all_term_sim_methods)
14 | export(alternative_GO_terms)
15 | export(annotated_terms)
16 | export(avg_children)
17 | export(avg_parents)
18 | export(create_ontology_DAG)
19 | export(create_ontology_DAG_from_GO_db)
20 | export(create_ontology_DAG_from_igraph)
21 | export(dag_add_random_children)
22 | export(dag_all_terms)
23 | export(dag_ancestors)
24 | export(dag_as_DOT)
25 | export(dag_as_dendrogram)
26 | export(dag_as_igraph)
27 | export(dag_children)
28 | export(dag_circular_viz)
29 | export(dag_depth)
30 | export(dag_distinct_ancestors)
31 | export(dag_enrich_on_genes)
32 | export(dag_enrich_on_items)
33 | export(dag_enrich_on_offsprings)
34 | export(dag_filter)
35 | export(dag_graphviz)
36 | export(dag_has_terms)
37 | export(dag_height)
38 | export(dag_is_leaf)
39 | export(dag_leaves)
40 | export(dag_longest_dist_from_ancestors)
41 | export(dag_longest_dist_to_offspring)
42 | export(dag_n_leaves)
43 | export(dag_n_relations)
44 | export(dag_n_terms)
45 | export(dag_offspring)
46 | export(dag_parents)
47 | export(dag_permutate_children)
48 | export(dag_random)
49 | export(dag_random_tree)
50 | export(dag_reorder)
51 | export(dag_root)
52 | export(dag_shiny)
53 | export(dag_shortest_dist_from_ancestors)
54 | export(dag_shortest_dist_from_root)
55 | export(dag_shortest_dist_to_leaves)
56 | export(dag_shortest_dist_to_offspring)
57 | export(dag_siblings)
58 | export(dag_treelize)
59 | export(group_sim)
60 | export(import_obo)
61 | export(import_ontology)
62 | export(import_owl)
63 | export(import_ttl)
64 | export(longest_distances_directed)
65 | export(longest_distances_via_LCA)
66 | export(max_ancestor_id)
67 | export(max_ancestor_path_sum)
68 | export(max_ancestor_v)
69 | export(method_param)
70 | export(n_ancestors)
71 | export(n_annotations)
72 | export(n_children)
73 | export(n_connected_leaves)
74 | export(n_offspring)
75 | export(n_parents)
76 | export(ontology_DAG)
77 | export(ontology_chebi)
78 | export(ontology_go)
79 | export(ontology_hp)
80 | export(ontology_kw)
81 | export(ontology_pw)
82 | export(ontology_rdo)
83 | export(ontology_reactome)
84 | export(ontology_vt)
85 | export(partition_by_level)
86 | export(partition_by_size)
87 | export(random_items)
88 | export(random_terms)
89 | export(shortest_distances_directed)
90 | export(shortest_distances_via_NCA)
91 | export(simona_opt)
92 | export(term_IC)
93 | export(term_annotations)
94 | export(term_sim)
95 | exportClasses(ontology_DAG)
96 | exportMethods("[")
97 | exportMethods("[[")
98 | exportMethods('mcols<-')
99 | exportMethods(mcols)
100 | exportMethods(show)
101 | import(ComplexHeatmap)
102 | import(GlobalOptions)
103 | import(Rcpp)
104 | import(grid)
105 | import(igraph)
106 | import(shiny)
107 | import(fastmatch)
108 | importFrom(GetoptLong,qq)
109 | importFrom(Polychrome,alphabet.colors)
110 | importFrom(circlize,rand_color)
111 | importFrom(grDevices,col2rgb)
112 | importFrom(grDevices,dev.off)
113 | importFrom(grDevices,dev.size)
114 | importFrom(grDevices,png)
115 | importFrom(grDevices,rgb)
116 | importFrom(graphics,barplot)
117 | importFrom(graphics,par)
118 | importFrom(matrixStats,colMaxs)
119 | importFrom(matrixStats,rowMaxs)
120 | importFrom(methods,as)
121 | importFrom(methods,new)
122 | importFrom(stats,dendrapply)
123 | importFrom(stats,p.adjust)
124 | importFrom(stats,phyper)
125 | importFrom(stats,quantile)
126 | importFrom(stats,runif)
127 | importFrom(stats,sd)
128 | importFrom(utils,data)
129 | importFrom(utils,download.file)
130 | importFrom(utils,getFromNamespace)
131 | importFrom(utils,packageDescription)
132 | importFrom(utils,read.csv)
133 | importFrom(utils,read.table)
134 | importFrom(xml2,read_xml)
135 | importFrom(xml2,xml_attr)
136 | importFrom(xml2,xml_find_all)
137 | importFrom(xml2,xml_ns)
138 | importFrom(xml2,xml_text)
139 | importMethodsFrom(S4Vectors,'mcols<-')
140 | importMethodsFrom(S4Vectors,mcols)
141 | useDynLib(simona, .registration = TRUE)
142 |
--------------------------------------------------------------------------------
/tests/testthat/tests_reorder.R:
--------------------------------------------------------------------------------
library(testthat)




# Edges of the small test DAG: a->b, a->c, b->c, b->d, c->e, d->f
parents = c("a", "a", "b", "b", "c", "d")
children = c("b", "c", "c", "d", "e", "f")

dag = create_ontology_DAG(parents, children)

test_that("test dag_reorder", {
	# larger values on the terms of the second branch should reverse the
	# child order of term 2 (presumably "b") -- values are given per term
	dag2 = dag_reorder(dag, value = c(1, 1, 10, 1, 10, 1))
	expect_equal(dag2@lt_children[[2]], rev(dag@lt_children[[2]]))

	# values can apparently also be supplied only for the leaf terms -- TODO confirm
	dag3 = dag_reorder(dag, value = c(10, 1))
	expect_equal(dag3@lt_children[[2]], rev(dag@lt_children[[2]]))
})
18 |
19 |
# dag_permutate_children() randomly shuffles the children of every term;
# the calls below only exercise the code path (output is random, no expectations)
dag = create_ontology_DAG(c("a-b", "a-c", "a-d", "a-e", "a-f", "b-g", "b-h", "b-i", "b-j"))
dag2 = dag_permutate_children(dag)
dag_children(dag2, "a")
dag_children(dag2, "b")

# a DAG with multi-parent terms, reduced to a tree; the "counterpart" of a
# term appears to be the term(s) connected to it in the DAG but placed in a
# different branch of the tree -- TODO confirm exact semantics
dag = create_ontology_DAG(c("a-h", "a-b", "a-c", "a-d", "b-e", "b-f", "c-g", "h-g", "d-e"))
tree = dag_treelize(dag)
lt = cpp_get_force_counterpart(dag@lt_children, dag@lt_parents, tree@lt_children, tree@lt_parents, dag@root)

test_that("test cpp_get_force_counterpart", {
	expect_equal(lt[[1]], integer(0))
	expect_equal(lt[[2]], 4)
	expect_equal(lt[[3]], 8)
	expect_equal(lt[[4]], 5)
	expect_equal(lt[[5]], 4)
	expect_equal(lt[[6]], integer(0))
	expect_equal(lt[[7]], 8)
	expect_equal(lt[[8]], 7)
})
39 |
# move_index() receives a vector `x`, the 0-based order of -abs(x) and a
# count; step by step it moves the entries with the largest |x| to the front
# (or, with the fourth argument FALSE, towards the back). Positive and
# negative values are treated asymmetrically -- TODO confirm against the
# C++ implementation; the expectations below pin the observed behavior.
test_that("test move_index", {
	x = c(2, 1, 5, 4, 3)
	od = order(-abs(x))
	expect_equal(x[move_index(x, od-1, 1) + 1], c(5, 2, 1, 4, 3))
	expect_equal(x[move_index(x, od-1, 2) + 1], c(5, 4, 2, 1, 3))
	expect_equal(x[move_index(x, od-1, 3) + 1], c(5, 4, 3, 2, 1))
	expect_equal(x[move_index(x, od-1, 4) + 1], c(5, 4, 3, 2, 1))
	expect_equal(x[move_index(x, od-1, 5) + 1], c(5, 4, 3, 2, 1))

	# all-negative values: largest |x| moves to the back instead
	x = c(-2, -1, -5, -4, -3)
	od = order(-abs(x))
	expect_equal(x[move_index(x, od-1, 1) + 1], c(-2, -1, -4, -3, -5))
	expect_equal(x[move_index(x, od-1, 2) + 1], c(-2, -1, -3, -4, -5))
	expect_equal(x[move_index(x, od-1, 3) + 1], c(-2, -1, -3, -4, -5))
	expect_equal(x[move_index(x, od-1, 4) + 1], c(-1, -2, -3, -4, -5))
	expect_equal(x[move_index(x, od-1, 5) + 1], c(-1, -2, -3, -4, -5))

	# mixed signs
	x = c(-2, 1, 5, -4, 3)
	od = order(-abs(x))
	expect_equal(x[move_index(x, od-1, 1) + 1], c(5, -2, 1, -4, 3))
	expect_equal(x[move_index(x, od-1, 2) + 1], c(5, -2, 1, 3, -4))
	expect_equal(x[move_index(x, od-1, 3) + 1], c(5, 3, -2, 1, -4))
	expect_equal(x[move_index(x, od-1, 4) + 1], c(5, 3, 1, -2, -4))
	expect_equal(x[move_index(x, od-1, 5) + 1], c(5, 3, 1, -2, -4))

	# fourth argument FALSE reverses the direction of the movement
	x = c(3, 1, 5, -4, -2)
	od = order(-abs(x))
	expect_equal(x[move_index(x, od-1, 1, FALSE) + 1], c(3, 1, -4, -2, 5))
	expect_equal(x[move_index(x, od-1, 2, FALSE) + 1], c(-4, 3, 1, -2, 5))
	expect_equal(x[move_index(x, od-1, 3, FALSE) + 1], c(-4, 1, -2, 3, 5))
	expect_equal(x[move_index(x, od-1, 4, FALSE) + 1], c(-4, -2, 1, 3, 5))
	expect_equal(x[move_index(x, od-1, 5, FALSE) + 1], c(-4, -2, 1, 3, 5))
})
73 |
74 |
# calc_x_offset() returns, for each element, the horizontal shift caused by
# going from the previous ordering to the new ordering (orders passed
# 0-based; the last argument gives the element widths) -- TODO confirm
test_that("test calc_x_offset", {
	# identical orderings: nothing moves
	prev_od = 1:5
	new_od = 1:5
	expect_equal(calc_x_offset(1:5, prev_od - 1, new_od - 1, 1:5),
		c(0, 0, 0, 0, 0))

	# swapping the first two elements shifts them by each other's width
	prev_od = 1:5
	new_od = c(2, 1, 3, 4, 5)
	expect_equal(calc_x_offset(1:5, prev_od - 1, new_od - 1, 1:5),
		c(2, -1, 0, 0, 0))

	# full reversal
	prev_od = 1:5
	new_od = 5:1
	expect_equal(calc_x_offset(1:5, prev_od - 1, new_od - 1, 1:5),
		c(14, 11, 6, -1, -10))
})
91 |
92 |
93 | # pos = cpp_node_pos_in_tree(tree, n_connected_leaves(tree))
94 | # lt_counterpart = cpp_get_force_counterpart(dag@lt_children, dag@lt_parents, tree@lt_children, tree@lt_parents, dag@root)
95 |
96 |
97 | # force = cpp_get_force(lt_counterpart, pos$x, dag_depth(tree))
98 | # test_that("test cpp_get_force", {
99 | # expect_equal(sign(force), c(0, 1, -1, -1, 1, 0, -1, 1))
100 | # })
101 |
102 |
103 | # n_cp = sapply(lt_counterpart, length)
104 | # x = pos$x
105 | # test_that("test reorder_children", {
106 | # expect_equal(reorder_children(tree@lt_children[[1]], n_cp, force, pos$width, dag_depth(tree), x, tree@lt_children),
107 | # c(3, 4, 2, 8))
108 | # expect_equal(order(x[c(3, 4, 2, 8)]), 1:4)
109 | # })
110 |
111 |
112 | # pos = cpp_node_pos_in_tree(tree, n_connected_leaves(tree))
113 | # cpp_reorder_tree_x(tree, lt_counterpart, pos$x, pos$width)
114 |
115 |
116 |
--------------------------------------------------------------------------------
/src/term.cpp:
--------------------------------------------------------------------------------
1 | #include <Rcpp.h>
2 | using namespace Rcpp;
3 |
4 | #include "traverse.h"
5 | #include "utils.h"
6 |
7 |
8 | // [[Rcpp::export]]
9 | NumericVector cpp_ic_meng(S4 dag, bool correct) {
10 | List lt_children = dag.slot("lt_children");
11 | IntegerVector depth = _dag_depth(dag);
12 | int n_terms = dag.slot("n_terms");
13 |
14 | int max_depth = max(depth);
15 | int n = lt_children.size();
16 |
17 | NumericVector ic(n);
18 | for(int i = 0; i < n; i ++) {
19 | if(depth[i] == 0 || (!correct && depth[i] == 1)) {
20 | ic[i] = 0;
21 | } else {
22 | LogicalVector l_offspring(n);
23 | _find_offspring(lt_children, i, l_offspring);
24 |
25 | double x = 0.0;
26 | for(int j = 0; j < n; j ++) {
27 | if(l_offspring[j]) {
28 | x = x + 1.0/depth[j];
29 | }
30 | }
31 |
32 | if(correct) {
33 | ic[i] = log(depth[i]+1)/log(max_depth+1)*(1 - log(x + 1)/log(n_terms));
34 | } else {
35 | ic[i] = log(depth[i])/log(max_depth)*(1 - log(x + 1)/log(n_terms));
36 | }
37 | }
38 | }
39 |
40 | return ic;
41 | }
42 |
43 |
// it calculates S_a(t): the semantic contribution of ancestor `i_node` to
// the target term `i_end` (Wang-style S-value). Defined recursively: 1 for
// the term itself; for an ancestor, the maximum over all children (restricted
// to `l_background`) of the child's S-value times the per-edge contribution
// factor for the relation type of that edge.
// NOTE(review): `max(s)` on an empty vector would misbehave; this relies on
// the caller passing the ancestor set of `i_end` (including itself) as
// `l_background`, so every node on the way down has at least one child in
// the set -- TODO confirm no other caller exists.
double _calc_wang_s(List lt_children, List lt_children_relations, NumericVector contribution,
	int i_node, int i_end, LogicalVector l_background, bool correct = false, double c = 0.66667) {

	if(i_node == i_end) {
		// the S-value of the term for itself is 1 by definition
		return 1;
	} else {
		IntegerVector children = lt_children[i_node];
		IntegerVector relations = lt_children_relations[i_node];
		LogicalVector l_children_included(children.size(), false);

		// keep only children inside the background set (ancestors of i_end)
		for(int i = 0; i < children.size(); i ++) {
			if(l_background[ children[i] - 1 ]) {
				l_children_included[i] = true;
			}
		}

		// one candidate S-value per included child
		NumericVector s(sum(l_children_included), 0);
		int nc = sum(l_children_included);
		int si = 0;
		for(int i = 0; i < children.size(); i ++) {
			if(l_children_included[i]) {
				if(correct) {
					// corrected edge factor: 1/(c + n_children) + contribution
					s[si] = _calc_wang_s(lt_children, lt_children_relations, contribution,
						children[i] - 1, i_end, l_background, correct, c) * (1/(c+nc) + contribution[relations[i] - 1]);
				} else {
					// plain edge factor indexed by the relation type (1-based)
					s[si] = _calc_wang_s(lt_children, lt_children_relations, contribution,
						children[i] - 1, i_end, l_background, correct, c) * contribution[relations[i] - 1];
				}
				si ++;
			}

		}
		// the best (maximal) path down to i_end
		return max(s);
	}
}
80 |
// IC by Wang 2007: for every term, the IC is the sum of the S-values (see
// _calc_wang_s()) of all of its ancestors, the term itself included.
// `contribution` maps relation types (1-based) to per-edge factors.
// [[Rcpp::export]]
NumericVector cpp_ic_wang(S4 dag, NumericVector contribution) {

	List lt_parents = dag.slot("lt_parents");
	List lt_children = dag.slot("lt_children");
	List lt_children_relations = dag.slot("lt_children_relations");

	int n = lt_parents.size();
	NumericVector ic(n);

	for(int i = 0; i < n; i ++) {

		// progress display: 48 backspaces erase the previous line, then the
		// current count is printed in place; `message()` here is presumably a
		// project helper taking (text, append_newline) -- TODO confirm
		if(i % 1000 == 0) {
			message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false);
			message("going through " + std::to_string(i) + " / " + std::to_string(n) + " nodes ...", false);
		}

		// all ancestors of term i, including i itself
		LogicalVector l_ancestors(n);
		_find_ancestors(lt_parents, i, l_ancestors, true);

		// accumulate S-values over the ancestor set
		for(int j = 0; j < n; j ++) {
			if(l_ancestors[j]) {
				ic[i] += _calc_wang_s(lt_children, lt_children_relations, contribution, j, i, l_ancestors);
			}
		}

		// NOTE(review): l_ancestors is re-created on every iteration, so this
		// reset looks redundant
		reset_logical_vector_to_false(l_ancestors);
	}

	// final progress line
	message("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b", false);
	message("going through " + std::to_string(n) + " / " + std::to_string(n) + " nodes ... Done.", true);

	return ic;
}
115 |
116 |
117 |
118 |
// a leaf's most informative leaf is itself
// For every node in `nodes`, return the 1-based id of the connected leaf
// with the largest value in `v` (e.g. the most informative leaf when `v`
// holds IC values).
// NOTE(review): `max_v` starts at 0, so leaves with non-positive values can
// never win and the node's own id is kept -- this assumes v >= 0; TODO confirm
// [[Rcpp::export]]
IntegerVector cpp_max_leaves_id(S4 dag, IntegerVector nodes, NumericVector v) {

	List lt_children = dag.slot("lt_children");
	int n = lt_children.size();

	int m = nodes.size();
	IntegerVector cl(m);
	LogicalVector is_leaf(n);
	for(int i = 0; i < m; i ++) {
		// default: the node itself (covers the case where the node is a leaf)
		cl[i] = nodes[i];
		// mark all leaves reachable from nodes[i]
		_find_connected_leaves(lt_children, nodes[i]-1, is_leaf);

		double max_v = 0;
		for(int j = 0; j < n; j ++) {
			if(is_leaf[j]) {
				if(v[j] > max_v) {
					max_v = v[j];
					cl[i] = j+1;
				}
			}
		}

		// is_leaf is shared across iterations, so it must be cleared here
		reset_logical_vector_to_false(is_leaf);
	}

	return cl;
}
148 |
149 |
--------------------------------------------------------------------------------
/tests/testthat/tests_term.R:
--------------------------------------------------------------------------------
1 |
library(testthat)


## export all functions
# copy every object (including unexported internal functions) from the
# package namespace into the test environment so they can be called directly
if(!identical(topenv(), .GlobalEnv)) {
	pkg_env = asNamespace("simona")
	all_objs = ls(envir = pkg_env)
	for(obj in all_objs) {
		assign(obj, get(obj, envir = pkg_env, inherits = FALSE))
	}
}
13 |
#### test a small dag

# b--d--f
#  /  \
# a---c--e
# upstream -> downstream

parents = c("a", "a", "b", "b", "c", "d")
children = c("b", "c", "c", "d", "e", "f")

dag = create_ontology_DAG(parents, children)


# expected values below are hand-calculated for this 6-term DAG
test_that("test IC_universal", {
	expect_equal(
		IC_universal(dag, use_cache = FALSE),
		-log(c(1, 1/2, 1/8, 1/4, 1/8, 1/4))
	)
})

test_that("test reachability", {
	expect_equal(
		reachability(dag, use_cache = FALSE),
		c(3, 2, 1, 1, 1, 1)
	)
})

test_that("test totipotency", {
	expect_equal(
		totipotency(dag, use_cache = FALSE),
		c(1, 5/6, 1/3, 1/3, 1/6, 1/6)
	)
})

test_that("test IC_Meng_2012", {
	# without the correction, terms on depth 0 and 1 get IC 0
	expect_equal(
		IC_Meng_2012(dag, correct = FALSE, FALSE),
		c(0, 0, log(2)/log(3)*(1-log(4/3)/log(6)), log(2)/log(3)*(1-log(4/3)/log(6)), 1, 1)
	)
	# with the correction, only the root gets IC 0
	expect_equal(
		IC_Meng_2012(dag, correct = TRUE, FALSE),
		c(0, log(1+1)/log(3+1)*(1-log(8/3)/log(6)), log(2+1)/log(3+1)*(1-log(4/3)/log(6)), log(2+1)/log(3+1)*(1-log(4/3)/log(6)), 1, 1)
	)
})
# the following IC methods are only smoke-tested (no expected values)
IC_Zhou_2008(dag, use_cache = FALSE)
IC_Seco_2004(dag, use_cache = FALSE)
IC_Zhang_2006(dag, use_cache = FALSE)
# IC_Seddiqui_2010(dag, use_cache = FALSE)
IC_Sanchez_2011(dag, use_cache = FALSE)

test_that("test IC_Wang_2007", {
	# without relation contribution factors the method cannot run
	expect_error(
		IC_Wang_2007(dag, use_cache = FALSE),
		"not set"
	)
})
70 |
71 |
72 |
parents = c("a", "a", "b", "b", "c", "d")
children = c("b", "c", "c", "d", "e", "f")

# annotated items given only on the terms themselves
annotation = list(
	"a" = 1:3,
	"b" = 3:4,
	"c" = 5,
	"d" = 7,
	"e" = 4:7,
	"f" = 8
)

# the same annotations but manually propagated from offspring to ancestors;
# both forms should yield identical annotation counts
annotation2 = list(
	"a" = c(1, 2, 3, 4, 5, 7, 6, 8),
	"b" = c(3, 4, 5, 7, 6, 8),
	"c" = c(5, 4, 6, 7),
	"d" = 7:8,
	"e" = 4:7,
	"f" = 8
)

dag1 = create_ontology_DAG(parents, children, annotation = annotation)
dag2 = create_ontology_DAG(parents, children, annotation = annotation2)

# helper: drop attributes before comparing numeric vectors
remove_attr = function(x) {
	attributes(x) = NULL
	x
}

test_that("test IC_annotation", {
	expect_equal(
		n_annotations(dag1, use_cache = FALSE),
		n_annotations(dag2, use_cache = FALSE)
	)
	# IC_annotation = -log(n_annotated/n_total), with 8 items in total
	expect_equal(
		remove_attr(IC_annotation(dag1, use_cache = FALSE)),
		-c(log(8/8), log(6/8), log(4/8), log(2/8), log(4/8), log(1/8))
	)
})
112 |
#####################
# b--d--f
#  /  \
# a---c--e
# upstream -> downstream

parents = c("a", "a", "b", "b", "c", "d")
children = c("b", "c", "c", "d", "e", "f")


# note: relations are given as "isa" but the contribution vector below is
# named "is_a" -- presumably relation names are normalized; TODO confirm
dag = create_ontology_DAG(parents, children, relations = c("isa", "part of", "isa", "part of", "isa", "part of"),
	annotation = annotation)
test_that("test IC_Wang_2007", {
	expect_equal(
		IC_Wang_2007(dag, c("is_a" = 0.7, "part of" = 0.6), use_cache = FALSE),
		c(1, 1.7, 2.3, 2.02, 2.61, 2.212)
	)
})

library(igraph)
# cross-check: with edge weights -log(contribution), the shortest directed
# path corresponds to the maximal product of contributions, so exp(-d) gives
# the S-values and the IC is the column sum
g = dag_as_igraph(dag)
E(g)$weight = c("is_a" = 0.7, "part_of" = 0.6)[E(g)$relation]
d = distances(g, mode = "out", weights = -log(E(g)$weight))
s = exp(-d)
test_that("test IC_Wang_2007 and shortest path weighted by 1/w", {
	expect_equal(
		IC_Wang_2007(dag, c("is_a" = 0.7, "part of" = 0.6), use_cache = FALSE),
		unname(colSums(s))
	)
})
143 |
### test annotation
# annotations are propagated upwards, so a term can never have more
# annotated items than any of its ancestors
dag = create_ontology_DAG_from_GO_db("BP", org_db = "org.Hs.eg.db")
n = n_annotations(dag)
test_that("test n_annotations", {
	for(i in 1:10) {
		x = sample(dag@terms, 1)
		an = dag_ancestors(dag, x)
		expect_true(
			all(n[an] >= n[x])
		)
	}
})
156 |
# interactive exploration only (never executed during testing): compare all
# IC methods pairwise on the full GO BP DAG
if(FALSE) {

dag = create_ontology_DAG_from_GO_db("BP", org_db = "org.Hs.eg.db")
lt = lapply(all_ic_methods(), function(method) {
	cat("=====", method, "=====\n")
	term_IC(dag, method)
})
names(lt) = all_ic_methods()

df = as.data.frame(lt)
pairs(df, pch = ".", col = dag_depth(dag))

}
170 |
--------------------------------------------------------------------------------
/tests/testthat/tests_common_ancestor.R:
--------------------------------------------------------------------------------
1 |
library(testthat)


## export all functions
# copy every object (including unexported internal functions) from the
# package namespace into the test environment so they can be called directly
if(!identical(topenv(), .GlobalEnv)) {
	pkg_env = asNamespace("simona")
	all_objs = ls(envir = pkg_env)
	for(obj in all_objs) {
		assign(obj, get(obj, envir = pkg_env, inherits = FALSE))
	}
}

#### test a small dag

# b--d--f
#  /  \
# a---c--e
# upstream -> downstream

# edges: a->b, a->c, b->c, b->d, c->e, d->f
parents = c("a", "a", "b", "b", "c", "d")
children = c("b", "c", "c", "d", "e", "f")

dag = create_ontology_DAG(parents, children)
25 |
test_that("test cpp_max_ancestor_v", {
	# for every pair of terms: the highest `value` (here the depth) among
	# their common ancestors; only the upper triangle incl. the diagonal is
	# compared against hand-computed values
	m = max_ancestor_v(dag, 1:6, dag_depth(dag))
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(0, 0, 1, 0, 1, 2, 0, 1, 1, 2, 0, 1, 2, 1, 3, 0, 1, 1, 2, 1, 3)
	)

	# a term subset in non-sorted order
	m = max_ancestor_v(dag, c(2, 4, 5, 3), dag_depth(dag))
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(1, 1, 2, 1, 1, 3, 1, 1, 2, 2)
	)
})

test_that("test cpp_max_ancestor_id", {
	# same as above, but returning the id of the maximizing common ancestor
	# instead of the value itself
	m = max_ancestor_id(dag, 1:6, dag_depth(dag))
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(1, 1, 2, 1, 2, 3, 1, 2, 2, 4, 1, 2, 3, 2, 5, 1, 2, 2, 4, 2, 6)
	)

	m = max_ancestor_id(dag, c(2, 4, 5, 3), dag_depth(dag))
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(2, 2,4, 2, 2, 5, 2, 2, 3, 3)
	)

	# with a constant value vector all common ancestors tie; the expectation
	# pins the observed tie-breaking behavior
	m = max_ancestor_id(dag, 1:6, rep(0, 6))
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(1, 1, 2, 1, 2, 3, 1, 2, 2, 4, 1, 2, 3, 2, 5, 1, 2, 2, 4, 2, 6)
	)

})
60 |
test_that("test cpp_distances", {
	# pairwise shortest distances through the nearest common ancestor;
	# expected values are hand-computed, upper triangle only
	m = shortest_distances_via_NCA(dag, 1:6)
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(0, 1, 0, 1, 1, 0, 2, 1, 2, 0, 2, 2, 1, 3, 0, 3, 2, 3, 1, 4, 0)
	)

	# a term subset in arbitrary order must equal the subset of the full matrix
	m2 = shortest_distances_via_NCA(dag, c(2, 4, 5, 3))
	m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)]
	expect_equal(m2, m3)

	# pairwise longest distances through the lowest common ancestor
	m = longest_distances_via_LCA(dag, 1:6)
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(0, 1, 0, 2, 1, 0, 2, 1, 2, 0, 3, 2, 1, 3, 0, 3, 2, 3, 1, 4, 0)
	)

	m2 = longest_distances_via_LCA(dag, c(2, 4, 5, 3))
	m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)]
	expect_equal(m2, m3)

	# the LCA-based longest distance decomposes into the two legs from the
	# LCA to each of the two terms
	lt = cpp_longest_distances_from_LCA(dag, 1:6)
	m = longest_distances_via_LCA(dag, 1:6)
	dimnames(m) = NULL

	expect_equal(m, lt$left + lt$right)
})

test_that("test distance_directed", {
	# directed distances only follow parent->child links; -1 marks pairs
	# where the column term is not reachable from the row term
	m = longest_distances_directed(dag, 1:6)
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(0, 1, 0, 2, 1, 0, 2, 1, -1, 0, 3, 2, 1, -1, 0, 3, 2, -1, 1, -1, 0)
	)

	m2 = longest_distances_directed(dag, c(2, 4, 5, 3))
	m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)]
	expect_equal(m2, m3)

	# terms 1:6 are in topological order, so the lower triangle is all -1
	expect_equal(
		m[lower.tri(m, diag = FALSE)],
		rep(-1, 15)
	)

	m = shortest_distances_directed(dag, 1:6)
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(0, 1, 0, 1, 1, 0, 2, 1, -1, 0, 2, 2, 1, -1, 0, 3, 2, -1, 1, -1, 0)
	)

	m2 = shortest_distances_directed(dag, c(2, 4, 5, 3))
	m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)]
	expect_equal(m2, m3)

	expect_equal(
		m[lower.tri(m, diag = FALSE)],
		rep(-1, 15)
	)
})
120 |
121 |
test_that("test cpp_nearest_common_ancestor", {
	# the NCA is the common ancestor with the smallest summed distance to
	# the two terms
	m = cpp_nearest_common_ancestor(dag, 1:6)
	expect_equal(
		m[upper.tri(m, diag = TRUE)],
		c(1, 1, 2, 1, 2, 3, 1, 2, 2, 4, 1, 2, 3, 2, 5, 1, 2, 2, 4, 2, 6)
	)

	m2 = cpp_nearest_common_ancestor(dag, c(2, 4, 5, 3))
	m3 = m[c(2, 4, 5, 3), c(2, 4, 5, 3)]
	expect_equal(m2, m3)
})


test_that("compare cpp_nearest_common_ancestor and cpp_max_ancestor_v", {
	# a DAG constructed so that for one pair of terms the LCA (deepest
	# common ancestor) and the NCA (closest common ancestor) differ
	parents = c("a", "b", "c", "c", "d", "f", "b", "b")
	children = c("b", "c", "d", "f", "e", "g", "e", "g")
	dag = create_ontology_DAG(parents, children)
	depth = dag_depth(dag)

	m1 = max_ancestor_id(dag, 1:7, depth)
	m2 = cpp_nearest_common_ancestor(dag, 1:7)

	# for the pair (5, 7): LCA is term 3, NCA is term 2
	expect_equal(m1[5, 7], 3)
	expect_equal(m2[5, 7], 2)

	# for the pair (4, 6) both definitions agree
	expect_equal(m1[4, 6], 3)
	expect_equal(m2[4, 6], 3)
})
150 |
151 |
152 |
# interactive benchmark on the full GO DAG; never executed during testing
if(FALSE) {

dag = create_ontology_DAG_from_GO_db()

system.time(d <- LCA_term(dag, dag@terms)); rm(d); gc();
system.time(d <- LCA_depth(dag, dag@terms)); rm(d); gc();
system.time(d <- NCA_term(dag, dag@terms)); rm(d); gc();
system.time(d <- MICA_term(dag, dag@terms, "IC_universal")); rm(d); gc();
system.time(d <- MICA_IC(dag, dag@terms, "IC_universal")); rm(d); gc();

}
164 |
165 |
--------------------------------------------------------------------------------
/man/common_ancestor.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/common_ancestor.R
3 | \name{MICA_term}
4 | \alias{MICA_term}
5 | \alias{MICA_IC}
6 | \alias{LCA_term}
7 | \alias{LCA_depth}
8 | \alias{NCA_term}
9 | \alias{max_ancestor_v}
10 | \alias{max_ancestor_id}
11 | \alias{max_ancestor_path_sum}
12 | \alias{CA_terms}
13 | \title{Various types of common ancestors}
14 | \usage{
15 | MICA_term(
16 | dag,
17 | terms,
18 | IC_method,
19 | in_labels = TRUE,
20 | distance = "longest",
21 | verbose = simona_opt$verbose
22 | )
23 |
24 | MICA_IC(dag, terms, IC_method, verbose = simona_opt$verbose)
25 |
26 | LCA_term(
27 | dag,
28 | terms,
29 | in_labels = TRUE,
30 | distance = "longest",
31 | verbose = simona_opt$verbose
32 | )
33 |
34 | LCA_depth(dag, terms, verbose = simona_opt$verbose)
35 |
36 | NCA_term(dag, terms, in_labels = TRUE, verbose = simona_opt$verbose)
37 |
38 | max_ancestor_v(dag, terms, value, verbose = simona_opt$verbose)
39 |
40 | max_ancestor_id(
41 | dag,
42 | terms,
43 | value,
44 | in_labels = FALSE,
45 | distance = "longest",
46 | verbose = simona_opt$verbose
47 | )
48 |
49 | max_ancestor_path_sum(
50 | dag,
51 | terms,
52 | value,
53 | add_v,
54 | distance = "longest",
55 | verbose = simona_opt$verbose
56 | )
57 |
58 | CA_terms(dag, term1, term2, in_labels = TRUE)
59 | }
60 | \arguments{
61 | \item{dag}{An \code{ontology_DAG} object.}
62 |
63 | \item{terms}{A vector of term names.}
64 |
65 | \item{IC_method}{An IC method. Valid values are in \code{\link[=all_term_IC_methods]{all_term_IC_methods()}}.}
66 |
67 | \item{in_labels}{Whether the terms are represented in their names or as integer indices?}
68 |
69 | \item{distance}{If there are multiple LCA or MICA of two terms, whether to take the one with
70 | the longest distance or the shortest distance to the two terms. Possible values are "longest" and "shortest".}
71 |
72 | \item{verbose}{Whether to print messages.}
73 |
74 | \item{value}{A numeric vector. The elements should correspond to terms in \code{dag_all_terms()} (should have the same length as the number of terms in the DAG).}
75 |
76 | \item{add_v}{Values to be added along the path to the MICA or LCA. The same format as \code{value}.}
77 |
78 | \item{term1}{A single term ID.}
79 |
80 | \item{term2}{A single term ID.}
81 | }
82 | \value{
83 | \itemize{
84 | \item \code{MICA_term()} returns an integer or a character matrix of the MICA terms depending on the value of \code{in_labels}.
85 | \item \code{MICA_IC()} returns a numeric matrix of the IC of the MICA terms.
86 | \item \code{LCA_term()} returns an integer or a character matrix of the LCA term depending on the value of \code{in_labels}.
87 | \item \code{LCA_depth()} returns an integer matrix of the depth of the LCA terms.
88 | \item \code{NCA_term()} returns an integer or a character matrix of the NCA term depending on the value of \code{in_labels}. The shortest distance from NCA terms can be calculated by \code{\link[=shortest_distances_via_NCA]{shortest_distances_via_NCA()}}.
89 | \item \code{max_ancestor_v()} returns a numeric matrix.
90 | \item \code{max_ancestor_id()} returns an integer or a character matrix.
91 | \item \code{CA_terms()} returns a vector of term IDs.
92 | }
93 | }
94 | \description{
95 | Various types of common ancestors
96 | }
97 | \details{
98 | There are the following three types of common ancestors:
99 | \itemize{
100 | \item MICA (most informative common ancestor): The common ancestor with the highest IC value.
101 | \item LCA (lowest common ancestor): The common ancestor with the largest depth (The depth of a term is the maximal distance from the root term). If there are multiple ancestors having
102 | the same max depth, the ancestor with the smallest distance to the two terms is used.
103 | \item NCA (nearest common ancestor): The common ancestor with the smallest distance to the two terms. If there are multiple
104 | ancestors with the same smallest distance, the ancestor with the largest depth is used.
105 | }
106 |
107 | \code{max_ancestor_v()} and \code{max_ancestor_id()} are more general functions which return common ancestors with
108 | the highest value in \code{value}.
109 |
110 | Given a path connecting two terms and their MICA/LCA, \code{max_ancestor_path_sum()} calculates the sum of values of terms along the path. The values
111 | to be added are specified in the \code{add_v} argument.
112 | }
113 | \examples{
114 | parents = c("a", "a", "b", "b", "c", "d")
115 | children = c("b", "c", "c", "d", "e", "f")
116 | dag = create_ontology_DAG(parents, children)
117 | MICA_term(dag, letters[1:6], "IC_universal")
118 | MICA_IC(dag, letters[1:6], "IC_universal")
119 | LCA_term(dag, letters[1:6])
120 | LCA_depth(dag, letters[1:6])
121 | NCA_term(dag, letters[1:6])
122 | CA_terms(dag, "c", "d")
123 | }
124 |
--------------------------------------------------------------------------------
/src/traverse.h:
--------------------------------------------------------------------------------
#ifndef __TRAVERSE__
#define __TRAVERSE__

// Set-operation selectors used by the *_of_a_group() / *_of_two_groups()
// helpers below (values defined in the corresponding .cpp file).
extern const int SET_UNION;
extern const int SET_INTERSECT;
extern const int SET_UNIQU_IN_1;
extern const int SET_UNIQU_IN_2;

// --- ancestor/offspring traversal ---------------------------------------
// The LogicalVector& arguments are filled in place (one flag per term);
// `include_self` controls whether the start node itself is marked.
// "background" variants restrict the traversal to a subset of terms.
void _add_parents(List lt_parents, int i_node, LogicalVector& l_ancestors);
void _add_parents_within_background(List lt_parents, int i_node, LogicalVector& l_ancestors, LogicalVector l_background);
void _find_ancestors(List lt_parents, int i_node, LogicalVector& l_ancestors, bool include_self = false);
void _find_ancestors_with_background(List lt_parents, int i_node, LogicalVector& l_ancestors, LogicalVector l_background, bool include_self = false);
IntegerVector cpp_ancestors(S4 dag, int node, bool include_self = false);
IntegerVector cpp_ancestors_within_background(S4 dag, int node, IntegerVector background, bool include_self = false);
void _add_children(List lt_children, int i_node, LogicalVector& l_offspring);
void _add_children_within_background(List lt_children, int i_node, LogicalVector& l_offspring, LogicalVector l_background);
void _find_offspring(List lt_children, int i_node, LogicalVector& l_offspring, bool include_self = false);
void _find_offspring_within_background(List lt_children, int i_node, LogicalVector& l_offspring, LogicalVector l_background, bool include_self = false);
IntegerVector cpp_offspring(S4 dag, int node, bool include_self = false);
IntegerVector cpp_offspring_within_background(S4 dag, int node, IntegerVector background, bool include_self = false);
void _add_leaves(List lt_children, int i_node, LogicalVector& l_offspring);
void _find_connected_leaves(List lt_children, int i_node, LogicalVector& l_offspring);

// --- counting helpers ----------------------------------------------------
IntegerVector cpp_n_ancestors(S4 dag, bool include_self = false);
IntegerVector cpp_n_ancestors_on_tree(S4 dag, bool include_self = false);
IntegerVector cpp_n_offspring(S4 dag, bool include_self = false);
IntegerVector cpp_n_offspring_on_tree(S4 dag, bool include_self = false);
IntegerVector cpp_n_leaves(S4 dag);
// group variants: `type` selects one of the SET_* operations above
IntegerVector cpp_ancestors_of_a_group(S4 dag, IntegerVector nodes, int type = 1, bool include_self = false);
IntegerVector cpp_ancestors_of_a_group_within_background(S4 dag, IntegerVector nodes, IntegerVector background, int type = 1, bool include_self = false);
IntegerVector cpp_ancestors_of_two_groups(S4 dag, IntegerVector nodes1, IntegerVector nodes2, int type, bool include_self = false);
IntegerVector cpp_offspring_of_a_group(S4 dag, IntegerVector nodes, bool include_self = false);
LogicalMatrix cpp_is_reachable(S4 dag, IntegerVector nodes, bool directed = false);

// --- depth/height and directed-distance helpers --------------------------
// `l_background` (when non-empty) restricts the traversal to a term subset.
IntegerVector cpp_dag_depth(S4 dag);
IntegerVector cpp_dag_longest_dist_to_offspring(S4 dag, IntegerVector from_node, LogicalVector l_background = LogicalVector(0));
IntegerVector cpp_dag_longest_dist_to_offspring(S4 dag, int from_node, LogicalVector l_background = LogicalVector(0));
IntegerVector cpp_dag_shortest_dist_to_offspring(S4 dag, IntegerVector from_node, LogicalVector l_background = LogicalVector(0));
IntegerVector cpp_dag_shortest_dist_to_offspring(S4 dag, int from_node, LogicalVector l_background = LogicalVector(0));
IntegerVector cpp_dag_height(S4 dag);
IntegerVector cpp_dag_longest_dist_from_ancestors(S4 dag, IntegerVector to_node, LogicalVector l_background = LogicalVector(0));
IntegerVector cpp_dag_longest_dist_from_ancestors(S4 dag, int to_node, LogicalVector l_background = LogicalVector(0));
IntegerVector cpp_dag_shortest_dist_from_ancestors(S4 dag, IntegerVector to_node, LogicalVector l_background = LogicalVector(0));
IntegerVector cpp_dag_shortest_dist_from_ancestors(S4 dag, int to_node, LogicalVector l_background = LogicalVector(0));

// --- path-sum variants ----------------------------------------------------
// Like the distance helpers above, but summing `value` over the terms on
// the optimal path instead of counting steps.
NumericVector cpp_dag_longest_path_to_offspring_sum_value(S4 dag, IntegerVector from_node, NumericVector value, LogicalVector l_background = LogicalVector(0));
NumericVector cpp_dag_shortest_path_to_offspring_sum_value(S4 dag, IntegerVector from_node, NumericVector value, LogicalVector l_background = LogicalVector(0));
NumericVector cpp_dag_longest_path_from_ancestors_sum_value(S4 dag, IntegerVector to_node, NumericVector value, LogicalVector l_background = LogicalVector(0));
NumericVector cpp_dag_shortest_path_from_ancestors_sum_value(S4 dag, IntegerVector to_node, NumericVector value, LogicalVector l_background = LogicalVector(0));
NumericVector cpp_dag_longest_path_to_offspring_sum_value(S4 dag, int from_node, NumericVector value, LogicalVector l_background = LogicalVector(0));
NumericVector cpp_dag_shortest_path_to_offspring_sum_value(S4 dag, int from_node, NumericVector value, LogicalVector l_background = LogicalVector(0));
NumericVector cpp_dag_longest_path_from_ancestors_sum_value(S4 dag, int to_node, NumericVector value, LogicalVector l_background = LogicalVector(0));
NumericVector cpp_dag_shortest_path_from_ancestors_sum_value(S4 dag, int to_node, NumericVector value, LogicalVector l_background = LogicalVector(0));

#endif
57 |
--------------------------------------------------------------------------------
/src/annotation.cpp:
--------------------------------------------------------------------------------
#include <Rcpp.h>
using namespace Rcpp;

#include "traverse.h"
#include "utils.h"
6 |
// Number of annotated items for every term in the DAG.
//
// A term's annotation set is the union of the items annotated to the term
// itself and to all of its offspring.
//
// dag:   the ontology_DAG S4 object.
// unify: if true, count the number of *unique* items over the offspring set;
//        if false, simply sum the sizes of the per-term annotation lists (an
//        item annotated to several offspring terms is then counted repeatedly).
//
// Returns an integer vector with one count per term.
// [[Rcpp::export]]
IntegerVector cpp_n_annotations(S4 dag, bool unify = true) {

	List lt_children = dag.slot("lt_children");
	List annotation = dag.slot("annotation");
	List lt_annotation = annotation["list"];   // per-term 1-based item ids
	CharacterVector anno_names = annotation["names"];
	int n_all_anno = anno_names.size();

	int n = lt_children.size();
	IntegerVector n_anno(n, 0);

	// per-term annotation list sizes, only needed for the non-unify branch
	IntegerVector anno_size(n);
	if(!unify) {
		for(int i = 0; i < n; i ++) {
			IntegerVector anno = lt_annotation[i];
			anno_size[i] = anno.size();
		}
	}

	// reusable offspring mask, reset at the end of each iteration
	LogicalVector l_offspring(n, false);
	for(int i = 0; i < n; i ++) {
		_find_offspring(lt_children, i, l_offspring, true); //include self

		if(unify) {
			// mark every distinct item annotated to any offspring term
			LogicalVector l_anno(n_all_anno, false);
			for(int j = 0; j < n; j ++) {
				if(l_offspring[j]) {
					IntegerVector anno = lt_annotation[j];
					for(int k = 0; k < anno.size(); k ++) {
						l_anno[anno[k]-1] = true;
					}
				}
			}
			n_anno[i] = sum(l_anno);
		} else {
			// non-unique count: sum of list sizes over the offspring set
			IntegerVector anno_sub = anno_size[l_offspring];
			if(anno_sub.size()) {
				n_anno[i] = sum(anno_sub);
			} else {
				n_anno[i] = 0;
			}
		}

		reset_logical_vector_to_false(l_offspring);
	}

	return n_anno;
}
56 |
57 |
58 | // [[Rcpp::export]]
59 | IntegerVector cpp_n_annotations_with_intersect(S4 dag, IntegerVector anno_id) {
60 |
61 | List lt_children = dag.slot("lt_children");
62 | List annotation = dag.slot("annotation");
63 | List lt_annotation = annotation["list"];
64 | CharacterVector anno_names = annotation["names"];
65 | int n_all_anno = anno_names.size();
66 |
67 | int n = lt_children.size();
68 | IntegerVector n_anno(n, 0);
69 |
70 | int m = anno_id.size();
71 |
72 | if(m == 0) {
73 | return n_anno;
74 | }
75 |
76 | LogicalVector l_offspring(n, false);
77 | for(int i = 0; i < n; i ++) {
78 | _find_offspring(lt_children, i, l_offspring, true); //include self
79 |
80 | LogicalVector l_anno(n_all_anno, false);
81 | for(int j = 0; j < n; j ++) {
82 | if(l_offspring[j]) {
83 | IntegerVector anno = lt_annotation[j];
84 | for(int k = 0; k < anno.size(); k ++) {
85 | l_anno[anno[k]-1] = true;
86 | }
87 | }
88 | }
89 |
90 | for(int k = 0; k < m; k ++) {
91 | if(l_anno[ anno_id[k]-1 ]) {
92 | n_anno[i] ++;
93 | }
94 | }
95 |
96 | reset_logical_vector_to_false(l_offspring);
97 | }
98 |
99 | return n_anno;
100 | }
101 |
102 |
103 | // [[Rcpp::export]]
104 | IntegerMatrix cpp_get_term_annotations(S4 dag, IntegerVector nodes) {
105 | List lt_children = dag.slot("lt_children");
106 | List annotation = dag.slot("annotation");
107 | List lt_annotation = annotation["list"];
108 | CharacterVector anno_names = annotation["names"];
109 | int n_all_anno = anno_names.size();
110 | int n = lt_children.size();
111 | int m = nodes.size();
112 |
113 | IntegerMatrix mat(m, n_all_anno);
114 |
115 | LogicalVector l_offspring(n, false);
116 | for(int i = 0; i < m; i ++) {
117 | _find_offspring(lt_children, nodes[i]-1, l_offspring, true); //include self
118 |
119 | // LogicalVector l_anno(n_all_anno, false);
120 | for(int j = 0; j < n; j ++) {
121 | if(l_offspring[j]) {
122 | IntegerVector anno = lt_annotation[j];
123 | for(int k = 0; k < anno.size(); k ++) {
124 | // l_anno[anno[k]-1] = true;
125 | mat(i, anno[k]-1) = 1;
126 | }
127 | }
128 | }
129 | reset_logical_vector_to_false(l_offspring);
130 | }
131 |
132 | return mat;
133 | }
134 |
135 |
136 | // given an item id, return the terms also the ancestors annotated with this item
137 | IntegerVector cpp_get_annotated_terms(S4 dag, int anno_id) {
138 | List lt_children = dag.slot("lt_children");
139 | List lt_parents = dag.slot("lt_parents");
140 |
141 | List annotation = dag.slot("annotation");
142 | List lt_annotation = annotation["list"];
143 |
144 | int n = lt_parents.size();
145 |
146 | IntegerVector x(n);
147 | LogicalVector l_ancestors(n, false);
148 | for(int i = 0; i < n; i ++) {
149 | IntegerVector anno = lt_annotation[i];
150 | if(anno.size() || x[i] == 0) {
151 | for(int ia = 0; ia < anno.size(); ia ++) {
152 | if(anno[ia] == anno_id) {
153 | _find_ancestors(lt_parents, i, l_ancestors, true);
154 | for(int j = 0; j < n; j ++) {
155 | if(l_ancestors[j]) {
156 | x[j] = 1;
157 | }
158 | }
159 | break;
160 | }
161 | }
162 | }
163 |
164 | reset_logical_vector_to_false(l_ancestors);
165 | }
166 |
167 | return x;
168 | }
169 |
170 | // [[Rcpp::export]]
171 | IntegerMatrix cpp_get_annotated_terms(S4 dag, IntegerVector anno_id) {
172 | int m = anno_id.size();
173 | int n = dag.slot("n_terms");
174 | IntegerMatrix x(m, n);
175 |
176 | for(int i = 0; i < m; i ++) {
177 | x(i, _) = cpp_get_annotated_terms(dag, anno_id[i]);
178 | }
179 |
180 | return x;
181 |
182 | }
183 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 |
# Convert terms (names or numeric indices) to integer node ids in the DAG.
#
# dag:      the ontology_DAG object.
# term:     a numeric index vector (returned as-is) or a character vector of
#           term names; when a direct match fails, alternative ids
#           (dag@alternative_terms) are mapped to their primary names.
# strict:   if TRUE, fail when some of multiple terms cannot be found;
#           if FALSE, drop them with a message.
# add_name: if TRUE, name the returned ids by their term names.
term_to_node_id = function(dag, term, strict = TRUE, add_name = FALSE) {
    if(is.numeric(term)) {
        id = term
    } else if(length(term) == 1) {
        i = which(dag@terms == term)

        # fall back to the alternative (alias) id mapping
        if(length(i) == 0 && length(dag@alternative_terms)) {
            term2 = dag@alternative_terms[term]
            if(is.na(term2)) {
                stop("Cannot find term: ", term)
            } else {
                i = which(dag@terms == term2)
            }
        }

        if(length(i) == 0) {
            stop("Cannot find term: ", term)
        }

        id = i
    } else if(length(term) > 1) {
        unique_term = unique(term)
        l = dag@terms %in% unique_term
        # for the still-missing names, try the alternative-id mapping
        if(sum(l) < length(unique_term) && length(dag@alternative_terms)) {
            unique_term2 = dag@alternative_terms[setdiff(unique_term, dag@terms[l])]
            unique_term2 = unique_term2[!is.na(unique_term2)]
            l2 = dag@terms %in% unique_term2
            l = l | l2
        }
        i = which(l)

        if(length(i) == 0) {
            stop("Cannot find all these terms.")
        }

        if(length(i) != length(unique_term)) {
            if(strict) {
                stop("Cannot find some of the terms in the DAG.")
            } else {
                message("removed ", length(unique_term) - length(i), " terms that cannot be found in the DAG.")
            }
        }

        # reorder the ids to follow the input order of `term`.
        # NOTE(review): terms supplied via an alternative id do not appear in
        # dag@terms[i] under their input name, so intersect() silently drops
        # them here even though they were matched above -- verify intended.
        id = unname(structure(i, names = dag@terms[i])[intersect(term, dag@terms[i])])
    }
    # NOTE(review): length(term) == 0 leaves `id` undefined and errors below.
    if(add_name) {
        structure(id, names = dag@terms[id])
    } else {
        id
    }
}
53 |
54 |
#' @importFrom GetoptLong qq
# Ensure a suggested package is available; in a non-interactive session a
# missing package is a hard error with an installation hint.
#
# pkg:    package name to check.
# bioc:   whether the package comes from Bioconductor (only changes message).
# github: unused in the visible code -- presumably a "user/repo" string for
#         GitHub-only packages; TODO confirm against the full source.
#
# NOTE(review): in an interactive session a missing package currently falls
# through silently (returns NULL without installing or stopping) -- verify
# whether an interactive install prompt was intended here.
check_pkg = function(pkg, bioc = FALSE, github = NULL) {
	if(requireNamespace(pkg, quietly = TRUE)) {
		return(NULL)
	} else {

		if(!interactive()) {
			if(bioc) {
				stop_wrap(qq("You need to manually install package '@{pkg}' from Bioconductor."))
			} else {
				stop_wrap(qq("You need to manually install package '@{pkg}' from CRAN."))
			}
		}
	}
}
70 |
71 |
# Signal an error with the message wrapped to the terminal width and without
# the calling expression attached.
stop_wrap = function (...) {
    msg = paste(strwrap(paste0(...)), collapse = "\n")
    stop(msg, call. = FALSE)
}
77 |
# Signal a warning with the message wrapped to the terminal width and without
# the calling expression attached.
warning_wrap = function (...) {
    msg = paste(strwrap(paste0(...)), collapse = "\n")
    warning(msg, call. = FALSE)
}
83 |
# Emit a message wrapped to the terminal width.
message_wrap = function (...) {
    msg = paste(strwrap(paste0(...)), collapse = "\n")
    message(msg)
}
89 |
#' @importFrom grDevices col2rgb rgb
# Make colors transparent.
#
# col:          colors, in any form col2rgb() accepts.
# transparency: 0 (opaque) .. 1 (fully transparent).
# alpha:        if TRUE, encode transparency in an alpha channel; if FALSE,
#               blend the color towards white instead.
add_transparency = function (col, transparency = 0, alpha = TRUE) {
    if(alpha) {
        rgb(t(col2rgb(col)/255), alpha = 1 - transparency)
    } else {
        blended = 255 - (255 - col2rgb(col)) * (1 - transparency)
        rgb(t(blended), maxColorValue = 255)
    }
}
102 |
103 |
# Invert a children-list (term -> child ids) into a parents-list
# (term -> parent ids).
lt_children_to_lt_parents = function(lt_children) {
    n = length(lt_children)
    parents = rep(seq_len(n), times = vapply(lt_children, length, FUN.VALUE = integer(1)))
    children = unlist(lt_children)

    # group parent ids by the child they point to
    lt = split(parents, children)

    # fix: allocate one empty slot per term; the original
    # `rep(list(integer(0)))` created a length-1 list, so index gaps were
    # filled with NULL and trailing parentless terms were dropped
    lt_parents = rep(list(integer(0)), n)
    lt_parents[ as.integer(names(lt)) ] = lt

    lt_parents
}
116 |
# Invert a parents-list (term -> parent ids) into a children-list
# (term -> child ids).
lt_parents_to_lt_children = function(lt_parents) {
    n = length(lt_parents)
    children = rep(seq_len(n), times = vapply(lt_parents, length, FUN.VALUE = integer(1)))
    parents = unlist(lt_parents)

    # group child ids by the parent they point to
    lt = split(children, parents)

    # fix: allocate one empty slot per term; the original
    # `rep(list(integer(0)))` created a length-1 list, so index gaps were
    # filled with NULL and trailing childless terms were dropped
    lt_children = rep(list(integer(0)), n)
    lt_children[ as.integer(names(lt)) ] = lt

    lt_children
}
129 |
# A rooted tree has exactly one fewer relation (edge) than terms (nodes).
dag_is_tree = function(dag) {
    total_relations = sum(vapply(dag@lt_children, length, FUN.VALUE = integer(1)))
    dag@n_terms == total_relations + 1
}
136 |
137 |
# Expand relation types that exist in `relations_DAG` to also include all of
# their offspring relation types; unknown types are passed through unchanged.
merge_offspring_relation_types = function(relations_DAG, relations) {
    if(length(relations) == 0) {
        return(relations)
    }

    known = intersect(relations_DAG@terms, relations)
    if(length(known) == 0) {
        return(relations)
    }

    unique(c(setdiff(relations, known), dag_offspring(relations_DAG, known, include_self = TRUE)))
}
152 |
153 |
# all offspring types are assigned to the same value
extend_contribution_factor = function(relations_DAG, contribution_factor) {

    cf = contribution_factor

    if(is.null(relations_DAG)) {
        return(cf)
    }

    for(type in names(contribution_factor)) {
        if(!type %in% relations_DAG@terms) {
            next
        }
        # propagate the parent type's factor to all of its offspring types
        children_types = dag_offspring(relations_DAG, type)
        if(length(children_types)) {
            cf[children_types] = contribution_factor[type]
        }
    }

    cf
}
174 |
175 |
# Normalize free-form relation type names: lower-case, turn separators
# (dash, space, tilde) into underscores, and map common aliases.
normalize_relation_type = function(x) {
    y = gsub("[- ~]", "_", tolower(x))
    y[y == "isa"] = "is_a"
    y[y == "part_a"] = "part_of"
    y
}
185 |
# Evaluate `code`, optionally silencing message() output.
#
# code:    an expression, passed as a lazy promise; it is only forced inside
#          eval(), so when verbose = FALSE the messages it emits are captured
#          by suppressMessages().
# verbose: if FALSE, suppress messages produced during evaluation.
# envir:   evaluation environment (defaults to the caller's frame).
#          NOTE(review): since the promise already evaluates in the caller's
#          frame when forced, `envir` presumably only matters when `code`
#          evaluates to a language object -- confirm intended usage.
exec_under_message_condition = function(code, verbose = TRUE, envir = parent.frame()) {
    if(verbose) {
        eval(code, envir = envir)
    } else {
        suppressMessages(eval(code, envir = envir))
    }
}
193 |
194 |
--------------------------------------------------------------------------------
/vignettes/v02_GO.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Gene Ontology"
3 | author: "Zuguang Gu ( z.gu@dkfz.de )"
4 | date: '`r Sys.Date()`'
5 | output: html_vignette
6 | ---
7 |
8 | ```{r, echo = FALSE, message = FALSE}
9 | library(knitr)
10 | knitr::opts_chunk$set(
11 | error = FALSE,
12 | tidy = FALSE,
13 | message = FALSE,
14 | warning = FALSE,
15 | fig.align = "center")
16 | ```
17 |
18 |
Gene Ontology (GO) is one of the most widely used bio-ontologies. On Bioconductor, there
are standard packages for GO (**GO.db**) and organism-specific GO annotation packages
(**org.\*.db**). In **simona**, there is a helper function
`create_ontology_DAG_from_GO_db()` which makes use of the Bioconductor standard
GO packages and constructs a DAG object automatically.
24 |
25 | ## Create the GO DAG object
26 |
GO has three namespaces (or ontologies): biological process (BP), molecular
function (MF) and cellular component (CC). The three GO namespaces are
mutually exclusive, so the first argument of
`create_ontology_DAG_from_GO_db()` is the GO namespace.
31 |
32 | ```{r}
33 | library(simona)
34 | dag = create_ontology_DAG_from_GO_db("BP")
35 | dag
36 | ```
37 |
38 | There are three main GO relations: "is_a", "part_of" and "regulates".
39 | "regulates" has two child relation types in GO: "negatively_regulates" and
40 | "positively_regulates". So when "regulates" is selected, the two child
41 | relation types are automatically selected. By default only "is_a" and "part_of" are selected.
42 |
43 | You can set a subset of relation types with the argument `relations`.
44 |
45 | ```{r}
46 | create_ontology_DAG_from_GO_db("BP", relations = c("part of", "regulates")) # "part_of" is also OK
47 | ```
48 |
49 | "is_a" is always selected because this is primary semantic relation type. So
50 | if you only want to include "is_a" relation, you can assign an empty vector to
51 | `relations`:
52 |
53 |
54 | create_ontology_DAG_from_GO_db("BP", relations = character(0)) # or NULL, NA
55 |
56 |
57 | Or you can apply `dag_filter()` after DAG is generated.
58 |
59 |
63 |
64 | ## Add gene annotation
65 |
66 | Gene annotation can be set with the argument `org_db`. The value is an
67 | `OrgDb` object of the corresponding organism. The primary gene ID type in the __org.*.db__
68 | package is internally used (which is normally the EntreZ ID type).
69 |
70 | ```{r}
71 | library(org.Hs.eg.db)
72 | dag = create_ontology_DAG_from_GO_db("BP", org_db = org.Hs.eg.db)
73 | dag
74 | ```
75 |
For standard organism packages on Bioconductor, the `OrgDb` object always has
the same name as the package, so the name of the organism package can also be
set to `org_db`:
79 |
80 |
83 |
84 | Similarly, if the analysis is applied on mouse, the mouse organism package can
85 | be set to `org_db`. If the mouse organism package is not installed yet, it
86 | will be installed automatically.
87 |
88 |