├── pkgdown ├── extra.css └── _pkgdown.yml ├── LICENSE ├── data └── eds_marfan_kg.rda ├── inst ├── extdata │ ├── eds_marfan_kg.tar.gz │ └── README.md ├── association_type_mappings2.yaml └── kg_prefs.yaml ├── vignettes ├── assets │ ├── cftr_cf_edge.png │ └── monarchr_figures.key └── examples │ ├── visualizing_kgs_assets │ └── cytoscape.png │ └── exploring_kgs_assets │ └── autocomplete.png ├── .gitignore ├── R ├── 99_onload.R ├── regex_tilde.R ├── get_engine.R ├── flatten_body_for_httr.R ├── ancestors.R ├── descendants.R ├── get_engine.tbl_kgx.R ├── ancestors.tbl_kgx.R ├── descendants.tbl_kgx.R ├── set_engine.R ├── fetch_nodes.file_engine.R ├── set_engine.tbl_kgx.R ├── expand.tbl_kgx.R ├── graph_sparsity.R ├── in_list.R ├── cypher_query.R ├── order_cols.R ├── example_graph.R ├── normalize_categories.R ├── cypher_query_df.R ├── load_kgx.R ├── cytoscape.R ├── graph_centrality.R ├── data_example_kgs.R ├── cypher_query_df.neo4j_engine.R ├── layout_umap.R ├── file_engine_check.R ├── base_engine.R ├── summarize_neighborhood.R ├── graph_semsim.R ├── neo4j_engine_check.R ├── kg_join.R ├── monarch_engine_check.R ├── transitive_reduction.R ├── expand.R ├── kg_edge_weights.R ├── example_graph.neo4j_engine.R ├── monarch_search.R ├── expand_n.R ├── save_kgx.R ├── fetch_nodes.R ├── monarch_engine.R ├── transitive_closure.R ├── utils.R ├── kg_join.tbl_kgx.R ├── transfer.R ├── knit_print.tbl_kgx.R ├── tbl_kgx.R └── summary.neo4j_engine.R ├── .lintr ├── meta ├── dev_commands.R ├── archive │ ├── dev.R │ └── association_type_mappings.yaml └── checklist.txt ├── .Rbuildignore ├── tests ├── testthat │ ├── test-monarch_engine_check.R │ ├── test-neo4j_engine_check.R │ ├── test-summary.neo4j_engine.R │ ├── test-get_engine.R │ ├── test-base_engine.R │ ├── test-summary.file_engine.R │ ├── test-save_load_kgx.R │ ├── test-file_engine_check.R │ ├── test-summarize_neighborhood.R │ ├── test-monarch_engine.R │ ├── test-monarch_search.R │ ├── test-example_graph.neo4j_engine.R │ ├── test-cypher_query.R │ ├── test-example_graph.file_engine.R │ ├── test-tbl_kgx.R │ ├── test-kg_join.R │ ├── test-file_engine.R │ ├── test-fetch_nodes.neo4j_engine.R │ ├── test-monarch_semsim.R │ ├── test-neo4j_engine.R │ └── test-fetch_nodes.file-engine.R └── testthat.R ├── man ├── order_cols.Rd ├── unname_cols.Rd ├── grapes-twiddle-grapes.Rd ├── base_engine.Rd ├── knit_print.tbl_kgx.Rd ├── neo2r_to_kgx.Rd ├── edges.Rd ├── nodes.Rd ├── explode.Rd ├── get_engine.Rd ├── ancestors.Rd ├── descendants.Rd ├── get_engine.tbl_kgx.Rd ├── ancestors.tbl_kgx.Rd ├── descendants.tbl_kgx.Rd ├── monarch_engine_check.Rd ├── graph_sparsity.Rd ├── roll.Rd ├── normalize_categories.Rd ├── example_graph.file_engine.Rd ├── grapes-in_list-grapes.Rd ├── example_graph.neo4j_engine.Rd ├── graph_centrality.Rd ├── monarch_search.Rd ├── neo4j_engine_check.Rd ├── kg_edge_weights.Rd ├── layout_umap.Rd ├── cypher_query.Rd ├── example_graph.Rd ├── transitive_closure.Rd ├── summary.neo4j_engine.Rd ├── load_kgx.Rd ├── file_engine_check.Rd ├── summary.file_engine.Rd ├── cypher_query_df.Rd ├── save_kgx.Rd ├── cytoscape.Rd ├── graph_semsim.Rd ├── set_engine.Rd ├── cytoscape.tbl_kgx.Rd ├── transitive_reduction.Rd ├── tbl_kgx.Rd ├── eds_marfan_kg.Rd ├── transfer.Rd ├── file_engine.Rd ├── kg_join.Rd ├── summarize_neighborhood.Rd ├── expand_n.Rd ├── plot.tbl_kgx.Rd ├── expand.Rd ├── rolling.Rd ├── monarch_semsim.Rd ├── monarch_engine.Rd ├── monarch_edge_weight_encodings.Rd ├── neo4j_engine.Rd └── fetch_nodes.Rd ├── monarchr.Rproj ├── .vscode └── settings.json ├── LICENSE.md ├── .github └── workflows │ └── rworkflows.yml ├── DESCRIPTION ├── README.Rmd └── README.md /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: monarchr authors 3 | -------------------------------------------------------------------------------- /data/eds_marfan_kg.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/data/eds_marfan_kg.rda -------------------------------------------------------------------------------- /inst/extdata/eds_marfan_kg.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/inst/extdata/eds_marfan_kg.tar.gz -------------------------------------------------------------------------------- /vignettes/assets/cftr_cf_edge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/assets/cftr_cf_edge.png -------------------------------------------------------------------------------- /vignettes/assets/monarchr_figures.key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/assets/monarchr_figures.key -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ..Rcheck 2 | .env 3 | .Rproj.user 4 | .Rhistory 5 | .Rdata 6 | .httr-oauth 7 | .DS_Store 8 | /doc/ 9 | /docs/ 10 | /Meta/ 11 | -------------------------------------------------------------------------------- /vignettes/examples/visualizing_kgs_assets/cytoscape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/examples/visualizing_kgs_assets/cytoscape.png -------------------------------------------------------------------------------- /vignettes/examples/exploring_kgs_assets/autocomplete.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/examples/exploring_kgs_assets/autocomplete.png -------------------------------------------------------------------------------- /R/99_onload.R: -------------------------------------------------------------------------------- 1 | .onLoad <- function(libname, pkgname) { 2 | pref_path <- system.file("kg_prefs.yaml", package = pkgname) 3 | kg_prefs <- yaml::read_yaml(pref_path) 4 | options("default_prefs" = kg_prefs) 5 | invisible() 6 | } 7 | -------------------------------------------------------------------------------- /.lintr: -------------------------------------------------------------------------------- 1 | linters: linters_with_defaults( 2 | line_length_linter = NULL, 3 | indentation_linter = NULL, 4 | whitespace_linter = NULL, 5 | cyclocomp_linter = NULL, 6 | object_name_linter = NULL, 7 | object_usage_linter = NULL) 8 | encoding: "UTF-8" 9 | -------------------------------------------------------------------------------- /pkgdown/_pkgdown.yml: -------------------------------------------------------------------------------- 1 | # template: 2 | # bootstrap: 5 3 | # # bslib: 4 | # # font-size-root: 12px 5 | 6 | articles: 7 | - title: Examples 8 | navbar: Examples 9 | contents: 10 | - examples/visualizing_kgs 11 | - examples/exploring_kgs 12 | - examples/transitive_rollups 13 | - examples/engine_preferences 14 | -------------------------------------------------------------------------------- /meta/dev_commands.R: -------------------------------------------------------------------------------- 1 | devtools::load_all() 2 | 3 | devtools::document() 4 | 5 | devtools::clean_vignettes() 6 | devtools::build_vignettes() 7 | devtools::build_site() 8 | pkgdown::build_articles() 9 | 10 | devtools::test() 11 | 12 | devtools::check() 13 | 14 | devtools::build() 15 | 16 | devtools::install() 17 | # hmm 18 | 19 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | vignettes/visualisation\.Rmd 2 | visualisation\.Rmd 3 | 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | ^LICENSE\.md$ 7 | ^README\.Rmd$ 8 | ^meta$ 9 | ^docs$ 10 | ^pkgdown$ 11 | 12 | ^\.lintr$ 13 | ^\.github$ 14 | ^\.vscode$ 15 | ^\.env$ 16 | 17 | ## For GitHub Actions Windows runner 18 | node_modules$ 19 | package-lock\.json$ 20 | package\.json$ 21 | ^doc$ 22 | ^Meta$ 23 | -------------------------------------------------------------------------------- /tests/testthat/test-monarch_engine_check.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | 5 | test_that("monarch_engine_check works as expected", { 6 | #testthat::skip("temporary skip") 7 | 8 | # we can't test for TRUE here, because this check my be run without a connection 9 | # so we'll just test that it returns logical 10 | expect_type(monarch_engine_check(warn = FALSE), "logical") 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(monarchr) 11 | 12 | test_check("monarchr") 13 | -------------------------------------------------------------------------------- /man/order_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/order_cols.R 3 | \name{order_cols} 4 | \alias{order_cols} 5 | \title{Set edge/row data column order according to most recent engine preferences} 6 | \usage{ 7 | order_cols(g) 8 | } 9 | \arguments{ 10 | \item{g}{A \code{tbl_kgx} graph.} 11 | } 12 | \description{ 13 | Set edge/row data column order according to most recent engine preferences 14 | } 15 | -------------------------------------------------------------------------------- /monarchr.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: No 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /man/unname_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cypher_query.neo4j_engine.R 3 | \name{unname_cols} 4 | \alias{unname_cols} 5 | \title{Remove names from columns} 6 | \usage{ 7 | unname_cols(df) 8 | } 9 | \arguments{ 10 | \item{df}{Input data frame} 11 | } 12 | \value{ 13 | The input, with unnamed columns 14 | } 15 | \description{ 16 | Given a data-frame like object, runs each column through unname() 17 | } 18 | -------------------------------------------------------------------------------- /R/regex_tilde.R: -------------------------------------------------------------------------------- 1 | #' Infix regular expression match. 2 | #' 3 | #' An infix alias for stringr::str_detect(). 4 | #' 5 | #' @examples 6 | #' rownames(mtcars) %~% "^Merc" 7 | #' 8 | #' @param string A character vector to look for matches in. 9 | #' @param pattern A regular expression to look for. 10 | #' @return A logical vector indicating pattern matches. 11 | #' @export 12 | #' @importFrom stringr str_detect 13 | `%~%` <- function(string, pattern) { 14 | str_detect(string, pattern) 15 | } 16 | -------------------------------------------------------------------------------- /tests/testthat/test-neo4j_engine_check.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | 5 | test_that("neo4j_engine_check works as expected", { 6 | #testthat::skip("temporary skip") 7 | 8 | # we can't test for TRUE here, because this check my be run without a connection 9 | # so we'll just test that it returns logical 10 | expect_type(neo4j_engine_check("https://neo4j.monarchinitiative.org", warn = FALSE), "logical") 11 | # we can test for FALSE however 12 | expect_false(neo4j_engine_check("https://no-such-db.monarchinitiative.org", warn = FALSE)) 13 | }) 14 | -------------------------------------------------------------------------------- /man/grapes-twiddle-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/regex_tilde.R 3 | \name{\%~\%} 4 | \alias{\%~\%} 5 | \title{Infix regular expression match.} 6 | \usage{ 7 | string \%~\% pattern 8 | } 9 | \arguments{ 10 | \item{string}{A character vector to look for matches in.} 11 | 12 | \item{pattern}{A regular expression to look for.} 13 | } 14 | \value{ 15 | A logical vector indicating pattern matches. 16 | } 17 | \description{ 18 | An infix alias for stringr::str_detect(). 19 | } 20 | \examples{ 21 | rownames(mtcars) \%~\% "^Merc" 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/base_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/base_engine.R 3 | \name{base_engine} 4 | \alias{base_engine} 5 | \title{base_engine} 6 | \usage{ 7 | base_engine(name = "default_engine", preferences = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{name}{A character string indicating the name of the engine.} 11 | 12 | \item{preferences}{A named list of preferences for the engine.} 13 | 14 | \item{...}{Other parameters (unused)} 15 | } 16 | \value{ 17 | An object of class \code{base_engine} 18 | } 19 | \description{ 20 | A base class for all engines 21 | } 22 | -------------------------------------------------------------------------------- /man/knit_print.tbl_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/knit_print.tbl_kgx.R 3 | \name{knit_print.tbl_kgx} 4 | \alias{knit_print.tbl_kgx} 5 | \title{Specialized print function for KGX graphs in knitted documents} 6 | \usage{ 7 | \method{knit_print}{tbl_kgx}(x, ..., show = 100) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{tbl_kgx} graph to display.} 11 | 12 | \item{...}{Other arguments (unused).} 13 | 14 | \item{show}{The maximum number of nodes and edges to display.} 15 | } 16 | \description{ 17 | Specialized print function for KGX graphs in knitted documents 18 | } 19 | -------------------------------------------------------------------------------- /tests/testthat/test-summary.neo4j_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("summary() for neo4j_engine", { 5 | res <- summary(monarch_engine(), quiet = TRUE) 6 | 7 | # make sure the output is a list 8 | expect_type(res, "list") 9 | 10 | # let's try a version where we capture the printed output 11 | printed <- capture.output(summary(monarch_engine(), quiet = FALSE)) 12 | 13 | # the result should be a character vector 14 | expect_type(printed, "character") 15 | # one of the lines should be "Total nodes: " 16 | expect_true(any(grepl("Total nodes: ", printed))) 17 | }) 18 | -------------------------------------------------------------------------------- /man/neo2r_to_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cypher_query.neo4j_engine.R 3 | \name{neo2r_to_kgx} 4 | \alias{neo2r_to_kgx} 5 | \title{Process neo2R cypher to tbl_kgx} 6 | \usage{ 7 | neo2r_to_kgx(res, engine) 8 | } 9 | \arguments{ 10 | \item{res}{The result from neo2R::cypher with result = "graph"} 11 | 12 | \item{engine}{The engine to attach to the returned graph} 13 | } 14 | \value{ 15 | A tbl_kgx 16 | } 17 | \description{ 18 | Given a result from neo2R::cypher returning KGX-formatted nodes and edges, 19 | parse the result to generate a tbl_kgx object, attaching the provided engine. 20 | } 21 | -------------------------------------------------------------------------------- /R/get_engine.R: -------------------------------------------------------------------------------- 1 | #' Get most recent engine from a graph. 2 | #' 3 | #' Given a tbl_kgx graph, retrieve the last-used engine. 4 | #' 5 | #' @param g A tbl_kgx graph. 6 | #' @param fail_if_missing If TRUE, fail if there is no engine associated with the graph. 7 | #' @return A graph engine object. 8 | #' @examples 9 | #' # Using example KGX file packaged with monarchr 10 | #' data(eds_marfan_kg) 11 | #' 12 | #' g <- eds_marfan_kg |> 13 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 14 | #' 15 | #' print(get_engine(g)) 16 | #' 17 | #' @export 18 | get_engine <- function(g, fail_if_missing = TRUE) { 19 | UseMethod("get_engine") 20 | } 21 | -------------------------------------------------------------------------------- /man/edges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{edges} 4 | \alias{edges} 5 | \title{Get graph edges table.} 6 | \usage{ 7 | edges(graph, ...) 8 | } 9 | \arguments{ 10 | \item{graph}{Input graph} 11 | 12 | \item{...}{Other options (unused)} 13 | } 14 | \value{ 15 | A tibble with the edges of the graph 16 | } 17 | \description{ 18 | Get graph edges table. 19 | } 20 | \examples{ 21 | # (using the example KGX file packaged with monarchr) 22 | data(eds_marfan_kg) 23 | 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 26 | 27 | print(edges(g)) 28 | } 29 | -------------------------------------------------------------------------------- /man/nodes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{nodes} 4 | \alias{nodes} 5 | \title{Get graph nodes table.} 6 | \usage{ 7 | nodes(graph, ...) 8 | } 9 | \arguments{ 10 | \item{graph}{A graph object} 11 | 12 | \item{...}{Other options (unused)} 13 | } 14 | \value{ 15 | A tibble with the nodes of the graph 16 | } 17 | \description{ 18 | Get graph nodes table. 19 | } 20 | \examples{ 21 | # (using the example KGX file packaged with monarchr) 22 | data(eds_marfan_kg) 23 | 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 26 | 27 | print(nodes(g)) 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test-get_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("get_engine works", { 5 | g <- monarch_search("fanconi anemia", limit = 5) 6 | engine <- get_engine(g) 7 | expect_s3_class(engine, "neo4j_engine") 8 | 9 | 10 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 11 | e <- file_engine(filename) 12 | query_ids = c("MONDO:0007525", "MONDO:0007524") 13 | 14 | g <- fetch_nodes(e, query_ids = query_ids) %>% expand(predicate = "biolink:subclass_of", transitive = TRUE, direction = "out") 15 | engine <- get_engine(g) 16 | expect_s3_class(engine, "file_engine") 17 | }) 18 | -------------------------------------------------------------------------------- /meta/archive/dev.R: -------------------------------------------------------------------------------- 1 | # # bolt://24.144.94.219:7687 2 | 3 | # # Load libraries 4 | # library(tidyverse) 5 | # library(neo4r) 6 | 7 | # library(neo4r) 8 | # con <- neo4j_api$new( 9 | # url = "http://24.144.94.219:7474", 10 | # user = "", 11 | # password = "" 12 | # ) 13 | 14 | # # Test the endpoint, that will not work : 15 | # con$ping() 16 | 17 | # x <- con$get_labels() 18 | # x 19 | 20 | # # get node with id: MONDO:0011476 and it's neighbors 21 | # z <- "MATCH (n)-[r]-(m) WHERE n.id = 'MONDO:0011476' RETURN n" %>% 22 | # call_neo4j(con) 23 | 24 | # ######################### 25 | 26 | # library(neo2R) 27 | # graph <- startGraph("bolt://24.144.94.219:7687") 28 | 29 | -------------------------------------------------------------------------------- /man/explode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{explode} 4 | \alias{explode} 5 | \title{Explode a graph into a list of single-node graphs} 6 | \usage{ 7 | explode(graph, ...) 8 | } 9 | \arguments{ 10 | \item{graph}{A tbl_kgx graph.} 11 | 12 | \item{...}{Other options (unused)} 13 | } 14 | \value{ 15 | A list of tbl_kgx graphs. 16 | } 17 | \description{ 18 | Explode a graph into a list of single-node graphs 19 | } 20 | \examples{ 21 | # (using the example KGX file packaged with monarchr) 22 | data(eds_marfan_kg) 23 | 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 26 | 27 | print(explode(g)) 28 | } 29 | -------------------------------------------------------------------------------- /inst/extdata/README.md: -------------------------------------------------------------------------------- 1 | The `eds_marfan_kg.tar.gz` file in this directory was created with the following on Aug 15, 2024, 2 | to represent Ehlers-Danlos syndrome (MONDO:0020066), Marfan syndrome (MONDO:0007947), all their subtypes, 3 | all entities connected to those diseases or their subtypes, and finally all parents (supertypes) of all 4 | those diseases and entities. 5 | 6 | ``` 7 | monarch_engine() |> 8 | fetch_nodes(query_ids = c("MONDO:0020066", "MONDO:0007947")) |> 9 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |> 10 | expand() |> 11 | expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) |> 12 | save_kgx("eds_marfan_kg.tar.gz") 13 | ``` 14 | -------------------------------------------------------------------------------- /R/flatten_body_for_httr.R: -------------------------------------------------------------------------------- 1 | # for array params in httr queries 2 | # from https://stackoverflow.com/a/72532186 3 | flatten_body_for_httr <- function(x) { 4 | # A form/query can only have one value per name, so take 5 | # any values that contain vectors length >1 and 6 | # split them up 7 | # list(x=1:2, y="a") becomes list(x=1, x=2, y="a") 8 | if (all(lengths(x) <= 1)) return(x); 9 | do.call("c", mapply(function(name, val) { 10 | if (length(val)==1 || any(c("form_file", "form_data") %in% class(val))) { 11 | x <- list(val) 12 | names(x) <- name 13 | x 14 | } else { 15 | x <- as.list(val) 16 | names(x) <- rep(name, length(val)) 17 | x 18 | } 19 | }, names(x), x, USE.NAMES = FALSE, SIMPLIFY = FALSE)) 20 | } 21 | -------------------------------------------------------------------------------- /tests/testthat/test-base_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("base_engine works", { 5 | # first, let's make sure the default preferences is loaded 6 | e <- base_engine() 7 | expect_equal(e$name, "default_engine") 8 | # result should have a category_priority field 9 | expect_true("category_priority" %in% names(e$preferences)) 10 | 11 | # test that we can load preferences via a list 12 | e <- base_engine(preferences = list(category_priority = c("biolink:Gene", "biolink:Disease"))) 13 | expect_equal(e$name, "default_engine") 14 | # result should have a category_priority field of length 2 15 | expect_equal(length(e$preferences$category_priority), 2) 16 | 17 | }) 18 | -------------------------------------------------------------------------------- /R/ancestors.R: -------------------------------------------------------------------------------- 1 | #' Expand ancestors transitively 2 | #' 3 | #' Expand a tbl_kgx graph to include all ancestors of nodes defined transitively 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around 5 | #' expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) 6 | #' 7 | #' 8 | #' @param g A `tbl_kgx()` graph to expand. 9 | #' @param ... Other parameters (unused). 10 | #' 11 | #' @return A tbl_kgx graph. 12 | #' @export 13 | #' @examples 14 | #' data(eds_marfan_kg) 15 | #' g <- eds_marfan_kg |> 16 | #' fetch_nodes(query_ids = "MONDO:0020066") |> 17 | #' ancestors() 18 | #' 19 | #' @import tidygraph 20 | #' @import dplyr 21 | ancestors <- function(g, ...) { 22 | UseMethod("ancestors") 23 | } 24 | -------------------------------------------------------------------------------- /tests/testthat/test-summary.file_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("summary() for file_engine", { 5 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 6 | engine <- file_engine(filename) 7 | 8 | res <- summary(engine, quiet = TRUE) 9 | 10 | # make sure the output is a list 11 | expect_type(res, "list") 12 | 13 | # let's try a version where we capture the printed output 14 | printed <- capture.output(summary(engine, quiet = FALSE)) 15 | 16 | # the result should be a character vector 17 | expect_type(printed, "character") 18 | # one of the lines should be "Total nodes: " 19 | expect_true(any(grepl("Total nodes: ", printed))) 20 | }) 21 | -------------------------------------------------------------------------------- /R/descendants.R: -------------------------------------------------------------------------------- 1 | #' Expand descendants transitively 2 | #' 3 | #' Expand a tbl_kgx graph to include all descendants of nodes defined transitively 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around 5 | #' expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) 6 | #' 7 | #' 8 | #' @param g A `tbl_kgx()` graph to expand. 9 | #' @param ... Other parameters (unused). 10 | #' 11 | #' @return A tbl_kgx graph. 12 | #' @export 13 | #' @examples 14 | #' data(eds_marfan_kg) 15 | #' g <- eds_marfan_kg |> 16 | #' fetch_nodes(query_ids = "MONDO:0020066") |> 17 | #' descendants() 18 | #' 19 | #' @import tidygraph 20 | #' @import dplyr 21 | descendants <- function(g, ...) { 22 | UseMethod("descendants") 23 | } 24 | -------------------------------------------------------------------------------- /man/get_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_engine.R 3 | \name{get_engine} 4 | \alias{get_engine} 5 | \title{Get most recent engine from a graph.} 6 | \usage{ 7 | get_engine(g, fail_if_missing = TRUE) 8 | } 9 | \arguments{ 10 | \item{g}{A tbl_kgx graph.} 11 | 12 | \item{fail_if_missing}{If TRUE, fail if there is no engine associated with the graph.} 13 | } 14 | \value{ 15 | A graph engine object. 16 | } 17 | \description{ 18 | Given a tbl_kgx graph, retrieve the last-used engine. 19 | } 20 | \examples{ 21 | # Using example KGX file packaged with monarchr 22 | data(eds_marfan_kg) 23 | 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 26 | 27 | print(get_engine(g)) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /man/ancestors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ancestors.R 3 | \name{ancestors} 4 | \alias{ancestors} 5 | \title{Expand ancestors transitively} 6 | \usage{ 7 | ancestors(g, ...) 8 | } 9 | \arguments{ 10 | \item{g}{A \code{tbl_kgx()} graph to expand.} 11 | 12 | \item{...}{Other parameters (unused).} 13 | } 14 | \value{ 15 | A tbl_kgx graph. 16 | } 17 | \description{ 18 | Expand a tbl_kgx graph to include all ancestors of nodes defined transitively 19 | by "biolink:subclass_of" relationships. This is a simple wrapper around 20 | expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) 21 | } 22 | \examples{ 23 | data(eds_marfan_kg) 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = "MONDO:0020066") |> 26 | ancestors() 27 | 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test-save_load_kgx.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("save_kgx and load_kgx work as expected", { 5 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 6 | g <- file_engine(filename) |> 7 | fetch_nodes(limit = 5) |> 8 | expand() 9 | 10 | save_kgx(g, "test.tar.gz") 11 | 12 | g2 <- load_kgx("test.tar.gz") 13 | 14 | expect_equal(nrow(nodes(g)), nrow(nodes(g2))) 15 | expect_equal(nrow(edges(g)), nrow(edges(g2))) 16 | 17 | expect_true(all(nodes(g)$id %in% nodes(g2)$id)) 18 | expect_true(all(nodes(g2)$id %in% nodes(g)$id)) 19 | 20 | expect_true("pcategory" %in% colnames(nodes(g2))) 21 | expect_true("category" %in% colnames(nodes(g2))) 22 | expect_true(is.list(nodes(g2)$category)) 23 | 24 | file.remove("test.tar.gz") 25 | }) 26 | -------------------------------------------------------------------------------- /R/get_engine.tbl_kgx.R: -------------------------------------------------------------------------------- 1 | #' Get most recent engine from a graph. 2 | #' 3 | #' Given a tbl_kgx graph, retrieve the last-used engine. 4 | #' 5 | #' @param g A tbl_kgx graph. 6 | #' @param fail_if_missing If TRUE, fail if there is no engine associated with the graph. 7 | #' @return A graph engine object. 8 | #' @examples 9 | #' # Using example KGX file packaged with monarchr 10 | #' data(eds_marfan_kg) 11 | #' 12 | #' g <- eds_marfan_kg |> 13 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 14 | #' 15 | #' print(get_engine(g)) 16 | #' 17 | #' @export 18 | get_engine.tbl_kgx <- function(g, fail_if_missing = TRUE) { 19 | engine <- attr(g, "last_engine") 20 | if (is.null(engine) && fail_if_missing) { 21 | stop("No engine associated with this graph. Unable to proceed.") 22 | } 23 | return(engine) 24 | } 25 | -------------------------------------------------------------------------------- /man/descendants.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/descendants.R 3 | \name{descendants} 4 | \alias{descendants} 5 | \title{Expand descendants transitively} 6 | \usage{ 7 | descendants(g, ...) 8 | } 9 | \arguments{ 10 | \item{g}{A \code{tbl_kgx()} graph to expand.} 11 | 12 | \item{...}{Other parameters (unused).} 13 | } 14 | \value{ 15 | A tbl_kgx graph. 16 | } 17 | \description{ 18 | Expand a tbl_kgx graph to include all descendants of nodes defined transitively 19 | by "biolink:subclass_of" relationships. This is a simple wrapper around 20 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) 21 | } 22 | \examples{ 23 | data(eds_marfan_kg) 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = "MONDO:0020066") |> 26 | descendants() 27 | 28 | } 29 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "r.linting.assignmentType": false, 3 | "r.linting.closeCurlySeparateLine": false, 4 | "r.linting.doubleQuotes": false, 5 | "r.linting.multipleDots": false, 6 | "r.linting.multipleStatements": false, 7 | "r.linting.noSpaceAfterFunctionName": false, 8 | "r.linting.noTabs": false, 9 | "r.linting.openCurlyPosition": false, 10 | "r.linting.pascalCase": false, 11 | "r.linting.semicolons": false, 12 | "r.linting.spaceBeforeOpenBrace": false, 13 | "r.linting.spacesAroundComma": false, 14 | "r.linting.spacesAroundOperators": false, 15 | "r.linting.spacesInsideParenthesis": false, 16 | "r.linting.trailingBlankLines": false, 17 | "r.linting.trailingWhitespace": false, 18 | "r.linting.trueFalseNames": false, 19 | "r.linting.upperCase": false 20 | } -------------------------------------------------------------------------------- /man/get_engine.tbl_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_engine.tbl_kgx.R 3 | \name{get_engine.tbl_kgx} 4 | \alias{get_engine.tbl_kgx} 5 | \title{Get most recent engine from a graph.} 6 | \usage{ 7 | \method{get_engine}{tbl_kgx}(g, fail_if_missing = TRUE) 8 | } 9 | \arguments{ 10 | \item{g}{A tbl_kgx graph.} 11 | 12 | \item{fail_if_missing}{If TRUE, fail if there is no engine associated with the graph.} 13 | } 14 | \value{ 15 | A graph engine object. 16 | } 17 | \description{ 18 | Given a tbl_kgx graph, retrieve the last-used engine. 19 | } 20 | \examples{ 21 | # Using example KGX file packaged with monarchr 22 | data(eds_marfan_kg) 23 | 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 26 | 27 | print(get_engine(g)) 28 | 29 | } 30 | -------------------------------------------------------------------------------- /tests/testthat/test-file_engine_check.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | 5 | test_that("file_engine_check works as expected", { 6 | #testthat::skip("temporary skip") 7 | 8 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 9 | expect_true(file_engine_check(filename, warn = FALSE)) 10 | 11 | filename <- system.file("extdata", "nosuch_kgx_tsv.tar.gz", package = "monarchr") 12 | expect_false(file_engine_check(filename, warn = FALSE)) 13 | 14 | # we can try to connect to a remote file, but we can't test for TRUE here 15 | # because this check my be run without a connection 16 | # so we'll just test that it returns logical 17 | expect_type(file_engine_check("https://no-such-host.kghub.io/kg-obo/dummy/dummy_kgx_tsv.tar.gz", warn = FALSE), "logical") 18 | }) 19 | -------------------------------------------------------------------------------- /R/ancestors.tbl_kgx.R: -------------------------------------------------------------------------------- 1 | #' Expand ancestors transitively 2 | #' 3 | #' Expand a tbl_kgx graph to include all ancestors of nodes defined transitively 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around 5 | #' expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) 6 | #' 7 | #' 8 | #' @param g A `tbl_kgx()` graph to expand. 9 | #' @param ... Other parameters (unused). 10 | #' 11 | #' @return A tbl_kgx graph. 12 | #' @export 13 | #' @examples 14 | #' data(eds_marfan_kg) 15 | #' g <- eds_marfan_kg |> 16 | #' fetch_nodes(query_ids = "MONDO:0020066") |> 17 | #' ancestors() 18 | #' 19 | #' @import tidygraph 20 | #' @import dplyr 21 | ancestors.tbl_kgx <- function(g, ...) { 22 | return(g |> expand(predicates = "biolink:subclass_of", 23 | direction = "out", 24 | transitive = TRUE)) 25 | } 26 | -------------------------------------------------------------------------------- /man/ancestors.tbl_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ancestors.tbl_kgx.R 3 | \name{ancestors.tbl_kgx} 4 | \alias{ancestors.tbl_kgx} 5 | \title{Expand ancestors transitively} 6 | \usage{ 7 | \method{ancestors}{tbl_kgx}(g, ...) 8 | } 9 | \arguments{ 10 | \item{g}{A \code{tbl_kgx()} graph to expand.} 11 | 12 | \item{...}{Other parameters (unused).} 13 | } 14 | \value{ 15 | A tbl_kgx graph. 16 | } 17 | \description{ 18 | Expand a tbl_kgx graph to include all ancestors of nodes defined transitively 19 | by "biolink:subclass_of" relationships. This is a simple wrapper around 20 | expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) 21 | } 22 | \examples{ 23 | data(eds_marfan_kg) 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = "MONDO:0020066") |> 26 | ancestors() 27 | 28 | } 29 | -------------------------------------------------------------------------------- /R/descendants.tbl_kgx.R: -------------------------------------------------------------------------------- 1 | #' Expand descendants transitively 2 | #' 3 | #' Expand a tbl_kgx graph to include all descendants of nodes defined transitively 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around 5 | #' expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) 6 | #' 7 | #' 8 | #' @param g A `tbl_kgx()` graph to expand. 9 | #' @param ... Other parameters (unused). 10 | #' 11 | #' @return A tbl_kgx graph. 12 | #' @export 13 | #' @examples 14 | #' data(eds_marfan_kg) 15 | #' g <- eds_marfan_kg |> 16 | #' fetch_nodes(query_ids = "MONDO:0020066") |> 17 | #' descendants() 18 | #' 19 | #' @import tidygraph 20 | #' @import dplyr 21 | descendants.tbl_kgx <- function(g, ...) { 22 | return(g |> expand(predicates = "biolink:subclass_of", 23 | direction = "in", 24 | transitive = TRUE)) 25 | } 26 | -------------------------------------------------------------------------------- /R/set_engine.R: -------------------------------------------------------------------------------- 1 | #' Set the engine for a graph. See warning in details. 2 | #' 3 | #' Sets a given graph's engine to a given engine object and returns the graph. 4 | #' WARNING: changing the backing engine for a graph dynamically is not 5 | #' yet fully supported or tested. 6 | #' 7 | #' @param g A tbl_kgx graph. 8 | #' @param engine An engine object. 9 | #' @return A tbl_kgx graph. 10 | #' @examples 11 | #' # Using example KGX file packaged with monarchr 12 | #' data(eds_marfan_kg) 13 | #' 14 | #' g <- eds_marfan_kg |> 15 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 16 | #' 17 | #' other_engine <- eds_marfan_kg # this could be a different file-engine (see `file_engine()`) 18 | #' g <- set_engine(g, other_engine) 19 | #' 20 | #' print(get_engine(g)) 21 | #' 22 | #' @export 23 | set_engine <- function(g, engine) { 24 | UseMethod("set_engine") 25 | } 26 | -------------------------------------------------------------------------------- /man/descendants.tbl_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/descendants.tbl_kgx.R 3 | \name{descendants.tbl_kgx} 4 | \alias{descendants.tbl_kgx} 5 | \title{Expand descendants transitively} 6 | \usage{ 7 | \method{descendants}{tbl_kgx}(g, ...) 8 | } 9 | \arguments{ 10 | \item{g}{A \code{tbl_kgx()} graph to expand.} 11 | 12 | \item{...}{Other parameters (unused).} 13 | } 14 | \value{ 15 | A tbl_kgx graph. 16 | } 17 | \description{ 18 | Expand a tbl_kgx graph to include all descendants of nodes defined transitively 19 | by "biolink:subclass_of" relationships. This is a simple wrapper around 20 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) 21 | } 22 | \examples{ 23 | data(eds_marfan_kg) 24 | g <- eds_marfan_kg |> 25 | fetch_nodes(query_ids = "MONDO:0020066") |> 26 | descendants() 27 | 28 | } 29 | -------------------------------------------------------------------------------- /meta/checklist.txt: -------------------------------------------------------------------------------- 1 | Basic functionality: 2 | 3 | Engines: 4 | - neo4j_engine | dev [x] test [x] 5 | - file_engine | dev [x] test [x] 6 | - monarch_engine | dev [x] test [x] 7 | 8 | Engine Features: 9 | - seach_kg 10 | - neo4j_engine | dev [x] test [x] 11 | - file_engine | dev [x] test [x] 12 | - monarch_engine | dev [x] test [x] 13 | 14 | - fetch_nodes 15 | - neo4j_engine | dev [x] test [x] 16 | - monarch_engine | (inherit from neo4j_engine) 17 | - file_engine | dev [x] test [x] 18 | 19 | Query Graph Features: 20 | - fetch_edges 21 | - neo4j_engine | dev [x] test [x] 22 | - monarch_engine | (inherit from neo4j_engine) 23 | - file_engine | dev [ ] test [ ] 24 | 25 | - summarize_neighborhood 26 | - neo4j_engine | dev [x] test [x] 27 | - monarch_engine | (inherit from neo4j_engine) 28 | - file_engine | dev [ ] test [ ] 29 | 30 | -------------------------------------------------------------------------------- /R/fetch_nodes.file_engine.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @import tidygraph 3 | #' @import dplyr 4 | fetch_nodes.file_engine <- function(engine, ..., query_ids = NULL, limit = NULL) { 5 | if(!is.null(query_ids)) { 6 | res <- engine$graph %>% 7 | activate(nodes) %>% 8 | filter(id %in% query_ids) 9 | } else { 10 | res <- engine$graph %>% 11 | activate(nodes) %>% 12 | filter(...) 13 | } 14 | 15 | # we want to drop all the edges to be compatible with the neo4j engine 16 | res <- res %>% 17 | activate(edges) %>% 18 | filter(FALSE) %>% 19 | activate(nodes) %>% 20 | arrange(id) 21 | 22 | if(!is.null(limit)) { 23 | res <- res %>% activate(nodes) %>% slice_head(n = limit) 24 | } 25 | 26 | attr(res, "last_engine") <- engine 27 | 28 | res <- order_cols(res) 29 | 30 | return(res) 31 | } 32 | -------------------------------------------------------------------------------- /R/set_engine.tbl_kgx.R: -------------------------------------------------------------------------------- 1 | #' Set the engine for a graph. See warning in details. 2 | #' 3 | #' Sets a given graph's engine to a given engine object and returns the graph. 4 | #' WARNING: changing the backing engine for a graph dynamically is not 5 | #' yet fully supported or tested. 6 | #' 7 | #' @param g A tbl_kgx graph. 8 | #' @param engine An engine object. 9 | #' @return A tbl_kgx graph. 10 | #' @examples 11 | #' # Using example KGX file packaged with monarchr 12 | #' data(eds_marfan_kg) 13 | #' 14 | #' g <- eds_marfan_kg |> 15 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 16 | #' 17 | #' other_engine <- eds_marfan_kg # this could be a different file engine (see `file_engine()`) 18 | #' g <- set_engine(g, other_engine) 19 | #' 20 | #' print(get_engine(g)) 21 | #' 22 | #' @export 23 | set_engine <- function(g, engine) { 24 | attr(g, "last_engine") <- engine 25 | return(g) 26 | } 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /man/monarch_engine_check.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/monarch_engine_check.R 3 | \name{monarch_engine_check} 4 | \alias{monarch_engine_check} 5 | \title{Check availability of Monarch Initiative API} 6 | \usage{ 7 | monarch_engine_check(warn = TRUE, service = "graph") 8 | } 9 | \arguments{ 10 | \item{warn}{A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.} 11 | 12 | \item{service}{The service to check: "search", "semsim", "graph", or a vector of these. Default is "graph".} 13 | } 14 | \value{ 15 | TRUE if the available features are online, FALSE otherwise. 16 | } 17 | \description{ 18 | Attempts to connect to the Monarch Initiative API and use the specified functionality. Returns FALSE if the API is not available the result is not as expected. 19 | } 20 | \examples{ 21 | print(monarch_engine_check()) 22 | 23 | } 24 | -------------------------------------------------------------------------------- /man/graph_sparsity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph_sparsity.R 3 | \name{graph_sparsity} 4 | \alias{graph_sparsity} 5 | \title{Compute sparsity} 6 | \usage{ 7 | graph_sparsity(x, fun = igraph::as_adjacency_matrix, ...) 8 | } 9 | \arguments{ 10 | \item{x}{An igraph object or a (sparse) matrix.} 11 | 12 | \item{fun}{Function to convert graph to matrix.} 13 | 14 | \item{...}{Arguments passed to \code{fun}.} 15 | } 16 | \value{ 17 | A numeric value representing the proportion of zero 18 | values in the graph/matrix. 19 | } 20 | \description{ 21 | Compute sparsity (proportion of zero values) in a matrix or graph. 22 | } 23 | \examples{ 24 | ## Using example KGX file packaged with monarchr 25 | data(eds_marfan_kg) 26 | g <- eds_marfan_kg |> 27 | fetch_nodes(query_ids = "MONDO:0007525") |> 28 | expand(predicates = "biolink:has_phenotype", 29 | categories = "biolink:PhenotypicFeature") 30 | 31 | graph_sparsity(g) 32 | } 33 | -------------------------------------------------------------------------------- /R/expand.tbl_kgx.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @import tidygraph 3 | #' @import dplyr 4 | #' @importFrom assertthat assert_that 5 | expand.tbl_kgx <- function(graph, ...) { 6 | # check to see if g has a last_engine attribute 7 | active_tbl <- active(graph) 8 | if(!is.null(attr(graph, "last_engine"))) { 9 | engine <- attr(graph, "last_engine") 10 | if(any(c("monarch_engine", "neo4j_engine") %in% class(engine))) { 11 | res <- expand_neo4j_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl)) 12 | return(res) 13 | } else if("file_engine" %in% class(engine)) { 14 | res <- expand_file_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl)) 15 | return(res) 16 | } else { 17 | stop("Error: unknown or incompatible engine.") 18 | } 19 | # return(expand(engine, graph, ...)) # this shouldn't be reachable 20 | } else { 21 | stop("Error: tbl_kgx object does not have a most recent engine.") 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /R/graph_sparsity.R: -------------------------------------------------------------------------------- 1 | #' Compute sparsity 2 | #' 3 | #' Compute sparsity (proportion of zero values) in a matrix or graph. 4 | #' @param x An igraph object or a (sparse) matrix. 5 | #' @param fun Function to convert graph to matrix. 6 | #' @param ... Arguments passed to \code{fun}. 7 | #' @returns A numeric value representing the proportion of zero 8 | #' values in the graph/matrix. 9 | #' @export 10 | #' @examples 11 | #' ## Using example KGX file packaged with monarchr 12 | #' data(eds_marfan_kg) 13 | #' g <- eds_marfan_kg |> 14 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 15 | #' expand(predicates = "biolink:has_phenotype", 16 | #' categories = "biolink:PhenotypicFeature") 17 | #' 18 | #' graph_sparsity(g) 19 | graph_sparsity <- function(x, 20 | fun=igraph::as_adjacency_matrix, 21 | ...){ 22 | if(is(x,"igraph")){ 23 | x <- fun(x) 24 | } 25 | if(is(x,"Matrix")||is(x,"sparseMatrix")){ 26 | return(sum(x==0)/length(x)) 27 | } 28 | else{ 29 | stop("x must be an igraph object or a (sparse) matrix.") 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /tests/testthat/test-summarize_neighborhood.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("summarize_neighborhood returns reasonable results", { 5 | # skip for now 6 | #testthat::skip("temporary skip") 7 | 8 | g <- monarch_engine() |> fetch_nodes(query_ids = "MONDO:0019391") 9 | 10 | result <- summarize_neighborhood(g, summarize = "edges") 11 | disease_subclass_of_disease <- result %>% filter(predicate == "biolink:subclass_of" & 12 | query_pcategory == "biolink:Disease" & 13 | result_pcategory == "biolink:Disease") 14 | 15 | expect_contains(30 + -4:4, disease_subclass_of_disease$count) 16 | expect_contains(1 + 0:2, nrow(disease_subclass_of_disease)) 17 | 18 | result <- summarize_neighborhood(g, summarize = "nodes") 19 | disease_nodes <- result %>% filter(pcategory == "biolink:Disease") 20 | expect_contains(30 + -4:4, disease_nodes$count) 21 | expect_contains(1 + 0:2, nrow(disease_nodes)) 22 | }) 23 | -------------------------------------------------------------------------------- /man/roll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rollup.R 3 | \name{roll} 4 | \alias{roll} 5 | \title{Internal function for rolling up and down} 6 | \usage{ 7 | roll( 8 | column, 9 | fun = c, 10 | include_self = TRUE, 11 | predicates = "biolink:subclass_of", 12 | direction = "up", 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{column}{The node column to draw rollup or rolldown information from.} 18 | 19 | \item{fun}{The aggregation function to use when rolling up or down. Default is \code{c}} 20 | 21 | \item{include_self}{Whether to include each nodes' value in \code{column} in the rollup/rolldown for that node.} 22 | 23 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the rollup/rolldown. Should be transitive; default \code{biolink:subclass_of}} 24 | 25 | \item{direction}{Whether to roll up or down.} 26 | 27 | \item{...}{Other parameters (unused)} 28 | } 29 | \description{ 30 | Internal function for rolling up and down 31 | } 32 | -------------------------------------------------------------------------------- /man/normalize_categories.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/normalize_categories.R 3 | \name{normalize_categories} 4 | \alias{normalize_categories} 5 | \title{Normalize Categories} 6 | \usage{ 7 | normalize_categories(cats_list, cats_prefs) 8 | } 9 | \arguments{ 10 | \item{cats_list}{A list of vectors of categories.} 11 | 12 | \item{cats_prefs}{An ordered preference list over categories.} 13 | } 14 | \value{ 15 | A vector of normalized categories. 16 | } 17 | \description{ 18 | This function takes a list of vectors of categories and an ordered preference list over categories. 19 | It selects the most preferred category from each vector, or the first category if no preferred categories are included. 20 | } 21 | \examples{ 22 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 23 | categories_list <- list(c("A", "B", "C"), c("D", "E", "F")) 24 | categories_prefs <- c("B", "E", "A", "D", "C", "F") 25 | normalize_categories(categories_list, categories_prefs) 26 | \dontshow{\}) # examplesIf} 27 | } 28 | -------------------------------------------------------------------------------- /R/in_list.R: -------------------------------------------------------------------------------- 1 | #' Check if an element is contained in list sub-elements 2 | #' 3 | #' This function is useful for filtering dataframes by list columns. It checks if an element is contained in each list element, returning a logical vector. Note 4 | #' that this is different than `%in%` which does not work with list columns in a natural manner. 5 | #' 6 | #' @examples 7 | #' library(dplyr) 8 | #' 9 | #' df <- tibble(id = c("A", "B", "C"), list_col = list(1:3, 2:4, 3:5)) 10 | #' 11 | #' # works naturally for list columns 12 | #' df %>% filter(2 %in_list% list_col) 13 | #' 14 | #' # also works with basic vector columns 15 | #' df %>% filter("B" %in_list% id) 16 | #' 17 | #' # does not work with multi-valued left hand sides 18 | #' # df %>% filter(c("B", "C") %in_list% id) # error 19 | #' 20 | #' @param element The element to check for 21 | #' @param list A list of elements to check against 22 | #' @return A logical vector indicating if the element is in each list element 23 | #' @export 24 | `%in_list%` <- function(element, list) { 25 | vapply(list, function(x) element %in% x, logical(1)) 26 | } -------------------------------------------------------------------------------- /tests/testthat/test-monarch_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | 5 | test_that("monarch_engine works as expected", { 6 | #testthat::skip("temporary skip") 7 | 8 | e <- monarch_engine() 9 | g <- fetch_nodes(e, query_ids = "MONDO:0006043") 10 | # this should have 6 subtypes (two direct, four under one of the direct children) 11 | subtypes <- g %>% expand(direction = "in", 12 | predicates = "biolink:subclass_of", 13 | transitive = TRUE) 14 | 15 | nodes_df <- subtypes %>% activate(nodes) %>% as.data.frame() 16 | edges_df <- subtypes %>% activate(edges) %>% as.data.frame() 17 | expect_contains(7 + -2:2, nrow(nodes_df)) 18 | expect_contains(6 + -2:2, nrow(edges_df)) 19 | 20 | # there should be a pcategory col of type character 21 | expect_true("pcategory" %in% names(nodes_df)) 22 | expect_true(is.character(nodes_df$pcategory)) 23 | 24 | # there should be a category col of type list 25 | expect_true("category" %in% names(nodes_df)) 26 | expect_true(is.list(nodes_df$category)) 27 | }) 28 | -------------------------------------------------------------------------------- /R/cypher_query.R: -------------------------------------------------------------------------------- 1 | #' Execute a Cypher Query 2 | #' 3 | #' This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a tbl_kgx graph. 4 | #' 5 | #' @param engine A neo4j KG engine 6 | #' @param query A string representing the Cypher query. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a single joined graph. 7 | #' @param parameters A list of parameters for the Cypher query. Default is an empty list. 8 | #' @param ... Additional arguments passed to the function. 9 | #' @return The result of the Cypher query as a tbl_kgx graph. 10 | #' @export 11 | #' @examplesIf monarch_engine_check() 12 | #' engine <- monarch_engine() 13 | #' 14 | #' query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10" 15 | #' ids <- c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021") 16 | #' parameters <- list(ids = ids) 17 | #' 18 | #' result <- cypher_query(engine, query, parameters) 19 | #' print(result) 20 | #' @importFrom neo2R cypher 21 | cypher_query <- function(engine, query, parameters = NULL, ...) { 22 | UseMethod("cypher_query") 23 | } 24 | 25 | -------------------------------------------------------------------------------- /R/order_cols.R: -------------------------------------------------------------------------------- 1 | #' Set edge/row data column order according to most recent engine preferences 2 | #' @param g A `tbl_kgx` graph. 3 | #' @import dplyr 4 | #' @import tidygraph 5 | order_cols <- function(g) { 6 | e <- attr(g, "last_engine") 7 | node_prefs <- c("id", "pcategory", "name") 8 | edge_prefs <- c("subject", "predicate", "object") 9 | 10 | if(!is.null(e)) { 11 | node_prefs <- e$preferences$node_property_priority 12 | edge_prefs <- e$preferences$edge_property_priority 13 | } 14 | 15 | current_node_names <- colnames(nodes(g)) 16 | used_prefs_node_names <- node_prefs[node_prefs %in% current_node_names] 17 | set_node_names <- c(used_prefs_node_names, setdiff(current_node_names, used_prefs_node_names)) 18 | 19 | current_edge_names <- colnames(edges(g)) 20 | used_prefs_edge_names <- edge_prefs[edge_prefs %in% current_edge_names] 21 | set_edge_names <- c(used_prefs_edge_names, setdiff(current_edge_names, used_prefs_edge_names)) 22 | 23 | res <- g |> 24 | activate(nodes) |> 25 | select(all_of(set_node_names)) |> 26 | activate(edges) |> 27 | select(all_of(set_edge_names)) |> 28 | activate(nodes) 29 | 30 | return(res) 31 | } 32 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 monarchr authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /man/example_graph.file_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/example_graph.file_engine.R 3 | \name{example_graph.file_engine} 4 | \alias{example_graph.file_engine} 5 | \title{Return an example set of nodes from a KG engine.} 6 | \usage{ 7 | \method{example_graph}{file_engine}(engine, ...) 8 | } 9 | \arguments{ 10 | \item{engine}{A \code{file_engine} object} 11 | 12 | \item{...}{Other parameters (not used)} 13 | } 14 | \value{ 15 | A tbl_kgx graph 16 | } 17 | \description{ 18 | Given a KGX file-based KG engine, returns a graph representing the diversity 19 | of node categories and edge predicates for browsing. The returned graph is guaranteed to 20 | contain at least one node of every category, and at least one edge of every 21 | predicate. No other guarantees are made: the example graph is not minimal 22 | to satisfy these criteria, it is not random or even pseudo-random, and it 23 | may not be connected. 24 | } 25 | \examples{ 26 | # Using example KGX file packaged with monarchr 27 | data(eds_marfan_kg) 28 | 29 | # Retrieve and print an example graph: 30 | g <- eds_marfan_kg |> example_graph() 31 | print(g) 32 | } 33 | -------------------------------------------------------------------------------- /R/example_graph.R: -------------------------------------------------------------------------------- 1 | #' Return an example set of nodes from a KG engine. 2 | #' 3 | #' Given a KG engine, returns a graph representing the diversity 4 | #' of node categories and edge predicates for browsing. The returned graph is guaranteed to 5 | #' contain at least one node of every category, and at least one edge of every 6 | #' predicate. No other guarantees are made: the example graph is not minimal 7 | #' to satisfy these criteria, it is not random or even pseudo-random, and it 8 | #' may not be connected. 9 | #' 10 | #' @param engine A KG engine object 11 | #' @param ... Other parameters (not used) 12 | #' @return A tbl_kgx graph 13 | #' @export 14 | #' @examples 15 | #' # Using example KGX file packaged with monarchr 16 | #' data(eds_marfan_kg) 17 | #' 18 | #' # prints a readable summary and returns a list of dataframes 19 | #' g <- eds_marfan_kg |> example_graph() 20 | #' print(g) 21 | #' 22 | #' @examplesIf monarch_engine_check() 23 | #' # prints a readable summary and returns a list of dataframes 24 | #' g <- monarch_engine() |> example_graph() 25 | #' print(g) 26 | #' @import tidygraph 27 | #' @import dplyr 28 | example_graph <- function(engine, ...) { 29 | UseMethod("example_graph") 30 | } 31 | 32 | -------------------------------------------------------------------------------- /man/grapes-in_list-grapes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/in_list.R 3 | \name{\%in_list\%} 4 | \alias{\%in_list\%} 5 | \title{Check if an element is contained in list sub-elements} 6 | \usage{ 7 | element \%in_list\% list 8 | } 9 | \arguments{ 10 | \item{element}{The element to check for} 11 | 12 | \item{list}{A list of elements to check against} 13 | } 14 | \value{ 15 | A logical vector indicating if the element is in each list element 16 | } 17 | \description{ 18 | This function is useful for filtering dataframes by list columns. It checks if an element is contained in each list element, returning a logical vector. Note 19 | that this is different than \code{\%in\%} which does not work with list columns in a natural manner. 20 | } 21 | \examples{ 22 | library(dplyr) 23 | 24 | df <- tibble(id = c("A", "B", "C"), list_col = list(1:3, 2:4, 3:5)) 25 | 26 | # works naturally for list columns 27 | df \%>\% filter(2 \%in_list\% list_col) 28 | 29 | # also works with basic vector columns 30 | df \%>\% filter("B" \%in_list\% id) 31 | 32 | # does not work with multi-valued left hand sides 33 | # df \%>\% filter(c("B", "C") \%in_list\% id) # error 34 | 35 | } 36 | -------------------------------------------------------------------------------- /man/example_graph.neo4j_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/example_graph.neo4j_engine.R 3 | \name{example_graph.neo4j_engine} 4 | \alias{example_graph.neo4j_engine} 5 | \title{Return an example set of nodes from a KG engine.} 6 | \usage{ 7 | \method{example_graph}{neo4j_engine}(engine, ...) 8 | } 9 | \arguments{ 10 | \item{engine}{A \code{neo4j_engine} object} 11 | 12 | \item{...}{Other parameters (not used)} 13 | } 14 | \value{ 15 | A tbl_kgx graph 16 | } 17 | \description{ 18 | Given a KGX Neo4j KG engine, returns a graph representing the diversity 19 | of node categories and edge predicates for browsing. The returned graph is guaranteed to 20 | contain at least one node of every category, and at least one edge of every 21 | predicate. No other guarantees are made: the example graph is not minimal 22 | to satisfy these criteria, it is not random or even pseudo-random, and it 23 | may not be connected. 24 | } 25 | \examples{ 26 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 27 | # Retrieve and print an example graph: 28 | g <- monarch_engine() |> example_graph() 29 | print(g) 30 | \dontshow{\}) # examplesIf} 31 | } 32 | -------------------------------------------------------------------------------- /R/normalize_categories.R: -------------------------------------------------------------------------------- 1 | #' Normalize Categories 2 | #' 3 | #' This function takes a list of vectors of categories and an ordered preference list over categories. 4 | #' It selects the most preferred category from each vector, or the first category if no preferred categories are included. 5 | #' 6 | #' @param cats_list A list of vectors of categories. 7 | #' @param cats_prefs An ordered preference list over categories. 8 | #' @return A vector of normalized categories. 9 | #' @examplesIf FALSE 10 | #' categories_list <- list(c("A", "B", "C"), c("D", "E", "F")) 11 | #' categories_prefs <- c("B", "E", "A", "D", "C", "F") 12 | #' normalize_categories(categories_list, categories_prefs) 13 | normalize_categories <- function(cats_list, cats_prefs) { 14 | normed <- unlist(lapply(cats_list, function(categories) { 15 | positions <- match(categories, cats_prefs) 16 | 17 | # If all matches are NO, there is no preference match, so use the first label; 18 | # otherwise, use the label with the minimum position in the preferred list 19 | most_preferred_label <- if (all(is.na(positions))) { 20 | categories[1] 21 | } else { 22 | categories[which.min(positions)] 23 | } 24 | 25 | most_preferred_label 26 | })) 27 | 28 | return(normed) 29 | } 30 | -------------------------------------------------------------------------------- /R/cypher_query_df.R: -------------------------------------------------------------------------------- 1 | #' Execute a Cypher Query 2 | #' 3 | #' This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a data frame. 4 | #' 5 | #' @param engine A neo4j_engine() or derivative providing access to a Neo4j database. 6 | #' @param query A string representing the Cypher query, which should return a table. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a list of data frames. 7 | #' @param parameters A list of parameters for the Cypher query, if required. 8 | #' @param ... Additional arguments passed to the function. 9 | #' @return The result of the Cypher query as a data frame, or a list of data frames if multiple queries are passed. 10 | #' @export 11 | #' @examplesIf monarch_engine_check() 12 | #' engine <- monarch_engine() 13 | #' 14 | #' query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10" 15 | #' parameters <- list(ids = c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021")) 16 | #' 17 | #' result <- cypher_query_df(engine, query, parameters) 18 | #' print(result) 19 | #' @importFrom neo2R cypher 20 | cypher_query_df <- function(engine, query, parameters = NULL, ...) { 21 | UseMethod("cypher_query_df") 22 | } 23 | -------------------------------------------------------------------------------- /man/graph_centrality.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph_centrality.R 3 | \name{graph_centrality} 4 | \alias{graph_centrality} 5 | \title{Add centrality} 6 | \usage{ 7 | graph_centrality( 8 | graph, 9 | fun = igraph::harmonic_centrality, 10 | col = "centrality", 11 | ... 12 | ) 13 | } 14 | \arguments{ 15 | \item{graph}{A graph object} 16 | 17 | \item{fun}{The centrality function to use. 18 | Default is \link[igraph]{harmonic_centrality}.} 19 | 20 | \item{col}{Name of the new node attribute to store the centrality score.} 21 | 22 | \item{...}{Additional arguments passed to the centrality function 23 | (\code{sim_fun}).} 24 | } 25 | \value{ 26 | Graph object with centrality added as a new node attribute. 27 | } 28 | \description{ 29 | First computes of each node in a graph. 30 | Then adds the centrality score as an node attribute. 31 | } 32 | \examples{ 33 | data(eds_marfan_kg) 34 | g <- eds_marfan_kg |> 35 | fetch_nodes(query_ids = "MONDO:0007525") |> 36 | expand(predicates = "biolink:has_phenotype", 37 | categories = "biolink:PhenotypicFeature")|> 38 | expand(categories = "biolink:Gene") 39 | g <- graph_centrality(g) 40 | nodes(g)$centrality 41 | } 42 | -------------------------------------------------------------------------------- /inst/association_type_mappings2.yaml: -------------------------------------------------------------------------------- 1 | - subject_type: Disease 2 | object_type: Phenotype 3 | category: "biolink:DiseaseToPhenotypicFeatureAssociation" 4 | - subject_type: Gene 5 | object_type: Phenotype 6 | category: "biolink:GeneToPhenotypicFeatureAssociation" 7 | - subject_type: Gene 8 | object_type: Gene 9 | symmetric: true 10 | category: "biolink:PairwiseGeneToGeneInteraction" 11 | - subject_type: Gene 12 | object_type: Pathway 13 | category: "biolink:GeneToPathwayAssociation" 14 | - subject_type: Gene 15 | object_type: Anatomy 16 | category: "biolink:GeneToExpressionSiteAssociation" 17 | - subject_type: Orthologs 18 | object_type: Orthologs 19 | symmetric: true 20 | category: "biolink:GeneToGeneHomologyAssociation" 21 | - subject_type: Chemicals 22 | object_type: Pathways 23 | category: "biolink:ChemicalToPathwayAssociation" 24 | - subject_type: Genes 25 | object_type: Molecular Functions 26 | category: "biolink:MacromolecularMachineToMolecularActivityAssociation" 27 | - subject_type: Causal Genes 28 | object_type: Causal Diseases 29 | category: "biolink:CausalGeneToDiseaseAssociation" 30 | - subject_type: "Correlated Genes" 31 | object_type: "Correlated Diseases" 32 | category: "biolink:CorrelatedGeneToDiseaseAssociation" 33 | -------------------------------------------------------------------------------- /man/monarch_search.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/monarch_search.R 3 | \name{monarch_search} 4 | \alias{monarch_search} 5 | \title{Search for KG nodes using the Monarch Initiative search API} 6 | \usage{ 7 | monarch_search(query, category = NULL, limit = 10, ...) 8 | } 9 | \arguments{ 10 | \item{query}{Search query string, e.g. "Cystic fibrosis"} 11 | 12 | \item{category}{A set of node category labels to limit the search to, e.g. c("biolink:Disease", "biolink:Gene")} 13 | 14 | \item{limit}{Maximum number of nodes to return. Default 10.} 15 | 16 | \item{...}{Parameters passed to monarch_engine().} 17 | } 18 | \value{ 19 | A local tbl_kgx graph with no edges. 20 | } 21 | \description{ 22 | This function is a wrapper around the Monarch-hosted 23 | \href{https://api.monarchinitiative.org/v3/docs#/search/search_v3_api_search_get}{search API}. 24 | It returns nodes (no edges) from the Monarch KG, fetched via an instance of \code{monarch_engine()}. 25 | } 26 | \examples{ 27 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 28 | cf_hits <- monarch_search("Cystic fibrosis", category = "biolink:Disease", limit = 5) 29 | print(cf_hits) 30 | \dontshow{\}) # examplesIf} 31 | } 32 | -------------------------------------------------------------------------------- /tests/testthat/test-monarch_search.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("monarch_search works", { 5 | # skip for now 6 | #testthat::skip("temporary skip") 7 | 8 | g <- monarch_search("fanconi anemia", limit = 5) 9 | # result should be a tbl_kgx with 5 nodes and no edges 10 | expect_s3_class(g, "tbl_kgx") 11 | nodes_df <- data.frame(tidygraph::activate(g, nodes)) 12 | edges_df <- data.frame(tidygraph::activate(g, edges)) 13 | expect_equal(nrow(nodes_df), 5) 14 | expect_equal(nrow(edges_df), 0) 15 | 16 | # limit = 1 should work 17 | g <- monarch_search("fanconi anemia", limit = 1) 18 | # result should be a tbl_kgx with 1 node and no edges 19 | nodes_df <- data.frame(tidygraph::activate(g, nodes)) 20 | edges_df <- data.frame(tidygraph::activate(g, edges)) 21 | expect_equal(nrow(nodes_df), 1) 22 | expect_equal(nrow(edges_df), 0) 23 | 24 | # no hits should work 25 | g <- monarch_search("this is not a real search term") 26 | # this should return a graph with no nodes 27 | nodes_df <- data.frame(tidygraph::activate(g, nodes)) 28 | edges_df <- data.frame(tidygraph::activate(g, edges)) 29 | expect_equal(nrow(nodes_df), 0) 30 | expect_equal(nrow(edges_df), 0) 31 | 32 | 33 | }) 34 | -------------------------------------------------------------------------------- /R/load_kgx.R: -------------------------------------------------------------------------------- 1 | 2 | #' Load a graph from a KGX-formatted .tar.gz file. 3 | #' 4 | #' Given a KGX-formatted tabular KG 5 | #' (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md) 6 | #' loads it as a graph. 7 | #' 8 | #' @param filename File to the graph from. Must end in .tar.gz and conform to KGX specification (see description). 9 | #' @param attach_engine An engine to attach to the graph (optional). 10 | #' @param ... Other parameters (unused) 11 | #' @return A `tbl_kgx` graph. 12 | #' @export 13 | #' @examplesIf monarch_engine_check() 14 | #' phenos <- monarch_engine() |> 15 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 16 | #' expand(predicates = "biolink:has_phenotype", 17 | #' categories = "biolink:PhenotypicFeature") 18 | #' 19 | #' save_kgx(phenos, "phenos.tar.gz") 20 | #' 21 | #' # when loading the graph, we can optionally attach an engine 22 | #' loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine()) 23 | #' 24 | #' loaded_phenos 25 | #' 26 | #' # cleanup saved file 27 | #' file.remove("phenos.tar.gz") 28 | load_kgx <- function(filename, attach_engine = NULL, ...) { 29 | e <- file_engine(filename) 30 | g <- e$graph 31 | 32 | attr(g, "last_engine") <- attach_engine 33 | g <- order_cols(g) 34 | return(g) 35 | } 36 | -------------------------------------------------------------------------------- /man/neo4j_engine_check.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/neo4j_engine_check.R 3 | \name{neo4j_engine_check} 4 | \alias{neo4j_engine_check} 5 | \title{Check if a neo4j database is available and properly formatted} 6 | \usage{ 7 | neo4j_engine_check(url, username = NA, password = NA, warn = TRUE) 8 | } 9 | \arguments{ 10 | \item{url}{A character string indicating the URL of the neo4j database.} 11 | 12 | \item{username}{A character string indicating the username for the neo4j database (if needed).} 13 | 14 | \item{password}{A character string indicating the password for the neo4j database (if needed).} 15 | 16 | \item{warn}{A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.} 17 | } 18 | \value{ 19 | TRUE if the database is available and properly formatted, FALSE otherwise. 20 | } 21 | \description{ 22 | Attempts to connect to the specified Neo4J database and run a query to see if it is properly formatted. Returns FALSE if the database is not available or not properly formatted. 23 | } 24 | \examples{ 25 | print(neo4j_engine_check("https://neo4j.monarchinitiative.org")) 26 | print(neo4j_engine_check("https://no-such-db.monarchinitiative.org")) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test-example_graph.neo4j_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("example_graph for neo4j engine", { 5 | #testthat::skip("temporary skip") 6 | options(width = 150) 7 | 8 | e <- monarch_engine() 9 | 10 | sample <- example_graph(e) 11 | 12 | # check some expected categories 13 | expect_true(any("biolink:Disease" %in_list% nodes(sample)$category)) 14 | # expect_true(any("biolink:GenomicEntity" %in_list% nodes(sample)$category)) 15 | # expect_true(any("biolink:GeneOrGeneProduct" %in_list% nodes(sample)$category)) 16 | expect_true(any("biolink:SequenceVariant" %in_list% nodes(sample)$category)) 17 | # expect_true(any("biolink:OntologyClass" %in_list% nodes(sample)$category)) 18 | # expect_true(any("biolink:PhysicalEssence" %in_list% nodes(sample)$category)) 19 | 20 | # check some expected predicates 21 | expect_true(any("biolink:causes" %in_list% edges(sample)$predicate)) 22 | expect_true(any("biolink:subclass_of" %in_list% edges(sample)$predicate)) 23 | expect_true(any("biolink:associated_with_increased_likelihood_of" %in_list% edges(sample)$predicate)) 24 | expect_true(any("biolink:treats_or_applied_or_studied_to_treat" %in_list% edges(sample)$predicate)) 25 | expect_true(any("biolink:genetically_associated_with" %in_list% edges(sample)$predicate)) 26 | }) 27 | -------------------------------------------------------------------------------- /meta/archive/association_type_mappings.yaml: -------------------------------------------------------------------------------- 1 | - subject_label: Diseases 2 | object_label: Phenotypes 3 | category: "biolink:DiseaseToPhenotypicFeatureAssociation" 4 | - subject_label: Genes 5 | object_label: Phenotypes 6 | category: "biolink:GeneToPhenotypicFeatureAssociation" 7 | - subject_label: Interactions 8 | object_label: Interactions 9 | symmetric: true 10 | category: "biolink:PairwiseGeneToGeneInteraction" 11 | - subject_label: Genes 12 | object_label: Pathways 13 | category: "biolink:GeneToPathwayAssociation" 14 | - subject_label: Genes 15 | object_label: Anatomy 16 | category: "biolink:GeneToExpressionSiteAssociation" 17 | - subject_label: Orthologs 18 | object_label: Orthologs 19 | symmetric: true 20 | category: "biolink:GeneToGeneHomologyAssociation" 21 | - subject_label: Chemicals 22 | object_label: Pathways 23 | category: "biolink:ChemicalToPathwayAssociation" 24 | - subject_label: Genes 25 | object_label: Molecular Functions 26 | category: "biolink:MacromolecularMachineToMolecularActivityAssociation" 27 | - subject_label: Causal Genes 28 | object_label: Causal Diseases 29 | category: "biolink:CausalGeneToDiseaseAssociation" 30 | - subject_label: "Correlated Genes" 31 | object_label: "Correlated Diseases" 32 | category: "biolink:CorrelatedGeneToDiseaseAssociation" 33 | -------------------------------------------------------------------------------- /tests/testthat/test-cypher_query.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("cypher_query returns a graph object", { 5 | # skip for now 6 | #testthat::skip("temporary skip") 7 | 8 | e <- monarch_engine() 9 | g <- cypher_query(e, query = "MATCH (s) -[r]- (o) return s, r, o LIMIT 1") 10 | expect_s3_class(g, "tbl_kgx") 11 | nodes_df <- data.frame(tidygraph::activate(g, nodes)) 12 | edges_df <- data.frame(tidygraph::activate(g, edges)) 13 | # g should be a tidygraph with two rows in nodes and one in edges 14 | expect_equal(nrow(nodes_df), 2) 15 | expect_equal(nrow(edges_df), 1) 16 | }) 17 | 18 | test_that("cypher_query works with multicypher queries", { 19 | # skip for now 20 | #testthat::skip("temporary skip") 21 | 22 | e <- monarch_engine() 23 | queries <- c("MATCH (n {id: 'MONDO:0007947'}) RETURN n", 24 | "MATCH (n {id: 'MONDO:0017309'}) RETURN n", 25 | "MATCH (n {id: 'MONDO:0020066'}) RETURN n") 26 | g <- cypher_query(e, query = queries) 27 | expect_s3_class(g, "tbl_kgx") 28 | nodes_df <- data.frame(tidygraph::activate(g, nodes)) 29 | edges_df <- data.frame(tidygraph::activate(g, edges)) 30 | # g should be a tidygraph with two rows in nodes and one in edges 31 | expect_equal(nrow(nodes_df), 3) 32 | expect_equal(nrow(edges_df), 0) 33 | }) 34 | -------------------------------------------------------------------------------- /man/kg_edge_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kg_edge_weights.R 3 | \name{kg_edge_weights} 4 | \alias{kg_edge_weights} 5 | \title{Knowledge Graph edge weights} 6 | \usage{ 7 | kg_edge_weights( 8 | graph, 9 | normalise = TRUE, 10 | encodings = monarch_edge_weight_encodings(), 11 | fun = function(x) { 12 | rowSums(x, na.rm = TRUE) 13 | } 14 | ) 15 | } 16 | \arguments{ 17 | \item{graph}{A graph object} 18 | 19 | \item{normalise}{Normalise each encoding from 0-1 by dividing by the 20 | maximum value. Default is \code{TRUE}.} 21 | 22 | \item{encodings}{A list of named lists of encoding values for 23 | different edge attributes.} 24 | 25 | \item{fun}{Function to compute edge weights with across the 26 | numerically encoded attributes. Default is \link{rowSums}.} 27 | } 28 | \description{ 29 | Compute edge weights for the given \link{tbl_kgx} graph using 30 | several pieces of categorical, ordinal, and continuous metadata. 31 | } 32 | \examples{ 33 | data(eds_marfan_kg) 34 | g <- eds_marfan_kg |> 35 | fetch_nodes(query_ids = "MONDO:0007525") |> 36 | expand(predicates = "biolink:has_phenotype", 37 | categories = "biolink:PhenotypicFeature")|> 38 | expand(categories = "biolink:Gene") 39 | g2 <- kg_edge_weights(g) 40 | edges(g2)$weight 41 | } 42 | -------------------------------------------------------------------------------- /man/layout_umap.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/layout_umap.R 3 | \name{layout_umap} 4 | \alias{layout_umap} 5 | \title{Layout UMAP} 6 | \usage{ 7 | layout_umap(graph, use_3d = FALSE, prefix = "UMAP", ...) 8 | } 9 | \arguments{ 10 | \item{graph}{A graph object} 11 | 12 | \item{use_3d}{Logical, whether to use a 3D layout (TRUE) or 13 | 2D layout (FALSE). Default is FALSE (2D).} 14 | 15 | \item{prefix}{A character string prefix to add to the layout column names.} 16 | 17 | \item{...}{Additional arguments passed to the layout function.} 18 | } 19 | \value{ 20 | A matrix of x and y coordinates for each node in the graph. 21 | } 22 | \description{ 23 | Generate a 2D or 2D layout of a graph using the UMAP algorithm. 24 | See here for details: 25 | \href{https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_umap}{ 26 | igraph_layout_umap} 27 | } 28 | \examples{ 29 | set.seed(2024) 30 | data(eds_marfan_kg) 31 | g <- eds_marfan_kg |> 32 | fetch_nodes(pcategory=="biolink:Disease", limit=40) |> 33 | expand(predicates = "biolink:has_phenotype", 34 | categories = "biolink:PhenotypicFeature")|> 35 | tidygraph::sample_n(200) |> 36 | expand(categories = "biolink:Gene") 37 | X <- layout_umap(g) 38 | g <- graph_centrality(g) 39 | plot(g, layout=X, node_size=centrality) 40 | } 41 | -------------------------------------------------------------------------------- /R/cytoscape.R: -------------------------------------------------------------------------------- 1 | #' Send a graph to Cytoscape 2 | #' 3 | #' Given a tbl_kgx graph, send it to Cytoscape for visualization. Node labels 4 | #' are mapped to node `name` (if available, otherwise they default to node `id`), 5 | #' node color is mapped to `pcategory`, edge color is mapped to `predicate`, 6 | #' node hover-over text is set to `description` (if available, otherwise node `id`), 7 | #' and edge hover-over text is set to `predicate`. Nodes are layed out 8 | #' using the Kamada-Kawai method. These properties and more may be customized in 9 | #' the Cytoscape application. This function requires that Cytoscape is installed 10 | #' and running independently of the R session. 11 | #' 12 | #' 13 | #' @param g A `tbl_kgx()` graph to visualize. 14 | #' @param ... Other parameters (unused). 15 | #' 16 | #' @return NULL, invisibly 17 | #' @export 18 | #' @examplesIf FALSE 19 | #' data(eds_marfan_kg) 20 | #' g <- eds_marfan_kg |> 21 | #' fetch_nodes(query_ids = "MONDO:0020066") |> 22 | #' expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |> 23 | #' expand(categories = c("biolink:PhenotypicFeature", "biolink:Gene")) 24 | #' 25 | #' # Cytoscape must be installed and running 26 | #' cytoscape(g) 27 | #' 28 | #' @import RCy3 29 | #' @import tidygraph 30 | #' @import dplyr 31 | cytoscape <- function(g, ...) { 32 | UseMethod("cytoscape") 33 | } 34 | -------------------------------------------------------------------------------- /man/cypher_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cypher_query.R 3 | \name{cypher_query} 4 | \alias{cypher_query} 5 | \title{Execute a Cypher Query} 6 | \usage{ 7 | cypher_query(engine, query, parameters = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{engine}{A neo4j KG engine} 11 | 12 | \item{query}{A string representing the Cypher query. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a single joined graph.} 13 | 14 | \item{parameters}{A list of parameters for the Cypher query. Default is an empty list.} 15 | 16 | \item{...}{Additional arguments passed to the function.} 17 | } 18 | \value{ 19 | The result of the Cypher query as a tbl_kgx graph. 20 | } 21 | \description{ 22 | This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a tbl_kgx graph. 23 | } 24 | \examples{ 25 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 26 | engine <- monarch_engine() 27 | 28 | query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10" 29 | ids <- c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021") 30 | parameters <- list(ids = ids) 31 | 32 | result <- cypher_query(engine, query, parameters) 33 | print(result) 34 | \dontshow{\}) # examplesIf} 35 | } 36 | -------------------------------------------------------------------------------- /man/example_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/example_graph.R 3 | \name{example_graph} 4 | \alias{example_graph} 5 | \title{Return an example set of nodes from a KG engine.} 6 | \usage{ 7 | example_graph(engine, ...) 8 | } 9 | \arguments{ 10 | \item{engine}{A KG engine object} 11 | 12 | \item{...}{Other parameters (not used)} 13 | } 14 | \value{ 15 | A tbl_kgx graph 16 | } 17 | \description{ 18 | Given a KG engine, returns a graph representing the diversity 19 | of node categories and edge predicates for browsing. The returned graph is guaranteed to 20 | contain at least one node of every category, and at least one edge of every 21 | predicate. No other guarantees are made: the example graph is not minimal 22 | to satisfy these criteria, it is not random or even pseudo-random, and it 23 | may not be connected. 24 | } 25 | \examples{ 26 | # Using example KGX file packaged with monarchr 27 | data(eds_marfan_kg) 28 | 29 | # prints a readable summary and returns a list of dataframes 30 | g <- eds_marfan_kg |> example_graph() 31 | print(g) 32 | 33 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 34 | # prints a readable summary and returns a list of dataframes 35 | g <- monarch_engine() |> example_graph() 36 | print(g) 37 | \dontshow{\}) # examplesIf} 38 | } 39 | -------------------------------------------------------------------------------- /man/transitive_closure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transitive_closure.R 3 | \name{transitive_closure} 4 | \alias{transitive_closure} 5 | \title{Compute transitive closure over a predicate.} 6 | \usage{ 7 | transitive_closure(g, predicate = "biolink:subclass_of") 8 | } 9 | \arguments{ 10 | \item{g}{The \code{tbl_kgx} graph to compute on.} 11 | 12 | \item{predicate}{The edge predicate to close over.} 13 | } 14 | \value{ 15 | Graph with transitive edges added. 16 | } 17 | \description{ 18 | Computes the transitive closure of a graph, treating the specified 19 | predicate as transitive. Resulting edge predicates will the be the 20 | same, but have primary_knowledge_source set to transitive_\if{html}{\out{}}. 21 | } 22 | \examples{ 23 | data(eds_marfan_kg) 24 | 25 | eds_marfan_kg |> fetch_nodes(name == "Tall stature") |> 26 | # get 2 levels of ancestors 27 | expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |> 28 | activate(edges) |> 29 | filter(primary_knowledge_source == "infores:upheno") |> 30 | transitive_closure(predicate = "biolink:subclass_of") |> 31 | plot(edge_color = primary_knowledge_source) 32 | 33 | } 34 | \seealso{ 35 | \code{\link[=roll_up]{roll_up()}}, \code{\link[=transfer]{transfer()}}, \code{\link[=descendants]{descendants()}}, \code{\link[=ancestors]{ancestors()}} 36 | } 37 | -------------------------------------------------------------------------------- /R/graph_centrality.R: -------------------------------------------------------------------------------- 1 | #' Add centrality 2 | #' 3 | #' First computes of each node in a graph. 4 | #' Then adds the centrality score as an node attribute. 5 | #' @param fun The centrality function to use. 6 | #' Default is \link[igraph]{harmonic_centrality}. 7 | #' @param col Name of the new node attribute to store the centrality score. 8 | #' @param ... Additional arguments passed to the centrality function 9 | #' (\code{sim_fun}). 10 | #' @inheritParams nodes 11 | #' @returns Graph object with centrality added as a new node attribute. 12 | #' @export 13 | #' @importFrom tidygraph active 14 | #' @importFrom tidygraph activate 15 | #' @examples 16 | #' data(eds_marfan_kg) 17 | #' g <- eds_marfan_kg |> 18 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 19 | #' expand(predicates = "biolink:has_phenotype", 20 | #' categories = "biolink:PhenotypicFeature")|> 21 | #' expand(categories = "biolink:Gene") 22 | #' g <- graph_centrality(g) 23 | #' nodes(g)$centrality 24 | graph_centrality <- function(graph, 25 | fun=igraph::harmonic_centrality, 26 | col="centrality", 27 | ...){ 28 | active_tbl <- active(graph) 29 | message("Computing node centrality.") 30 | graph <- graph|> 31 | activate(nodes)|> 32 | dplyr::mutate(!!col:=fun(graph, ...)) |> 33 | activate(!!rlang::sym(active_tbl)) 34 | return(graph) 35 | } 36 | -------------------------------------------------------------------------------- /tests/testthat/test-example_graph.file_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("example_graph for file engine", { 5 | #testthat::skip("temporary skip") 6 | options(width = 150) 7 | 8 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 9 | e <- file_engine(filename) 10 | 11 | sample <- example_graph(e) 12 | 13 | # check some expected categories 14 | expect_true(any("biolink:Disease" %in_list% nodes(sample)$category)) 15 | expect_true(any("biolink:GenomicEntity" %in_list% nodes(sample)$category)) 16 | expect_true(any("biolink:GeneOrGeneProduct" %in_list% nodes(sample)$category)) 17 | expect_true(any("biolink:SequenceVariant" %in_list% nodes(sample)$category)) 18 | expect_true(any("biolink:OntologyClass" %in_list% nodes(sample)$category)) 19 | expect_true(any("biolink:PhysicalEssence" %in_list% nodes(sample)$category)) 20 | 21 | # check some expected predicates 22 | expect_true(any("biolink:causes" %in_list% edges(sample)$predicate)) 23 | expect_true(any("biolink:subclass_of" %in_list% edges(sample)$predicate)) 24 | expect_true(any("biolink:associated_with_increased_likelihood_of" %in_list% edges(sample)$predicate)) 25 | expect_true(any("biolink:treats_or_applied_or_studied_to_treat" %in_list% edges(sample)$predicate)) 26 | expect_true(any("biolink:genetically_associated_with" %in_list% edges(sample)$predicate)) 27 | }) 28 | -------------------------------------------------------------------------------- /man/summary.neo4j_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.neo4j_engine.R 3 | \name{summary.neo4j_engine} 4 | \alias{summary.neo4j_engine} 5 | \title{Summarize contents of a Neo4j KG engine} 6 | \usage{ 7 | \method{summary}{neo4j_engine}(object, ..., quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{object}{A \code{neo4j_engine} object} 11 | 12 | \item{...}{Other parameters (not used)} 13 | 14 | \item{quiet}{Logical, whether to suppress printing of the summary} 15 | } 16 | \value{ 17 | A list of dataframes and named lists 18 | } 19 | \description{ 20 | Given a Neo4j based KG engine, provides summary information in the form of 21 | node counts, category counts across nodes, relationship type counts, and available properties. 22 | General information about the graph is printed to the console, and a list of 23 | dataframes with this information is returned invisibly. Also returned 24 | are \code{cats}, \code{preds}, and \code{props} entries, containing lists of available 25 | categories/predicates/properties for convenient auto-completion in RStudio. 26 | } 27 | \examples{ 28 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 29 | # prints a readable summary and returns a list of dataframes 30 | stats <- monarch_engine() |> summary() 31 | print(stats) 32 | \dontshow{\}) # examplesIf} 33 | } 34 | -------------------------------------------------------------------------------- /.github/workflows/rworkflows.yml: -------------------------------------------------------------------------------- 1 | name: rworkflows 2 | 'on': 3 | push: 4 | branches: 5 | - master 6 | - main 7 | - devel 8 | - RELEASE_** 9 | pull_request: 10 | branches: 11 | - master 12 | - main 13 | - devel 14 | - RELEASE_** 15 | jobs: 16 | rworkflows: 17 | permissions: write-all 18 | runs-on: ${{ matrix.config.os }} 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | container: ${{ matrix.config.cont }} 21 | strategy: 22 | fail-fast: ${{ false }} 23 | matrix: 24 | config: 25 | - os: ubuntu-latest 26 | bioc: devel 27 | r: auto 28 | cont: ghcr.io/bioconductor/bioconductor_docker:devel 29 | rspm: ~ 30 | steps: 31 | - uses: neurogenomics/rworkflows@master 32 | with: 33 | run_bioccheck: ${{ false }} 34 | run_rcmdcheck: ${{ false }} 35 | as_cran: ${{ true }} 36 | run_vignettes: ${{ true }} 37 | has_testthat: ${{ true }} 38 | run_covr: ${{ false }} 39 | run_pkgdown: ${{ true }} 40 | has_runit: ${{ false }} 41 | has_latex: ${{ false }} 42 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 43 | run_docker: ${{ false }} 44 | DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} 45 | runner_os: ${{ runner.os }} 46 | cache_version: cache-v1 47 | docker_registry: ghcr.io 48 | -------------------------------------------------------------------------------- /man/load_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/load_kgx.R 3 | \name{load_kgx} 4 | \alias{load_kgx} 5 | \title{Load a graph from a KGX-formatted .tar.gz file.} 6 | \usage{ 7 | load_kgx(filename, attach_engine = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{filename}{File to the graph from. Must end in .tar.gz and conform to KGX specification (see description).} 11 | 12 | \item{attach_engine}{An engine to attach to the graph (optional).} 13 | 14 | \item{...}{Other parameters (unused)} 15 | } 16 | \value{ 17 | A \code{tbl_kgx} graph. 18 | } 19 | \description{ 20 | Given a KGX-formatted tabular KG 21 | (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md) 22 | loads it as a graph. 23 | } 24 | \examples{ 25 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 26 | phenos <- monarch_engine() |> 27 | fetch_nodes(query_ids = "MONDO:0007525") |> 28 | expand(predicates = "biolink:has_phenotype", 29 | categories = "biolink:PhenotypicFeature") 30 | 31 | save_kgx(phenos, "phenos.tar.gz") 32 | 33 | # when loading the graph, we can optionally attach an engine 34 | loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine()) 35 | 36 | loaded_phenos 37 | 38 | # cleanup saved file 39 | file.remove("phenos.tar.gz") 40 | \dontshow{\}) # examplesIf} 41 | } 42 | -------------------------------------------------------------------------------- /man/file_engine_check.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/file_engine_check.R 3 | \name{file_engine_check} 4 | \alias{file_engine_check} 5 | \title{Check availability of a file-based engine} 6 | \usage{ 7 | file_engine_check(filename, warn = TRUE) 8 | } 9 | \arguments{ 10 | \item{filename}{A character string indicating the path to the file-based engine.} 11 | 12 | \item{warn}{A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.} 13 | } 14 | \value{ 15 | TRUE if the database is available and properly formatted, FALSE otherwise. 16 | } 17 | \description{ 18 | Attempts to connect to the specified file-based engine. Returns FALSE if the file is not available or not properly formatted. 19 | } 20 | \examples{ 21 | \dontshow{if (file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 22 | print(file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz")) 23 | print(file_engine_check("https://no-such-host.kghub.io/sepio_kgx_tsv.tar.gz")) 24 | print(file_engine_check(system.file("extdata", "mondo_kgx_tsv.tar.gz", package = "monarchr"))) 25 | print(file_engine_check(system.file("extdata", "nosuch_kgx_tsv.tar.gz", package = "monarchr"))) 26 | \dontshow{\}) # examplesIf} 27 | } 28 | -------------------------------------------------------------------------------- /man/summary.file_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summary.file_engine.R 3 | \name{summary.file_engine} 4 | \alias{summary.file_engine} 5 | \title{Summarize contents of a KGX-file-based KG engine} 6 | \usage{ 7 | \method{summary}{file_engine}(object, ..., quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{object}{A \code{file_engine} object} 11 | 12 | \item{...}{Other parameters (not used)} 13 | 14 | \item{quiet}{Logical, whether to suppress printing of the summary} 15 | } 16 | \value{ 17 | A list of dataframes and named lists 18 | } 19 | \description{ 20 | Given a KGX file-based KG engine, provides summary information in the form of 21 | node counts, category counts across nodes, relationship type counts, and available properties. 22 | General information about the graph is printed to the console, and a list of 23 | dataframes with this information is returned invisibly. Also returned 24 | are \code{cats}, \code{preds}, and \code{props} entries, containing lists of available 25 | categories/predicates/properties for convenient auto-completion in RStudio. 26 | } 27 | \details{ 28 | When applied to a \code{file_engine}, also included are node-specific and edge-specific properties. 29 | } 30 | \examples{ 31 | # Using example KGX file packaged with monarchr 32 | data(eds_marfan_kg) 33 | 34 | # prints a readable summary and returns a list of dataframes 35 | res <- eds_marfan_kg |> summary() 36 | print(res) 37 | } 38 | -------------------------------------------------------------------------------- /R/data_example_kgs.R: -------------------------------------------------------------------------------- 1 | #' Example Ehlers-Danlos and Marfan Syndrome Knowledge Graph Engine 2 | #' 3 | #' A small `file_engine()` Knowledge Graph (KG) containing Monarch Initiative 4 | #' data for Ehlers-Danlos Syndrome and Marfan Syndrome, including their subtypes, 5 | #' all entities connected to those diseases or subtypes, and all ancestors (supertypes) 6 | #' of all those diseases and entities. Generated 8/15/2024 via: 7 | #' 8 | #' ``` 9 | #' monarch_engine() |> 10 | #' fetch_nodes(query_ids = c("MONDO:0020066", "MONDO:0007947")) |> 11 | #' expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |> 12 | #' expand() |> 13 | #' expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) 14 | #' ``` 15 | #' 16 | #' This example engine may also be loaded from file via 17 | #' 18 | #' ``` 19 | #' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 20 | #' eds_marfan_kg <- file_engine(filename) 21 | #' ``` 22 | #' 23 | #' @docType data 24 | #' @usage data(eds_marfan_kg) 25 | #' @format An object of class \code{file_engine} for use with `fetch_nodes()`, `expand()`, etc. 26 | #' @keywords datasets 27 | #' @examples 28 | #' data(eds_marfan_kg) 29 | #' phenos <- eds_marfan_kg |> 30 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 31 | #' expand(predicates = "biolink:has_phenotype", 32 | #' categories = "biolink:PhenotypicFeature") 33 | "eds_marfan_kg" 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /R/cypher_query_df.neo4j_engine.R: -------------------------------------------------------------------------------- 1 | internal_cypher_query_df <- function(engine, query, parameters = NULL, ...) { 2 | if(length(query) == 1) { 3 | result <- neo2R::cypher(engine$graph_conn, query = query, parameters = parameters, result = "row", arraysAsStrings = FALSE) 4 | } else { 5 | result <- neo2R::multicypher(engine$graph_conn, queries = query, parameters = parameters, result = "row", arraysAsStrings = FALSE) 6 | } 7 | 8 | return(result) 9 | } 10 | 11 | #internal_cypher_query_df_memoised <- memoise::memoise(internal_cypher_query_df) 12 | 13 | #' @export 14 | #' @importFrom neo2R cypher 15 | #' @importFrom neo2R multicypher 16 | #' @importFrom memoise memoise 17 | cypher_query_df.neo4j_engine <- function(engine, query, parameters = NULL, ...) { 18 | if(!is.null(engine$cache)) { 19 | # ok, this is a bit wonky 20 | # the engine stores its cache 21 | # we create a memoized internal function using that cache 22 | # and then we call the function 23 | # BUT, the engine itself needs to be sent to the function, 24 | # and if its cache keeps changing it wont memoize properly 25 | # so we create a copy of the engine without a cache and use that 26 | engine_copy <- engine 27 | engine_copy$cache <- NULL 28 | 29 | internal <- memoise::memoise(internal_cypher_query_df, cache = engine$cache) 30 | res <- internal(engine_copy, query, parameters, ...) 31 | 32 | return(res) 33 | } else { 34 | internal_cypher_query_df(engine, query, parameters, ...) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /man/cypher_query_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cypher_query_df.R 3 | \name{cypher_query_df} 4 | \alias{cypher_query_df} 5 | \title{Execute a Cypher Query} 6 | \usage{ 7 | cypher_query_df(engine, query, parameters = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{engine}{A neo4j_engine() or derivative providing access to a Neo4j database.} 11 | 12 | \item{query}{A string representing the Cypher query, which should return a table. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a list of data frames.} 13 | 14 | \item{parameters}{A list of parameters for the Cypher query, if required.} 15 | 16 | \item{...}{Additional arguments passed to the function.} 17 | } 18 | \value{ 19 | The result of the Cypher query as a data frame, or a list of data frames if multiple queries are passed. 20 | } 21 | \description{ 22 | This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a data frame. 23 | } 24 | \examples{ 25 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 26 | engine <- monarch_engine() 27 | 28 | query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10" 29 | parameters <- list(ids = c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021")) 30 | 31 | result <- cypher_query_df(engine, query, parameters) 32 | print(result) 33 | \dontshow{\}) # examplesIf} 34 | } 35 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: monarchr 2 | Type: Package 3 | Title: Monarch Knowledge Graph Queries 4 | Description: R package for easy access, manipulation, and analysis of 5 | Monarch KG data Resources. 6 | Version: 2.1.2 7 | URL: https://github.com/monarch-initiative/monarchr 8 | BugReports: https://github.com/monarch-initiative/monarchr/issues 9 | Authors@R: 10 | c( 11 | person(given = "Shawn", 12 | family = "O'Neil", 13 | role = c("aut","cre"), 14 | email = "oneilsh@gmail.com", 15 | comment = c(ORCID = "0000-0001-6220-7080")), 16 | person(given = "Brian", 17 | family = "Schilder", 18 | role = c("aut","ctb"), 19 | email = "brian_schilder@alumni.brown.edu", 20 | comment = c(ORCID = "0000-0001-5949-2191")) 21 | ) 22 | License: MIT + file LICENSE 23 | Encoding: UTF-8 24 | LazyData: true 25 | Config/testthat/edition: 3 26 | Roxygen: list(markdown = TRUE) 27 | RoxygenNote: 7.3.2 28 | VignetteBuilder: knitr 29 | Imports: 30 | assertthat, 31 | dplyr, 32 | httr, 33 | kableExtra, 34 | neo2R, 35 | rlang, 36 | igraph, 37 | tibble, 38 | tidygraph, 39 | yaml, 40 | knitr, 41 | purrr, 42 | stringr, 43 | archive, 44 | readr, 45 | ggraph, 46 | ggplot2 (>= 4.0.0), 47 | R.utils, 48 | RCy3, 49 | digest, 50 | memoise, 51 | sets, 52 | relations 53 | Suggests: 54 | testthat (>= 3.0.0), 55 | rworkflows, 56 | Matrix 57 | -------------------------------------------------------------------------------- /inst/kg_prefs.yaml: -------------------------------------------------------------------------------- 1 | category_priority: 2 | - biolink:LifeStage 3 | - biolink:MolecularEntity 4 | - biolink:OrganismTaxon 5 | - biolink:Cell 6 | - biolink:CellularComponent 7 | - biolink:MolecularActivity 8 | - biolink:SequenceVariant 9 | - biolink:ChemicalEntity 10 | - biolink:ChemicalOrDrugOrTreatment 11 | - biolink:GeneProductMixin 12 | - biolink:Protein 13 | - biolink:Polypeptide 14 | - biolink:Pathway 15 | - biolink:Disease 16 | - biolink:ChemicalEntityOrProteinOrPolypeptide 17 | - biolink:BiologicalProcess 18 | - biolink:Occurrent 19 | - biolink:BiologicalProcessOrActivity 20 | - biolink:AnatomicalEntity 21 | - biolink:OrganismalEntity 22 | - biolink:SubjectOfInvestigation 23 | - biolink:Genotype 24 | - biolink:PhenotypicFeature 25 | - biolink:DiseaseOrPhenotypicFeature 26 | - biolink:Gene 27 | - biolink:MacromolecularMachineMixin 28 | - biolink:GeneOrGeneProduct 29 | - biolink:ChemicalEntityOrGeneOrGeneProduct 30 | - biolink:GenomicEntity 31 | - biolink:OntologyClass 32 | - biolink:PhysicalEssence 33 | - biolink:PhysicalEssenceOrOccurrent 34 | - biolink:BiologicalEntity 35 | - biolink:ThingWithTaxon 36 | - biolink:NamedThing 37 | - biolink:Entity 38 | node_property_priority: 39 | - id 40 | - pcategory 41 | - name 42 | - symbol 43 | - in_taxon_label 44 | - description 45 | - synonym 46 | - primary_knowledge_source 47 | edge_property_priority: 48 | - subject 49 | - predicate 50 | - object 51 | - primary_knowledge_source 52 | 53 | 54 | -------------------------------------------------------------------------------- /man/save_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/save_kgx.R 3 | \name{save_kgx} 4 | \alias{save_kgx} 5 | \title{Save a graph as a KGX-formatted .tar.gz file.} 6 | \usage{ 7 | save_kgx(graph, filename = "saved_kgx_graph.tar.gz", ...) 8 | } 9 | \arguments{ 10 | \item{graph}{A \code{tbl_kgx} graph to save.} 11 | 12 | \item{filename}{File to save the graph to. Must end in .tar.gz.} 13 | 14 | \item{...}{Other parameters (unused)} 15 | } 16 | \value{ 17 | The input graph (invisibly). 18 | } 19 | \description{ 20 | Given a graph, saves it using the tabular KGX format 21 | (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md) for later 22 | use with \code{load_kgx()} (or even backing an engine with \code{file_engine()}). Note that if 23 | any engine is associated with the graph it is not saved. 24 | } 25 | \examples{ 26 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 27 | phenos <- monarch_engine() |> 28 | fetch_nodes(query_ids = "MONDO:0007525") |> 29 | expand(predicates = "biolink:has_phenotype", 30 | categories = "biolink:PhenotypicFeature") 31 | 32 | save_kgx(phenos, "phenos.tar.gz") 33 | 34 | # when loading the graph, we can optionally attach an engine 35 | loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine()) 36 | 37 | loaded_phenos 38 | 39 | # cleanup saved file 40 | file.remove("phenos.tar.gz") 41 | \dontshow{\}) # examplesIf} 42 | } 43 | -------------------------------------------------------------------------------- /R/layout_umap.R: -------------------------------------------------------------------------------- 1 | #' Layout UMAP 2 | #' 3 | #' Generate a 2D or 2D layout of a graph using the UMAP algorithm. 4 | #' See here for details: 5 | #' \href{https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_umap}{ 6 | #' igraph_layout_umap} 7 | #' @param use_3d Logical, whether to use a 3D layout (TRUE) or 8 | #' 2D layout (FALSE). Default is FALSE (2D). 9 | #' @param prefix A character string prefix to add to the layout column names. 10 | #' @param ... Additional arguments passed to the layout function. 11 | #' @inheritParams nodes 12 | #' @returns A matrix of x and y coordinates for each node in the graph. 13 | #' @export 14 | #' @examples 15 | #' set.seed(2024) 16 | #' data(eds_marfan_kg) 17 | #' g <- eds_marfan_kg |> 18 | #' fetch_nodes(pcategory=="biolink:Disease", limit=40) |> 19 | #' expand(predicates = "biolink:has_phenotype", 20 | #' categories = "biolink:PhenotypicFeature")|> 21 | #' tidygraph::sample_n(200) |> 22 | #' expand(categories = "biolink:Gene") 23 | #' X <- layout_umap(g) 24 | #' g <- graph_centrality(g) 25 | #' plot(g, layout=X, node_size=centrality) 26 | layout_umap <- function(graph, 27 | use_3d = FALSE, 28 | prefix="UMAP", 29 | ...){ 30 | if(use_3d){ 31 | fun <- utils::getFromNamespace("layout_umap_3d_impl", "igraph") 32 | } else { 33 | fun <- utils::getFromNamespace("layout_umap_impl", "igraph") 34 | } 35 | X <- fun(graph, res=matrix()) 36 | rownames(X) <- nodes(graph)$id 37 | colnames(X) <- paste0(prefix, 1:ncol(X)) 38 | return(X) 39 | } 40 | -------------------------------------------------------------------------------- /man/cytoscape.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cytoscape.R 3 | \name{cytoscape} 4 | \alias{cytoscape} 5 | \title{Send a graph to Cytoscape} 6 | \usage{ 7 | cytoscape(g, ...) 8 | } 9 | \arguments{ 10 | \item{g}{A \code{tbl_kgx()} graph to visualize.} 11 | 12 | \item{...}{Other parameters (unused).} 13 | } 14 | \value{ 15 | NULL, invisibly 16 | } 17 | \description{ 18 | Given a tbl_kgx graph, send it to Cytoscape for visualization. Node labels 19 | are mapped to node \code{name} (if available, otherwise they default to node \code{id}), 20 | node color is mapped to \code{pcategory}, edge color is mapped to \code{predicate}, 21 | node hover-over text is set to \code{description} (if available, otherwise node \code{id}), 22 | and edge hover-over text is set to \code{predicate}. Nodes are layed out 23 | using the Kamada-Kawai method. These properties and more may be customized in 24 | the Cytoscape application. This function requires that Cytoscape is installed 25 | and running independently of the R session. 26 | } 27 | \examples{ 28 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 29 | data(eds_marfan_kg) 30 | g <- eds_marfan_kg |> 31 | fetch_nodes(query_ids = "MONDO:0020066") |> 32 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |> 33 | expand(categories = c("biolink:PhenotypicFeature", "biolink:Gene")) 34 | 35 | # Cytoscape must be installed and running 36 | cytoscape(g) 37 | \dontshow{\}) # examplesIf} 38 | } 39 | -------------------------------------------------------------------------------- /man/graph_semsim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph_semsim.R 3 | \name{graph_semsim} 4 | \alias{graph_semsim} 5 | \title{Add semantic similarity} 6 | \usage{ 7 | graph_semsim( 8 | graph, 9 | fun = igraph::similarity, 10 | col = "similarity", 11 | nm = "id", 12 | sparse = TRUE, 13 | return_matrix = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{graph}{A graph object} 19 | 20 | \item{fun}{The similarity function to use. 21 | Default is \link[igraph]{similarity}.} 22 | 23 | \item{col}{Name of the new edge attribute to store the similarity score.} 24 | 25 | \item{nm}{The node attribute to use as the matrix row/colnames.} 26 | 27 | \item{sparse}{Return a sparse matrix instead of a dense matrix.} 28 | 29 | \item{return_matrix}{Return the similarity matrix instead of the graph.} 30 | 31 | \item{...}{Additional arguments passed to the similarity function 32 | (\code{fun}).} 33 | } 34 | \value{ 35 | Graph object with similarity added as a new edge attribute. 36 | } 37 | \description{ 38 | First computes semantic similarity between all pairs of nodes in a graph. 39 | Then adds the continuous similarity score as an edge attribute. 40 | } 41 | \examples{ 42 | data(eds_marfan_kg) 43 | g <- eds_marfan_kg |> 44 | fetch_nodes(query_ids = "MONDO:0007525") |> 45 | expand(predicates = "biolink:has_phenotype", 46 | categories = "biolink:PhenotypicFeature")|> 47 | expand(categories = "biolink:Gene") 48 | g <- graph_semsim(g) 49 | edges(g)$similarity 50 | } 51 | -------------------------------------------------------------------------------- /R/file_engine_check.R: -------------------------------------------------------------------------------- 1 | #' Check availability of a file-based engine 2 | #' 3 | #' Attempts to connect to the specified file-based engine. Returns FALSE if the file is not available or not properly formatted. 4 | #' 5 | #' @param filename A character string indicating the path to the file-based engine. 6 | #' @param warn A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE. 7 | #' @return TRUE if the database is available and properly formatted, FALSE otherwise. 8 | #' @export 9 | #' @examplesIf file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz") 10 | #' print(file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz")) 11 | #' print(file_engine_check("https://no-such-host.kghub.io/sepio_kgx_tsv.tar.gz")) 12 | #' print(file_engine_check(system.file("extdata", "mondo_kgx_tsv.tar.gz", package = "monarchr"))) 13 | #' print(file_engine_check(system.file("extdata", "nosuch_kgx_tsv.tar.gz", package = "monarchr"))) 14 | file_engine_check <- function(filename, warn = TRUE) { 15 | # use try to see if we can successfully create a connection; return TRUE if successful, FALSE if not 16 | tryCatch({ 17 | e <- file_engine(filename) 18 | return(TRUE) 19 | }, error = function(e) { 20 | if(warn) { 21 | warning(e$message) 22 | } 23 | return(FALSE) 24 | }, warning = function(e) { 25 | if(warn) { 26 | warning(e$message) 27 | } 28 | return(FALSE) 29 | }) 30 | } 31 | -------------------------------------------------------------------------------- /tests/testthat/test-tbl_kgx.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | 5 | test_that("tbl_kgx constructor follows the rules", { 6 | #testthat::skip("temporary skip") 7 | 8 | # first up, properly formatted node and edge dfs should work 9 | nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar")) 10 | edges_df <- data.frame(subject = c("a"), object = c("b"), predicate = c("baz")) 11 | g <- tbl_kgx(nodes_df, edges_df) 12 | expect_s3_class(g, "tbl_kgx") 13 | 14 | # if the node df is missing the id column, it should fail 15 | nodes_df <- data.frame(category = c("foo", "bar")) 16 | edges_df <- data.frame(subject = c("a"), object = c("b"), predicate = c("baz")) 17 | expect_error(tbl_kgx(nodes_df, edges_df)) 18 | 19 | # if the edge df is missing the subject column, it should fail 20 | nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar")) 21 | edges_df <- data.frame(object = c("b"), predicate = c("baz")) 22 | expect_error(tbl_kgx(nodes_df, edges_df)) 23 | 24 | # if the edge df is missing the object column, it should fail 25 | nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar")) 26 | edges_df <- data.frame(subject = c("a"), predicate = c("baz")) 27 | expect_error(tbl_kgx(nodes_df, edges_df)) 28 | 29 | # if the edge df is missing the predicate column, it should fail 30 | nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar")) 31 | edges_df <- data.frame(subject = c("a"), object = c("b")) 32 | expect_error(tbl_kgx(nodes_df, edges_df)) 33 | }) -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "`r read.dcf('DESCRIPTION', fields = 'Package')[1]`" 3 | author: "`r rworkflows::use_badges(branch='main', add_hex = FALSE, add_codecov_graphs = FALSE, add_doi = '10.5281/zenodo.14553217')`" 4 | date: "README updated: `r format( Sys.Date(), '%b-%d-%Y')`" 5 | output: 6 | github_document 7 | --- 8 | 9 | 10 | 11 | ```{r, echo=FALSE, include=FALSE} 12 | pkg <- read.dcf("DESCRIPTION", fields = "Package")[1] 13 | title <- read.dcf("DESCRIPTION", fields = "Title")[1] 14 | description <- read.dcf("DESCRIPTION", fields = "Description")[1]|> 15 | gsub(pattern="\n",replacement=" ") 16 | URL <- read.dcf('DESCRIPTION', fields = 'URL')[1] 17 | owner <- tolower(strsplit(URL,"/")[[1]][4]) 18 | ``` 19 | 20 | ## `r pkg`: `r title` 21 | 22 | ### `r description` 23 | 24 | `monarchr` provides a tidy interface to data hosted at 25 | , and other knowledge graphs in KGX format (e.g. those at [KGHub](https://kghub.org/)). 26 | 27 | - [Website](https://monarch-initiative.github.io/monarchr/) 28 | - [Get started](https://monarch-initiative.github.io/monarchr/articles/monarchr.html) 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | Installation: 37 | 38 | ``` r 39 | if(!require("BiocManager")) install.packages("BiocManager") 40 | 41 | BiocManager::install("monarch-initiative/monarchr", update=FALSE) 42 | library(monarchr) 43 | ``` 44 | -------------------------------------------------------------------------------- /man/set_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/set_engine.R, R/set_engine.tbl_kgx.R 3 | \name{set_engine} 4 | \alias{set_engine} 5 | \title{Set the engine for a graph. See warning in details.} 6 | \usage{ 7 | set_engine(g, engine) 8 | 9 | set_engine(g, engine) 10 | } 11 | \arguments{ 12 | \item{g}{A tbl_kgx graph.} 13 | 14 | \item{engine}{An engine object.} 15 | } 16 | \value{ 17 | A tbl_kgx graph. 18 | 19 | A tbl_kgx graph. 20 | } 21 | \description{ 22 | Sets a given graph's engine to a given engine object and returns the graph. 23 | WARNING: changing the backing engine for a graph dynamically is not 24 | yet fully supported or tested. 25 | 26 | Sets a given graph's engine to a given engine object and returns the graph. 27 | WARNING: changing the backing engine for a graph dynamically is not 28 | yet fully supported or tested. 29 | } 30 | \examples{ 31 | # Using example KGX file packaged with monarchr 32 | data(eds_marfan_kg) 33 | 34 | g <- eds_marfan_kg |> 35 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 36 | 37 | other_engine <- eds_marfan_kg # this could be a different file-engine (see `file_engine()`) 38 | g <- set_engine(g, other_engine) 39 | 40 | print(get_engine(g)) 41 | 42 | # Using example KGX file packaged with monarchr 43 | data(eds_marfan_kg) 44 | 45 | g <- eds_marfan_kg |> 46 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 47 | 48 | other_engine <- eds_marfan_kg # this could be a different file engine (see `file_engine()`) 49 | g <- set_engine(g, other_engine) 50 | 51 | print(get_engine(g)) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /man/cytoscape.tbl_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cytoscape.tbl_kgx.R 3 | \name{cytoscape.tbl_kgx} 4 | \alias{cytoscape.tbl_kgx} 5 | \title{Send a graph to Cytoscape} 6 | \usage{ 7 | \method{cytoscape}{tbl_kgx}(g, ...) 8 | } 9 | \arguments{ 10 | \item{g}{A \code{tbl_kgx()} graph to visualize.} 11 | 12 | \item{...}{other parameters passed to RCy3 functions, e.g. \code{base.url}.} 13 | } 14 | \value{ 15 | NULL, invisibly 16 | } 17 | \description{ 18 | Given a tbl_kgx graph, send it to Cytoscape for visualization. Node labels 19 | are mapped to node \code{name} (if available, otherwise they default to node \code{id}), 20 | node color is mapped to \code{pcategory}, edge color is mapped to \code{predicate}, 21 | node hover-over text is set to \code{description} (if available, otherwise node \code{id}), 22 | and edge hover-over text is set to \code{predicate}. Nodes are layed out 23 | using the Kamada-Kawai method. These properties and more may be customized in 24 | the Cytoscape application. This function requires that Cytoscape is installed 25 | and running independently of the R session. 26 | } 27 | \examples{ 28 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 29 | data(eds_marfan_kg) 30 | g <- eds_marfan_kg |> 31 | fetch_nodes(query_ids = "MONDO:0020066") |> 32 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |> 33 | expand(categories = c("biolink:PhenotypicFeature", "biolink:Gene")) 34 | 35 | # Cytoscape must be installed and running 36 | cytoscape(g) 37 | \dontshow{\}) # examplesIf} 38 | } 39 | -------------------------------------------------------------------------------- /man/transitive_reduction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transitive_reduction.R 3 | \name{transitive_reduction} 4 | \alias{transitive_reduction} 5 | \title{Compute transitive reduction over a predicate.} 6 | \usage{ 7 | transitive_reduction(g, predicate = "biolink:subclass_of") 8 | } 9 | \arguments{ 10 | \item{g}{The \code{tbl_kgx} graph to compute on.} 11 | 12 | \item{predicate}{The edge predicate to reduce over.} 13 | } 14 | \value{ 15 | Graph with transitive edges added. 16 | } 17 | \description{ 18 | Computes the transitive reduction of a graph, treating the specified 19 | predicate as transitive. 20 | } 21 | \examples{ 22 | data(eds_marfan_kg) 23 | 24 | g <- eds_marfan_kg |> fetch_nodes(name == "Tall stature") |> 25 | expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |> 26 | bind_edges(data.frame(from = 2, 27 | to = 9, 28 | predicate = "biolink_subclass_of", 29 | primary_knowledge_source = "hand_annotated")) 30 | 31 | plot(g, edge_color = primary_knowledge_source) 32 | 33 | g_closed <- g |> 34 | transitive_closure(predicate = "biolink:subclass_of") 35 | 36 | plot(g_closed, edge_color = primary_knowledge_source) 37 | 38 | g_reduced <- g_closed |> 39 | transitive_reduction() 40 | 41 | plot(g_reduced, edge_color = primary_knowledge_source) 42 | } 43 | \seealso{ 44 | \code{\link[=transitive_closure]{transitive_closure()}}, \code{\link[=roll_up]{roll_up()}}, \code{\link[=transfer]{transfer()}}, \code{\link[=descendants]{descendants()}}, \code{\link[=ancestors]{ancestors()}} 45 | } 46 | -------------------------------------------------------------------------------- /man/tbl_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tbl_kgx.R 3 | \name{tbl_kgx} 4 | \alias{tbl_kgx} 5 | \title{Create a KGX graph object} 6 | \usage{ 7 | tbl_kgx(nodes = NULL, edges = NULL, attach_engine = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{nodes}{A data frame containing the nodes of the graph. Must have 'id' and 'category' columns.} 11 | 12 | \item{edges}{A data frame containing the edges of the graph. Must have 'subject', 'predicate', and 'object' columns. Can be NULL.} 13 | 14 | \item{attach_engine}{An engine to attach to the newly created graph for use in future queries based on the graph.} 15 | 16 | \item{...}{Additional arguments passed to the function.} 17 | } 18 | \value{ 19 | A KGX graph object. 20 | } 21 | \description{ 22 | This function creates a new tbl_kgx object which inherits from tidygraph::tbl_graph, from node and edge dataframes, ensuring they conform to the KGX specification 23 | described at https://github.com/biolink/kgx/blob/master/specification/kgx-format.md. Specifically, nodes must have an 'id' and 'category' column, 24 | and edges, if provided, must have 'subject', 'predicate', and 'object' columns. The function allows graphs with no edges. 25 | The function sets 'from' and 'to' columns in the edges from 'subject' and 'object' respectively, and sets the node key to 'id'. 26 | Additional columns are allowed. 27 | } 28 | \details{ 29 | This function will generally be called internally. 30 | } 31 | \examples{ 32 | nodes <- data.frame(id = c("A", "B"), category = c("gene", "disease")) 33 | edges <- data.frame(subject = c("A"), predicate = c("associated_with"), object = c("B")) 34 | g <- tbl_kgx(nodes, edges) 35 | } 36 | -------------------------------------------------------------------------------- /man/eds_marfan_kg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_example_kgs.R 3 | \docType{data} 4 | \name{eds_marfan_kg} 5 | \alias{eds_marfan_kg} 6 | \title{Example Ehlers-Danlos and Marfan Syndrome Knowledge Graph Engine} 7 | \format{ 8 | An object of class \code{file_engine} for use with \code{fetch_nodes()}, \code{expand()}, etc. 9 | } 10 | \usage{ 11 | data(eds_marfan_kg) 12 | } 13 | \description{ 14 | A small \code{file_engine()} Knowledge Graph (KG) containing Monarch Initiative 15 | data for Ehlers-Danlos Syndrome and Marfan Syndrome, including their subtypes, 16 | all entities connected to those diseases or subtypes, and all ancestors (supertypes) 17 | of all those diseases and entities. Generated 8/15/2024 via: 18 | } 19 | \details{ 20 | \if{html}{\out{
}}\preformatted{monarch_engine() |> 21 | fetch_nodes(query_ids = c("MONDO:0020066", "MONDO:0007947")) |> 22 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |> 23 | expand() |> 24 | expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) 25 | }\if{html}{\out{
}} 26 | 27 | This example engine may also be loaded from file via 28 | 29 | \if{html}{\out{
}}\preformatted{filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 30 | eds_marfan_kg <- file_engine(filename) 31 | }\if{html}{\out{
}} 32 | } 33 | \examples{ 34 | data(eds_marfan_kg) 35 | phenos <- eds_marfan_kg |> 36 | fetch_nodes(query_ids = "MONDO:0007525") |> 37 | expand(predicates = "biolink:has_phenotype", 38 | categories = "biolink:PhenotypicFeature") 39 | } 40 | \keyword{datasets} 41 | -------------------------------------------------------------------------------- /R/base_engine.R: -------------------------------------------------------------------------------- 1 | ########### Internal functions ########### 2 | 3 | #' @title base_engine 4 | #' @description A base class for all engines 5 | #' @param name A character string indicating the name of the engine. 6 | #' @param preferences A named list of preferences for the engine. 7 | #' @param ... Other parameters (unused) 8 | #' @return An object of class `base_engine` 9 | base_engine <- function(name = "default_engine", preferences = NULL, ...) { 10 | # read default prefs from the package 11 | default_preferences <- options("default_prefs")$default_prefs 12 | 13 | if(!is.null(preferences)) { 14 | # if preferences is a length-1 character vector ending with .yaml, and the file exists, read it 15 | if(is.character(preferences) && 16 | length(preferences) == 1 && 17 | grepl("\\.yaml$", preferences) && 18 | file.exists(preferences)) { 19 | preferences <- yaml::read_yaml(preferences) 20 | 21 | # if it's a list, just use it 22 | } else if(is.list(preferences)) { 23 | preferences <- preferences 24 | } 25 | } 26 | 27 | # now, if preferences is still not null, we want to override the default entries 28 | # that are provided in the preferences list 29 | # but just those, leaving other defaults in place 30 | if(!is.null(preferences)) { 31 | for(p in names(preferences)) { 32 | default_preferences[[p]] <- preferences[[p]] 33 | } 34 | } 35 | 36 | # now set preferences to the updated default preferences 37 | preferences <- default_preferences 38 | 39 | obj <- list(name = name, 40 | preferences = preferences) 41 | 42 | class(obj) <- c("base_engine", class(obj)) 43 | return(obj) 44 | } 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | monarchr 2 | ================ 3 | [![License: MIT + file 4 | LICENSE](https://img.shields.io/badge/license-MIT%20+%20file%20LICENSE-blue.svg)](https://cran.r-project.org/web/licenses/MIT%20+%20file%20LICENSE) 5 | [![](https://img.shields.io/badge/devel%20version-2.1.2-black.svg)](https://github.com/monarch-initiative/monarchr) 6 |
[![R build 7 | status](https://github.com/monarch-initiative/monarchr/workflows/rworkflows/badge.svg)](https://github.com/monarch-initiative/monarchr/actions) 8 | [![](https://codecov.io/gh/monarch-initiative/monarchr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/monarch-initiative/monarchr) 9 | [![DOI](https://zenodo.org/badge/639616520.svg)](https://doi.org/10.5281/zenodo.14553217) 10 |
11 |

12 | Authors: Shawn O’Neil, Brian Schilder 13 |

14 |

15 | README updated: Jul-20-2024 16 |

17 | 18 | 19 | 20 | ## `monarchr`: Monarch Knowledge Graph Queries 21 | 22 | ### R package for easy access, manipulation, and analysis of knowledge graphs. 23 | 24 | `monarchr` provides a tidy interface to data hosted at 25 | , and other knowledge graphs in KGX format (e.g. those at [KGHub](https://kghub.org/)). 26 | 27 | - [Website](https://monarch-initiative.github.io/monarchr) 28 | - [Get 29 | started](https://monarch-initiative.github.io/monarchr/articles/monarchr) 30 | 31 | 32 | 33 | 34 | 35 | Installation: 36 | 37 | ``` r 38 | if(!require("BiocManager")) install.packages("BiocManager") 39 | 40 | BiocManager::install("monarch-initiative/monarchr", update=FALSE) 41 | library(monarchr) 42 | ``` 43 | -------------------------------------------------------------------------------- /tests/testthat/test-kg_join.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("kg_join works", { 5 | 6 | g1 <- tbl_kgx(nodes = tibble(id = c("a", "b", "c"), 7 | category = c("gene", "gene", "disease"), 8 | source = c("g1", "g1", "g1")), 9 | edges = tibble(subject = c("a", "b"), 10 | predicate = c("interacts_with", "interacts_with"), 11 | object = c("b", "c"))) 12 | 13 | g2 <- tbl_kgx(nodes = tibble(id = c("c", "c", "d"), 14 | category = c("disease", "disease", "gene"), 15 | source = c("g2", "g2", "g2")), 16 | edges = tibble(subject = c("c"), 17 | predicate = c("interacts_with"), 18 | object = c("d"))) 19 | 20 | res <- kg_join(g1, g2) 21 | 22 | # because of the duplication of c in the nodes, we should have 5 nodes 23 | # and because of the edge between c and d, we should have 2 edges for that one 24 | # along with the other three edges for 5 total 25 | expect_equal(nrow(nodes(res)), 5) 26 | expect_equal(nrow(edges(res)), 5) 27 | expect_equal(nrow(edges(res) |> filter(subject == "c")), 2) 28 | expect_equal(nrow(edges(res) |> filter(subject == "a")), 1) 29 | expect_equal(nrow(edges(res) |> filter(subject == "b")), 2) 30 | expect_equal(nrow(edges(res) |> filter(object == "b")), 1) 31 | 32 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 33 | 34 | res2 <- file_engine(filename) |> 35 | fetch_nodes(query_ids = "MONDO:0007525") 36 | 37 | # there should be no edges and one node 38 | expect_equal(nrow(nodes(res2)), 1) 39 | expect_equal(nrow(edges(res2)), 0) 40 | 41 | 42 | }) 43 | -------------------------------------------------------------------------------- /man/transfer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transfer.R 3 | \name{transfer} 4 | \alias{transfer} 5 | \title{Transfer information over edges to nodes.} 6 | \usage{ 7 | transfer(colname = NULL, over, direction = "out") 8 | } 9 | \arguments{ 10 | \item{colname}{The node column to transfer information from} 11 | 12 | \item{over}{The edge predicate to transfer information over} 13 | 14 | \item{direction}{Whether to transfer information along the predicate direction ("out") or against ("in")} 15 | } 16 | \value{ 17 | Vector or list, with one entry per node. 18 | } 19 | \description{ 20 | Used to 'transfer' information from nodes to other nodes across 21 | specific predicates, either in an outward direction (along the edge 22 | direction) or inward (against the edge direction). Returns a node-property 23 | column; intended to be used with mutate() on nodes. 24 | } 25 | \details{ 26 | The return value will be either a list, or if the result would be a list 27 | with all length-1 or length-0 elements, a vector with 0-length elements 28 | replaced by NA. Practically, this results in a list when necessary and a vector 29 | otherwise. 30 | } 31 | \examples{ 32 | data(eds_marfan_kg) 33 | 34 | engine |> eds_marfan_kg |> 35 | expand(categories = "biolink:Disease") |> 36 | activate(nodes) |> 37 | mutate(caused_by_genes = 38 | transfer(name, over = "biolink:causes", direction = "out")) |> 39 | mutate(causes_diseases = 40 | transfer(name, over = "biolink:causes", direction = "in")) |> 41 | plot.tbl_kgx(node_label = paste(name, 42 | " caused by: ", caused_by_genes, 43 | " causes: ", causes_diseases), 44 | label_size = 3) 45 | 46 | } 47 | \seealso{ 48 | \code{\link[=roll_up]{roll_up()}}, \link{transitive_closure}, \code{\link[=descendants]{descendants()}}, \code{\link[=ancestors]{ancestors()}} 49 | } 50 | -------------------------------------------------------------------------------- /R/summarize_neighborhood.R: -------------------------------------------------------------------------------- 1 | #' Summarize neighborhood 2 | #' 3 | #' Summarizes the neighborhood of the nodes of a given graph. Specifically, 4 | #' letting $N$ be the set of nodes in the given graph, this function retrieves 5 | #' counts of relationship predicates (or node categories) of nodes connected to 6 | #' $N$ but not in $N$. This can be useful to examine the scale and scope of a 7 | #' graph's collective neighborhood in the larger KG. 8 | #' 9 | #' Note that the number of relationships returned may be larger than the number 10 | #' of nodes they connect to; use summarize = "edges" to see edge counts between 11 | #' nodes of different categories, and summarize = "nodes" to see counts of connected 12 | #' node categories. 13 | #' 14 | #' Additionally, when using `summarize = "edges"`, the summary will include 15 | #' edges that may already be present in the query graph. 16 | #' 17 | #' It is also possible to specify the direction of edges to include in the 18 | #' neighborhood, using the direction parameter. The default is "both", which 19 | #' includes both incoming and outgoing edges. 20 | #' 21 | #' 22 | #' @param graph A query graph to summarize the surrounding neighborhood for 23 | #' @param engine (Optional) An engine to use. If not provided, the graph's most recent engine is used. 24 | #' @param direction The direction of edges to include in the neighborhood 25 | #' @param summarize Whether to summarize edges or nodes (default "edges") 26 | #' 27 | #' @return A tbl_kgx graph 28 | #' 29 | #' @export 30 | #' @examplesIf monarch_engine_check() 31 | #' monarch_search("fanconi anemia", limit = 5) |> 32 | #' summarize_neighborhood(direction = "both", summarize = "edges") 33 | #' @import tidygraph 34 | #' @import dplyr 35 | #' @importFrom assertthat assert_that 36 | summarize_neighborhood <- function(graph, engine = NULL, direction = "both", summarize = "edges") { 37 | UseMethod("summarize_neighborhood") 38 | } 39 | -------------------------------------------------------------------------------- /R/graph_semsim.R: -------------------------------------------------------------------------------- 1 | #' Add semantic similarity 2 | #' 3 | #' First computes semantic similarity between all pairs of nodes in a graph. 4 | #' Then adds the continuous similarity score as an edge attribute. 5 | #' @param fun The similarity function to use. 6 | #' Default is \link[igraph]{similarity}. 7 | #' @param col Name of the new edge attribute to store the similarity score. 8 | #' @param nm The node attribute to use as the matrix row/colnames. 9 | #' @param return_matrix Return the similarity matrix instead of the graph. 10 | #' @param sparse Return a sparse matrix instead of a dense matrix. 11 | #' @param ... Additional arguments passed to the similarity function 12 | #' (\code{fun}). 13 | #' @import tidygraph 14 | #' @import dplyr 15 | #' @inheritParams nodes 16 | #' @returns Graph object with similarity added as a new edge attribute. 17 | #' @export 18 | #' @examples 19 | #' data(eds_marfan_kg) 20 | #' g <- eds_marfan_kg |> 21 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 22 | #' expand(predicates = "biolink:has_phenotype", 23 | #' categories = "biolink:PhenotypicFeature")|> 24 | #' expand(categories = "biolink:Gene") 25 | #' g <- graph_semsim(g) 26 | #' edges(g)$similarity 27 | graph_semsim <- function(graph, 28 | fun=igraph::similarity, 29 | col="similarity", 30 | nm="id", 31 | sparse=TRUE, 32 | return_matrix=FALSE, 33 | ...){ 34 | active_tbl <- active(graph) 35 | from <- to <- NULL; 36 | message("Computing pairwise node similarity.") 37 | X <- fun(graph, ...) 38 | if(sparse) { 39 | requireNamespace("Matrix") 40 | X <- Matrix::Matrix(X, sparse=TRUE) 41 | } 42 | rownames(X) <- colnames(X) <- nodes(graph)[[nm]] 43 | if(return_matrix) return(X) 44 | 45 | graph <- graph|> 46 | activate(edges)|> 47 | dplyr::mutate(!!col:=purrr::map2_dbl(from, to, ~ X[.y, .x])) |> 48 | activate(!!rlang::sym(active_tbl)) 49 | 50 | return(graph) 51 | } 52 | -------------------------------------------------------------------------------- /R/neo4j_engine_check.R: -------------------------------------------------------------------------------- 1 | #' Check if a neo4j database is available and properly formatted 2 | #' 3 | #' Attempts to connect to the specified Neo4J database and run a query to see if it is properly formatted. Returns FALSE if the database is not available or not properly formatted. 4 | #' 5 | #' @param url A character string indicating the URL of the neo4j database. 6 | #' @param username A character string indicating the username for the neo4j database (if needed). 7 | #' @param password A character string indicating the password for the neo4j database (if needed). 8 | #' @param warn A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE. 9 | #' 10 | #' @return TRUE if the database is available and properly formatted, FALSE otherwise. 11 | #' @export 12 | #' @examples 13 | #' print(neo4j_engine_check("https://neo4j.monarchinitiative.org")) 14 | #' print(neo4j_engine_check("https://no-such-db.monarchinitiative.org")) 15 | #' 16 | neo4j_engine_check <- function(url, 17 | username = NA, 18 | password = NA, 19 | warn = TRUE) { 20 | # this will throw an error if it cannot connect: 21 | # graph_conn <- neo2R::startGraph(url, username = username, password = password) 22 | 23 | # use try to see if we can successfully create a connection; return TRUE if successful, FALSE if not 24 | tryCatch({ 25 | e <- neo4j_engine(url = url, username = username, password = password) 26 | # check to see if we can run a query, we'll just grab one random node; this should fail 27 | # if something is wrong (e.g. the database is not in KGX format) 28 | cypher_query(e, "MATCH (n)-[r]->(q) RETURN n, r, q LIMIT 1") 29 | return(TRUE) 30 | }, error = function(e) { 31 | if(warn) { 32 | warning(e$message) 33 | } 34 | return(FALSE) 35 | }, warning = function(e) { 36 | if(warn) { 37 | warning(e$message) 38 | } 39 | return(FALSE) 40 | }) 41 | } 42 | -------------------------------------------------------------------------------- /R/kg_join.R: -------------------------------------------------------------------------------- 1 | #' Join two KGX graphs by their nodes and edges. 2 | #' 3 | #' Given two KGX graphs, returns a new KGX graph that is the union of the two input graphs, 4 | #' with any edges between nodes repeated for aother nodes with the same subject and object `id`. 5 | #' The engine of the first graph is used for the new graph. 6 | #' 7 | #' This function first computes new node and edge data, by taking the full natural join of 8 | #' node and edge data from the two input graphs, and then keeping unique rows. Note that nodes with 9 | #' the same `id` that differ in any shared column are effectively kept as separate, taken 10 | #' to represent the same entity in different contexts. (However, a node with an additional property will be 11 | #' merged with a node without that property, as defined by the natural join.) In these 12 | #' cases, any edge that connects to one of these nodes is also valid for the other node, and so the 13 | #' method repeats edges across nodes with the same `id`. 14 | #' 15 | #' 16 | #' @param graph1 A `tbl_kgx()` graph. 17 | #' @param graph2 A `tbl_kgx()` graph. 18 | #' @param ... Other parameters (not used) 19 | #' 20 | #' @return A `tbl_kgx()` graph 21 | #' @export 22 | #' @examples 23 | #' ## Using example KGX file packaged with monarchr 24 | #' data(eds_marfan_kg) 25 | #' 26 | #' eds_and_phenos <- eds_marfan_kg |> 27 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 28 | #' expand(predicates = "biolink:has_phenotype", 29 | #' categories = "biolink:PhenotypicFeature") 30 | #' 31 | #' marfan_and_phenos <- eds_marfan_kg |> 32 | #' fetch_nodes(query_ids = "MONDO:0007947") |> 33 | #' expand(predicates = "biolink:has_phenotype", 34 | #' categories = "biolink:PhenotypicFeature") 35 | #' 36 | #' combined <- kg_join(eds_and_phenos, marfan_and_phenos) 37 | #' print(combined) 38 | kg_join <- function(graph1, graph2, ...) { 39 | UseMethod("kg_join") 40 | } 41 | -------------------------------------------------------------------------------- /man/file_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/file_engine.R 3 | \name{file_engine} 4 | \alias{file_engine} 5 | \title{Create a knowledge graph engine object from a KGX-based tsv file} 6 | \usage{ 7 | file_engine(filename, preferences = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{filename}{A character string indicating the filename or URL of the KGX-based tsv file.} 11 | 12 | \item{preferences}{A named list of preferences for the engine.} 13 | 14 | \item{...}{Additional arguments (unused).} 15 | } 16 | \value{ 17 | An object of class \code{file_engine} 18 | } 19 | \description{ 20 | Creates a knowledge graph engine backed by a KGX-based tab-separated file. This must be a filename or URL to a \code{.tar.gz} file containing a \verb{*_nodes.tsv} and \verb{*_edges.tsv} file. If a URL is provided, the file will be downloaded to the user's current working directory. 21 | } 22 | \details{ 23 | Engines store preference information specifying how data are fetched and manipulated; for example, 24 | while node \code{category} is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"), 25 | typically a single category is used to represent the node in a graph, and is returned as the nodes' \code{pcategory}. A preference list of categories to use for \code{pcategory} is 26 | stored in the engine's preferences. A default set of preferences is stored in the package for use with KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md), 27 | but these can be overridden by the user. 28 | } 29 | \examples{ 30 | library(tidygraph) 31 | library(dplyr) 32 | 33 | # Using example KGX .tar.gz file packaged with monarchr 34 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 35 | engine <- file_engine(filename) 36 | 37 | res <- engine |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947")) 38 | print(res) 39 | 40 | } 41 | \seealso{ 42 | \code{neo4j_engine()}, \code{monarch_engine()} 43 | } 44 | -------------------------------------------------------------------------------- /man/kg_join.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/kg_join.R 3 | \name{kg_join} 4 | \alias{kg_join} 5 | \title{Join two KGX graphs by their nodes and edges.} 6 | \usage{ 7 | kg_join(graph1, graph2, ...) 8 | } 9 | \arguments{ 10 | \item{graph1}{A \code{tbl_kgx()} graph.} 11 | 12 | \item{graph2}{A \code{tbl_kgx()} graph.} 13 | 14 | \item{...}{Other parameters (not used)} 15 | } 16 | \value{ 17 | A \code{tbl_kgx()} graph 18 | } 19 | \description{ 20 | Given two KGX graphs, returns a new KGX graph that is the union of the two input graphs, 21 | with any edges between nodes repeated for aother nodes with the same subject and object \code{id}. 22 | The engine of the first graph is used for the new graph. 23 | } 24 | \details{ 25 | This function first computes new node and edge data, by taking the full natural join of 26 | node and edge data from the two input graphs, and then keeping unique rows. Note that nodes with 27 | the same \code{id} that differ in any shared column are effectively kept as separate, taken 28 | to represent the same entity in different contexts. (However, a node with an additional property will be 29 | merged with a node without that property, as defined by the natural join.) In these 30 | cases, any edge that connects to one of these nodes is also valid for the other node, and so the 31 | method repeats edges across nodes with the same \code{id}. 32 | } 33 | \examples{ 34 | ## Using example KGX file packaged with monarchr 35 | data(eds_marfan_kg) 36 | 37 | eds_and_phenos <- eds_marfan_kg |> 38 | fetch_nodes(query_ids = "MONDO:0007525") |> 39 | expand(predicates = "biolink:has_phenotype", 40 | categories = "biolink:PhenotypicFeature") 41 | 42 | marfan_and_phenos <- eds_marfan_kg |> 43 | fetch_nodes(query_ids = "MONDO:0007947") |> 44 | expand(predicates = "biolink:has_phenotype", 45 | categories = "biolink:PhenotypicFeature") 46 | 47 | combined <- kg_join(eds_and_phenos, marfan_and_phenos) 48 | print(combined) 49 | } 50 | -------------------------------------------------------------------------------- /man/summarize_neighborhood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_neighborhood.R 3 | \name{summarize_neighborhood} 4 | \alias{summarize_neighborhood} 5 | \title{Summarize neighborhood} 6 | \usage{ 7 | summarize_neighborhood( 8 | graph, 9 | engine = NULL, 10 | direction = "both", 11 | summarize = "edges" 12 | ) 13 | } 14 | \arguments{ 15 | \item{graph}{A query graph to summarize the surrounding neighborhood for} 16 | 17 | \item{engine}{(Optional) An engine to use. If not provided, the graph's most recent engine is used.} 18 | 19 | \item{direction}{The direction of edges to include in the neighborhood} 20 | 21 | \item{summarize}{Whether to summarize edges or nodes (default "edges")} 22 | } 23 | \value{ 24 | A tbl_kgx graph 25 | } 26 | \description{ 27 | Summarizes the neighborhood of the nodes of a given graph. Specifically, 28 | letting $N$ be the set of nodes in the given graph, this function retrieves 29 | counts of relationship predicates (or node categories) of nodes connected to 30 | $N$ but not in $N$. This can be useful to examine the scale and scope of a 31 | graph's collective neighborhood in the larger KG. 32 | } 33 | \details{ 34 | Note that the number of relationships returned may be larger than the number 35 | of nodes they connect to; use summarize = "edges" to see edge counts between 36 | nodes of different categories, and summarize = "nodes" to see counts of connected 37 | node categories. 38 | 39 | Additionally, when using \code{summarize = "edges"}, the summary will include 40 | edges that may already be present in the query graph. 41 | 42 | It is also possible to specify the direction of edges to include in the 43 | neighborhood, using the direction parameter. The default is "both", which 44 | includes both incoming and outgoing edges. 45 | } 46 | \examples{ 47 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 48 | monarch_search("fanconi anemia", limit = 5) |> 49 | summarize_neighborhood(direction = "both", summarize = "edges") 50 | \dontshow{\}) # examplesIf} 51 | } 52 | -------------------------------------------------------------------------------- /man/expand_n.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expand_n.R 3 | \name{expand_n} 4 | \alias{expand_n} 5 | \title{Iteratively fetch additional knowledge graph edges connected to a query graph} 6 | \usage{ 7 | expand_n( 8 | graph, 9 | return_each = FALSE, 10 | direction = "both", 11 | predicates = NULL, 12 | categories = NULL, 13 | transitive = NULL, 14 | n = 1, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{graph}{A query \code{tbl_kgx()} graph ot query with.} 20 | 21 | \item{return_each}{If TRUE, return a list of graphs for each iteration. 22 | If FALSE, return the final graph with all expanded edges.} 23 | 24 | \item{direction}{The direction of associations to fetch. Can be "in", "out", or "both". Default is "both".} 25 | 26 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered.} 27 | 28 | \item{categories}{A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched.} 29 | 30 | \item{transitive}{NULL (not used in this function).} 31 | 32 | \item{n}{Number of expansion iterations to run.} 33 | 34 | \item{...}{Other parameters passed to methods.} 35 | } 36 | \value{ 37 | A \code{tbl_kgx()} graph 38 | } 39 | \description{ 40 | Given an initialized \link{tbl_kgx} graph, iteratively expand the graph 41 | \code{n} iterations using certain predicates/categories. 42 | Arguments can either be a single value or a list of values. 43 | If an argument is provided as a list, its length must be equal to the number 44 | of iterations (\code{n}). 45 | } 46 | \examples{ 47 | ## Using example KGX file packaged with monarchr 48 | data(eds_marfan_kg) 49 | g <- eds_marfan_kg |> 50 | fetch_nodes(query_ids = "MONDO:0007525") |> 51 | expand(predicates = "biolink:has_phenotype", 52 | categories = "biolink:PhenotypicFeature") 53 | 54 | g_expanded <- g |> 55 | expand_n(predicates = "biolink:subclass_of", n=3) 56 | } 57 | -------------------------------------------------------------------------------- /tests/testthat/test-file_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("we can load the example KG w/ data()", { 5 | data(eds_marfan_kg, envir = environment()) 6 | 7 | e <- eds_marfan_kg 8 | 9 | # repeating tests from test-fetch_nodes.file_engine.R 10 | g <- fetch_nodes(e, query_ids = c("MONDO:0007525", "MONDO:0007526")) 11 | 12 | # ensure that the last_engine attribute is set 13 | expect_true(has_attr(g, "last_engine")) 14 | # and that it has the right class 15 | expect_true(inherits(attr(g, "last_engine"), "file_engine")) 16 | 17 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 18 | # there should be an id column with 2 entries: MONDO:0007525 and HGNC:4635, 19 | # but we can't gaurantee the order 20 | expect_equal(nrow(nodes_df), 2) 21 | expect_true(all(nodes_df$id %in% c("MONDO:0007525", "MONDO:0007526"))) 22 | 23 | # there should be no edges 24 | edges_df <- g %>% activate(edges) %>% as.data.frame() 25 | expect_equal(nrow(edges_df), 0) 26 | }) 27 | 28 | 29 | test_that("we can load data from url with file_engine", { 30 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 31 | 32 | e <- file_engine(filename) 33 | 34 | # the result should have class file_engine 35 | expect_true(inherits(e, "file_engine")) 36 | # and a filename attribute 37 | expect_true("filename" %in% names(e)) 38 | # and a graph attribute 39 | expect_true("graph" %in% names(e)) 40 | 41 | # the graph attribute should be a tbl_kgx 42 | expect_true(inherits(e$graph, "tbl_kgx")) 43 | 44 | # the nodes df should have a description field for this test of type character 45 | nodes_df <- nodes(e$graph) 46 | expect_true("description" %in% names(nodes_df)) 47 | 48 | # the edges df should have a knowledge_source field for this test of type character 49 | edges_df <- edges(e$graph) 50 | expect_true("knowledge_source" %in% names(edges_df)) 51 | 52 | # the nodes df category should be a list column 53 | expect_true(is.list(nodes_df$category)) 54 | 55 | # description should not be a list col 56 | expect_true(!is.list(nodes_df$description)) 57 | }) 58 | -------------------------------------------------------------------------------- /tests/testthat/test-fetch_nodes.neo4j_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | 5 | test_that("fetch_nodes neo4j works with basid id query", { 6 | #testthat::skip("temporary skip") 7 | 8 | e <- monarch_engine() 9 | 10 | # fetch_nodes(id %in% c("MONDO:0007525", "HGNC:4635")) should result in an error 11 | # do so silently in the logs... 12 | g <- fetch_nodes(e, query_ids = c("MONDO:0007525", "HGNC:4635")) 13 | 14 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 15 | # there should be an id column with 2 entries: MONDO:0007525 and HGNC:4635, 16 | # but we can't gaurantee the order 17 | expect_contains(2 + -1:2, nrow(nodes_df)) 18 | expect_true(all(nodes_df$id %in% c("MONDO:0007525", "HGNC:4635"))) 19 | 20 | # there should be no edges 21 | edges_df <- g %>% activate(edges) %>% as.data.frame() 22 | expect_equal(nrow(edges_df), 0) 23 | }) 24 | 25 | test_that("fetch_nodes neo4j works with complex query syntax", { 26 | e <- monarch_engine() 27 | g <- e %>% fetch_nodes(id == "MONDO:0007525") 28 | 29 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 30 | expect_equal(nrow(nodes_df), 1) 31 | expect_equal(nodes_df$id, "MONDO:0007525") 32 | 33 | # check to see that we can chain the fetch_nodes function with other functions 34 | g <- e %>% 35 | fetch_nodes(id == "MONDO:0007525") %>% 36 | expand(categories = "biolink:Gene") 37 | 38 | # this result should have 3 nodes and 3 edges 39 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 40 | expect_contains(3 + -2:2, nrow(nodes_df)) 41 | 42 | edges_df <- g %>% activate(edges) %>% as.data.frame() 43 | expect_contains(3 + -2:2, nrow(edges_df)) 44 | }) 45 | 46 | test_that("fetch_nodes limit works with neo4j_engine", { 47 | e <- monarch_engine() 48 | expect_warning( 49 | g <- e %>% fetch_nodes(in_taxon_label == "Homo sapiens", limit = 10)) 50 | 51 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 52 | expect_contains(10 + -2:2, nrow(nodes_df)) 53 | 54 | e <- monarch_engine() 55 | expect_warning( 56 | g <- e %>% fetch_nodes(in_taxon_label == "Homo sapiens", limit = 5)) 57 | 58 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 59 | expect_contains(5 + -1:2, nrow(nodes_df)) 60 | }) 61 | -------------------------------------------------------------------------------- /R/monarch_engine_check.R: -------------------------------------------------------------------------------- 1 | #' Check availability of Monarch Initiative API 2 | #' 3 | #' Attempts to connect to the Monarch Initiative API and use the specified functionality. Returns FALSE if the API is not available the result is not as expected. 4 | #' 5 | #' @param warn A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE. 6 | #' @param service The service to check: "search", "semsim", "graph", or a vector of these. Default is "graph". 7 | #' @return TRUE if the available features are online, FALSE otherwise. 8 | #' @export 9 | #' @examples 10 | #' print(monarch_engine_check()) 11 | #' 12 | monarch_engine_check <- function(warn = TRUE, service = "graph") { 13 | # this will throw an error if it cannot connect: 14 | # graph_conn <- neo2R::startGraph(url, username = username, password = password) 15 | 16 | # use try to see if we can successfully create a connection; return TRUE if successful, FALSE if not 17 | tryCatch({ 18 | if("graph" %in% service) { 19 | e <- monarch_engine() 20 | # check to see if we can run a query, we'll just grab one random node; this should fail 21 | # if something is wrong (e.g. the database is not in KGX format) 22 | cypher_query(e, "MATCH (n)-[r]->(q) RETURN n, r, q LIMIT 1") 23 | return(TRUE) 24 | } else if("search" %in% service) { 25 | cf_hits <- monarch_search("Cystic fibrosis", category = "biolink:Disease", limit = 5) 26 | return(TRUE) 27 | } else if("semsim" %in% service) { 28 | phenos1 <- tbl_kgx(nodes = data.frame(id = c("HP:0000001", "HP:0000002", "HP:0000003"))) 29 | phenos2 <- tbl_kgx(nodes = data.frame(id = c("HP:0000004", "HP:0000005", "HP:0000006"))) 30 | semsim <- monarch_semsim(phenos1, phenos2) 31 | return(TRUE) 32 | } else { 33 | warning("Service must be one of 'search', 'semsim', or 'graph'") 34 | return(FALSE) 35 | } 36 | }, error = function(e) { 37 | if(warn) { 38 | warning(e$message) 39 | } 40 | return(FALSE) 41 | }, warning = function(e) { 42 | if(warn) { 43 | warning(e$message) 44 | } 45 | return(FALSE) 46 | }) 47 | } -------------------------------------------------------------------------------- /man/plot.tbl_kgx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.tbl_kgx.R 3 | \name{plot.tbl_kgx} 4 | \alias{plot.tbl_kgx} 5 | \title{Specialized \code{plot()} function for KGX graphs} 6 | \usage{ 7 | \method{plot}{tbl_kgx}( 8 | g, 9 | ..., 10 | layout = "auto", 11 | node_color = pcategory, 12 | node_shape = namespace, 13 | edge_color = predicate, 14 | edge_linetype = primary_knowledge_source, 15 | node_label = name, 16 | plot_ids = FALSE, 17 | label_size = 2, 18 | fan_strength = 2, 19 | edge_alpha = 0.9, 20 | node_alpha = 0.9 21 | ) 22 | } 23 | \arguments{ 24 | \item{g}{A \link{tbl_kgx} graph.} 25 | 26 | \item{...}{ 27 | Arguments passed on to \code{\link[ggraph:ggraph]{ggraph::ggraph}} 28 | \describe{ 29 | \item{\code{graph}}{The object containing the graph. See \emph{Details} for a list 30 | of supported classes. Or a \code{layout_ggraph} object as returned from 31 | \code{create_layout} in which case all subsequent arguments is ignored.} 32 | }} 33 | 34 | \item{layout}{The layout to use for the plot. Default is "auto" as used by \code{ggraph}.} 35 | 36 | \item{node_color}{The column to use for node color. Default is "pcategory".} 37 | 38 | \item{node_shape}{The column to use for node shape Default is "namespace".} 39 | 40 | \item{edge_color}{The column to use for edge color. Default is "predicate".} 41 | 42 | \item{edge_linetype}{The column to use for edge line type. Default is "primary_knowledge_source".} 43 | 44 | \item{node_label}{The column to use for node labels. Defaults to "name".} 45 | 46 | \item{plot_ids}{Whether to show node IDs in node labels. Defaults to FALSE.} 47 | 48 | \item{label_size}{Size of node label text. Default is 2.} 49 | 50 | \item{fan_strength}{Fan strength in ggraph's geom_edge_fan, Default is 2.} 51 | 52 | \item{edge_alpha}{Alpha value for edges, default 0.9.} 53 | 54 | \item{node_alpha}{Alpha value for nodes, default 0.9.} 55 | } 56 | \description{ 57 | Specialized \code{plot()} function for KGX graphs 58 | } 59 | \examples{ 60 | data(eds_marfan_kg) 61 | g <- eds_marfan_kg |> 62 | fetch_nodes(query_ids = "MONDO:0007525") |> 63 | expand(predicates = "biolink:has_phenotype", 64 | categories = "biolink:PhenotypicFeature")|> 65 | expand(categories = "biolink:Gene") 66 | plot(g) 67 | } 68 | -------------------------------------------------------------------------------- /R/transitive_reduction.R: -------------------------------------------------------------------------------- 1 | #' Compute transitive reduction over a predicate. 2 | #' 3 | #' Computes the transitive reduction of a graph, treating the specified 4 | #' predicate as transitive. 5 | #' 6 | #' @return Graph with transitive edges added. 7 | #' @seealso [transitive_closure()], [roll_up()], [transfer()], [descendants()], [ancestors()] 8 | #' @param g The `tbl_kgx` graph to compute on. 9 | #' @param predicate The edge predicate to reduce over. 10 | #' 11 | #' @examples 12 | #' data(eds_marfan_kg) 13 | #' 14 | #' g <- eds_marfan_kg |> fetch_nodes(name == "Tall stature") |> 15 | #' expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |> 16 | #' bind_edges(data.frame(from = 2, 17 | #' to = 9, 18 | #' predicate = "biolink_subclass_of", 19 | #' primary_knowledge_source = "hand_annotated")) 20 | #' 21 | #' plot(g, edge_color = primary_knowledge_source) 22 | #' 23 | #' g_closed <- g |> 24 | #' transitive_closure(predicate = "biolink:subclass_of") 25 | #' 26 | #' plot(g_closed, edge_color = primary_knowledge_source) 27 | #' 28 | #' g_reduced <- g_closed |> 29 | #' transitive_reduction() 30 | #' 31 | #' plot(g_reduced, edge_color = primary_knowledge_source) 32 | #' @import tidygraph 33 | #' @import dplyr 34 | #' @importFrom sets as.set 35 | #' @importFrom relations endorelation 36 | #' @importFrom relations relation_incidence 37 | #' @export 38 | transitive_reduction <- function(g, predicate = "biolink:subclass_of") { 39 | # first we make a copy 40 | active_tbl <- active(g) 41 | g2 <- g 42 | 43 | # in the original, remove the predicate edges 44 | g <- g |> 45 | activate(edges) |> 46 | filter(predicate != predicate) 47 | 48 | df <- g2 |> activate(edges) |> as.data.frame() 49 | r <- endorelation( 50 | domain = lapply(unique(unlist(df[c("from", "to")])), sets::as.set), 51 | graph = df[c("from", "to")] 52 | ) 53 | mat <- relation_incidence(relations::transitive_reduction(r)) 54 | 55 | keep_edges <- which(mat == 1, arr.ind = TRUE) |> 56 | as.data.frame() |> 57 | rename(from = row, to = col) 58 | 59 | g_reduced <- g2 |> 60 | activate(edges) |> 61 | semi_join(keep_edges, by = c("from", "to")) 62 | 63 | # merge the original w g_reduced, adding back just the reduction edges 64 | suppressMessages(g <- kg_join(g, g_reduced), classes = "message") # suppress joining info 65 | g <- g |> activate(!!rlang::sym(active_tbl)) 66 | 67 | return(g) 68 | } 69 | -------------------------------------------------------------------------------- /R/expand.R: -------------------------------------------------------------------------------- 1 | #' Fetch additional knowledge graph edges connected to a query graph 2 | #' 3 | #' Given an optional KG engine (e.g. a `file_engine()`, 4 | #' `neo4j_engine()`, or `monarch_engine()`) and a query `tbl_kgx()` graph, fetches additional nodes and edges 5 | #' from the KG, expanding the query graph according to specific criteria. If the first parameter is an engine, that 6 | #' engine is used; if the first parameter is a query graph, the most recent engine associated with the graph is used. 7 | #' 8 | #' 9 | #' @param graph A query `tbl_kgx()` graph ot query with. 10 | #' @param engine (Optional) An engine to use for fetching query graph edges. If not provided, the graph's most recent engine is used. 11 | #' @param direction The direction of associations to fetch. Can be "in", "out", or "both". Default is "both". 12 | #' @param predicates A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered. 13 | #' @param categories A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched. 14 | #' @param transitive If TRUE, include transitive closure of the neighborhood. Default is FALSE. Useful in combination with predicates like `biolink:subclass_of`. 15 | #' @param ... Other parameters passed to methods. 16 | #' 17 | #' @return A `tbl_kgx()` graph 18 | #' @export 19 | #' @examplesIf monarch_engine_check() 20 | #' ## Using Monarch (hosted) 21 | #' phenos <- monarch_engine() |> 22 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 23 | #' expand(predicates = "biolink:has_phenotype", 24 | #' categories = "biolink:PhenotypicFeature") 25 | #' 26 | #' print(phenos) 27 | #' 28 | #' 29 | #' 30 | #' @examples 31 | #' ## Using example KGX file packaged with monarchr 32 | #' data(eds_marfan_kg) 33 | #' phenos <- eds_marfan_kg |> 34 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 35 | #' expand(predicates = "biolink:has_phenotype", 36 | #' categories = "biolink:PhenotypicFeature") 37 | #' 38 | #' print(phenos) 39 | #' @import tidygraph 40 | #' @import dplyr 41 | #' @importFrom assertthat assert_that 42 | expand <- function(graph, 43 | engine = NULL, 44 | direction = "both", 45 | predicates = NULL, 46 | categories = NULL, 47 | transitive = FALSE, 48 | 49 | ...) { 50 | UseMethod("expand") 51 | } 52 | -------------------------------------------------------------------------------- /tests/testthat/test-monarch_semsim.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | # g1 does not have a disease node, so in the result the target EDS should not be matched by any query 5 | test_that("monarch_semsim works", { 6 | suppressWarnings({ 7 | g1 <- monarch_engine() |> 8 | fetch_nodes(query_ids = "MONDO:0007947") |> # Marfan syndrome 9 | expand(categories = "biolink:PhenotypicFeature", limit = 5) |> 10 | activate(nodes) |> 11 | filter(pcategory == "biolink:PhenotypicFeature") |> 12 | mutate(source = "g1") 13 | 14 | # MONDO:0007522 EDS classic type 15 | g2 <- monarch_engine() |> 16 | fetch_nodes(query_ids = "MONDO:0007522") |> # EDS classic type 17 | expand(categories = "biolink:PhenotypicFeature", limit = 5) |> 18 | activate(nodes) |> 19 | mutate(source = "g2") 20 | }) 21 | 22 | sim <- monarch_semsim(g1, g2) 23 | 24 | # the number of edges in the result should equal the number of nodes in the query graph 25 | expect_equal(sim |> activate(edges) |> nrow(), g1 |> activate(nodes) |> nrow()) 26 | # the edges subject should be the same as the query graph nodes 27 | expect_equal(sim |> activate(edges) |> pull(subject) |> sort(), g1 |> activate(nodes) |> pull(id) |> sort()) 28 | # the "biolink:Disease" node in g2 should not be matched by any query 29 | expect_equal(sum(edges(sim)$object == "MONDO:0007522"), 0) 30 | 31 | # now lets rerun the query, including the reverse and keeping all nodes 32 | # using jaccard similarity for flavor 33 | sim <- monarch_semsim(g1, g2, metric = "jaccard_similarity", include_reverse = TRUE, keep_unmatched = TRUE) 34 | 35 | # all query and target nodes will be subjects exactly once in the resulting edges 36 | query_target_node_ids <- c(g1 |> activate(nodes) |> pull(id), g2 |> activate(nodes) |> pull(id)) 37 | expect_equal(sim |> activate(edges) |> pull(subject) |> sort(), query_target_node_ids |> sort()) 38 | 39 | # the number of edges in the result should equal the number of nodes in the query graph plus the number of nodes in the target graph 40 | expect_equal(edges(sim) |> nrow(), length(query_target_node_ids)) 41 | 42 | # make sure the engine of the result is the same as the query graph 43 | expect_equal(sim |> get_engine(), g1 |> get_engine()) 44 | 45 | # test plot 46 | #plot(sim |> graph_join(g1) |> graph_join(g2), node_color = paste(source, pcategory)) 47 | #print(sim) 48 | }) -------------------------------------------------------------------------------- /R/kg_edge_weights.R: -------------------------------------------------------------------------------- 1 | #' Knowledge Graph edge weights 2 | #' 3 | #' Compute edge weights for the given \link{tbl_kgx} graph using 4 | #' several pieces of categorical, ordinal, and continuous metadata. 5 | #' @param fun Function to compute edge weights with across the 6 | #' numerically encoded attributes. Default is \link{rowSums}. 7 | #' @param normalise Normalise each encoding from 0-1 by dividing by the 8 | #' maximum value. Default is \code{TRUE}. 9 | #' @param encodings A list of named lists of encoding values for 10 | #' different edge attributes. 11 | #' @inheritParams nodes 12 | #' @import tidygraph 13 | #' @import dplyr 14 | #' @export 15 | #' @examples 16 | #' data(eds_marfan_kg) 17 | #' g <- eds_marfan_kg |> 18 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 19 | #' expand(predicates = "biolink:has_phenotype", 20 | #' categories = "biolink:PhenotypicFeature")|> 21 | #' expand(categories = "biolink:Gene") 22 | #' g2 <- kg_edge_weights(g) 23 | #' edges(g2)$weight 24 | kg_edge_weights <- function(graph, 25 | normalise=TRUE, 26 | encodings=monarch_edge_weight_encodings(), 27 | fun=function(x){rowSums(x, na.rm = TRUE)} 28 | ){ 29 | active_tbl <- active(graph) 30 | encoded_cols <- c() 31 | for(key in names(encodings)){ 32 | nm_encoded <- paste0(key,"_encoded") 33 | val <- encodings[[key]] 34 | if(is.null(val)){ 35 | next 36 | } 37 | if(is.numeric(val)){ 38 | # message(key,": numeric") 39 | encoded_cols <- c(encoded_cols, nm_encoded) 40 | graph <- graph|> 41 | activate(edges)|> 42 | mutate(!!nm_encoded:=edges(graph)[[key]]) 43 | next 44 | } 45 | if(is.function(val)){ 46 | # message(key,": function") 47 | encoded_cols <- c(encoded_cols, nm_encoded) 48 | graph <- graph|> 49 | activate(edges)|> 50 | mutate(!!nm_encoded:=val(!!key)) 51 | 52 | next 53 | } 54 | if(is.list(val)){ 55 | # message(key,": list") 56 | encoded_cols <- c(encoded_cols, nm_encoded) 57 | graph <- graph|> 58 | activate(edges)|> 59 | mutate(!!nm_encoded:=ifelse(!!key %in% names(val), val[!!key], 0)) 60 | next 61 | } 62 | } 63 | ## normalise within each col 64 | if(normalise){ 65 | graph <- graph|> 66 | activate(edges)|> 67 | mutate( 68 | across(all_of(encoded_cols), 69 | ~(min(.x, na.rm = TRUE)) / (max(.x, na.rm = TRUE)) 70 | )) |> 71 | activate(!!rlang::sym(active_tbl)) 72 | } 73 | igraph::E(graph)$weight <- fun(edges(graph)[,unique(encoded_cols)]) 74 | return(graph) 75 | } 76 | -------------------------------------------------------------------------------- /tests/testthat/test-neo4j_engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("neo4j_engine caching", { 5 | e <- neo4j_engine(url = c("http://neo4j.monarchinitiative.org:7474", 6 | "http://neo4j.monarchinitiative.org", 7 | "https://neo4j.monarchinitiative.org", 8 | "http://no.such.url"), 9 | cache = TRUE) 10 | 11 | start_time <- Sys.time() 12 | 13 | g <- fetch_nodes(e, query_ids = "MONDO:0020066") 14 | 15 | test <- g %>% expand(direction = "in", 16 | predicates = "biolink:subclass_of", 17 | transitive = TRUE) 18 | 19 | end_time <- Sys.time() 20 | no_cache_elapsed_time <- as.numeric(difftime(end_time, start_time, units = "secs")) 21 | 22 | start_time <- Sys.time() 23 | 24 | test <- g %>% expand(direction = "in", 25 | predicates = "biolink:subclass_of", 26 | transitive = TRUE) 27 | 28 | end_time <- Sys.time() 29 | cache_elapsed_time <- as.numeric(difftime(end_time, start_time, units = "secs")) 30 | 31 | # the cached version should be at least 10x faster 32 | expect_true(cache_elapsed_time * 10 < no_cache_elapsed_time) 33 | }) 34 | 35 | 36 | test_that("neo4j_engine works as expected (using monarch neo4j db)", { 37 | #testthat::skip("temporary skip") 38 | 39 | # one of these should work 40 | e <- neo4j_engine(url = c("http://neo4j.monarchinitiative.org", 41 | "https://neo4j.monarchinitiative.org", 42 | "http://no.such.url", 43 | "http://neo4j.monarchinitiative.org:7474")) 44 | 45 | g <- fetch_nodes(e, query_ids = "MONDO:0006043") 46 | # this should have 6 subtypes (two direct, four under one of the direct children) 47 | subtypes <- g %>% expand(direction = "in", 48 | predicates = "biolink:subclass_of", 49 | transitive = TRUE) 50 | 51 | nodes_df <- subtypes %>% activate(nodes) %>% as.data.frame() 52 | edges_df <- subtypes %>% activate(edges) %>% as.data.frame() 53 | expect_contains(7 + -2:2, nrow(nodes_df)) 54 | expect_contains(6 + -2:2, nrow(edges_df)) 55 | 56 | # there should be a pcategory col of type character 57 | expect_true("pcategory" %in% names(nodes_df)) 58 | expect_true(is.character(nodes_df$pcategory)) 59 | 60 | # there should be a category col of type list 61 | expect_true("category" %in% names(nodes_df)) 62 | expect_true(is.list(nodes_df$category)) 63 | }) 64 | -------------------------------------------------------------------------------- /man/expand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expand.R 3 | \name{expand} 4 | \alias{expand} 5 | \title{Fetch additional knowledge graph edges connected to a query graph} 6 | \usage{ 7 | expand( 8 | graph, 9 | engine = NULL, 10 | direction = "both", 11 | predicates = NULL, 12 | categories = NULL, 13 | transitive = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{graph}{A query \code{tbl_kgx()} graph ot query with.} 19 | 20 | \item{engine}{(Optional) An engine to use for fetching query graph edges. If not provided, the graph's most recent engine is used.} 21 | 22 | \item{direction}{The direction of associations to fetch. Can be "in", "out", or "both". Default is "both".} 23 | 24 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered.} 25 | 26 | \item{categories}{A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched.} 27 | 28 | \item{transitive}{If TRUE, include transitive closure of the neighborhood. Default is FALSE. Useful in combination with predicates like \code{biolink:subclass_of}.} 29 | 30 | \item{...}{Other parameters passed to methods.} 31 | } 32 | \value{ 33 | A \code{tbl_kgx()} graph 34 | } 35 | \description{ 36 | Given an optional KG engine (e.g. a \code{file_engine()}, 37 | \code{neo4j_engine()}, or \code{monarch_engine()}) and a query \code{tbl_kgx()} graph, fetches additional nodes and edges 38 | from the KG, expanding the query graph according to specific criteria. If the first parameter is an engine, that 39 | engine is used; if the first parameter is a query graph, the most recent engine associated with the graph is used. 40 | } 41 | \examples{ 42 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 43 | ## Using Monarch (hosted) 44 | phenos <- monarch_engine() |> 45 | fetch_nodes(query_ids = "MONDO:0007525") |> 46 | expand(predicates = "biolink:has_phenotype", 47 | categories = "biolink:PhenotypicFeature") 48 | 49 | print(phenos) 50 | 51 | 52 | \dontshow{\}) # examplesIf} 53 | ## Using example KGX file packaged with monarchr 54 | data(eds_marfan_kg) 55 | phenos <- eds_marfan_kg |> 56 | fetch_nodes(query_ids = "MONDO:0007525") |> 57 | expand(predicates = "biolink:has_phenotype", 58 | categories = "biolink:PhenotypicFeature") 59 | 60 | print(phenos) 61 | } 62 | -------------------------------------------------------------------------------- /R/example_graph.neo4j_engine.R: -------------------------------------------------------------------------------- 1 | #' Return an example set of nodes from a KG engine. 2 | #' 3 | #' Given a KGX Neo4j KG engine, returns a graph representing the diversity 4 | #' of node categories and edge predicates for browsing. The returned graph is guaranteed to 5 | #' contain at least one node of every category, and at least one edge of every 6 | #' predicate. No other guarantees are made: the example graph is not minimal 7 | #' to satisfy these criteria, it is not random or even pseudo-random, and it 8 | #' may not be connected. 9 | #' 10 | #' @param engine A `neo4j_engine` object 11 | #' @param ... Other parameters (not used) 12 | #' @return A tbl_kgx graph 13 | #' @export 14 | #' @examplesIf monarch_engine_check() 15 | #' # Retrieve and print an example graph: 16 | #' g <- monarch_engine() |> example_graph() 17 | #' print(g) 18 | #' @import tidygraph 19 | #' @import dplyr 20 | example_graph.neo4j_engine <- function(engine, ...) { 21 | # first, let's discover the different edge types (predicates) available from the schema info 22 | pred_types_query <- "CALL db.schema.visualization() YIELD relationships 23 | UNWIND relationships AS rel 24 | RETURN DISTINCT type(rel) AS predicate" 25 | pred_types <- cypher_query_df(engine, pred_types_query) 26 | 27 | # next we get a bunch of edges of the different predicate types as a graph 28 | sample_preds_query <- paste0("MATCH (a)-[r:`", pred_types$predicate, "`]->(b) RETURN a, b, r LIMIT 1") 29 | sample_preds_graph <- cypher_query(engine, query = sample_preds_query) 30 | 31 | # this might not represent all categories however. 32 | 33 | # So we compute the categories that are represented thus far 34 | used_categories <- sample_preds_graph |> 35 | activate(nodes) |> 36 | as.data.frame() |> 37 | pull(category) |> 38 | unlist() |> 39 | unique() 40 | 41 | # get the available categories from the schema 42 | categories_query <- "CALL db.labels() YIELD label RETURN DISTINCT label" 43 | all_node_categories <- cypher_query_df(engine, categories_query)$label 44 | 45 | # compute the node categories that are still needed 46 | needed_categories <- setdiff(all_node_categories, used_categories) 47 | 48 | # now sample nodes of those categories, and an arbitrary connection 49 | sample_cats_query <- paste0("MATCH (a:`", needed_categories, "`) -[r]- (b) RETURN a, r, b LIMIT 1") 50 | sample_new_cats <- cypher_query(engine, query = sample_cats_query) 51 | 52 | # finally, we join the two and return 53 | suppressMessages(full_sample <- kg_join(sample_preds_graph, sample_new_cats), classes = "message") 54 | 55 | return(full_sample) 56 | } 57 | -------------------------------------------------------------------------------- /tests/testthat/test-fetch_nodes.file-engine.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(assertthat) 3 | 4 | test_that("fetch_nodes file_engine works with basid id query", { 5 | #testthat::skip("temporary skip") 6 | 7 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 8 | e <- file_engine(filename) 9 | 10 | g <- fetch_nodes(e, query_ids = c("MONDO:0007525", "MONDO:0007526")) 11 | 12 | # ensure that the last_engine attribute is set 13 | expect_true(has_attr(g, "last_engine")) 14 | # and that it has the right class 15 | expect_true(inherits(attr(g, "last_engine"), "file_engine")) 16 | 17 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 18 | # there should be an id column with 2 entries: MONDO:0007525 and HGNC:4635, 19 | # but we can't gaurantee the order 20 | expect_equal(nrow(nodes_df), 2) 21 | expect_true(all(nodes_df$id %in% c("MONDO:0007525", "MONDO:0007526"))) 22 | 23 | # there should be no edges 24 | edges_df <- g %>% activate(edges) %>% as.data.frame() 25 | expect_equal(nrow(edges_df), 0) 26 | }) 27 | 28 | test_that("fetch_nodes file_engine works with complex query syntax", { 29 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 30 | e <- file_engine(filename) 31 | 32 | # fetch_nodes(id %in% c("MONDO:0007525", "MONDO:0007526")) actually does work with file_engine 33 | g <- e %>% fetch_nodes(id %in% c("MONDO:0007525", "MONDO:0007526")) 34 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 35 | expect_equal(nrow(nodes_df), 2) 36 | 37 | # basic single-id fetch 38 | g <- e %>% fetch_nodes(id == "MONDO:0007525") 39 | 40 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 41 | expect_equal(nrow(nodes_df), 1) 42 | expect_equal(nodes_df$id, "MONDO:0007525") 43 | 44 | # test a big fetch 45 | g <- e %>% fetch_nodes("biolink:Disease" %in_list% category | "biolink:Gene" %in_list% category) 46 | nodes_df1 <- g %>% activate(nodes) %>% as.data.frame() 47 | expect_equal(nrow(nodes_df1), 133) 48 | }) 49 | 50 | test_that("fetch_nodes limit works with file_engine", { 51 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr") 52 | 53 | e <- file_engine(filename) 54 | g <- e %>% fetch_nodes("biolink:Disease" %in_list% category, limit = 10) 55 | 56 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 57 | expect_equal(nrow(nodes_df), 10) 58 | 59 | e <- file_engine(filename) 60 | g <- e %>% fetch_nodes("biolink:Disease" %in_list% category, limit = 5) 61 | 62 | nodes_df <- g %>% activate(nodes) %>% as.data.frame() 63 | expect_equal(nrow(nodes_df), 5) 64 | }) 65 | 66 | -------------------------------------------------------------------------------- /R/monarch_search.R: -------------------------------------------------------------------------------- 1 | 2 | #' Search for KG nodes using the Monarch Initiative search API 3 | #' 4 | #' This function is a wrapper around the Monarch-hosted 5 | #' [search API](https://api.monarchinitiative.org/v3/docs#/search/search_v3_api_search_get). 6 | #' It returns nodes (no edges) from the Monarch KG, fetched via an instance of `monarch_engine()`. 7 | #' 8 | #' @param query Search query string, e.g. "Cystic fibrosis" 9 | #' @param category A set of node category labels to limit the search to, e.g. c("biolink:Disease", "biolink:Gene") 10 | #' @param limit Maximum number of nodes to return. Default 10. 11 | #' @param ... Parameters passed to monarch_engine(). 12 | #' @return A local tbl_kgx graph with no edges. 13 | #' @export 14 | #' @import tidygraph 15 | #' @import dplyr 16 | #' @importFrom assertthat assert_that 17 | #' @importFrom httr GET content http_status 18 | #' @examplesIf monarch_engine_check() 19 | #' cf_hits <- monarch_search("Cystic fibrosis", category = "biolink:Disease", limit = 5) 20 | #' print(cf_hits) 21 | #' 22 | monarch_search <- function(query, 23 | category = NULL, 24 | limit = 10, 25 | ...) { 26 | 27 | engine <- monarch_engine(...) 28 | api_url <- paste0(engine$preferences$monarch_api_url, "/search") 29 | 30 | # ensure that the limit is not null and is a length-1 integer <= 500 31 | assert_that(!is.null(limit), is.numeric(limit), limit <= 500, msg = "limit must be a length-1 integer <= 500 for search_nodes.monarch_engine()") 32 | 33 | params <- list( 34 | "q" = query, 35 | "limit" = limit, 36 | "offset" = 0 37 | ) 38 | 39 | if(!is.null(category)) { 40 | params$category <- category 41 | } 42 | 43 | # put the httr::GET call in a trycatch block to handle errors 44 | response <- GET(api_url, query = flatten_body_for_httr(params)) 45 | 46 | # if the response is not 200, throw an error 47 | if(response$status_code != 200) { 48 | stop(paste0("Error: ", response$status_code, " ", http_status(response$status_code)$message)) 49 | } 50 | 51 | response_content <- content(response, "parsed") 52 | total_available <- response_content$total 53 | 54 | ids <- unlist(lapply(response_content$items, function(item) { 55 | item$id 56 | })) 57 | 58 | if(length(ids) == 0) { 59 | # return an empty graph 60 | return(tbl_kgx(nodes = data.frame(id = character(), category = list())) 61 | ) 62 | } 63 | if(length(ids) == 1) { 64 | ids <- list(ids) 65 | } 66 | 67 | g <- cypher_query(engine, query = "MATCH (n) WHERE n.id IN $ids RETURN n", 68 | parameters = list(ids = ids)) 69 | 70 | return(g) 71 | } 72 | -------------------------------------------------------------------------------- /R/expand_n.R: -------------------------------------------------------------------------------- 1 | #' Iteratively fetch additional knowledge graph edges connected to a query graph 2 | #' 3 | #' Given an initialized \link{tbl_kgx} graph, iteratively expand the graph 4 | #' \code{n} iterations using certain predicates/categories. 5 | #' Arguments can either be a single value or a list of values. 6 | #' If an argument is provided as a list, its length must be equal to the number 7 | #' of iterations (\code{n}). 8 | #' @param return_each If TRUE, return a list of graphs for each iteration. 9 | #' If FALSE, return the final graph with all expanded edges. 10 | #' @param n Number of expansion iterations to run. 11 | #' @inheritParams expand 12 | #' @param transitive NULL (not used in this function). 13 | #' 14 | #' @return 15 | #' A `tbl_kgx()` graph 16 | #' @export 17 | #' @examples 18 | #' ## Using example KGX file packaged with monarchr 19 | #' data(eds_marfan_kg) 20 | #' g <- eds_marfan_kg |> 21 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 22 | #' expand(predicates = "biolink:has_phenotype", 23 | #' categories = "biolink:PhenotypicFeature") 24 | #' 25 | #' g_expanded <- g |> 26 | #' expand_n(predicates = "biolink:subclass_of", n=3) 27 | #' @import tidygraph 28 | #' @import dplyr 29 | expand_n <- function(graph, 30 | return_each = FALSE, 31 | direction = "both", 32 | predicates = NULL, 33 | categories = NULL, 34 | transitive = NULL, 35 | n=1, 36 | ...) { 37 | ## Check args 38 | ## Check args 39 | check_len <- function(arg,n,i){ 40 | if(is.list(arg)){ 41 | if(length(arg) != n){ 42 | stop(paste("When provided a list, arguments must be equal to n.")) 43 | } 44 | return(arg[[i]]) 45 | }else{ 46 | return(arg) 47 | } 48 | } 49 | if(!is.null(transitive)) { 50 | warning("Arguments to expand_n() are passed on to expand(), except for transitive which is set to NULL. Ignoring provided setting for transitive in expand_n().") 51 | } 52 | 53 | ## Expand graph 54 | message(paste( 55 | "Initial graph size:", 56 | nrow(nodes(graph)),"nodes ||",nrow(edges(graph)),"edges" 57 | )) 58 | if(return_each) graph_list <- list(iteration0=graph) 59 | 60 | for(i in 1:n){ 61 | message("Expanding graph: iteration ",i,"/",n) 62 | graph <- expand(graph = graph, 63 | direction = check_len(direction,n,i), 64 | predicates = check_len(predicates,n,i), 65 | categories = check_len(categories,n,i), 66 | transitive = FALSE, 67 | ...) 68 | if(return_each) graph_list[[paste0("iteration",i)]] <- graph 69 | message(paste( 70 | "Graph size:", 71 | nrow(nodes(graph)),"nodes ||",nrow(edges(graph)),"edges" 72 | )) 73 | } 74 | if(return_each){ 75 | return(graph_list) 76 | } else { 77 | return(graph) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /R/save_kgx.R: -------------------------------------------------------------------------------- 1 | 2 | #' Save a graph as a KGX-formatted .tar.gz file. 3 | #' 4 | #' Given a graph, saves it using the tabular KGX format 5 | #' (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md) for later 6 | #' use with `load_kgx()` (or even backing an engine with `file_engine()`). Note that if 7 | #' any engine is associated with the graph it is not saved. 8 | #' 9 | #' @param graph A `tbl_kgx` graph to save. 10 | #' @param filename File to save the graph to. Must end in .tar.gz. 11 | #' @param ... Other parameters (unused) 12 | #' @importFrom readr write_tsv 13 | #' @importFrom archive archive_write_files 14 | #' @return The input graph (invisibly). 15 | #' @export 16 | #' @examplesIf monarch_engine_check() 17 | #' phenos <- monarch_engine() |> 18 | #' fetch_nodes(query_ids = "MONDO:0007525") |> 19 | #' expand(predicates = "biolink:has_phenotype", 20 | #' categories = "biolink:PhenotypicFeature") 21 | #' 22 | #' save_kgx(phenos, "phenos.tar.gz") 23 | #' 24 | #' # when loading the graph, we can optionally attach an engine 25 | #' loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine()) 26 | #' 27 | #' loaded_phenos 28 | #' 29 | #' # cleanup saved file 30 | #' file.remove("phenos.tar.gz") 31 | save_kgx <- function(graph, filename = "saved_kgx_graph.tar.gz", ...) { 32 | # ensure that files is a .tar.gz 33 | if(!grepl(".tar.gz$", filename)) { 34 | stop("Filename must end in .tar.gz") 35 | } 36 | 37 | node_df <- nodes(graph) 38 | edge_df <- edges(graph) 39 | 40 | node_df <- node_df[,colnames(node_df)[colnames(node_df) != "pcategory"]] 41 | 42 | pipe_format <- function(df) { 43 | df |> 44 | # list columns need to be converted to character cols, with |-separated entries 45 | lapply(function(col) { 46 | if(is.list(col)) { 47 | # both empty and NA values should be empty strings 48 | fix <- col |> lapply(function(vec) { 49 | if(length(vec) == 0) { 50 | "" 51 | } else { 52 | vec[is.na(vec)] <- "" 53 | paste0(vec, collapse = "|") 54 | } 55 | }) 56 | # str(fix) 57 | fix |> as.character() 58 | } else { 59 | col[is.na(col)] <- "" 60 | as.character(col) 61 | } 62 | }) |> 63 | as.data.frame() 64 | } 65 | 66 | node_df <- pipe_format(node_df) 67 | edge_df <- pipe_format(edge_df) 68 | 69 | basename <- stringr::str_replace(filename, ".tar.gz", "") 70 | 71 | node_df_file <- paste0(basename, "_nodes.tsv") 72 | edge_df_file <- paste0(basename, "_edges.tsv") 73 | 74 | write_tsv(node_df, node_df_file, col_names = TRUE) 75 | write_tsv(edge_df, edge_df_file, col_names = TRUE) 76 | 77 | archive_write_files(filename, c(node_df_file, edge_df_file)) 78 | 79 | file.remove(node_df_file) 80 | file.remove(edge_df_file) 81 | 82 | return(invisible(graph)) 83 | } 84 | -------------------------------------------------------------------------------- /R/fetch_nodes.R: -------------------------------------------------------------------------------- 1 | #' Fetch nodes from a graph using a set of IDs or conditions 2 | #' 3 | #' This function fetches nodes (and no edges) from a knowledge graph engine based on a set of 4 | #' conditions or a set of identifiers. If query_ids is provided, the function 5 | #' will fetch nodes with the specified identifiers. If query_ids is NULL, the 6 | #' function will fetch nodes based on the conditions provided. Only a limited 7 | #' set of condition expressions are supported, see details. 8 | #' 9 | #' @details 10 | #' If query_ids is provided, the function will fetch nodes with the specified. 11 | #' If query_ids is NULL, the function will fetch nodes based on a condition 12 | #' expression. The following features are supported: 13 | #' 14 | #' - Matching node properties with boolean operators, e.g. `in_taxon_label == "Homo sapiens"`. 15 | #' - Matching multi-valued properties with `%in_list%`, e.g. `"biolink:Gene" %in_list% category`. NOTE: using `%in_list%` against vector queries, e.g. `in_taxon_label %in_list% c("Homo sapiens", "Mus musculus")` is *not* supported. Nor does `%in_list%` support multi-valued left hand sides; `c("biolink:Disease", "biolink:Gene") %in_list% category` will not work. 16 | #' - Boolean connectives with `|`, `&`, and `!`, e.g. `in_taxon_label == "Homo sapiens" | "biolink:Gene" %in_list% category`. 17 | #' 18 | #' If more than one condition parameter is specified, they are combined with `&`; for example, 19 | #' `fetch_nodes(engine, in_taxon_label == "Homo sapiens", "biolink:Gene" %in_list% category)` is equivalent to 20 | #' `fetch_nodes(engine, in_taxon_label == "Homo sapiens" & "biolink:Gene" %in_list% category)`. 21 | #' @param engine A graph engine object 22 | #' @param ... A set of conditions identifying the nodes to fetch, only used if query_ids is NULL 23 | #' @param query_ids A character vector of identifiers to fetch 24 | #' @param limit An integer specifying the maximum number of nodes to fetch. Default to NULL, no limit. 25 | #' @return A tbl_kgx object containing the nodes 26 | #' 27 | #' @examples 28 | #' library(tidygraph) 29 | #' library(dplyr) 30 | #' 31 | #' @examplesIf monarch_engine_check() 32 | #' monarch_engine() |> 33 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 34 | #' 35 | #' # a large query 36 | #' monarch_engine() |> 37 | #' fetch_nodes("biolink:Disease" %in_list% category) 38 | #' 39 | #' @examples 40 | #' # file_engine supports the same features as neo4j_engine 41 | #' # (using the example KGX file packaged with monarchr) 42 | #' data(eds_marfan_kg) 43 | #' 44 | #' eds_marfan_kg |> 45 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 46 | #' 47 | #' # grab all Homo sapiens genes 48 | #' eds_marfan_kg |> 49 | #' fetch_nodes(in_taxon_label == "Homo sapiens" & "biolink:Gene" %in_list% category) 50 | #' 51 | #' @export 52 | fetch_nodes <- function(engine, ..., query_ids = NULL, limit = NULL) { 53 | UseMethod("fetch_nodes") 54 | } 55 | -------------------------------------------------------------------------------- /man/rolling.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rollup.R 3 | \name{rolling} 4 | \alias{rolling} 5 | \alias{roll_up} 6 | \alias{roll_down} 7 | \title{Roll node data up (along) or down (against) transitive edges. Use with \code{mutate()}} 8 | \usage{ 9 | roll_up( 10 | column = NULL, 11 | fun = c, 12 | include_self = TRUE, 13 | predicates = "biolink:subclass_of", 14 | ... 15 | ) 16 | 17 | roll_down( 18 | column = NULL, 19 | fun = c, 20 | include_self = TRUE, 21 | predicates = "biolink:subclass_of", 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{column}{The node column to draw rollup or rolldown information from.} 27 | 28 | \item{fun}{The aggregation function to use when rolling up or down. Default is \code{c}} 29 | 30 | \item{include_self}{Whether to include each nodes' value in \code{column} in the rollup/rolldown for that node.} 31 | 32 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the rollup/rolldown. Should be transitive; default \code{biolink:subclass_of}} 33 | 34 | \item{...}{Other parameters (unused)} 35 | } 36 | \value{ 37 | Vector or list, with one entry per node. 38 | } 39 | \description{ 40 | This function computes, for each node, an aggregated set of data from all 41 | descendant (for roll-ups) or ancestor (for roll-downs) nodes defined by 42 | specified edge predicates. Designed for use with \code{mutate()} 43 | on node data, for each node N, the specified \code{fun} is called 44 | on the node table \code{column} filtered to nodes that can reach (be reached by) N over 45 | \code{predicates} edges. If \code{include_self} is true, N itself is included. 46 | } 47 | \details{ 48 | Note that path counts and order are not considered; rollups (rolldowns) 49 | collect information from all descendant (ancestor) nodes as a set. 50 | 51 | The return value will be either a list, or if the result would be a list 52 | with all length-1 or length-0 elements, a vector with 0-length elements 53 | replaced by NA. Practically, this results in a list when necessary and a vector 54 | otherwise. 55 | } 56 | \examples{ 57 | data(eds_marfan_kg) 58 | 59 | eds_marfan_kg |> 60 | fetch_nodes(name == "Tall stature" | name == "Short stature") |> 61 | # get 2 levels of ancestors 62 | expand_n(predicates = "biolink:subclass_of", direction = "out", n = 2) |> 63 | activate(nodes) |> 64 | # random count value per node 65 | mutate(count = rpois(graph_order(), 1.5)) |> 66 | # apply sum to descendant (and self) values 67 | mutate(sum_count = roll_up(count, fun = sum, include_self = TRUE)) |> 68 | plot(node_label = paste(name, " count: ", count, "sum_count: ", sum_count)) 69 | 70 | } 71 | \seealso{ 72 | \code{\link[=roll_down]{roll_down()}}, \code{\link[=descendants]{descendants()}}, [ancestors(), \code{\link[=transfer]{transfer()}}, \code{\link[=transitive_closure]{transitive_closure()}}] 73 | } 74 | -------------------------------------------------------------------------------- /R/monarch_engine.R: -------------------------------------------------------------------------------- 1 | #' Create a knowledge graph engine object backed by the public Monarch Neo4j instance 2 | #' 3 | #' Creates a knowledge graph engine backed by the publicly hosted Monarch Neo4j database, used to fetch nodes and edges from the database as local 4 | #' graph objects. 5 | #' 6 | #' Engines store preference information specifying how data are fetched and manipulated; for example, 7 | #' while node `category` is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"), 8 | #' typically a single category is used to represent the node in a graph, and is returned as the nodes' `pcategory`. A preference list of categories to use for `pcategory` is 9 | #' stored in the engine's preferences. A default set of preferences is stored in the package for use with this and other KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md), 10 | #' but these can be overridden by the user. 11 | #' 12 | #' The `monarch_engine()` overrides `search_nodes()` to use the Monarch search API, so setting `node_search_properties` in the preferences will not affect the search behavior. To use regex-based searching with the Monarch Neo4j instance, use `neo4j_engine()` instead and specify the Monarch Neo4j URL (https://neo4j.monarchinitiative.org). 13 | #' 14 | #' @param url (Optional) May be specified to override the default Monarch Neo4j URL. If given a vector, each will be tried in sequence; if a URL times out (see timeout) or fails, the next is tried. 15 | #' @param api_url (Optional) May be specified to override the default Monarch API URL (specifying the location of the `/search` endpoint used by `search_nodes()`). 16 | #' @param preferences A named list of preferences for the engine. 17 | #' @param timeout Number of seconds to wait before trying the next url. 18 | #' @param ... Additional arguments passed to `neo2R::startGraph()`. 19 | #' @seealso `file_engine()`, `neo4j_engine()` 20 | #' @return An object of class `monarch_engine` 21 | #' @export 22 | #' @examplesIf monarch_engine_check() 23 | #' library(tidygraph) 24 | #' library(dplyr) 25 | #' 26 | #' monarch <- monarch_engine() 27 | #' res <- monarch |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947")) 28 | #' print(res) 29 | #' 30 | monarch_engine <- function(url = c("https://neo4j.monarchinitiative.org", 31 | "http://neo4j.monarchinitiative.org", 32 | "https://neo4j.monarchinitiative.org:7473", 33 | "http://neo4j.monarchinitiative.org:7473"), 34 | api_url = "https://api.monarchinitiative.org/v3/api", 35 | preferences = NULL, 36 | timeout = 2, 37 | ...) { 38 | e <- neo4j_engine(url = url, preferences = preferences, timeout = timeout, ...) 39 | e$preferences$monarch_api_url <- api_url 40 | class(e) <- c("monarch_engine", class(e)) 41 | return(e) 42 | } 43 | -------------------------------------------------------------------------------- /man/monarch_semsim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/monarch_semsim.R 3 | \name{monarch_semsim} 4 | \alias{monarch_semsim} 5 | \title{Semantic similarity mapping between two graphs} 6 | \usage{ 7 | monarch_semsim( 8 | query_graph, 9 | target_graph, 10 | metric = "ancestor_information_content", 11 | include_reverse = FALSE, 12 | keep_unmatched = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{query_graph}{A tbl_kgx graph.} 17 | 18 | \item{target_graph}{A tbl_kgx graph.} 19 | 20 | \item{metric}{The semantic similarity metric to use. Default is \code{"ancestor_information_content"}. Also available are \code{"jaccard_similarity"} and \code{"phenodigm_score"}.} 21 | 22 | \item{include_reverse}{Whether to include the best matches from the target graph to the query graph. Default is \code{FALSE}.} 23 | 24 | \item{keep_unmatched}{Whether to keep nodes in the target graph that do not have a match. Default is \code{FALSE}.} 25 | } 26 | \value{ 27 | A tbl_kgx graph with \code{"computed:best_matches"} edges between the nodes of the two input graphs and columns for \code{monarch_semsim_metric}, \code{monarch_semsim_score}, and \code{monarch_semsim_ancestor_id}. 28 | } 29 | \description{ 30 | This function calls the Monarch-hosted semantic similarity API to compare two 31 | graphs, via the same endpoints as the Monarch Phenotype Explorer: 32 | https://monarchinitiative.org/explore#phenotype-explorer. 33 | } 34 | \details{ 35 | The API returns the best matches between the nodes of the two graphs, based on 36 | a specified knowledge-graph-based metric: the default is \code{"ancestor_information_content"}, 37 | also available are \code{"jaccard_similarity"} and \code{"phenodigm_score"}. The result is 38 | returned as a graph, with \code{"computed:best_matches"} edges between the nodes of the two input graphs. 39 | 40 | By default, the function only returns the best matches from the first graph to the second graph, and 41 | removes any nodes that do not have a match. If \code{include_reverse = TRUE}, the function also returns 42 | the best matches from the second graph to the first graph. 43 | 44 | The engine attached to the return graph is that of the query. 45 | } 46 | \examples{ 47 | \dontshow{if (monarch_engine_check(service = "semsim")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 48 | 49 | g1 <- monarch_engine() |> 50 | fetch_nodes(query_ids = "MONDO:0007947") |> 51 | expand(categories = "biolink:PhenotypicFeature") 52 | 53 | g2 <- monarch_engine() |> 54 | fetch_nodes(query_ids = "MONDO:0007522") |> 55 | expand(categories = "biolink:PhenotypicFeature") 56 | 57 | sim <- monarch_semsim(g1, g2) 58 | print(sim) 59 | 60 | # also inclue the unmatched targets 61 | sim <- monarch_semsim(g1, g2, keep_unmatched = TRUE) 62 | print(sim) 63 | 64 | # inclue reverse matches 65 | sim <- monarch_semsim(g1, g2, include_reverse = TRUE) 66 | print(sim) 67 | \dontshow{\}) # examplesIf} 68 | } 69 | -------------------------------------------------------------------------------- /man/monarch_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/monarch_engine.R 3 | \name{monarch_engine} 4 | \alias{monarch_engine} 5 | \title{Create a knowledge graph engine object backed by the public Monarch Neo4j instance} 6 | \usage{ 7 | monarch_engine( 8 | url = c("https://neo4j.monarchinitiative.org", "http://neo4j.monarchinitiative.org", 9 | "https://neo4j.monarchinitiative.org:7473", 10 | "http://neo4j.monarchinitiative.org:7473"), 11 | api_url = "https://api.monarchinitiative.org/v3/api", 12 | preferences = NULL, 13 | timeout = 2, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{url}{(Optional) May be specified to override the default Monarch Neo4j URL. If given a vector, each will be tried in sequence; if a URL times out (see timeout) or fails, the next is tried.} 19 | 20 | \item{api_url}{(Optional) May be specified to override the default Monarch API URL (specifying the location of the \verb{/search} endpoint used by \code{search_nodes()}).} 21 | 22 | \item{preferences}{A named list of preferences for the engine.} 23 | 24 | \item{timeout}{Number of seconds to wait before trying the next url.} 25 | 26 | \item{...}{Additional arguments passed to \code{neo2R::startGraph()}.} 27 | } 28 | \value{ 29 | An object of class \code{monarch_engine} 30 | } 31 | \description{ 32 | Creates a knowledge graph engine backed by the publicly hosted Monarch Neo4j database, used to fetch nodes and edges from the database as local 33 | graph objects. 34 | } 35 | \details{ 36 | Engines store preference information specifying how data are fetched and manipulated; for example, 37 | while node \code{category} is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"), 38 | typically a single category is used to represent the node in a graph, and is returned as the nodes' \code{pcategory}. A preference list of categories to use for \code{pcategory} is 39 | stored in the engine's preferences. A default set of preferences is stored in the package for use with this and other KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md), 40 | but these can be overridden by the user. 41 | 42 | The \code{monarch_engine()} overrides \code{search_nodes()} to use the Monarch search API, so setting \code{node_search_properties} in the preferences will not affect the search behavior. To use regex-based searching with the Monarch Neo4j instance, use \code{neo4j_engine()} instead and specify the Monarch Neo4j URL (https://neo4j.monarchinitiative.org). 43 | } 44 | \examples{ 45 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 46 | library(tidygraph) 47 | library(dplyr) 48 | 49 | monarch <- monarch_engine() 50 | res <- monarch |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947")) 51 | print(res) 52 | \dontshow{\}) # examplesIf} 53 | } 54 | \seealso{ 55 | \code{file_engine()}, \code{neo4j_engine()} 56 | } 57 | -------------------------------------------------------------------------------- /R/transitive_closure.R: -------------------------------------------------------------------------------- 1 | #' Compute transitive closure over a predicate. 2 | #' 3 | #' Computes the transitive closure of a graph, treating the specified 4 | #' predicate as transitive. Resulting edge predicates will the be the 5 | #' same, but have primary_knowledge_source set to transitive_. 6 | #' 7 | #' @return Graph with transitive edges added. 8 | #' @seealso [roll_up()], [transfer()], [descendants()], [ancestors()] 9 | #' @param g The `tbl_kgx` graph to compute on. 10 | #' @param predicate The edge predicate to close over. 11 | #' 12 | #' @examples 13 | #' data(eds_marfan_kg) 14 | #' 15 | #' eds_marfan_kg |> fetch_nodes(name == "Tall stature") |> 16 | #' # get 2 levels of ancestors 17 | #' expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |> 18 | #' activate(edges) |> 19 | #' filter(primary_knowledge_source == "infores:upheno") |> 20 | #' transitive_closure(predicate = "biolink:subclass_of") |> 21 | #' plot(edge_color = primary_knowledge_source) 22 | #' 23 | #' @import tidygraph 24 | #' @export 25 | transitive_closure <- function(g, predicate = "biolink:subclass_of") { 26 | if(length(predicate) != 1) { 27 | stop("Error: predicate parameter of transitive_closure() must be length 1.") 28 | } 29 | # if there are no edges to close, return the input 30 | p <- predicate 31 | if(nrow(edges(g) |> filter(predicate == p)) == 0) {return(g)} 32 | 33 | active_tbl <- active(g) 34 | 35 | with_downstream <- g |> 36 | activate(nodes) |> 37 | mutate(downstream_nodes = roll_down(id, include_self = FALSE, predicate = predicate)) |> 38 | filter(!is.na(downstream_nodes)) 39 | 40 | # create a new edge df... start by getting the nodes and the list col 41 | new_edges <- nodes(with_downstream) |> 42 | select(id, downstream_nodes) |> 43 | tidyr::unnest(downstream_nodes, keep_empty = FALSE) |> 44 | mutate(subject = id, predicate = predicate, object = downstream_nodes, primary_knowledge_source = paste0("transitive_", predicate)) |> 45 | select(subject, predicate, object, primary_knowledge_source) |> 46 | mutate(edge_key = paste(subject, "@", predicate, "@", object)) 47 | 48 | node_indices <- nodes(g) |> mutate(index = dplyr::row_number()) |> select(id, index) 49 | 50 | # we create edge keys so that we can filter out edges that are duplicates created as part of 51 | # the process above (the original, non-transitive edges) 52 | g <- g |> 53 | activate(edges) |> 54 | mutate(edge_key = paste(subject, "@", predicate, "@", object)) 55 | 56 | new_edges <- new_edges |> 57 | dplyr::left_join(node_indices, by = c("subject" = "id")) |> 58 | dplyr::rename(from = index) |> 59 | dplyr::left_join(node_indices, by = c("object" = "id")) |> 60 | dplyr::rename(to = index) |> 61 | mutate(edge_key = paste(subject, "@", predicate, "@", object)) |> 62 | filter(!edge_key %in% edges(g)$edge_key) 63 | 64 | res <- g |> 65 | tidygraph::bind_edges(new_edges) |> 66 | activate(edges) |> 67 | select(-edge_key) |> 68 | activate(!!rlang::sym(active_tbl)) 69 | 70 | res 71 | } 72 | -------------------------------------------------------------------------------- /man/monarch_edge_weight_encodings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/monarch_edge_weight_encodings.R 3 | \name{monarch_edge_weight_encodings} 4 | \alias{monarch_edge_weight_encodings} 5 | \title{Generate Default Edge Weight Encodings for Monarch Knowledge Graph} 6 | \usage{ 7 | monarch_edge_weight_encodings() 8 | } 9 | \value{ 10 | A list of encodings for various edge attributes: 11 | \describe{ 12 | \item{knowledge_level}{A named list assigning weights based on knowledge assertion levels: 13 | \itemize{ 14 | \item \code{"knowledge_assertion"}: \code{1} (full weight for direct assertions) 15 | \item \code{"logical_entailment"}: \code{1} (full weight for inferred relationships) 16 | \item \code{"not_provided"}: \code{0} (no weight when knowledge level is unspecified) 17 | } 18 | } 19 | \item{frequency_qualifier}{A named list mapping Human Phenotype Ontology (HPO) frequency terms to numeric weights: 20 | \itemize{ 21 | \item \code{"HP:0040281"}: \code{1} ("Very frequent") 22 | \item \code{"HP:0040282"}: \code{0.75} ("Frequent") 23 | \item \code{"HP:0040283"}: \code{0.5} ("Occasional") 24 | \item \code{"HP:0040284"}: \code{0.25} ("Very rare") 25 | } 26 | } 27 | \item{negated}{A named list representing negation status: 28 | \itemize{ 29 | \item \code{TRUE}: \code{-1} (negated relationships decrease the weight) 30 | \item \code{FALSE}: \code{0} (non-negated relationships have no penalty) 31 | } 32 | } 33 | \item{has_total}{\code{NULL}. Placeholder for total counts; can be overridden with custom logic.} 34 | \item{has_quotient}{An empty numeric vector. Placeholder for quotient-based weight calculations.} 35 | \item{has_count}{\code{NULL}. Placeholder for count-based weight computations.} 36 | \item{has_percentage}{\code{NULL}. Placeholder for percentage-based weight computations.} 37 | \item{has_evidence}{\code{NULL}. Placeholder for evidence-based weight computations.} 38 | \item{onset_qualifier}{\code{NULL}. Placeholder for onset qualifiers, which can influence weights.} 39 | \item{publications}{A function that computes the weight based on the number of unique publications: 40 | \itemize{ 41 | \item Takes a vector of publication IDs and returns the count of unique IDs. 42 | } 43 | } 44 | } 45 | } 46 | \description{ 47 | This function returns a list of default encoding schemes used to compute 48 | edge weights in Monarch knowledge graph visualizations and analyses. 49 | Each encoding represents how specific edge attributes contribute to 50 | the overall edge weight. Users can inspect these defaults and 51 | override them with custom encoding logic if needed. 52 | } 53 | \examples{ 54 | # View the default edge weight encodings 55 | monarch_edge_weight_encodings() 56 | 57 | # Override the encoding to assign more weight to logical entailments 58 | encodings <- monarch_edge_weight_encodings() 59 | encodings$knowledge_level$logical_entailment <- 0.5 60 | 61 | # Modify the negation encoding to reduce penalty for negated relationships 62 | encodings$negated[["TRUE"]] <- -0.5 63 | 64 | } 65 | -------------------------------------------------------------------------------- /man/neo4j_engine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/neo4j_engine.R 3 | \name{neo4j_engine} 4 | \alias{neo4j_engine} 5 | \title{Create a knowledge graph engine object for a neo4j database} 6 | \usage{ 7 | neo4j_engine( 8 | url, 9 | username = NA, 10 | password = NA, 11 | preferences = NULL, 12 | timeout = 1, 13 | cache = TRUE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{url}{A character string indicating the URL of the neo4j database. If given a vector, each will be tried in sequence; if a URL times out (see timeout) or fails, the next is tried.} 19 | 20 | \item{username}{A character string indicating the username for the neo4j database (if needed).} 21 | 22 | \item{password}{A character string indicating the password for the neo4j database (if needed).} 23 | 24 | \item{preferences}{A named list of preferences for the engine.} 25 | 26 | \item{timeout}{Number of sections to wait before trying the next url.} 27 | 28 | \item{cache}{Whether to cache query results in memory for the length of the R session.} 29 | 30 | \item{...}{Additional arguments passed to \code{neo2R::startGraph()}.} 31 | } 32 | \value{ 33 | An object of class \code{neo4j_engine} 34 | } 35 | \description{ 36 | Creates a knowledge graph engine backed by a neo4j database, from a URL and optional username and password. Knowledge graph "engines" 37 | are objects that store information about how to connect to a (potentially large) knowledge graph, and can be used to fetch nodes and edges from the database as local 38 | graph objects. 39 | } 40 | \details{ 41 | Engines store preference information specifying how data are fetched and manipulated; for example, 42 | while node \code{category} is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"), 43 | typically a single category is used to represent the node in a graph, and is returned as the nodes' \code{pcategory}. A preference list of categories to use for \code{pcategory} is 44 | stored in the engine's preferences. A default set of preferences is stored in the package for use with KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md), 45 | but these can be overridden by the user. 46 | 47 | For \code{neo4j_engine()}s, preferences are also used to set the node properties to search when using \code{search_nodes()}, defaulting to regex-based searches on id, name, and description. (The \code{monarch_engine()} is a type 48 | of \code{neo4j_engine()} with the URL set to the Monarch Neo4j instance, and overrides \code{search_nodes()} to use the Monarch search API, see \code{monarch_engine()} for details). 49 | } 50 | \examples{ 51 | \dontshow{if (neo4j_engine_check("https://neo4j.monarchinitiative.org")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 52 | library(tidygraph) 53 | library(dplyr) 54 | 55 | engine <- neo4j_engine(url = "https://neo4j.monarchinitiative.org") 56 | res <- engine |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947")) 57 | print(res) 58 | \dontshow{\}) # examplesIf} 59 | } 60 | \seealso{ 61 | \code{file_engine()}, \code{monarch_engine()} 62 | } 63 | -------------------------------------------------------------------------------- /man/fetch_nodes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fetch_nodes.R 3 | \name{fetch_nodes} 4 | \alias{fetch_nodes} 5 | \title{Fetch nodes from a graph using a set of IDs or conditions} 6 | \usage{ 7 | fetch_nodes(engine, ..., query_ids = NULL, limit = NULL) 8 | } 9 | \arguments{ 10 | \item{engine}{A graph engine object} 11 | 12 | \item{...}{A set of conditions identifying the nodes to fetch, only used if query_ids is NULL} 13 | 14 | \item{query_ids}{A character vector of identifiers to fetch} 15 | 16 | \item{limit}{An integer specifying the maximum number of nodes to fetch. Default to NULL, no limit.} 17 | } 18 | \value{ 19 | A tbl_kgx object containing the nodes 20 | } 21 | \description{ 22 | This function fetches nodes (and no edges) from a knowledge graph engine based on a set of 23 | conditions or a set of identifiers. If query_ids is provided, the function 24 | will fetch nodes with the specified identifiers. If query_ids is NULL, the 25 | function will fetch nodes based on the conditions provided. Only a limited 26 | set of condition expressions are supported, see details. 27 | } 28 | \details{ 29 | If query_ids is provided, the function will fetch nodes with the specified. 30 | If query_ids is NULL, the function will fetch nodes based on a condition 31 | expression. The following features are supported: 32 | \itemize{ 33 | \item Matching node properties with boolean operators, e.g. \code{in_taxon_label == "Homo sapiens"}. 34 | \item Matching multi-valued properties with \verb{\%in_list\%}, e.g. \code{"biolink:Gene" \%in_list\% category}. NOTE: using \verb{\%in_list\%} against vector queries, e.g. \code{in_taxon_label \%in_list\% c("Homo sapiens", "Mus musculus")} is \emph{not} supported. Nor does \verb{\%in_list\%} support multi-valued left hand sides; \code{c("biolink:Disease", "biolink:Gene") \%in_list\% category} will not work. 35 | \item Boolean connectives with \code{|}, \code{&}, and \code{!}, e.g. \code{in_taxon_label == "Homo sapiens" | "biolink:Gene" \%in_list\% category}. 36 | } 37 | 38 | If more than one condition parameter is specified, they are combined with \code{&}; for example, 39 | \code{fetch_nodes(engine, in_taxon_label == "Homo sapiens", "biolink:Gene" \%in_list\% category)} is equivalent to 40 | \code{fetch_nodes(engine, in_taxon_label == "Homo sapiens" & "biolink:Gene" \%in_list\% category)}. 41 | } 42 | \examples{ 43 | library(tidygraph) 44 | library(dplyr) 45 | 46 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 47 | monarch_engine() |> 48 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 49 | 50 | # a large query 51 | monarch_engine() |> 52 | fetch_nodes("biolink:Disease" \%in_list\% category) 53 | \dontshow{\}) # examplesIf} 54 | # file_engine supports the same features as neo4j_engine 55 | # (using the example KGX file packaged with monarchr) 56 | data(eds_marfan_kg) 57 | 58 | eds_marfan_kg |> 59 | fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 60 | 61 | # grab all Homo sapiens genes 62 | eds_marfan_kg |> 63 | fetch_nodes(in_taxon_label == "Homo sapiens" & "biolink:Gene" \%in_list\% category) 64 | 65 | } 66 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @importFrom tidygraph as_tibble 3 | nodes.tbl_kgx <- function(graph, ...) { 4 | tidygraph::as_tibble(graph, active = "nodes") 5 | } 6 | 7 | 8 | #' @export 9 | #' @importFrom tidygraph as_tibble 10 | edges.tbl_kgx <- function(graph, ...) { 11 | tidygraph::as_tibble(graph, active = "edges") 12 | } 13 | 14 | 15 | #' Get graph nodes table. 16 | #' 17 | #' @param graph A graph object 18 | #' @param ... Other options (unused) 19 | #' @return A tibble with the nodes of the graph 20 | #' @importFrom tidygraph as_tibble 21 | #' @export 22 | #' @examples 23 | #' # (using the example KGX file packaged with monarchr) 24 | #' data(eds_marfan_kg) 25 | #' 26 | #' g <- eds_marfan_kg |> 27 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 28 | #' 29 | #' print(nodes(g)) 30 | nodes <- function(graph, ...) { 31 | UseMethod("nodes") 32 | } 33 | 34 | #' Get graph edges table. 35 | #' 36 | #' @param graph Input graph 37 | #' @param ... Other options (unused) 38 | #' @export 39 | #' @return A tibble with the edges of the graph 40 | #' @importFrom tidygraph as_tibble 41 | #' @examples 42 | #' # (using the example KGX file packaged with monarchr) 43 | #' data(eds_marfan_kg) 44 | #' 45 | #' g <- eds_marfan_kg |> 46 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 47 | #' 48 | #' print(edges(g)) 49 | edges <- function(graph, ...) { 50 | UseMethod("edges") 51 | } 52 | 53 | #' Explode a graph into a list of single-node graphs 54 | #' 55 | #' @param graph A tbl_kgx graph. 56 | #' @param ... Other options (unused) 57 | #' @return A list of tbl_kgx graphs. 58 | #' @examples 59 | #' # (using the example KGX file packaged with monarchr) 60 | #' data(eds_marfan_kg) 61 | #' 62 | #' g <- eds_marfan_kg |> 63 | #' fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526")) 64 | #' 65 | #' print(explode(g)) 66 | #' @export 67 | explode <- function(graph, ...) { 68 | UseMethod("explode") 69 | } 70 | 71 | #' @export 72 | #' @importFrom tidygraph activate 73 | explode.tbl_kgx <- function(graph, ...) { 74 | nodes <- as_tibble(graph, active = "nodes") 75 | graphs <- lapply(nodes$id, function(node_id) { 76 | filter(tidygraph::activate(graph, nodes), id == node_id) 77 | }) 78 | return(graphs) 79 | } 80 | 81 | 82 | 83 | #' @noRd 84 | merge_lists <- function(a, b) { 85 | # This function takes two named lists and merges them recursively. Values in 86 | # the second list (`b`) override or extend those in the first list (`a`). 87 | # If both `a` and `b` contain a named list at the same position, they are merged recursively. 88 | # 89 | # a <- list(x = list(y = 1, z = 2), foo = "bar") 90 | # b <- list(x = list(y = 42, new = 99), foo = "baz", extra = "new_value") 91 | # merge_lists(a, b) 92 | 93 | for (name in names(b)) { 94 | if (is.list(b[[name]]) && name %in% names(a) && is.list(a[[name]])) { 95 | # Recursively merge if both elements are lists 96 | a[[name]] <- merge_lists(a[[name]], b[[name]]) 97 | } else { 98 | # Otherwise, override/add the value from b to a 99 | a[[name]] <- b[[name]] 100 | } 101 | } 102 | return(a) 103 | } 104 | -------------------------------------------------------------------------------- /R/kg_join.tbl_kgx.R: -------------------------------------------------------------------------------- 1 | 2 | # given a dataframe with potential list columns, 3 | # ensures that any NULL entries are replaced with a single NA 4 | null_col_entries_to_na <- function(df) { 5 | for(name in names(df)) { 6 | col <- df[[name]] 7 | if(is.list(col)) { 8 | col <- lapply(col, function(el) { 9 | if(is.null(el)) { 10 | NA 11 | } else { 12 | el 13 | } 14 | } 15 | ) 16 | df[[name]] <- col 17 | } 18 | } 19 | 20 | return(df) 21 | } 22 | 23 | #' @import tidygraph 24 | #' @import dplyr 25 | #' @export 26 | kg_join.tbl_kgx <- function(graph1, graph2, ...) { 27 | nodes_g1 <- nodes(graph1) 28 | nodes_g2 <- nodes(graph2) 29 | 30 | # we don't want to keep the to and from columns in the edges, since we'll be rebuilding edges from scratch, 31 | # and be running them through unique() 32 | edges_g1 <- edges(graph1) |> select(-to, -from) 33 | edges_g2 <- edges(graph2) |> select(-to, -from) 34 | 35 | all_nodes <- unique(nodes_g1 |> 36 | full_join(nodes_g2)) |> 37 | mutate(idx = row_number()) # add an index column to the nodes 38 | 39 | 40 | all_edges <- unique(edges_g1 |> 41 | full_join(edges_g2)) 42 | 43 | 44 | 45 | # given a single row of an edge data frame, we need to replicate it for each distinct from/to pair that match subjects and objects 46 | fill_edges <- function(row_df) { 47 | # we need to replicate this row with distinct from and to information from the all_nodes data 48 | subject_idxs <- all_nodes |> filter(id == row_df$subject) |> pull(idx) 49 | object_idxs <- all_nodes |> filter(id == row_df$object) |> pull(idx) 50 | # we need the cross-join of these two vectors 51 | cross <- expand.grid(from = subject_idxs, to = object_idxs) 52 | # now we need to add the properties from the original row to each of these new rows 53 | # we start by replcating the row_df nrow(cross) times 54 | row_df <- row_df[rep(1, nrow(cross)), ] 55 | # then we add the cross data to the row_df 56 | row_df <- cbind(row_df, cross) 57 | 58 | return(row_df) 59 | } 60 | 61 | filled_edges <- NULL 62 | 63 | # not using rowwise() here so that each row is nicely contained including list columns as a df row (not a list) 64 | # but maybe group_by() isn't right either - this triggers the call to fill_edges(.) even if there are no rows in the df 65 | # so we have a guard here 66 | if(nrow(all_edges) > 0) { 67 | filled_edges <- all_edges |> 68 | mutate(edge_idx = row_number()) |> 69 | group_by(edge_idx) |> 70 | do(fill_edges(.)) |> 71 | as_tibble() |> 72 | select(-edge_idx) # remove the edge_idx column 73 | } 74 | 75 | all_nodes <- all_nodes |> 76 | select(-idx) # remove the idx column 77 | 78 | all_nodes <- null_col_entries_to_na(all_nodes) 79 | filled_edges <- null_col_entries_to_na(filled_edges) 80 | res <- tbl_kgx(nodes = all_nodes, edges = filled_edges, attach_engine = get_engine(graph1, fail_if_missing = FALSE)) 81 | return(res) 82 | } 83 | -------------------------------------------------------------------------------- /R/transfer.R: -------------------------------------------------------------------------------- 1 | #' Transfer information over edges to nodes. 2 | #' 3 | #' Used to 'transfer' information from nodes to other nodes across 4 | #' specific predicates, either in an outward direction (along the edge 5 | #' direction) or inward (against the edge direction). Returns a node-property 6 | #' column; intended to be used with mutate() on nodes. 7 | #' 8 | #' The return value will be either a list, or if the result would be a list 9 | #' with all length-1 or length-0 elements, a vector with 0-length elements 10 | #' replaced by NA. Practically, this results in a list when necessary and a vector 11 | #' otherwise. 12 | #' 13 | #' @return Vector or list, with one entry per node. 14 | #' @seealso [roll_up()], [transitive_closure], [descendants()], [ancestors()] 15 | #' @param colname The node column to transfer information from 16 | #' @param over The edge predicate to transfer information over 17 | #' @param direction Whether to transfer information along the predicate direction ("out") or against ("in") 18 | #' 19 | #' @examples 20 | #' data(eds_marfan_kg) 21 | #' 22 | #' engine |> eds_marfan_kg |> 23 | #' expand(categories = "biolink:Disease") |> 24 | #' activate(nodes) |> 25 | #' mutate(caused_by_genes = 26 | #' transfer(name, over = "biolink:causes", direction = "out")) |> 27 | #' mutate(causes_diseases = 28 | #' transfer(name, over = "biolink:causes", direction = "in")) |> 29 | #' plot.tbl_kgx(node_label = paste(name, 30 | #' " caused by: ", caused_by_genes, 31 | #' " causes: ", causes_diseases), 32 | #' label_size = 3) 33 | #' 34 | #' @import tidygraph 35 | #' @export 36 | transfer <- function(colname = NULL, over, direction = "out") { 37 | if(direction != "in" & direction != "out") { 38 | stop("Error, 'toward' must be one of 'in' or 'out'.") 39 | } 40 | from = ifelse(direction == "out", "subject", "object") 41 | toward = ifelse(from == "subject", "object", "subject") 42 | #print(paste("Pulling from", from, "to", toward, "data from", {{colname}})) 43 | 44 | edge_data <- .E() |> filter(predicate %in% over) 45 | node_data <- .N() 46 | # so we have a set of edges, for each unique object... we need to collect expr 47 | node_ids <- node_data$id 48 | agg_results <- lapply(node_ids, function(node_id) { 49 | relevant_edges <- edge_data[edge_data[[toward]] == node_id,] #|> filter(object == node_id) 50 | source_ids <- unique(relevant_edges[[from]]) # e.g. g1, g2, g3.... 51 | relevant_subject_nodes <- node_data |> filter(id %in% source_ids) 52 | relevant_data <- relevant_subject_nodes |> pull({{colname}}) 53 | relevant_data 54 | }) 55 | 56 | # determines if the given list can be converted safely to a vector 57 | vec_safe <- function(lst) { 58 | lengths_ok <- all(lengths(lst) == 1 | lengths(lst) == 0) 59 | types <- sapply(lst, typeof) 60 | types_ok <- length(unique(types)) == 1 61 | lengths_ok && types_ok 62 | } 63 | 64 | if(vec_safe(agg_results)) { 65 | # length-0 lists need to be replaced with a single NA 66 | agg_results <- lapply(agg_results, function(el) { 67 | if(length(el) == 0) { 68 | NA 69 | } else { 70 | el 71 | } 72 | }) 73 | agg_results <- unlist(agg_results) 74 | } 75 | 76 | return(agg_results) 77 | } 78 | -------------------------------------------------------------------------------- /R/knit_print.tbl_kgx.R: -------------------------------------------------------------------------------- 1 | #' @importFrom utils capture.output 2 | clean_df <- function(df) { 3 | new_df <- list() 4 | for(colname in colnames(df)) { 5 | col_i <- df[[colname]] 6 | 7 | if(is.list(col_i)) { 8 | colname <- paste0(colname, " (list)") 9 | } 10 | 11 | contents <- lapply(col_i, function(cell_data) { 12 | paste(capture.output(dput(cell_data, control = "useSource")), collapse = "") 13 | }) |> unlist() 14 | 15 | new_df[[colname]] <- contents 16 | } 17 | 18 | as_tibble(new_df) 19 | } 20 | 21 | 22 | #' Specialized print function for KGX graphs in knitted documents 23 | #' @param x A `tbl_kgx` graph to display. 24 | #' @param ... Other arguments (unused). 25 | #' @param show The maximum number of nodes and edges to display. 26 | #' @export 27 | #' @import knitr 28 | #' @importFrom kableExtra kable 29 | #' @importFrom kableExtra kable_styling 30 | #' @importFrom kableExtra column_spec 31 | #' @importFrom dplyr slice_head 32 | knit_print.tbl_kgx <- function(x, ..., show = 100) { 33 | graph <- x 34 | 35 | g <- order_cols(graph) 36 | 37 | nodes_colnames <- colnames(nodes(g)) 38 | 39 | nodes_sub <- clean_df(nodes(g)) |> 40 | slice_head(n = show) 41 | 42 | nodes_kbl <- nodes_sub |> 43 | kable("html", escape = FALSE) |> 44 | kable_styling(fixed_thead = TRUE, 45 | bootstrap_options = c("striped", "hover", "condensed")) 46 | 47 | if("description" %in% nodes_colnames) { 48 | colnum <- seq_along(nodes_colnames)[nodes_colnames == "description"] 49 | nodes_kbl <- nodes_kbl |> column_spec(colnum, width_min = "300px") 50 | } 51 | if("synonym" %in% nodes_colnames) { 52 | colnum <- seq_along(nodes_colnames)[nodes_colnames == "synonym"] 53 | nodes_kbl <- nodes_kbl |> column_spec(colnum, width_min = "300px") 54 | } 55 | 56 | edges_sub <- clean_df(edges(g)) |> 57 | slice_head(n = show) 58 | 59 | edges_kbl <- edges_sub |> 60 | kable("html", escape = FALSE) |> 61 | kable_styling(fixed_thead = TRUE, 62 | bootstrap_options = c("striped", "hover", "condensed")) 63 | 64 | nodes_total <- nrow(nodes(g)) 65 | edges_total <- nrow(edges(g)) 66 | nodes_showing <- nrow(nodes_sub) 67 | edges_showing <- nrow(edges_sub) 68 | 69 | knitr::asis_output(knitr::knit_child(text = c( 70 | '', 71 | paste0("
Graph with ", nodes_total, " nodes and ", edges_total, " edges. Expand sections below for details.
"), 72 | '
Node Data', 73 | paste0("

Showing ", nodes_showing, " of ", nodes_total, " nodes:

"), 74 | '
', 75 | '```{r eval=TRUE, echo=FALSE}', 76 | 'nodes_kbl', 77 | '```', 78 | '
', 79 | '
', 80 | '
Edge Data', 81 | paste0("

Showing ", edges_showing, " of ", edges_total, " edges:

"), 82 | '
', 83 | '```{r eval=TRUE, echo=FALSE}', 84 | 'edges_kbl', 85 | '```', 86 | '
', 87 | '
', 88 | '
' 89 | ), envir = environment(), quiet = TRUE)) 90 | } 91 | -------------------------------------------------------------------------------- /R/tbl_kgx.R: -------------------------------------------------------------------------------- 1 | #' Create a KGX graph object 2 | #' 3 | #' This function creates a new tbl_kgx object which inherits from tidygraph::tbl_graph, from node and edge dataframes, ensuring they conform to the KGX specification 4 | #' described at https://github.com/biolink/kgx/blob/master/specification/kgx-format.md. Specifically, nodes must have an 'id' and 'category' column, 5 | #' and edges, if provided, must have 'subject', 'predicate', and 'object' columns. The function allows graphs with no edges. 6 | #' The function sets 'from' and 'to' columns in the edges from 'subject' and 'object' respectively, and sets the node key to 'id'. 7 | #' Additional columns are allowed. 8 | #' 9 | #' This function will generally be called internally. 10 | #' 11 | #' @param nodes A data frame containing the nodes of the graph. Must have 'id' and 'category' columns. 12 | #' @param edges A data frame containing the edges of the graph. Must have 'subject', 'predicate', and 'object' columns. Can be NULL. 13 | #' @param attach_engine An engine to attach to the newly created graph for use in future queries based on the graph. 14 | #' @param ... Additional arguments passed to the function. 15 | #' @return A KGX graph object. 16 | #' @examples 17 | #' nodes <- data.frame(id = c("A", "B"), category = c("gene", "disease")) 18 | #' edges <- data.frame(subject = c("A"), predicate = c("associated_with"), object = c("B")) 19 | #' g <- tbl_kgx(nodes, edges) 20 | #' @export 21 | #' @importFrom tidygraph tbl_graph 22 | tbl_kgx <- function(nodes = NULL, edges = NULL, attach_engine = NULL, ...) { 23 | # nodes can be empty, if so we need to create an empty data frame 24 | if(nrow(nodes) == 0) { 25 | nodes <- data.frame(id = character(0)) 26 | nodes$category <- list() 27 | edges <- data.frame(subject = character(0), predicate = character(0), object = character(0)) 28 | } 29 | 30 | if(is.null(nodes$id)) { stop("Error: tbl_kgx nodes must have an 'id' column.") } 31 | if(is.null(nodes$id)) { stop("Error: tbl_kgx nodes must have an 'category' column.") } 32 | 33 | # we do allow graphs with no edges 34 | if(!is.null(edges)) { 35 | if(is.null(edges$subject)) { stop("Error: tbl_kgx edges must have an 'subject' column.") } 36 | if(is.null(edges$predicate)) { stop("Error: tbl_kgx edges must have an 'predicate' column.") } 37 | if(is.null(edges$object)) { stop("Error: tbl_kgx edges must have an 'object' column.") } 38 | 39 | # set canonical to and from columns from subject and object if they don't already exist 40 | if(!"from" %in% colnames(edges)) { 41 | edges$from <- edges$subject 42 | } 43 | if(!"to" %in% colnames(edges)) { 44 | edges$to <- edges$object 45 | } 46 | } else { 47 | # but if given no edges, we spec out subject, predicate, object cols at least (and to and from) 48 | edges <- data.frame(subject = character(), predicate = character(), object = character(), 49 | to = character(), from = character()) 50 | } 51 | 52 | g <- tidygraph::tbl_graph(nodes = nodes, edges = edges, node_key = "id") 53 | 54 | attr(g, "last_engine") <- attach_engine 55 | class(g) <- c("tbl_kgx", class(g)) 56 | 57 | # if we have an engine, use its preferences for setting df column order 58 | # (order_cols uses the last attached engine) 59 | if(!is.null(attach_engine)) { 60 | g <- order_cols(g) 61 | } 62 | 63 | return(g) 64 | } 65 | -------------------------------------------------------------------------------- /R/summary.neo4j_engine.R: -------------------------------------------------------------------------------- 1 | #' Summarize contents of a Neo4j KG engine 2 | #' 3 | #' Given a Neo4j based KG engine, provides summary information in the form of 4 | #' node counts, category counts across nodes, relationship type counts, and available properties. 5 | #' General information about the graph is printed to the console, and a list of 6 | #' dataframes with this information is returned invisibly. Also returned 7 | #' are `cats`, `preds`, and `props` entries, containing lists of available 8 | #' categories/predicates/properties for convenient auto-completion in RStudio. 9 | #' 10 | #' @param object A `neo4j_engine` object 11 | #' @param ... Other parameters (not used) 12 | #' @param quiet Logical, whether to suppress printing of the summary 13 | #' @return A list of dataframes and named lists 14 | #' @export 15 | #' @examplesIf monarch_engine_check() 16 | #' # prints a readable summary and returns a list of dataframes 17 | #' stats <- monarch_engine() |> summary() 18 | #' print(stats) 19 | #' 20 | summary.neo4j_engine <- function(object, ..., quiet = FALSE) { 21 | if(!quiet) { 22 | cat("\n") 23 | cat("A Neo4j-backed knowledge graph engine.\n") 24 | cat("Gathering statistics, please wait...\n") 25 | } 26 | 27 | node_summary_df <- cypher_query_df(object, "MATCH (n) UNWIND labels(n) AS category WITH category, COUNT(n) AS count RETURN category, count ORDER BY count DESC") 28 | edge_summary_df <- cypher_query_df(object, "MATCH ()-[r]->() RETURN type(r) AS predicate, COUNT(*) AS count ORDER BY count DESC") 29 | 30 | counts_query <- " 31 | // Count the total number of nodes 32 | MATCH (n) 33 | RETURN 'nodes_total' AS Type, COUNT(n) AS Count 34 | UNION 35 | // Count the total number of edges 36 | MATCH ()-[r]->() 37 | RETURN 'edges_total' AS Type, COUNT(r) AS Count 38 | " 39 | 40 | total_df <- cypher_query_df(object, counts_query) 41 | total_nodes <- total_df$Count[1] 42 | total_edges <- total_df$Count[2] 43 | 44 | properties <- cypher_query_df(object, "CALL db.propertyKeys()")$propertyKey 45 | 46 | 47 | if(!quiet) { 48 | cat("Total nodes: ", total_nodes, "\n") 49 | cat("Total edges: ", total_edges, "\n") 50 | cat("\n") 51 | cat("Node category counts:\n") 52 | # print the data frame without row names 53 | print(node_summary_df, row.names = FALSE) 54 | cat("\n") 55 | cat("Edge type counts:\n") 56 | # print the data frame without row names 57 | print(edge_summary_df, row.names = FALSE) 58 | cat("\n") 59 | cat("Available node and edge properties:\n") 60 | print(properties) 61 | cat("\n\n") 62 | cat("For more information about Biolink node (Class) and edge (Association) properties, see https://biolink.github.io/biolink-model/.") 63 | } 64 | 65 | cats <- as.list(node_summary_df$category) 66 | names(cats) <- cats 67 | 68 | preds <- as.list(edge_summary_df$predicate) 69 | names(preds) <- preds 70 | 71 | props <- as.list(properties) 72 | names(props) <- props 73 | 74 | return(invisible(list(node_summary = node_summary_df, 75 | edge_summary = edge_summary_df, 76 | total_nodes = total_nodes, 77 | total_edges = total_edges, 78 | cats = cats, 79 | preds = preds, 80 | props = props))) 81 | } 82 | --------------------------------------------------------------------------------