├── pkgdown
    ├── extra.css
    └── _pkgdown.yml
├── LICENSE
├── data
    └── eds_marfan_kg.rda
├── inst
    ├── extdata
    │   ├── eds_marfan_kg.tar.gz
    │   └── README.md
    ├── association_type_mappings2.yaml
    └── kg_prefs.yaml
├── vignettes
    ├── assets
    │   ├── cftr_cf_edge.png
    │   └── monarchr_figures.key
    └── examples
    │   ├── visualizing_kgs_assets
    │       └── cytoscape.png
    │   └── exploring_kgs_assets
    │       └── autocomplete.png
├── .gitignore
├── R
    ├── 99_onload.R
    ├── regex_tilde.R
    ├── get_engine.R
    ├── flatten_body_for_httr.R
    ├── ancestors.R
    ├── descendants.R
    ├── get_engine.tbl_kgx.R
    ├── ancestors.tbl_kgx.R
    ├── descendants.tbl_kgx.R
    ├── set_engine.R
    ├── fetch_nodes.file_engine.R
    ├── set_engine.tbl_kgx.R
    ├── expand.tbl_kgx.R
    ├── graph_sparsity.R
    ├── in_list.R
    ├── cypher_query.R
    ├── order_cols.R
    ├── example_graph.R
    ├── normalize_categories.R
    ├── cypher_query_df.R
    ├── load_kgx.R
    ├── cytoscape.R
    ├── graph_centrality.R
    ├── data_example_kgs.R
    ├── cypher_query_df.neo4j_engine.R
    ├── layout_umap.R
    ├── file_engine_check.R
    ├── base_engine.R
    ├── summarize_neighborhood.R
    ├── graph_semsim.R
    ├── neo4j_engine_check.R
    ├── kg_join.R
    ├── monarch_engine_check.R
    ├── transitive_reduction.R
    ├── expand.R
    ├── kg_edge_weights.R
    ├── example_graph.neo4j_engine.R
    ├── monarch_search.R
    ├── expand_n.R
    ├── save_kgx.R
    ├── fetch_nodes.R
    ├── monarch_engine.R
    ├── transitive_closure.R
    ├── utils.R
    ├── kg_join.tbl_kgx.R
    ├── transfer.R
    ├── knit_print.tbl_kgx.R
    ├── tbl_kgx.R
    └── summary.neo4j_engine.R
├── .lintr
├── meta
    ├── dev_commands.R
    ├── archive
    │   ├── dev.R
    │   └── association_type_mappings.yaml
    └── checklist.txt
├── .Rbuildignore
├── tests
    ├── testthat
    │   ├── test-monarch_engine_check.R
    │   ├── test-neo4j_engine_check.R
    │   ├── test-summary.neo4j_engine.R
    │   ├── test-get_engine.R
    │   ├── test-base_engine.R
    │   ├── test-summary.file_engine.R
    │   ├── test-save_load_kgx.R
    │   ├── test-file_engine_check.R
    │   ├── test-summarize_neighborhood.R
    │   ├── test-monarch_engine.R
    │   ├── test-monarch_search.R
    │   ├── test-example_graph.neo4j_engine.R
    │   ├── test-cypher_query.R
    │   ├── test-example_graph.file_engine.R
    │   ├── test-tbl_kgx.R
    │   ├── test-kg_join.R
    │   ├── test-file_engine.R
    │   ├── test-fetch_nodes.neo4j_engine.R
    │   ├── test-monarch_semsim.R
    │   ├── test-neo4j_engine.R
    │   └── test-fetch_nodes.file-engine.R
    └── testthat.R
├── man
    ├── order_cols.Rd
    ├── unname_cols.Rd
    ├── grapes-twiddle-grapes.Rd
    ├── base_engine.Rd
    ├── knit_print.tbl_kgx.Rd
    ├── neo2r_to_kgx.Rd
    ├── edges.Rd
    ├── nodes.Rd
    ├── explode.Rd
    ├── get_engine.Rd
    ├── ancestors.Rd
    ├── descendants.Rd
    ├── get_engine.tbl_kgx.Rd
    ├── ancestors.tbl_kgx.Rd
    ├── descendants.tbl_kgx.Rd
    ├── monarch_engine_check.Rd
    ├── graph_sparsity.Rd
    ├── roll.Rd
    ├── normalize_categories.Rd
    ├── example_graph.file_engine.Rd
    ├── grapes-in_list-grapes.Rd
    ├── example_graph.neo4j_engine.Rd
    ├── graph_centrality.Rd
    ├── monarch_search.Rd
    ├── neo4j_engine_check.Rd
    ├── kg_edge_weights.Rd
    ├── layout_umap.Rd
    ├── cypher_query.Rd
    ├── example_graph.Rd
    ├── transitive_closure.Rd
    ├── summary.neo4j_engine.Rd
    ├── load_kgx.Rd
    ├── file_engine_check.Rd
    ├── summary.file_engine.Rd
    ├── cypher_query_df.Rd
    ├── save_kgx.Rd
    ├── cytoscape.Rd
    ├── graph_semsim.Rd
    ├── set_engine.Rd
    ├── cytoscape.tbl_kgx.Rd
    ├── transitive_reduction.Rd
    ├── tbl_kgx.Rd
    ├── eds_marfan_kg.Rd
    ├── transfer.Rd
    ├── file_engine.Rd
    ├── kg_join.Rd
    ├── summarize_neighborhood.Rd
    ├── expand_n.Rd
    ├── plot.tbl_kgx.Rd
    ├── expand.Rd
    ├── rolling.Rd
    ├── monarch_semsim.Rd
    ├── monarch_engine.Rd
    ├── monarch_edge_weight_encodings.Rd
    ├── neo4j_engine.Rd
    └── fetch_nodes.Rd
├── monarchr.Rproj
├── .vscode
    └── settings.json
├── LICENSE.md
├── .github
    └── workflows
    │   └── rworkflows.yml
├── DESCRIPTION
├── README.Rmd
└── README.md


/pkgdown/extra.css:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2023
2 | COPYRIGHT HOLDER: monarchr authors
3 | 


--------------------------------------------------------------------------------
/data/eds_marfan_kg.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/data/eds_marfan_kg.rda


--------------------------------------------------------------------------------
/inst/extdata/eds_marfan_kg.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/inst/extdata/eds_marfan_kg.tar.gz


--------------------------------------------------------------------------------
/vignettes/assets/cftr_cf_edge.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/assets/cftr_cf_edge.png


--------------------------------------------------------------------------------
/vignettes/assets/monarchr_figures.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/assets/monarchr_figures.key


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ..Rcheck
 2 | .env
 3 | .Rproj.user
 4 | .Rhistory
 5 | .Rdata
 6 | .httr-oauth
 7 | .DS_Store
 8 | /doc/
 9 | /docs/
10 | /Meta/
11 | 


--------------------------------------------------------------------------------
/vignettes/examples/visualizing_kgs_assets/cytoscape.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/examples/visualizing_kgs_assets/cytoscape.png


--------------------------------------------------------------------------------
/vignettes/examples/exploring_kgs_assets/autocomplete.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/monarch-initiative/monarchr/HEAD/vignettes/examples/exploring_kgs_assets/autocomplete.png


--------------------------------------------------------------------------------
/R/99_onload.R:
--------------------------------------------------------------------------------
1 | .onLoad <- function(libname, pkgname) {
2 | 	pref_path <- system.file("kg_prefs.yaml", package = pkgname)
3 | 	kg_prefs <- yaml::read_yaml(pref_path)
4 | 	options("default_prefs" = kg_prefs)
5 | 	invisible()
6 | }
7 | 


--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: linters_with_defaults(
2 |   line_length_linter = NULL,
3 |   indentation_linter = NULL,
4 |   whitespace_linter = NULL,
5 |   cyclocomp_linter = NULL,
6 |   object_name_linter = NULL,
7 |   object_usage_linter = NULL)
8 | encoding: "UTF-8"
9 | 


--------------------------------------------------------------------------------
/pkgdown/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | # template:
 2 | #   bootstrap: 5
 3 | #   # bslib:
 4 | #   #   font-size-root: 12px
 5 | 
 6 | articles:
 7 | - title: Examples
 8 |   navbar: Examples
 9 |   contents:
10 |   - examples/visualizing_kgs
11 |   - examples/exploring_kgs
12 |   - examples/transitive_rollups
13 |   - examples/engine_preferences
14 | 


--------------------------------------------------------------------------------
/meta/dev_commands.R:
--------------------------------------------------------------------------------
 1 | devtools::load_all()
 2 | 
 3 | devtools::document()
 4 | 
 5 | devtools::clean_vignettes()
 6 | devtools::build_vignettes()
 7 | devtools::build_site()
 8 | pkgdown::build_articles()
 9 | 
10 | devtools::test()
11 | 
12 | devtools::check()
13 | 
14 | devtools::build()
15 | 
16 | devtools::install()
17 | # hmm
18 | 
19 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | vignettes/visualisation\.Rmd
 2 | visualisation\.Rmd
 3 | 
 4 | ^.*\.Rproj$
 5 | ^\.Rproj\.user$
 6 | ^LICENSE\.md$
 7 | ^README\.Rmd$
 8 | ^meta$
 9 | ^docs$
10 | ^pkgdown$
11 | 
12 | ^\.lintr$
13 | ^\.github$
14 | ^\.vscode$
15 | ^\.env$
16 | 
17 | ## For GitHub Actions Windows runner
18 | node_modules$
19 | package-lock\.json$
20 | package\.json$
21 | ^doc$
22 | ^Meta$
23 | 


--------------------------------------------------------------------------------
/tests/testthat/test-monarch_engine_check.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | 
 5 | test_that("monarch_engine_check works as expected", {
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 |     # we can't test for TRUE here, because this check my be run without a connection
 9 |     # so we'll just test that it returns logical
10 |     expect_type(monarch_engine_check(warn = FALSE), "logical")
11 | })
12 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/tests.html
 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files
 8 | 
 9 | library(testthat)
10 | library(monarchr)
11 | 
12 | test_check("monarchr")
13 | 


--------------------------------------------------------------------------------
/man/order_cols.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/order_cols.R
 3 | \name{order_cols}
 4 | \alias{order_cols}
 5 | \title{Set edge/row data column order according to most recent engine preferences}
 6 | \usage{
 7 | order_cols(g)
 8 | }
 9 | \arguments{
10 | \item{g}{A \code{tbl_kgx} graph.}
11 | }
12 | \description{
13 | Set edge/row data column order according to most recent engine preferences
14 | }
15 | 


--------------------------------------------------------------------------------
/monarchr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: No
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | 


--------------------------------------------------------------------------------
/man/unname_cols.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cypher_query.neo4j_engine.R
 3 | \name{unname_cols}
 4 | \alias{unname_cols}
 5 | \title{Remove names from columns}
 6 | \usage{
 7 | unname_cols(df)
 8 | }
 9 | \arguments{
10 | \item{df}{Input data frame}
11 | }
12 | \value{
13 | The input, with unnamed columns
14 | }
15 | \description{
16 | Given a data-frame like object, runs each column through unname()
17 | }
18 | 


--------------------------------------------------------------------------------
/R/regex_tilde.R:
--------------------------------------------------------------------------------
 1 | #' Infix regular expression match.
 2 | #'
 3 | #' An infix alias for stringr::str_detect().
 4 | #'
 5 | #' @examples
 6 | #' rownames(mtcars) %~% "^Merc"
 7 | #'
 8 | #' @param string A character vector to look for matches in.
 9 | #' @param pattern A regular expression to look for.
10 | #' @return A logical vector indicating pattern matches.
11 | #' @export
12 | #' @importFrom stringr str_detect
13 | `%~%` <- function(string, pattern) {
14 | 	str_detect(string, pattern)
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/testthat/test-neo4j_engine_check.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | 
 5 | test_that("neo4j_engine_check works as expected", {
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 |     # we can't test for TRUE here, because this check my be run without a connection
 9 |     # so we'll just test that it returns logical
10 |     expect_type(neo4j_engine_check("https://neo4j.monarchinitiative.org", warn = FALSE), "logical")
11 |     # we can test for FALSE however
12 |     expect_false(neo4j_engine_check("https://no-such-db.monarchinitiative.org", warn = FALSE))
13 | })
14 | 


--------------------------------------------------------------------------------
/man/grapes-twiddle-grapes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/regex_tilde.R
 3 | \name{\%~\%}
 4 | \alias{\%~\%}
 5 | \title{Infix regular expression match.}
 6 | \usage{
 7 | string \%~\% pattern
 8 | }
 9 | \arguments{
10 | \item{string}{A character vector to look for matches in.}
11 | 
12 | \item{pattern}{A regular expression to look for.}
13 | }
14 | \value{
15 | A logical vector indicating pattern matches.
16 | }
17 | \description{
18 | An infix alias for stringr::str_detect().
19 | }
20 | \examples{
21 | rownames(mtcars) \%~\% "^Merc"
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/man/base_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/base_engine.R
 3 | \name{base_engine}
 4 | \alias{base_engine}
 5 | \title{base_engine}
 6 | \usage{
 7 | base_engine(name = "default_engine", preferences = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{name}{A character string indicating the name of the engine.}
11 | 
12 | \item{preferences}{A named list of preferences for the engine.}
13 | 
14 | \item{...}{Other parameters (unused)}
15 | }
16 | \value{
17 | An object of class \code{base_engine}
18 | }
19 | \description{
20 | A base class for all engines
21 | }
22 | 


--------------------------------------------------------------------------------
/man/knit_print.tbl_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/knit_print.tbl_kgx.R
 3 | \name{knit_print.tbl_kgx}
 4 | \alias{knit_print.tbl_kgx}
 5 | \title{Specialized print function for KGX graphs in knitted documents}
 6 | \usage{
 7 | \method{knit_print}{tbl_kgx}(x, ..., show = 100)
 8 | }
 9 | \arguments{
10 | \item{x}{A \code{tbl_kgx} graph to display.}
11 | 
12 | \item{...}{Other arguments (unused).}
13 | 
14 | \item{show}{The maximum number of nodes and edges to display.}
15 | }
16 | \description{
17 | Specialized print function for KGX graphs in knitted documents
18 | }
19 | 


--------------------------------------------------------------------------------
/tests/testthat/test-summary.neo4j_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("summary() for neo4j_engine", {
 5 |     res <- summary(monarch_engine(), quiet = TRUE)
 6 | 
 7 |     # make sure the output is a list
 8 |     expect_type(res, "list")
 9 | 
10 |     # let's try a version where we capture the printed output
11 |     printed <- capture.output(summary(monarch_engine(), quiet = FALSE))
12 | 
13 |     # the result should be a character vector
14 |     expect_type(printed, "character")
15 |     # one of the lines should be "Total nodes: "
16 |     expect_true(any(grepl("Total nodes: ", printed)))
17 | })
18 | 


--------------------------------------------------------------------------------
/man/neo2r_to_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cypher_query.neo4j_engine.R
 3 | \name{neo2r_to_kgx}
 4 | \alias{neo2r_to_kgx}
 5 | \title{Process neo2R cypher to tbl_kgx}
 6 | \usage{
 7 | neo2r_to_kgx(res, engine)
 8 | }
 9 | \arguments{
10 | \item{res}{The result from neo2R::cypher with result = "graph"}
11 | 
12 | \item{engine}{The engine to attach to the returned graph}
13 | }
14 | \value{
15 | A tbl_kgx
16 | }
17 | \description{
18 | Given a result from neo2R::cypher returning KGX-formatted nodes and edges,
19 | parse the result to generate a tbl_kgx object, attaching the provided engine.
20 | }
21 | 


--------------------------------------------------------------------------------
/R/get_engine.R:
--------------------------------------------------------------------------------
 1 | #' Get most recent engine from a graph.
 2 | #'
 3 | #' Given a tbl_kgx graph, retrieve the last-used engine.
 4 | #'
 5 | #' @param g A tbl_kgx graph.
 6 | #' @param fail_if_missing If TRUE, fail if there is no engine associated with the graph.
 7 | #' @return A graph engine object.
 8 | #' @examples
 9 | #' # Using example KGX file packaged with monarchr
10 | #' data(eds_marfan_kg)
11 | #'
12 | #' g <- eds_marfan_kg |>
13 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
14 | #'
15 | #' print(get_engine(g))
16 | #'
17 | #' @export
18 | get_engine <- function(g, fail_if_missing = TRUE) {
19 |     UseMethod("get_engine")
20 | }
21 | 


--------------------------------------------------------------------------------
/man/edges.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{edges}
 4 | \alias{edges}
 5 | \title{Get graph edges table.}
 6 | \usage{
 7 | edges(graph, ...)
 8 | }
 9 | \arguments{
10 | \item{graph}{Input graph}
11 | 
12 | \item{...}{Other options (unused)}
13 | }
14 | \value{
15 | A tibble with the edges of the graph
16 | }
17 | \description{
18 | Get graph edges table.
19 | }
20 | \examples{
21 | # (using the example KGX file packaged with monarchr)
22 | data(eds_marfan_kg)
23 | 
24 | g <- eds_marfan_kg |>
25 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
26 | 
27 | print(edges(g))
28 | }
29 | 


--------------------------------------------------------------------------------
/man/nodes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{nodes}
 4 | \alias{nodes}
 5 | \title{Get graph nodes table.}
 6 | \usage{
 7 | nodes(graph, ...)
 8 | }
 9 | \arguments{
10 | \item{graph}{A graph object}
11 | 
12 | \item{...}{Other options (unused)}
13 | }
14 | \value{
15 | A tibble with the nodes of the graph
16 | }
17 | \description{
18 | Get graph nodes table.
19 | }
20 | \examples{
21 | # (using the example KGX file packaged with monarchr)
22 | data(eds_marfan_kg)
23 | 
24 | g <- eds_marfan_kg |>
25 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
26 | 
27 | print(nodes(g))
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("get_engine works", {
 5 |     g <- monarch_search("fanconi anemia", limit = 5)
 6 |     engine <- get_engine(g)
 7 |     expect_s3_class(engine, "neo4j_engine")
 8 | 
 9 | 
10 |     filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
11 |     e <- file_engine(filename)
12 |     query_ids = c("MONDO:0007525", "MONDO:0007524")
13 | 
14 |     g <- fetch_nodes(e, query_ids = query_ids) %>% expand(predicate = "biolink:subclass_of", transitive = TRUE, direction = "out")
15 |     engine <- get_engine(g)
16 |     expect_s3_class(engine, "file_engine")
17 | })
18 | 


--------------------------------------------------------------------------------
/meta/archive/dev.R:
--------------------------------------------------------------------------------
 1 | # # bolt://24.144.94.219:7687
 2 | 
 3 | # # Load libraries
 4 | # library(tidyverse)
 5 | # library(neo4r)
 6 | 
 7 | # library(neo4r)
 8 | # con <- neo4j_api$new(
 9 | #   url = "http://24.144.94.219:7474",
10 | #   user = "",
11 | #   password = ""
12 | # )
13 | 
14 | # # Test the endpoint, that will not work :
15 | # con$ping()
16 | 
17 | # x <- con$get_labels()
18 | # x
19 | 
20 | # # get node with id: MONDO:0011476 and it's neighbors
21 | # z <- "MATCH (n)-[r]-(m) WHERE n.id = 'MONDO:0011476' RETURN n" %>%
22 | #   call_neo4j(con)
23 | 
24 | # #########################
25 | 
26 | # library(neo2R)
27 | # graph <- startGraph("bolt://24.144.94.219:7687")
28 | 
29 | 


--------------------------------------------------------------------------------
/man/explode.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{explode}
 4 | \alias{explode}
 5 | \title{Explode a graph into a list of single-node graphs}
 6 | \usage{
 7 | explode(graph, ...)
 8 | }
 9 | \arguments{
10 | \item{graph}{A tbl_kgx graph.}
11 | 
12 | \item{...}{Other options (unused)}
13 | }
14 | \value{
15 | A list of tbl_kgx graphs.
16 | }
17 | \description{
18 | Explode a graph into a list of single-node graphs
19 | }
20 | \examples{
21 | # (using the example KGX file packaged with monarchr)
22 | data(eds_marfan_kg)
23 | 
24 | g <- eds_marfan_kg |>
25 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
26 | 
27 | print(explode(g))
28 | }
29 | 


--------------------------------------------------------------------------------
/inst/extdata/README.md:
--------------------------------------------------------------------------------
 1 | The `eds_marfan_kg.tar.gz` file in this directory was created with the following on Aug 15, 2024,
 2 | to represent Ehlers-Danlos syndrome (MONDO:0020066), Marfan syndrome (MONDO:0007947), all their subtypes,
 3 | all entities connected to those diseases or their subtypes, and finally all parents (supertypes) of all
 4 | those diseases and entities.
 5 | 
 6 | ```
 7 | monarch_engine() |> 
 8 |   fetch_nodes(query_ids = c("MONDO:0020066", "MONDO:0007947")) |> 
 9 |   expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |> 
10 |   expand() |> 
11 |   expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE) |> 
12 |   save_kgx("eds_marfan_kg.tar.gz")
13 | ```
14 | 


--------------------------------------------------------------------------------
/R/flatten_body_for_httr.R:
--------------------------------------------------------------------------------
 1 | # for array params in httr queries
 2 | # from https://stackoverflow.com/a/72532186
 3 | flatten_body_for_httr <- function(x) {
 4 | 	# A form/query can only have one value per name, so take
 5 | 	# any values that contain vectors length >1 and
 6 | 	# split them up
 7 | 	# list(x=1:2, y="a") becomes list(x=1, x=2, y="a")
 8 | 	if (all(lengths(x) <= 1)) return(x);
 9 | 	do.call("c", mapply(function(name, val) {
10 | 		if (length(val)==1 || any(c("form_file", "form_data") %in% class(val))) {
11 | 			x <- list(val)
12 | 			names(x) <- name
13 | 			x
14 | 		} else {
15 | 			x <- as.list(val)
16 | 			names(x) <- rep(name, length(val))
17 | 			x
18 | 		}
19 | 	}, names(x), x, USE.NAMES = FALSE, SIMPLIFY = FALSE))
20 | }
21 | 


--------------------------------------------------------------------------------
/tests/testthat/test-base_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("base_engine works", {
 5 |     # first, let's make sure the default preferences is loaded
 6 |     e <- base_engine()
 7 |     expect_equal(e$name, "default_engine")
 8 |     # result should have a category_priority field
 9 |     expect_true("category_priority" %in% names(e$preferences))
10 | 
11 |     # test that we can load preferences via a list
12 |     e <- base_engine(preferences = list(category_priority = c("biolink:Gene", "biolink:Disease")))
13 |     expect_equal(e$name, "default_engine")
14 |     # result should have a category_priority field of length 2
15 |     expect_equal(length(e$preferences$category_priority), 2)
16 | 
17 | })
18 | 


--------------------------------------------------------------------------------
/R/ancestors.R:
--------------------------------------------------------------------------------
 1 | #' Expand ancestors transitively
 2 | #'
 3 | #' Expand a tbl_kgx graph to include all ancestors of nodes defined transitively
 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around
 5 | #' expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE)
 6 | #'
 7 | #'
 8 | #' @param g A `tbl_kgx()` graph to expand.
 9 | #' @param ... Other parameters (unused).
10 | #'
11 | #' @return A tbl_kgx graph.
12 | #' @export
13 | #' @examples
14 | #' data(eds_marfan_kg)
15 | #' g <- eds_marfan_kg |>
16 | #' 	 fetch_nodes(query_ids = "MONDO:0020066") |>
17 | #' 	 ancestors()
18 | #'
19 | #' @import tidygraph
20 | #' @import dplyr
21 | ancestors <- function(g, ...) {
22 | 	UseMethod("ancestors")
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/testthat/test-summary.file_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("summary() for file_engine", {
 5 |     filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
 6 |     engine <- file_engine(filename)
 7 | 
 8 |     res <- summary(engine, quiet = TRUE)
 9 | 
10 |     # make sure the output is a list
11 |     expect_type(res, "list")
12 | 
13 |     # let's try a version where we capture the printed output
14 |     printed <- capture.output(summary(engine, quiet = FALSE))
15 | 
16 |     # the result should be a character vector
17 |     expect_type(printed, "character")
18 |     # one of the lines should be "Total nodes: "
19 |     expect_true(any(grepl("Total nodes: ", printed)))
20 | })
21 | 


--------------------------------------------------------------------------------
/R/descendants.R:
--------------------------------------------------------------------------------
 1 | #' Expand descendants transitively
 2 | #'
 3 | #' Expand a tbl_kgx graph to include all descendants of nodes defined transitively
 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around
 5 | #' expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE)
 6 | #'
 7 | #'
 8 | #' @param g A `tbl_kgx()` graph to expand.
 9 | #' @param ... Other parameters (unused).
10 | #'
11 | #' @return A tbl_kgx graph.
12 | #' @export
13 | #' @examples
14 | #' data(eds_marfan_kg)
15 | #' g <- eds_marfan_kg |>
16 | #' 	 fetch_nodes(query_ids = "MONDO:0020066") |>
17 | #' 	 descendants()
18 | #'
19 | #' @import tidygraph
20 | #' @import dplyr
21 | descendants <- function(g, ...) {
22 | 	UseMethod("descendants")
23 | }
24 | 


--------------------------------------------------------------------------------
/man/get_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_engine.R
 3 | \name{get_engine}
 4 | \alias{get_engine}
 5 | \title{Get most recent engine from a graph.}
 6 | \usage{
 7 | get_engine(g, fail_if_missing = TRUE)
 8 | }
 9 | \arguments{
10 | \item{g}{A tbl_kgx graph.}
11 | 
12 | \item{fail_if_missing}{If TRUE, fail if there is no engine associated with the graph.}
13 | }
14 | \value{
15 | A graph engine object.
16 | }
17 | \description{
18 | Given a tbl_kgx graph, retrieve the last-used engine.
19 | }
20 | \examples{
21 | # Using example KGX file packaged with monarchr
22 | data(eds_marfan_kg)
23 | 
24 | g <- eds_marfan_kg |>
25 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
26 | 
27 | print(get_engine(g))
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/man/ancestors.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ancestors.R
 3 | \name{ancestors}
 4 | \alias{ancestors}
 5 | \title{Expand ancestors transitively}
 6 | \usage{
 7 | ancestors(g, ...)
 8 | }
 9 | \arguments{
10 | \item{g}{A \code{tbl_kgx()} graph to expand.}
11 | 
12 | \item{...}{Other parameters (unused).}
13 | }
14 | \value{
15 | A tbl_kgx graph.
16 | }
17 | \description{
18 | Expand a tbl_kgx graph to include all ancestors of nodes defined transitively
19 | by "biolink:subclass_of" relationships. This is a simple wrapper around
20 | expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE)
21 | }
22 | \examples{
23 | data(eds_marfan_kg)
24 | g <- eds_marfan_kg |>
25 | 	 fetch_nodes(query_ids = "MONDO:0020066") |>
26 | 	 ancestors()
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-save_load_kgx.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("save_kgx and load_kgx work as expected", {
 5 | 	filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
 6 | 	g <- file_engine(filename) |>
 7 | 	  fetch_nodes(limit = 5) |>
 8 | 	  expand()
 9 | 
10 | 	save_kgx(g, "test.tar.gz")
11 | 
12 | 	g2 <- load_kgx("test.tar.gz")
13 | 
14 | 	expect_equal(nrow(nodes(g)), nrow(nodes(g2)))
15 | 	expect_equal(nrow(edges(g)), nrow(edges(g2)))
16 | 
17 | 	expect_true(all(nodes(g)$id %in% nodes(g2)$id))
18 | 	expect_true(all(nodes(g2)$id %in% nodes(g)$id))
19 | 
20 | 	expect_true("pcategory" %in% colnames(nodes(g2)))
21 | 	expect_true("category" %in% colnames(nodes(g2)))
22 | 	expect_true(is.list(nodes(g2)$category))
23 | 
24 | 	file.remove("test.tar.gz")
25 | })
26 | 


--------------------------------------------------------------------------------
/R/get_engine.tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | #' Get most recent engine from a graph.
 2 | #'
 3 | #' Given a tbl_kgx graph, retrieve the last-used engine.
 4 | #'
 5 | #' @param g A tbl_kgx graph.
 6 | #' @param fail_if_missing If TRUE, fail if there is no engine associated with the graph.
 7 | #' @return A graph engine object.
 8 | #' @examples
 9 | #' # Using example KGX file packaged with monarchr
10 | #' data(eds_marfan_kg)
11 | #'
12 | #' g <- eds_marfan_kg |>
13 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
14 | #'
15 | #' print(get_engine(g))
16 | #'
17 | #' @export
18 | get_engine.tbl_kgx <- function(g, fail_if_missing = TRUE) {
19 | 	engine <- attr(g, "last_engine")
20 | 	if (is.null(engine) && fail_if_missing) {
21 | 		stop("No engine associated with this graph. Unable to proceed.")
22 | 	}
23 | 	return(engine)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/descendants.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/descendants.R
 3 | \name{descendants}
 4 | \alias{descendants}
 5 | \title{Expand descendants transitively}
 6 | \usage{
 7 | descendants(g, ...)
 8 | }
 9 | \arguments{
10 | \item{g}{A \code{tbl_kgx()} graph to expand.}
11 | 
12 | \item{...}{Other parameters (unused).}
13 | }
14 | \value{
15 | A tbl_kgx graph.
16 | }
17 | \description{
18 | Expand a tbl_kgx graph to include all descendants of nodes defined transitively
19 | by "biolink:subclass_of" relationships. This is a simple wrapper around
20 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE)
21 | }
22 | \examples{
23 | data(eds_marfan_kg)
24 | g <- eds_marfan_kg |>
25 | 	 fetch_nodes(query_ids = "MONDO:0020066") |>
26 | 	 descendants()
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "r.linting.assignmentType": false,
 3 |     "r.linting.closeCurlySeparateLine": false,
 4 |     "r.linting.doubleQuotes": false,
 5 |     "r.linting.multipleDots": false,
 6 |     "r.linting.multipleStatements": false,
 7 |     "r.linting.noSpaceAfterFunctionName": false,
 8 |     "r.linting.noTabs": false,
 9 |     "r.linting.openCurlyPosition": false,
10 |     "r.linting.pascalCase": false,
11 |     "r.linting.semicolons": false,
12 |     "r.linting.spaceBeforeOpenBrace": false,
13 |     "r.linting.spacesAroundComma": false,
14 |     "r.linting.spacesAroundOperators": false,
15 |     "r.linting.spacesInsideParenthesis": false,
16 |     "r.linting.trailingBlankLines": false,
17 |     "r.linting.trailingWhitespace": false,
18 |     "r.linting.trueFalseNames": false,
19 |     "r.linting.upperCase": false
20 | }


--------------------------------------------------------------------------------
/man/get_engine.tbl_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_engine.tbl_kgx.R
 3 | \name{get_engine.tbl_kgx}
 4 | \alias{get_engine.tbl_kgx}
 5 | \title{Get most recent engine from a graph.}
 6 | \usage{
 7 | \method{get_engine}{tbl_kgx}(g, fail_if_missing = TRUE)
 8 | }
 9 | \arguments{
10 | \item{g}{A tbl_kgx graph.}
11 | 
12 | \item{fail_if_missing}{If TRUE, fail if there is no engine associated with the graph.}
13 | }
14 | \value{
15 | A graph engine object.
16 | }
17 | \description{
18 | Given a tbl_kgx graph, retrieve the last-used engine.
19 | }
20 | \examples{
21 | # Using example KGX file packaged with monarchr
22 | data(eds_marfan_kg)
23 | 
24 | g <- eds_marfan_kg |>
25 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
26 | 
27 | print(get_engine(g))
28 | 
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-file_engine_check.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | 
 5 | test_that("file_engine_check works as expected", {
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 | 		filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
 9 | 		expect_true(file_engine_check(filename, warn = FALSE))
10 | 
11 |     filename <- system.file("extdata", "nosuch_kgx_tsv.tar.gz", package = "monarchr")
12 |     expect_false(file_engine_check(filename, warn = FALSE))
13 | 
14 |     # we can try to connect to a remote file, but we can't test for TRUE here
15 |     # because this check my be run without a connection
16 |     # so we'll just test that it returns logical
17 |     expect_type(file_engine_check("https://no-such-host.kghub.io/kg-obo/dummy/dummy_kgx_tsv.tar.gz", warn = FALSE), "logical")
18 | })
19 | 


--------------------------------------------------------------------------------
/R/ancestors.tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | #' Expand ancestors transitively
 2 | #'
 3 | #' Expand a tbl_kgx graph to include all ancestors of nodes defined transitively
 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around
 5 | #' expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE)
 6 | #'
 7 | #'
 8 | #' @param g A `tbl_kgx()` graph to expand.
 9 | #' @param ... Other parameters (unused).
10 | #'
11 | #' @return A tbl_kgx graph.
12 | #' @export
13 | #' @examples
14 | #' data(eds_marfan_kg)
15 | #' g <- eds_marfan_kg |>
16 | #' 	 fetch_nodes(query_ids = "MONDO:0020066") |>
17 | #' 	 ancestors()
18 | #'
19 | #' @import tidygraph
20 | #' @import dplyr
21 | ancestors.tbl_kgx <- function(g, ...) {
22 | 	return(g |> expand(predicates = "biolink:subclass_of",
23 | 										 direction = "out",
24 | 										 transitive = TRUE))
25 | }
26 | 


--------------------------------------------------------------------------------
/man/ancestors.tbl_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ancestors.tbl_kgx.R
 3 | \name{ancestors.tbl_kgx}
 4 | \alias{ancestors.tbl_kgx}
 5 | \title{Expand ancestors transitively}
 6 | \usage{
 7 | \method{ancestors}{tbl_kgx}(g, ...)
 8 | }
 9 | \arguments{
10 | \item{g}{A \code{tbl_kgx()} graph to expand.}
11 | 
12 | \item{...}{Other parameters (unused).}
13 | }
14 | \value{
15 | A tbl_kgx graph.
16 | }
17 | \description{
18 | Expand a tbl_kgx graph to include all ancestors of nodes defined transitively
19 | by "biolink:subclass_of" relationships. This is a simple wrapper around
20 | expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE)
21 | }
22 | \examples{
23 | data(eds_marfan_kg)
24 | g <- eds_marfan_kg |>
25 | 	 fetch_nodes(query_ids = "MONDO:0020066") |>
26 | 	 ancestors()
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/R/descendants.tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | #' Expand descendants transitively
 2 | #'
 3 | #' Expand a tbl_kgx graph to include all descendants of nodes defined transitively
 4 | #' by "biolink:subclass_of" relationships. This is a simple wrapper around
 5 | #' expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE)
 6 | #'
 7 | #'
 8 | #' @param g A `tbl_kgx()` graph to expand.
 9 | #' @param ... Other parameters (unused).
10 | #'
11 | #' @return A tbl_kgx graph.
12 | #' @export
13 | #' @examples
14 | #' data(eds_marfan_kg)
15 | #' g <- eds_marfan_kg |>
16 | #' 	 fetch_nodes(query_ids = "MONDO:0020066") |>
17 | #' 	 descendants()
18 | #'
19 | #' @import tidygraph
20 | #' @import dplyr
21 | descendants.tbl_kgx <- function(g, ...) {
22 | 	return(g |> expand(predicates = "biolink:subclass_of",
23 | 										 direction = "in",
24 | 										 transitive = TRUE))
25 | }
26 | 


--------------------------------------------------------------------------------
/R/set_engine.R:
--------------------------------------------------------------------------------
 1 | #' Set the engine for a graph. See warning in details.
 2 | #'
 3 | #' Sets a given graph's engine to a given engine object and returns the graph.
 4 | #' WARNING: changing the backing engine for a graph dynamically is not
 5 | #' yet fully supported or tested.
 6 | #'
 7 | #' @param g A tbl_kgx graph.
 8 | #' @param engine An engine object.
 9 | #' @return A tbl_kgx graph.
10 | #' @examples
11 | #' # Using example KGX file packaged with monarchr
12 | #' data(eds_marfan_kg)
13 | #'
14 | #' g <- eds_marfan_kg |>
15 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
16 | #'
17 | #' other_engine <- eds_marfan_kg # this could be a different file-engine (see `file_engine()`)
18 | #' g <- set_engine(g, other_engine)
19 | #'
20 | #' print(get_engine(g))
21 | #'
22 | #' @export
23 | set_engine <- function(g, engine) {
24 | 	UseMethod("set_engine")
25 | }
26 | 


--------------------------------------------------------------------------------
/man/descendants.tbl_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/descendants.tbl_kgx.R
 3 | \name{descendants.tbl_kgx}
 4 | \alias{descendants.tbl_kgx}
 5 | \title{Expand descendants transitively}
 6 | \usage{
 7 | \method{descendants}{tbl_kgx}(g, ...)
 8 | }
 9 | \arguments{
10 | \item{g}{A \code{tbl_kgx()} graph to expand.}
11 | 
12 | \item{...}{Other parameters (unused).}
13 | }
14 | \value{
15 | A tbl_kgx graph.
16 | }
17 | \description{
18 | Expand a tbl_kgx graph to include all descendants of nodes defined transitively
19 | by "biolink:subclass_of" relationships. This is a simple wrapper around
20 | expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE)
21 | }
22 | \examples{
23 | data(eds_marfan_kg)
24 | g <- eds_marfan_kg |>
25 | 	 fetch_nodes(query_ids = "MONDO:0020066") |>
26 | 	 descendants()
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/meta/checklist.txt:
--------------------------------------------------------------------------------
 1 | Basic functionality:
 2 | 
 3 | Engines:
 4 | - neo4j_engine   | dev [x] test [x]
 5 | - file_engine    | dev [x] test [x]
 6 | - monarch_engine | dev [x] test [x]
 7 | 
 8 | Engine Features:
 9 | - seach_kg
10 |   - neo4j_engine   | dev [x] test [x]
11 |   - file_engine    | dev [x] test [x]
12 |   - monarch_engine | dev [x] test [x]
13 | 
14 | - fetch_nodes
15 |   - neo4j_engine   | dev [x] test [x]
16 |   - monarch_engine | (inherit from neo4j_engine)
17 |   - file_engine    | dev [x] test [x]
18 | 
19 | Query Graph Features:
20 | - fetch_edges
21 |   - neo4j_engine   | dev [x] test [x]
22 |   - monarch_engine | (inherit from neo4j_engine)
23 |   - file_engine    | dev [ ] test [ ]
24 | 
25 | - summarize_neighborhood
26 |   - neo4j_engine   | dev [x] test [x]
27 |   - monarch_engine | (inherit from neo4j_engine)
28 |   - file_engine    | dev [ ] test [ ]
29 | 
30 | 


--------------------------------------------------------------------------------
/R/fetch_nodes.file_engine.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | #' @import tidygraph
 3 | #' @import dplyr
 4 | fetch_nodes.file_engine <- function(engine, ..., query_ids = NULL, limit = NULL) {
 5 |     if(!is.null(query_ids)) {
 6 |         res <- engine$graph %>%
 7 |             activate(nodes) %>%
 8 |             filter(id %in% query_ids)
 9 |     } else {
10 |         res <- engine$graph %>%
11 |             activate(nodes) %>%
12 |             filter(...)
13 |     }
14 | 
15 |     # we want to drop all the edges to be compatible with the neo4j engine
16 |     res <- res %>%
17 |         activate(edges) %>%
18 |         filter(FALSE) %>%
19 |         activate(nodes) %>%
20 |     	  arrange(id)
21 | 
22 |     if(!is.null(limit)) {
23 |     	res <- res %>% activate(nodes) %>% slice_head(n = limit)
24 |     }
25 | 
26 |     attr(res, "last_engine") <- engine
27 | 
28 |     res <- order_cols(res)
29 | 
30 |     return(res)
31 | }
32 | 


--------------------------------------------------------------------------------
/R/set_engine.tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | #' Set the engine for a graph. See warning in details.
 2 | #'
 3 | #' Sets a given graph's engine to a given engine object and returns the graph.
 4 | #' WARNING: changing the backing engine for a graph dynamically is not
 5 | #' yet fully supported or tested.
 6 | #'
 7 | #' @param g A tbl_kgx graph.
 8 | #' @param engine An engine object.
 9 | #' @return A tbl_kgx graph.
10 | #' @examples
11 | #' # Using example KGX file packaged with monarchr
12 | #' data(eds_marfan_kg)
13 | #'
14 | #' g <- eds_marfan_kg |>
15 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
16 | #'
17 | #' other_engine <- eds_marfan_kg # this could be a different file engine (see `file_engine()`)
18 | #' g <- set_engine(g, other_engine)
19 | #'
20 | #' print(get_engine(g))
21 | #'
22 | #' @export
23 | set_engine <- function(g, engine) {
24 | 	attr(g, "last_engine") <- engine
25 | 	return(g)
26 | }
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/man/monarch_engine_check.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/monarch_engine_check.R
 3 | \name{monarch_engine_check}
 4 | \alias{monarch_engine_check}
 5 | \title{Check availability of Monarch Initiative API}
 6 | \usage{
 7 | monarch_engine_check(warn = TRUE, service = "graph")
 8 | }
 9 | \arguments{
10 | \item{warn}{A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.}
11 | 
12 | \item{service}{The service to check: "search", "semsim", "graph", or a vector of these. Default is "graph".}
13 | }
14 | \value{
15 | TRUE if the available features are online, FALSE otherwise.
16 | }
17 | \description{
18 | Attempts to connect to the Monarch Initiative API and use the specified functionality. Returns FALSE if the API is not available the result is not as expected.
19 | }
20 | \examples{
21 | print(monarch_engine_check())
22 | 
23 | }
24 | 


--------------------------------------------------------------------------------
/man/graph_sparsity.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/graph_sparsity.R
 3 | \name{graph_sparsity}
 4 | \alias{graph_sparsity}
 5 | \title{Compute sparsity}
 6 | \usage{
 7 | graph_sparsity(x, fun = igraph::as_adjacency_matrix, ...)
 8 | }
 9 | \arguments{
10 | \item{x}{An igraph object or a (sparse) matrix.}
11 | 
12 | \item{fun}{Function to convert graph to matrix.}
13 | 
14 | \item{...}{Arguments passed to \code{fun}.}
15 | }
16 | \value{
17 | A numeric value representing the proportion of zero
18 | values in the graph/matrix.
19 | }
20 | \description{
21 | Compute sparsity (proportion of zero values) in a matrix or graph.
22 | }
23 | \examples{
24 | ## Using example KGX file packaged with monarchr
25 | data(eds_marfan_kg)
26 | g <- eds_marfan_kg |>
27 |      fetch_nodes(query_ids = "MONDO:0007525") |>
28 |      expand(predicates = "biolink:has_phenotype",
29 |             categories = "biolink:PhenotypicFeature")
30 | 
31 | graph_sparsity(g)
32 | }
33 | 


--------------------------------------------------------------------------------
/R/expand.tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | #' @export
 2 | #' @import tidygraph
 3 | #' @import dplyr
 4 | #' @importFrom assertthat assert_that
 5 | expand.tbl_kgx <- function(graph, ...) {
 6 |     # check to see if g has a last_engine attribute
 7 | 	  active_tbl <- active(graph)
 8 |     if(!is.null(attr(graph, "last_engine"))) {
 9 |         engine <- attr(graph, "last_engine")
10 |         if(any(c("monarch_engine", "neo4j_engine") %in% class(engine))) {
11 |         	res <- expand_neo4j_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl))
12 |         	return(res)
13 |         } else if("file_engine" %in% class(engine)) {
14 |         	res <- expand_file_engine(engine, graph, ...) |> activate(!!rlang::sym(active_tbl))
15 |         	return(res)
16 |         } else {
17 |         	stop("Error: unknown or incompatible engine.")
18 |         }
19 |         # return(expand(engine, graph, ...)) # this shouldn't be reachable
20 |     } else {
21 |         stop("Error: tbl_kgx object does not have a most recent engine.")
22 |     }
23 | }
24 | 


--------------------------------------------------------------------------------
/R/graph_sparsity.R:
--------------------------------------------------------------------------------
 1 | #' Compute sparsity
 2 | #'
 3 | #' Compute sparsity (proportion of zero values) in a matrix or graph.
 4 | #' @param x An igraph object or a (sparse) matrix.
 5 | #' @param fun Function to convert graph to matrix.
 6 | #' @param ... Arguments passed to \code{fun}.
 7 | #' @returns A numeric value representing the proportion of zero
 8 | #'	values in the graph/matrix.
 9 | #' @export
10 | #' @examples
11 | #' ## Using example KGX file packaged with monarchr
12 | #' data(eds_marfan_kg)
13 | #' g <- eds_marfan_kg |>
14 | #'      fetch_nodes(query_ids = "MONDO:0007525") |>
15 | #'      expand(predicates = "biolink:has_phenotype",
16 | #'             categories = "biolink:PhenotypicFeature")
17 | #'
18 | #' graph_sparsity(g)
19 | graph_sparsity <- function(x,
20 | 										 fun=igraph::as_adjacency_matrix,
21 | 										 ...){
22 | 	if(is(x,"igraph")){
23 | 		x <- fun(x)
24 | 	}
25 | 	if(is(x,"Matrix")||is(x,"sparseMatrix")){
26 | 		return(sum(x==0)/length(x))
27 | 	}
28 | 	else{
29 | 		stop("x must be an igraph object or a (sparse) matrix.")
30 | 	}
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-summarize_neighborhood.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("summarize_neighborhood returns reasonable results", {
 5 |     # skip for now
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 | 		g <- monarch_engine() |> fetch_nodes(query_ids = "MONDO:0019391")
 9 | 
10 |     result <- summarize_neighborhood(g, summarize = "edges")
11 |     disease_subclass_of_disease <- result %>% filter(predicate == "biolink:subclass_of" &
12 |                                                      query_pcategory == "biolink:Disease" &
13 |                                                      result_pcategory == "biolink:Disease")
14 | 
15 |     expect_contains(30 + -4:4, disease_subclass_of_disease$count)
16 |     expect_contains(1 + 0:2, nrow(disease_subclass_of_disease))
17 | 
18 |     result <- summarize_neighborhood(g, summarize = "nodes")
19 |     disease_nodes <- result %>% filter(pcategory == "biolink:Disease")
20 |     expect_contains(30 + -4:4, disease_nodes$count)
21 |     expect_contains(1 + 0:2, nrow(disease_nodes))
22 |     })
23 | 


--------------------------------------------------------------------------------
/man/roll.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rollup.R
 3 | \name{roll}
 4 | \alias{roll}
 5 | \title{Internal function for rolling up and down}
 6 | \usage{
 7 | roll(
 8 |   column,
 9 |   fun = c,
10 |   include_self = TRUE,
11 |   predicates = "biolink:subclass_of",
12 |   direction = "up",
13 |   ...
14 | )
15 | }
16 | \arguments{
17 | \item{column}{The node column to draw rollup or rolldown information from.}
18 | 
19 | \item{fun}{The aggregation function to use when rolling up or down. Default is \code{c}}
20 | 
21 | \item{include_self}{Whether to include each nodes' value in \code{column} in the rollup/rolldown for that node.}
22 | 
23 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the rollup/rolldown. Should be transitive; default \code{biolink:subclass_of}}
24 | 
25 | \item{direction}{Whether to roll up or down.}
26 | 
27 | \item{...}{Other parameters (unused)}
28 | }
29 | \description{
30 | Internal function for rolling up and down
31 | }
32 | 


--------------------------------------------------------------------------------
/man/normalize_categories.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/normalize_categories.R
 3 | \name{normalize_categories}
 4 | \alias{normalize_categories}
 5 | \title{Normalize Categories}
 6 | \usage{
 7 | normalize_categories(cats_list, cats_prefs)
 8 | }
 9 | \arguments{
10 | \item{cats_list}{A list of vectors of categories.}
11 | 
12 | \item{cats_prefs}{An ordered preference list over categories.}
13 | }
14 | \value{
15 | A vector of normalized categories.
16 | }
17 | \description{
18 | This function takes a list of vectors of categories and an ordered preference list over categories.
19 | It selects the most preferred category from each vector, or the first category if no preferred categories are included.
20 | }
21 | \examples{
22 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
23 | categories_list <- list(c("A", "B", "C"), c("D", "E", "F"))
24 | categories_prefs <- c("B", "E", "A", "D", "C", "F")
25 | normalize_categories(categories_list, categories_prefs)
26 | \dontshow{\}) # examplesIf}
27 | }
28 | 


--------------------------------------------------------------------------------
/R/in_list.R:
--------------------------------------------------------------------------------
 1 | #' Check if an element is contained in list sub-elements
 2 | #' 
 3 | #' This function is useful for filtering dataframes by list columns. It checks if an element is contained in each list element, returning a logical vector. Note
 4 | #' that this is different than `%in%` which does not work with list columns in a natural manner.
 5 | #' 
 6 | #' @examples
 7 | #' library(dplyr)
 8 | #' 
 9 | #' df <- tibble(id = c("A", "B", "C"), list_col = list(1:3, 2:4, 3:5))
10 | #' 
11 | #' # works naturally for list columns
12 | #' df %>% filter(2 %in_list% list_col)
13 | #' 
14 | #' # also works with basic vector columns
15 | #' df %>% filter("B" %in_list% id)
16 | #' 
17 | #' # does not work with multi-valued left hand sides
18 | #' # df %>% filter(c("B", "C") %in_list% id) # error
19 | #' 
20 | #' @param element The element to check for
21 | #' @param list A list of elements to check against
22 | #' @return A logical vector indicating if the element is in each list element
23 | #' @export
24 | `%in_list%` <- function(element, list) {
25 |   vapply(list, function(x) element %in% x, logical(1))
26 | }


--------------------------------------------------------------------------------
/tests/testthat/test-monarch_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | 
 5 | test_that("monarch_engine works as expected", {
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 |     e <- monarch_engine()
 9 |     g <- fetch_nodes(e, query_ids = "MONDO:0006043")
10 |     # this should have 6 subtypes (two direct, four under one of the direct children)
11 |     subtypes <- g %>% expand(direction = "in",
12 |                                   predicates = "biolink:subclass_of",
13 |                                   transitive = TRUE)
14 | 
15 |     nodes_df <- subtypes %>% activate(nodes) %>% as.data.frame()
16 |     edges_df <- subtypes %>% activate(edges) %>% as.data.frame()
17 |     expect_contains(7 + -2:2, nrow(nodes_df))
18 |     expect_contains(6 + -2:2, nrow(edges_df))
19 | 
20 |     # there should be a pcategory col of type character
21 |     expect_true("pcategory" %in% names(nodes_df))
22 |     expect_true(is.character(nodes_df$pcategory))
23 | 
24 |     # there should be a category col of type list
25 |     expect_true("category" %in% names(nodes_df))
26 |     expect_true(is.list(nodes_df$category))
27 | })
28 | 


--------------------------------------------------------------------------------
/R/cypher_query.R:
--------------------------------------------------------------------------------
 1 | #' Execute a Cypher Query
 2 | #'
 3 | #' This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a tbl_kgx graph.
 4 | #'
 5 | #' @param engine A neo4j KG engine
 6 | #' @param query A string representing the Cypher query. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a single joined graph.
 7 | #' @param parameters A list of parameters for the Cypher query. Default is an empty list.
 8 | #' @param ... Additional arguments passed to the function.
 9 | #' @return The result of the Cypher query as a tbl_kgx graph.
10 | #' @export
11 | #' @examplesIf monarch_engine_check()
12 | #' engine <- monarch_engine()
13 | #'
14 | #' query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10"
15 | #' ids <- c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021")
16 | #' parameters <- list(ids = ids)
17 | #'
18 | #' result <- cypher_query(engine, query, parameters)
19 | #' print(result)
20 | #' @importFrom neo2R cypher
21 | cypher_query <- function(engine, query, parameters = NULL, ...) {
22 |     UseMethod("cypher_query")
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/R/order_cols.R:
--------------------------------------------------------------------------------
 1 | #' Set edge/row data column order according to most recent engine preferences
 2 | #' @param g A `tbl_kgx` graph.
 3 | #' @import dplyr
 4 | #' @import tidygraph
 5 | order_cols <- function(g) {
 6 | 	e <- attr(g, "last_engine")
 7 | 	node_prefs <- c("id", "pcategory", "name")
 8 | 	edge_prefs <- c("subject", "predicate", "object")
 9 | 
10 | 	if(!is.null(e)) {
11 | 		node_prefs <- e$preferences$node_property_priority
12 | 		edge_prefs <- e$preferences$edge_property_priority
13 | 	}
14 | 
15 | 	current_node_names <- colnames(nodes(g))
16 | 	used_prefs_node_names <- node_prefs[node_prefs %in% current_node_names]
17 | 	set_node_names <- c(used_prefs_node_names, setdiff(current_node_names, used_prefs_node_names))
18 | 
19 | 	current_edge_names <- colnames(edges(g))
20 | 	used_prefs_edge_names <- edge_prefs[edge_prefs %in% current_edge_names]
21 | 	set_edge_names <- c(used_prefs_edge_names, setdiff(current_edge_names, used_prefs_edge_names))
22 | 
23 | 	res <- g |>
24 | 		activate(nodes) |>
25 | 		select(all_of(set_node_names)) |>
26 | 		activate(edges) |>
27 | 		select(all_of(set_edge_names)) |>
28 | 		activate(nodes)
29 | 
30 | 	return(res)
31 | }
32 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2023 monarchr authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/man/example_graph.file_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/example_graph.file_engine.R
 3 | \name{example_graph.file_engine}
 4 | \alias{example_graph.file_engine}
 5 | \title{Return an example set of nodes from a KG engine.}
 6 | \usage{
 7 | \method{example_graph}{file_engine}(engine, ...)
 8 | }
 9 | \arguments{
10 | \item{engine}{A \code{file_engine} object}
11 | 
12 | \item{...}{Other parameters (not used)}
13 | }
14 | \value{
15 | A tbl_kgx graph
16 | }
17 | \description{
18 | Given a KGX file-based KG engine, returns a graph representing the diversity
19 | of node categories and edge predicates for browsing. The returned graph is guaranteed to
20 | contain at least one node of every category, and at least one edge of every
21 | predicate. No other guarantees are made: the example graph is not minimal
22 | to satisfy these criteria, it is not random or even pseudo-random, and it
23 | may not be connected.
24 | }
25 | \examples{
26 | # Using example KGX file packaged with monarchr
27 | data(eds_marfan_kg)
28 | 
29 | # Retrieve and print an example graph:
30 | g <- eds_marfan_kg |> example_graph()
31 | print(g)
32 | }
33 | 


--------------------------------------------------------------------------------
/R/example_graph.R:
--------------------------------------------------------------------------------
 1 | #' Return an example set of nodes from a KG engine.
 2 | #'
 3 | #' Given a KG engine, returns a graph representing the diversity
 4 | #' of node categories and edge predicates for browsing. The returned graph is guaranteed to
 5 | #' contain at least one node of every category, and at least one edge of every
 6 | #' predicate. No other guarantees are made: the example graph is not minimal
 7 | #' to satisfy these criteria, it is not random or even pseudo-random, and it
 8 | #' may not be connected.
 9 | #'
10 | #' @param engine A KG engine object
11 | #' @param ... Other parameters (not used)
12 | #' @return A tbl_kgx graph
13 | #' @export
14 | #' @examples
15 | #' # Using example KGX file packaged with monarchr
16 | #' data(eds_marfan_kg)
17 | #'
18 | #' # prints a readable summary and returns a list of dataframes
19 | #' g <- eds_marfan_kg |> example_graph()
20 | #' print(g)
21 | #'
22 | #' @examplesIf monarch_engine_check()
23 | #' # prints a readable summary and returns a list of dataframes
24 | #' g <- monarch_engine() |> example_graph()
25 | #' print(g)
26 | #' @import tidygraph
27 | #' @import dplyr
28 | example_graph <- function(engine, ...) {
29 | 	UseMethod("example_graph")
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/man/grapes-in_list-grapes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/in_list.R
 3 | \name{\%in_list\%}
 4 | \alias{\%in_list\%}
 5 | \title{Check if an element is contained in list sub-elements}
 6 | \usage{
 7 | element \%in_list\% list
 8 | }
 9 | \arguments{
10 | \item{element}{The element to check for}
11 | 
12 | \item{list}{A list of elements to check against}
13 | }
14 | \value{
15 | A logical vector indicating if the element is in each list element
16 | }
17 | \description{
18 | This function is useful for filtering dataframes by list columns. It checks if an element is contained in each list element, returning a logical vector. Note
19 | that this is different than \code{\%in\%} which does not work with list columns in a natural manner.
20 | }
21 | \examples{
22 | library(dplyr)
23 | 
24 | df <- tibble(id = c("A", "B", "C"), list_col = list(1:3, 2:4, 3:5))
25 | 
26 | # works naturally for list columns
27 | df \%>\% filter(2 \%in_list\% list_col)
28 | 
29 | # also works with basic vector columns
30 | df \%>\% filter("B" \%in_list\% id)
31 | 
32 | # does not work with multi-valued left hand sides
33 | # df \%>\% filter(c("B", "C") \%in_list\% id) # error
34 | 
35 | }
36 | 


--------------------------------------------------------------------------------
/man/example_graph.neo4j_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/example_graph.neo4j_engine.R
 3 | \name{example_graph.neo4j_engine}
 4 | \alias{example_graph.neo4j_engine}
 5 | \title{Return an example set of nodes from a KG engine.}
 6 | \usage{
 7 | \method{example_graph}{neo4j_engine}(engine, ...)
 8 | }
 9 | \arguments{
10 | \item{engine}{A \code{neo4j_engine} object}
11 | 
12 | \item{...}{Other parameters (not used)}
13 | }
14 | \value{
15 | A tbl_kgx graph
16 | }
17 | \description{
18 | Given a KGX Neo4j KG engine, returns a graph representing the diversity
19 | of node categories and edge predicates for browsing. The returned graph is guaranteed to
20 | contain at least one node of every category, and at least one edge of every
21 | predicate. No other guarantees are made: the example graph is not minimal
22 | to satisfy these criteria, it is not random or even pseudo-random, and it
23 | may not be connected.
24 | }
25 | \examples{
26 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
27 | # Retrieve and print an example graph:
28 | g <- monarch_engine() |> example_graph()
29 | print(g)
30 | \dontshow{\}) # examplesIf}
31 | }
32 | 


--------------------------------------------------------------------------------
/R/normalize_categories.R:
--------------------------------------------------------------------------------
 1 | #' Normalize Categories
 2 | #'
 3 | #' This function takes a list of vectors of categories and an ordered preference list over categories.
 4 | #' It selects the most preferred category from each vector, or the first category if no preferred categories are included.
 5 | #'
 6 | #' @param cats_list A list of vectors of categories.
 7 | #' @param cats_prefs An ordered preference list over categories.
 8 | #' @return A vector of normalized categories.
 9 | #' @examplesIf FALSE
10 | #' categories_list <- list(c("A", "B", "C"), c("D", "E", "F"))
11 | #' categories_prefs <- c("B", "E", "A", "D", "C", "F")
12 | #' normalize_categories(categories_list, categories_prefs)
13 | normalize_categories <- function(cats_list, cats_prefs) {
14 | 	normed <- unlist(lapply(cats_list, function(categories) {
15 | 		positions <- match(categories, cats_prefs)
16 | 
17 | 		# If all matches are NO, there is no preference match, so use the first label;
18 | 		# otherwise, use the label with the minimum position in the preferred list
19 | 		most_preferred_label <- if (all(is.na(positions))) {
20 | 			categories[1]
21 | 		} else {
22 | 			categories[which.min(positions)]
23 | 		}
24 | 
25 | 		most_preferred_label
26 | 	}))
27 | 
28 | 	return(normed)
29 | }
30 | 


--------------------------------------------------------------------------------
/R/cypher_query_df.R:
--------------------------------------------------------------------------------
 1 | #' Execute a Cypher Query
 2 | #'
 3 | #' This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a data frame.
 4 | #'
 5 | #' @param engine A neo4j_engine() or derivative providing access to a Neo4j database.
 6 | #' @param query A string representing the Cypher query, which should return a table. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a list of data frames.
 7 | #' @param parameters A list of parameters for the Cypher query, if required.
 8 | #' @param ... Additional arguments passed to the function.
 9 | #' @return The result of the Cypher query as a data frame, or a list of data frames if multiple queries are passed.
10 | #' @export
11 | #' @examplesIf monarch_engine_check()
12 | #' engine <- monarch_engine()
13 | #'
14 | #' query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10"
15 | #' parameters <- list(ids = c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021"))
16 | #'
17 | #' result <- cypher_query_df(engine, query, parameters)
18 | #' print(result)
19 | #' @importFrom neo2R cypher
20 | cypher_query_df <- function(engine, query, parameters = NULL, ...) {
21 |     UseMethod("cypher_query_df")
22 | }
23 | 


--------------------------------------------------------------------------------
/man/graph_centrality.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/graph_centrality.R
 3 | \name{graph_centrality}
 4 | \alias{graph_centrality}
 5 | \title{Add centrality}
 6 | \usage{
 7 | graph_centrality(
 8 |   graph,
 9 |   fun = igraph::harmonic_centrality,
10 |   col = "centrality",
11 |   ...
12 | )
13 | }
14 | \arguments{
15 | \item{graph}{A graph object}
16 | 
17 | \item{fun}{The centrality function to use.
18 | Default is \link[igraph]{harmonic_centrality}.}
19 | 
20 | \item{col}{Name of the new node attribute to store the centrality score.}
21 | 
22 | \item{...}{Additional arguments passed to the centrality function
23 | (\code{sim_fun}).}
24 | }
25 | \value{
26 | Graph object with centrality added as a new node attribute.
27 | }
28 | \description{
29 | First computes of each node in a graph.
30 | Then adds the centrality score as an node attribute.
31 | }
32 | \examples{
33 | data(eds_marfan_kg)
34 | g <- eds_marfan_kg |>
35 |           fetch_nodes(query_ids = "MONDO:0007525") |>
36 |           expand(predicates = "biolink:has_phenotype",
37 |                  categories = "biolink:PhenotypicFeature")|>
38 |           expand(categories = "biolink:Gene")
39 | g <- graph_centrality(g)
40 | nodes(g)$centrality
41 | }
42 | 


--------------------------------------------------------------------------------
/inst/association_type_mappings2.yaml:
--------------------------------------------------------------------------------
 1 | - subject_type: Disease
 2 |   object_type: Phenotype
 3 |   category: "biolink:DiseaseToPhenotypicFeatureAssociation"
 4 | - subject_type: Gene
 5 |   object_type: Phenotype
 6 |   category: "biolink:GeneToPhenotypicFeatureAssociation"
 7 | - subject_type: Gene
 8 |   object_type: Gene
 9 |   symmetric: true
10 |   category: "biolink:PairwiseGeneToGeneInteraction"
11 | - subject_type: Gene
12 |   object_type: Pathway
13 |   category: "biolink:GeneToPathwayAssociation"
14 | - subject_type: Gene
15 |   object_type: Anatomy
16 |   category: "biolink:GeneToExpressionSiteAssociation"
17 | - subject_type: Orthologs
18 |   object_type: Orthologs
19 |   symmetric: true
20 |   category: "biolink:GeneToGeneHomologyAssociation"
21 | - subject_type: Chemicals
22 |   object_type: Pathways
23 |   category: "biolink:ChemicalToPathwayAssociation"
24 | - subject_type: Genes
25 |   object_type: Molecular Functions
26 |   category: "biolink:MacromolecularMachineToMolecularActivityAssociation"
27 | - subject_type: Causal Genes
28 |   object_type: Causal Diseases
29 |   category: "biolink:CausalGeneToDiseaseAssociation"
30 | - subject_type: "Correlated Genes"
31 |   object_type: "Correlated Diseases"
32 |   category: "biolink:CorrelatedGeneToDiseaseAssociation"
33 | 


--------------------------------------------------------------------------------
/man/monarch_search.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/monarch_search.R
 3 | \name{monarch_search}
 4 | \alias{monarch_search}
 5 | \title{Search for KG nodes using the Monarch Initiative search API}
 6 | \usage{
 7 | monarch_search(query, category = NULL, limit = 10, ...)
 8 | }
 9 | \arguments{
10 | \item{query}{Search query string, e.g. "Cystic fibrosis"}
11 | 
12 | \item{category}{A set of node category labels to limit the search to, e.g. c("biolink:Disease", "biolink:Gene")}
13 | 
14 | \item{limit}{Maximum number of nodes to return. Default 10.}
15 | 
16 | \item{...}{Parameters passed to monarch_engine().}
17 | }
18 | \value{
19 | A local tbl_kgx graph with no edges.
20 | }
21 | \description{
22 | This function is a wrapper around the Monarch-hosted
23 | \href{https://api.monarchinitiative.org/v3/docs#/search/search_v3_api_search_get}{search API}.
24 | It returns nodes (no edges) from the Monarch KG, fetched via an instance of \code{monarch_engine()}.
25 | }
26 | \examples{
27 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
28 | cf_hits <- monarch_search("Cystic fibrosis", category = "biolink:Disease", limit = 5)
29 | print(cf_hits)
30 | \dontshow{\}) # examplesIf}
31 | }
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-monarch_search.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("monarch_search works", {
 5 |     # skip for now
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 |     g <- monarch_search("fanconi anemia", limit = 5)
 9 |     # result should be a tbl_kgx with 5 nodes and no edges
10 |     expect_s3_class(g, "tbl_kgx")
11 |     nodes_df <- data.frame(tidygraph::activate(g, nodes))
12 |     edges_df <- data.frame(tidygraph::activate(g, edges))
13 |     expect_equal(nrow(nodes_df), 5)
14 |     expect_equal(nrow(edges_df), 0)
15 | 
16 |     # limit = 1 should work
17 |     g <- monarch_search("fanconi anemia", limit = 1)
18 |     # result should be a tbl_kgx with 1 node and no edges
19 |     nodes_df <- data.frame(tidygraph::activate(g, nodes))
20 |     edges_df <- data.frame(tidygraph::activate(g, edges))
21 |     expect_equal(nrow(nodes_df), 1)
22 |     expect_equal(nrow(edges_df), 0)
23 | 
24 |     # no hits should work
25 |     g <- monarch_search("this is not a real search term")
26 |     # this should return a graph with no nodes
27 |     nodes_df <- data.frame(tidygraph::activate(g, nodes))
28 |     edges_df <- data.frame(tidygraph::activate(g, edges))
29 |     expect_equal(nrow(nodes_df), 0)
30 |     expect_equal(nrow(edges_df), 0)
31 | 
32 | 
33 |     })
34 | 


--------------------------------------------------------------------------------
/R/load_kgx.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Load a graph from a KGX-formatted .tar.gz file.
 3 | #'
 4 | #' Given a KGX-formatted tabular KG
 5 | #' (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md)
 6 | #' loads it as a graph.
 7 | #'
 8 | #' @param filename File to the graph from. Must end in .tar.gz and conform to KGX specification (see description).
 9 | #' @param attach_engine An engine to attach to the graph (optional).
10 | #' @param ... Other parameters (unused)
11 | #' @return A `tbl_kgx` graph.
12 | #' @export
13 | #' @examplesIf monarch_engine_check()
14 | #' phenos <- monarch_engine() |>
15 | #'           fetch_nodes(query_ids = "MONDO:0007525") |>
16 | #'           expand(predicates = "biolink:has_phenotype",
17 | #'                  categories = "biolink:PhenotypicFeature")
18 | #'
19 | #' save_kgx(phenos, "phenos.tar.gz")
20 | #'
21 | #' # when loading the graph, we can optionally attach an engine
22 | #' loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine())
23 | #'
24 | #' loaded_phenos
25 | #'
26 | #' # cleanup saved file
27 | #' file.remove("phenos.tar.gz")
28 | load_kgx <- function(filename, attach_engine = NULL, ...) {
29 | 	e <- file_engine(filename)
30 | 	g <- e$graph
31 | 
32 | 	attr(g, "last_engine") <- attach_engine
33 | 	g <- order_cols(g)
34 | 	return(g)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/neo4j_engine_check.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/neo4j_engine_check.R
 3 | \name{neo4j_engine_check}
 4 | \alias{neo4j_engine_check}
 5 | \title{Check if a neo4j database is available and properly formatted}
 6 | \usage{
 7 | neo4j_engine_check(url, username = NA, password = NA, warn = TRUE)
 8 | }
 9 | \arguments{
10 | \item{url}{A character string indicating the URL of the neo4j database.}
11 | 
12 | \item{username}{A character string indicating the username for the neo4j database (if needed).}
13 | 
14 | \item{password}{A character string indicating the password for the neo4j database (if needed).}
15 | 
16 | \item{warn}{A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.}
17 | }
18 | \value{
19 | TRUE if the database is available and properly formatted, FALSE otherwise.
20 | }
21 | \description{
22 | Attempts to connect to the specified Neo4J database and run a query to see if it is properly formatted. Returns FALSE if the database is not available or not properly formatted.
23 | }
24 | \examples{
25 | print(neo4j_engine_check("https://neo4j.monarchinitiative.org"))
26 | print(neo4j_engine_check("https://no-such-db.monarchinitiative.org"))
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/tests/testthat/test-example_graph.neo4j_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("example_graph for neo4j engine", {
 5 | 	#testthat::skip("temporary skip")
 6 | 	options(width = 150)
 7 | 
 8 | 	e <- monarch_engine()
 9 | 
10 | 	sample <- example_graph(e)
11 | 
12 | 	# check some expected categories
13 | 	expect_true(any("biolink:Disease" %in_list% nodes(sample)$category))
14 | 	# expect_true(any("biolink:GenomicEntity" %in_list% nodes(sample)$category))
15 | 	# expect_true(any("biolink:GeneOrGeneProduct" %in_list% nodes(sample)$category))
16 | 	expect_true(any("biolink:SequenceVariant" %in_list% nodes(sample)$category))
17 | 	# expect_true(any("biolink:OntologyClass" %in_list% nodes(sample)$category))
18 | 	# expect_true(any("biolink:PhysicalEssence" %in_list% nodes(sample)$category))
19 | 
20 | 	# check some expected predicates
21 | 	expect_true(any("biolink:causes" %in_list% edges(sample)$predicate))
22 | 	expect_true(any("biolink:subclass_of" %in_list% edges(sample)$predicate))
23 | 	expect_true(any("biolink:associated_with_increased_likelihood_of" %in_list% edges(sample)$predicate))
24 | 	expect_true(any("biolink:treats_or_applied_or_studied_to_treat" %in_list% edges(sample)$predicate))
25 | 	expect_true(any("biolink:genetically_associated_with" %in_list% edges(sample)$predicate))
26 | })
27 | 


--------------------------------------------------------------------------------
/meta/archive/association_type_mappings.yaml:
--------------------------------------------------------------------------------
 1 | - subject_label: Diseases
 2 |   object_label: Phenotypes
 3 |   category: "biolink:DiseaseToPhenotypicFeatureAssociation"
 4 | - subject_label: Genes
 5 |   object_label: Phenotypes
 6 |   category: "biolink:GeneToPhenotypicFeatureAssociation"
 7 | - subject_label: Interactions
 8 |   object_label: Interactions
 9 |   symmetric: true
10 |   category: "biolink:PairwiseGeneToGeneInteraction"
11 | - subject_label: Genes
12 |   object_label: Pathways
13 |   category: "biolink:GeneToPathwayAssociation"
14 | - subject_label: Genes
15 |   object_label: Anatomy
16 |   category: "biolink:GeneToExpressionSiteAssociation"
17 | - subject_label: Orthologs
18 |   object_label: Orthologs
19 |   symmetric: true
20 |   category: "biolink:GeneToGeneHomologyAssociation"
21 | - subject_label: Chemicals
22 |   object_label: Pathways
23 |   category: "biolink:ChemicalToPathwayAssociation"
24 | - subject_label: Genes
25 |   object_label: Molecular Functions
26 |   category: "biolink:MacromolecularMachineToMolecularActivityAssociation"
27 | - subject_label: Causal Genes
28 |   object_label: Causal Diseases
29 |   category: "biolink:CausalGeneToDiseaseAssociation"
30 | - subject_label: "Correlated Genes"
31 |   object_label: "Correlated Diseases"
32 |   category: "biolink:CorrelatedGeneToDiseaseAssociation"
33 | 


--------------------------------------------------------------------------------
/tests/testthat/test-cypher_query.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("cypher_query returns a graph object", {
 5 |     # skip for now
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 |     e <- monarch_engine()
 9 |     g <- cypher_query(e, query = "MATCH (s) -[r]- (o) return s, r, o LIMIT 1")
10 |     expect_s3_class(g, "tbl_kgx")
11 |     nodes_df <- data.frame(tidygraph::activate(g, nodes))
12 |     edges_df <- data.frame(tidygraph::activate(g, edges))
13 |     # g should be a tidygraph with two rows in nodes and one in edges
14 |     expect_equal(nrow(nodes_df), 2)
15 |     expect_equal(nrow(edges_df), 1)
16 | })
17 | 
18 | test_that("cypher_query works with multicypher queries", {
19 | 	# skip for now
20 | 	#testthat::skip("temporary skip")
21 | 
22 | 	e <- monarch_engine()
23 | 	queries <- c("MATCH (n {id: 'MONDO:0007947'}) RETURN n",
24 | 							 "MATCH (n {id: 'MONDO:0017309'}) RETURN n",
25 | 							 "MATCH (n {id: 'MONDO:0020066'}) RETURN n")
26 | 	g <- cypher_query(e, query = queries)
27 | 	expect_s3_class(g, "tbl_kgx")
28 | 	nodes_df <- data.frame(tidygraph::activate(g, nodes))
29 | 	edges_df <- data.frame(tidygraph::activate(g, edges))
30 | 	# g should be a tidygraph with two rows in nodes and one in edges
31 | 	expect_equal(nrow(nodes_df), 3)
32 | 	expect_equal(nrow(edges_df), 0)
33 | })
34 | 


--------------------------------------------------------------------------------
/man/kg_edge_weights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/kg_edge_weights.R
 3 | \name{kg_edge_weights}
 4 | \alias{kg_edge_weights}
 5 | \title{Knowledge Graph edge weights}
 6 | \usage{
 7 | kg_edge_weights(
 8 |   graph,
 9 |   normalise = TRUE,
10 |   encodings = monarch_edge_weight_encodings(),
11 |   fun = function(x) {
12 |      rowSums(x, na.rm = TRUE)
13 |  }
14 | )
15 | }
16 | \arguments{
17 | \item{graph}{A graph object}
18 | 
19 | \item{normalise}{Normalise each encoding from 0-1 by dividing by the
20 | maximum value. Default is \code{TRUE}.}
21 | 
22 | \item{encodings}{A list of named lists of encoding values for
23 | different edge attributes.}
24 | 
25 | \item{fun}{Function to compute edge weights with across the
26 | numerically encoded attributes. Default is \link{rowSums}.}
27 | }
28 | \description{
29 | Compute edge weights for the given \link{tbl_kgx} graph using
30 | several pieces of categorical, ordinal, and continuous metadata.
31 | }
32 | \examples{
33 | data(eds_marfan_kg)
34 | g <- eds_marfan_kg |>
35 |      fetch_nodes(query_ids = "MONDO:0007525") |>
36 |      expand(predicates = "biolink:has_phenotype",
37 |             categories = "biolink:PhenotypicFeature")|>
38 |      expand(categories = "biolink:Gene")
39 | g2 <- kg_edge_weights(g)
40 | edges(g2)$weight
41 | }
42 | 


--------------------------------------------------------------------------------
/man/layout_umap.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/layout_umap.R
 3 | \name{layout_umap}
 4 | \alias{layout_umap}
 5 | \title{Layout UMAP}
 6 | \usage{
 7 | layout_umap(graph, use_3d = FALSE, prefix = "UMAP", ...)
 8 | }
 9 | \arguments{
10 | \item{graph}{A graph object}
11 | 
12 | \item{use_3d}{Logical, whether to use a 3D layout (TRUE) or
13 | 2D layout (FALSE). Default is FALSE (2D).}
14 | 
15 | \item{prefix}{A character string prefix to add to the layout column names.}
16 | 
17 | \item{...}{Additional arguments passed to the layout function.}
18 | }
19 | \value{
20 | A matrix of x and y coordinates for each node in the graph.
21 | }
22 | \description{
23 | Generate a 2D or 2D layout of a graph using the UMAP algorithm.
24 | See here for details:
25 | \href{https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_umap}{
26 | igraph_layout_umap}
27 | }
28 | \examples{
29 | set.seed(2024)
30 | data(eds_marfan_kg)
31 | g <- eds_marfan_kg |>
32 |      fetch_nodes(pcategory=="biolink:Disease", limit=40) |>
33 |      expand(predicates = "biolink:has_phenotype",
34 |             categories = "biolink:PhenotypicFeature")|>
35 |      tidygraph::sample_n(200) |>
36 |      expand(categories = "biolink:Gene")
37 | X <- layout_umap(g)
38 | g <- graph_centrality(g)
39 | plot(g, layout=X, node_size=centrality)
40 | }
41 | 


--------------------------------------------------------------------------------
/R/cytoscape.R:
--------------------------------------------------------------------------------
 1 | #' Send a graph to Cytoscape
 2 | #'
 3 | #' Given a tbl_kgx graph, send it to Cytoscape for visualization. Node labels
 4 | #' are mapped to node `name` (if available, otherwise they default to node `id`),
 5 | #' node color is mapped to `pcategory`, edge color is mapped to `predicate`,
 6 | #' node hover-over text is set to `description` (if available, otherwise node `id`),
 7 | #' and edge hover-over text is set to `predicate`. Nodes are layed out
 8 | #' using the Kamada-Kawai method. These properties and more may be customized in
 9 | #' the Cytoscape application. This function requires that Cytoscape is installed
10 | #' and running independently of the R session.
11 | #'
12 | #'
13 | #' @param g A `tbl_kgx()` graph to visualize.
14 | #' @param ... Other parameters (unused).
15 | #'
16 | #' @return NULL, invisibly
17 | #' @export
18 | #' @examplesIf FALSE
19 | #' data(eds_marfan_kg)
20 | #' g <- eds_marfan_kg |>
21 | #' 	 fetch_nodes(query_ids = "MONDO:0020066") |>
22 | #' 	 expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |>
23 | #' 	 expand(categories = c("biolink:PhenotypicFeature", "biolink:Gene"))
24 | #'
25 | #' # Cytoscape must be installed and running
26 | #' cytoscape(g)
27 | #'
28 | #' @import RCy3
29 | #' @import tidygraph
30 | #' @import dplyr
31 | cytoscape <- function(g, ...) {
32 | 	UseMethod("cytoscape")
33 | }
34 | 


--------------------------------------------------------------------------------
/man/cypher_query.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cypher_query.R
 3 | \name{cypher_query}
 4 | \alias{cypher_query}
 5 | \title{Execute a Cypher Query}
 6 | \usage{
 7 | cypher_query(engine, query, parameters = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{engine}{A neo4j KG engine}
11 | 
12 | \item{query}{A string representing the Cypher query. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a single joined graph.}
13 | 
14 | \item{parameters}{A list of parameters for the Cypher query. Default is an empty list.}
15 | 
16 | \item{...}{Additional arguments passed to the function.}
17 | }
18 | \value{
19 | The result of the Cypher query as a tbl_kgx graph.
20 | }
21 | \description{
22 | This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a tbl_kgx graph.
23 | }
24 | \examples{
25 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
26 | engine <- monarch_engine()
27 | 
28 | query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10"
29 | ids <- c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021")
30 | parameters <- list(ids = ids)
31 | 
32 | result <- cypher_query(engine, query, parameters)
33 | print(result)
34 | \dontshow{\}) # examplesIf}
35 | }
36 | 


--------------------------------------------------------------------------------
/man/example_graph.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/example_graph.R
 3 | \name{example_graph}
 4 | \alias{example_graph}
 5 | \title{Return an example set of nodes from a KG engine.}
 6 | \usage{
 7 | example_graph(engine, ...)
 8 | }
 9 | \arguments{
10 | \item{engine}{A KG engine object}
11 | 
12 | \item{...}{Other parameters (not used)}
13 | }
14 | \value{
15 | A tbl_kgx graph
16 | }
17 | \description{
18 | Given a KG engine, returns a graph representing the diversity
19 | of node categories and edge predicates for browsing. The returned graph is guaranteed to
20 | contain at least one node of every category, and at least one edge of every
21 | predicate. No other guarantees are made: the example graph is not minimal
22 | to satisfy these criteria, it is not random or even pseudo-random, and it
23 | may not be connected.
24 | }
25 | \examples{
26 | # Using example KGX file packaged with monarchr
27 | data(eds_marfan_kg)
28 | 
29 | # prints a readable summary and returns a list of dataframes
30 | g <- eds_marfan_kg |> example_graph()
31 | print(g)
32 | 
33 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
34 | # prints a readable summary and returns a list of dataframes
35 | g <- monarch_engine() |> example_graph()
36 | print(g)
37 | \dontshow{\}) # examplesIf}
38 | }
39 | 


--------------------------------------------------------------------------------
/man/transitive_closure.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/transitive_closure.R
 3 | \name{transitive_closure}
 4 | \alias{transitive_closure}
 5 | \title{Compute transitive closure over a predicate.}
 6 | \usage{
 7 | transitive_closure(g, predicate = "biolink:subclass_of")
 8 | }
 9 | \arguments{
10 | \item{g}{The \code{tbl_kgx} graph to compute on.}
11 | 
12 | \item{predicate}{The edge predicate to close over.}
13 | }
14 | \value{
15 | Graph with transitive edges added.
16 | }
17 | \description{
18 | Computes the transitive closure of a graph, treating the specified
19 | predicate as transitive. Resulting edge predicates will the be the
20 | same, but have primary_knowledge_source set to transitive_\if{html}{\out{<predicate>}}.
21 | }
22 | \examples{
23 | data(eds_marfan_kg)
24 | 
25 | eds_marfan_kg |> fetch_nodes(name == "Tall stature") |>
26 |   # get 2 levels of ancestors
27 |   expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |>
28 |   activate(edges) |>
29 |   filter(primary_knowledge_source == "infores:upheno") |>
30 |   transitive_closure(predicate = "biolink:subclass_of") |>
31 |   plot(edge_color = primary_knowledge_source)
32 | 
33 | }
34 | \seealso{
35 | \code{\link[=roll_up]{roll_up()}}, \code{\link[=transfer]{transfer()}}, \code{\link[=descendants]{descendants()}}, \code{\link[=ancestors]{ancestors()}}
36 | }
37 | 


--------------------------------------------------------------------------------
/R/graph_centrality.R:
--------------------------------------------------------------------------------
 1 | #' Add centrality
 2 | #'
 3 | #' First computes of each node in a graph.
 4 | #' Then adds the centrality score as an node attribute.
 5 | #' @param fun The centrality function to use.
 6 | #'  Default is \link[igraph]{harmonic_centrality}.
 7 | #' @param col Name of the new node attribute to store the centrality score.
 8 | #' @param ... Additional arguments passed to the centrality function
 9 | #'  (\code{sim_fun}).
10 | #' @inheritParams nodes
11 | #' @returns Graph object with centrality added as a new node attribute.
12 | #' @export
13 | #' @importFrom tidygraph active
14 | #' @importFrom tidygraph activate
15 | #' @examples
16 | #' data(eds_marfan_kg)
17 | #' g <- eds_marfan_kg |>
18 | #'           fetch_nodes(query_ids = "MONDO:0007525") |>
19 | #'           expand(predicates = "biolink:has_phenotype",
20 | #'                  categories = "biolink:PhenotypicFeature")|>
21 | #'           expand(categories = "biolink:Gene")
22 | #' g <- graph_centrality(g)
23 | #' nodes(g)$centrality
24 | graph_centrality <- function(graph,
25 | 														 fun=igraph::harmonic_centrality,
26 | 														 col="centrality",
27 | 														 ...){
28 | 	active_tbl <- active(graph)
29 | 	message("Computing node centrality.")
30 | 	graph <- graph|>
31 | 		activate(nodes)|>
32 | 		dplyr::mutate(!!col:=fun(graph, ...)) |>
33 | 		activate(!!rlang::sym(active_tbl))
34 | 	return(graph)
35 | }
36 | 


--------------------------------------------------------------------------------
/tests/testthat/test-example_graph.file_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("example_graph for file engine", {
 5 | 	#testthat::skip("temporary skip")
 6 | 	options(width = 150)
 7 | 
 8 | 	filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
 9 | 	e <- file_engine(filename)
10 | 
11 | 	sample <- example_graph(e)
12 | 
13 | 	# check some expected categories
14 | 	expect_true(any("biolink:Disease" %in_list% nodes(sample)$category))
15 | 	expect_true(any("biolink:GenomicEntity" %in_list% nodes(sample)$category))
16 | 	expect_true(any("biolink:GeneOrGeneProduct" %in_list% nodes(sample)$category))
17 | 	expect_true(any("biolink:SequenceVariant" %in_list% nodes(sample)$category))
18 | 	expect_true(any("biolink:OntologyClass" %in_list% nodes(sample)$category))
19 | 	expect_true(any("biolink:PhysicalEssence" %in_list% nodes(sample)$category))
20 | 
21 | 	# check some expected predicates
22 | 	expect_true(any("biolink:causes" %in_list% edges(sample)$predicate))
23 | 	expect_true(any("biolink:subclass_of" %in_list% edges(sample)$predicate))
24 | 	expect_true(any("biolink:associated_with_increased_likelihood_of" %in_list% edges(sample)$predicate))
25 | 	expect_true(any("biolink:treats_or_applied_or_studied_to_treat" %in_list% edges(sample)$predicate))
26 | 	expect_true(any("biolink:genetically_associated_with" %in_list% edges(sample)$predicate))
27 | })
28 | 


--------------------------------------------------------------------------------
/man/summary.neo4j_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summary.neo4j_engine.R
 3 | \name{summary.neo4j_engine}
 4 | \alias{summary.neo4j_engine}
 5 | \title{Summarize contents of a Neo4j KG engine}
 6 | \usage{
 7 | \method{summary}{neo4j_engine}(object, ..., quiet = FALSE)
 8 | }
 9 | \arguments{
10 | \item{object}{A \code{neo4j_engine} object}
11 | 
12 | \item{...}{Other parameters (not used)}
13 | 
14 | \item{quiet}{Logical, whether to suppress printing of the summary}
15 | }
16 | \value{
17 | A list of dataframes and named lists
18 | }
19 | \description{
20 | Given a Neo4j based KG engine, provides summary information in the form of
21 | node counts, category counts across nodes, relationship type counts, and available properties.
22 | General information about the graph is printed to the console, and a list of
23 | dataframes with this information is returned invisibly. Also returned
24 | are \code{cats}, \code{preds}, and \code{props} entries, containing lists of available
25 | categories/predicates/properties for convenient auto-completion in RStudio.
26 | }
27 | \examples{
28 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
29 | # prints a readable summary and returns a list of dataframes
30 | stats <- monarch_engine() |> summary()
31 | print(stats)
32 | \dontshow{\}) # examplesIf}
33 | }
34 | 


--------------------------------------------------------------------------------
/.github/workflows/rworkflows.yml:
--------------------------------------------------------------------------------
 1 | name: rworkflows
 2 | 'on':
 3 |   push:
 4 |     branches:
 5 |     - master
 6 |     - main
 7 |     - devel
 8 |     - RELEASE_**
 9 |   pull_request:
10 |     branches:
11 |     - master
12 |     - main
13 |     - devel
14 |     - RELEASE_**
15 | jobs:
16 |   rworkflows:
17 |     permissions: write-all
18 |     runs-on: ${{ matrix.config.os }}
19 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
20 |     container: ${{ matrix.config.cont }}
21 |     strategy:
22 |       fail-fast: ${{ false }}
23 |       matrix:
24 |         config:
25 |         - os: ubuntu-latest
26 |           bioc: devel
27 |           r: auto
28 |           cont: ghcr.io/bioconductor/bioconductor_docker:devel
29 |           rspm: ~
30 |     steps:
31 |     - uses: neurogenomics/rworkflows@master
32 |       with:
33 |         run_bioccheck: ${{ false }}
34 |         run_rcmdcheck: ${{ false }}
35 |         as_cran: ${{ true }}
36 |         run_vignettes: ${{ true }}
37 |         has_testthat: ${{ true }}
38 |         run_covr: ${{ false }}
39 |         run_pkgdown: ${{ true }}
40 |         has_runit: ${{ false }}
41 |         has_latex: ${{ false }}
42 |         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
43 |         run_docker: ${{ false }}
44 |         DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
45 |         runner_os: ${{ runner.os }}
46 |         cache_version: cache-v1
47 |         docker_registry: ghcr.io
48 | 


--------------------------------------------------------------------------------
/man/load_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/load_kgx.R
 3 | \name{load_kgx}
 4 | \alias{load_kgx}
 5 | \title{Load a graph from a KGX-formatted .tar.gz file.}
 6 | \usage{
 7 | load_kgx(filename, attach_engine = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{filename}{File to the graph from. Must end in .tar.gz and conform to KGX specification (see description).}
11 | 
12 | \item{attach_engine}{An engine to attach to the graph (optional).}
13 | 
14 | \item{...}{Other parameters (unused)}
15 | }
16 | \value{
17 | A \code{tbl_kgx} graph.
18 | }
19 | \description{
20 | Given a KGX-formatted tabular KG
21 | (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md)
22 | loads it as a graph.
23 | }
24 | \examples{
25 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
26 | phenos <- monarch_engine() |>
27 |           fetch_nodes(query_ids = "MONDO:0007525") |>
28 |           expand(predicates = "biolink:has_phenotype",
29 |                  categories = "biolink:PhenotypicFeature")
30 | 
31 | save_kgx(phenos, "phenos.tar.gz")
32 | 
33 | # when loading the graph, we can optionally attach an engine
34 | loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine())
35 | 
36 | loaded_phenos
37 | 
38 | # cleanup saved file
39 | file.remove("phenos.tar.gz")
40 | \dontshow{\}) # examplesIf}
41 | }
42 | 


--------------------------------------------------------------------------------
/man/file_engine_check.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_engine_check.R
 3 | \name{file_engine_check}
 4 | \alias{file_engine_check}
 5 | \title{Check availability of a file-based engine}
 6 | \usage{
 7 | file_engine_check(filename, warn = TRUE)
 8 | }
 9 | \arguments{
10 | \item{filename}{A character string indicating the path to the file-based engine.}
11 | 
12 | \item{warn}{A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.}
13 | }
14 | \value{
15 | TRUE if the database is available and properly formatted, FALSE otherwise.
16 | }
17 | \description{
18 | Attempts to connect to the specified file-based engine. Returns FALSE if the file is not available or not properly formatted.
19 | }
20 | \examples{
21 | \dontshow{if (file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
22 | print(file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz"))
23 | print(file_engine_check("https://no-such-host.kghub.io/sepio_kgx_tsv.tar.gz"))
24 | print(file_engine_check(system.file("extdata", "mondo_kgx_tsv.tar.gz", package = "monarchr")))
25 | print(file_engine_check(system.file("extdata", "nosuch_kgx_tsv.tar.gz", package = "monarchr")))
26 | \dontshow{\}) # examplesIf}
27 | }
28 | 


--------------------------------------------------------------------------------
/man/summary.file_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summary.file_engine.R
 3 | \name{summary.file_engine}
 4 | \alias{summary.file_engine}
 5 | \title{Summarize contents of a KGX-file-based KG engine}
 6 | \usage{
 7 | \method{summary}{file_engine}(object, ..., quiet = FALSE)
 8 | }
 9 | \arguments{
10 | \item{object}{A \code{file_engine} object}
11 | 
12 | \item{...}{Other parameters (not used)}
13 | 
14 | \item{quiet}{Logical, whether to suppress printing of the summary}
15 | }
16 | \value{
17 | A list of dataframes and named lists
18 | }
19 | \description{
20 | Given a KGX file-based KG engine, provides summary information in the form of
21 | node counts, category counts across nodes, relationship type counts, and available properties.
22 | General information about the graph is printed to the console, and a list of
23 | dataframes with this information is returned invisibly. Also returned
24 | are \code{cats}, \code{preds}, and \code{props} entries, containing lists of available
25 | categories/predicates/properties for convenient auto-completion in RStudio.
26 | }
27 | \details{
28 | When applied to a \code{file_engine}, also included are node-specific and edge-specific properties.
29 | }
30 | \examples{
31 | # Using example KGX file packaged with monarchr
32 | data(eds_marfan_kg)
33 | 
34 | # prints a readable summary and returns a list of dataframes
35 | res <- eds_marfan_kg |> summary()
36 | print(res)
37 | }
38 | 


--------------------------------------------------------------------------------
/R/data_example_kgs.R:
--------------------------------------------------------------------------------
 1 | #' Example Ehlers-Danlos and Marfan Syndrome Knowledge Graph Engine
 2 | #'
 3 | #' A small `file_engine()` Knowledge Graph (KG) containing Monarch Initiative
 4 | #' data for Ehlers-Danlos Syndrome and Marfan Syndrome, including their subtypes,
 5 | #' all entities connected to those diseases or subtypes, and all ancestors (supertypes)
 6 | #' of all those diseases and entities. Generated 8/15/2024 via:
 7 | #'
 8 | #' ```
 9 | #' monarch_engine() |>
10 | #' 	fetch_nodes(query_ids = c("MONDO:0020066", "MONDO:0007947")) |>
11 | #' 	expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |>
12 | #' 	expand() |>
13 | #' 	expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE)
14 | #' ```
15 | #'
16 | #' This example engine may also be loaded from file via
17 | #'
18 | #' ```
19 | #' filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
20 | #' eds_marfan_kg <- file_engine(filename)
21 | #' ```
22 | #'
23 | #' @docType data
24 | #' @usage data(eds_marfan_kg)
25 | #' @format An object of class \code{file_engine} for use with `fetch_nodes()`, `expand()`, etc.
26 | #' @keywords datasets
27 | #' @examples
28 | #' data(eds_marfan_kg)
29 | #' phenos <- eds_marfan_kg |>
30 | #'           fetch_nodes(query_ids = "MONDO:0007525") |>
31 | #'           expand(predicates = "biolink:has_phenotype",
32 | #'                  categories = "biolink:PhenotypicFeature")
33 | "eds_marfan_kg"
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/R/cypher_query_df.neo4j_engine.R:
--------------------------------------------------------------------------------
 1 | internal_cypher_query_df <- function(engine, query, parameters = NULL, ...) {
 2 | 	if(length(query) == 1) {
 3 | 		result <- neo2R::cypher(engine$graph_conn, query = query, parameters = parameters, result = "row", arraysAsStrings = FALSE)
 4 | 	} else {
 5 | 		result <- neo2R::multicypher(engine$graph_conn, queries = query, parameters = parameters, result = "row", arraysAsStrings = FALSE)
 6 | 	}
 7 | 
 8 | 	return(result)
 9 | }
10 | 
11 | #internal_cypher_query_df_memoised <- memoise::memoise(internal_cypher_query_df)
12 | 
13 | #' @export
14 | #' @importFrom neo2R cypher
15 | #' @importFrom neo2R multicypher
16 | #' @importFrom memoise memoise
17 | cypher_query_df.neo4j_engine <- function(engine, query, parameters = NULL, ...) {
18 | 	if(!is.null(engine$cache)) {
19 | 		# ok, this is a bit wonky
20 | 		# the engine stores its cache
21 | 		# we create a memoized internal function using that cache
22 | 		# and then we call the function
23 | 		# BUT, the engine itself needs to be sent to the function,
24 | 		# and if its cache keeps changing it wont memoize properly
25 | 		# so we create a copy of the engine without a cache and use that
26 | 		engine_copy <- engine
27 | 		engine_copy$cache <- NULL
28 | 
29 | 		internal <- memoise::memoise(internal_cypher_query_df, cache = engine$cache)
30 | 		res <- internal(engine_copy, query, parameters, ...)
31 | 
32 | 		return(res)
33 | 	} else {
34 | 		internal_cypher_query_df(engine, query, parameters, ...)
35 | 	}
36 | }
37 | 


--------------------------------------------------------------------------------
/man/cypher_query_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cypher_query_df.R
 3 | \name{cypher_query_df}
 4 | \alias{cypher_query_df}
 5 | \title{Execute a Cypher Query}
 6 | \usage{
 7 | cypher_query_df(engine, query, parameters = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{engine}{A neo4j_engine() or derivative providing access to a Neo4j database.}
11 | 
12 | \item{query}{A string representing the Cypher query, which should return a table. Multiple queries may be passed as a vector; if so, Neo2R::multicypher if used and the result is returned as a list of data frames.}
13 | 
14 | \item{parameters}{A list of parameters for the Cypher query, if required.}
15 | 
16 | \item{...}{Additional arguments passed to the function.}
17 | }
18 | \value{
19 | The result of the Cypher query as a data frame, or a list of data frames if multiple queries are passed.
20 | }
21 | \description{
22 | This function takes a Cypher query and parameters, executes the query using the given engine, and returns the result as a data frame.
23 | }
24 | \examples{
25 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
26 | engine <- monarch_engine()
27 | 
28 | query <- "MATCH (n) WHERE n.id IN $ids RETURN n LIMIT 10"
29 | parameters <- list(ids = c("MONDO:0007525", "MONDO:0020066", "MONDO:0034021"))
30 | 
31 | result <- cypher_query_df(engine, query, parameters)
32 | print(result)
33 | \dontshow{\}) # examplesIf}
34 | }
35 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: monarchr
 2 | Type: Package
 3 | Title: Monarch Knowledge Graph Queries
 4 | Description: R package for easy access, manipulation, and analysis of
 5 | 		Monarch KG data Resources.
 6 | Version: 2.1.2
 7 | URL: https://github.com/monarch-initiative/monarchr
 8 | BugReports: https://github.com/monarch-initiative/monarchr/issues
 9 | Authors@R:
10 |     c(
11 |   person(given = "Shawn",
12 |            family = "O'Neil",
13 |            role = c("aut","cre"),
14 |            email = "oneilsh@gmail.com",
15 |            comment = c(ORCID = "0000-0001-6220-7080")),
16 |     person(given = "Brian",
17 |            family = "Schilder",
18 |            role = c("aut","ctb"),
19 |            email = "brian_schilder@alumni.brown.edu",
20 |            comment = c(ORCID = "0000-0001-5949-2191"))
21 |      ) 
22 | License: MIT + file LICENSE
23 | Encoding: UTF-8
24 | LazyData: true
25 | Config/testthat/edition: 3
26 | Roxygen: list(markdown = TRUE)
27 | RoxygenNote: 7.3.2
28 | VignetteBuilder: knitr
29 | Imports:
30 |     assertthat,
31 |     dplyr,
32 |     httr,
33 |     kableExtra,
34 |     neo2R,
35 |     rlang,
36 |     igraph,
37 |     tibble,
38 |     tidygraph,
39 |     yaml,
40 |     knitr,
41 |     purrr,
42 |     stringr,
43 |     archive,
44 |     readr,
45 |     ggraph,
46 |     ggplot2 (>= 4.0.0),
47 |     R.utils,
48 |     RCy3,
49 |     digest,
50 |     memoise,
51 |     sets,
52 |     relations
53 | Suggests: 
54 |     testthat (>= 3.0.0),
55 |     rworkflows,
56 |     Matrix
57 | 


--------------------------------------------------------------------------------
/inst/kg_prefs.yaml:
--------------------------------------------------------------------------------
 1 | category_priority:
 2 |   - biolink:LifeStage
 3 |   - biolink:MolecularEntity
 4 |   - biolink:OrganismTaxon
 5 |   - biolink:Cell
 6 |   - biolink:CellularComponent
 7 |   - biolink:MolecularActivity
 8 |   - biolink:SequenceVariant
 9 |   - biolink:ChemicalEntity
10 |   - biolink:ChemicalOrDrugOrTreatment
11 |   - biolink:GeneProductMixin
12 |   - biolink:Protein
13 |   - biolink:Polypeptide
14 |   - biolink:Pathway
15 |   - biolink:Disease
16 |   - biolink:ChemicalEntityOrProteinOrPolypeptide
17 |   - biolink:BiologicalProcess
18 |   - biolink:Occurrent
19 |   - biolink:BiologicalProcessOrActivity
20 |   - biolink:AnatomicalEntity
21 |   - biolink:OrganismalEntity
22 |   - biolink:SubjectOfInvestigation
23 |   - biolink:Genotype
24 |   - biolink:PhenotypicFeature
25 |   - biolink:DiseaseOrPhenotypicFeature
26 |   - biolink:Gene
27 |   - biolink:MacromolecularMachineMixin
28 |   - biolink:GeneOrGeneProduct
29 |   - biolink:ChemicalEntityOrGeneOrGeneProduct
30 |   - biolink:GenomicEntity
31 |   - biolink:OntologyClass
32 |   - biolink:PhysicalEssence
33 |   - biolink:PhysicalEssenceOrOccurrent
34 |   - biolink:BiologicalEntity
35 |   - biolink:ThingWithTaxon
36 |   - biolink:NamedThing
37 |   - biolink:Entity
38 | node_property_priority:
39 |   - id
40 |   - pcategory
41 |   - name
42 |   - symbol
43 |   - in_taxon_label
44 |   - description
45 |   - synonym
46 |   - primary_knowledge_source
47 | edge_property_priority:
48 |   - subject
49 |   - predicate
50 |   - object
51 |   - primary_knowledge_source
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/man/save_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/save_kgx.R
 3 | \name{save_kgx}
 4 | \alias{save_kgx}
 5 | \title{Save a graph as a KGX-formatted .tar.gz file.}
 6 | \usage{
 7 | save_kgx(graph, filename = "saved_kgx_graph.tar.gz", ...)
 8 | }
 9 | \arguments{
10 | \item{graph}{A \code{tbl_kgx} graph to save.}
11 | 
12 | \item{filename}{File to save the graph to. Must end in .tar.gz.}
13 | 
14 | \item{...}{Other parameters (unused)}
15 | }
16 | \value{
17 | The input graph (invisibly).
18 | }
19 | \description{
20 | Given a graph, saves it using the tabular KGX format
21 | (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md) for later
22 | use with \code{load_kgx()} (or even backing an engine with \code{file_engine()}). Note that if
23 | any engine is associated with the graph it is not saved.
24 | }
25 | \examples{
26 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
27 | phenos <- monarch_engine() |>
28 |           fetch_nodes(query_ids = "MONDO:0007525") |>
29 |           expand(predicates = "biolink:has_phenotype",
30 |                  categories = "biolink:PhenotypicFeature")
31 | 
32 | save_kgx(phenos, "phenos.tar.gz")
33 | 
34 | # when loading the graph, we can optionally attach an engine
35 | loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine())
36 | 
37 | loaded_phenos
38 | 
39 | # cleanup saved file
40 | file.remove("phenos.tar.gz")
41 | \dontshow{\}) # examplesIf}
42 | }
43 | 


--------------------------------------------------------------------------------
/R/layout_umap.R:
--------------------------------------------------------------------------------
 1 | #' Layout UMAP
 2 | #'
 3 | #' Generate a 2D or 2D layout of a graph using the UMAP algorithm.
 4 | #' See here for details:
 5 | #' \href{https://igraph.org/c/doc/igraph-Layout.html#igraph_layout_umap}{
 6 | #' igraph_layout_umap}
 7 | #' @param use_3d Logical, whether to use a 3D layout (TRUE) or
 8 | #'  2D layout (FALSE). Default is FALSE (2D).
 9 | #' @param prefix A character string prefix to add to the layout column names.
10 | #' @param ... Additional arguments passed to the layout function.
11 | #' @inheritParams nodes
12 | #' @returns A matrix of x and y coordinates for each node in the graph.
13 | #' @export
14 | #' @examples
15 | #' set.seed(2024)
16 | #' data(eds_marfan_kg)
17 | #' g <- eds_marfan_kg |>
18 | #'      fetch_nodes(pcategory=="biolink:Disease", limit=40) |>
19 | #'      expand(predicates = "biolink:has_phenotype",
20 | #'             categories = "biolink:PhenotypicFeature")|>
21 | #'      tidygraph::sample_n(200) |>
22 | #'      expand(categories = "biolink:Gene")
23 | #' X <- layout_umap(g)
24 | #' g <- graph_centrality(g)
25 | #' plot(g, layout=X, node_size=centrality)
26 | layout_umap <- function(graph,
27 | 												use_3d = FALSE,
28 | 												prefix="UMAP",
29 | 												...){
30 | 	if(use_3d){
31 | 		fun <- utils::getFromNamespace("layout_umap_3d_impl", "igraph")
32 | 	} else {
33 | 		fun <- utils::getFromNamespace("layout_umap_impl", "igraph")
34 | 	}
35 | 	X <- fun(graph, res=matrix())
36 | 	rownames(X) <- nodes(graph)$id
37 | 	colnames(X) <- paste0(prefix, 1:ncol(X))
38 | 	return(X)
39 | }
40 | 


--------------------------------------------------------------------------------
/man/cytoscape.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cytoscape.R
 3 | \name{cytoscape}
 4 | \alias{cytoscape}
 5 | \title{Send a graph to Cytoscape}
 6 | \usage{
 7 | cytoscape(g, ...)
 8 | }
 9 | \arguments{
10 | \item{g}{A \code{tbl_kgx()} graph to visualize.}
11 | 
12 | \item{...}{Other parameters (unused).}
13 | }
14 | \value{
15 | NULL, invisibly
16 | }
17 | \description{
18 | Given a tbl_kgx graph, send it to Cytoscape for visualization. Node labels
19 | are mapped to node \code{name} (if available, otherwise they default to node \code{id}),
20 | node color is mapped to \code{pcategory}, edge color is mapped to \code{predicate},
21 | node hover-over text is set to \code{description} (if available, otherwise node \code{id}),
22 | and edge hover-over text is set to \code{predicate}. Nodes are layed out
23 | using the Kamada-Kawai method. These properties and more may be customized in
24 | the Cytoscape application. This function requires that Cytoscape is installed
25 | and running independently of the R session.
26 | }
27 | \examples{
28 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
29 | data(eds_marfan_kg)
30 | g <- eds_marfan_kg |>
31 | 	 fetch_nodes(query_ids = "MONDO:0020066") |>
32 | 	 expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |>
33 | 	 expand(categories = c("biolink:PhenotypicFeature", "biolink:Gene"))
34 | 
35 | # Cytoscape must be installed and running
36 | cytoscape(g)
37 | \dontshow{\}) # examplesIf}
38 | }
39 | 


--------------------------------------------------------------------------------
/man/graph_semsim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/graph_semsim.R
 3 | \name{graph_semsim}
 4 | \alias{graph_semsim}
 5 | \title{Add semantic similarity}
 6 | \usage{
 7 | graph_semsim(
 8 |   graph,
 9 |   fun = igraph::similarity,
10 |   col = "similarity",
11 |   nm = "id",
12 |   sparse = TRUE,
13 |   return_matrix = FALSE,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{graph}{A graph object}
19 | 
20 | \item{fun}{The similarity function to use.
21 | Default is \link[igraph]{similarity}.}
22 | 
23 | \item{col}{Name of the new edge attribute to store the similarity score.}
24 | 
25 | \item{nm}{The node attribute to use as the matrix row/colnames.}
26 | 
27 | \item{sparse}{Return a sparse matrix instead of a dense matrix.}
28 | 
29 | \item{return_matrix}{Return the similarity matrix instead of the graph.}
30 | 
31 | \item{...}{Additional arguments passed to the similarity function
32 | (\code{fun}).}
33 | }
34 | \value{
35 | Graph object with similarity added as a new edge attribute.
36 | }
37 | \description{
38 | First computes semantic similarity between all pairs of nodes in a graph.
39 | Then adds the continuous similarity score as an edge attribute.
40 | }
41 | \examples{
42 | data(eds_marfan_kg)
43 | g <- eds_marfan_kg |>
44 |           fetch_nodes(query_ids = "MONDO:0007525") |>
45 |           expand(predicates = "biolink:has_phenotype",
46 |                  categories = "biolink:PhenotypicFeature")|>
47 |           expand(categories = "biolink:Gene")
48 | g <- graph_semsim(g)
49 | edges(g)$similarity
50 | }
51 | 


--------------------------------------------------------------------------------
/R/file_engine_check.R:
--------------------------------------------------------------------------------
 1 | #' Check availability of a file-based engine
 2 | #'
 3 | #' Attempts to connect to the specified file-based engine. Returns FALSE if the file is not available or not properly formatted.
 4 | #'
 5 | #' @param filename A character string indicating the path to the file-based engine.
 6 | #' @param warn A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.
 7 | #' @return TRUE if the database is available and properly formatted, FALSE otherwise.
 8 | #' @export
 9 | #' @examplesIf file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz")
10 | #' print(file_engine_check("https://kghub.io/kg-obo/sepio/2023-06-13/sepio_kgx_tsv.tar.gz"))
11 | #' print(file_engine_check("https://no-such-host.kghub.io/sepio_kgx_tsv.tar.gz"))
12 | #' print(file_engine_check(system.file("extdata", "mondo_kgx_tsv.tar.gz", package = "monarchr")))
13 | #' print(file_engine_check(system.file("extdata", "nosuch_kgx_tsv.tar.gz", package = "monarchr")))
14 | file_engine_check <- function(filename, warn = TRUE) {
15 |     # use try to see if we can successfully create a connection; return TRUE if successful, FALSE if not
16 | 	tryCatch({
17 | 		e <- file_engine(filename)
18 | 		return(TRUE)
19 | 	}, error = function(e) {
20 | 		if(warn) {
21 |             warning(e$message)
22 |         }
23 | 		return(FALSE)
24 | 	}, warning = function(e) {
25 |         if(warn) {
26 |             warning(e$message)
27 |         }
28 |         return(FALSE)
29 |     })
30 | }
31 | 


--------------------------------------------------------------------------------
/tests/testthat/test-tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | 
 5 | test_that("tbl_kgx constructor follows the rules", {
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 |     # first up, properly formatted node and edge dfs should work
 9 |     nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar"))
10 |     edges_df <- data.frame(subject = c("a"), object = c("b"), predicate = c("baz"))
11 |     g <- tbl_kgx(nodes_df, edges_df)
12 |     expect_s3_class(g, "tbl_kgx")
13 | 
14 |     # if the node df is missing the id column, it should fail
15 |     nodes_df <- data.frame(category = c("foo", "bar"))
16 |     edges_df <- data.frame(subject = c("a"), object = c("b"), predicate = c("baz"))
17 |     expect_error(tbl_kgx(nodes_df, edges_df))
18 | 
19 |     # if the edge df is missing the subject column, it should fail
20 |     nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar"))
21 |     edges_df <- data.frame(object = c("b"), predicate = c("baz"))
22 |     expect_error(tbl_kgx(nodes_df, edges_df))
23 | 
24 |     # if the edge df is missing the object column, it should fail
25 |     nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar"))
26 |     edges_df <- data.frame(subject = c("a"), predicate = c("baz"))
27 |     expect_error(tbl_kgx(nodes_df, edges_df))
28 | 
29 |     # if the edge df is missing the predicate column, it should fail
30 |     nodes_df <- data.frame(id = c("a", "b"), category = c("foo", "bar"))
31 |     edges_df <- data.frame(subject = c("a"), object = c("b"))
32 |     expect_error(tbl_kgx(nodes_df, edges_df))
33 | })


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "`r read.dcf('DESCRIPTION', fields = 'Package')[1]`"  
 3 | author: "`r rworkflows::use_badges(branch='main', add_hex = FALSE, add_codecov_graphs = FALSE, add_doi = '10.5281/zenodo.14553217')`"
 4 | date: "README updated: <i>`r format( Sys.Date(), '%b-%d-%Y')`</i>"
 5 | output:
 6 |   github_document
 7 | ---
 8 | 
 9 | <!-- To modify Package/Title/Description/Authors fields, edit the DESCRIPTION file -->
10 | 
11 | ```{r, echo=FALSE, include=FALSE}
12 | pkg <- read.dcf("DESCRIPTION", fields = "Package")[1]
13 | title <- read.dcf("DESCRIPTION", fields = "Title")[1]
14 | description <- read.dcf("DESCRIPTION", fields = "Description")[1]|>
15 | 	gsub(pattern="\n",replacement=" ")
16 | URL <- read.dcf('DESCRIPTION', fields = 'URL')[1]
17 | owner <- tolower(strsplit(URL,"/")[[1]][4])
18 | ```
19 | 
20 | ## `r pkg`: `r title`
21 | 
22 | ### `r description`
23 | 
24 | `monarchr` provides a tidy interface to data hosted at
25 | <https://monarchinitiative.org>, and other knowledge graphs in KGX format (e.g. those at [KGHub](https://kghub.org/)).
26 | 
27 | -   [Website](https://monarch-initiative.github.io/monarchr/)
28 | -   [Get started](https://monarch-initiative.github.io/monarchr/articles/monarchr.html)
29 | 
30 | <!-- If you use ``r pkg``, please cite:  -->
31 | 
32 | <!-- Modify this by editing the file: inst/CITATION  -->
33 | 
34 | <!-- > `r utils::citation(pkg)$textVersion` -->
35 | 
36 | Installation:
37 | 
38 | ``` r
39 | if(!require("BiocManager")) install.packages("BiocManager")
40 | 
41 | BiocManager::install("monarch-initiative/monarchr", update=FALSE)
42 | library(monarchr)
43 | ```
44 | 


--------------------------------------------------------------------------------
/man/set_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/set_engine.R, R/set_engine.tbl_kgx.R
 3 | \name{set_engine}
 4 | \alias{set_engine}
 5 | \title{Set the engine for a graph. See warning in details.}
 6 | \usage{
 7 | set_engine(g, engine)
 8 | 
 9 | set_engine(g, engine)
10 | }
11 | \arguments{
12 | \item{g}{A tbl_kgx graph.}
13 | 
14 | \item{engine}{An engine object.}
15 | }
16 | \value{
17 | A tbl_kgx graph.
18 | 
19 | A tbl_kgx graph.
20 | }
21 | \description{
22 | Sets a given graph's engine to a given engine object and returns the graph.
23 | WARNING: changing the backing engine for a graph dynamically is not
24 | yet fully supported or tested.
25 | 
26 | Sets a given graph's engine to a given engine object and returns the graph.
27 | WARNING: changing the backing engine for a graph dynamically is not
28 | yet fully supported or tested.
29 | }
30 | \examples{
31 | # Using example KGX file packaged with monarchr
32 | data(eds_marfan_kg)
33 | 
34 | g <- eds_marfan_kg |>
35 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
36 | 
37 | other_engine <- eds_marfan_kg # this could be a different file-engine (see `file_engine()`)
38 | g <- set_engine(g, other_engine)
39 | 
40 | print(get_engine(g))
41 | 
42 | # Using example KGX file packaged with monarchr
43 | data(eds_marfan_kg)
44 | 
45 | g <- eds_marfan_kg |>
46 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
47 | 
48 | other_engine <- eds_marfan_kg # this could be a different file engine (see `file_engine()`)
49 | g <- set_engine(g, other_engine)
50 | 
51 | print(get_engine(g))
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/man/cytoscape.tbl_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cytoscape.tbl_kgx.R
 3 | \name{cytoscape.tbl_kgx}
 4 | \alias{cytoscape.tbl_kgx}
 5 | \title{Send a graph to Cytoscape}
 6 | \usage{
 7 | \method{cytoscape}{tbl_kgx}(g, ...)
 8 | }
 9 | \arguments{
10 | \item{g}{A \code{tbl_kgx()} graph to visualize.}
11 | 
12 | \item{...}{other parameters passed to RCy3 functions, e.g. \code{base.url}.}
13 | }
14 | \value{
15 | NULL, invisibly
16 | }
17 | \description{
18 | Given a tbl_kgx graph, send it to Cytoscape for visualization. Node labels
19 | are mapped to node \code{name} (if available, otherwise they default to node \code{id}),
20 | node color is mapped to \code{pcategory}, edge color is mapped to \code{predicate},
21 | node hover-over text is set to \code{description} (if available, otherwise node \code{id}),
22 | and edge hover-over text is set to \code{predicate}. Nodes are layed out
23 | using the Kamada-Kawai method. These properties and more may be customized in
24 | the Cytoscape application. This function requires that Cytoscape is installed
25 | and running independently of the R session.
26 | }
27 | \examples{
28 | \dontshow{if (FALSE) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
29 | data(eds_marfan_kg)
30 | g <- eds_marfan_kg |>
31 | 	 fetch_nodes(query_ids = "MONDO:0020066") |>
32 | 	 expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |>
33 | 	 expand(categories = c("biolink:PhenotypicFeature", "biolink:Gene"))
34 | 
35 | # Cytoscape must be installed and running
36 | cytoscape(g)
37 | \dontshow{\}) # examplesIf}
38 | }
39 | 


--------------------------------------------------------------------------------
/man/transitive_reduction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/transitive_reduction.R
 3 | \name{transitive_reduction}
 4 | \alias{transitive_reduction}
 5 | \title{Compute transitive reduction over a predicate.}
 6 | \usage{
 7 | transitive_reduction(g, predicate = "biolink:subclass_of")
 8 | }
 9 | \arguments{
10 | \item{g}{The \code{tbl_kgx} graph to compute on.}
11 | 
12 | \item{predicate}{The edge predicate to reduce over.}
13 | }
14 | \value{
15 | Graph with transitive edges added.
16 | }
17 | \description{
18 | Computes the transitive reduction of a graph, treating the specified
19 | predicate as transitive.
20 | }
21 | \examples{
22 | data(eds_marfan_kg)
23 | 
24 | g <- eds_marfan_kg |> fetch_nodes(name == "Tall stature") |>
25 | 	expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |>
26 | 	bind_edges(data.frame(from = 2,
27 | 	                      to = 9,
28 | 	                      predicate = "biolink_subclass_of",
29 | 	                      primary_knowledge_source = "hand_annotated"))
30 | 
31 | plot(g, edge_color = primary_knowledge_source)
32 | 
33 | g_closed <- g |>
34 | 	transitive_closure(predicate = "biolink:subclass_of")
35 | 
36 | plot(g_closed, edge_color = primary_knowledge_source)
37 | 
38 | g_reduced <- g_closed |>
39 | 	transitive_reduction()
40 | 
41 | plot(g_reduced, edge_color = primary_knowledge_source)
42 | }
43 | \seealso{
44 | \code{\link[=transitive_closure]{transitive_closure()}}, \code{\link[=roll_up]{roll_up()}}, \code{\link[=transfer]{transfer()}}, \code{\link[=descendants]{descendants()}}, \code{\link[=ancestors]{ancestors()}}
45 | }
46 | 


--------------------------------------------------------------------------------
/man/tbl_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tbl_kgx.R
 3 | \name{tbl_kgx}
 4 | \alias{tbl_kgx}
 5 | \title{Create a KGX graph object}
 6 | \usage{
 7 | tbl_kgx(nodes = NULL, edges = NULL, attach_engine = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{nodes}{A data frame containing the nodes of the graph. Must have 'id' and 'category' columns.}
11 | 
12 | \item{edges}{A data frame containing the edges of the graph. Must have 'subject', 'predicate', and 'object' columns. Can be NULL.}
13 | 
14 | \item{attach_engine}{An engine to attach to the newly created graph for use in future queries based on the graph.}
15 | 
16 | \item{...}{Additional arguments passed to the function.}
17 | }
18 | \value{
19 | A KGX graph object.
20 | }
21 | \description{
22 | This function creates a new tbl_kgx object which inherits from tidygraph::tbl_graph, from node and edge dataframes, ensuring they conform to the KGX specification
23 | described at https://github.com/biolink/kgx/blob/master/specification/kgx-format.md. Specifically, nodes must have an 'id' and 'category' column,
24 | and edges, if provided, must have 'subject', 'predicate', and 'object' columns. The function allows graphs with no edges.
25 | The function sets 'from' and 'to' columns in the edges from 'subject' and 'object' respectively, and sets the node key to 'id'.
26 | Additional columns are allowed.
27 | }
28 | \details{
29 | This function will generally be called internally.
30 | }
31 | \examples{
32 | nodes <- data.frame(id = c("A", "B"), category = c("gene", "disease"))
33 | edges <- data.frame(subject = c("A"), predicate = c("associated_with"), object = c("B"))
34 | g <- tbl_kgx(nodes, edges)
35 | }
36 | 


--------------------------------------------------------------------------------
/man/eds_marfan_kg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data_example_kgs.R
 3 | \docType{data}
 4 | \name{eds_marfan_kg}
 5 | \alias{eds_marfan_kg}
 6 | \title{Example Ehlers-Danlos and Marfan Syndrome Knowledge Graph Engine}
 7 | \format{
 8 | An object of class \code{file_engine} for use with \code{fetch_nodes()}, \code{expand()}, etc.
 9 | }
10 | \usage{
11 | data(eds_marfan_kg)
12 | }
13 | \description{
14 | A small \code{file_engine()} Knowledge Graph (KG) containing Monarch Initiative
15 | data for Ehlers-Danlos Syndrome and Marfan Syndrome, including their subtypes,
16 | all entities connected to those diseases or subtypes, and all ancestors (supertypes)
17 | of all those diseases and entities. Generated 8/15/2024 via:
18 | }
19 | \details{
20 | \if{html}{\out{<div class="sourceCode">}}\preformatted{monarch_engine() |>
21 | 	fetch_nodes(query_ids = c("MONDO:0020066", "MONDO:0007947")) |>
22 | 	expand(predicates = "biolink:subclass_of", direction = "in", transitive = TRUE) |>
23 | 	expand() |>
24 | 	expand(predicates = "biolink:subclass_of", direction = "out", transitive = TRUE)
25 | }\if{html}{\out{</div>}}
26 | 
27 | This example engine may also be loaded from file via
28 | 
29 | \if{html}{\out{<div class="sourceCode">}}\preformatted{filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
30 | eds_marfan_kg <- file_engine(filename)
31 | }\if{html}{\out{</div>}}
32 | }
33 | \examples{
34 | data(eds_marfan_kg)
35 | phenos <- eds_marfan_kg |>
36 |           fetch_nodes(query_ids = "MONDO:0007525") |>
37 |           expand(predicates = "biolink:has_phenotype",
38 |                  categories = "biolink:PhenotypicFeature")
39 | }
40 | \keyword{datasets}
41 | 


--------------------------------------------------------------------------------
/R/base_engine.R:
--------------------------------------------------------------------------------
 1 | ########### Internal functions ###########
 2 | 
 3 | #' @title base_engine
 4 | #' @description A base class for all engines
 5 | #' @param name A character string indicating the name of the engine.
 6 | #' @param preferences A named list of preferences for the engine.
 7 | #' @param ... Other parameters (unused)
 8 | #' @return An object of class `base_engine`
 9 | base_engine <- function(name = "default_engine", preferences = NULL, ...) {
10 |     # read default prefs from the package
11 |     default_preferences <- options("default_prefs")$default_prefs
12 | 
13 |     if(!is.null(preferences)) {
14 |         # if preferences is a length-1 character vector ending with .yaml, and the file exists, read it
15 |         if(is.character(preferences) &&
16 |            length(preferences) == 1 &&
17 |            grepl("\\.yaml$", preferences) &&
18 |            file.exists(preferences)) {
19 |                 preferences <- yaml::read_yaml(preferences)
20 | 
21 |         # if it's a list, just use it
22 |         } else if(is.list(preferences)) {
23 |             preferences <- preferences
24 |         }
25 |     }
26 | 
27 |     # now, if preferences is still not null, we want to override the default entries
28 |     # that are provided in the preferences list
29 |     # but just those, leaving other defaults in place
30 |     if(!is.null(preferences)) {
31 |         for(p in names(preferences)) {
32 |             default_preferences[[p]] <- preferences[[p]]
33 |         }
34 |     }
35 | 
36 |     # now set preferences to the updated default preferences
37 |     preferences <- default_preferences
38 | 
39 |     obj <- list(name = name,
40 |                 preferences = preferences)
41 | 
42 |     class(obj) <- c("base_engine", class(obj))
43 |     return(obj)
44 | }
45 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | monarchr
 2 | ================
 3 | [![License: MIT + file
 4 | LICENSE](https://img.shields.io/badge/license-MIT%20+%20file%20LICENSE-blue.svg)](https://cran.r-project.org/web/licenses/MIT%20+%20file%20LICENSE)
 5 | [![](https://img.shields.io/badge/devel%20version-2.1.2-black.svg)](https://github.com/monarch-initiative/monarchr)
 6 | <br> [![R build
 7 | status](https://github.com/monarch-initiative/monarchr/workflows/rworkflows/badge.svg)](https://github.com/monarch-initiative/monarchr/actions)
 8 | [![](https://codecov.io/gh/monarch-initiative/monarchr/branch/main/graph/badge.svg)](https://app.codecov.io/gh/monarch-initiative/monarchr)
 9 | [![DOI](https://zenodo.org/badge/639616520.svg)](https://doi.org/10.5281/zenodo.14553217)
10 | <br>  
11 | <h4>  
12 | Authors: <i>Shawn O’Neil, Brian Schilder</i>  
13 | </h4>
14 | <h4>  
15 | README updated: <i>Jul-20-2024</i>  
16 | </h4>
17 | 
18 | <!-- To modify Package/Title/Description/Authors fields, edit the DESCRIPTION file -->
19 | 
20 | ## `monarchr`: Monarch Knowledge Graph Queries
21 | 
22 | ### R package for easy access, manipulation, and analysis of knowledge graphs.
23 | 
24 | `monarchr` provides a tidy interface to data hosted at
25 | <https://monarchinitiative.org>, and other knowledge graphs in KGX format (e.g. those at [KGHub](https://kghub.org/)).
26 | 
27 | - [Website](https://monarch-initiative.github.io/monarchr)
28 | - [Get
29 |   started](https://monarch-initiative.github.io/monarchr/articles/monarchr)
30 | 
31 | <!-- If you use `monarchr`, please cite:  -->
32 | <!-- Modify this by editing the file: inst/CITATION  -->
33 | <!-- >  -->
34 | 
35 | Installation:
36 | 
37 | ``` r
38 | if(!require("BiocManager")) install.packages("BiocManager")
39 | 
40 | BiocManager::install("monarch-initiative/monarchr", update=FALSE)
41 | library(monarchr)
42 | ```
43 | 


--------------------------------------------------------------------------------
/tests/testthat/test-kg_join.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("kg_join works", {
 5 | 
 6 |     g1 <- tbl_kgx(nodes = tibble(id = c("a", "b", "c"),
 7 |                                  category = c("gene", "gene", "disease"),
 8 |                                  source = c("g1", "g1", "g1")),
 9 |                   edges = tibble(subject = c("a", "b"),
10 |                                  predicate = c("interacts_with", "interacts_with"),
11 |                                  object = c("b", "c")))
12 | 
13 |     g2 <- tbl_kgx(nodes = tibble(id = c("c", "c", "d"),
14 |                                  category = c("disease", "disease", "gene"),
15 |                                  source = c("g2", "g2", "g2")),
16 |                   edges = tibble(subject = c("c"),
17 |                                  predicate = c("interacts_with"),
18 |                                  object = c("d")))
19 | 
20 |     res <- kg_join(g1, g2)
21 | 
22 |     # because of the duplication of c in the nodes, we should have 5 nodes
23 |     # and because of the edge between c and d, we should have 2 edges for that one
24 |     # along with the other three edges for 5 total
25 |     expect_equal(nrow(nodes(res)), 5)
26 |     expect_equal(nrow(edges(res)), 5)
27 |     expect_equal(nrow(edges(res) |> filter(subject == "c")), 2)
28 |     expect_equal(nrow(edges(res) |> filter(subject == "a")), 1)
29 |     expect_equal(nrow(edges(res) |> filter(subject == "b")), 2)
30 |     expect_equal(nrow(edges(res) |> filter(object == "b")), 1)
31 | 
32 |     filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
33 | 
34 |     res2 <- file_engine(filename) |>
35 |     	        fetch_nodes(query_ids = "MONDO:0007525")
36 | 
37 |     # there should be no edges and one node
38 |     expect_equal(nrow(nodes(res2)), 1)
39 |     expect_equal(nrow(edges(res2)), 0)
40 | 
41 | 
42 | })
43 | 


--------------------------------------------------------------------------------
/man/transfer.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/transfer.R
 3 | \name{transfer}
 4 | \alias{transfer}
 5 | \title{Transfer information over edges to nodes.}
 6 | \usage{
 7 | transfer(colname = NULL, over, direction = "out")
 8 | }
 9 | \arguments{
10 | \item{colname}{The node column to transfer information from}
11 | 
12 | \item{over}{The edge predicate to transfer information over}
13 | 
14 | \item{direction}{Whether to transfer information along the predicate direction ("out") or against ("in")}
15 | }
16 | \value{
17 | Vector or list, with one entry per node.
18 | }
19 | \description{
20 | Used to 'transfer' information from nodes to other nodes across
21 | specific predicates, either in an outward direction (along the edge
22 | direction) or inward (against the edge direction). Returns a node-property
23 | column; intended to be used with mutate() on nodes.
24 | }
25 | \details{
26 | The return value will be either a list, or if the result would be a list
27 | with all length-1 or length-0 elements, a vector with 0-length elements
28 | replaced by NA. Practically, this results in a list when necessary and a vector
29 | otherwise.
30 | }
31 | \examples{
32 | data(eds_marfan_kg)
33 | 
34 | engine |> eds_marfan_kg |>
35 | 	expand(categories = "biolink:Disease") |>
36 | 	activate(nodes) |>
37 | 	mutate(caused_by_genes =
38 | 	         transfer(name, over = "biolink:causes", direction = "out")) |>
39 | 	mutate(causes_diseases =
40 | 	         transfer(name, over = "biolink:causes", direction = "in")) |>
41 | 	plot.tbl_kgx(node_label = paste(name,
42 | 																	" caused by: ", caused_by_genes,
43 | 																	" causes: ", causes_diseases),
44 | 							 label_size = 3)
45 | 
46 | }
47 | \seealso{
48 | \code{\link[=roll_up]{roll_up()}}, \link{transitive_closure}, \code{\link[=descendants]{descendants()}}, \code{\link[=ancestors]{ancestors()}}
49 | }
50 | 


--------------------------------------------------------------------------------
/R/summarize_neighborhood.R:
--------------------------------------------------------------------------------
 1 | #' Summarize neighborhood
 2 | #'
 3 | #' Summarizes the neighborhood of the nodes of a given graph. Specifically,
 4 | #' letting $N$ be the set of nodes in the given graph, this function retrieves
 5 | #' counts of relationship predicates (or node categories) of nodes connected to
 6 | #' $N$ but not in $N$. This can be useful to examine the scale and scope of a
 7 | #' graph's collective neighborhood in the larger KG.
 8 | #'
 9 | #' Note that the number of relationships returned may be larger than the number
10 | #' of nodes they connect to; use summarize = "edges" to see edge counts between
11 | #' nodes of different categories, and summarize = "nodes" to see counts of connected
12 | #' node categories.
13 | #'
14 | #' Additionally, when using `summarize = "edges"`, the summary will include
15 | #' edges that may already be present in the query graph.
16 | #'
17 | #' It is also possible to specify the direction of edges to include in the
18 | #' neighborhood, using the direction parameter. The default is "both", which
19 | #' includes both incoming and outgoing edges.
20 | #'
21 | #'
22 | #' @param graph A query graph to summarize the surrounding neighborhood for
23 | #' @param engine (Optional) An engine to use. If not provided, the graph's most recent engine is used.
24 | #' @param direction The direction of edges to include in the neighborhood
25 | #' @param summarize Whether to summarize edges or nodes (default "edges")
26 | #'
27 | #' @return A tbl_kgx graph
28 | #'
29 | #' @export
30 | #' @examplesIf monarch_engine_check()
31 | #' monarch_search("fanconi anemia", limit = 5) |>
32 | #'   summarize_neighborhood(direction = "both", summarize = "edges")
33 | #' @import tidygraph
34 | #' @import dplyr
35 | #' @importFrom assertthat assert_that
36 | summarize_neighborhood <- function(graph, engine = NULL, direction = "both", summarize = "edges") {
37 |     UseMethod("summarize_neighborhood")
38 | }
39 | 


--------------------------------------------------------------------------------
/R/graph_semsim.R:
--------------------------------------------------------------------------------
 1 | #' Add semantic similarity
 2 | #'
 3 | #' First computes semantic similarity between all pairs of nodes in a graph.
 4 | #' Then adds the continuous similarity score as an edge attribute.
 5 | #' @param fun The similarity function to use.
 6 | #'  Default is \link[igraph]{similarity}.
 7 | #' @param col Name of the new edge attribute to store the similarity score.
 8 | #' @param nm The node attribute to use as the matrix row/colnames.
 9 | #' @param return_matrix Return the similarity matrix instead of the graph.
10 | #' @param sparse Return a sparse matrix instead of a dense matrix.
11 | #' @param ... Additional arguments passed to the similarity function
12 | #'  (\code{fun}).
13 | #' @import tidygraph
14 | #' @import dplyr
15 | #' @inheritParams nodes
16 | #' @returns Graph object with similarity added as a new edge attribute.
17 | #' @export
18 | #' @examples
19 | #' data(eds_marfan_kg)
20 | #' g <- eds_marfan_kg |>
21 | #'           fetch_nodes(query_ids = "MONDO:0007525") |>
22 | #'           expand(predicates = "biolink:has_phenotype",
23 | #'                  categories = "biolink:PhenotypicFeature")|>
24 | #'           expand(categories = "biolink:Gene")
25 | #' g <- graph_semsim(g)
26 | #' edges(g)$similarity
27 | graph_semsim <- function(graph,
28 | 												 fun=igraph::similarity,
29 | 												 col="similarity",
30 | 												 nm="id",
31 | 												 sparse=TRUE,
32 | 												 return_matrix=FALSE,
33 | 												 ...){
34 | 	active_tbl <- active(graph)
35 | 	from <- to <- NULL;
36 | 	message("Computing pairwise node similarity.")
37 | 	X <- fun(graph, ...)
38 | 	if(sparse) {
39 | 		requireNamespace("Matrix")
40 | 		X <- Matrix::Matrix(X, sparse=TRUE)
41 | 	}
42 | 	rownames(X) <- colnames(X) <- nodes(graph)[[nm]]
43 | 	if(return_matrix) return(X)
44 | 
45 | 	graph <- graph|>
46 | 		activate(edges)|>
47 | 		dplyr::mutate(!!col:=purrr::map2_dbl(from, to, ~ X[.y, .x])) |>
48 | 		activate(!!rlang::sym(active_tbl))
49 | 
50 | 	return(graph)
51 | }
52 | 


--------------------------------------------------------------------------------
/R/neo4j_engine_check.R:
--------------------------------------------------------------------------------
 1 | #' Check if a neo4j database is available and properly formatted
 2 | #' 
 3 | #' Attempts to connect to the specified Neo4J database and run a query to see if it is properly formatted. Returns FALSE if the database is not available or not properly formatted.
 4 | #' 
 5 | #' @param url A character string indicating the URL of the neo4j database.
 6 | #' @param username A character string indicating the username for the neo4j database (if needed).
 7 | #' @param password A character string indicating the password for the neo4j database (if needed).
 8 | #' @param warn A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.
 9 | #' 
10 | #' @return TRUE if the database is available and properly formatted, FALSE otherwise.
11 | #' @export
12 | #' @examples
13 | #' print(neo4j_engine_check("https://neo4j.monarchinitiative.org"))
14 | #' print(neo4j_engine_check("https://no-such-db.monarchinitiative.org"))
15 | #' 
16 | neo4j_engine_check <- function(url,
17 | 						       username = NA,
18 | 						       password = NA,
19 | 							   warn = TRUE) {
20 | 	# this will throw an error if it cannot connect:
21 | 	# graph_conn <- neo2R::startGraph(url, username = username, password = password)
22 | 
23 |     # use try to see if we can successfully create a connection; return TRUE if successful, FALSE if not
24 | 	tryCatch({
25 | 		e <- neo4j_engine(url = url, username = username, password = password)
26 | 		# check to see if we can run a query, we'll just grab one random node; this should fail 
27 | 		# if something is wrong (e.g. the database is not in KGX format)
28 | 		cypher_query(e, "MATCH (n)-[r]->(q) RETURN n, r, q LIMIT 1")
29 | 		return(TRUE)
30 | 	}, error = function(e) {
31 | 		if(warn) {
32 | 			warning(e$message)
33 | 		}
34 | 		return(FALSE)
35 | 	}, warning = function(e) {
36 | 		if(warn) {
37 | 			warning(e$message)
38 | 		}
39 | 		return(FALSE)
40 | 	})
41 | }
42 | 


--------------------------------------------------------------------------------
/R/kg_join.R:
--------------------------------------------------------------------------------
 1 | #' Join two KGX graphs by their nodes and edges.
 2 | #'
 3 | #' Given two KGX graphs, returns a new KGX graph that is the union of the two input graphs,
 4 | #' with any edges between nodes repeated for aother nodes with the same subject and object `id`.
 5 | #' The engine of the first graph is used for the new graph.
 6 | #'
 7 | #' This function first computes new node and edge data, by taking the full natural join of
 8 | #' node and edge data from the two input graphs, and then keeping unique rows. Note that nodes with
 9 | #' the same `id` that differ in any shared column are effectively kept as separate, taken
10 | #' to represent the same entity in different contexts. (However, a node with an additional property will be
11 | #' merged with a node without that property, as defined by the natural join.) In these
12 | #' cases, any edge that connects to one of these nodes is also valid for the other node, and so the
13 | #' method repeats edges across nodes with the same `id`.
14 | #'
15 | #'
16 | #' @param graph1 A `tbl_kgx()` graph.
17 | #' @param graph2 A `tbl_kgx()` graph.
18 | #' @param ... Other parameters (not used)
19 | #'
20 | #' @return A `tbl_kgx()` graph
21 | #' @export
22 | #' @examples
23 | #' ## Using example KGX file packaged with monarchr
24 | #' data(eds_marfan_kg)
25 | #'
26 | #' eds_and_phenos <- eds_marfan_kg |>
27 | #'                   fetch_nodes(query_ids = "MONDO:0007525") |>
28 | #'                   expand(predicates = "biolink:has_phenotype",
29 | #'                          categories = "biolink:PhenotypicFeature")
30 | #'
31 | #' marfan_and_phenos <- eds_marfan_kg |>
32 | #'                      fetch_nodes(query_ids = "MONDO:0007947") |>
33 | #'                      expand(predicates = "biolink:has_phenotype",
34 | #'                             categories = "biolink:PhenotypicFeature")
35 | #'
36 | #' combined <- kg_join(eds_and_phenos, marfan_and_phenos)
37 | #' print(combined)
38 | kg_join <- function(graph1, graph2, ...) {
39 |   UseMethod("kg_join")
40 | }
41 | 


--------------------------------------------------------------------------------
/man/file_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/file_engine.R
 3 | \name{file_engine}
 4 | \alias{file_engine}
 5 | \title{Create a knowledge graph engine object from a KGX-based tsv file}
 6 | \usage{
 7 | file_engine(filename, preferences = NULL, ...)
 8 | }
 9 | \arguments{
10 | \item{filename}{A character string indicating the filename or URL of the KGX-based tsv file.}
11 | 
12 | \item{preferences}{A named list of preferences for the engine.}
13 | 
14 | \item{...}{Additional arguments (unused).}
15 | }
16 | \value{
17 | An object of class \code{file_engine}
18 | }
19 | \description{
20 | Creates a knowledge graph engine backed by a KGX-based tab-separated file. This must be a filename or URL to a \code{.tar.gz} file containing a \verb{*_nodes.tsv} and \verb{*_edges.tsv} file. If a URL is provided, the file will be downloaded to the user's current working directory.
21 | }
22 | \details{
23 | Engines store preference information specifying how data are fetched and manipulated; for example,
24 | while node \code{category} is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"),
25 | typically a single category is used to represent the node in a graph, and is returned as the nodes' \code{pcategory}. A preference list of categories to use for \code{pcategory} is
26 | stored in the engine's preferences. A default set of preferences is stored in the package for use with KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md),
27 | but these can be overridden by the user.
28 | }
29 | \examples{
30 | library(tidygraph)
31 | library(dplyr)
32 | 
33 | # Using example KGX .tar.gz file packaged with monarchr
34 | filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
35 | engine <- file_engine(filename)
36 | 
37 | res <- engine |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947"))
38 | print(res)
39 | 
40 | }
41 | \seealso{
42 | \code{neo4j_engine()}, \code{monarch_engine()}
43 | }
44 | 


--------------------------------------------------------------------------------
/man/kg_join.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/kg_join.R
 3 | \name{kg_join}
 4 | \alias{kg_join}
 5 | \title{Join two KGX graphs by their nodes and edges.}
 6 | \usage{
 7 | kg_join(graph1, graph2, ...)
 8 | }
 9 | \arguments{
10 | \item{graph1}{A \code{tbl_kgx()} graph.}
11 | 
12 | \item{graph2}{A \code{tbl_kgx()} graph.}
13 | 
14 | \item{...}{Other parameters (not used)}
15 | }
16 | \value{
17 | A \code{tbl_kgx()} graph
18 | }
19 | \description{
20 | Given two KGX graphs, returns a new KGX graph that is the union of the two input graphs,
21 | with any edges between nodes repeated for aother nodes with the same subject and object \code{id}.
22 | The engine of the first graph is used for the new graph.
23 | }
24 | \details{
25 | This function first computes new node and edge data, by taking the full natural join of
26 | node and edge data from the two input graphs, and then keeping unique rows. Note that nodes with
27 | the same \code{id} that differ in any shared column are effectively kept as separate, taken
28 | to represent the same entity in different contexts. (However, a node with an additional property will be
29 | merged with a node without that property, as defined by the natural join.) In these
30 | cases, any edge that connects to one of these nodes is also valid for the other node, and so the
31 | method repeats edges across nodes with the same \code{id}.
32 | }
33 | \examples{
34 | ## Using example KGX file packaged with monarchr
35 | data(eds_marfan_kg)
36 | 
37 | eds_and_phenos <- eds_marfan_kg |>
38 |                   fetch_nodes(query_ids = "MONDO:0007525") |>
39 |                   expand(predicates = "biolink:has_phenotype",
40 |                          categories = "biolink:PhenotypicFeature")
41 | 
42 | marfan_and_phenos <- eds_marfan_kg |>
43 |                      fetch_nodes(query_ids = "MONDO:0007947") |>
44 |                      expand(predicates = "biolink:has_phenotype",
45 |                             categories = "biolink:PhenotypicFeature")
46 | 
47 | combined <- kg_join(eds_and_phenos, marfan_and_phenos)
48 | print(combined)
49 | }
50 | 


--------------------------------------------------------------------------------
/man/summarize_neighborhood.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summarize_neighborhood.R
 3 | \name{summarize_neighborhood}
 4 | \alias{summarize_neighborhood}
 5 | \title{Summarize neighborhood}
 6 | \usage{
 7 | summarize_neighborhood(
 8 |   graph,
 9 |   engine = NULL,
10 |   direction = "both",
11 |   summarize = "edges"
12 | )
13 | }
14 | \arguments{
15 | \item{graph}{A query graph to summarize the surrounding neighborhood for}
16 | 
17 | \item{engine}{(Optional) An engine to use. If not provided, the graph's most recent engine is used.}
18 | 
19 | \item{direction}{The direction of edges to include in the neighborhood}
20 | 
21 | \item{summarize}{Whether to summarize edges or nodes (default "edges")}
22 | }
23 | \value{
24 | A tbl_kgx graph
25 | }
26 | \description{
27 | Summarizes the neighborhood of the nodes of a given graph. Specifically,
28 | letting $N$ be the set of nodes in the given graph, this function retrieves
29 | counts of relationship predicates (or node categories) of nodes connected to
30 | $N$ but not in $N$. This can be useful to examine the scale and scope of a
31 | graph's collective neighborhood in the larger KG.
32 | }
33 | \details{
34 | Note that the number of relationships returned may be larger than the number
35 | of nodes they connect to; use summarize = "edges" to see edge counts between
36 | nodes of different categories, and summarize = "nodes" to see counts of connected
37 | node categories.
38 | 
39 | Additionally, when using \code{summarize = "edges"}, the summary will include
40 | edges that may already be present in the query graph.
41 | 
42 | It is also possible to specify the direction of edges to include in the
43 | neighborhood, using the direction parameter. The default is "both", which
44 | includes both incoming and outgoing edges.
45 | }
46 | \examples{
47 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
48 | monarch_search("fanconi anemia", limit = 5) |>
49 |   summarize_neighborhood(direction = "both", summarize = "edges")
50 | \dontshow{\}) # examplesIf}
51 | }
52 | 


--------------------------------------------------------------------------------
/man/expand_n.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/expand_n.R
 3 | \name{expand_n}
 4 | \alias{expand_n}
 5 | \title{Iteratively fetch additional knowledge graph edges connected to a query graph}
 6 | \usage{
 7 | expand_n(
 8 |   graph,
 9 |   return_each = FALSE,
10 |   direction = "both",
11 |   predicates = NULL,
12 |   categories = NULL,
13 |   transitive = NULL,
14 |   n = 1,
15 |   ...
16 | )
17 | }
18 | \arguments{
19 | \item{graph}{A query \code{tbl_kgx()} graph ot query with.}
20 | 
21 | \item{return_each}{If TRUE, return a list of graphs for each iteration.
22 | If FALSE, return the final graph with all expanded edges.}
23 | 
24 | \item{direction}{The direction of associations to fetch. Can be "in", "out", or "both". Default is "both".}
25 | 
26 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered.}
27 | 
28 | \item{categories}{A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched.}
29 | 
30 | \item{transitive}{NULL (not used in this function).}
31 | 
32 | \item{n}{Number of expansion iterations to run.}
33 | 
34 | \item{...}{Other parameters passed to methods.}
35 | }
36 | \value{
37 | A \code{tbl_kgx()} graph
38 | }
39 | \description{
40 | Given an initialized \link{tbl_kgx} graph, iteratively expand the graph
41 | \code{n} iterations using certain predicates/categories.
42 | Arguments can either be a single value or a list of values.
43 | If an argument is provided as a list, its length must be equal to the number
44 | of iterations (\code{n}).
45 | }
46 | \examples{
47 | ## Using example KGX file packaged with monarchr
48 | data(eds_marfan_kg)
49 | g <- eds_marfan_kg |>
50 |      fetch_nodes(query_ids = "MONDO:0007525") |>
51 |      expand(predicates = "biolink:has_phenotype",
52 |             categories = "biolink:PhenotypicFeature")
53 | 
54 | g_expanded <- g |>
55 |               expand_n(predicates = "biolink:subclass_of", n=3)
56 | }
57 | 


--------------------------------------------------------------------------------
/tests/testthat/test-file_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("we can load the example KG w/ data()", {
 5 | 	data(eds_marfan_kg, envir = environment())
 6 | 
 7 | 	e <- eds_marfan_kg
 8 | 
 9 | 	# repeating tests from test-fetch_nodes.file_engine.R
10 | 	g <- fetch_nodes(e, query_ids = c("MONDO:0007525", "MONDO:0007526"))
11 | 
12 | 	# ensure that the last_engine attribute is set
13 | 	expect_true(has_attr(g, "last_engine"))
14 | 	# and that it has the right class
15 | 	expect_true(inherits(attr(g, "last_engine"), "file_engine"))
16 | 
17 | 	nodes_df <- g %>% activate(nodes) %>% as.data.frame()
18 | 	# there should be an id column with 2 entries: MONDO:0007525 and HGNC:4635,
19 | 	# but we can't gaurantee the order
20 | 	expect_equal(nrow(nodes_df), 2)
21 | 	expect_true(all(nodes_df$id %in% c("MONDO:0007525", "MONDO:0007526")))
22 | 
23 | 	# there should be no edges
24 | 	edges_df <- g %>% activate(edges) %>% as.data.frame()
25 | 	expect_equal(nrow(edges_df), 0)
26 | })
27 | 
28 | 
29 | test_that("we can load data from url with file_engine", {
30 | 		filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
31 | 
32 |     e <- file_engine(filename)
33 | 
34 |     # the result should have class file_engine
35 |     expect_true(inherits(e, "file_engine"))
36 |     # and a filename attribute
37 |     expect_true("filename" %in% names(e))
38 |     # and a graph attribute
39 |     expect_true("graph" %in% names(e))
40 | 
41 |     # the graph attribute should be a tbl_kgx
42 |     expect_true(inherits(e$graph, "tbl_kgx"))
43 | 
44 |     # the nodes df should have a description field for this test of type character
45 |     nodes_df <- nodes(e$graph)
46 |     expect_true("description" %in% names(nodes_df))
47 | 
48 |     # the edges df should have a knowledge_source field for this test of type character
49 |     edges_df <- edges(e$graph)
50 |     expect_true("knowledge_source" %in% names(edges_df))
51 | 
52 |     # the nodes df category should be a list column
53 |     expect_true(is.list(nodes_df$category))
54 | 
55 |     # description should not be a list col
56 |     expect_true(!is.list(nodes_df$description))
57 | })
58 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fetch_nodes.neo4j_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | 
 5 | test_that("fetch_nodes neo4j works with basid id query", {
 6 |     #testthat::skip("temporary skip")
 7 | 
 8 |     e <- monarch_engine()
 9 | 
10 |     # fetch_nodes(id %in% c("MONDO:0007525", "HGNC:4635")) should result in an error
11 |     # do so silently in the logs...
12 |     g <- fetch_nodes(e, query_ids = c("MONDO:0007525", "HGNC:4635"))
13 | 
14 |     nodes_df <- g %>% activate(nodes) %>% as.data.frame()
15 |     # there should be an id column with 2 entries: MONDO:0007525 and HGNC:4635,
16 |     # but we can't gaurantee the order
17 |     expect_contains(2 + -1:2, nrow(nodes_df))
18 |     expect_true(all(nodes_df$id %in% c("MONDO:0007525", "HGNC:4635")))
19 | 
20 |     # there should be no edges
21 |     edges_df <- g %>% activate(edges) %>% as.data.frame()
22 |     expect_equal(nrow(edges_df), 0)
23 | })
24 | 
25 | test_that("fetch_nodes neo4j works with complex query syntax", {
26 |     e <- monarch_engine()
27 |     g <- e %>% fetch_nodes(id == "MONDO:0007525")
28 | 
29 |     nodes_df <- g %>% activate(nodes) %>% as.data.frame()
30 |     expect_equal(nrow(nodes_df), 1)
31 |     expect_equal(nodes_df$id, "MONDO:0007525")
32 | 
33 |     # check to see that we can chain the fetch_nodes function with other functions
34 |     g <- e %>%
35 |       fetch_nodes(id == "MONDO:0007525") %>%
36 |       expand(categories = "biolink:Gene")
37 | 
38 |     # this result should have 3 nodes and 3 edges
39 |     nodes_df <- g %>% activate(nodes) %>% as.data.frame()
40 |     expect_contains(3 + -2:2, nrow(nodes_df))
41 | 
42 |     edges_df <- g %>% activate(edges) %>% as.data.frame()
43 |     expect_contains(3 + -2:2, nrow(edges_df))
44 | })
45 | 
46 | test_that("fetch_nodes limit works with neo4j_engine", {
47 | 	e <- monarch_engine()
48 | 	expect_warning(
49 | 	g <- e %>% fetch_nodes(in_taxon_label == "Homo sapiens", limit = 10))
50 | 
51 | 	nodes_df <- g %>% activate(nodes) %>% as.data.frame()
52 | 	expect_contains(10 + -2:2, nrow(nodes_df))
53 | 
54 | 	e <- monarch_engine()
55 | 	expect_warning(
56 | 	g <- e %>% fetch_nodes(in_taxon_label == "Homo sapiens", limit = 5))
57 | 
58 | 	nodes_df <- g %>% activate(nodes) %>% as.data.frame()
59 | 	expect_contains(5 + -1:2, nrow(nodes_df))
60 | })
61 | 


--------------------------------------------------------------------------------
/R/monarch_engine_check.R:
--------------------------------------------------------------------------------
 1 | #' Check availability of Monarch Initiative API
 2 | #' 
 3 | #' Attempts to connect to the Monarch Initiative API and use the specified functionality. Returns FALSE if the API is not available the result is not as expected.
 4 | #' 
 5 | #' @param warn A logical indicating whether to print a warning message if with failure information if the database is not available or not properly formatted. Default is TRUE.
 6 | #' @param service The service to check: "search", "semsim", "graph", or a vector of these. Default is "graph".
 7 | #' @return TRUE if the available features are online, FALSE otherwise.
 8 | #' @export
 9 | #' @examples
10 | #' print(monarch_engine_check())
11 | #' 
12 | monarch_engine_check <- function(warn = TRUE, service = "graph") {
13 | 	# this will throw an error if it cannot connect:
14 | 	# graph_conn <- neo2R::startGraph(url, username = username, password = password)
15 | 
16 |     # use try to see if we can successfully create a connection; return TRUE if successful, FALSE if not
17 | 	tryCatch({
18 |         if("graph" %in% service) {
19 |     		e <- monarch_engine()
20 |     		# check to see if we can run a query, we'll just grab one random node; this should fail 
21 |     		# if something is wrong (e.g. the database is not in KGX format)
22 |     		cypher_query(e, "MATCH (n)-[r]->(q) RETURN n, r, q LIMIT 1")
23 |     		return(TRUE)
24 |         } else if("search" %in% service) {
25 |             cf_hits <- monarch_search("Cystic fibrosis", category = "biolink:Disease", limit = 5)
26 |             return(TRUE)
27 |         } else if("semsim" %in% service) {
28 |             phenos1 <- tbl_kgx(nodes = data.frame(id = c("HP:0000001", "HP:0000002", "HP:0000003")))
29 |             phenos2 <- tbl_kgx(nodes = data.frame(id = c("HP:0000004", "HP:0000005", "HP:0000006")))
30 |             semsim <- monarch_semsim(phenos1, phenos2)
31 |             return(TRUE)
32 |         } else {
33 |             warning("Service must be one of 'search', 'semsim', or 'graph'")
34 |             return(FALSE)
35 |         }
36 | 	}, error = function(e) {
37 |         if(warn) {
38 |             warning(e$message)
39 |         }
40 | 		return(FALSE)
41 | 	}, warning = function(e) {
42 |         if(warn) {
43 |             warning(e$message)
44 |         }
45 |         return(FALSE)
46 |     })
47 | }


--------------------------------------------------------------------------------
/man/plot.tbl_kgx.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot.tbl_kgx.R
 3 | \name{plot.tbl_kgx}
 4 | \alias{plot.tbl_kgx}
 5 | \title{Specialized \code{plot()} function for KGX graphs}
 6 | \usage{
 7 | \method{plot}{tbl_kgx}(
 8 |   g,
 9 |   ...,
10 |   layout = "auto",
11 |   node_color = pcategory,
12 |   node_shape = namespace,
13 |   edge_color = predicate,
14 |   edge_linetype = primary_knowledge_source,
15 |   node_label = name,
16 |   plot_ids = FALSE,
17 |   label_size = 2,
18 |   fan_strength = 2,
19 |   edge_alpha = 0.9,
20 |   node_alpha = 0.9
21 | )
22 | }
23 | \arguments{
24 | \item{g}{A \link{tbl_kgx} graph.}
25 | 
26 | \item{...}{
27 |   Arguments passed on to \code{\link[ggraph:ggraph]{ggraph::ggraph}}
28 |   \describe{
29 |     \item{\code{graph}}{The object containing the graph. See \emph{Details} for a list
30 | of supported classes. Or a \code{layout_ggraph} object as returned from
31 | \code{create_layout} in which case all subsequent arguments is ignored.}
32 |   }}
33 | 
34 | \item{layout}{The layout to use for the plot. Default is "auto" as used by \code{ggraph}.}
35 | 
36 | \item{node_color}{The column to use for node color. Default is "pcategory".}
37 | 
38 | \item{node_shape}{The column to use for node shape Default is "namespace".}
39 | 
40 | \item{edge_color}{The column to use for edge color. Default is "predicate".}
41 | 
42 | \item{edge_linetype}{The column to use for edge line type. Default is "primary_knowledge_source".}
43 | 
44 | \item{node_label}{The column to use for node labels. Defaults to "name".}
45 | 
46 | \item{plot_ids}{Whether to show node IDs in node labels. Defaults to FALSE.}
47 | 
48 | \item{label_size}{Size of node label text. Default is 2.}
49 | 
50 | \item{fan_strength}{Fan strength in ggraph's geom_edge_fan, Default is 2.}
51 | 
52 | \item{edge_alpha}{Alpha value for edges, default 0.9.}
53 | 
54 | \item{node_alpha}{Alpha value for nodes, default 0.9.}
55 | }
56 | \description{
57 | Specialized \code{plot()} function for KGX graphs
58 | }
59 | \examples{
60 | data(eds_marfan_kg)
61 | g <- eds_marfan_kg |>
62 |      fetch_nodes(query_ids = "MONDO:0007525") |>
63 |      expand(predicates = "biolink:has_phenotype",
64 |             categories = "biolink:PhenotypicFeature")|>
65 |      expand(categories = "biolink:Gene")
66 | plot(g)
67 | }
68 | 


--------------------------------------------------------------------------------
/R/transitive_reduction.R:
--------------------------------------------------------------------------------
 1 | #' Compute transitive reduction over a predicate.
 2 | #'
 3 | #' Computes the transitive reduction of a graph, treating the specified
 4 | #' predicate as transitive.
 5 | #'
 6 | #' @return Graph with transitive edges added.
 7 | #' @seealso [transitive_closure()], [roll_up()], [transfer()], [descendants()], [ancestors()]
 8 | #' @param g The `tbl_kgx` graph to compute on.
 9 | #' @param predicate The edge predicate to reduce over.
10 | #'
11 | #' @examples
12 | #' data(eds_marfan_kg)
13 | #'
14 | #' g <- eds_marfan_kg |> fetch_nodes(name == "Tall stature") |>
15 | #' 	expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |>
16 | #' 	bind_edges(data.frame(from = 2,
17 | #' 	                      to = 9,
18 | #' 	                      predicate = "biolink_subclass_of",
19 | #' 	                      primary_knowledge_source = "hand_annotated"))
20 | #'
21 | #' plot(g, edge_color = primary_knowledge_source)
22 | #'
23 | #' g_closed <- g |>
24 | #' 	transitive_closure(predicate = "biolink:subclass_of")
25 | #'
26 | #' plot(g_closed, edge_color = primary_knowledge_source)
27 | #'
28 | #' g_reduced <- g_closed |>
29 | #' 	transitive_reduction()
30 | #'
31 | #' plot(g_reduced, edge_color = primary_knowledge_source)
32 | #' @import tidygraph
33 | #' @import dplyr
34 | #' @importFrom sets as.set
35 | #' @importFrom relations endorelation
36 | #' @importFrom relations relation_incidence
37 | #' @export
38 | transitive_reduction <- function(g, predicate = "biolink:subclass_of") {
39 | 	# first we make a copy
40 | 	active_tbl <- active(g)
41 | 	g2 <- g
42 | 
43 | 	# in the original, remove the predicate edges
44 | 	g <- g |>
45 | 		activate(edges) |>
46 | 		filter(predicate != predicate)
47 | 
48 | 	df <- g2 |> activate(edges) |> as.data.frame()
49 | 	r <- endorelation(
50 | 		domain = lapply(unique(unlist(df[c("from", "to")])), sets::as.set),
51 | 		graph = df[c("from", "to")]
52 | 	)
53 | 	mat <- relation_incidence(relations::transitive_reduction(r))
54 | 
55 | 	keep_edges <- which(mat == 1, arr.ind = TRUE) |>
56 | 		as.data.frame() |>
57 | 		rename(from = row, to = col)
58 | 
59 | 	g_reduced <- g2 |>
60 | 		activate(edges) |>
61 | 		semi_join(keep_edges, by = c("from", "to"))
62 | 
63 | 	# merge the original w g_reduced, adding back just the reduction edges
64 | 	suppressMessages(g <- kg_join(g, g_reduced), classes = "message") # suppress joining info
65 | 	g <- g |> activate(!!rlang::sym(active_tbl))
66 | 
67 | 	return(g)
68 | }
69 | 


--------------------------------------------------------------------------------
/R/expand.R:
--------------------------------------------------------------------------------
 1 | #' Fetch additional knowledge graph edges connected to a query graph
 2 | #'
 3 | #' Given an optional KG engine (e.g. a `file_engine()`,
 4 | #' `neo4j_engine()`, or `monarch_engine()`) and a query `tbl_kgx()` graph, fetches additional nodes and edges
 5 | #' from the KG, expanding the query graph according to specific criteria. If the first parameter is an engine, that
 6 | #' engine is used; if the first parameter is a query graph, the most recent engine associated with the graph is used.
 7 | #'
 8 | #'
 9 | #' @param graph A query `tbl_kgx()` graph ot query with.
10 | #' @param engine (Optional) An engine to use for fetching query graph edges. If not provided, the graph's most recent engine is used.
11 | #' @param direction The direction of associations to fetch. Can be "in", "out", or "both". Default is "both".
12 | #' @param predicates A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered.
13 | #' @param categories A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched.
14 | #' @param transitive If TRUE, include transitive closure of the neighborhood. Default is FALSE. Useful in combination with predicates like `biolink:subclass_of`.
15 | #' @param ... Other parameters passed to methods.
16 | #'
17 | #' @return A `tbl_kgx()` graph
18 | #' @export
19 | #' @examplesIf monarch_engine_check()
20 | #' ## Using Monarch (hosted)
21 | #' phenos <- monarch_engine() |>
22 | #'           fetch_nodes(query_ids = "MONDO:0007525") |>
23 | #'           expand(predicates = "biolink:has_phenotype",
24 | #'                  categories = "biolink:PhenotypicFeature")
25 | #'
26 | #' print(phenos)
27 | #'
28 | #'
29 | #'
30 | #' @examples
31 | #' ## Using example KGX file packaged with monarchr
32 | #' data(eds_marfan_kg)
33 | #' phenos <- eds_marfan_kg |>
34 | #'           fetch_nodes(query_ids = "MONDO:0007525") |>
35 | #'           expand(predicates = "biolink:has_phenotype",
36 | #'                  categories = "biolink:PhenotypicFeature")
37 | #'
38 | #' print(phenos)
39 | #' @import tidygraph
40 | #' @import dplyr
41 | #' @importFrom assertthat assert_that
42 | expand <- function(graph,
43 |   								 engine = NULL,
44 | 									 direction = "both",
45 | 									 predicates = NULL,
46 | 									 categories = NULL,
47 | 									 transitive = FALSE,
48 | 
49 | 									 					 ...) {
50 | 	UseMethod("expand")
51 | }
52 | 


--------------------------------------------------------------------------------
/tests/testthat/test-monarch_semsim.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | # g1 does not have a disease node, so in the result the target EDS should not be matched by any query
 5 | test_that("monarch_semsim works", {
 6 |     suppressWarnings({
 7 |         g1 <- monarch_engine() |> 
 8 |            fetch_nodes(query_ids = "MONDO:0007947") |> # Marfan syndrome
 9 |            expand(categories = "biolink:PhenotypicFeature", limit = 5) |>
10 |            activate(nodes) |>
11 |            filter(pcategory == "biolink:PhenotypicFeature") |>
12 |            mutate(source = "g1")
13 | 
14 |     # MONDO:0007522 EDS classic type
15 |         g2 <- monarch_engine() |> 
16 |            fetch_nodes(query_ids = "MONDO:0007522") |> # EDS classic type
17 |            expand(categories = "biolink:PhenotypicFeature", limit = 5) |>
18 |            activate(nodes) |>
19 |            mutate(source = "g2")
20 |      })
21 | 
22 |     sim <- monarch_semsim(g1, g2)
23 | 
24 |     # the number of edges in the result should equal the number of nodes in the query graph
25 |     expect_equal(sim |> activate(edges) |> nrow(), g1 |> activate(nodes) |> nrow())
26 |     # the edges subject should be the same as the query graph nodes
27 |     expect_equal(sim |> activate(edges) |> pull(subject) |> sort(), g1 |> activate(nodes) |> pull(id) |> sort())
28 |     # the "biolink:Disease" node in g2 should not be matched by any query
29 |     expect_equal(sum(edges(sim)$object == "MONDO:0007522"), 0)
30 | 
31 |     # now lets rerun the query, including the reverse and keeping all nodes
32 |     # using jaccard similarity for flavor
33 |     sim <- monarch_semsim(g1, g2, metric = "jaccard_similarity", include_reverse = TRUE, keep_unmatched = TRUE)
34 | 
35 |     # all query and target nodes will be subjects exactly once in the resulting edges
36 |     query_target_node_ids <- c(g1 |> activate(nodes) |> pull(id), g2 |> activate(nodes) |> pull(id))
37 |     expect_equal(sim |> activate(edges) |> pull(subject) |> sort(), query_target_node_ids |> sort())
38 | 
39 |     # the number of edges in the result should equal the number of nodes in the query graph plus the number of nodes in the target graph
40 |     expect_equal(edges(sim) |> nrow(), length(query_target_node_ids))
41 | 
42 |     # make sure the engine of the result is the same as the query graph
43 |     expect_equal(sim |> get_engine(), g1 |> get_engine())
44 | 
45 |     # test plot
46 |     #plot(sim |> graph_join(g1) |> graph_join(g2), node_color = paste(source, pcategory))
47 |     #print(sim)
48 | })


--------------------------------------------------------------------------------
/R/kg_edge_weights.R:
--------------------------------------------------------------------------------
 1 | #' Knowledge Graph edge weights
 2 | #'
 3 | #' Compute edge weights for the given \link{tbl_kgx} graph using
 4 | #' several pieces of categorical, ordinal, and continuous metadata.
 5 | #' @param fun Function to compute edge weights with across the
 6 | #' numerically encoded attributes. Default is \link{rowSums}.
 7 | #' @param normalise Normalise each encoding from 0-1 by dividing by the
 8 | #' maximum value. Default is \code{TRUE}.
 9 | #' @param encodings A list of named lists of encoding values for
10 | #' different edge attributes.
11 | #' @inheritParams nodes
12 | #' @import tidygraph
13 | #' @import dplyr
14 | #' @export
15 | #' @examples
16 | #' data(eds_marfan_kg)
17 | #' g <- eds_marfan_kg |>
18 | #'      fetch_nodes(query_ids = "MONDO:0007525") |>
19 | #'      expand(predicates = "biolink:has_phenotype",
20 | #'             categories = "biolink:PhenotypicFeature")|>
21 | #'      expand(categories = "biolink:Gene")
22 | #' g2 <- kg_edge_weights(g)
23 | #' edges(g2)$weight
24 | kg_edge_weights <- function(graph,
25 | 														normalise=TRUE,
26 | 														encodings=monarch_edge_weight_encodings(),
27 | 														fun=function(x){rowSums(x, na.rm = TRUE)}
28 | 														){
29 | 	active_tbl <- active(graph)
30 | 	encoded_cols <- c()
31 | 	for(key in names(encodings)){
32 | 		nm_encoded <- paste0(key,"_encoded")
33 | 		val <- encodings[[key]]
34 | 		if(is.null(val)){
35 | 			next
36 | 		}
37 | 		if(is.numeric(val)){
38 | 			# message(key,": numeric")
39 | 			encoded_cols <- c(encoded_cols, nm_encoded)
40 | 			graph <- graph|>
41 | 				activate(edges)|>
42 | 				mutate(!!nm_encoded:=edges(graph)[[key]])
43 | 			next
44 | 		}
45 | 		if(is.function(val)){
46 | 			# message(key,": function")
47 | 			encoded_cols <- c(encoded_cols, nm_encoded)
48 | 			graph <- graph|>
49 | 				activate(edges)|>
50 | 				mutate(!!nm_encoded:=val(!!key))
51 | 
52 | 			next
53 | 		}
54 | 		if(is.list(val)){
55 | 			# message(key,": list")
56 | 			encoded_cols <- c(encoded_cols, nm_encoded)
57 | 			graph <- graph|>
58 | 				activate(edges)|>
59 | 				mutate(!!nm_encoded:=ifelse(!!key %in% names(val), val[!!key], 0))
60 | 			next
61 | 		}
62 | 	}
63 | 	## normalise within each col
64 | 	if(normalise){
65 | 		graph <- graph|>
66 | 			activate(edges)|>
67 | 			mutate(
68 | 				across(all_of(encoded_cols),
69 | 							 ~(min(.x, na.rm = TRUE)) / (max(.x, na.rm = TRUE))
70 | 				)) |>
71 | 			activate(!!rlang::sym(active_tbl))
72 | 	}
73 | 	igraph::E(graph)$weight <- fun(edges(graph)[,unique(encoded_cols)])
74 | 	return(graph)
75 | }
76 | 


--------------------------------------------------------------------------------
/tests/testthat/test-neo4j_engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("neo4j_engine caching", {
 5 |   e <- neo4j_engine(url = c("http://neo4j.monarchinitiative.org:7474",
 6 |   													"http://neo4j.monarchinitiative.org",
 7 |   													"https://neo4j.monarchinitiative.org",
 8 |   													"http://no.such.url"),
 9 |   									cache = TRUE)
10 | 
11 |   		start_time <- Sys.time()
12 | 
13 |       g <- fetch_nodes(e, query_ids = "MONDO:0020066")
14 | 
15 |       test <- g %>% expand(direction = "in",
16 |                            predicates = "biolink:subclass_of",
17 |                            transitive = TRUE)
18 | 
19 |       end_time <- Sys.time()
20 |       no_cache_elapsed_time <- as.numeric(difftime(end_time, start_time, units = "secs"))
21 | 
22 |       start_time <- Sys.time()
23 | 
24 |     	test <- g %>% expand(direction = "in",
25 |     										 predicates = "biolink:subclass_of",
26 |     										 transitive = TRUE)
27 | 
28 |       end_time <- Sys.time()
29 |       cache_elapsed_time <- as.numeric(difftime(end_time, start_time, units = "secs"))
30 | 
31 |       # the cached version should be at least 10x faster
32 |       expect_true(cache_elapsed_time * 10 < no_cache_elapsed_time)
33 | })
34 | 
35 | 
36 | test_that("neo4j_engine works as expected (using monarch neo4j db)", {
37 |     #testthat::skip("temporary skip")
38 | 
39 |     # one of these should work
40 |     e <- neo4j_engine(url = c("http://neo4j.monarchinitiative.org",
41 |     													"https://neo4j.monarchinitiative.org",
42 |                               "http://no.such.url",
43 |                               "http://neo4j.monarchinitiative.org:7474"))
44 | 
45 |     g <- fetch_nodes(e, query_ids = "MONDO:0006043")
46 |     # this should have 6 subtypes (two direct, four under one of the direct children)
47 |     subtypes <- g %>% expand(direction = "in",
48 |                                   predicates = "biolink:subclass_of",
49 |                                   transitive = TRUE)
50 | 
51 |     nodes_df <- subtypes %>% activate(nodes) %>% as.data.frame()
52 |     edges_df <- subtypes %>% activate(edges) %>% as.data.frame()
53 |     expect_contains(7 + -2:2, nrow(nodes_df))
54 |     expect_contains(6 + -2:2, nrow(edges_df))
55 | 
56 |     # there should be a pcategory col of type character
57 |     expect_true("pcategory" %in% names(nodes_df))
58 |     expect_true(is.character(nodes_df$pcategory))
59 | 
60 |     # there should be a category col of type list
61 |     expect_true("category" %in% names(nodes_df))
62 |     expect_true(is.list(nodes_df$category))
63 | })
64 | 


--------------------------------------------------------------------------------
/man/expand.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/expand.R
 3 | \name{expand}
 4 | \alias{expand}
 5 | \title{Fetch additional knowledge graph edges connected to a query graph}
 6 | \usage{
 7 | expand(
 8 |   graph,
 9 |   engine = NULL,
10 |   direction = "both",
11 |   predicates = NULL,
12 |   categories = NULL,
13 |   transitive = FALSE,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{graph}{A query \code{tbl_kgx()} graph ot query with.}
19 | 
20 | \item{engine}{(Optional) An engine to use for fetching query graph edges. If not provided, the graph's most recent engine is used.}
21 | 
22 | \item{direction}{The direction of associations to fetch. Can be "in", "out", or "both". Default is "both".}
23 | 
24 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the neighborhood. If NULL (default), all edges are considered.}
25 | 
26 | \item{categories}{A vector of node categories, indicating which nodes in the larger KG may be fetched. If NULL (default), all nodes in the larger KG are will be fetched.}
27 | 
28 | \item{transitive}{If TRUE, include transitive closure of the neighborhood. Default is FALSE. Useful in combination with predicates like \code{biolink:subclass_of}.}
29 | 
30 | \item{...}{Other parameters passed to methods.}
31 | }
32 | \value{
33 | A \code{tbl_kgx()} graph
34 | }
35 | \description{
36 | Given an optional KG engine (e.g. a \code{file_engine()},
37 | \code{neo4j_engine()}, or \code{monarch_engine()}) and a query \code{tbl_kgx()} graph, fetches additional nodes and edges
38 | from the KG, expanding the query graph according to specific criteria. If the first parameter is an engine, that
39 | engine is used; if the first parameter is a query graph, the most recent engine associated with the graph is used.
40 | }
41 | \examples{
42 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
43 | ## Using Monarch (hosted)
44 | phenos <- monarch_engine() |>
45 |           fetch_nodes(query_ids = "MONDO:0007525") |>
46 |           expand(predicates = "biolink:has_phenotype",
47 |                  categories = "biolink:PhenotypicFeature")
48 | 
49 | print(phenos)
50 | 
51 | 
52 | \dontshow{\}) # examplesIf}
53 | ## Using example KGX file packaged with monarchr
54 | data(eds_marfan_kg)
55 | phenos <- eds_marfan_kg |>
56 |           fetch_nodes(query_ids = "MONDO:0007525") |>
57 |           expand(predicates = "biolink:has_phenotype",
58 |                  categories = "biolink:PhenotypicFeature")
59 | 
60 | print(phenos)
61 | }
62 | 


--------------------------------------------------------------------------------
/R/example_graph.neo4j_engine.R:
--------------------------------------------------------------------------------
 1 | #' Return an example set of nodes from a KG engine.
 2 | #'
 3 | #' Given a KGX Neo4j KG engine, returns a graph representing the diversity
 4 | #' of node categories and edge predicates for browsing. The returned graph is guaranteed to
 5 | #' contain at least one node of every category, and at least one edge of every
 6 | #' predicate. No other guarantees are made: the example graph is not minimal
 7 | #' to satisfy these criteria, it is not random or even pseudo-random, and it
 8 | #' may not be connected.
 9 | #'
10 | #' @param engine A `neo4j_engine` object
11 | #' @param ... Other parameters (not used)
12 | #' @return A tbl_kgx graph
13 | #' @export
14 | #' @examplesIf monarch_engine_check()
15 | #' # Retrieve and print an example graph:
16 | #' g <- monarch_engine() |> example_graph()
17 | #' print(g)
18 | #' @import tidygraph
19 | #' @import dplyr
20 | example_graph.neo4j_engine <- function(engine, ...) {
21 | 	# first, let's discover the different edge types (predicates) available from the schema info
22 | 	pred_types_query <- "CALL db.schema.visualization() YIELD relationships
23 |                        UNWIND relationships AS rel
24 |                        RETURN DISTINCT type(rel) AS predicate"
25 | 	pred_types <- cypher_query_df(engine, pred_types_query)
26 | 
27 | 	# next we get a bunch of edges of the different predicate types as a graph
28 | 	sample_preds_query <- paste0("MATCH (a)-[r:`", pred_types$predicate, "`]->(b) RETURN a, b, r LIMIT 1")
29 | 	sample_preds_graph <- cypher_query(engine, query = sample_preds_query)
30 | 
31 | 	# this might not represent all categories however.
32 | 
33 | 	# So we compute the categories that are represented thus far
34 | 	used_categories <- sample_preds_graph |>
35 | 		activate(nodes) |>
36 | 		as.data.frame() |>
37 | 		pull(category) |>
38 | 		unlist() |>
39 | 		unique()
40 | 
41 | 	# get the available categories from the schema
42 | 	categories_query <- "CALL db.labels() YIELD label RETURN DISTINCT label"
43 | 	all_node_categories <- cypher_query_df(engine, categories_query)$label
44 | 
45 | 	# compute the node categories that are still needed
46 | 	needed_categories <- setdiff(all_node_categories, used_categories)
47 | 
48 | 	# now sample nodes of those categories, and an arbitrary connection
49 | 	sample_cats_query <- paste0("MATCH (a:`", needed_categories, "`) -[r]- (b) RETURN a, r, b LIMIT 1")
50 | 	sample_new_cats <- cypher_query(engine, query = sample_cats_query)
51 | 
52 | 	# finally, we join the two and return
53 | 	suppressMessages(full_sample <- kg_join(sample_preds_graph, sample_new_cats), classes = "message")
54 | 
55 | 	return(full_sample)
56 | }
57 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fetch_nodes.file-engine.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(assertthat)
 3 | 
 4 | test_that("fetch_nodes file_engine works with basid id query", {
 5 |     #testthat::skip("temporary skip")
 6 | 
 7 | 		filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
 8 | 		e <- file_engine(filename)
 9 | 
10 |     g <- fetch_nodes(e, query_ids = c("MONDO:0007525", "MONDO:0007526"))
11 | 
12 |     # ensure that the last_engine attribute is set
13 |     expect_true(has_attr(g, "last_engine"))
14 |     # and that it has the right class
15 |     expect_true(inherits(attr(g, "last_engine"), "file_engine"))
16 | 
17 |     nodes_df <- g %>% activate(nodes) %>% as.data.frame()
18 |     # there should be an id column with 2 entries: MONDO:0007525 and HGNC:4635,
19 |     # but we can't gaurantee the order
20 |     expect_equal(nrow(nodes_df), 2)
21 |     expect_true(all(nodes_df$id %in% c("MONDO:0007525", "MONDO:0007526")))
22 | 
23 |     # there should be no edges
24 |     edges_df <- g %>% activate(edges) %>% as.data.frame()
25 |     expect_equal(nrow(edges_df), 0)
26 | })
27 | 
28 | test_that("fetch_nodes file_engine works with complex query syntax", {
29 | 		filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
30 | 		e <- file_engine(filename)
31 | 
32 |     # fetch_nodes(id %in% c("MONDO:0007525", "MONDO:0007526")) actually does work with file_engine
33 |     g <- e %>% fetch_nodes(id %in% c("MONDO:0007525", "MONDO:0007526"))
34 |     nodes_df <- g %>% activate(nodes) %>% as.data.frame()
35 |     expect_equal(nrow(nodes_df), 2)
36 | 
37 |     # basic single-id fetch
38 |     g <- e %>% fetch_nodes(id == "MONDO:0007525")
39 | 
40 |     nodes_df <- g %>% activate(nodes) %>% as.data.frame()
41 |     expect_equal(nrow(nodes_df), 1)
42 |     expect_equal(nodes_df$id, "MONDO:0007525")
43 | 
44 |     # test a big fetch
45 |     g <- e %>% fetch_nodes("biolink:Disease" %in_list% category | "biolink:Gene" %in_list% category)
46 |     nodes_df1 <- g %>% activate(nodes) %>% as.data.frame()
47 |     expect_equal(nrow(nodes_df1), 133)
48 | })
49 | 
50 | test_that("fetch_nodes limit works with file_engine", {
51 | 	filename <- system.file("extdata", "eds_marfan_kg.tar.gz", package = "monarchr")
52 | 
53 | 	e <- file_engine(filename)
54 | 	g <- e %>% fetch_nodes("biolink:Disease" %in_list% category, limit = 10)
55 | 
56 | 	nodes_df <- g %>% activate(nodes) %>% as.data.frame()
57 | 	expect_equal(nrow(nodes_df), 10)
58 | 
59 | 	e <- file_engine(filename)
60 | 	g <- e %>% fetch_nodes("biolink:Disease" %in_list% category, limit = 5)
61 | 
62 | 	nodes_df <- g %>% activate(nodes) %>% as.data.frame()
63 | 	expect_equal(nrow(nodes_df), 5)
64 | })
65 | 
66 | 


--------------------------------------------------------------------------------
/R/monarch_search.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Search for KG nodes using the Monarch Initiative search API
 3 | #'
 4 | #' This function is a wrapper around the Monarch-hosted
 5 | #' [search API](https://api.monarchinitiative.org/v3/docs#/search/search_v3_api_search_get).
 6 | #' It returns nodes (no edges) from the Monarch KG, fetched via an instance of `monarch_engine()`.
 7 | #'
 8 | #' @param query Search query string, e.g. "Cystic fibrosis"
 9 | #' @param category A set of node category labels to limit the search to, e.g. c("biolink:Disease", "biolink:Gene")
10 | #' @param limit Maximum number of nodes to return. Default 10.
11 | #' @param ... Parameters passed to monarch_engine().
12 | #' @return A local tbl_kgx graph with no edges.
13 | #' @export
14 | #' @import tidygraph
15 | #' @import dplyr
16 | #' @importFrom assertthat assert_that
17 | #' @importFrom httr GET content http_status
18 | #' @examplesIf monarch_engine_check()
19 | #' cf_hits <- monarch_search("Cystic fibrosis", category = "biolink:Disease", limit = 5)
20 | #' print(cf_hits)
21 | #'
22 | monarch_search <- function(query,
23 |                            category = NULL,
24 |                            limit = 10,
25 |                            ...) {
26 | 
27 | 		engine <- monarch_engine(...)
28 |     api_url <- paste0(engine$preferences$monarch_api_url, "/search")
29 | 
30 |     # ensure that the limit is not null and is a length-1 integer <= 500
31 |     assert_that(!is.null(limit), is.numeric(limit), limit <= 500, msg = "limit must be a length-1 integer <= 500 for search_nodes.monarch_engine()")
32 | 
33 |     params <- list(
34 |         "q" = query,
35 |         "limit" = limit,
36 |         "offset" = 0
37 |     )
38 | 
39 |     if(!is.null(category)) {
40 |         params$category <- category
41 |     }
42 | 
43 |     # put the httr::GET call in a trycatch block to handle errors
44 |     response <- GET(api_url, query = flatten_body_for_httr(params))
45 | 
46 |     # if the response is not 200, throw an error
47 |     if(response$status_code != 200) {
48 |         stop(paste0("Error: ", response$status_code, " ", http_status(response$status_code)$message))
49 |     }
50 | 
51 |     response_content <- content(response, "parsed")
52 |     total_available <- response_content$total
53 | 
54 |     ids <- unlist(lapply(response_content$items, function(item) {
55 |         item$id
56 |     }))
57 | 
58 |     if(length(ids) == 0) {
59 |         # return an empty graph
60 |         return(tbl_kgx(nodes = data.frame(id = character(), category = list()))
61 |         )
62 |     }
63 |     if(length(ids) == 1) {
64 |         ids <- list(ids)
65 |     }
66 | 
67 |     g <- cypher_query(engine, query = "MATCH (n) WHERE n.id IN $ids RETURN n",
68 |                      parameters = list(ids = ids))
69 | 
70 |     return(g)
71 | }
72 | 


--------------------------------------------------------------------------------
/R/expand_n.R:
--------------------------------------------------------------------------------
 1 | #' Iteratively fetch additional knowledge graph edges connected to a query graph
 2 | #'
 3 | #' Given an initialized \link{tbl_kgx} graph, iteratively expand the graph
 4 | #' \code{n} iterations using certain predicates/categories.
 5 | #' Arguments can either be a single value or a list of values.
 6 | #' If an argument is provided as a list, its length must be equal to the number
 7 | #' of iterations (\code{n}).
 8 | #' @param return_each If TRUE, return a list of graphs for each iteration.
 9 | #' If FALSE, return the final graph with all expanded edges.
10 | #' @param n Number of expansion iterations to run.
11 | #' @inheritParams expand
12 | #' @param transitive NULL (not used in this function).
13 | #'
14 | #' @return
15 | #' A `tbl_kgx()` graph
16 | #' @export
17 | #' @examples
18 | #' ## Using example KGX file packaged with monarchr
19 | #' data(eds_marfan_kg)
20 | #' g <- eds_marfan_kg |>
21 | #'      fetch_nodes(query_ids = "MONDO:0007525") |>
22 | #'      expand(predicates = "biolink:has_phenotype",
23 | #'             categories = "biolink:PhenotypicFeature")
24 | #'
25 | #' g_expanded <- g |>
26 | #'               expand_n(predicates = "biolink:subclass_of", n=3)
27 | #' @import tidygraph
28 | #' @import dplyr
29 | expand_n <- function(graph,
30 | 										 return_each = FALSE,
31 | 										 direction = "both",
32 | 										 predicates = NULL,
33 | 										 categories = NULL,
34 | 										 transitive = NULL,
35 | 										 n=1,
36 | 										 ...) {
37 | 	## Check args
38 | 	## Check args
39 | 	check_len <- function(arg,n,i){
40 | 		if(is.list(arg)){
41 | 			if(length(arg) != n){
42 | 				stop(paste("When provided a list, arguments must be equal to n."))
43 | 			}
44 | 			return(arg[[i]])
45 | 		}else{
46 | 			return(arg)
47 | 		}
48 | 	}
49 | 	if(!is.null(transitive)) {
50 | 		warning("Arguments to expand_n() are passed on to expand(), except for transitive which is set to NULL. Ignoring provided setting for transitive in expand_n().")
51 | 	}
52 | 
53 | 	## Expand graph
54 | 	message(paste(
55 | 		"Initial graph size:",
56 | 		nrow(nodes(graph)),"nodes ||",nrow(edges(graph)),"edges"
57 | 	))
58 | 	if(return_each) graph_list <- list(iteration0=graph)
59 | 
60 | 	for(i in 1:n){
61 | 		message("Expanding graph: iteration ",i,"/",n)
62 | 		graph <- expand(graph = graph,
63 | 										direction = check_len(direction,n,i),
64 | 										predicates = check_len(predicates,n,i),
65 | 										categories = check_len(categories,n,i),
66 | 										transitive = FALSE,
67 | 										...)
68 | 		if(return_each) graph_list[[paste0("iteration",i)]] <- graph
69 | 		message(paste(
70 | 			"Graph size:",
71 | 			nrow(nodes(graph)),"nodes ||",nrow(edges(graph)),"edges"
72 | 		))
73 | 	}
74 | 	if(return_each){
75 | 		return(graph_list)
76 | 	} else {
77 | 		return(graph)
78 | 	}
79 | }
80 | 


--------------------------------------------------------------------------------
/R/save_kgx.R:
--------------------------------------------------------------------------------
 1 | 
 2 | #' Save a graph as a KGX-formatted .tar.gz file.
 3 | #'
 4 | #' Given a graph, saves it using the tabular KGX format
 5 | #' (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md) for later
 6 | #' use with `load_kgx()` (or even backing an engine with `file_engine()`). Note that if
 7 | #' any engine is associated with the graph it is not saved.
 8 | #'
 9 | #' @param graph A `tbl_kgx` graph to save.
10 | #' @param filename File to save the graph to. Must end in .tar.gz.
11 | #' @param ... Other parameters (unused)
12 | #' @importFrom readr write_tsv
13 | #' @importFrom archive archive_write_files
14 | #' @return The input graph (invisibly).
15 | #' @export
16 | #' @examplesIf monarch_engine_check()
17 | #' phenos <- monarch_engine() |>
18 | #'           fetch_nodes(query_ids = "MONDO:0007525") |>
19 | #'           expand(predicates = "biolink:has_phenotype",
20 | #'                  categories = "biolink:PhenotypicFeature")
21 | #'
22 | #' save_kgx(phenos, "phenos.tar.gz")
23 | #'
24 | #' # when loading the graph, we can optionally attach an engine
25 | #' loaded_phenos <- load_kgx("phenos.tar.gz", attach_engine = monarch_engine())
26 | #'
27 | #' loaded_phenos
28 | #'
29 | #' # cleanup saved file
30 | #' file.remove("phenos.tar.gz")
31 | save_kgx <- function(graph, filename = "saved_kgx_graph.tar.gz", ...) {
32 | 	# ensure that files is a .tar.gz
33 | 	if(!grepl(".tar.gz$", filename)) {
34 | 		stop("Filename must end in .tar.gz")
35 | 	}
36 | 
37 | 	node_df <- nodes(graph)
38 | 	edge_df <- edges(graph)
39 | 
40 | 	node_df <- node_df[,colnames(node_df)[colnames(node_df) != "pcategory"]]
41 | 
42 | 	pipe_format <- function(df) {
43 | 		df |>
44 | 			# list columns need to be converted to character cols, with |-separated entries
45 | 			lapply(function(col) {
46 | 				if(is.list(col)) {
47 | 					# both empty and NA values should be empty strings
48 | 					fix <- col |> lapply(function(vec) {
49 | 						if(length(vec) == 0) {
50 | 							""
51 | 						} else {
52 | 							vec[is.na(vec)] <- ""
53 | 							paste0(vec, collapse = "|")
54 | 						}
55 | 					})
56 | 					# str(fix)
57 | 					fix |> as.character()
58 | 				} else {
59 | 					col[is.na(col)] <- ""
60 | 					as.character(col)
61 | 				}
62 | 			}) |>
63 | 			as.data.frame()
64 | 	}
65 | 
66 | 	node_df <- pipe_format(node_df)
67 | 	edge_df <- pipe_format(edge_df)
68 | 
69 | 	basename <- stringr::str_replace(filename, ".tar.gz", "")
70 | 
71 | 	node_df_file <- paste0(basename, "_nodes.tsv")
72 | 	edge_df_file <- paste0(basename, "_edges.tsv")
73 | 
74 | 	write_tsv(node_df, node_df_file, col_names = TRUE)
75 | 	write_tsv(edge_df, edge_df_file, col_names = TRUE)
76 | 
77 | 	archive_write_files(filename, c(node_df_file, edge_df_file))
78 | 
79 | 	file.remove(node_df_file)
80 | 	file.remove(edge_df_file)
81 | 
82 | 	return(invisible(graph))
83 | }
84 | 


--------------------------------------------------------------------------------
/R/fetch_nodes.R:
--------------------------------------------------------------------------------
 1 | #' Fetch nodes from a graph using a set of IDs or conditions
 2 | #'
 3 | #' This function fetches nodes (and no edges) from a knowledge graph engine based on a set of
 4 | #' conditions or a set of identifiers. If query_ids is provided, the function
 5 | #' will fetch nodes with the specified identifiers. If query_ids is NULL, the
 6 | #' function will fetch nodes based on the conditions provided. Only a limited
 7 | #' set of condition expressions are supported, see details.
 8 | #'
 9 | #' @details
10 | #' If query_ids is provided, the function will fetch nodes with the specified.
11 | #' If query_ids is NULL, the function will fetch nodes based on a condition
12 | #' expression. The following features are supported:
13 | #'
14 | #' - Matching node properties with boolean operators, e.g. `in_taxon_label == "Homo sapiens"`.
15 | #' - Matching multi-valued properties with `%in_list%`, e.g. `"biolink:Gene" %in_list% category`. NOTE: using `%in_list%` against vector queries, e.g. `in_taxon_label %in_list% c("Homo sapiens", "Mus musculus")` is *not* supported. Nor does `%in_list%` support multi-valued left hand sides; `c("biolink:Disease", "biolink:Gene") %in_list% category` will not work.
16 | #' - Boolean connectives with `|`, `&`, and `!`, e.g. `in_taxon_label == "Homo sapiens" | "biolink:Gene" %in_list% category`.
17 | #'
18 | #' If more than one condition parameter is specified, they are combined with `&`; for example,
19 | #' `fetch_nodes(engine, in_taxon_label == "Homo sapiens", "biolink:Gene" %in_list% category)` is equivalent to
20 | #' `fetch_nodes(engine, in_taxon_label == "Homo sapiens" & "biolink:Gene" %in_list% category)`.
21 | #' @param engine A graph engine object
22 | #' @param ... A set of conditions identifying the nodes to fetch, only used if query_ids is NULL
23 | #' @param query_ids A character vector of identifiers to fetch
24 | #' @param limit An integer specifying the maximum number of nodes to fetch. Default to NULL, no limit.
25 | #' @return A tbl_kgx object containing the nodes
26 | #'
27 | #' @examples
28 | #' library(tidygraph)
29 | #' library(dplyr)
30 | #'
31 | #' @examplesIf monarch_engine_check()
32 | #' monarch_engine() |>
33 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
34 | #'
35 | #' # a large query
36 | #' monarch_engine() |>
37 | #'   fetch_nodes("biolink:Disease" %in_list% category)
38 | #'
39 | #' @examples
40 | #' # file_engine supports the same features as neo4j_engine
41 | #' # (using the example KGX file packaged with monarchr)
42 | #' data(eds_marfan_kg)
43 | #'
44 | #' eds_marfan_kg |>
45 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
46 | #'
47 | #' # grab all Homo sapiens genes
48 | #' eds_marfan_kg |>
49 | #'   fetch_nodes(in_taxon_label == "Homo sapiens" & "biolink:Gene" %in_list% category)
50 | #'
51 | #' @export
52 | fetch_nodes <- function(engine, ..., query_ids = NULL, limit = NULL) {
53 |     UseMethod("fetch_nodes")
54 | }
55 | 


--------------------------------------------------------------------------------
/man/rolling.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rollup.R
 3 | \name{rolling}
 4 | \alias{rolling}
 5 | \alias{roll_up}
 6 | \alias{roll_down}
 7 | \title{Roll node data up (along) or down (against) transitive edges. Use with \code{mutate()}}
 8 | \usage{
 9 | roll_up(
10 |   column = NULL,
11 |   fun = c,
12 |   include_self = TRUE,
13 |   predicates = "biolink:subclass_of",
14 |   ...
15 | )
16 | 
17 | roll_down(
18 |   column = NULL,
19 |   fun = c,
20 |   include_self = TRUE,
21 |   predicates = "biolink:subclass_of",
22 |   ...
23 | )
24 | }
25 | \arguments{
26 | \item{column}{The node column to draw rollup or rolldown information from.}
27 | 
28 | \item{fun}{The aggregation function to use when rolling up or down. Default is \code{c}}
29 | 
30 | \item{include_self}{Whether to include each nodes' value in \code{column} in the rollup/rolldown for that node.}
31 | 
32 | \item{predicates}{A vector of relationship predicates (nodes in g are subjects in the KG), indicating which edges to consider in the rollup/rolldown. Should be transitive; default \code{biolink:subclass_of}}
33 | 
34 | \item{...}{Other parameters (unused)}
35 | }
36 | \value{
37 | Vector or list, with one entry per node.
38 | }
39 | \description{
40 | This function computes, for each node, an aggregated set of data from all
41 | descendant (for roll-ups) or ancestor (for roll-downs) nodes defined by
42 | specified edge predicates. Designed for use with \code{mutate()}
43 | on node data, for each node N, the specified \code{fun} is called
44 | on the node table \code{column} filtered to nodes that can reach (be reached by) N over
45 | \code{predicates} edges. If \code{include_self} is true, N itself is included.
46 | }
47 | \details{
48 | Note that path counts and order are not considered; rollups (rolldowns)
49 | collect information from all descendant (ancestor) nodes as a set.
50 | 
51 | The return value will be either a list, or if the result would be a list
52 | with all length-1 or length-0 elements, a vector with 0-length elements
53 | replaced by NA. Practically, this results in a list when necessary and a vector
54 | otherwise.
55 | }
56 | \examples{
57 | data(eds_marfan_kg)
58 | 
59 | eds_marfan_kg |>
60 |   fetch_nodes(name == "Tall stature" | name == "Short stature") |>
61 |   # get 2 levels of ancestors
62 |   expand_n(predicates = "biolink:subclass_of", direction = "out", n = 2) |>
63 |   activate(nodes) |>
64 |   # random count value per node
65 |   mutate(count = rpois(graph_order(), 1.5)) |>
66 |   # apply sum to descendant (and self) values
67 |   mutate(sum_count = roll_up(count, fun = sum, include_self = TRUE)) |>
68 |   plot(node_label = paste(name, " count: ", count, "sum_count: ", sum_count))
69 | 
70 | }
71 | \seealso{
72 | \code{\link[=roll_down]{roll_down()}}, \code{\link[=descendants]{descendants()}}, [ancestors(), \code{\link[=transfer]{transfer()}}, \code{\link[=transitive_closure]{transitive_closure()}}]
73 | }
74 | 


--------------------------------------------------------------------------------
/R/monarch_engine.R:
--------------------------------------------------------------------------------
 1 | #' Create a knowledge graph engine object backed by the public Monarch Neo4j instance
 2 | #'
 3 | #' Creates a knowledge graph engine backed by the publicly hosted Monarch Neo4j database, used to fetch nodes and edges from the database as local
 4 | #' graph objects.
 5 | #'
 6 | #' Engines store preference information specifying how data are fetched and manipulated; for example,
 7 | #' while node `category` is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"),
 8 | #' typically a single category is used to represent the node in a graph, and is returned as the nodes' `pcategory`. A preference list of categories to use for `pcategory` is
 9 | #' stored in the engine's preferences. A default set of preferences is stored in the package for use with this and other KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md),
10 | #' but these can be overridden by the user.
11 | #'
12 | #' The `monarch_engine()` overrides `search_nodes()` to use the Monarch search API, so setting `node_search_properties` in the preferences will not affect the search behavior. To use regex-based searching with the Monarch Neo4j instance, use `neo4j_engine()` instead and specify the Monarch Neo4j URL (https://neo4j.monarchinitiative.org).
13 | #'
14 | #' @param url (Optional) May be specified to override the default Monarch Neo4j URL. If given a vector, each will be tried in sequence; if a URL times out (see timeout) or fails, the next is tried.
15 | #' @param api_url (Optional) May be specified to override the default Monarch API URL (specifying the location of the `/search` endpoint used by `search_nodes()`).
16 | #' @param preferences A named list of preferences for the engine.
17 | #' @param timeout Number of seconds to wait before trying the next url.
18 | #' @param ... Additional arguments passed to `neo2R::startGraph()`.
19 | #' @seealso `file_engine()`, `neo4j_engine()`
20 | #' @return An object of class `monarch_engine`
21 | #' @export
22 | #' @examplesIf monarch_engine_check()
23 | #' library(tidygraph)
24 | #' library(dplyr)
25 | #'
26 | #' monarch <- monarch_engine()
27 | #' res <- monarch |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947"))
28 | #' print(res)
29 | #'
30 | monarch_engine <- function(url = c("https://neo4j.monarchinitiative.org",
31 | 																	 "http://neo4j.monarchinitiative.org",
32 | 																	 "https://neo4j.monarchinitiative.org:7473",
33 | 																	 "http://neo4j.monarchinitiative.org:7473"),
34 |                            api_url = "https://api.monarchinitiative.org/v3/api",
35 |                            preferences = NULL,
36 | 													 timeout = 2,
37 |                            ...) {
38 |   e <- neo4j_engine(url = url, preferences = preferences, timeout = timeout, ...)
39 |   e$preferences$monarch_api_url <- api_url
40 |   class(e) <- c("monarch_engine", class(e))
41 |   return(e)
42 | }
43 | 


--------------------------------------------------------------------------------
/man/monarch_semsim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/monarch_semsim.R
 3 | \name{monarch_semsim}
 4 | \alias{monarch_semsim}
 5 | \title{Semantic similarity mapping between two graphs}
 6 | \usage{
 7 | monarch_semsim(
 8 |   query_graph,
 9 |   target_graph,
10 |   metric = "ancestor_information_content",
11 |   include_reverse = FALSE,
12 |   keep_unmatched = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{query_graph}{A tbl_kgx graph.}
17 | 
18 | \item{target_graph}{A tbl_kgx graph.}
19 | 
20 | \item{metric}{The semantic similarity metric to use. Default is \code{"ancestor_information_content"}. Also available are \code{"jaccard_similarity"} and \code{"phenodigm_score"}.}
21 | 
22 | \item{include_reverse}{Whether to include the best matches from the target graph to the query graph. Default is \code{FALSE}.}
23 | 
24 | \item{keep_unmatched}{Whether to keep nodes in the target graph that do not have a match. Default is \code{FALSE}.}
25 | }
26 | \value{
27 | A tbl_kgx graph with \code{"computed:best_matches"} edges between the nodes of the two input graphs and columns for \code{monarch_semsim_metric}, \code{monarch_semsim_score}, and \code{monarch_semsim_ancestor_id}.
28 | }
29 | \description{
30 | This function calls the Monarch-hosted semantic similarity API to compare two
31 | graphs, via the same endpoints as the Monarch Phenotype Explorer:
32 | https://monarchinitiative.org/explore#phenotype-explorer.
33 | }
34 | \details{
35 | The API returns the best matches between the nodes of the two graphs, based on
36 | a specified knowledge-graph-based metric: the default is \code{"ancestor_information_content"},
37 | also available are \code{"jaccard_similarity"} and \code{"phenodigm_score"}. The result is
38 | returned as a graph, with \code{"computed:best_matches"} edges between the nodes of the two input graphs.
39 | 
40 | By default, the function only returns the best matches from the first graph to the second graph, and
41 | removes any nodes that do not have a match. If \code{include_reverse = TRUE}, the function also returns
42 | the best matches from the second graph to the first graph.
43 | 
44 | The engine attached to the return graph is that of the query.
45 | }
46 | \examples{
47 | \dontshow{if (monarch_engine_check(service = "semsim")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
48 | 
49 | g1 <- monarch_engine() |>
50 |   fetch_nodes(query_ids = "MONDO:0007947") |>
51 |   expand(categories = "biolink:PhenotypicFeature")
52 | 
53 | g2 <- monarch_engine() |>
54 |   fetch_nodes(query_ids = "MONDO:0007522") |>
55 |   expand(categories = "biolink:PhenotypicFeature")
56 | 
57 | sim <- monarch_semsim(g1, g2)
58 | print(sim)
59 | 
60 | # also inclue the unmatched targets
61 | sim <- monarch_semsim(g1, g2, keep_unmatched = TRUE)
62 | print(sim)
63 | 
64 | # inclue reverse matches
65 | sim <- monarch_semsim(g1, g2, include_reverse = TRUE)
66 | print(sim)
67 | \dontshow{\}) # examplesIf}
68 | }
69 | 


--------------------------------------------------------------------------------
/man/monarch_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/monarch_engine.R
 3 | \name{monarch_engine}
 4 | \alias{monarch_engine}
 5 | \title{Create a knowledge graph engine object backed by the public Monarch Neo4j instance}
 6 | \usage{
 7 | monarch_engine(
 8 |   url = c("https://neo4j.monarchinitiative.org", "http://neo4j.monarchinitiative.org",
 9 |     "https://neo4j.monarchinitiative.org:7473",
10 |     "http://neo4j.monarchinitiative.org:7473"),
11 |   api_url = "https://api.monarchinitiative.org/v3/api",
12 |   preferences = NULL,
13 |   timeout = 2,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{url}{(Optional) May be specified to override the default Monarch Neo4j URL. If given a vector, each will be tried in sequence; if a URL times out (see timeout) or fails, the next is tried.}
19 | 
20 | \item{api_url}{(Optional) May be specified to override the default Monarch API URL (specifying the location of the \verb{/search} endpoint used by \code{search_nodes()}).}
21 | 
22 | \item{preferences}{A named list of preferences for the engine.}
23 | 
24 | \item{timeout}{Number of seconds to wait before trying the next url.}
25 | 
26 | \item{...}{Additional arguments passed to \code{neo2R::startGraph()}.}
27 | }
28 | \value{
29 | An object of class \code{monarch_engine}
30 | }
31 | \description{
32 | Creates a knowledge graph engine backed by the publicly hosted Monarch Neo4j database, used to fetch nodes and edges from the database as local
33 | graph objects.
34 | }
35 | \details{
36 | Engines store preference information specifying how data are fetched and manipulated; for example,
37 | while node \code{category} is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"),
38 | typically a single category is used to represent the node in a graph, and is returned as the nodes' \code{pcategory}. A preference list of categories to use for \code{pcategory} is
39 | stored in the engine's preferences. A default set of preferences is stored in the package for use with this and other KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md),
40 | but these can be overridden by the user.
41 | 
42 | The \code{monarch_engine()} overrides \code{search_nodes()} to use the Monarch search API, so setting \code{node_search_properties} in the preferences will not affect the search behavior. To use regex-based searching with the Monarch Neo4j instance, use \code{neo4j_engine()} instead and specify the Monarch Neo4j URL (https://neo4j.monarchinitiative.org).
43 | }
44 | \examples{
45 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
46 | library(tidygraph)
47 | library(dplyr)
48 | 
49 | monarch <- monarch_engine()
50 | res <- monarch |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947"))
51 | print(res)
52 | \dontshow{\}) # examplesIf}
53 | }
54 | \seealso{
55 | \code{file_engine()}, \code{neo4j_engine()}
56 | }
57 | 


--------------------------------------------------------------------------------
/R/transitive_closure.R:
--------------------------------------------------------------------------------
 1 | #' Compute transitive closure over a predicate.
 2 | #'
 3 | #' Computes the transitive closure of a graph, treating the specified
 4 | #' predicate as transitive. Resulting edge predicates will the be the
 5 | #' same, but have primary_knowledge_source set to transitive_<predicate>.
 6 | #'
 7 | #' @return Graph with transitive edges added.
 8 | #' @seealso [roll_up()], [transfer()], [descendants()], [ancestors()]
 9 | #' @param g The `tbl_kgx` graph to compute on.
10 | #' @param predicate The edge predicate to close over.
11 | #'
12 | #' @examples
13 | #' data(eds_marfan_kg)
14 | #'
15 | #' eds_marfan_kg |> fetch_nodes(name == "Tall stature") |>
16 | #'   # get 2 levels of ancestors
17 | #'   expand_n(predicates = "biolink:subclass_of", direction = "out", n = 3) |>
18 | #'   activate(edges) |>
19 | #'   filter(primary_knowledge_source == "infores:upheno") |>
20 | #'   transitive_closure(predicate = "biolink:subclass_of") |>
21 | #'   plot(edge_color = primary_knowledge_source)
22 | #'
23 | #' @import tidygraph
24 | #' @export
25 | transitive_closure <- function(g, predicate = "biolink:subclass_of") {
26 | 	if(length(predicate) != 1) {
27 | 		stop("Error: predicate parameter of transitive_closure() must be length 1.")
28 | 	}
29 | 	# if there are no edges to close, return the input
30 | 	p <- predicate
31 | 	if(nrow(edges(g) |> filter(predicate == p)) == 0) {return(g)}
32 | 
33 | 	active_tbl <- active(g)
34 | 
35 | 	with_downstream <- g |>
36 | 		activate(nodes) |>
37 | 		mutate(downstream_nodes = roll_down(id, include_self = FALSE, predicate = predicate)) |>
38 | 		filter(!is.na(downstream_nodes))
39 | 
40 | 	# create a new edge df... start by getting the nodes and the list col
41 | 	new_edges <- nodes(with_downstream) |>
42 | 		select(id, downstream_nodes) |>
43 | 		tidyr::unnest(downstream_nodes, keep_empty = FALSE) |>
44 | 		mutate(subject = id, predicate = predicate, object = downstream_nodes, primary_knowledge_source = paste0("transitive_", predicate)) |>
45 | 		select(subject, predicate, object, primary_knowledge_source) |>
46 | 		mutate(edge_key = paste(subject, "@", predicate, "@", object))
47 | 
48 | 	node_indices <- nodes(g) |> mutate(index = dplyr::row_number()) |> select(id, index)
49 | 
50 | 	# we create edge keys so that we can filter out edges that are duplicates created as part of
51 | 	# the process above (the original, non-transitive edges)
52 | 	g <- g |>
53 | 		activate(edges) |>
54 | 		mutate(edge_key = paste(subject, "@", predicate, "@", object))
55 | 
56 | 	new_edges <- new_edges |>
57 | 		dplyr::left_join(node_indices, by = c("subject" = "id")) |>
58 | 		dplyr::rename(from = index) |>
59 | 		dplyr::left_join(node_indices, by = c("object" = "id")) |>
60 | 		dplyr::rename(to = index) |>
61 | 		mutate(edge_key = paste(subject, "@", predicate, "@", object)) |>
62 | 		filter(!edge_key %in% edges(g)$edge_key)
63 | 
64 | 	res <- g |>
65 | 		tidygraph::bind_edges(new_edges) |>
66 | 		activate(edges) |>
67 | 		select(-edge_key) |>
68 | 		activate(!!rlang::sym(active_tbl))
69 | 
70 | 	res
71 | }
72 | 


--------------------------------------------------------------------------------
/man/monarch_edge_weight_encodings.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/monarch_edge_weight_encodings.R
 3 | \name{monarch_edge_weight_encodings}
 4 | \alias{monarch_edge_weight_encodings}
 5 | \title{Generate Default Edge Weight Encodings for Monarch Knowledge Graph}
 6 | \usage{
 7 | monarch_edge_weight_encodings()
 8 | }
 9 | \value{
10 | A list of encodings for various edge attributes:
11 | \describe{
12 | \item{knowledge_level}{A named list assigning weights based on knowledge assertion levels:
13 | \itemize{
14 | \item \code{"knowledge_assertion"}: \code{1} (full weight for direct assertions)
15 | \item \code{"logical_entailment"}: \code{1} (full weight for inferred relationships)
16 | \item \code{"not_provided"}: \code{0} (no weight when knowledge level is unspecified)
17 | }
18 | }
19 | \item{frequency_qualifier}{A named list mapping Human Phenotype Ontology (HPO) frequency terms to numeric weights:
20 | \itemize{
21 | \item \code{"HP:0040281"}: \code{1} ("Very frequent")
22 | \item \code{"HP:0040282"}: \code{0.75} ("Frequent")
23 | \item \code{"HP:0040283"}: \code{0.5} ("Occasional")
24 | \item \code{"HP:0040284"}: \code{0.25} ("Very rare")
25 | }
26 | }
27 | \item{negated}{A named list representing negation status:
28 | \itemize{
29 | \item \code{TRUE}: \code{-1} (negated relationships decrease the weight)
30 | \item \code{FALSE}: \code{0} (non-negated relationships have no penalty)
31 | }
32 | }
33 | \item{has_total}{\code{NULL}. Placeholder for total counts; can be overridden with custom logic.}
34 | \item{has_quotient}{An empty numeric vector. Placeholder for quotient-based weight calculations.}
35 | \item{has_count}{\code{NULL}. Placeholder for count-based weight computations.}
36 | \item{has_percentage}{\code{NULL}. Placeholder for percentage-based weight computations.}
37 | \item{has_evidence}{\code{NULL}. Placeholder for evidence-based weight computations.}
38 | \item{onset_qualifier}{\code{NULL}. Placeholder for onset qualifiers, which can influence weights.}
39 | \item{publications}{A function that computes the weight based on the number of unique publications:
40 | \itemize{
41 | \item Takes a vector of publication IDs and returns the count of unique IDs.
42 | }
43 | }
44 | }
45 | }
46 | \description{
47 | This function returns a list of default encoding schemes used to compute
48 | edge weights in Monarch knowledge graph visualizations and analyses.
49 | Each encoding represents how specific edge attributes contribute to
50 | the overall edge weight. Users can inspect these defaults and
51 | override them with custom encoding logic if needed.
52 | }
53 | \examples{
54 | # View the default edge weight encodings
55 | monarch_edge_weight_encodings()
56 | 
57 | # Override the encoding to assign more weight to logical entailments
58 | encodings <- monarch_edge_weight_encodings()
59 | encodings$knowledge_level$logical_entailment <- 0.5
60 | 
61 | # Modify the negation encoding to reduce penalty for negated relationships
62 | encodings$negated[["TRUE"]] <- -0.5
63 | 
64 | }
65 | 


--------------------------------------------------------------------------------
/man/neo4j_engine.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/neo4j_engine.R
 3 | \name{neo4j_engine}
 4 | \alias{neo4j_engine}
 5 | \title{Create a knowledge graph engine object for a neo4j database}
 6 | \usage{
 7 | neo4j_engine(
 8 |   url,
 9 |   username = NA,
10 |   password = NA,
11 |   preferences = NULL,
12 |   timeout = 1,
13 |   cache = TRUE,
14 |   ...
15 | )
16 | }
17 | \arguments{
18 | \item{url}{A character string indicating the URL of the neo4j database. If given a vector, each will be tried in sequence; if a URL times out (see timeout) or fails, the next is tried.}
19 | 
20 | \item{username}{A character string indicating the username for the neo4j database (if needed).}
21 | 
22 | \item{password}{A character string indicating the password for the neo4j database (if needed).}
23 | 
24 | \item{preferences}{A named list of preferences for the engine.}
25 | 
26 | \item{timeout}{Number of sections to wait before trying the next url.}
27 | 
28 | \item{cache}{Whether to cache query results in memory for the length of the R session.}
29 | 
30 | \item{...}{Additional arguments passed to \code{neo2R::startGraph()}.}
31 | }
32 | \value{
33 | An object of class \code{neo4j_engine}
34 | }
35 | \description{
36 | Creates a knowledge graph engine backed by a neo4j database, from a URL and optional username and password. Knowledge graph "engines"
37 | are objects that store information about how to connect to a (potentially large) knowledge graph, and can be used to fetch nodes and edges from the database as local
38 | graph objects.
39 | }
40 | \details{
41 | Engines store preference information specifying how data are fetched and manipulated; for example,
42 | while node \code{category} is multi-valued (nodes may have multiple categories, for example "biolink:Gene" and "biolink:NamedThing"),
43 | typically a single category is used to represent the node in a graph, and is returned as the nodes' \code{pcategory}. A preference list of categories to use for \code{pcategory} is
44 | stored in the engine's preferences. A default set of preferences is stored in the package for use with KGX (BioLink-compatible) graphs (see https://github.com/biolink/kgx/blob/master/specification/kgx-format.md),
45 | but these can be overridden by the user.
46 | 
47 | For \code{neo4j_engine()}s, preferences are also used to set the node properties to search when using \code{search_nodes()}, defaulting to regex-based searches on id, name, and description. (The \code{monarch_engine()} is a type
48 | of \code{neo4j_engine()} with the URL set to the Monarch Neo4j instance, and overrides \code{search_nodes()} to use the Monarch search API, see \code{monarch_engine()} for details).
49 | }
50 | \examples{
51 | \dontshow{if (neo4j_engine_check("https://neo4j.monarchinitiative.org")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
52 | library(tidygraph)
53 | library(dplyr)
54 | 
55 | engine <- neo4j_engine(url = "https://neo4j.monarchinitiative.org")
56 | res <- engine |> fetch_nodes(query_ids = c("MONDO:0007522", "MONDO:0007947"))
57 | print(res)
58 | \dontshow{\}) # examplesIf}
59 | }
60 | \seealso{
61 | \code{file_engine()}, \code{monarch_engine()}
62 | }
63 | 


--------------------------------------------------------------------------------
/man/fetch_nodes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/fetch_nodes.R
 3 | \name{fetch_nodes}
 4 | \alias{fetch_nodes}
 5 | \title{Fetch nodes from a graph using a set of IDs or conditions}
 6 | \usage{
 7 | fetch_nodes(engine, ..., query_ids = NULL, limit = NULL)
 8 | }
 9 | \arguments{
10 | \item{engine}{A graph engine object}
11 | 
12 | \item{...}{A set of conditions identifying the nodes to fetch, only used if query_ids is NULL}
13 | 
14 | \item{query_ids}{A character vector of identifiers to fetch}
15 | 
16 | \item{limit}{An integer specifying the maximum number of nodes to fetch. Default to NULL, no limit.}
17 | }
18 | \value{
19 | A tbl_kgx object containing the nodes
20 | }
21 | \description{
22 | This function fetches nodes (and no edges) from a knowledge graph engine based on a set of
23 | conditions or a set of identifiers. If query_ids is provided, the function
24 | will fetch nodes with the specified identifiers. If query_ids is NULL, the
25 | function will fetch nodes based on the conditions provided. Only a limited
26 | set of condition expressions are supported, see details.
27 | }
28 | \details{
29 | If query_ids is provided, the function will fetch nodes with the specified.
30 | If query_ids is NULL, the function will fetch nodes based on a condition
31 | expression. The following features are supported:
32 | \itemize{
33 | \item Matching node properties with boolean operators, e.g. \code{in_taxon_label == "Homo sapiens"}.
34 | \item Matching multi-valued properties with \verb{\%in_list\%}, e.g. \code{"biolink:Gene" \%in_list\% category}. NOTE: using \verb{\%in_list\%} against vector queries, e.g. \code{in_taxon_label \%in_list\% c("Homo sapiens", "Mus musculus")} is \emph{not} supported. Nor does \verb{\%in_list\%} support multi-valued left hand sides; \code{c("biolink:Disease", "biolink:Gene") \%in_list\% category} will not work.
35 | \item Boolean connectives with \code{|}, \code{&}, and \code{!}, e.g. \code{in_taxon_label == "Homo sapiens" | "biolink:Gene" \%in_list\% category}.
36 | }
37 | 
38 | If more than one condition parameter is specified, they are combined with \code{&}; for example,
39 | \code{fetch_nodes(engine, in_taxon_label == "Homo sapiens", "biolink:Gene" \%in_list\% category)} is equivalent to
40 | \code{fetch_nodes(engine, in_taxon_label == "Homo sapiens" & "biolink:Gene" \%in_list\% category)}.
41 | }
42 | \examples{
43 | library(tidygraph)
44 | library(dplyr)
45 | 
46 | \dontshow{if (monarch_engine_check()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
47 | monarch_engine() |>
48 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
49 | 
50 | # a large query
51 | monarch_engine() |>
52 |   fetch_nodes("biolink:Disease" \%in_list\% category)
53 | \dontshow{\}) # examplesIf}
54 | # file_engine supports the same features as neo4j_engine
55 | # (using the example KGX file packaged with monarchr)
56 | data(eds_marfan_kg)
57 | 
58 | eds_marfan_kg |>
59 |   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
60 | 
61 | # grab all Homo sapiens genes
62 | eds_marfan_kg |>
63 |   fetch_nodes(in_taxon_label == "Homo sapiens" & "biolink:Gene" \%in_list\% category)
64 | 
65 | }
66 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | #' @export
  2 | #' @importFrom tidygraph as_tibble
  3 | nodes.tbl_kgx <- function(graph, ...) {
  4 |   tidygraph::as_tibble(graph, active = "nodes")
  5 | }
  6 | 
  7 | 
  8 | #' @export
  9 | #' @importFrom tidygraph as_tibble
 10 | edges.tbl_kgx <- function(graph, ...) {
 11 |   tidygraph::as_tibble(graph, active = "edges")
 12 | }
 13 | 
 14 | 
 15 | #' Get graph nodes table.
 16 | #'
 17 | #' @param graph A graph object
 18 | #' @param ... Other options (unused)
 19 | #' @return A tibble with the nodes of the graph
 20 | #' @importFrom tidygraph as_tibble
 21 | #' @export
 22 | #' @examples
 23 | #' # (using the example KGX file packaged with monarchr)
 24 | #' data(eds_marfan_kg)
 25 | #'
 26 | #' g <- eds_marfan_kg |>
 27 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
 28 | #'
 29 | #' print(nodes(g))
 30 | nodes <- function(graph, ...) {
 31 |   UseMethod("nodes")
 32 | }
 33 | 
 34 | #' Get graph edges table.
 35 | #'
 36 | #' @param graph Input graph
 37 | #' @param ... Other options (unused)
 38 | #' @export
 39 | #' @return A tibble with the edges of the graph
 40 | #' @importFrom tidygraph as_tibble
 41 | #' @examples
 42 | #' # (using the example KGX file packaged with monarchr)
 43 | #' data(eds_marfan_kg)
 44 | #'
 45 | #' g <- eds_marfan_kg |>
 46 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
 47 | #'
 48 | #' print(edges(g))
 49 | edges <- function(graph, ...) {
 50 |   UseMethod("edges")
 51 | }
 52 | 
 53 | #' Explode a graph into a list of single-node graphs
 54 | #'
 55 | #' @param graph A tbl_kgx graph.
 56 | #' @param ... Other options (unused)
 57 | #' @return A list of tbl_kgx graphs.
 58 | #' @examples
 59 | #' # (using the example KGX file packaged with monarchr)
 60 | #' data(eds_marfan_kg)
 61 | #'
 62 | #' g <- eds_marfan_kg |>
 63 | #'   fetch_nodes(query_ids = c("MONDO:0007525", "MONDO:0007526"))
 64 | #'
 65 | #' print(explode(g))
 66 | #' @export
 67 | explode <- function(graph, ...) {
 68 |   UseMethod("explode")
 69 | }
 70 | 
 71 | #' @export
 72 | #' @importFrom tidygraph activate
 73 | explode.tbl_kgx <- function(graph, ...) {
 74 |   nodes <- as_tibble(graph, active = "nodes")
 75 |   graphs <- lapply(nodes$id, function(node_id) {
 76 |     filter(tidygraph::activate(graph, nodes), id == node_id)
 77 |   })
 78 |   return(graphs)
 79 | }
 80 | 
 81 | 
 82 | 
 83 | #' @noRd
 84 | merge_lists <- function(a, b) {
 85 | 	# This function takes two named lists and merges them recursively. Values in
 86 | 	# the second list (`b`) override or extend those in the first list (`a`).
 87 | 	# If both `a` and `b` contain a named list at the same position, they are merged recursively.
 88 | 	#
 89 | 	# a <- list(x = list(y = 1, z = 2), foo = "bar")
 90 | 	# b <- list(x = list(y = 42, new = 99), foo = "baz", extra = "new_value")
 91 | 	# merge_lists(a, b)
 92 | 
 93 | 	for (name in names(b)) {
 94 | 		if (is.list(b[[name]]) && name %in% names(a) && is.list(a[[name]])) {
 95 | 			# Recursively merge if both elements are lists
 96 | 			a[[name]] <- merge_lists(a[[name]], b[[name]])
 97 | 		} else {
 98 | 			# Otherwise, override/add the value from b to a
 99 | 			a[[name]] <- b[[name]]
100 | 		}
101 | 	}
102 | 	return(a)
103 | }
104 | 


--------------------------------------------------------------------------------
/R/kg_join.tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # given a dataframe with potential list columns,
 3 | # ensures that any NULL entries are replaced with a single NA
 4 | null_col_entries_to_na <- function(df) {
 5 | 	for(name in names(df)) {
 6 | 		col <- df[[name]]
 7 | 		if(is.list(col)) {
 8 | 			col <- lapply(col, function(el) {
 9 | 													if(is.null(el)) {
10 | 														NA
11 | 													} else {
12 | 														el
13 | 													}
14 | 												}
15 | 			              )
16 | 			df[[name]] <- col
17 | 		}
18 | 	}
19 | 
20 | 	return(df)
21 | }
22 | 
23 | #' @import tidygraph
24 | #' @import dplyr
25 | #' @export
26 | kg_join.tbl_kgx <- function(graph1, graph2, ...) {
27 |     nodes_g1 <- nodes(graph1)
28 |     nodes_g2 <- nodes(graph2)
29 | 
30 |     # we don't want to keep the to and from columns in the edges, since we'll be rebuilding edges from scratch,
31 |     # and be running them through unique()
32 |     edges_g1 <- edges(graph1) |> select(-to, -from)
33 |     edges_g2 <- edges(graph2) |> select(-to, -from)
34 | 
35 |     all_nodes <- unique(nodes_g1 |>
36 |                           full_join(nodes_g2)) |>
37 |                  mutate(idx = row_number())  # add an index column to the nodes
38 | 
39 | 
40 |     all_edges <- unique(edges_g1 |>
41 |                           full_join(edges_g2))
42 | 
43 | 
44 | 
45 |     # given a single row of an edge data frame, we need to replicate it for each distinct from/to pair that match subjects and objects
46 |     fill_edges <- function(row_df) {
47 |         # we need to replicate this row with distinct from and to information from the all_nodes data
48 |         subject_idxs <- all_nodes |> filter(id == row_df$subject) |> pull(idx)
49 |         object_idxs <- all_nodes |> filter(id == row_df$object) |> pull(idx)
50 |         # we need the cross-join of these two vectors
51 |         cross <- expand.grid(from = subject_idxs, to = object_idxs)
52 |         # now we need to add the properties from the original row to each of these new rows
53 |         # we start by replcating the row_df nrow(cross) times
54 |         row_df <- row_df[rep(1, nrow(cross)), ]
55 |         # then we add the cross data to the row_df
56 |         row_df <- cbind(row_df, cross)
57 | 
58 |         return(row_df)
59 |     }
60 | 
61 |     filled_edges <- NULL
62 | 
63 |     # not using rowwise() here so that each row is nicely contained including list columns as a df row (not a list)
64 |     # but maybe group_by() isn't right either - this triggers the call to fill_edges(.) even if there are no rows in the df
65 |     # so we have a guard here
66 |     if(nrow(all_edges) > 0) {
67 | 	    filled_edges <- all_edges |>
68 | 	      mutate(edge_idx = row_number()) |>
69 | 	      group_by(edge_idx) |>
70 | 	      do(fill_edges(.)) |>
71 | 	      as_tibble() |>
72 | 	      select(-edge_idx)  # remove the edge_idx column
73 |     }
74 | 
75 |     all_nodes <- all_nodes |>
76 |       select(-idx)  # remove the idx column
77 | 
78 |     all_nodes <- null_col_entries_to_na(all_nodes)
79 |     filled_edges <- null_col_entries_to_na(filled_edges)
80 |     res <- tbl_kgx(nodes = all_nodes, edges = filled_edges, attach_engine = get_engine(graph1, fail_if_missing = FALSE))
81 |     return(res)
82 | }
83 | 


--------------------------------------------------------------------------------
/R/transfer.R:
--------------------------------------------------------------------------------
 1 | #' Transfer information over edges to nodes.
 2 | #'
 3 | #' Used to 'transfer' information from nodes to other nodes across
 4 | #' specific predicates, either in an outward direction (along the edge
 5 | #' direction) or inward (against the edge direction). Returns a node-property
 6 | #' column; intended to be used with mutate() on nodes.
 7 | #'
 8 | #' The return value will be either a list, or if the result would be a list
 9 | #' with all length-1 or length-0 elements, a vector with 0-length elements
10 | #' replaced by NA. Practically, this results in a list when necessary and a vector
11 | #' otherwise.
12 | #'
13 | #' @return Vector or list, with one entry per node.
14 | #' @seealso [roll_up()], [transitive_closure], [descendants()], [ancestors()]
15 | #' @param colname The node column to transfer information from
16 | #' @param over The edge predicate to transfer information over
17 | #' @param direction Whether to transfer information along the predicate direction ("out") or against ("in")
18 | #'
19 | #' @examples
20 | #' data(eds_marfan_kg)
21 | #'
22 | #' engine |> eds_marfan_kg |>
23 | #' 	expand(categories = "biolink:Disease") |>
24 | #' 	activate(nodes) |>
25 | #' 	mutate(caused_by_genes =
26 | #' 	         transfer(name, over = "biolink:causes", direction = "out")) |>
27 | #' 	mutate(causes_diseases =
28 | #' 	         transfer(name, over = "biolink:causes", direction = "in")) |>
29 | #' 	plot.tbl_kgx(node_label = paste(name,
30 | #' 																	" caused by: ", caused_by_genes,
31 | #' 																	" causes: ", causes_diseases),
32 | #' 							 label_size = 3)
33 | #'
34 | #' @import tidygraph
35 | #' @export
36 | transfer <- function(colname = NULL, over, direction = "out") {
37 | 	if(direction != "in" & direction != "out") {
38 | 		stop("Error, 'toward' must be one of 'in' or 'out'.")
39 | 	}
40 | 	from = ifelse(direction == "out", "subject", "object")
41 | 	toward = ifelse(from == "subject", "object", "subject")
42 | 	#print(paste("Pulling from", from, "to", toward, "data from", {{colname}}))
43 | 
44 | 	edge_data <- .E() |> filter(predicate %in% over)
45 | 	node_data <- .N()
46 | 	# so we have a set of edges, for each unique object... we need to collect expr
47 | 	node_ids <- node_data$id
48 | 	agg_results <- lapply(node_ids, function(node_id) {
49 | 		relevant_edges <- edge_data[edge_data[[toward]] == node_id,] #|> filter(object == node_id)
50 | 		source_ids <- unique(relevant_edges[[from]])  # e.g. g1, g2, g3....
51 | 		relevant_subject_nodes <- node_data |> filter(id %in% source_ids)
52 | 		relevant_data <- relevant_subject_nodes |> pull({{colname}})
53 | 		relevant_data
54 | 	})
55 | 
56 | 	# determines if the given list can be converted safely to a vector
57 | 	vec_safe <- function(lst) {
58 | 		lengths_ok <- all(lengths(lst) == 1 | lengths(lst) == 0)
59 | 		types <- sapply(lst, typeof)
60 | 		types_ok <- length(unique(types)) == 1
61 | 		lengths_ok && types_ok
62 | 	}
63 | 
64 | 	if(vec_safe(agg_results)) {
65 | 		# length-0 lists need to be replaced with a single NA
66 | 		agg_results <- lapply(agg_results, function(el) {
67 | 			if(length(el) == 0) {
68 | 				NA
69 | 			} else {
70 | 				el
71 | 			}
72 | 		})
73 | 		agg_results <- unlist(agg_results)
74 | 	}
75 | 
76 | 	return(agg_results)
77 | }
78 | 


--------------------------------------------------------------------------------
/R/knit_print.tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | #' @importFrom utils capture.output
 2 | clean_df <- function(df) {
 3 | 	new_df <- list()
 4 | 	for(colname in colnames(df)) {
 5 | 		col_i <- df[[colname]]
 6 | 
 7 | 		if(is.list(col_i)) {
 8 | 			colname <- paste0(colname, " (list)")
 9 | 		}
10 | 
11 | 		contents <- lapply(col_i, function(cell_data) {
12 | 			paste(capture.output(dput(cell_data, control = "useSource")), collapse = "")
13 | 		}) |> unlist()
14 | 
15 | 		new_df[[colname]] <- contents
16 | 	}
17 | 
18 | 	as_tibble(new_df)
19 | }
20 | 
21 | 
22 | #' Specialized print function for KGX graphs in knitted documents
23 | #' @param x A `tbl_kgx` graph to display.
24 | #' @param ... Other arguments (unused).
25 | #' @param show The maximum number of nodes and edges to display.
26 | #' @export
27 | #' @import knitr
28 | #' @importFrom kableExtra kable
29 | #' @importFrom kableExtra kable_styling
30 | #' @importFrom kableExtra column_spec
31 | #' @importFrom dplyr slice_head
32 | knit_print.tbl_kgx <- function(x, ..., show = 100) {
33 | 	graph <- x
34 | 
35 | 	g <- order_cols(graph)
36 | 
37 | 	nodes_colnames <- colnames(nodes(g))
38 | 
39 | 	nodes_sub <- clean_df(nodes(g)) |>
40 | 		slice_head(n = show)
41 | 
42 | 	nodes_kbl <- nodes_sub |>
43 | 		kable("html", escape = FALSE) |>
44 | 		kable_styling(fixed_thead = TRUE,
45 | 		              bootstrap_options = c("striped", "hover", "condensed"))
46 | 
47 | 	if("description" %in% nodes_colnames) {
48 | 		colnum <- seq_along(nodes_colnames)[nodes_colnames == "description"]
49 | 		nodes_kbl <- nodes_kbl |> column_spec(colnum, width_min = "300px")
50 | 	}
51 | 	if("synonym" %in% nodes_colnames) {
52 | 		colnum <- seq_along(nodes_colnames)[nodes_colnames == "synonym"]
53 | 		nodes_kbl <- nodes_kbl |> column_spec(colnum, width_min = "300px")
54 | 	}
55 | 
56 | 	edges_sub <- clean_df(edges(g)) |>
57 | 		slice_head(n = show)
58 | 
59 | 	edges_kbl <- edges_sub |>
60 | 		kable("html", escape = FALSE) |>
61 | 		kable_styling(fixed_thead = TRUE,
62 | 									bootstrap_options = c("striped", "hover", "condensed"))
63 | 
64 | 	nodes_total <- nrow(nodes(g))
65 | 	edges_total <- nrow(edges(g))
66 | 	nodes_showing <- nrow(nodes_sub)
67 | 	edges_showing <- nrow(edges_sub)
68 | 
69 | 	knitr::asis_output(knitr::knit_child(text = c(
70 | 		'',
71 | 		paste0("<pre>Graph with ", nodes_total, " nodes and ", edges_total, " edges. Expand sections below for details.</pre>"),
72 | 		'<details><summary><b>Node Data</b></summary>',
73 | 		paste0("<p>Showing ", nodes_showing, " of ", nodes_total, " nodes:</p>"),
74 | 		'<div style="font-size: 0.8em; max-height: 400px;overflow-y: auto;border-left: 1px solid #ddd;border-right:  1px solid #ddd;border-bottom: 1px solid #ddd;">',
75 | 		'```{r eval=TRUE, echo=FALSE}',
76 | 		'nodes_kbl',
77 | 		'```',
78 | 		'</div>',
79 | 		'</details>',
80 | 		'<details><summary><b>Edge Data</b></summary>',
81 | 		paste0("<p>Showing ", edges_showing, " of ", edges_total, " edges:</p>"),
82 | 		'<div style="font-size: 0.8em; max-height: 400px;overflow-y: auto;border-left: 1px solid #ddd;border-right:  1px solid #ddd;border-bottom: 1px solid #ddd;">',
83 | 		'```{r eval=TRUE, echo=FALSE}',
84 | 		'edges_kbl',
85 | 		'```',
86 | 		'</div>',
87 | 		'</details>',
88 | 		'<br />'
89 | 	), envir = environment(), quiet = TRUE))
90 | }
91 | 


--------------------------------------------------------------------------------
/R/tbl_kgx.R:
--------------------------------------------------------------------------------
 1 | #' Create a KGX graph object
 2 | #'
 3 | #' This function creates a new tbl_kgx object which inherits from tidygraph::tbl_graph, from node and edge dataframes, ensuring they conform to the KGX specification
 4 | #' described at https://github.com/biolink/kgx/blob/master/specification/kgx-format.md. Specifically, nodes must have an 'id' and 'category' column,
 5 | #' and edges, if provided, must have 'subject', 'predicate', and 'object' columns. The function allows graphs with no edges.
 6 | #' The function sets 'from' and 'to' columns in the edges from 'subject' and 'object' respectively, and sets the node key to 'id'.
 7 | #' Additional columns are allowed.
 8 | #'
 9 | #' This function will generally be called internally.
10 | #'
11 | #' @param nodes A data frame containing the nodes of the graph. Must have 'id' and 'category' columns.
12 | #' @param edges A data frame containing the edges of the graph. Must have 'subject', 'predicate', and 'object' columns. Can be NULL.
13 | #' @param attach_engine An engine to attach to the newly created graph for use in future queries based on the graph.
14 | #' @param ... Additional arguments passed to the function.
15 | #' @return A KGX graph object.
16 | #' @examples
17 | #' nodes <- data.frame(id = c("A", "B"), category = c("gene", "disease"))
18 | #' edges <- data.frame(subject = c("A"), predicate = c("associated_with"), object = c("B"))
19 | #' g <- tbl_kgx(nodes, edges)
20 | #' @export
21 | #' @importFrom tidygraph tbl_graph
22 | tbl_kgx <- function(nodes = NULL, edges = NULL, attach_engine = NULL, ...) {
23 | 	# nodes can be empty, if so we need to create an empty data frame
24 | 	if(nrow(nodes) == 0) {
25 | 		nodes <- data.frame(id = character(0))
26 | 		nodes$category <- list()
27 | 		edges <- data.frame(subject = character(0), predicate = character(0), object = character(0))
28 | 	 }
29 | 
30 | 	if(is.null(nodes$id)) { stop("Error: tbl_kgx nodes must have an 'id' column.") }
31 | 	if(is.null(nodes$id)) { stop("Error: tbl_kgx nodes must have an 'category' column.") }
32 | 
33 | 	# we do allow graphs with no edges
34 | 	if(!is.null(edges)) {
35 | 		if(is.null(edges$subject)) { stop("Error: tbl_kgx edges must have an 'subject' column.") }
36 | 		if(is.null(edges$predicate)) { stop("Error: tbl_kgx edges must have an 'predicate' column.") }
37 | 		if(is.null(edges$object)) { stop("Error: tbl_kgx edges must have an 'object' column.") }
38 | 
39 | 		# set canonical to and from columns from subject and object if they don't already exist
40 | 		if(!"from" %in% colnames(edges)) {
41 | 			edges$from <- edges$subject
42 | 		}
43 | 		if(!"to" %in% colnames(edges)) {
44 | 			edges$to <- edges$object
45 | 		}
46 | 	} else {
47 | 		# but if given no edges, we spec out subject, predicate, object cols at least (and to and from)
48 | 		edges <- data.frame(subject = character(), predicate = character(), object = character(),
49 | 												to = character(), from = character())
50 | 	}
51 | 
52 | 	g <- tidygraph::tbl_graph(nodes = nodes, edges = edges, node_key = "id")
53 | 
54 | 	attr(g, "last_engine") <- attach_engine
55 | 	class(g) <- c("tbl_kgx", class(g))
56 | 
57 | 	# if we have an engine, use its preferences for setting df column order
58 | 	# (order_cols uses the last attached engine)
59 | 	if(!is.null(attach_engine)) {
60 | 		g <- order_cols(g)
61 | 	}
62 | 
63 | 	return(g)
64 | }
65 | 


--------------------------------------------------------------------------------
/R/summary.neo4j_engine.R:
--------------------------------------------------------------------------------
 1 | #' Summarize contents of a Neo4j KG engine
 2 | #'
 3 | #' Given a Neo4j based KG engine, provides summary information in the form of
 4 | #' node counts, category counts across nodes, relationship type counts, and available properties.
 5 | #' General information about the graph is printed to the console, and a list of
 6 | #' dataframes with this information is returned invisibly. Also returned
 7 | #' are `cats`, `preds`, and `props` entries, containing lists of available
 8 | #' categories/predicates/properties for convenient auto-completion in RStudio.
 9 | #'
10 | #' @param object A `neo4j_engine` object
11 | #' @param ... Other parameters (not used)
12 | #' @param quiet Logical, whether to suppress printing of the summary
13 | #' @return A list of dataframes and named lists
14 | #' @export
15 | #' @examplesIf monarch_engine_check()
16 | #' # prints a readable summary and returns a list of dataframes
17 | #' stats <- monarch_engine() |> summary()
18 | #' print(stats)
19 | #'
20 | summary.neo4j_engine <- function(object, ..., quiet = FALSE) {
21 | 	if(!quiet) {
22 | 		cat("\n")
23 | 		cat("A Neo4j-backed knowledge graph engine.\n")
24 | 		cat("Gathering statistics, please wait...\n")
25 | 	}
26 | 
27 |     node_summary_df <- cypher_query_df(object, "MATCH (n) UNWIND labels(n) AS category WITH category, COUNT(n) AS count RETURN category, count ORDER BY count DESC")
28 |     edge_summary_df <- cypher_query_df(object, "MATCH ()-[r]->() RETURN type(r) AS predicate, COUNT(*) AS count ORDER BY count DESC")
29 | 
30 |     counts_query <- "
31 | 	    // Count the total number of nodes
32 | 			MATCH (n)
33 | 			RETURN 'nodes_total' AS Type, COUNT(n) AS Count
34 | 			UNION
35 | 			// Count the total number of edges
36 | 			MATCH ()-[r]->()
37 | 			RETURN 'edges_total' AS Type, COUNT(r) AS Count
38 | 	    "
39 | 
40 |     total_df <- cypher_query_df(object, counts_query)
41 |     total_nodes <- total_df$Count[1]
42 |     total_edges <- total_df$Count[2]
43 | 
44 |     properties <- cypher_query_df(object, "CALL db.propertyKeys()")$propertyKey
45 | 
46 | 
47 |     if(!quiet) {
48 |         cat("Total nodes: ", total_nodes, "\n")
49 |         cat("Total edges: ", total_edges, "\n")
50 |         cat("\n")
51 |         cat("Node category counts:\n")
52 |         # print the data frame without row names
53 |         print(node_summary_df, row.names = FALSE)
54 |         cat("\n")
55 |         cat("Edge type counts:\n")
56 |         # print the data frame without row names
57 |         print(edge_summary_df, row.names = FALSE)
58 |         cat("\n")
59 |         cat("Available node and edge properties:\n")
60 |         print(properties)
61 |         cat("\n\n")
62 |         cat("For more information about Biolink node (Class) and edge (Association) properties, see https://biolink.github.io/biolink-model/.")
63 |     }
64 | 
65 |     cats <- as.list(node_summary_df$category)
66 |     names(cats) <- cats
67 | 
68 |     preds <- as.list(edge_summary_df$predicate)
69 |     names(preds) <- preds
70 | 
71 |     props <- as.list(properties)
72 |     names(props) <- props
73 | 
74 |     return(invisible(list(node_summary = node_summary_df,
75 |     											edge_summary = edge_summary_df,
76 |     											total_nodes = total_nodes,
77 |     											total_edges = total_edges,
78 |     											cats = cats,
79 |     											preds = preds,
80 |     											props = props)))
81 | }
82 | 


--------------------------------------------------------------------------------