├── .github ├── .gitignore └── workflows │ ├── test-coverage.yaml │ └── R-CMD-check.yaml ├── src ├── .gitignore ├── Makevars.win ├── Makevars ├── multidim-array.cpp ├── graph-internal.cpp ├── basic-probs.cpp ├── anb-operate.cpp ├── data.cpp ├── table.cpp └── infer-test-wrappers.cpp ├── data ├── car.RData └── voting.RData ├── tests ├── testthat.R └── testthat │ ├── test-anb-internal.R │ ├── test-predict.R │ ├── test-dag.R │ ├── test-basic-assert.R │ ├── test-memoise.R │ ├── test-anb-cpts.R │ ├── test-wrap-grain.R │ ├── test-basic-misc.R │ ├── test-hc-bsej.R │ ├── test-basic-probs.R │ ├── test-learn-params-wanbia.r │ ├── test-anb-bn.R │ ├── test-learn-params-awnb.R │ ├── test-anb-dag.R │ ├── test-wrap-igraph.R │ ├── test-cpp-table.R │ ├── test-data-input.R │ ├── helper-common.R │ ├── test-wrap-mlr.R │ ├── test-anb-bn-operate.R │ ├── test-infer-anb-cpp.R │ ├── test-graph-internal.R │ ├── test-hc-fssj.R │ ├── test-learn-struct.R │ ├── test-bnc-dag-operate.R │ ├── test-infer-anb.R │ ├── test-data-statistics.R │ ├── test-learn-params-manb.R │ ├── test-learn-aode.r │ └── test-update.R ├── vignettes ├── pg_0001.png ├── pg_0002.png ├── pg_0003.png ├── pg_0004.png ├── header.tex ├── macros-paper.tex ├── macros-bnclassify.tex ├── algorithms.tex ├── macros-math.tex ├── includes │ └── abstract.rmd ├── macros-rjournal.tex ├── overview.Rmd └── methods.bib ├── CRAN-SUBMISSION ├── man ├── figures │ └── README-unnamed-chunk-2-1.png ├── is_aode.Rd ├── bnc_aode_bns.Rd ├── get_last.Rd ├── nb_dag.Rd ├── new_cache.Rd ├── get_but_last.Rd ├── get_null_safe.Rd ├── is.memoised.Rd ├── forget.Rd ├── get_ancestors.Rd ├── skip_testing.Rd ├── makeRLearner.bnc.Rd ├── bnc_aode.Rd ├── augment_kdb.Rd ├── graph_is_adjacent.Rd ├── dag.Rd ├── print.bnc_base.Rd ├── are_pdists.Rd ├── graph_get_adjacent.Rd ├── cmi_table.Rd ├── check_mlr_attached.Rd ├── graph_add_edges.Rd ├── skip_assert.Rd ├── compute_cll.Rd ├── learn_unprunned_tree.Rd ├── fast_equal.Rd ├── graph_connected_components.Rd ├── spode.Rd ├── are_factors.Rd ├── direct_tree.Rd ├── augment_ode.Rd ├── graph_union.Rd ├── make_cll.Rd ├── car.Rd ├── graph_subgraph.Rd ├── order_acyclic.Rd ├── cpt_vars_values.Rd ├── extract_ctgt.Rd ├── log_normalize.Rd ├── voting.Rd ├── augment_ode_arcs.Rd ├── augment_kdb_arcs.Rd ├── bootstrap_ss.Rd ├── compute_ll.Rd ├── graph_named_edge_matrix.Rd ├── predictLearner.bnc.Rd ├── complete_graph.Rd ├── trainLearner.bnc.Rd ├── map.Rd ├── local_ode_score_contrib.Rd ├── superparent_children.Rd ├── get_log_leaf_entries.Rd ├── make_cll_gradient.Rd ├── accuracy.Rd ├── identify_min_testing_depths.Rd ├── subset_by_colnames.Rd ├── direct_forest.Rd ├── identify_all_testing_depths.Rd ├── memoise_char.Rd ├── aode.Rd ├── as_mlr.Rd ├── bnc_dag.Rd ├── nb.Rd ├── grain_and_graph.Rd ├── cmi.Rd ├── bnc_bn.Rd ├── max_weight_forest.Rd ├── compute_wanbia_weights.Rd ├── plot.bnc_dag.Rd ├── predict.bnc_fit.Rd ├── loglik.Rd ├── bnc.Rd ├── inspect_bnc_bn.Rd ├── tan_chowliu.Rd ├── cv.Rd ├── inspect_bnc_dag.Rd └── greedy_wrapper.Rd ├── .gitignore ├── .Rbuildignore ├── meta ├── bnclassify-caret.R ├── download-statistics.R ├── graph-partial.r ├── submit.R ├── methods.csv └── check.r ├── R ├── frontend-dag.R ├── learn-aode.r ├── release.R ├── cv-multi.r ├── learn-params-awnb.R ├── frontend-graph.r ├── bncs.R ├── wrap-igraph.R ├── anb-internal.R ├── anb-bn.R ├── wrap-rpart.R ├── basic-probs.R ├── anb-bn-operate.R ├── predict.R ├── data-input.R ├── memoise.R ├── learn-hc.R ├── frontend-anb.r ├── learn-params-manb.R ├── basic-assert.R ├── wrap-mlr.R ├── anb-cpts.R ├── cv-update.R 
├── basic-misc.R ├── learn-chowliu.R ├── learn-params-wanbia.R ├── wrap-gRain.R └── anb-dag.R ├── bnclassify.Rproj ├── inst ├── include │ ├── data.h │ ├── basic-misc.h │ └── multidim-array.h └── CITATION ├── DESCRIPTION ├── cran-comments.md ├── NAMESPACE ├── NEWS.md ├── Makefile └── README.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -I../inst/include -------------------------------------------------------------------------------- /data/car.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/data/car.RData -------------------------------------------------------------------------------- /data/voting.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/data/voting.RData -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bnclassify) 3 | 4 | test_check("bnclassify") -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | # Disable long types from C99 or CPP11 extensions 2 | PKG_CPPFLAGS = -I../inst/include -------------------------------------------------------------------------------- /vignettes/pg_0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0001.png -------------------------------------------------------------------------------- /vignettes/pg_0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0002.png -------------------------------------------------------------------------------- /vignettes/pg_0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0003.png -------------------------------------------------------------------------------- /vignettes/pg_0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0004.png -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.4.8 2 | Date: 2024-03-13 10:57:19 UTC 3 | SHA: cd3bec7e91d5812d98d935b82da5f6e70d63554a 4 | -------------------------------------------------------------------------------- /vignettes/header.tex: -------------------------------------------------------------------------------- 1 | \input{macros-bnclassify} 2 | \input{macros-math} 3 | \input{macros-paper} 4 | \input{macros-rjournal} 5 | 
-------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/man/figures/README-unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /vignettes/macros-paper.tex: -------------------------------------------------------------------------------- 1 | \newcommand{\rtbl}[1]{Table~\ref{tbl:#1}} 2 | \newcommand{\req}[1]{Equation~\ref{eq:#1}} 3 | \newcommand{\rsec}[1]{Section~\ref{sec:#1}} 4 | \newcommand{\rfig}[1]{Figure~\ref{fig:#1}} 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .develop/ 2 | .design-and-manage/ 3 | .Rproj.user/ 4 | .Rproj.user 5 | .Rhistory 6 | .RData 7 | vignettes/bnclassify_cache/ 8 | .theory/ 9 | .paper 10 | inst/doc/ 11 | 12 | 13 | doc 14 | Meta 15 | /doc/ 16 | /Meta/ 17 | -------------------------------------------------------------------------------- /man/is_aode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bncs.R 3 | \name{is_aode} 4 | \alias{is_aode} 5 | \title{Is it an AODE?} 6 | \usage{ 7 | is_aode(x) 8 | } 9 | \description{ 10 | Is it an AODE? 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | .gitignore 4 | cran-comments.md 5 | meta 6 | Makefile 7 | ^\.travis\.yml$ 8 | ^README\.Rmd$ 9 | ^appveyor\.yml$ 10 | ^doc$ 11 | ^Meta$ 12 | ^CRAN-RELEASE$ 13 | ^Makefile$ 14 | ^Jenkinsfile$ 15 | ^\.github$ 16 | ^CRAN-SUBMISSION$ 17 | -------------------------------------------------------------------------------- /src/multidim-array.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | int entry_index(const std::vector & indices, const std::vector & dim_prod) { 8 | return entry_index( indices.begin(), dim_prod); 9 | } -------------------------------------------------------------------------------- /man/bnc_aode_bns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bncs.R 3 | \name{bnc_aode_bns} 4 | \alias{bnc_aode_bns} 5 | \title{Fits an AODE model.} 6 | \usage{ 7 | bnc_aode_bns(x, fit_models) 8 | } 9 | \description{ 10 | Fits an AODE model.
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/get_last.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{get_last} 4 | \alias{get_last} 5 | \title{Return last element of x.} 6 | \usage{ 7 | get_last(x) 8 | } 9 | \description{ 10 | If x is NULL returns NA not NULL 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/nb_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{nb_dag} 4 | \alias{nb_dag} 5 | \title{Returns a naive Bayes structure} 6 | \usage{ 7 | nb_dag(class, features) 8 | } 9 | \description{ 10 | Returns a naive Bayes structure 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/new_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{new_cache} 4 | \alias{new_cache} 5 | \title{Make a new cache.} 6 | \usage{ 7 | new_cache() 8 | } 9 | \description{ 10 | Make a new cache. 11 | } 12 | \author{ 13 | Hadley Wickham 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/get_but_last.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{get_but_last} 4 | \alias{get_but_last} 5 | \title{Return all but last element of x.} 6 | \usage{ 7 | get_but_last(x) 8 | } 9 | \description{ 10 | If x is NULL returns NA not NULL 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/get_null_safe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{get_null_safe} 4 | \alias{get_null_safe} 5 | \title{Get i-th element of x.} 6 | \usage{ 7 | get_null_safe(x, i) 8 | } 9 | \description{ 10 | If x is NULL returns NA not NULL 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/is.memoised.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{is.memoised} 4 | \alias{is.memoised} 5 | \title{Is it memoized?} 6 | \usage{ 7 | is.memoised(f) 8 | } 9 | \description{ 10 | Is it memoized? 11 | } 12 | \author{ 13 | Hadley Wickham 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/forget.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{forget} 4 | \alias{forget} 5 | \title{Forget a memoized function.} 6 | \usage{ 7 | forget(f) 8 | } 9 | \description{ 10 | Forget a memoized function. 
11 | } 12 | \author{ 13 | Hadley Wickham 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/get_ancestors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-families.R 3 | \name{get_ancestors} 4 | \alias{get_ancestors} 5 | \title{Based on gRbase::ancestors()} 6 | \usage{ 7 | get_ancestors(node, families) 8 | } 9 | \description{ 10 | Based on gRbase::ancestors() 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/skip_testing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-assert.R 3 | \name{skip_testing} 4 | \alias{skip_testing} 5 | \title{Skip while testing to isolate errors} 6 | \usage{ 7 | skip_testing() 8 | } 9 | \description{ 10 | Skip while testing to isolate errors 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /meta/bnclassify-caret.R: -------------------------------------------------------------------------------- 1 | library(caret) 2 | library(bnclassify) 3 | data("voting") 4 | fitControl <- trainControl( method = "repeatedcv", number = 10, repeats = 2) 5 | V <- na.omit(voting) 6 | data(car) 7 | set.seed(0) 8 | fit <- train(car[ , -ncol(car)], car[, 'class'], method = "nbDiscrete", 9 | trControl = fitControl ) 10 | fit 11 | -------------------------------------------------------------------------------- /man/makeRLearner.bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{makeRLearner.bnc} 4 | \alias{makeRLearner.bnc} 5 | \title{makeRLearner. Auxiliary mlr function.} 6 | \usage{ 7 | makeRLearner.bnc() 8 | } 9 | \description{ 10 | makeRLearner. Auxiliary mlr function. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/bnc_aode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bncs.R 3 | \name{bnc_aode} 4 | \alias{bnc_aode} 5 | \title{Returns a \code{c("bnc_aode", "bnc")} object.} 6 | \usage{ 7 | bnc_aode(models, class_var, features) 8 | } 9 | \description{ 10 | Returns a \code{c("bnc_aode", "bnc")} object. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/augment_kdb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_kdb} 4 | \alias{augment_kdb} 5 | \title{Arcs that do not invalidate the k-DB structure} 6 | \usage{ 7 | augment_kdb(kdbk) 8 | } 9 | \description{ 10 | Arcs that do not invalidate the k-DB structure 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/graph_is_adjacent.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_is_adjacent} 4 | \alias{graph_is_adjacent} 5 | \title{Checks whether nodes are adjacent} 6 | \usage{ 7 | graph_is_adjacent(from, to, g) 8 | } 9 | \description{ 10 | Checks whether nodes are adjacent 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-dag.R 3 | \name{dag} 4 | \alias{dag} 5 | \title{Get underlying graph. This should be exported.} 6 | \usage{ 7 | dag(x) 8 | } 9 | \arguments{ 10 | \item{x}{the bnc object} 11 | } 12 | \description{ 13 | Get underlying graph. This should be exported. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/print.bnc_base.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bnc-dag-operate.R 3 | \name{print.bnc_base} 4 | \alias{print.bnc_base} 5 | \title{Print basic information about a classifier.} 6 | \usage{ 7 | \method{print}{bnc_base}(x, ...) 8 | } 9 | \description{ 10 | Print basic information about a classifier. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/are_pdists.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-probs.R 3 | \name{are_pdists} 4 | \alias{are_pdists} 5 | \title{Returns \code{TRUE} if \code{x} is a valid probability distribution.} 6 | \usage{ 7 | are_pdists(x) 8 | } 9 | \description{ 10 | Returns \code{TRUE} if \code{x} is a valid probability distribution. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/graph_get_adjacent.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_get_adjacent} 4 | \alias{graph_get_adjacent} 5 | \title{Finds adjacent nodes. Has not been tested much} 6 | \usage{ 7 | graph_get_adjacent(node, g) 8 | } 9 | \description{ 10 | Finds adjacent nodes.
Has not been tested much 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /R/frontend-dag.R: -------------------------------------------------------------------------------- 1 | # TODO: merge with frontend anb 2 | 3 | # Adds arcs from parents to node 4 | condition_on <- function(parents, nodes, x) { 5 | # Replicate parents for each node 6 | wparents <- rep(parents, length(nodes)) 7 | wnodes <- rep(nodes, each = length(parents)) 8 | # Add edges 9 | g <- add_edges(wparents, wnodes, x) 10 | if (!skip_testing()) stopifnot(is_dag_graph(g)) 11 | g 12 | } -------------------------------------------------------------------------------- /man/cmi_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-statistics.R 3 | \name{cmi_table} 4 | \alias{cmi_table} 5 | \title{Returns the conditional mutual information of three variables.} 6 | \usage{ 7 | cmi_table(xyz_freqs, unit = "log") 8 | } 9 | \description{ 10 | Returns the conditional mutual information of three variables. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/check_mlr_attached.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{check_mlr_attached} 4 | \alias{check_mlr_attached} 5 | \title{Checks if mlr is attached.} 6 | \usage{ 7 | check_mlr_attached() 8 | } 9 | \description{ 10 | mlr must be attached because otherwise `getMlrOptions()` in `makeLearner` will not be found. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/graph_add_edges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_add_edges} 4 | \alias{graph_add_edges} 5 | \title{Add edges 6 | Does not allow edges among adjacent nodes} 7 | \usage{ 8 | graph_add_edges(from, to, g) 9 | } 10 | \description{ 11 | Add edges 12 | Does not allow edges among adjacent nodes 13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /man/skip_assert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-assert.R 3 | \name{skip_assert} 4 | \alias{skip_assert} 5 | \title{Whether to do checks or not. Set TRUE to speed up debugging or building.} 6 | \usage{ 7 | skip_assert() 8 | } 9 | \description{ 10 | Whether to do checks or not. Set TRUE to speed up debugging or building.
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-anb-internal.R: -------------------------------------------------------------------------------- 1 | context("anb internal") 2 | 3 | test_that("nb_dag", { 4 | # Nominal 5 | d <- nb_dag('f', letters[1:5]) 6 | expect_equal(graph_num_arcs(d), 5) 7 | expect_equal(d$edgemode, "directed") 8 | # No features 9 | d <- nb_dag('f', NULL) 10 | expect_equal(graph_num_arcs(d), 0) 11 | expect_equal(graph_num_nodes(d), 1) 12 | expect_equal(d$edgemode, "directed") 13 | }) 14 | -------------------------------------------------------------------------------- /man/compute_cll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/infer.R 3 | \name{compute_cll} 4 | \alias{compute_cll} 5 | \title{Computes the conditional log-likelihood of the model on the provided data.} 6 | \usage{ 7 | compute_cll(x, dataset) 8 | } 9 | \description{ 10 | Computes the conditional log-likelihood of the model on the provided data. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/learn_unprunned_tree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-rpart.R 3 | \name{learn_unprunned_tree} 4 | \alias{learn_unprunned_tree} 5 | \title{Learns an unpruned \code{rpart} recursive partition.} 6 | \usage{ 7 | learn_unprunned_tree(dataset, class) 8 | } 9 | \description{ 10 | Learns an unpruned \code{rpart} recursive partition. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-predict.R: -------------------------------------------------------------------------------- 1 | context("predict") 2 | 3 | test_that("Maximum a posteriori", { 4 | # gRain implementation change 5 | # skip_if_not_installed('gRain') 6 | # h <- nbvote() 7 | # pred <- predict(h, voting, prob = TRUE) 8 | # p <- map(pred) 9 | # accu <- sum(p == voting$Class) / nrow(voting) 10 | # expect_equal(accu, 0.9034483, tolerance = 1e-7) 11 | # gRain implementation change 12 | }) -------------------------------------------------------------------------------- /bnclassify.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: No 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /man/fast_equal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{fast_equal} 4 | \alias{fast_equal} 5 | \title{Compares all elements in a to b} 6 | \usage{ 7 | fast_equal(a, b) 8 | } 9 | \arguments{ 10 | \item{b}{numeric.
Must be length one but no check is performed.} 11 | } 12 | \description{ 13 | Compares all elements in a to b 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/graph_connected_components.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_connected_components} 4 | \alias{graph_connected_components} 5 | \title{connected_components} 6 | \usage{ 7 | graph_connected_components(g) 8 | } 9 | \arguments{ 10 | \item{g}{graph_internal.} 11 | } 12 | \description{ 13 | connected_components 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/spode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-aode.r 3 | \name{spode} 4 | \alias{spode} 5 | \title{Returns a Superparent one-dependence estimator.} 6 | \usage{ 7 | spode(sp, features, class) 8 | } 9 | \arguments{ 10 | \item{sp}{character The superparent.} 11 | } 12 | \description{ 13 | Returns a Superparent one-dependence estimator. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/are_factors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-assert.R 3 | \name{are_factors} 4 | \alias{are_factors} 5 | \title{Checks if all columns in a data frame are factors.} 6 | \usage{ 7 | are_factors(x) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{data.frame}} 11 | } 12 | \description{ 13 | Checks if all columns in a data frame are factors. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/direct_tree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{direct_tree} 4 | \alias{direct_tree} 5 | \title{Direct an undirected graph.} 6 | \usage{ 7 | direct_tree(g, root = NULL) 8 | } 9 | \value{ 10 | A graph. The directed tree. 11 | } 12 | \description{ 13 | The graph must be connected and the function produces a directed tree. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/augment_ode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_ode} 4 | \alias{augment_ode} 5 | \title{Arcs that do not invalidate the tree-like structure} 6 | \usage{ 7 | augment_ode(bnc_dag, ...) 
8 | } 9 | \arguments{ 10 | \item{...}{Ignored.} 11 | } 12 | \description{ 13 | Arcs that do not invalidate the tree-like structure 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/graph_union.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{graph_union} 4 | \alias{graph_union} 5 | \title{Merges multiple disjoint graphs into a single one.} 6 | \usage{ 7 | graph_union(g) 8 | } 9 | \arguments{ 10 | \item{g}{A graph} 11 | } 12 | \value{ 13 | A graph 14 | } 15 | \description{ 16 | Merges multiple disjoint graphs into a single one. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/make_cll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{make_cll} 4 | \alias{make_cll} 5 | \title{Returns a function to compute negative conditional log-likelihood given feature weights} 6 | \usage{ 7 | make_cll(class_var, dataset) 8 | } 9 | \description{ 10 | Returns a function to compute negative conditional log-likelihood given feature weights 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/car.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \docType{data} 4 | \name{car} 5 | \alias{car} 6 | \title{Car Evaluation Data Set.} 7 | \format{ 8 | A \code{data.frame} with 7 columns and 1728 rows. 9 | } 10 | \source{ 11 | \url{https://goo.gl/GTXrCz} 12 | } 13 | \description{ 14 | Data set from the UCI repository: 15 | \url{https://archive.ics.uci.edu/ml/datasets/Car+Evaluation}. 16 | } 17 | -------------------------------------------------------------------------------- /man/graph_subgraph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_subgraph} 4 | \alias{graph_subgraph} 5 | \title{Subgraph. 6 | Only for a directed graph?} 7 | \usage{ 8 | graph_subgraph(nodes, g) 9 | } 10 | \arguments{ 11 | \item{nodes}{character} 12 | 13 | \item{g}{graph_internal.} 14 | } 15 | \description{ 16 | Subgraph. 17 | Only for a directed graph? 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/order_acyclic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-families.R 3 | \name{order_acyclic} 4 | \alias{order_acyclic} 5 | \title{Provide an acyclic ordering (i.e., a topological sort).} 6 | \usage{ 7 | order_acyclic(families) 8 | } 9 | \description{ 10 | Provide an acyclic ordering (i.e., a topological sort). 11 | } 12 | \references{ 13 | Bang-Jensen and Gutin, 2007, page 14.
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/cpt_vars_values.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-cpts.R 3 | \name{cpt_vars_values} 4 | \alias{cpt_vars_values} 5 | \title{Get values just from the first dimension of each variable's own CPT, not checking for consistency 6 | in others.} 7 | \usage{ 8 | cpt_vars_values(cpts) 9 | } 10 | \description{ 11 | Get values just from the first dimension of each variable's own CPT, not checking for consistency 12 | in others. 13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /man/extract_ctgt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-statistics.R 3 | \name{extract_ctgt} 4 | \alias{extract_ctgt} 5 | \title{Returns a contingency table over the variables.} 6 | \usage{ 7 | extract_ctgt(cols, dataset) 8 | } 9 | \description{ 10 | Each variable may be a character vector. 11 | } 12 | \details{ 13 | Any rows with incomplete observations of the variables are ignored. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/log_normalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-probs.R 3 | \name{log_normalize} 4 | \alias{log_normalize} 5 | \title{Normalize log probabilities.} 6 | \usage{ 7 | log_normalize(lp) 8 | } 9 | \description{ 10 | Uses the log-sum-exp trick. 11 | } 12 | \references{ 13 | Murphy KP (2012). \emph{Machine learning: a probabilistic 14 | perspective}. The MIT Press. pp. 86-87. 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/voting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \docType{data} 4 | \name{voting} 5 | \alias{voting} 6 | \title{Congress Voting Data Set.} 7 | \format{ 8 | A \code{data.frame} with 17 columns and 435 rows. 9 | } 10 | \source{ 11 | \url{https://goo.gl/GTXrCz} 12 | } 13 | \description{ 14 | Data set from the UCI repository 15 | \url{https://archive.ics.uci.edu/ml/datasets/Congressional+Voting+Records}. 16 | } 17 | -------------------------------------------------------------------------------- /man/augment_ode_arcs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_ode_arcs} 4 | \alias{augment_ode_arcs} 5 | \title{Returns augmenting arcs that do not invalidate the ODE.} 6 | \usage{ 7 | augment_ode_arcs(bnc_dag) 8 | } 9 | \value{ 10 | a character matrix. NULL if no arcs can be added. 11 | } 12 | \description{ 13 | Returns augmenting arcs that do not invalidate the ODE.
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/augment_kdb_arcs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_kdb_arcs} 4 | \alias{augment_kdb_arcs} 5 | \title{Returns augmenting arcs that do not invalidate the k-DB.} 6 | \usage{ 7 | augment_kdb_arcs(bnc_dag, k) 8 | } 9 | \value{ 10 | a character matrix. NULL if no arcs can be added. 11 | } 12 | \description{ 13 | Returns augmenting arcs that do not invalidate the k-DB. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/bootstrap_ss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{bootstrap_ss} 4 | \alias{bootstrap_ss} 5 | \title{Return a bootstrap sub-sample.} 6 | \usage{ 7 | bootstrap_ss(dataset, proportion) 8 | } 9 | \arguments{ 10 | \item{dataset}{a \code{data.frame}} 11 | 12 | \item{proportion}{numeric given as fraction of \code{dataset} size} 13 | } 14 | \description{ 15 | Return a bootstrap sub-sample. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/compute_ll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/infer.R 3 | \name{compute_ll} 4 | \alias{compute_ll} 5 | \title{Computes log-likelihood of the model on the provided data.} 6 | \usage{ 7 | compute_ll(x, dataset) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{\link{bnc_bn}} object.} 11 | 12 | \item{dataset}{A data frame.} 13 | } 14 | \description{ 15 | Computes log-likelihood of the model on the provided data. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/graph_named_edge_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_named_edge_matrix} 4 | \alias{graph_named_edge_matrix} 5 | \title{Returns an edge matrix with node names (instead of node indices).} 6 | \usage{ 7 | graph_named_edge_matrix(x) 8 | } 9 | \value{ 10 | A character matrix. 11 | } 12 | \description{ 13 | Returns an edge matrix with node names (instead of node indices). 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/predictLearner.bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{predictLearner.bnc} 4 | \alias{predictLearner.bnc} 5 | \title{predictLearner. Auxiliary mlr function.} 6 | \usage{ 7 | predictLearner.bnc(.learner, .model, .newdata, ...) 8 | } 9 | \arguments{ 10 | \item{.learner, .model, .newdata}{Internal.} 11 | 12 | \item{...}{Internal.} 13 | } 14 | \description{ 15 | predictLearner. Auxiliary mlr function. 
16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /inst/include/data.h: -------------------------------------------------------------------------------- 1 | #ifndef bnclassify_data_H 2 | #define bnclassify_data_H 3 | 4 | #include 5 | 6 | bool hasna(const Rcpp::DataFrame & newdata); 7 | bool hasna_features(const Rcpp::DataFrame & newdata, const SEXP & features); 8 | // TODO: This should be called at instance level, not data frame! This way, if the data set is complete, it goes through it a couple of times. 9 | Rcpp::DataFrame trim_dataset_cpp(const Rcpp::DataFrame & dataset, const Rcpp::CharacterVector & features); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /man/complete_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-graph.r 3 | \name{complete_graph} 4 | \alias{complete_graph} 5 | \title{Returns a complete unweighted graph with the given nodes.} 6 | \usage{ 7 | complete_graph(nodes) 8 | } 9 | \arguments{ 10 | \item{nodes}{A character vector.} 11 | } 12 | \value{ 13 | a \code{graphNEL} object. 14 | } 15 | \description{ 16 | Returns a complete unweighted graph with the given nodes. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/trainLearner.bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{trainLearner.bnc} 4 | \alias{trainLearner.bnc} 5 | \title{trainLearner. Auxiliary mlr function.} 6 | \usage{ 7 | trainLearner.bnc(.learner, .task, .subset, .weights, ...) 8 | } 9 | \arguments{ 10 | \item{.learner, .task, .subset, .weights}{Internal.} 11 | 12 | \item{...}{Internal.} 13 | } 14 | \description{ 15 | trainLearner. Auxiliary mlr function. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{map} 4 | \alias{map} 5 | \title{Assigns instances to the most likely class.} 6 | \usage{ 7 | map(pred) 8 | } 9 | \arguments{ 10 | \item{pred}{A numeric matrix. Each row corresponds to class posterior 11 | probabilities for an instance.} 12 | } 13 | \value{ 14 | a factor with the same levels as the class variable. 15 | } 16 | \description{ 17 | Ties are resolved randomly. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/local_ode_score_contrib.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-chowliu.R 3 | \name{local_ode_score_contrib} 4 | \alias{local_ode_score_contrib} 5 | \title{Returns pairwise component of ODE (penalized) log-likelihood scores. 6 | In natural logarithms.} 7 | \usage{ 8 | local_ode_score_contrib(x, y, class, dataset) 9 | } 10 | \description{ 11 | Returns pairwise component of ODE (penalized) log-likelihood scores. 12 | In natural logarithms. 
13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /meta/download-statistics.R: -------------------------------------------------------------------------------- 1 | df <- cranlogs::cran_downloads('bnclassify', from = "2014-01-01", to = "2018-01-01") 2 | sum(df$count) 3 | 4 | 5 | df <- cranlogs::cran_downloads('bnclassify', from = "2014-01-01", to = "2019-09-25") 6 | sum(df$count) 7 | 8 | # install.packages('adjustedcranlogs') 9 | library(adjustedcranlogs) 10 | df <- adjustedcranlogs::adj_cran_downloads('bnclassify', from = "2014-01-01", to = "2019-09-25") 11 | head(df) 12 | tail(df) 13 | plot(df$total_downloads) 14 | plot(df$adjusted_total_downloads) 15 | -------------------------------------------------------------------------------- /man/superparent_children.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{superparent_children} 4 | \alias{superparent_children} 5 | \title{Return nodes which can be superparents along with their possible children.} 6 | \usage{ 7 | superparent_children(bnc_dag) 8 | } 9 | \value{ 10 | list of \code{search_state}. NULL if no orphans 11 | } 12 | \description{ 13 | Return nodes which can be superparents along with their possible children. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /R/learn-aode.r: -------------------------------------------------------------------------------- 1 | #' Returns a Superparent one-dependence estimator. 2 | #' 3 | #' @param sp character The superparent. 4 | #' @keywords internal 5 | spode <- function(sp, features, class) { 6 | stopifnot(length(sp) == 1) 7 | stopifnot(length(class) == 1) 8 | stopifnot(class != sp) 9 | stopifnot(!class %in% features) 10 | features <- setdiff(features, sp) 11 | features_graph <- nb_dag(class = sp, features = features) 12 | dag <- superimpose_node(node = class, dag = features_graph) 13 | bnc_dag(dag = dag, class = class) 14 | } -------------------------------------------------------------------------------- /man/get_log_leaf_entries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{get_log_leaf_entries} 4 | \alias{get_log_leaf_entries} 5 | \title{Assuming that the cpt is a leaf, returns 1 instead of a CPT entry when value missing} 6 | \usage{ 7 | get_log_leaf_entries(cpt, x) 8 | } 9 | \arguments{ 10 | \item{x}{a vector of values} 11 | } 12 | \description{ 13 | Assuming that the cpt is a leaf, returns 1 instead of a CPT entry when value missing 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/make_cll_gradient.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{make_cll_gradient} 4 | \alias{make_cll_gradient} 5 | \title{Returns a function to compute the gradient of negative conditional log-likelihood with respect to feature weights} 6 | \usage{ 7 | make_cll_gradient(class_var, dataset) 8 | } 9 | \description{ 10 | Returns a function to compute the gradient of negative conditional log-likelihood with respect to feature weights 11 | } 12 | 
\keyword{internal} 13 | -------------------------------------------------------------------------------- /meta/graph-partial.r: -------------------------------------------------------------------------------- 1 | # 2 | # # I could currently use undirected maybe 3 | # 4 | # 5 | # # Partially directed graph with D and U edges 6 | # ## Data 7 | # - The to - from matrix 8 | # - The edge_mode vector of same length as matrix rows 9 | # 10 | # ## Funcs 11 | # - add edge. does not add node 12 | # - add arc. does not add node 13 | # 14 | # # Undirected 15 | # 16 | # ## Funs 17 | # - in_graph. used for discard reversed, for example, or similar. 18 | # - compare_graphs 19 | # 20 | # - subgraph. 21 | # - direct tree. This is a point here. -------------------------------------------------------------------------------- /src/graph-internal.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | LogicalVector graph_node_parents_inds(CharacterMatrix edges, CharacterVector node) { 6 | if (node.size() != 1) stop("Must be a single element."); 7 | // TODO: access by column name!!! 8 | const CharacterMatrix::Column & to = edges(_, 1); 9 | LogicalVector ind = to == node; 10 | // const CharacterMatrix::Column & from = edges(_, 0); 11 | // return from(ind); 12 | // unname(edges[ind, 'from']) 13 | return ind; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /man/accuracy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{accuracy} 4 | \alias{accuracy} 5 | \title{Compute predictive accuracy.} 6 | \usage{ 7 | accuracy(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{A vector of predicted labels.} 11 | 12 | \item{y}{A vector of true labels.} 13 | } 14 | \description{ 15 | Compute predictive accuracy. 16 | } 17 | \examples{ 18 | 19 | data(car) 20 | nb <- bnc('nb', 'class', car, smooth = 1) 21 | p <- predict(nb, car) 22 | accuracy(p, car$class) 23 | } 24 | -------------------------------------------------------------------------------- /man/identify_min_testing_depths.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-rpart.R 3 | \name{identify_min_testing_depths} 4 | \alias{identify_min_testing_depths} 5 | \title{Identifies the lowest (closest to root) depths at which the features of a 6 | classification tree are tested.} 7 | \usage{ 8 | identify_min_testing_depths(tree) 9 | } 10 | \description{ 11 | Identifies the lowest (closest to root) depths at which the features of a 12 | classification tree are tested. 
13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /man/subset_by_colnames.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{subset_by_colnames} 4 | \alias{subset_by_colnames} 5 | \title{Subset a 2D structure by a vector of column names.} 6 | \usage{ 7 | subset_by_colnames(colnames, data) 8 | } 9 | \arguments{ 10 | \item{colnames}{a character vector} 11 | 12 | \item{data}{a matrix or data frame} 13 | } 14 | \description{ 15 | Not all colnames are necessarily in the columns of data; in that case this 16 | returns NA. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /vignettes/macros-bnclassify.tex: -------------------------------------------------------------------------------- 1 | \usepackage{subcaption} 2 | \def\tecvin/{``methods'' vignette} 3 | \def\tecvindesc/{details on the underlying methods and documents implementation specifics, especially where they differ from or are undocumented in the original paper} 4 | \def\invin/{``usage'' vignette} 5 | \def\invindesc/{detailed usage examples and shows how to combine the functions} 6 | \def\overvindesc/{an overview of the package and background on the implemented methods} 7 | \def\pkghelp/{concise overview of the functionalities, with pointers to relevant functions and their documentation} -------------------------------------------------------------------------------- /R/release.R: -------------------------------------------------------------------------------- 1 | release_questions <- function() { 2 | c( 3 | "Have you checked all TODOs in code?", 4 | "Have you removed all commented-out, unused code? Also in tests.", 5 | "Have you checked on Travis?", 6 | "Does cran_comments.md show the notes from R-devel?", 7 | "Does cran_comments.md specify the correct versions of R where tested?", 8 | "Is dont_run set for long-running examples?", 9 | "Did you build vignettes?", 10 | "Have you checked spelling in vignettes and help files?", 11 | "Did you update news? Look at the commits since last version" 12 | ) 13 | } -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Article", 3 | title = "bnclassify: Learning Bayesian Network Classifiers", 4 | author = "Mihaljevi\'{c}, Bojan", "Bielza, Concha", "Larranaga, Pedro", 5 | header = "To cite bnclassify in publications, please use", 6 | journal = "The R Journal", 7 | year = 2018, 8 | volume = 10, 9 | number = 2, 10 | pages = "455--468", 11 | doi = "10.32614/RJ-2018-073", 12 | textVersion = "Mihaljevic, Bojan, Pedro Larranaga, and Concha Bielza. 2018. bnclassify: Learning Bayesian Network Classifiers. The R Journal 10 (2): 455-468" 13 | ) -------------------------------------------------------------------------------- /man/direct_forest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{direct_forest} 4 | \alias{direct_forest} 5 | \title{Direct an undirected graph.} 6 | \usage{ 7 | direct_forest(g, root = NULL) 8 | } 9 | \arguments{ 10 | \item{g}{An undirected graph.} 11 | 12 | \item{root}{A character.
Optional tree root.} 13 | } 14 | \value{ 15 | A directed graph 16 | } 17 | \description{ 18 | Starting from a \code{root} node, directs all arcs away from it and applies 19 | the same recursively to its children and descendants. Produces a directed 20 | forest. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/identify_all_testing_depths.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-rpart.R 3 | \name{identify_all_testing_depths} 4 | \alias{identify_all_testing_depths} 5 | \title{Identifies all depths at which the features of a classification tree are 6 | tested.} 7 | \usage{ 8 | identify_all_testing_depths(tree) 9 | } 10 | \arguments{ 11 | \item{tree}{an \code{rpart} object} 12 | } 13 | \value{ 14 | a numeric vector. The names are the names of the variables. 15 | } 16 | \description{ 17 | Identifies all depths at which the features of a classification tree are 18 | tested. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/memoise_char.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{memoise_char} 4 | \alias{memoise_char} 5 | \title{Memoise a function.} 6 | \usage{ 7 | memoise_char(f) 8 | } 9 | \arguments{ 10 | \item{f}{a function} 11 | } 12 | \description{ 13 | Based on Hadley Wickham's memoise package. Assumes that the argument to f is a 14 | character vector. 15 | } 16 | \details{ 17 | This function is a slightly modified version of 18 | \code{memoise} to avoid the use of digest. The rest of the functions are 19 | copied as is from memoise. 20 | } 21 | \author{ 22 | Hadley Wickham, Bojan Mihaljevic 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/aode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-struct.R 3 | \name{aode} 4 | \alias{aode} 5 | \title{Learn an AODE ensemble.} 6 | \usage{ 7 | aode(class, dataset, features = NULL) 8 | } 9 | \arguments{ 10 | \item{class}{A character. Name of the class variable.} 11 | 12 | \item{dataset}{The data frame from which to learn the classifier.} 13 | 14 | \item{features}{A character vector. The names of the features. This argument 15 | is ignored if \code{dataset} is provided.} 16 | } 17 | \value{ 18 | A \code{bnc_aode} or a \code{bnc_dag} (if returning a naive Bayes) 19 | } 20 | \description{ 21 | If there is a single predictor then returns a naive Bayes.
22 | } 23 | -------------------------------------------------------------------------------- /R/cv-multi.r: -------------------------------------------------------------------------------- 1 | # Check the class is common to all dags 2 | get_common_class <- function(x) { 3 | class <- unique(vapply(x, class_var, FUN.VALUE = character(1))) 4 | # Check it is unique 5 | assertthat::is.string(class) 6 | class 7 | } 8 | ensure_list <- function(x, type = NULL) { 9 | if (!is_just(x, "list")) { 10 | x <- list(x) 11 | } 12 | if (!is.null(type)) { 13 | all_type <- all(vapply(x, inherits, type, FUN.VALUE = logical(1))) 14 | if (!all_type) stop(paste0("All elements must inherit from ", type)) 15 | } 16 | x 17 | } 18 | # Unnamed so that it would pass no names to objects created by iterating on it 19 | ensure_multi_list <- function(x, type = NULL) { 20 | unname(ensure_list(x, type = type)) 21 | } -------------------------------------------------------------------------------- /man/as_mlr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{as_mlr} 4 | \alias{as_mlr} 5 | \title{Convert to \code{mlr}.} 6 | \usage{ 7 | as_mlr(x, dag, id = "1") 8 | } 9 | \arguments{ 10 | \item{x}{A \code{\link{bnc_bn}} object.} 11 | 12 | \item{dag}{A logical. Whether to learn structure on each training subsample. 13 | Parameters are always learned.} 14 | 15 | \item{id}{A character.} 16 | } 17 | \description{ 18 | Convert a \code{\link{bnc_bn}} to a \code{\link[mlr]{Learner}} 19 | object.
20 | } 21 | \examples{ 22 | data(car) 23 | nb <- bnc('nb', 'class', car, smooth = 1) 24 | \dontrun{library(mlr)} 25 | \dontrun{nb_mlr <- as_mlr(nb, dag = FALSE, id = "ode_cl_aic")} 26 | \dontrun{nb_mlr} 27 | } 28 | -------------------------------------------------------------------------------- /tests/testthat/test-basic-assert.R: -------------------------------------------------------------------------------- 1 | context("Assert") 2 | 3 | test_that("non-empty complete Nominal ", { 4 | stopifnot(is_non_empty_complete(letters)) 5 | }) 6 | test_that("non-empty complete empty", { 7 | expect_error(check_non_empty_complete(NULL), "complete") 8 | expect_error(check_non_empty_complete(character()), "complete") 9 | }) 10 | test_that("non-empty complete 1 NA", { 11 | expect_error(check_non_empty_complete(c(letters, NA)), "complete") 12 | expect_error(check_non_empty_complete(NA), "complete") 13 | expect_error(check_non_empty_complete(rep(NA, 1e2)), "complete") 14 | }) 15 | test_that("is just list",{ 16 | expect_true(is_just(list(), "list")) 17 | t <- structure(list(), class="test") 18 | expect_true(!is_just(t, "list")) 19 | }) -------------------------------------------------------------------------------- /man/bnc_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \name{bnc_dag} 4 | \alias{bnc_dag} 5 | \title{Bayesian network classifier structure.} 6 | \description{ 7 | A Bayesian network classifier structure, returned by functions such as 8 | \code{\link{nb}} and \code{\link{tan_cl}}. You can plot its structure (with 9 | \code{\link[=plot.bnc_dag]{plot}}), print a summary to console 10 | (\code{\link[=print.bnc_base]{print}}), inspect it with functions documented 11 | in \code{\link{inspect_bnc_dag}}, and convert it to a graph object with 12 | \code{\link{grain_and_graph}}. 
13 | } 14 | \examples{ 15 | data(car) 16 | nb <- tan_cl('class', car) 17 | nb 18 | \dontrun{plot(nb)} 19 | narcs(nb) 20 | } 21 | -------------------------------------------------------------------------------- /meta/submit.R: -------------------------------------------------------------------------------- 1 | ## ========================================= ## 2 | ## Submit ## 3 | ## ========================================= ## 4 | 5 | # must build vignettes first 6 | devtools::build_vignettes() 7 | # Must use the build args for the check to pass on Windows 8 | build_args <- c('--resave-data','--compact-vignettes=both') 9 | devtools::build(args = build_args ) 10 | 11 | check_args <- '--as-cran' 12 | # check_version = TRUE # seems this argument of devtools::check has been removed 13 | devtools::check(args = check_args , cran = TRUE, build_args = build_args ) 14 | 15 | # Full, slow release with questions 16 | # devtools::release(args = build_args) 17 | # Quick submission 18 | # devtools::submit_cran(args = build_args) -------------------------------------------------------------------------------- /meta/methods.csv: -------------------------------------------------------------------------------- 1 | Structure,Structure learning,,,,,Parameter learning,,,Other 2 | Structure,Search algorithm,Score,Feature selection,Function,Paper,Method Name,Paper,Feature selection, 3 | NB,-,-,-,nb,Minsky1961,MANB,Dash2002,Bayesian,Plotting* 4 | TAN/FAN,CL-ODE,"log-lik, AIC, BIC",-,tan_cl,Friedman1997,AWNB,Hall2007,Weighting,cross-validation 5 | TAN,TAN-HC,accuracy,-,tan_hc,Keogh2002,WANBIA,Zaidi2013,Weighting,"is_ode(), is_nb(), …" 6 | TAN,TAN-HCSP,accuracy,-,tan_hcsp,Keogh2002,,,,"features(), classes()" 7 | SNB ,FSSJ,accuracy,forward,fssj,Pazzani1996,,,,families() 8 | SNB ,BSEJ,accuracy,backward,bsej,Pazzani1996,,,,as_bnlearn() 9 | AODE,-,-,-,aode,Webb2005,,,,as_mlr() 10 | ODE-MNET,CL-MNET,"log-lik, AIC, BIC",-,mnet_cl,Friedman1997,,,,predict() 11 | kDB,kDB,accuracy,-,kdb,Blanco2005,,,, 12 | -------------------------------------------------------------------------------- /src/basic-probs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | using namespace std::placeholders; 4 | 5 | // [[Rcpp::export]] 6 | NumericVector smooth_sideeffect(NumericVector ctgt, double smooth) { 7 | transform(ctgt.begin(), ctgt.end(), ctgt.begin(), 8 | bind(std::plus(), _1, smooth)); 9 | return ctgt; 10 | } 11 | 12 | // [[Rcpp::export]] 13 | NumericVector exp_sideeffect(NumericVector p) { 14 | double (*dexp)(double) = &std::exp; 15 | std::transform(p.begin(), p.end(), p.begin(), dexp); 16 | return p; 17 | } 18 | 19 | /*** R 20 | a <- c(0.2, 2, -1, 0) 21 | b <- c(0.2, 2, -1, 0) 22 | exp_sideeffect(a) 23 | stopifnot(all.equal(log(a), b)) 24 | 25 | a <- 1:5 26 | smooth_sideeffect(a, 10) 27 | smooth_sideeffect(a, -1) 28 | smooth_sideeffect(a, 1:5) 29 | */ 30 | -------------------------------------------------------------------------------- /man/nb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-struct.R 3 | \name{nb} 4 | \alias{nb} 5 | \title{Learn a naive Bayes network structure.} 6 | \usage{ 7 | nb(class, dataset = NULL, features = NULL) 8 | } 9 | \arguments{ 10 | \item{class}{A character. 
Name of the class variable.} 11 | 12 | \item{dataset}{The data frame from which to learn the classifier.} 13 | 14 | \item{features}{A character vector. The names of the features. This argument 15 | is ignored if \code{dataset} is provided.} 16 | } 17 | \value{ 18 | A \code{\link{bnc_dag}} object. 19 | } 20 | \description{ 21 | Learn a naive Bayes network structure. 22 | } 23 | \examples{ 24 | 25 | data(car) 26 | nb <- nb('class', car) 27 | nb2 <- nb('class', features = letters[1:10]) 28 | \dontrun{plot(nb2)} 29 | } 30 | -------------------------------------------------------------------------------- /vignettes/macros-math.tex: -------------------------------------------------------------------------------- 1 | \usepackage{centernot} 2 | \newcommand{\bigCI}{\mathrel{\text{\scalebox{1.07}{$\perp\mkern-10mu\perp$}}}} 3 | \newcommand{\nbigCI}{\centernot{\bigCI}} 4 | \newcommand{\CI}{\mathrel{\perp\mspace{-10mu}\perp}} 5 | \newcommand{\nCI}{\centernot{\CI}} 6 | 7 | \DeclareMathOperator*{\argmax}{arg\,max} 8 | 9 | \def\X/{\ensuremath{\mathbf{X}}} 10 | \def\x/{\ensuremath{\mathbf{x}}} 11 | \newcommand{\ith}[1]{\ensuremath{#1^{(i)}}} 12 | \def\coef/{\boldsymbol{\beta}} 13 | \def\mthetas/{\ensuremath{\boldsymbol{\theta}}} 14 | \def\gstuc/{\ensuremath{\mathcal{G}}} 15 | \def\pcgx/{\ensuremath{P(c\mid\mathbf{x})}} 16 | \def\pcxemp/{\ensuremath{\widehat P(c, \mathbf{x})}} 17 | \def\pcx/{\ensuremath{P(c, \mathbf{x})}} 18 | \def\PCX/{\ensuremath{P(C, \mathbf{X})}} 19 | \def\PCGX/{\ensuremath{P(C\mid\mathbf{X})}} 20 | -------------------------------------------------------------------------------- /tests/testthat/test-memoise.R: -------------------------------------------------------------------------------- 1 | context("memoise") 2 | 3 | test_that("memoise char nominal", { 4 | p <- function(x) { 5 | print(sample(letters, 1)) 6 | return() 7 | } 8 | set.seed(0) 9 | m <- memoise_char(p) 10 | expect_output({invisible(m(letters)); invisible(m(letters)) }, "x") 11 | set.seed(0) 12 | m <- memoise_char(p); 13 | expect_output({invisible(m("a")); invisible(m("a")) }, "x") 14 | m <- memoise_char(p) 15 | expect_error(m(NULL)) 16 | }) 17 | 18 | test_that("forget nominal", { 19 | p <- function(x) { 20 | print(sample(letters, 1)) 21 | return() 22 | } 23 | suppressWarnings(RNGversion("3.5.0")) 24 | set.seed(0) 25 | m <- memoise_char(p) 26 | expect_output({invisible(m(letters)); invisible(m(letters)) }, "x") 27 | forget(m) 28 | expect_output({invisible(m(letters)); }, "g") 29 | }) -------------------------------------------------------------------------------- /inst/include/basic-misc.h: -------------------------------------------------------------------------------- 1 | #ifndef bnclassify_basicmisc_H 2 | #define bnclassify_basicmisc_H 3 | 4 | #include 5 | 6 | bool are_disjoint(Rcpp::Nullable x, Rcpp::Nullable y); 7 | /** 8 | * A comparison that does not raise a compiler warning. 9 | */ 10 | bool safediff(unsigned int x, int y); 11 | /** 12 | * A set diff which preserves the order in the first vector. This is because rcpp setdiff does not preserve it. 13 | */ 14 | std::vector ordersetdiff(Rcpp::CharacterVector vector, Rcpp::CharacterVector remove); 15 | /** 16 | * 0-based match. (1 less than what Rcpp returns) 17 | * rcpp match was returning -2147483648 when not finding the value, and the any() test was failing, thus implemented without rcpp. 
18 | */ 19 | std::vector match_zero_based(const Rcpp::CharacterVector & subset, const Rcpp::CharacterVector & superset, const std::string error_message); 20 | 21 | #endif -------------------------------------------------------------------------------- /man/grain_and_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-dag.R, R/wrap-gRain.R 3 | \name{grain_and_graph} 4 | \alias{grain_and_graph} 5 | \alias{as_igraph} 6 | \alias{as_grain} 7 | \title{Convert to igraph and gRain.} 8 | \usage{ 9 | as_igraph(x) 10 | 11 | as_grain(x) 12 | } 13 | \arguments{ 14 | \item{x}{The \code{\link{bnc_bn}} object. The Bayesian network classifier.} 15 | } 16 | \description{ 17 | Convert a \code{\link{bnc_dag}} to \code{igraph} and 18 | \code{\link[gRain]{grain}} objects. 19 | } 20 | \section{Functions}{ 21 | \itemize{ 22 | \item \code{as_igraph()}: Convert to a graphNEL. 23 | 24 | \item \code{as_grain()}: Convert to a grain. 25 | 26 | }} 27 | \examples{ 28 | data(car) 29 | nb <- bnc('nb', 'class', car, smooth = 1) 30 | # Requires the grain and igraph packages installed 31 | \dontrun{g <- as_grain(nb)} 32 | \dontrun{gRain::querygrain.grain(g)$buying} 33 | } 34 | -------------------------------------------------------------------------------- /R/learn-params-awnb.R: -------------------------------------------------------------------------------- 1 | awnb <- function(class, dataset, trees = NULL, bootstrap_size = NULL) { 2 | if (is.null(trees)) trees <- 10 3 | if (is.null(bootstrap_size)) bootstrap_size <- 0.5 4 | # For each tree, get a bootstrap subsample 5 | subsamples <- replicate(trees, bootstrap_ss(dataset, bootstrap_size), 6 | simplify = FALSE) 7 | # From each sample, learn tree 8 | Wtrees <- lapply(subsamples, learn_unprunned_tree, class) 9 | # For each tree get minimum testing depth 10 | depths <- lapply(Wtrees, identify_min_testing_depths) 11 | depths <- unlist(depths, use.names = TRUE) 12 | if (length(depths) == 0) stop("Only empty trees have been learned.") 13 | features <- get_features(class, dataset) 14 | unused_features <- features[!(features %in% names(depths))] 15 | depths[unused_features] <- Inf 16 | # Compute weights and average across the features 17 | tapply(depths, names(depths), 18 | function(x) sum(x ^ -0.5), simplify = TRUE) / trees 19 | } -------------------------------------------------------------------------------- /R/frontend-graph.r: -------------------------------------------------------------------------------- 1 | # TODO use only graph internal and remove these functions. 2 | 3 | # Gets the parents of a node in the graph 4 | # Eeach nodes' parents. 5 | # return Named list of characters. 6 | graphNEL_parents <- function(g) { 7 | graph_parents(g) 8 | } 9 | subgraph <- function(vars, x) { 10 | graph_subgraph(vars, x) 11 | } 12 | connected_components <- function(x) { 13 | graph_connected_components(x) 14 | } 15 | 16 | add_edges <- function(from, to, x) { 17 | graph_add_edges(from, to, x) 18 | } 19 | add_node <- function(node, x) { 20 | graph_add_node(node, x) 21 | } 22 | remove_node <- function(node, x) { 23 | graph_remove_node(node, x) 24 | } 25 | num_arcs <- function(x) { 26 | graph_num_arcs(x) 27 | } 28 | #' Returns a complete unweighted graph with the given nodes. 29 | #' 30 | #' @param nodes A character vector. 31 | #' @return a \code{graphNEL} object. 
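# A worked check of the AWNB weights computed in R/learn-params-awnb.R above:
# each feature's weight is the sum over bootstrap trees of 1 / sqrt(minimum
# testing depth), divided by the number of trees; features absent from a tree
# get depth Inf and so contribute 0. For a feature tested at depth 1 in one
# tree and depth 4 in another, with trees = 2:
(1 / sqrt(1) + 1 / sqrt(4)) / 2   # 0.75, i.e. sum(c(1, 4) ^ -0.5) / 2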
32 | #' @keywords internal 33 | complete_graph <- function(nodes) { 34 | graph_complete_undirected(nodes) 35 | } 36 | -------------------------------------------------------------------------------- /man/cmi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-statistics.R 3 | \name{cmi} 4 | \alias{cmi} 5 | \title{Compute the (conditional) mutual information between two variables.} 6 | \usage{ 7 | cmi(x, y, dataset, z = NULL, unit = "log") 8 | } 9 | \arguments{ 10 | \item{x}{A length one character.} 11 | 12 | \item{y}{A length one character.} 13 | 14 | \item{dataset}{A data frame. Must contain x, y and, optionally, z columns.} 15 | 16 | \item{z}{A character vector.} 17 | 18 | \item{unit}{A character. Logarithm base. See \code{entropy} package.} 19 | } 20 | \description{ 21 | Computes the (conditional) mutual information between two variables. If 22 | \code{z} is not \code{NULL} then returns the conditional mutual information, 23 | \eqn{I(X;Y|Z)}. Otherwise, returns mutual information, \eqn{I(X;Y)}. 24 | } 25 | \details{ 26 | \eqn{I(X;Y|Z) = H(X|Z) + H(Y|Z) - H(X,Y,Z) - H(Z)}, where \eqn{H()} is 27 | Shannon's entropy. 28 | } 29 | \examples{ 30 | data(car) 31 | cmi('maint', 'class', car) 32 | } 33 | -------------------------------------------------------------------------------- /vignettes/includes/abstract.rmd: -------------------------------------------------------------------------------- 1 | \abstract{The \pkg{bnclassify} package provides state-of-the art algorithms for learning Bayesian network classifiers from data. For structure learning it provides variants of the greedy hill-climbing search, a well-known adaptation of the Chow-Liu algorithm and averaged one-dependence estimators. It provides Bayesian and maximum likelihood parameter estimation, as well as three naive-Bayes-specific methods based on discriminative score optimization and Bayesian model averaging. The implementation is efficient enough to allow for time-consuming discriminative scores on medium-sized data sets. The \pkg{bnclassify} package provides utilities for model evaluation, such as cross-validated accuracy and penalized log-likelihood scores, and analysis of the underlying networks, including network plotting via the \pkg{igraph} package. It is extensively tested, with over 200 automated tests that give a code coverage of 94\%. Here we present the main functionalities, illustrate them with a number of data sets, and comment on related software.} -------------------------------------------------------------------------------- /man/bnc_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \name{bnc_bn} 4 | \alias{bnc_bn} 5 | \title{Bayesian network classifier with structure and parameters.} 6 | \description{ 7 | A Bayesian network classifier with structure and parameters. Returned by 8 | \code{\link{lp}} and \code{\link{bnc}} functions. You can use it to classify 9 | data (with \code{\link[=predict.bnc_fit]{predict}}). 
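A short sketch of the z argument of cmi() documented above, which the example does not cover; class-conditional mutual information of this kind is also what the CL-ODE search behind tan_cl() uses as pairwise edge weights (see max_weight_forest further below):

data(car)
cmi('maint', 'buying', car)               # I(maint; buying)
cmi('maint', 'buying', car, z = 'class')  # I(maint; buying | class)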
Can estimate its 10 | predictive accuracy with \code{\link{cv}}, plot its structure (with 11 | \code{\link[=plot.bnc_dag]{plot}}), print a summary to console 12 | (\code{\link[=print.bnc_base]{print}}), inspect it with functions documented 13 | in \code{\link{inspect_bnc_bn}} and \code{\link{inspect_bnc_dag}}, and 14 | convert it to mlr, grain, and graph objects --see \code{\link{as_mlr}} and 15 | \code{\link{grain_and_graph}}. 16 | } 17 | \examples{ 18 | data(car) 19 | tan <- bnc('tan_cl', 'class', car, smooth = 1) 20 | tan 21 | p <- predict(tan, car) 22 | head(p) 23 | \dontrun{plot(tan)} 24 | nparams(tan) 25 | } 26 | -------------------------------------------------------------------------------- /R/bncs.R: -------------------------------------------------------------------------------- 1 | #' Returns a \code{c("bnc_aode", "bnc")} object. 2 | #' @keywords internal 3 | bnc_aode <- function(models, class_var, features) { 4 | stopifnot(length(models) > 0, identical(names(models), unname(features))) 5 | stopifnot(all(vapply(models, is_ode, FUN.VALUE = logical(1)))) 6 | bnc <- bnc_base(class = class_var, features = features) 7 | bnc$.models <- models 8 | class(bnc) <- c('bnc_aode', class(bnc)) 9 | bnc 10 | } 11 | #' Fits an AODE model. 12 | #' @keywords internal 13 | bnc_aode_bns <- function(x, fit_models) { 14 | stopifnot(inherits(x, 'bnc_aode')) 15 | x$.models <- fit_models 16 | class(x) <- c('bnc_aode_bns', class(x), 'bnc_fit') 17 | x 18 | } 19 | #' Is it en AODE? 20 | #' 21 | #' @keywords internal 22 | is_aode <- function(x) { 23 | if (!inherits(x, c('bnc_aode'))) return (FALSE) 24 | if (length(x$.models) < 2) return (FALSE) 25 | all(sapply(x$.models, is_ode)) # TODO Should be is spode 26 | } 27 | nmodels <- function(x) { 28 | stopifnot(inherits(x, 'bnc_aode')) 29 | length(x$.models) 30 | } 31 | models <- function(x) { 32 | stopifnot(inherits(x, 'bnc_aode')) 33 | x$.models 34 | } -------------------------------------------------------------------------------- /man/max_weight_forest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{max_weight_forest} 4 | \alias{max_weight_forest} 5 | \title{Returns the undirected augmenting forest.} 6 | \usage{ 7 | max_weight_forest(g) 8 | } 9 | \arguments{ 10 | \item{g}{A graph. The undirected graph with pairwise 11 | weights.} 12 | } 13 | \value{ 14 | A graph. The maximum spanning forest. 15 | } 16 | \description{ 17 | Uses Kruskal's algorithm to find the augmenting forest that maximizes the sum 18 | of pairwise weights. When the weights are class-conditional mutual 19 | information this forest maximizes the likelihood of the tree-augmented naive 20 | Bayes network. 21 | } 22 | \details{ 23 | If \code{g} is not connected than this will return a forest; otherwise it is 24 | a tree. 25 | } 26 | \references{ 27 | Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network 28 | classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. 29 | 30 | Murphy KP (2012). \emph{Machine learning: a probabilistic perspective}. The 31 | MIT Press. pp. 912-914. 
32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/compute_wanbia_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{compute_wanbia_weights} 4 | \alias{compute_wanbia_weights} 5 | \title{Compute WANBIA weights. 6 | 7 | Computes feature weights by optimizing conditional log-likelihood. 8 | Weights are bounded to [0, 1]. Implementation based on the original paper 9 | and the code provided at \url{https://sourceforge.net/projects/rawnaivebayes}.} 10 | \usage{ 11 | compute_wanbia_weights(class, dataset, return_optim_object = FALSE) 12 | } 13 | \arguments{ 14 | \item{class}{A character. Name of the class variable.} 15 | 16 | \item{dataset}{The data frame from which to learn feature weights} 17 | 18 | \item{return_optim_object}{Return full output of `optim`} 19 | } 20 | \value{ 21 | a named numeric vector 22 | } 23 | \description{ 24 | Compute WANBIA weights. 25 | 26 | Computes feature weights by optimizing conditional log-likelihood. 27 | Weights are bounded to [0, 1]. Implementation based on the original paper 28 | and the code provided at \url{https://sourceforge.net/projects/rawnaivebayes}. 29 | } 30 | \keyword{internal} 31 | -------------------------------------------------------------------------------- /R/wrap-igraph.R: -------------------------------------------------------------------------------- 1 | graphNEL2_graph_internal <- function(x) { 2 | stopifnot(inherits(x, "igraph")) 3 | nodes <- igraph::V(x)$name 4 | edges <- igraph::as_edgelist(x) # Assuming 'x' is an igraph object 5 | colnames(edges) <- c("from", "to") 6 | # edges <- t(named_edge_matrix(x)) 7 | weights <- NULL 8 | tryCatch({ 9 | # Will fail if it does not have the attribute. 10 | weights <- igraph::E(x)$weight 11 | }, error = function(e) {}) 12 | edgemode <- ifelse(igraph::is_directed(x), "directed", "undirected") 13 | weights <- igraph::E(x)$weight 14 | graph_internal(nodes, edges, weights, edgemode) 15 | } 16 | graph_internal2graph_NEL <- function(x) { 17 | stopifnot(inherits( x, "bnc_graph_internal")) 18 | edges <- x$edges 19 | edgemode <- ifelse(x$edgemode == "directed", TRUE, FALSE) 20 | graph <- igraph::graph_from_edgelist(edges, directed = edgemode) 21 | weights <- x$weight 22 | if (length(weights ) > 0) { 23 | stopifnot(length(weights ) == length(igraph::E(graph))) 24 | igraph::set_edge_attr(graph, "weight", value = weights) 25 | } 26 | # TODO: handle undirected. If directed, then build directed graph in BH. 27 | graph 28 | } -------------------------------------------------------------------------------- /man/plot.bnc_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bnc-dag-operate.R 3 | \name{plot.bnc_dag} 4 | \alias{plot.bnc_dag} 5 | \title{Plot the structure.} 6 | \usage{ 7 | \method{plot}{bnc_dag}(x, y, layoutType = "dot", fontsize = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{The \code{\link{bnc_dag}} object. The Bayesian network classifier 11 | structure.} 12 | 13 | \item{y}{Not used} 14 | 15 | \item{layoutType}{a character. Optional.} 16 | 17 | \item{fontsize}{integer Font size for node labels. 
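Returning to compute_wanbia_weights() documented above: it is internal, and the weights are normally obtained through the user-level interface instead. A sketch based on the wanbia argument of bnc() and lp(), with the call form taken from the package's tests:

data(car)
nb_w <- bnc('nb', 'class', car, smooth = 1, wanbia = TRUE)
head(predict(nb_w, car))
# equivalently, weight an already learned structure:
# lp(nb('class', car), car, smooth = 1, wanbia = TRUE)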
Optional.} 18 | 19 | \item{...}{Not used.} 20 | } 21 | \description{ 22 | If node labels are to small to be viewed properly, you may fix label fontsize 23 | with argument fontsize. Also, you may try multiple different layouts. 24 | } 25 | \examples{ 26 | 27 | 28 | # Requires the igraph package to be installed. 29 | data(car) 30 | nb <- nb('class', car) 31 | nb <- nb('class', car) 32 | \dontrun{plot(nb)} 33 | \dontrun{plot(nb, fontsize = 20)} 34 | \dontrun{plot(nb, layoutType = 'circo')} 35 | \dontrun{plot(nb, layoutType = 'fdp')} 36 | \dontrun{plot(nb, layoutType = 'osage')} 37 | \dontrun{plot(nb, layoutType = 'twopi')} 38 | \dontrun{plot(nb, layoutType = 'neato')} 39 | } 40 | -------------------------------------------------------------------------------- /src/anb-operate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // Returns a subset of the edges so that any reversed ones are removed. 5 | // The last ones are kept, that is, those lower in the matrix 6 | // This should be a DAG function; 7 | // [[Rcpp::export]] 8 | LogicalVector find_non_reversed(CharacterMatrix x) { 9 | int n = x.nrow(); 10 | if (n == 0) { 11 | return LogicalVector(0); 12 | } 13 | std::vector unique(n, true); 14 | // Skip last element in the loop, consider it unique 15 | for (int row = n - 2; row >= 0; row--) { 16 | const CharacterMatrix::Row & this_row = x(row, _); 17 | // a row is unique, if non of those before is identical to it 18 | // in the reversed matrix 19 | int j = row; 20 | while (j < n - 1) { 21 | j++; 22 | // no need to check for those known to be duplicates, as that would be repeating 23 | if (unique.at(j)) { 24 | const CharacterMatrix::Row & reversed_row = x(j, _); 25 | if (reversed_row[0] == this_row[1] && reversed_row[1] == this_row[0]) { 26 | unique.at(row) = false ; 27 | // j <- n 28 | break; 29 | } 30 | } 31 | } 32 | } 33 | return wrap(unique); 34 | } -------------------------------------------------------------------------------- /src/data.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | 5 | // [[Rcpp::export]] 6 | bool hasna_features(const DataFrame & newdata, const SEXP & features) 7 | { 8 | // Shallow object so not a big problem with copy 9 | DataFrame data = newdata; 10 | if (!Rf_isNull(features)) { 11 | // CharacterVector feats(features); 12 | data = trim_dataset_cpp(data, features); 13 | } 14 | return hasna(data); 15 | } 16 | 17 | // [[Rcpp::export]] 18 | bool hasna(const DataFrame & newdata) 19 | { 20 | for (int i = 0; i < newdata.size(); i++) { 21 | const IntegerVector & vec = newdata.at(i); 22 | if (is_true(any(is_na(vec)))) return true; 23 | } 24 | return false; 25 | } 26 | 27 | // [[Rcpp::export]] 28 | DataFrame trim_dataset_cpp(const DataFrame & dataset, const CharacterVector & features) 29 | { 30 | const Rcpp::CharacterVector & columns = dataset.names(); 31 | if (!is_true(all(in(features, columns )))) { 32 | Rcpp::stop("Some features missing from data set."); 33 | } 34 | // Rcpp intersect may alter order of columns, but irrelevant here 35 | Rcpp::CharacterVector keep = Rcpp::intersect(columns, features); 36 | DataFrame data = dataset[keep]; 37 | return data; 38 | } -------------------------------------------------------------------------------- /vignettes/macros-rjournal.tex: -------------------------------------------------------------------------------- 1 | % implementation of rjournal commands 2 | % re-implement 
code to let is be used with default pandoc template 3 | 4 | \newcommand{\kbd}[1]{{\normalfont\texttt{#1}}} 5 | \newcommand{\key}[1]{{\normalfont\texttt{\uppercase{#1}}}} 6 | \DeclareRobustCommand\samp{`\bgroup\@noligs\@sampx} 7 | \def\@sampx#1{{\normalfont\texttt{#1}}\egroup'} 8 | \newcommand{\var}[1]{{\normalfont\textsl{#1}}} 9 | \newcommand{\file}[1]{{`\normalfont\textsf{#1}'}} 10 | \newcommand{\code}[1]{\texttt{#1}} 11 | \let\option=\samp 12 | \newcommand{\dfn}[1]{{\normalfont\textsl{#1}}} 13 | % \acronym is effectively disabled since not used consistently 14 | \newcommand{\acronym}[1]{#1} 15 | \newcommand{\strong}[1]{\texorpdfstring% 16 | {{\normalfont\fontseries{b}\selectfont #1}}% 17 | {#1}} 18 | \let\pkg=\strong 19 | \newcommand{\CRANpkg}[1]{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}% 20 | \let\cpkg=\CRANpkg 21 | \newcommand{\ctv}[1]{\href{https://CRAN.R-project.org/view=#1}{\emph{#1}}} 22 | \newcommand{\BIOpkg}[1]{\href{https://www.bioconductor.org/packages/release/bioc/html/#1.html}{\pkg{#1}}} 23 | 24 | 25 | \RequirePackage{fancyvrb} 26 | \RequirePackage{alltt} 27 | 28 | \DefineVerbatimEnvironment{example}{Verbatim}{} 29 | \renewenvironment{example*}{\begin{alltt}}{\end{alltt}} 30 | -------------------------------------------------------------------------------- /R/anb-internal.R: -------------------------------------------------------------------------------- 1 | # all functions with begin wit anb_ 2 | # dag2modelstring. modelstring2dag. 3 | # When buliding a DAG, I will need the parents list per variable. 4 | # dag is a type of adj list where each has only its parents, and also includes itself in the list. 5 | # IF the DAG is topologically sorted, then class cannot be the last CPT. This is not important anyway; not a requirement. 6 | # This is related to the anb-families. It is the anb class. 7 | 8 | # TODO 'families' is just a way to represent the anb that corresponds to the cpts. but could be internal to the anb object. if indeed doing that, i would need to transform from families to adjacency lists or similar. 9 | 10 | anb_internal <- function() { 11 | } 12 | # THIS SHOULD RETURN A c("anb", "dag"), as dag is the more general class. 13 | 14 | anb_make_nb <- function(class, features) { 15 | # Check class is character and length one, features is length 0 or character, 16 | # class is not in features. 
17 | check_features(features, class) 18 | # If > 0 features, add arc from class to each of them 19 | narcs <- length(features) 20 | arcs <- graph_from_to_to_edges(rep(class, narcs), features) 21 | # Set nodes as class + features 22 | nodes <- c(class, features) 23 | g <- graph_internal(nodes, arcs) 24 | g 25 | } -------------------------------------------------------------------------------- /R/anb-bn.R: -------------------------------------------------------------------------------- 1 | make_bnc_bn <- function(bnc_dag, params) { 2 | bnc_dag$.params <- params 3 | class(bnc_dag) <- c('bnc_bn', class(bnc_dag), 'bnc_fit') 4 | bnc_dag 5 | } 6 | bn2dag <- function(x) { 7 | stopifnot(inherits(x, "bnc_dag")) 8 | if (is_just(x, "bnc_dag")) { 9 | x 10 | } 11 | else { 12 | make_bnc_dag(class = class_var(x), families = families(x), 13 | dag = dag(x)) 14 | } 15 | } 16 | check_bnc_bn <- function(x) { 17 | check_bnc_dag(x) 18 | # Check CPT families match original families equal to the families of bnc 19 | params <- params(x) 20 | stopifnot(identical(cpts2families(params), families(x))) 21 | stopifnot(identical(names(values(x)), names(vars(x)))) 22 | } 23 | # Accessors 24 | 25 | #' @export 26 | #' @describeIn inspect_bnc_bn Returns the list of CPTs, in the same order as \code{\link{vars}}. 27 | params <- function(x) { 28 | stopifnot(inherits(x, "bnc_bn")) 29 | x$.params 30 | } 31 | #' @export 32 | #' @describeIn inspect_bnc_bn Returns the possible values of each variable, in the same order as \code{\link{vars}}. 33 | values <- function(x) { 34 | cpt_vars_values(params(x)) 35 | } 36 | #' @export 37 | #' @describeIn inspect_bnc_bn Returns the possible values of the class variable. 38 | classes <- function(x) { 39 | values(x)[[class_var(x)]] 40 | } -------------------------------------------------------------------------------- /tests/testthat/test-anb-cpts.R: -------------------------------------------------------------------------------- 1 | context("CPTs") 2 | 3 | test_that("subset_cpt single value", { 4 | cp <- extract_cpt(colnames(car)[5:7], car, smooth = 1) 5 | p <- subset_cpt(cp, list(lug_boot=1, safety=2, class=1)) 6 | expect_null(dim(p)) 7 | }) 8 | 9 | test_that("subset_cpt 1D cpt", { 10 | cp <- extract_cpt(colnames(car)[7], car, smooth = 1) 11 | obs <- vapply(car[, 5:6], as.integer, FUN.VALUE = integer(nrow(car))) 12 | expect_error(p <- subset_cpt(cp, obs), "vars") 13 | }) 14 | 15 | test_that("cpt cache nominal", { 16 | cache <- make_cpts_cache(car, smooth = 1) 17 | expect_identical(cache('class'), extract_cpt('class', car, smooth = 1)) 18 | expect_identical(cache(c('buying', 'class')), 19 | extract_cpt(c('buying', 'class'), car, smooth = 1)) 20 | }) 21 | 22 | test_that("cpt cache forget", { 23 | cache <- make_cpts_cache(car, smooth = 1) 24 | expect_identical(cache('class'), extract_cpt('class', car, smooth = 1)) 25 | expect_identical(cache(c('buying', 'class')), 26 | extract_cpt(c('buying', 'class'), car, smooth = 1)) 27 | expect_true(forget(cache)) 28 | expect_identical(cache('class'), extract_cpt('class', car, smooth = 1)) 29 | expect_identical(cache(c('buying', 'class')), 30 | extract_cpt(c('buying', 'class'), car, smooth = 1)) 31 | }) 32 | -------------------------------------------------------------------------------- /vignettes/overview.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "`bnclassify`: Learning Bayesian Network Classifiers" 3 | author: "Bojan Mihaljević, Concha Bielza and Pedro Larrañaga" 4 | date: "`r Sys.Date()`" 5 | 
output: 6 | rmarkdown::pdf_document: 7 | number_sections: true 8 | keep_tex: true 9 | includes: 10 | in_header: header.tex 11 | citation_package: natbib 12 | bibliography: bnclassify.bib 13 | fontsize: 11pt 14 | vignette: > 15 | %\VignetteIndexEntry{overview} 16 | %\VignetteEngine{knitr::rmarkdown} 17 | %\VignetteEncoding{UTF-8} 18 | --- 19 | 20 | ```{r child = 'includes/abstract.rmd'} 21 | ``` 22 | \tableofcontents 23 | ```{r child = 'includes/intro.rmd'} 24 | ``` 25 | The rest of this paper is structured as follows. Section \ref{sec:bcground} provides background on Bayesian network classifiers. Section \ref{sec:functionalities} describes the implemented functionalities. Section \ref{sec:usage} illustrates usage with a synthetic data set. Section \ref{sec:implementation} discusses implementation while Section \ref{sec:relatedsw} briefly surveys related software. Finally, Section \ref{sec:conclusion} concludes and outlines future work. 26 | ```{r child = 'includes/background.rmd'} 27 | ``` 28 | ```{r child = 'includes/functionalities.rmd'} 29 | ``` 30 | ```{r child = 'includes/usage.rmd'} 31 | ``` 32 | ```{r child = 'includes/impl-related-conclusion.rmd'} 33 | ``` 34 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bnclassify 2 | Title: Learning Discrete Bayesian Network Classifiers from Data 3 | Description: State-of-the art algorithms for learning discrete Bayesian network classifiers from data, including a number of those described in Bielza & Larranaga (2014) , with functions for prediction, model evaluation and inspection. 4 | Version: 0.4.8 5 | Authors@R: c(person("Mihaljevic","Bojan",email="boki.mihaljevic@gmail.com",role=c("aut","cre", "cph")), 6 | person("Bielza","Concha",email="mcbielza@fi.upm.es",role="aut"), 7 | person("Larranaga","Pedro",email="pedro.larranaga@fi.upm.es",role="aut"), 8 | person("Wickham", "Hadley", role="ctb", comment="some code extracted from memoise package")) 9 | URL: https://github.com/bmihaljevic/bnclassify 10 | BugReports: https://github.com/bmihaljevic/bnclassify/issues 11 | Depends: 12 | R (>= 3.2.0) 13 | Imports: 14 | assertthat (>= 0.1), 15 | entropy(>= 1.2.0), 16 | matrixStats(>= 0.14.0), 17 | rpart(>= 4.1-8), 18 | Rcpp, 19 | Suggests: 20 | igraph, 21 | gRain(>= 1.2-3), 22 | gRbase(>= 1.7-0.1), 23 | mlr(>= 2.2), 24 | testthat(>= 0.8.1), 25 | knitr(>= 1.10.5), 26 | ParamHelpers(>= 1.5), 27 | rmarkdown(>= 0.7), 28 | mlbench, 29 | covr 30 | Encoding: UTF-8 31 | License: GPL (>= 2) 32 | Maintainer: Mihaljevic Bojan 33 | VignetteBuilder: knitr 34 | LinkingTo: Rcpp, BH 35 | RoxygenNote: 7.3.1 36 | -------------------------------------------------------------------------------- /tests/testthat/test-wrap-grain.R: -------------------------------------------------------------------------------- 1 | context("grain") 2 | 3 | test_that("nominal joint prob instance", { 4 | # skip_if_not_installed('gRain') 5 | # a <- nbvote() 6 | # g <- as_grain(a) 7 | # vc <- as.matrix(voting) 8 | # cp <- compute_grain_log_joint_instance(vc[1, -17], g, 'Class') 9 | # expect_true(is.numeric(cp)) 10 | # expect_equal(names(cp), levels(voting$Class)) 11 | # expect_equal(exp(as.vector(cp)), c(1.956045e-09, 1.517449e-02), tolerance = 1e-6) 12 | }) 13 | 14 | test_that(" joint prob instance with no evidence", { 15 | # skip_if_not_installed('gRain') 16 | # a <- nbvote() 17 | # g <- as_grain(a) 18 | # vc <- as.matrix(voting) 19 | # inst <- vc[1, -17] 
20 | # inst[] <- NA 21 | # cp <- compute_grain_log_joint_instance(inst, g, 'Class') 22 | # expect_true(is.numeric(cp)) 23 | # expect_equal(names(cp), levels(voting$Class)) 24 | # expect_true(equivalent_num(exp(cp), params(a)[class_var(a)][[1]])) 25 | }) 26 | 27 | test_that("cpts to grain", { 28 | skip("gRain has removed the predict function") 29 | skip_if_not_installed('gRain') 30 | a <- nbvote() 31 | g <- compile_grain(params(a)) 32 | # Check compiled 33 | expect_true(g$isCompiled) 34 | # gRain cannot handle a row that has all missing features. remove 249. 35 | p <- gRain::predict.grain(g, response = 'Class', 36 | newdata = voting[-249, -17], type = "class") 37 | expect_true(all(p$pred[[1]] %in% levels(voting$Class[-249]))) 38 | }) -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | - ubuntu 22.04, R 4.3.3 (local) 3 | - macos-latest, r: release 4 | - windows-latest, r: release 5 | - ubuntu-latest, r: devel 6 | - ubuntu-latest, r: release 7 | - ubuntu-latest, r: oldrel-1 8 | - windows R-devel (win-builder) 9 | - windows R-release (win-builder) 10 | 11 | 12 | ## R CMD check results 13 | 0 errors | 0 warnings | 1 notes 14 | 15 | The not is related to the fact that this is a submission of an archived package. The words identified as possibly misspelled are last names and are not misspelled. The note is: 16 | 17 | Checking CRAN incoming feasibility ... [7s/18s] NOTE 18 | Maintainer: ‘Mihaljevic Bojan ’ 19 | 20 | New submission 21 | 22 | Package was archived on CRAN 23 | 24 | Possibly misspelled words in DESCRIPTION: 25 | Bielza (3:145) 26 | Larranaga (3:154) 27 | 28 | CRAN repository db overrides: 29 | X-CRAN-Comment: Archived on 2023-10-20 as check problems were not 30 | corrected in time. 31 | 32 | On Windows R-devel (win-builder), there was 1 WARNING and 2 NOTES. The WARNING was: 33 | 34 | * checking sizes of PDF files under 'inst/doc' ... WARNING 35 | 'gs+qpdf' made some significant size reductions: 36 | compacted 'methods.pdf' from 326Kb to 72Kb 37 | compacted 'overview.pdf' from 495Kb to 184Kb 38 | 39 | ## Reverse dependencies 40 | There are currently no downstream dependencies for this package. -------------------------------------------------------------------------------- /man/predict.bnc_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.bnc_fit} 4 | \alias{predict.bnc_fit} 5 | \title{Predicts class labels or class posterior probability distributions.} 6 | \usage{ 7 | \method{predict}{bnc_fit}(object, newdata, prob = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{A \code{\link{bnc_bn}} object.} 11 | 12 | \item{newdata}{A data frame containing observations whose class has to be 13 | predicted.} 14 | 15 | \item{prob}{A logical. Whether class posterior probability should be returned.} 16 | 17 | \item{...}{Ignored.} 18 | } 19 | \value{ 20 | If \code{prob=FALSE}, then returns a length-\eqn{N} factor with the 21 | same levels as the class variable in \code{x}, where \eqn{N} is the number 22 | of rows in \code{newdata}. Each element is the most likely 23 | class for the corresponding row in \code{newdata}. If \code{prob=TRUE}, 24 | returns a \eqn{N} by \eqn{C} numeric matrix, where \eqn{C} is the number of 25 | classes; each row corresponds to the class posterior of the instance. 
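A brief sketch expanding on the value description above; accuracy() is assumed to take the predicted labels first and the true labels second:

data(car)
nb <- bnc('nb', 'class', car, smooth = 1)
labels <- predict(nb, car)
accuracy(labels, car$class)
post <- predict(nb, car, prob = TRUE)   # N x C matrix of class posteriors
summary(rowSums(post))                  # each row should sum to 1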
26 | } 27 | \description{ 28 | Predicts class labels or class posterior probability distributions. 29 | } 30 | \details{ 31 | Ties are resolved randomly. Inference is much slower if 32 | \code{newdata} contains \code{NA}s. 33 | } 34 | \examples{ 35 | data(car) 36 | nb <- bnc('nb', 'class', car, smooth = 1) 37 | p <- predict(nb, car) 38 | head(p) 39 | p <- predict(nb, car, prob = TRUE) 40 | head(p) 41 | } 42 | -------------------------------------------------------------------------------- /meta/check.r: -------------------------------------------------------------------------------- 1 | ## ========================================= ## 2 | ## Run checks prior to submitting ## 3 | ## ========================================= ## 4 | 5 | - gha: not using manual currently, could change it 6 | 7 | 8 | ## for vignette size warning. Since vignettes are only built locally, I only need add this argument 9 | ## MUST use this in all my builds and checks!! 10 | # build_args <- c('--resave-data','--compact-vignettes=gs+qpdf', '--gs_quality=ebook') 11 | build_args <- c('--resave-data','--compact-vignettes=both') 12 | # check_args <- '--as-cran --use-valgrind' 13 | check_args <- '--as-cran' 14 | 15 | ## First to build win so I can proceed with local while it is tested remotely 16 | devtools::check_win_devel('.', args = build_args ) 17 | devtools::check_win_release('.', args = build_args ) 18 | devtools::check_mac_release( '.', args = build_args ) 19 | # devtools::build_win('.', version = 'R-release', args = build_args ) 20 | # devtools::build_win('.', version = 'R-devel', args = build_args ) 21 | 22 | # TODO rhub: can I set build args? 23 | rhub::check_for_cran() 24 | rhub::check_with_sanitizers() 25 | 26 | devtools::check(args = check_args , cran = TRUE, build_args = build_args, remote = TRUE, manual = TRUE ) 27 | ## cran = FALSE probably runs tests skipped on cran 28 | devtools::check(cran = FALSE, args = check_args, build_args = build_args ) 29 | devtools::check(cran = TRUE, build_args = build_args ) 30 | devtools::check(cran = FALSE, build_args = build_args ) 31 | 32 | 33 | # see submit.r -------------------------------------------------------------------------------- /tests/testthat/test-basic-misc.R: -------------------------------------------------------------------------------- 1 | context("Miscallaneous") 2 | 3 | test_that("make_last", { 4 | # Nominal 5 | # so to not modify letters 6 | e <- c(letters, NULL) 7 | e <- make_last_sideeffect(e, 'c') 8 | expect_equal(e, c(letters[-3], letters[3])) 9 | # # x not character 10 | # Currently not checking this 11 | # expect_error(make_last(1:10, 'c') , "character") 12 | # last not in x 13 | e <- c(letters, NULL) 14 | expect_error(make_last_sideeffect(e, 'A'), "not found") 15 | # last repeated in x 16 | # TODO: does not report a mistake. 
17 | # expect_error(make_last_sideeffect(rep('A', 5), 'A'), "length") 18 | }) 19 | 20 | test_that("Rep factor as int", { 21 | fi <- rep_factor_as_int(factor(letters, levels = letters), 10) 22 | expect_identical(length(fi), 260L) 23 | expect_true(all(fi == 1:26)) 24 | }) 25 | 26 | test_that("Random max nominal", { 27 | suppressWarnings(RNGversion("3.5.0")) 28 | set.seed(0) 29 | x <- c(1, runif(5), 1) 30 | a <- max_random(x) 31 | b <- max_random(x) 32 | expect_true(a != b) 33 | expect_equal(x[a], x[b]) 34 | }) 35 | 36 | test_that("Boostrap nominal", { 37 | d <- bootstrap_ss(dataset = car, proportion = 0.25) 38 | expect_equal(dim(d), c(432, ncol(car))) 39 | expect_equal(colnames(d), colnames(car)) 40 | 41 | d <- bootstrap_ss(dataset = voting, proportion = 0.2) 42 | expect_equal(dim(d), c(87, 17)) 43 | expect_equal(colnames(d), colnames(voting)) 44 | }) 45 | 46 | test_that("Boostrap 0 proportion", { 47 | expect_error(bootstrap_ss(dataset = car, proportion = 0), "positive") 48 | }) -------------------------------------------------------------------------------- /tests/testthat/test-hc-bsej.R: -------------------------------------------------------------------------------- 1 | context("HC bsej") 2 | 3 | test_that("Merge supernodes", { 4 | # A NB with 6 features yields 15 models 5 | nb <- nbcar() 6 | states <- merge_supernodes(nb) 7 | expect_equal(length(states), 15) 8 | m <- states[[1]] 9 | expect_equal(narcs(m), length(features(nb)) + 1) 10 | # A single predictor 11 | nb <- nbcarp(car[, 6:7]) 12 | states <- merge_supernodes(nb) 13 | expect_true(is.null(states)) 14 | # No predictors 15 | nb <- nbcarclass() 16 | states <- merge_supernodes(nb) 17 | expect_true(is.null(states)) 18 | 19 | # relating non-singleton feature subsets 20 | nb <- nbcar() 21 | states <- merge_supernodes(nb) 22 | e <- merge_supernodes(states[[1]]) 23 | expect_equal(length(e), 10) 24 | expect_equal(families(e[[1]])$buying, c("buying", "maint", "doors", "class")) 25 | f <- merge_supernodes(e[[10]]) 26 | expect_equal(length(f), 6) 27 | }) 28 | 29 | test_that("Excludes", { 30 | # One dag per feature 31 | nb <- nbcar() 32 | cands <- excludes(nb) 33 | expect_equal(length(cands), length(features(nb))) 34 | # single state for a single feature 35 | nb <- nbcarp(car[, c(1,7)]) 36 | cands <- excludes(nb) 37 | expect_equal(length(cands), 1) 38 | expect_equal(features(cands[[1]]), character()) 39 | 40 | # NULL for no features 41 | nb <- nbcarclass() 42 | states <- excludes(nb) 43 | expect_equal(length(states), 0) 44 | }) 45 | 46 | test_that("bsej step", { 47 | nb <- nbcar() 48 | e <- bsej_step(nb) 49 | expect_equal(length(e), 6 + 15) 50 | }) -------------------------------------------------------------------------------- /inst/include/multidim-array.h: -------------------------------------------------------------------------------- 1 | #ifndef bnclassify_multidimarray_H 2 | #define bnclassify_multidimarray_H 3 | 4 | #include 5 | using namespace Rcpp; 6 | 7 | 8 | /** 9 | * It returns the index for fully specified entries. 10 | * The entries must be 0-based. 11 | */ 12 | inline int entry_index(std::vector::const_iterator begin, const std::vector & dim_prod) { 13 | return std::inner_product(begin + 1, begin + dim_prod.size(), dim_prod.begin(), *begin); 14 | } 15 | 16 | int entry_index(const std::vector & indices, const std::vector & dim_prod); 17 | 18 | /** 19 | * Returns a subset of the array, when the last dimension of indices is not specified. 
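For reference, entry_index() above implements plain column-major indexing, the same layout R uses for arrays (and hence for CPTs); an R sketch of the equivalent computation, with 0-based indices and dim_prod holding the cumulative products of the lower dimensions:

A <- array(1:24, dim = c(2, 3, 4))
idx <- c(1, 2, 0)                          # 0-based indices, i.e. A[2, 3, 1]
dim_prod <- cumprod(dim(A))[-3]            # c(2, 6)
flat <- idx[1] + sum(idx[-1] * dim_prod)   # 0-based flat index, as in entry_index()
stopifnot(A[2, 3, 1] == as.vector(A)[flat + 1])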
20 | */ 21 | inline void subset_free_last_dim(const std::vector & array, const std::vector & dim_prod, std::vector::iterator indices_begin, 22 | std::vector & output) { 23 | // length indices = length dim prod minus one 24 | // dim prod cum prod = array length 25 | // otuput size = dim size 26 | int ndim = dim_prod.size(); 27 | std::vector::iterator last_dim = indices_begin + ndim - 1; 28 | // Start with first value for last dimension. 29 | *last_dim = 0; 30 | int sum = entry_index(indices_begin, dim_prod); 31 | // // Add an entry per each class 32 | int per_class_entries = dim_prod.at(ndim - 2); 33 | int ncpts = output.size(); 34 | // most of the time is spent in this loop. 35 | for (int i = 0; i < ncpts; i++ ) { 36 | output[i] = array.at(sum); 37 | sum += per_class_entries; 38 | } 39 | } 40 | 41 | 42 | #endif -------------------------------------------------------------------------------- /tests/testthat/test-basic-probs.R: -------------------------------------------------------------------------------- 1 | context("basic probs") 2 | 3 | make_factor <- function(n) { 4 | a <- runif(100) 5 | b <- 1 - a 6 | td <- matrix(c(a, b), ncol=2) 7 | colnames(td) <- letters[1:2] 8 | td 9 | } 10 | 11 | test_that("Multiply single factor", { 12 | td <- make_factor(1000) 13 | a <- sum_matrices(list(a=td)) 14 | expect_equal(td, a) 15 | }) 16 | 17 | test_that("Multiply Two factors", { 18 | td <- make_factor(1000) 19 | td2 <- make_factor(1000) 20 | a <- sum_matrices(list(a=td, b=td2)) 21 | expect_identical(colnames(a), letters[1:2]) 22 | expect_identical(dim(td), dim(a)) 23 | fr <- td[1, ] + td2[1, ] 24 | expect_equal(fr, a[1, ]) 25 | }) 26 | 27 | 28 | test_that("normalize", { 29 | # Nominal 30 | a <- setNames(1:5, letters[1:5]) 31 | a <- normalize(a) 32 | expect_equal(sum(a), 1) 33 | expect_equal(length(a), 5) 34 | expect_equal(unname(a), 1:5 / sum(1:5)) 35 | expect_equal(names(a), letters[1:5]) 36 | # Sum 0 37 | a <- normalize(setNames(rep(0, 10), letters[1:10])) 38 | expect_equal(sum(a), 1) 39 | expect_equal(length(a), 10) 40 | expect_equal(unname(a), rep(1 / 10, 10)) 41 | expect_equal(names(a), letters[1:10]) 42 | # Single 0. Degenerate prob. dist. 43 | a <- normalize(0) 44 | expect_equal(a, 1) 45 | }) 46 | 47 | test_that("Legal prob. distribution", { 48 | expect_true(are_pdists(matrix(1))) 49 | expect_true(!are_pdists(matrix(0))) 50 | expect_true(!are_pdists(matrix(NA, 1))) 51 | expect_true(!are_pdists(matrix(c(-1, 1), nrow=1, byrow = TRUE))) 52 | expect_true(are_pdists(matrix(1:5 / 15, nrow=1, byrow = TRUE))) 53 | }) -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-params-wanbia.r: -------------------------------------------------------------------------------- 1 | context("learn params wanbia") 2 | 3 | test_that('make cll', { 4 | w <- rep(1, 16) 5 | w <- setNames(w, colnames(v)[-ncol(v)]) 6 | cll <- make_cll('Class', v)(w) 7 | expect_equal(cll, 149.1442, tolerance = 1e-2) 8 | # Too few weights 9 | expect_error(make_cll('Class', v)(w[-16])) 10 | }) 11 | 12 | test_that("wanbia error", { 13 | w <- compute_wanbia_weights( 'Class', v, return_optim_object = TRUE) 14 | # There is an error. Not sure if this is critical. 15 | # It does not occur on Windows. 
Skipping the test for now 16 | # expect_equal(w$message, "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH") 17 | }) 18 | 19 | test_that("with incomplete data", { 20 | skip_on_cran() 21 | skip("too slow currently") 22 | w <- compute_wanbia_weights( 'Class', voting) 23 | # just check results is consistent 24 | expect_equal(w[['physician_fee_freeze']], 0.7730736, tolerance = 1e-6) 25 | }) 26 | 27 | test_that("check consistent result ", { 28 | skip_if_not_installed('mlbench') 29 | skip_on_cran() 30 | skip("too slow currently") 31 | require(mlbench) 32 | data("DNA") 33 | w <- compute_wanbia_weights( 'Class', DNA) 34 | expect_equal(w[['V132']], 0.99039949, tolerance = 1e-6) 35 | }) 36 | 37 | test_that("with more than two classes ", { 38 | skip_if_not_installed('mlbench') 39 | skip_on_cran() 40 | skip("too slow currently") 41 | require(mlbench) 42 | data("Soybean") 43 | w <- compute_wanbia_weights( 'Class', Soybean) 44 | # just check results is consistent 45 | expect_equal(w[['seed.tmt']], 0.42133418, tolerance = 1e-6) 46 | }) -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AIC,bnc_bn) 4 | S3method(BIC,bnc_bn) 5 | S3method(compute_log_joint_complete,bnc_aode) 6 | S3method(compute_log_joint_complete,bnc_bn) 7 | S3method(compute_log_joint_incomplete,bnc_aode) 8 | S3method(compute_log_joint_incomplete,bnc_bn) 9 | S3method(logLik,bnc_bn) 10 | S3method(lp_implement,bnc_aode) 11 | S3method(lp_implement,bnc_dag) 12 | S3method(plot,bnc_dag) 13 | S3method(predict,bnc_fit) 14 | S3method(print,bnc_base) 15 | export(accuracy) 16 | export(aode) 17 | export(as_grain) 18 | export(as_igraph) 19 | export(as_mlr) 20 | export(awnb_weights) 21 | export(bnc) 22 | export(bsej) 23 | export(cLogLik) 24 | export(class_var) 25 | export(classes) 26 | export(cmi) 27 | export(cv) 28 | export(families) 29 | export(feature_families) 30 | export(features) 31 | export(fssj) 32 | export(is_anb) 33 | export(is_nb) 34 | export(is_ode) 35 | export(is_semi_naive) 36 | export(kdb) 37 | export(lp) 38 | export(makeRLearner.bnc) 39 | export(manb_arc_posterior) 40 | export(modelstring) 41 | export(narcs) 42 | export(nb) 43 | export(nparams) 44 | export(params) 45 | export(predictLearner.bnc) 46 | export(tan_cl) 47 | export(tan_hc) 48 | export(tan_hcsp) 49 | export(trainLearner.bnc) 50 | export(values) 51 | export(vars) 52 | importFrom(Rcpp,sourceCpp) 53 | importFrom(graphics,plot) 54 | importFrom(stats,AIC) 55 | importFrom(stats,BIC) 56 | importFrom(stats,as.formula) 57 | importFrom(stats,complete.cases) 58 | importFrom(stats,logLik) 59 | importFrom(stats,nobs) 60 | importFrom(stats,optim) 61 | importFrom(stats,predict) 62 | importFrom(stats,setNames) 63 | importFrom(utils,combn) 64 | useDynLib(bnclassify) 65 | -------------------------------------------------------------------------------- /man/loglik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-bn-operate.R 3 | \name{loglik} 4 | \alias{loglik} 5 | \alias{AIC.bnc_bn} 6 | \alias{BIC.bnc_bn} 7 | \alias{logLik.bnc_bn} 8 | \alias{cLogLik} 9 | \title{Compute (penalized) log-likelihood.} 10 | \usage{ 11 | \method{AIC}{bnc_bn}(object, ...) 12 | 13 | \method{BIC}{bnc_bn}(object, ...) 14 | 15 | \method{logLik}{bnc_bn}(object, ...) 16 | 17 | cLogLik(object, ...) 
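If the implementation matches the formulas given in the details below, the scores relate to logLik() and nparams() as in this informal sketch (a consistency check under that assumption, not a guaranteed identity):

data(car)
nb <- bnc('nb', 'class', car, smooth = 1)
ll <- as.numeric(logLik(nb, car))
all.equal(as.numeric(AIC(nb, car)), ll - nparams(nb) / 2)
all.equal(as.numeric(BIC(nb, car)), ll - log(nrow(car)) / 2 * nparams(nb))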
18 | } 19 | \arguments{ 20 | \item{object}{A \code{\link{bnc_bn}} object.} 21 | 22 | \item{...}{A data frame (\eqn{\mathcal{D}}{D}).} 23 | } 24 | \description{ 25 | Compute (penalized) log-likelihood and conditional log-likelihood score of a \code{\link{bnc_bn}} object on 26 | a data set. Requires a data frame argument in addition to \code{object}. 27 | } 28 | \details{ 29 | log-likelihood = \eqn{log P(\mathcal{D} \mid \theta)}{log P(D | \theta)}, 30 | 31 | Akaike's information criterion (AIC) = \eqn{log P(\mathcal{D} \mid \theta) - 32 | \frac{1}{2} |\theta|}{log P(D | \theta) - |\theta| / 2}, 33 | 34 | The Bayesian information criterion (BIC) score: = \eqn{log P(\mathcal{D} \mid 35 | \theta) - \frac{\log N}{2} |\theta|}{log P(D | \theta) - N |\theta| / 2}, 36 | 37 | where \eqn{|\theta|} is the number of free parameters in \code{object}, 38 | \eqn{\mathcal{D}}{D} is the data set and N is the number of instances in 39 | \eqn{\mathcal{D}}{D}. 40 | 41 | \code{cLogLik} computes the conditional log-likelihood of the model. 42 | } 43 | \examples{ 44 | data(car) 45 | nb <- bnc('nb', 'class', car, smooth = 1) 46 | logLik(nb, car) 47 | AIC(nb, car) 48 | BIC(nb, car) 49 | cLogLik(nb, car) 50 | } 51 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # bnclassify 0.4.8 2 | - Switch from graph and Rgraphviz to igraph 3 | 4 | # 0.4.7 5 | - Fix a WARN on r-devel clang 6 | 7 | # 0.4.6 8 | - Work-around for gRain changes 9 | - Skip a test that fails due to changes in R-devel 10 | 11 | # 0.4.5 12 | * Pass CRAN checks with stringsAsFactors = FALSE as default 13 | 14 | # 0.4.4 15 | * Removing gRain integration to pass CRAN checks 16 | 17 | # 0.4.3 18 | * Minor changes to pass CRAN checks 19 | 20 | # 0.4.2 21 | * Minor changes to pass CRAN checks 22 | 23 | # 0.4.1 24 | * Minor changes to pass CRAN checks 25 | 26 | # 0.4.0 27 | * Optimizing some functions with RCPP 28 | * Added AODE (averaged one-dependence estimators) 29 | * Added k-db classifier 30 | * Replaced the RBGL package with BH 31 | * Replaced the graph package with custom code and BH 32 | * Added the 'overview' vignette 33 | * Minor additions: cLogLik function 34 | 35 | # 0.3.4 36 | * Added WANBIA discriminative parameter learning for naive Bayes 37 | * Remove 'runtimes' vignette and microbenchmark dependency 38 | 39 | # 0.3.3 40 | * Fixed warnings for resubmission to CRAN 41 | 42 | # 0.3.2 43 | * Improved function documentation 44 | * Updated and re-organized tutorial vignette 45 | * Added 'techical information' and 'runtimes' vignettes 46 | * Added logLik, AIC, and BIC 47 | * Memoized cpt extraction during greedy search 48 | * Deprecated lpawnb() (use lp() instead) 49 | * Fixed a number of bugs, including an error in AWNB cross-validation 50 | 51 | # 0.3.1 52 | * Minor improvements in vignette 53 | * More detailed documentation for lp() 54 | * Fixing a test error due to a change in gRain 55 | 56 | # 0.3.0 57 | * First released version of the package. 
58 | -------------------------------------------------------------------------------- /tests/testthat/test-anb-bn.R: -------------------------------------------------------------------------------- 1 | context("bnc bn") 2 | 3 | test_that("nominal", { 4 | tbn <- bnc('nb', 'class', car, smooth = 0) 5 | texp <- prop.table(table(car$class, dnn = 'class') ) 6 | tout <- params(tbn)$class 7 | expect_equal(tout, texp) 8 | tvalues <- values(tbn) 9 | levs <- lapply(car, levels) 10 | expect_true(all(mapply(identical, levs, tvalues, SIMPLIFY = TRUE))) 11 | expect_identical(values(tbn)$buying, levels(car$buying)) 12 | }) 13 | 14 | test_that("nominal as grain", { 15 | skip_if_not_installed('gRain') 16 | tbn <- bnc('nb', 'class', car, smooth = 0) 17 | expect_is(as_grain(tbn), 'grain') 18 | }) 19 | 20 | 21 | test_that("bnc_bn no class in dataset ", { 22 | tbdag <- nb_dag('class', 'buying') 23 | tb <- bnc_dag(tbdag, class = 'class') 24 | expect_error(lp(tb, car[ , 1, drop = FALSE], smooth = 0), 25 | "not found") 26 | }) 27 | 28 | test_that("Just the class in dataset", { 29 | tbdag <- nb_dag('class', character()) 30 | tbdag <- bnc_dag(tbdag, class = 'class') 31 | tbn <- lp(tbdag, car, smooth = 0) 32 | expect_equal(class_var(tbn), 'class') 33 | }) 34 | 35 | test_that(" Wrong data set", { 36 | tbdag <- nb_dag('class', colnames(car)[-7]) 37 | tbdag <- bnc_dag(tbdag, class = 'class') 38 | expect_error(lp(tbdag, voting, smooth = 0) , "not found") 39 | }) 40 | 41 | test_that(" Classes", { 42 | a <- lp(nb('class', car), car, smooth = 0) 43 | expect_equal(classes(a), levels(car$class)) 44 | }) 45 | 46 | test_that("nominal wanbia", { 47 | a <- lp(nb('Class', v), v, smooth = 0, wanbia = TRUE) 48 | expect_equal(a$.weights[['handicapped_infants']], 0.00000000) 49 | expect_equal(a$.weights[['immigration']], 1.00000000) 50 | }) -------------------------------------------------------------------------------- /tests/testthat/test-learn-params-awnb.R: -------------------------------------------------------------------------------- 1 | context("AWNB") 2 | 3 | test_that("one tree", { 4 | suppressWarnings(RNGversion("3.5.0")) 5 | set.seed(0) 6 | a <- awnb('class', car, bootstrap_size = 0.5, trees = 1) 7 | expect_equal(as.vector(a['buying']), 0.5773503, tolerance = 1e-5) 8 | expect_equal(as.vector(a['doors']), 0.3779645, tolerance = 1e-5) 9 | expect_equal(as.vector(a['persons']), 1) 10 | suppressWarnings(RNGversion("3.5.0")) 11 | set.seed(0) 12 | }) 13 | 14 | test_that("one two trees", { 15 | a <- awnb('class', car, bootstrap_size = 0.5, trees = 2) 16 | expect_true(is_perm(names(a), colnames(car)[-7])) 17 | expect_equal(as.vector(a['buying']), 0.5773503, tolerance = 1e-5) 18 | expect_equal(as.vector(a['doors']), 0.3931064, tolerance = 1e-5) 19 | expect_equal(as.vector(a['persons']), 1) 20 | suppressWarnings(RNGversion("3.5.0")) 21 | set.seed(0) 22 | a <- awnb('Class', voting, bootstrap_size = 0.5, trees = 10) 23 | expect_true(is_perm(names(a), colnames(voting)[-17])) 24 | expect_equal(as.vector(a['superfund_right_to_sue']), 0.21019141, tolerance = 1e-5) 25 | expect_equal(as.vector(a['mx_missile']), 0.30487476, tolerance = 1e-5) 26 | expect_equal(as.vector(a['immigration']), 0.39963413) 27 | }) 28 | 29 | test_that("one no tree", { 30 | expect_error(awnb('Class', dataset = voting[1:2, ], 31 | bootstrap_size = 0.5, trees = 10), "empty") 32 | }) 33 | 34 | test_that("weights for features not in tree", { 35 | suppressWarnings(RNGversion("3.5.0")) 36 | set.seed(0) 37 | a <- awnb('class', car[sample(1:1000, 10), , drop = FALSE], 38 | 
bootstrap_size = 1, trees = 1) 39 | a 40 | expect_true(is_perm(names(a), colnames(car)[-7])) 41 | }) 42 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - uses: r-lib/actions/setup-tinytex@v2 35 | - run: tlmgr --version 36 | 37 | - uses: r-lib/actions/setup-pandoc@v2 38 | 39 | - uses: r-lib/actions/setup-r@v2 40 | with: 41 | r-version: ${{ matrix.config.r }} 42 | http-user-agent: ${{ matrix.config.http-user-agent }} 43 | use-public-rspm: true 44 | 45 | - uses: r-lib/actions/setup-r-dependencies@v2 46 | with: 47 | extra-packages: any::rcmdcheck 48 | needs: check 49 | 50 | - uses: r-lib/actions/check-r-package@v2 51 | with: 52 | upload-snapshots: true 53 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 54 | -------------------------------------------------------------------------------- /man/bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params.R 3 | \name{bnc} 4 | \alias{bnc} 5 | \title{Learn network structure and parameters.} 6 | \usage{ 7 | bnc( 8 | dag_learner, 9 | class, 10 | dataset, 11 | smooth, 12 | dag_args = NULL, 13 | awnb_trees = NULL, 14 | awnb_bootstrap = NULL, 15 | manb_prior = NULL, 16 | wanbia = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{dag_learner}{A character. Name of the structure learning function.} 21 | 22 | \item{class}{A character. Name of the class variable.} 23 | 24 | \item{dataset}{The data frame from which to learn network structure and 25 | parameters.} 26 | 27 | \item{smooth}{A numeric. The smoothing value (\eqn{\alpha}) for Bayesian 28 | parameter estimation. Nonnegative.} 29 | 30 | \item{dag_args}{A list. Optional additional arguments to \code{dag_learner}.} 31 | 32 | \item{awnb_trees}{An integer. The number (\eqn{M}) of bootstrap samples to 33 | generate.} 34 | 35 | \item{awnb_bootstrap}{A numeric. The size of the bootstrap subsample, 36 | relative to the size of \code{dataset} (given in [0,1]).} 37 | 38 | \item{manb_prior}{A numeric. The prior probability for an arc between the 39 | class and any feature.} 40 | 41 | \item{wanbia}{A logical. If \code{TRUE}, WANBIA feature weighting is 42 | performed.} 43 | } 44 | \description{ 45 | A convenience function to learn the structure and parameters in a single 46 | call. Must provide the name of the structure learning algorithm function; 47 | see \code{\link{bnclassify}} for the list. 
48 | } 49 | \examples{ 50 | data(car) 51 | nb <- bnc('nb', 'class', car, smooth = 1) 52 | nb_manb <- bnc('nb', 'class', car, smooth = 1, manb_prior = 0.3) 53 | ode_cl_aic <- bnc('tan_cl', 'class', car, smooth = 1, dag_args = list(score = 'aic')) 54 | } 55 | -------------------------------------------------------------------------------- /tests/testthat/test-anb-dag.R: -------------------------------------------------------------------------------- 1 | context("bnc dag") 2 | 3 | test_that("bnc_dag", { 4 | # Nominal 5 | g <- test_dag() 6 | bd <- bnc_dag(dag = g, class = 'A') 7 | expect_is(bd, 'bnc_dag') 8 | expect_identical(bd$.class, 'A') 9 | expect_identical(bd$.dag, g) 10 | expect_identical(features(bd), 'B') 11 | expect_identical(vars(bd), setNames(nm = LETTERS[2:1])) 12 | expect_identical(bd$.families, list(B = LETTERS[2:1], A = 'A')) 13 | 14 | # Just class 15 | g <- graph_internal(nodes = LETTERS[1], edgemode = "directed") 16 | bd <- bnc_dag(dag = g, class = 'A') 17 | expect_is(bd, 'bnc_dag') 18 | expect_identical(bd$.class, 'A') 19 | expect_identical(bd$.dag, g) 20 | expect_identical(features(bd), character()) 21 | expect_identical(vars(bd), setNames(nm = 'A')) 22 | expect_identical(bd$.families, list(A='A')) 23 | 24 | # Class not parent of all other nodes 25 | e <- list(A = 'B',B = NULL) 26 | edges <- graph_from_to_to_edges('A', 'B') 27 | g <- graph_internal(nodes = LETTERS[1:3], edges = edges, edgemode = "directed") 28 | # if (!skip_assert()) expect_error(bnc_dag(dag = g, class = 'A'), "fams_ok") 29 | # never use skip_assert here so that i always set it to false before publishing 30 | expect_error(bnc_dag(dag = g, class = 'A'), "fams_ok") 31 | }) 32 | 33 | test_that("Accessors", { 34 | # Nominal 35 | skip_if_not_installed('gRbase') 36 | suppressWarnings(RNGversion("3.5.0")) 37 | set.seed(0) 38 | ran <- random_aug_nb_dag('z', letters[-26], maxpar = 5, wgt = 0.8) 39 | dg <- bnc_dag(ran, class = 'z') 40 | expect_identical(unname(sort(vars(dg))), letters) 41 | }) 42 | 43 | test_that("feature families nominal", { 44 | n <- nbcar() 45 | a <- feature_families(n) 46 | expect_equal(length(a), 6) 47 | expect_equal(a[[1]], c('buying', 'class')) 48 | }) -------------------------------------------------------------------------------- /man/inspect_bnc_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-bn-operate.R, 3 | % R/anb-bn.R 4 | \name{inspect_bnc_bn} 5 | \alias{inspect_bnc_bn} 6 | \alias{nparams} 7 | \alias{manb_arc_posterior} 8 | \alias{awnb_weights} 9 | \alias{params} 10 | \alias{values} 11 | \alias{classes} 12 | \title{Inspect a Bayesian network classifier (with structure and parameters).} 13 | \usage{ 14 | nparams(x) 15 | 16 | manb_arc_posterior(x) 17 | 18 | awnb_weights(x) 19 | 20 | params(x) 21 | 22 | values(x) 23 | 24 | classes(x) 25 | } 26 | \arguments{ 27 | \item{x}{The \code{\link{bnc_bn}} object. The Bayesian network classifier.} 28 | } 29 | \description{ 30 | Functions for inspecting a \code{\link{bnc_bn}} object. In addition, you can 31 | query this object with the functions documented in 32 | \code{\link{inspect_bnc_dag}}. 33 | } 34 | \section{Functions}{ 35 | \itemize{ 36 | \item \code{nparams()}: Returns the number of free parameters in the model. 37 | 38 | \item \code{manb_arc_posterior()}: Returns the posterior of each arc from the class 39 | according to the MANB method. 
40 | 41 | \item \code{awnb_weights()}: Returns the AWNB feature weights. 42 | 43 | \item \code{params()}: Returns the list of CPTs, in the same order as \code{\link{vars}}. 44 | 45 | \item \code{values()}: Returns the possible values of each variable, in the same order as \code{\link{vars}}. 46 | 47 | \item \code{classes()}: Returns the possible values of the class variable. 48 | 49 | }} 50 | \examples{ 51 | 52 | data(car) 53 | nb <- bnc('nb', 'class', car, smooth = 1) 54 | nparams(nb) 55 | nb <- bnc('nb', 'class', car, smooth = 1, manb_prior = 0.5) 56 | manb_arc_posterior(nb) 57 | nb <- bnc('nb', 'class', car, smooth = 1, awnb_bootstrap = 0.5) 58 | awnb_weights(nb) 59 | } 60 | -------------------------------------------------------------------------------- /tests/testthat/test-wrap-igraph.R: -------------------------------------------------------------------------------- 1 | context("igraph") 2 | 3 | test_that("Make igraph", { 4 | skip_if_not_installed("igraph") 5 | expect_error(make_graph(nodes = LETTERS[1], from = LETTERS[1], to=1, 6 | weights = 1)) 7 | expect_error(make_graph(nodes = LETTERS[1], from =LETTERS[1],to=LETTERS[1], 8 | weights = NULL)) 9 | g <- make_graph(nodes = LETTERS[1:2], from =LETTERS[1],to=LETTERS[2], 10 | weights = 0.1) 11 | g <- make_graph(nodes = LETTERS[1], from = c(), to = c(), weights = c()) 12 | }) 13 | 14 | test_that("Complete graph", { 15 | skip_if_not_installed("igraph") 16 | g <- complete_graph(LETTERS[1:5]) 17 | g <- graph_internal2graph_NEL(g) 18 | expect_equal(length(igraph::E(g)), 10) 19 | }) 20 | 21 | test_that("Superimpose node", { 22 | skip_if_not_installed("igraph") 23 | # Nominal 24 | g <- igraph::graph(edges = c("A", "B"), directed = TRUE) 25 | sg <- superimpose_node(graphNEL2_graph_internal(g), 'C') 26 | sg <- graph_internal2graph_NEL(sg) 27 | expect_equal(sort(igraph::V(sg)$name), LETTERS[1:3]) 28 | expect_equal(igraph::ecount(sg), 3L) 29 | # Node already in dag 30 | expect_error(superimpose_node(graphNEL2_graph_internal(g), 'A'), 'nodes') 31 | }) 32 | 33 | 34 | test_that("Direct forest", { 35 | skip_if_not_installed("igraph") 36 | gr <- pairwise_ode_score_contribs(class = 'class', dataset = car, score = 'loglik') 37 | af <- max_weight_forest(gr) 38 | f <- direct_forest(g = af) 39 | # check labels, weights 40 | igf <- graph_internal2graph_NEL(f) 41 | igaf <- graph_internal2graph_NEL(af) 42 | expect_true(igraph::graph.isomorphic(igraph::as.undirected(igf), igaf)) 43 | expect_equal(sort(igraph::V(igf)$name), sort(igraph::V(igaf)$name)) 44 | # weights 45 | }) 46 | 47 | -------------------------------------------------------------------------------- /man/tan_chowliu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/learn-struct.R 3 | \name{tan_chowliu} 4 | \alias{tan_chowliu} 5 | \alias{tan_cl} 6 | \title{Learns a one-dependence estimator using Chow-Liu's algorithm.} 7 | \usage{ 8 | tan_cl(class, dataset, score = "loglik", root = NULL) 9 | } 10 | \arguments{ 11 | \item{class}{A character. Name of the class variable.} 12 | 13 | \item{dataset}{The data frame from which to learn the classifier.} 14 | 15 | \item{score}{A character. The score to be maximized. \code{'loglik'}, 16 | \code{'bic'}, and \code{'aic'} return the maximum likelihood, maximum BIC 17 | and maximum AIC tree/forest, respectively.} 18 | 19 | \item{root}{A character. The feature to be used as root of the augmenting 20 | tree. 
Only one feature can be supplied, even in the case of an augmenting 21 | forest. This argument is optional.} 22 | } 23 | \value{ 24 | A \code{\link{bnc_dag}} object. 25 | } 26 | \description{ 27 | Learns a one-dependence Bayesian classifier using Chow-Liu's algorithm, by 28 | maximizing either log-likelihood, the AIC or BIC scores; maximizing 29 | log-likelihood corresponds to the well-known tree augmented naive Bayes 30 | (Friedman et al., 1997). When maximizing AIC or BIC the output might be a 31 | forest-augmented rather than a tree-augmented naive Bayes. 32 | } 33 | \examples{ 34 | data(car) 35 | ll <- tan_cl('class', car, score = 'loglik') 36 | \dontrun{plot(ll)} 37 | ll <- tan_cl('class', car, score = 'loglik', root = 'maint') 38 | \dontrun{plot(ll)} 39 | aic <- tan_cl('class', car, score = 'aic') 40 | bic <- tan_cl('class', car, score = 'bic') 41 | } 42 | \references{ 43 | Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network 44 | classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. 45 | } 46 | -------------------------------------------------------------------------------- /R/wrap-rpart.R: -------------------------------------------------------------------------------- 1 | #' Learns an unpruned \code{rpart} recursive partition. 2 | #' @keywords internal 3 | learn_unprunned_tree <- function(dataset, class) { 4 | form <- as.formula(paste(class, '~ .')) 5 | # Save time by avoiding CV or surrogates 6 | control <- rpart::rpart.control(minsplit = 2, minbucket = 1, cp = 0, 7 | maxcompete = 0,maxsurrogate = 0, xval = 0) 8 | rpart::rpart(form, data = dataset, na.action = rpart::na.rpart, method 9 | = "class", parms = list(split = "information"), control = control) 10 | } 11 | #' Identifies all depths at which the features of a classification tree are 12 | #' tested. 13 | #' 14 | #' @param tree an \code{rpart} object 15 | #' @return a numeric vector. The names are the names of the variables. 16 | #' @keywords internal 17 | identify_all_testing_depths <- function(tree) { 18 | stopifnot(inherits(x = tree, what = 'rpart')) 19 | # Filter out leaves 20 | vars <- tree$frame[tree$frame$var != "" , 'var', drop = F] 21 | ordering <- as.integer(rownames(vars)) 22 | # if there are no splits in the tree - return 23 | if (length(ordering) == 0) return(NULL) 24 | names(ordering) <- as.matrix(vars)[,1] 25 | # Decode the depth from the ordering number. The order of a left-most child is 2*o_p, where o_p is the order of its parent. 26 | depths <- trunc(log(base = 2, ordering)) + 1 27 | # a small check 28 | stopifnot(min(depths) == 1) 29 | depths 30 | } 31 | #' Identifies the lowest (closest to root) depths at which the features of a 32 | #' classification tree are tested. 33 | #' 34 | #' @keywords internal 35 | identify_min_testing_depths <- function(tree) { 36 | depths <- identify_all_testing_depths(tree) 37 | if (length(depths) == 0) return(NULL) 38 | stopifnot(length(names(depths)) > 0) 39 | sort(tapply(depths, names(depths), min)) 40 | } -------------------------------------------------------------------------------- /R/basic-probs.R: -------------------------------------------------------------------------------- 1 | # Adds a list of matrices element-wise. 2 | # 3 | # @return A numeric matrix. Adding in log space corresponds to multiplying in probability space.
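# Illustrative sketch (added comment; not part of the original package, names and
# values are made up): two 1 x 2 matrices of log-probabilities with identical
# dimensions and column names can be combined as
#   m1 <- matrix(log(c(0.2, 0.8)), nrow = 1, dimnames = list(NULL, c("a", "b")))
#   m2 <- matrix(log(c(0.5, 0.5)), nrow = 1, dimnames = list(NULL, c("a", "b")))
#   sum_matrices(list(m1, m2))   # element-wise sum of logs = product of probabilities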
4 | sum_matrices <- function(matrices) { 5 | # Must have at least one member 6 | stopifnot(length(matrices) > 0) 7 | # Check all are numeric matrices of same size with same colnames 8 | n <- nrow(matrices[[1]]) 9 | nobs <- ncol(matrices[[1]]) 10 | values <- colnames(matrices[[1]]) 11 | valid <- vapply(matrices, valid_factor, n, nobs, values, 12 | FUN.VALUE = logical(1L)) 13 | stopifnot(all(valid)) 14 | sum <- Reduce('+', matrices) 15 | stopifnot(identical(colnames(sum), values)) 16 | sum 17 | } 18 | valid_factor <- function(x, nrow, ncol, colnames) { 19 | identical(dim(x), c(nrow, ncol)) && is.numeric(x) && identical(colnames, colnames(x)) 20 | } 21 | #' Normalize log probabilities. 22 | #' 23 | #' Uses the log-sum-exp trick. 24 | #' 25 | #' @references Murphy KP (2012). \emph{Machine learning: a probabilistic 26 | #' perspective}. The MIT Press. pp. 86-87. 27 | #' @keywords internal 28 | log_normalize <- function(lp) { 29 | stopifnot(is.matrix(lp)) 30 | # Check lp is a matrix of log probs (<= 0)? 31 | # Normalize with log sum exp 32 | log_probs <- lp - matrixStats::rowLogSumExps(lp) 33 | log_probs 34 | } 35 | exponentiate_probs <- function(p) { 36 | p <- exp_sideeffect(p) 37 | # rowAnys does not distinguish between NA and NaN. 38 | nans <- which(matrixStats::rowAnys(p, value = NaN)) 39 | if (length(nans) > 0) { 40 | p[nans, ] <- 1 / ncol(p) 41 | } 42 | p 43 | } 44 | #' Returns \code{TRUE} if \code{x} is a valid probability distribution. 45 | #' 46 | #'@keywords internal 47 | are_pdists <- function(x) { 48 | stopifnot(is.matrix(x)) 49 | are_probs(x) && all(fast_equal(rowSums(x), 1)) 50 | } 51 | are_probs <- function(x) { 52 | !anyNA(x) && all(x >= 0) && all(x <= 1) 53 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | printl: 2 | $(eval TMP := $(du myprof* | sort -n -r | head -n 1 | cut -f 2)) 3 | $(eval TMP := $(ls)) 4 | @echo $(TMP) 5 | $(TMP) 6 | ls 7 | 8 | profile: 9 | rm -f myprof.log* 10 | export LD_PRELOAD=/usr/lib/libprofiler.so; CPUPROFILE="myprof.log" R -f run-test.r 11 | du -h myprof.log* 12 | FILE = du myprof* | sort -n -r | head -n 1 | cut -f 2 13 | google-pprof --text /usr/bin/R $(FILE) 14 | 15 | clean: 16 | rm src/*.o 17 | rm src/*.so 18 | 19 | clean-vignettes: 20 | find vignettes/ -type f -print | grep -Fvf vignettes/.install_extras | grep "tex" | xargs rm -f 21 | find vignettes/ -type f -print | grep -Fvf vignettes/.install_extras | grep "pdf" | xargs rm -f 22 | rm vignettes/*.log 23 | rm vignettes/*.R 24 | 25 | .PHONY: printl 26 | 27 | vignette-includes: ~/code-papers/tex-includes/macros-paper.tex ~/code-papers/tex-includes/macros-math.tex ~/code-papers/paper-bnclassify-rjournal/RJreferences.bib ~/code-papers/paper-bnclassify-rjournal/pg_*.png 28 | cp ~/code-papers/tex-includes/macros-paper.tex vignettes/ 29 | cp ~/code-papers/tex-includes/macros-math.tex vignettes/ 30 | cp ~/code-papers/tex-includes/macros-rjournal.tex vignettes/ 31 | cat vignettes/methods.bib > vignettes/bnclassify.bib 32 | cat ~/code-papers/paper-bnclassify-rjournal/RJreferences.bib >> vignettes/bnclassify.bib 33 | cp ~/code-papers/paper-bnclassify-rjournal/pg_*.png vignettes/ 34 | 35 | clean-technical: 36 | cd vignettes; latexmk -c technical.tex 37 | cd vignettes; latexmk -C technical.tex 38 | rm -f vignettes/technical-concordance.tex 39 | rm -f vignettes/technical.bbl 40 | rm -f vignettes/technical.synctex.gz 41 | rm -f vignettes/technical.tex 42 | 43 | docs: 44 | cd ..
; rm -f bnclassify.pdf; R CMD Rd2pdf bnclassify; 45 | cd .. ; xo bnclassify.pdf 46 | 47 | check: 48 | R < meta/check.r --no-save >> ~/Desktop/checks-output 49 | -------------------------------------------------------------------------------- /tests/testthat/test-cpp-table.R: -------------------------------------------------------------------------------- 1 | context("cpp table") 2 | 3 | check_unidim <- function(db, rows, cols) { 4 | return() # currently different behaviour between r devel and current r, so skip 5 | if (!is.character(cols)) { 6 | cols <- colnames(db)[cols] 7 | } 8 | a <- table_cpp(db, cols) 9 | db <- db[, cols, drop = FALSE] 10 | b <- table(db) 11 | if (ncol(db) == 1) { 12 | # table sets name of object 13 | names(dimnames(a)) = "db" 14 | } 15 | expect_equal(a, b) 16 | } 17 | 18 | test_that("1D", { 19 | check_unidim(car, TRUE, 1) 20 | check_unidim(car, 1:5, 1) 21 | check_unidim(car, FALSE, 1) 22 | 23 | check_unidim(v, TRUE, 1) 24 | check_unidim(v, 1:5, 1) 25 | check_unidim(v, FALSE, 1) 26 | }) 27 | 28 | test_that("3D", { 29 | check_unidim(car, TRUE, 1:3) 30 | check_unidim(car, 1:5, 1:3) 31 | check_unidim(car, FALSE, 1:3) 32 | 33 | check_unidim(v, TRUE, 1:3) 34 | check_unidim(v, 1:5, 1:3) 35 | check_unidim(v, FALSE, 1:3) 36 | }) 37 | 38 | 39 | test_that("large dim", { 40 | check_unidim(car, TRUE, 1:7) 41 | check_unidim(v, TRUE, 17) 42 | }) 43 | 44 | test_that("NA", { 45 | db <- car[, 1:3] 46 | db[1, 1] <- NA 47 | check_unidim(db, TRUE, TRUE) 48 | 49 | db[1:100, 1] <- NA 50 | check_unidim(db, TRUE, TRUE) 51 | }) 52 | 53 | test_that("random", { 54 | (x = sample(0:1, 1e5, replace = T)) 55 | x <- data.frame(u = factor(x)) 56 | a <- table_cpp(x, colnames(x)) 57 | b <- table(x) 58 | # don't know why table keeps x as name. 59 | expect_equal(unname(a), unname(b)) 60 | }) 61 | 62 | test_that("ordinal", { 63 | skip_if_not_installed("mlbench") 64 | library(mlbench) 65 | data(Soybean) 66 | Soybean <- na.omit(Soybean) 67 | check_unidim(Soybean, TRUE, c('precip', 'Class')) 68 | check_unidim(Soybean, TRUE, c('precip')) 69 | }) -------------------------------------------------------------------------------- /man/cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cv.R 3 | \name{cv} 4 | \alias{cv} 5 | \title{Estimate predictive accuracy with stratified cross validation.} 6 | \usage{ 7 | cv(x, dataset, k, dag = TRUE, mean = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{List of \code{\link{bnc_bn}} or a single 11 | \code{\link{bnc_bn}}. The classifiers to evaluate.} 12 | 13 | \item{dataset}{The data frame on which to evaluate the classifiers.} 14 | 15 | \item{k}{An integer. The number of folds.} 16 | 17 | \item{dag}{A logical. Whether to learn structure on each training subsample. 18 | Parameters are always learned.} 19 | 20 | \item{mean}{A logical. Whether to return mean accuracy for each classifier or 21 | to return a k-row matrix with accuracies per fold.} 22 | } 23 | \value{ 24 | A numeric vector of same length as \code{x}, giving the predictive 25 | accuracy of each classifier. If \code{mean = FALSE} then a matrix with k 26 | rows and a column per each classifier in \code{x}. 27 | } 28 | \description{ 29 | Estimate predictive accuracy of a classifier with stratified cross 30 | validation. It learns the models from the training subsamples by repeating 31 | the learning procedures used to obtain \code{x}. 
It can keep the network 32 | structure fixed and re-learn only the parameters, or re-learn both structure 33 | and parameters. 34 | } 35 | \examples{ 36 | data(car) 37 | nb <- bnc('nb', 'class', car, smooth = 1) 38 | # CV a single classifier 39 | cv(nb, car, k = 10) 40 | nb_manb <- bnc('nb', 'class', car, smooth = 1, manb_prior = 0.5) 41 | cv(list(nb=nb, manb=nb_manb), car, k = 10) 42 | # Get accuracies on each fold 43 | cv(list(nb=nb, manb=nb_manb), car, k = 10, mean = FALSE) 44 | ode <- bnc('tan_cl', 'class', car, smooth = 1, dag_args = list(score = 'aic')) 45 | # keep structure fixed across training subsamples 46 | cv(ode, car, k = 10, dag = FALSE) 47 | } 48 | -------------------------------------------------------------------------------- /tests/testthat/test-data-input.R: -------------------------------------------------------------------------------- 1 | context("Data input") 2 | 3 | test_that("Check class in dataset", { 4 | expect_error(check_class_in_dataset(class = 2, voting)) 5 | expect_error(check_class_in_dataset(class = 'class', voting)) 6 | check_class_in_dataset(class = 'Class', voting) 7 | expect_error(check_class_in_dataset(class = c('Class', 'Class'), voting), 8 | "string") 9 | }) 10 | 11 | test_that("Check dataset", { 12 | check_dataset(car) 13 | expect_error(check_dataset(as.matrix(car))) 14 | tm <- cbind(car, class=car$class) 15 | expect_error(check_dataset(tm), "unique") 16 | tm <- car; colnames(tm)[1] <- NA 17 | expect_error(check_dataset(tm), 18 | "is_non_empty_complete(cnames) is not TRUE", fixed = TRUE) 19 | tm <- car; tm[[1]] <- as.numeric(tm[[1]]) 20 | expect_error(check_dataset(tm), "factors") 21 | }) 22 | 23 | test_that("Get features", { 24 | expect_error(get_features('class', voting), "disjoint") 25 | ft <- get_features('Class', voting) 26 | expect_identical(ft, colnames(voting)[-17]) 27 | ft <- get_features('class', car) 28 | expect_identical(ft, colnames(car)[-7]) 29 | tm <- car; tm[[1]] <- as.numeric(tm[[1]]) 30 | expect_error(get_features('class', tm), "factors") 31 | }) 32 | 33 | test_that("check features", { 34 | # Nominal 35 | check_features(letters[1:5], 'f') 36 | check_features(NULL, 'f') 37 | check_features(character(), 'f') 38 | # Class in features 39 | expect_error(check_features(letters[1:5], 'e'), 'class') 40 | # Empty class 41 | expect_error(check_features(character(), NULL), 'string') 42 | }) 43 | 44 | test_that("trim dataset", { 45 | # Nominal 46 | a <- trim_dataset('buying', car) 47 | expect_identical(dim(a), c(nrow(car), 1L)) 48 | # Empty vars set 49 | expect_error(trim_dataset(character(), car), "complete") 50 | # Integer vars 51 | expect_error(trim_dataset(1, car), "character") 52 | }) -------------------------------------------------------------------------------- /R/anb-bn-operate.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @rdname loglik 3 | AIC.bnc_bn <- function(object, ...) { 4 | ll <- logLik(object, ...) 5 | penalize_loglik(ll, k = 1) 6 | } 7 | #' @export 8 | #' @rdname loglik 9 | BIC.bnc_bn <- function(object, ...) { 10 | ll <- logLik(object, ...) 11 | penalize_loglik(ll, k = log(nobs(ll)) / 2) 12 | } 13 | #' @export 14 | #' @rdname loglik 15 | logLik.bnc_bn <- function(object, ...)
{ 16 | dataset <- list(...)[[1]] 17 | if (is.null(dataset) || nrow(dataset) == 0) stop("Must provide data instances.") 18 | loglik <- compute_ll(x = object, dataset = dataset) 19 | attr(loglik, "nobs") <- nrow(dataset) 20 | if (inherits(object, "bnc_bn")) attr(loglik, "df") <- nparams(object) 21 | class(loglik) <- "logLik" 22 | loglik 23 | } 24 | #' @export 25 | #' @rdname loglik 26 | cLogLik <- function(object, ...) { 27 | dataset <- list(...)[[1]] 28 | compute_cll(object, dataset) 29 | } 30 | penalize_loglik <- function(ll, k) { 31 | as.numeric(ll) - k * attr(ll, "df") 32 | } 33 | #' @export 34 | #' @describeIn inspect_bnc_bn Returns the number of free parameters in the model. 35 | nparams <- function(x) { 36 | sum(vapply(params(x), count_cpt_free_params, FUN.VALUE = numeric(1))) 37 | } 38 | #' @export 39 | #' @describeIn inspect_bnc_bn Returns the posterior of each arc from the class 40 | #' according to the MANB method. 41 | manb_arc_posterior <- function(x) { 42 | stopifnot(inherits(x, "bnc_bn")) 43 | if (!is.null(x$.manb)) { 44 | return(x$.manb) 45 | } 46 | warning("MANB arc posterior probabilities have not been computed for x.") 47 | NULL 48 | } 49 | #' @export 50 | #' @describeIn inspect_bnc_bn Returns the AWNB feature weights. 51 | awnb_weights <- function(x) { 52 | stopifnot(inherits(x, "bnc_bn")) 53 | if (!is.null(x$.weights)) { 54 | return(x$.weights) 55 | } 56 | warning("AWNB weights have not been computed for x.") 57 | NULL 58 | } -------------------------------------------------------------------------------- /tests/testthat/helper-common.R: -------------------------------------------------------------------------------- 1 | # Common testing functions 2 | 3 | # Smooth is always nonzero so that grain does not produce 0 probabilities 4 | 5 | nbvote <- function() { 6 | # data(voting, envir = parent.frame()) 7 | lp(nb('Class', voting), voting, smooth = 1) 8 | } 9 | 10 | nbvotecomp <- function() { 11 | # data(voting, envir = parent.frame()) 12 | # v <- na.omit(voting) 13 | # assign('v', v, envir = parent.frame()) 14 | lp(nb('Class', v), v, smooth = 1) 15 | } 16 | 17 | nbcar <- function() { 18 | # data(car, envir = parent.frame()) 19 | lp(nb('class', car), car, smooth = 1) 20 | } 21 | 22 | nbcarp <- function(cardata) { 23 | lp(nb('class', cardata), cardata, smooth = 1) 24 | } 25 | 26 | nbcarclass <- function() { 27 | lp(nb('class', car[, 'class', drop = FALSE]), car, smooth = 1) 28 | } 29 | 30 | random_letters_db <- function(nlet = 6, nrow = 100) { 31 | df <- replicate(nlet, random_letters_vector(nlet, nrow)) 32 | df <- as.data.frame(df, stringsAsFactors = TRUE) 33 | colnames(df) <- letters[seq_len(nlet)] 34 | df 35 | } 36 | 37 | random_letters_vector <- function(nletters, n) { 38 | sample(letters[1:nletters], n, replace = TRUE) 39 | } 40 | 41 | # Creates a random augmented NB DAG with `class` as the class variable.
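# Illustrative call (added comment; argument values are arbitrary and mirror the
# use in test-anb-dag.R):
#   random_aug_nb_dag('class', letters[1:5], maxpar = 2, wgt = 0.5)
# draws a random DAG over letters[1:5] with gRbase and then superimposes 'class'
# as a parent of every node.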
42 | random_aug_nb_dag <- function(class, V, maxpar, wgt) { 43 | dg <- gRbase::random_dag(V = V, maxpar = maxpar, wgt = wgt) 44 | dg <- graphNEL2_graph_internal(dg) 45 | superimpose_node(dag = dg, class) 46 | } 47 | 48 | identical_non_call <- function(x, y) { 49 | x$.call_struct <- y$.call_struct <- NULL 50 | x$.call_bn <- y$.call_bn <- NULL 51 | expect_identical(x, y) 52 | } 53 | 54 | test_dag <- function() { 55 | edges <- graph_from_to_to_edges('A', 'B') 56 | graph_internal(nodes = LETTERS[1:2], edges, weights = NULL, edgemode = "directed") 57 | } 58 | 59 | # Load data 60 | 61 | data(car, envir = environment()) 62 | data(voting, envir = environment()) 63 | v <- na.omit(voting) 64 | alphadb <- random_letters_db() -------------------------------------------------------------------------------- /R/predict.R: -------------------------------------------------------------------------------- 1 | #'Predicts class labels or class posterior probability distributions. 2 | #' 3 | #'@details Ties are resolved randomly. Inference is much slower if 4 | #'\code{newdata} contains \code{NA}s. 5 | #' 6 | #'@export 7 | #' 8 | #'@param object A \code{\link{bnc_bn}} object. 9 | #'@param newdata A data frame containing observations whose class has to be 10 | #' predicted. 11 | #'@param prob A logical. Whether class posterior probability should be returned. 12 | #'@param ... Ignored. 13 | #'@return If \code{prob=FALSE}, then returns a length-\eqn{N} factor with the 14 | #' same levels as the class variable in \code{x}, where \eqn{N} is the number 15 | #' of rows in \code{newdata}. Each element is the most likely 16 | #' class for the corresponding row in \code{newdata}. If \code{prob=TRUE}, 17 | #' returns a \eqn{N} by \eqn{C} numeric matrix, where \eqn{C} is the number of 18 | #' classes; each row corresponds to the class posterior of the instance. 19 | #' 20 | #'@examples 21 | #'data(car) 22 | #'nb <- bnc('nb', 'class', car, smooth = 1) 23 | #'p <- predict(nb, car) 24 | #'head(p) 25 | #'p <- predict(nb, car, prob = TRUE) 26 | #'head(p) 27 | predict.bnc_fit <- function(object, newdata, prob = FALSE, ...) { 28 | pred <- compute_cp(x = object, dataset = newdata, ...) 29 | if (!prob) { 30 | pred <- map(pred) 31 | } 32 | pred 33 | } 34 | #' Assigns instances to the most likely class. 35 | #' 36 | #' Ties are resolved randomly. 37 | #' 38 | #' @param pred A numeric matrix. Each row corresponds to class posterior 39 | #' probabilities for an instance. 40 | #' @return a factor with the same levels as the class variable. 
41 | #' @keywords internal 42 | map <- function(pred) { 43 | max_ind <- max.col(m = pred, ties.method = "random") 44 | classes <- colnames(pred) 45 | stopifnot(is_non_empty_complete(classes)) 46 | predicted <- classes[max_ind] 47 | # Return a factor with the levels of the class variable 48 | factor(predicted, levels = classes) 49 | } -------------------------------------------------------------------------------- /man/inspect_bnc_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-dag.R, 3 | % R/bnc-dag-operate.R 4 | \name{inspect_bnc_dag} 5 | \alias{inspect_bnc_dag} 6 | \alias{class_var} 7 | \alias{features} 8 | \alias{vars} 9 | \alias{families} 10 | \alias{modelstring} 11 | \alias{feature_families} 12 | \alias{narcs} 13 | \alias{is_semi_naive} 14 | \alias{is_anb} 15 | \alias{is_nb} 16 | \alias{is_ode} 17 | \title{Inspect a Bayesian network classifier structure.} 18 | \usage{ 19 | class_var(x) 20 | 21 | features(x) 22 | 23 | vars(x) 24 | 25 | families(x) 26 | 27 | modelstring(x) 28 | 29 | feature_families(x) 30 | 31 | narcs(x) 32 | 33 | is_semi_naive(x) 34 | 35 | is_anb(x) 36 | 37 | is_nb(x) 38 | 39 | is_ode(x) 40 | } 41 | \arguments{ 42 | \item{x}{The \code{\link{bnc_dag}} object. The Bayesian network classifier 43 | structure.} 44 | } 45 | \description{ 46 | Functions for inspecting a \code{\link{bnc_dag}} object. 47 | } 48 | \section{Functions}{ 49 | \itemize{ 50 | \item \code{class_var()}: Returns the class variable. 51 | 52 | \item \code{features()}: Returns the features. 53 | 54 | \item \code{vars()}: Returns all variables (i.e., features + class). 55 | 56 | \item \code{families()}: Returns the family of each variable. 57 | 58 | \item \code{modelstring()}: Returns the model string of the network in bnlearn format (adding a space in between two families). 59 | 60 | \item \code{feature_families()}: Returns the family of each feature. 61 | 62 | \item \code{narcs()}: Returns the number of arcs. 63 | 64 | \item \code{is_semi_naive()}: Returns TRUE if \code{x} is a semi-naive Bayes. 65 | 66 | \item \code{is_anb()}: Returns TRUE if \code{x} is an augmented naive Bayes. 67 | 68 | \item \code{is_nb()}: Returns TRUE if \code{x} is a naive Bayes. 69 | 70 | \item \code{is_ode()}: Returns TRUE if \code{x} is a one-dependence estimator. 71 | 72 | }} 73 | \examples{ 74 | data(car) 75 | nb <- bnc('nb', 'class', car, smooth = 1) 76 | narcs(nb) 77 | is_ode(nb) 78 | } 79 | -------------------------------------------------------------------------------- /R/data-input.R: -------------------------------------------------------------------------------- 1 | # Checks it is a data frame, with all named unique factor columns. 
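# Illustrative behaviour (added comment; examples taken from test-data-input.R):
#   check_dataset(car)                        # passes: named, unique, all-factor columns
#   tm <- car; tm[[1]] <- as.numeric(tm[[1]])
#   check_dataset(tm)                         # errors, since a column is not a factor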
2 | check_dataset <- function(dataset) { 3 | # Check dataset is a data frame 4 | stopifnot(is.data.frame(dataset)) 5 | # Check every column has a unique name 6 | cnames <- colnames(dataset) 7 | stopifnot(is_non_empty_complete(cnames), are_all_unique(cnames)) 8 | # Make sure they are all factors 9 | stopifnot(are_factors(dataset)) 10 | } 11 | # Checks class is a length 1 character 12 | check_class <- function(class) { 13 | # Check class is length 1 character 14 | stopifnot(assertthat::is.string(class)) 15 | } 16 | # Check the features are in correct format 17 | check_features <- function(features, class) { 18 | # Check class 19 | check_class(class) 20 | # Check features are NULL or character 21 | stopifnot(is.null(features) || is.character(features)) 22 | # Check class is not in features 23 | stopifnot(are_disjoint(class, features)) 24 | } 25 | # Checks class is a length 1 character found in the dataset 26 | check_class_in_dataset <- function(class, dataset) { 27 | check_class(class) 28 | # Check data set has unique column names 29 | check_dataset(dataset) 30 | # Check class in dataset 31 | stopifnot(!are_disjoint(class, colnames(dataset))) 32 | } 33 | # Gets the features in a data set 34 | get_features <- function(class, dataset) { 35 | # Check class 36 | check_class_in_dataset(class, dataset) 37 | # Return all column names other than class 38 | setdiff(colnames(dataset), class) 39 | } 40 | # Trim the dataset to the vars 41 | trim_dataset <- function(vars, dataset) { 42 | # Check dataset 43 | check_dataset(dataset) 44 | # Check vars are unique and non empty 45 | stopifnot(is_non_empty_complete(vars)) 46 | stopifnot(is.character(vars)) 47 | stopifnot(are_all_unique(vars)) 48 | # If the same return original dataset 49 | if (is_perm(vars, colnames(dataset))) { 50 | dataset 51 | } 52 | # Otherwise trim it 53 | else { 54 | dataset[ , vars, drop=FALSE] 55 | } 56 | } -------------------------------------------------------------------------------- /tests/testthat/test-wrap-mlr.R: -------------------------------------------------------------------------------- 1 | context("mlr") 2 | 3 | test_that("as mlr", { 4 | skip_on_cran() 5 | skip_if_not_installed('mlr') 6 | skip("because it now fails on r-devel") 7 | library(mlr) 8 | # Creates a learner just for fitting 9 | nf <- nbcar() 10 | ml <- as_mlr(nf, dag = FALSE) 11 | expect_identical(names(ml$par.vals$args), c('lp_fargs')) 12 | # Creates a learner for structure learning and fitting 13 | nf <- nbcar() 14 | ml <- as_mlr(nf, dag = TRUE) 15 | expect_identical(names(ml$par.vals$args), c('lp_fargs', 'dag_fargs')) 16 | }) 17 | 18 | test_that("train", { 19 | skip_on_cran() 20 | skip_if_not_installed('mlr') 21 | skip("because it now fails on r-devel") 22 | library(mlr) 23 | # mlr needs to be loaded for train() to work; otherwise it will fail because 24 | # it won't find learner options c("show.learner.output", "on.learner.error", 25 | # "on.learner.warning"). 
To have it working without mlr loaded maybe I must 26 | # specify these in as_mlr() 27 | t <- mlr::makeClassifTask(id = "compare", data = car, target = 'class', 28 | fixup.data = "no", check.data = FALSE) 29 | nf <- nbcar() 30 | # Train just with fitting 31 | ml <- as_mlr(nf, dag = FALSE) 32 | mod = mlr::train(ml, t, subset = sample(nrow(car), 100)) 33 | # Train with structure learning and fitting 34 | ml <- as_mlr(nf, dag = TRUE) 35 | mod = mlr::train(ml, t, subset = sample(nrow(car), 100)) 36 | 37 | detach('package:mlr') 38 | }) 39 | 40 | test_that("resample", { 41 | skip_on_cran() 42 | skip_if_not_installed('mlr') 43 | skip("because it now fails on r-devel") 44 | library(mlr) 45 | 46 | ctrl = makeFeatSelControlSequential(alpha = 0, method = "sfs") 47 | rdesc = makeResampleDesc(method = "Holdout") 48 | ct <- mlr::makeClassifTask(id = "compare", data = car, target = 'class', 49 | fixup.data = "no", check.data = FALSE) 50 | nf <- nbcar() 51 | bnl <- as_mlr(nf, dag = TRUE) 52 | sfeats = selectFeatures(learner = bnl, task = ct, resampling = rdesc, 53 | control = ctrl, show.info = FALSE) 54 | sfeats$x 55 | detach('package:mlr') 56 | }) -------------------------------------------------------------------------------- /tests/testthat/test-anb-bn-operate.R: -------------------------------------------------------------------------------- 1 | context("bnc bn operate") 2 | 3 | test_that("Free parameters", { 4 | u <- nbcar() 5 | b <- nparams(x = u) 6 | expect_equal(b, 63) 7 | 8 | u <- lp(nb('Class', voting), voting, smooth = 1) 9 | b <- nparams(u) 10 | expect_equal(b, 2 * 16 + 1) 11 | }) 12 | 13 | test_that("logLik AIC BIC", { 14 | nb <- lp(nb('class', car), car, smooth = 0) 15 | ll <- logLik(nb, car) 16 | expect_true(inherits(ll, "logLik")) 17 | expect_equal(attr(ll, "nobs"), nrow(car)) 18 | expect_equal(attr(ll, "df"), 63) 19 | expect_equal(as.vector(ll), -13503.69, tolerance = 1e-6) 20 | aic <- AIC(nb, car) 21 | expect_equal(as.vector(aic), -13566.69, tolerance = 1e-6) 22 | bic <- BIC(nb, car) 23 | expect_equal(as.vector(bic), -13738.51, tolerance = 1e-6) 24 | }) 25 | 26 | test_that("logLik AIC BIC as bnlearn", { 27 | nb <- nb('class', car[, c('buying', 'class')]) 28 | nb <- lp(nb, car, smooth = 0) 29 | ll <- logLik(nb, car) 30 | aic <- AIC(nb, car) 31 | bic <- BIC(nb, car) 32 | expect_equal(as.vector(ll), -3724.18038821656273) 33 | expect_equal( aic, -3739.18038821654636) 34 | expect_equal( bic, -3780.09078783677614) 35 | 36 | nb <- lp(nb('class', car), car, smooth = 0) 37 | ll <- logLik(nb, car) 38 | aic <- AIC(nb, car) 39 | bic <- BIC(nb, car) 40 | expect_equal(as.vector(ll), -13503.6883427047687) 41 | expect_equal( aic, -13566.6883427047687) 42 | expect_equal( bic, -13738.5120211097346) 43 | }) 44 | 45 | test_that("manb_arc_posterior", { 46 | a <- nbcar() 47 | expect_warning(manb_arc_posterior(a), "MANB arc posterior probabilities have not been computed for x.") 48 | a <- lp(a, car, smooth = 1, manb_prior = 0.1) 49 | b <- manb_arc_posterior(a) 50 | expect_equal(names(b), features(a)) 51 | expect_equal(as.vector(b["doors"]), 2.921702e-06, tolerance = 1e-7) 52 | }) 53 | 54 | test_that("awnb weights", { 55 | a <- nbcar() 56 | expect_warning(awnb_weights(a), "AWNB weights have not been computed for x.") 57 | a <- lp(a, car, smooth = 1, awnb_trees = 10) 58 | b <- awnb_weights(a) 59 | expect_equal(names(b), features(a)) 60 | expect_true(are_probs(b)) 61 | }) -------------------------------------------------------------------------------- /R/memoise.R: 
-------------------------------------------------------------------------------- 1 | #' Memoise a function. 2 | #' 3 | #' Based on Hadley Wickham's memoise package. Assumes that the argument to f is a 4 | #' character vector. 5 | #' 6 | #' This function is a slightly modified version of 7 | #' \code{memoise} to avoid the use of digest. The remaining functions 8 | #' are copied as is from memoise. 9 | #' 10 | #' @keywords internal 11 | #' @author Hadley Wickham, Bojan Mihaljevic 12 | #' @param f a function 13 | memoise_char <- memoize <- function(f) { 14 | cache <- new_cache() 15 | memo_f <- function(x) { 16 | hash <- paste0(x, collapse = ';') 17 | # stopifnot(length(hash) == 1L) 18 | if (cache$has_key(hash)) { 19 | cache$get(hash) 20 | } else { 21 | res <- f(x) 22 | cache$set(hash, res) 23 | res 24 | } 25 | } 26 | attr(memo_f, "memoised") <- TRUE 27 | return(memo_f) 28 | } 29 | call_memoised_char <- function(x, cache) { 30 | do.call(cache, list(x = x)) 31 | } 32 | #' Forget a memoized function. 33 | #' 34 | #' @keywords internal 35 | #' @author Hadley Wickham 36 | forget <- function(f) { 37 | if (!is.function(f)) return(FALSE) 38 | 39 | env <- environment(f) 40 | if (!exists("cache", env, inherits = FALSE)) return(FALSE) 41 | 42 | cache <- get("cache", env) 43 | cache$reset() 44 | 45 | TRUE 46 | } 47 | #' Is it memoized? 48 | #' 49 | #' @keywords internal 50 | #' @author Hadley Wickham 51 | is.memoised <- is.memoized <- function(f) { 52 | identical(attr(f, "memoised"), TRUE) 53 | } 54 | #' Make a new cache. 55 | #' 56 | #' @keywords internal 57 | #' @author Hadley Wickham 58 | new_cache <- function() { 59 | 60 | cache <- NULL 61 | cache_reset <- function() { 62 | cache <<- new.env(TRUE, emptyenv()) 63 | } 64 | 65 | cache_set <- function(key, value) { 66 | assign(key, value, envir = cache) 67 | } 68 | 69 | cache_get <- function(key) { 70 | get(key, envir = cache, inherits = FALSE) 71 | } 72 | 73 | cache_has_key <- function(key) { 74 | exists(key, envir = cache, inherits = FALSE) 75 | } 76 | 77 | cache_reset() 78 | list( 79 | reset = cache_reset, 80 | set = cache_set, 81 | get = cache_get, 82 | has_key = cache_has_key, 83 | keys = function() ls(cache) 84 | ) 85 | } -------------------------------------------------------------------------------- /tests/testthat/test-infer-anb-cpp.R: -------------------------------------------------------------------------------- 1 | context("infer-anb-cpp") 2 | 3 | test_that("Missing features", { 4 | tn <- nbcar() 5 | expect_error(compute_joint(tn, car[, 1:2]), 6 | "Some features missing from data set.") 7 | }) 8 | 9 | test_that("Single predictor", { 10 | tn <- lp(nb('class', car[, c(1,7)]), car, smooth = 0) 11 | pt <- compute_joint(tn, car[, 1:2]) 12 | expect_identical(dim(pt), c(nrow(car), 4L)) 13 | }) 14 | 15 | test_that("0 rows dataset", { 16 | tn <- nbcar() 17 | pt <- compute_joint(tn, car[FALSE, ]) 18 | expect_identical(dim(pt), c(0L, 4L)) 19 | }) 20 | 21 | test_that("No features", { 22 | nb <- bnc_dag(nb_dag('class', NULL), 'class') 23 | nb <- lp(nb, car, smooth = 1) 24 | pt <- compute_joint(nb, car) 25 | expect_equal(as.vector(pt[1, ]), as.vector(log(params(nb)[['class']]))) 26 | 27 | pt2 <- compute_joint(nb, car[, FALSE]) 28 | expect_equal(pt, pt2) 29 | }) 30 | 31 | test_that("Make CPT", { 32 | tn <- nbcar() 33 | make_cpt_object(tn$.params$buying, class_var = class_var(tn)) 34 | # todo: need to somehow test this 35 | }) 36 | 37 | test_that("Data and unmodified", { 38 | tn <- lp(nb('class', car[ , 5:7]), car[ , 5:7], smooth = 1) 39 | mod <- tn 40 | db <- car 41 | 
p <- predict(tn, db) 42 | expect_equal(db, car) 43 | expect_equal(db, car) 44 | expect_equal(tn, mod) 45 | }) 46 | 47 | test_that("Bug", { 48 | skip("Local rdata file") 49 | load('~/code/bnclassify-client/rdata/tmp-debug.rdata') 50 | gr <- candidate_dags[[7]] 51 | gr <- lp_implement(gr, .mem_cpts = train[[1]]) 52 | predict(gr, test[[1]]) 53 | compute_joint(gr, test[[1]]) 54 | compute_log_joint_complete(gr, test[[1]]) 55 | compute_anb_log_joint_per_class(gr, test[[1]]) 56 | exp(compute_anb_log_joint_per_class(gr, test[[1]])) 57 | }) 58 | 59 | test_that("cpt var values nominal", { 60 | test_ind <- function() { 61 | samp <- function(n) { 62 | sample(1:n, size = 1) 63 | } 64 | dim <- c(samp(10), samp(10) , samp(10) ) 65 | index <- c(samp(dim[1]), samp(dim[2]), 1) 66 | ind <- entry_index(index - 1, dim) 67 | target <- arrayInd(ind + 1, dim) 68 | expect_true(all(index == target)) 69 | } 70 | for (i in 1:1e2 ) { 71 | test_ind() 72 | } 73 | }) -------------------------------------------------------------------------------- /tests/testthat/test-graph-internal.R: -------------------------------------------------------------------------------- 1 | context("graph internal") 2 | 3 | test_that("empty graph", { 4 | a <- graph_internal() 5 | expect_equal(length(a$nodes), 0) 6 | expect_equal(length(graph_nodes(a)), 0) 7 | expect_is(a$edges, "matrix") 8 | expect_true(mode(a$edges) == "character") 9 | }) 10 | 11 | test_that("get adjacent", { 12 | e <- graph_from_to_to_edges('a', 'b') 13 | a <- graph_internal(letters[1:5], e) 14 | expect_equal(graph_get_adjacent("a", a), 'b') 15 | expect_equal(graph_get_adjacent("b", a), 'a') 16 | expect_equal(graph_get_adjacent("c", a), character()) 17 | expect_error(graph_get_adjacent("z", a)) 18 | }) 19 | 20 | test_that("connected components", { 21 | x <- nbcar() 22 | g <- graph_connected_components(x$.dag) 23 | expect_equal(length(g), 1) 24 | 25 | g <- graph_internal() 26 | g <- graph_connected_components(g) 27 | expect_null(g) 28 | }) 29 | 30 | test_that("graph union", { 31 | edges <- graph_from_to_to_edges('A', 'B') 32 | g <- graph_internal(LETTERS[1:3], edges = edges, edgemode = "directed" ); 33 | connected <- graph_connected_components(g) 34 | gs <- lapply(connected, subgraph, g) 35 | gu <- graph_union(gs) 36 | # Currently not identical because I am missing to set some attributes in graph_internal2graph_NEL 37 | # expect_identical(gu, g) 38 | expect_identical(graph_nodes(gu), graph_nodes(g)) 39 | expect_equal(gu$edges, g$edges) 40 | }) 41 | 42 | test_that("Direct tree", { 43 | g <- graph_internal() 44 | e <- direct_tree(g) 45 | expect_equal(e$edgemode, "directed") 46 | # expect_equal(graph::ugraph(graph_internal2graph_NEL(e)), graph_internal2graph_NEL(g)) 47 | 48 | gr <- pairwise_ode_score_contribs(class = 'class', dataset = car, score = 'loglik') 49 | af <- max_weight_forest(gr) 50 | d <- direct_tree(af) 51 | expect_equal(d$edgemode, "directed") 52 | d <- direct_tree(af, 'maint') 53 | expect_equal(d$edgemode, "directed") 54 | d <- direct_tree(af, 'safety') 55 | expect_equal(d$edgemode, "directed") 56 | }) 57 | 58 | test_that("node parents no parents", { 59 | g <- graph_internal() 60 | expect_error(graph_node_parents('A', g)) 61 | 62 | p <- graph_node_parents('A', graph_internal('A')) 63 | expect_equal(p, character()) 64 | }) 65 | -------------------------------------------------------------------------------- /R/learn-hc.R: -------------------------------------------------------------------------------- 1 | # Ties are resolved randomly. 
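# Outline of the search below (added descriptive comment):
#   1. score the candidate DAGs (initially just `init`) by stratified k-fold CV
#      accuracy, re-estimating parameters on each training fold via the CPT caches;
#   2. if the best candidate improves the current score by at least `epsilon`,
#      make it the current DAG; otherwise stop;
#   3. generate new candidates with `step()` and repeat, stopping early if
#      accuracy reaches 1.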
2 | greedy_search <- function(class, to_include, init, step, dataset, epsilon, k, 3 | smooth, cache_reset = NULL) { 4 | stopifnot(is_nonnegative(epsilon)) 5 | # Init loop variables 6 | scores_log <- numeric() 7 | current_dag <- NULL 8 | current_score <- -Inf 9 | candidate_dags <- list(init) 10 | # Get indices of training sets 11 | test_folds <- partition_dataset(dataset, class, k) 12 | train <- lapply(test_folds, function(x) dataset[-x, , drop = FALSE]) 13 | train <- lapply(train, make_cpts_cache, smooth = smooth) 14 | test <- lapply(test_folds, function(x) dataset[x, , drop = FALSE]) 15 | test <- lapply(test, make_evidence) 16 | # Start caches for training sets 17 | # TODO: smooth goes directly to cache. 18 | # skip asserts during greedy search 19 | skip_env$skip_assert <- TRUE 20 | while (length(candidate_dags) > 0) { 21 | # if max accuracy then break 22 | if (isTRUE(fast_equal(current_score, 1))) { break } 23 | # Score all candidate states 24 | # Update each candidate dag on the correct cache 25 | # Get the prediction for each ddag 26 | # evaluate 27 | scores <- cv_lp_partition(candidate_dags, train, test) 28 | # Stop if it is not better than current_score 29 | if (!is_improvement(scores, current_score, epsilon)) break 30 | # Make the best dag the current one 31 | best_ind <- max_random(scores) 32 | current_dag <- candidate_dags[[best_ind]] 33 | current_score <- scores[[best_ind]] 34 | scores_log <- c(scores_log, current_score) 35 | if (!is.null(cache_reset)) { 36 | if (length(scores_log) %% cache_reset == 0) lapply(train, forget) 37 | } 38 | # Generate all candidates from best state 39 | # ...tan_hcsp requires parameters dataset, smooth, and k; the rest do not 40 | candidate_dags <- step(bnc_dag = current_dag, 41 | features_to_include = to_include, 42 | train = train, test = test) 43 | } 44 | # Turn assert back on 45 | skip_env$skip_assert <- FALSE 46 | current_dag$.greedy_scores_log <- scores_log 47 | current_dag 48 | } 49 | is_improvement <- function(new_scores, current_score, epsilon) { 50 | (max(new_scores) - epsilon >= current_score) # TODO: Enable relative improvement 51 | } -------------------------------------------------------------------------------- /vignettes/methods.bib: -------------------------------------------------------------------------------- 1 | @Article{Efron1979, 2 | Title = {Bootstrap methods: Another look at the jackknife}, 3 | Author = {Efron, Bradley}, 4 | Journal = {The Annals of Statistics}, 5 | Year = {1979}, 6 | Number = {1}, 7 | Pages = {1--26}, 8 | Volume = {7}, 9 | 10 | Owner = {Bojan}, 11 | Publisher = {JSTOR}, 12 | Timestamp = {2015.09.04} 13 | } 14 | 15 | @Article{Pigott2001, 16 | Title = {A review of methods for missing data}, 17 | Author = {Pigott, Therese D}, 18 | Journal = {Educational research and evaluation}, 19 | Year = {2001}, 20 | Number = {4}, 21 | Pages = {353--383}, 22 | Volume = {7}, 23 | 24 | Publisher = {Taylor \& Francis} 25 | } 26 | 27 | @Article{Wei2011, 28 | Title = {The application of naive {B}ayes model averaging to predict {A}lzheimer's disease from genome-wide data}, 29 | Author = {Wei, Wei and Visweswaran, Shyam and Cooper, Gregory F}, 30 | Journal = {Journal of the American Medical Informatics Association}, 31 | Year = {2011}, 32 | Number = {4}, 33 | Pages = {370--375}, 34 | Volume = {18}, 35 | 36 | Owner = {Bojan}, 37 | Publisher = {The Oxford University Press}, 38 | Timestamp = {2015.09.10} 39 | } 40 | 41 | @article{Zhu1997algorithm, 42 | title={Algorithm 778: {L-BFGS-B}: {Fortran} subroutines for large-scale 
bound-constrained optimization}, 43 | author={Zhu, Ciyou and Byrd, Richard H and Lu, Peihuang and Nocedal, Jorge}, 44 | journal={ACM Transactions on Mathematical Software (TOMS)}, 45 | volume={23}, 46 | number={4}, 47 | pages={550--560}, 48 | year={1997}, 49 | publisher={ACM} 50 | } 51 | 52 | @Manual{Gentleman2015, 53 | Title = {graph: A package to handle graph data structures}, 54 | Author = {R. Gentleman and Elizabeth Whalen and W. Huber and S. Falcon}, 55 | Note = {R package version 1.46.0}, 56 | Year = {2015} 57 | } 58 | -------------------------------------------------------------------------------- /man/greedy_wrapper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/learn-struct.R 3 | \name{greedy_wrapper} 4 | \alias{greedy_wrapper} 5 | \alias{fssj} 6 | \alias{bsej} 7 | \alias{tan_hc} 8 | \alias{kdb} 9 | \alias{tan_hcsp} 10 | \title{Learn Bayesian network classifiers in a greedy wrapper fashion.} 11 | \usage{ 12 | fssj(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 13 | 14 | bsej(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 15 | 16 | tan_hc(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 17 | 18 | kdb( 19 | class, 20 | dataset, 21 | k, 22 | kdbk = 2, 23 | epsilon = 0.01, 24 | smooth = 0, 25 | cache_reset = NULL 26 | ) 27 | 28 | tan_hcsp(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 29 | } 30 | \arguments{ 31 | \item{class}{A character. Name of the class variable.} 32 | 33 | \item{dataset}{The data frame from which to learn the classifier.} 34 | 35 | \item{k}{An integer. The number of folds.} 36 | 37 | \item{epsilon}{A numeric. Minimum absolute improvement in accuracy required 38 | to keep searching.} 39 | 40 | \item{smooth}{A numeric. The smoothing value (\eqn{\alpha}) for Bayesian 41 | parameter estimation. Nonnegative.} 42 | 43 | \item{cache_reset}{A numeric. Number of iterations after which to reset the 44 | cache of conditional probability tables. A small number reduces the amount 45 | of memory used. \code{NULL} means the cache is never reset (the default).} 46 | 47 | \item{kdbk}{An integer. The maximum number of feature parents per feature.} 48 | } 49 | \value{ 50 | A \code{\link{bnc_dag}} object. 51 | } 52 | \description{ 53 | Greedy wrapper algorithms for learning Bayesian network classifiers. All 54 | algorithms use a cross-validated estimate of predictive accuracy to evaluate 55 | candidate structures. 56 | } 57 | \examples{ 58 | data(car) 59 | tanhc <- tan_hc('class', car, k = 5, epsilon = 0) 60 | \dontrun{plot(tanhc)} 61 | 62 | } 63 | \references{ 64 | Pazzani M (1996). Constructive induction of Cartesian product 65 | attributes. In \emph{Proceedings of the Information, Statistics and 66 | Induction in Science Conference (ISIS-1996)}, pp. 66-77. 67 | 68 | Keogh E and Pazzani M (2002). Learning the structure of augmented Bayesian 69 | classifiers. In \emph{International Journal on Artificial Intelligence 70 | Tools}, \bold{11}(4), pp. 587-601. 71 | } 72 | -------------------------------------------------------------------------------- /R/frontend-anb.r: -------------------------------------------------------------------------------- 1 | 2 | #' Direct an undirected graph. 3 | #' 4 | #' Starting from a \code{root} node, directs all arcs away from it and applies 5 | #' the same, recursively, to its children and descendants. Produces a directed 6 | #' forest.
7 | #' 8 | #' @param g An undirected graph. 9 | #' @param root A character. Optional tree root. 10 | #' @return A directed graph 11 | #' @keywords internal 12 | direct_forest <- function(g, root = NULL) { 13 | graph_direct_forest(g, root = root) 14 | } 15 | #' Direct an undirected graph. 16 | #' 17 | #' The graph must be connected and the function produces a directed tree. 18 | #' @return A graph. The directed tree. 19 | #' @keywords internal 20 | direct_tree <- function(g, root = NULL) { 21 | graph_direct_tree(g, root) 22 | } 23 | direct_graph <- function(g) { 24 | graph_direct(g) 25 | } 26 | #' Returns the undirected augmenting forest. 27 | #' 28 | #' Uses Kruskal's algorithm to find the augmenting forest that maximizes the sum 29 | #' of pairwise weights. When the weights are class-conditional mutual 30 | #' information this forest maximizes the likelihood of the tree-augmented naive 31 | #' Bayes network. 32 | #' 33 | #' If \code{g} is not connected then this will return a forest; otherwise it is 34 | #' a tree. 35 | #' 36 | #' @param g A graph. The undirected graph with pairwise 37 | #' weights. 38 | #' @return A graph. The maximum spanning forest. 39 | #' @references Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network 40 | #' classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. 41 | #' 42 | #' Murphy KP (2012). \emph{Machine learning: a probabilistic perspective}. The 43 | #' MIT Press. pp. 912-914. 44 | #' @keywords internal 45 | max_weight_forest <- function(g) { 46 | graph_max_weight_forest(g) 47 | } 48 | #' Merges multiple disjoint graphs into a single one. 49 | #' 50 | #' @param g A graph 51 | #' @return A graph 52 | #' @keywords internal 53 | graph_union <- function(g) { 54 | graph_internal_union(g) 55 | } 56 | # Adds a node to a DAG as root and parent of all nodes.
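# Illustrative example (added comment; node names are arbitrary): given a DAG
# with nodes {A, B} and arc A -> B, superimpose_node(dag, 'class') returns a DAG
# with the additional arcs class -> A and class -> B.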
57 | superimpose_node <- function(dag, node) { 58 | graph_superimpose_node(dag, node) 59 | } 60 | is_dag_graph <- function(dag) { 61 | graph_is_dag(dag) 62 | } 63 | check_node <- function(node) { 64 | stopifnot(assertthat::is.string(node)) 65 | } 66 | #' Returns a naive Bayes structure 67 | #' 68 | #' @keywords internal 69 | nb_dag <- function(class, features) { 70 | anb_make_nb(class, features) 71 | } -------------------------------------------------------------------------------- /tests/testthat/test-hc-fssj.R: -------------------------------------------------------------------------------- 1 | context("HC fssj") 2 | 3 | test_that("Expand supernode nominal", { 4 | a <- nbcarp(car[, 2:7]) 5 | s <- not_cci(a) 6 | ex <- augment_supernodes(new_node = 'buying', supernodes = s, bnc_dag = a) 7 | expect_equal(length(ex), 5) 8 | maint <- ex[[1]] 9 | expect_equal(graph_num_arcs(dag(maint)), 7) 10 | expect_equal(families(maint)[['buying']], c('buying', 'maint', 'class')) 11 | 12 | a <- nbcar() 13 | expect_error(augment_supernodes(new_node = 'safety', 14 | supernodes = s, bnc_dag = a), 'already') 15 | }) 16 | 17 | test_that("includes_by_joins nominal", { 18 | a <- nbcarp(car[, 5:7]) 19 | # All possible including models 20 | # debugonce(includes_by_joins) 21 | cands <- includes_in_supernodes(a, colnames(car)[-7]) 22 | expect_equal(length(cands), 4 * 2) 23 | buying <- cands[[1]] 24 | expect_equal(families(buying)[['buying']], 25 | c('buying', 'lug_boot', 'class')) 26 | }) 27 | 28 | test_that("includes", { 29 | nb <- nb('Class') 30 | a <- includes(nb, c('crime','immigration')) 31 | expect_equal(length(a), 2) 32 | expect_equal(features(a[[1]]), 'crime') 33 | expect_equal(features(a[[2]]), 'immigration') 34 | }) 35 | 36 | test_that("includes no features", { 37 | nb <- nbcar() 38 | a <- includes(nb, character()) 39 | expect_equal(length(a), 0) 40 | }) 41 | 42 | test_that("augment supernodes", { 43 | nb <- nb('class', NULL) 44 | e <- includes_in_supernodes(nb, NULL) 45 | expect_equal(length(e), 0) 46 | 47 | nb <- nb('class', NULL) 48 | e <- includes_in_supernodes(nb, 'doors') 49 | expect_equal(length(e), 0) 50 | 51 | nb <- nb('class', NULL) 52 | e <- includes_in_supernodes(nb, c('doors', 'safety')) 53 | expect_equal(length(e), 0) 54 | 55 | nb <- nb('class', features = 'maint') 56 | e <- includes_in_supernodes(nb, c('doors', 'safety')) 57 | expect_equal(length(e), 2) 58 | 59 | nb <- nb('class', features = c('maint', 'buying')) 60 | e <- includes_in_supernodes(nb, c('doors', 'safety')) 61 | expect_equal(length(e), 4) 62 | 63 | nb <- nb('class', features = c('maint', 'buying')) 64 | e <- includes_in_supernodes(nb, c('doors', 'safety', 'lug_boot')) 65 | expect_equal(length(e), 6) 66 | 67 | nb <- nb('class', features = c('maint', 'buying', 'persons')) 68 | e <- includes_in_supernodes(nb, c('doors', 'safety', 'lug_boot')) 69 | expect_equal(length(e), 9) 70 | 71 | f <- includes_in_supernodes(e[[1]], c('doors', 'safety', 'lug_boot')) 72 | expect_equal(length(f), 6) 73 | }) -------------------------------------------------------------------------------- /R/learn-params-manb.R: -------------------------------------------------------------------------------- 1 | # Computes MANB local arc C->X posterior probabilities and uses them to compute 2 | # the MANB CPTs. 3 | # Formula for posterior assumes that smooth = 1. 4 | # @param manb_prior Currently ignored. 5 | # @return Returns a vector of probabilities of the same length as ctgts.
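# In words (added descriptive comment): for each feature X the code below computes
#   P(C -> X | D) = P(D | C -> X) P(C -> X) /
#                   [ P(D | C -> X) P(C -> X) + P(D | C ... X) P(C ... X) ]
# with P(C -> X) = prior and Dirichlet marginal likelihoods for the two terms;
# compute_manb_cpt() then mixes the conditional and marginal CPTs of X using this
# posterior as the weight.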
6 | compute_manb_arc_posteriors <- function(x, ctgts, smooth, prior = 0.5) { 7 | if (!is_nb(x)) stop("MANB can only be applied to naive Bayes.") 8 | stopifnot(smooth > 0) 9 | stopifnot(identical(features(x), names(ctgts))) 10 | vapply(ctgts, compute_manb_arc_posterior, smooth = smooth, prior = prior, 11 | FUN.VALUE = numeric(1)) 12 | } 13 | # Assuming class is the last dimension. 14 | # Assuming that all instances are labelled 15 | # Returns a singe probability. 16 | compute_manb_arc_posterior <- function(nijk, smooth, prior) { 17 | # If not 2D then it is not a naive Bayes 18 | stopifnot(length(dim(nijk)) == 2, are_probs(prior)) 19 | r <- nrow(nijk) 20 | rt <- ncol(nijk) 21 | nij <- colSums(nijk) 22 | nik <- rowSums(nijk) 23 | ni <- sum(nijk) 24 | # P(D | C -> X) 25 | lpa <- rt * lgamma(r * smooth) - sum(lgamma(r * smooth + nij)) + sum(lgamma(smooth + nijk) - lgamma(smooth)) 26 | # P(D | C ... X) 27 | lpna <- lgamma(r*smooth) - lgamma(r*smooth + ni) + sum(lgamma(smooth + nik) - lgamma(smooth)) 28 | # P(C -> X) 29 | denom <- matrixStats::logSumExp(c(lpna, lpa)) 30 | lpa_post <- lpa - denom 31 | # P(C ... X) 32 | lpna_post <- lpna - denom 33 | # Check valid probs 34 | stopifnot(all.equal(exp(lpa_post) + exp(lpna_post), 1)) 35 | exp(lpa_post) 36 | # Consider priors: 37 | lpa_prior <- log(prior) 38 | lnpa_prior <- log(1 - prior) 39 | lpa_num <- lpa + lpa_prior 40 | lpna_num <- lpna + lnpa_prior 41 | denom <- matrixStats::logSumExp(c(lpa_num, lpna_num)) 42 | lpa_post <- lpa_num - denom 43 | lpna_post <- lpna_num - denom 44 | exp(lpa_post) + exp(lpna_post) 45 | exp(lpa_post) 46 | } 47 | # @param nijk A contingency table of X and C 48 | compute_manb_cpt <- function(nijk, prob_arc, smooth) { 49 | stopifnot(length(dim(nijk)) == 2, are_probs(prob_arc), length(prob_arc) == 1) 50 | lnijk <- log(ctgt2cpt(nijk, smooth = smooth)) 51 | lp_arc <- log(prob_arc) 52 | arc <- lp_arc + lnijk 53 | # no arc 54 | lni <- rowSums(nijk) 55 | lni[] <- log(normalize(lni + smooth)) 56 | lp_noarc <- log(1 - prob_arc) 57 | no_arc <- lp_noarc + lni 58 | rt <- ncol(nijk) 59 | no_arc <- rep(no_arc, rt) 60 | entries <- matrix(c(as.numeric(arc), as.numeric(no_arc)), ncol = 2) 61 | arc[] <- matrixStats::rowLogSumExps(entries) 62 | stopifnot(identical(dimnames(arc), dimnames(nijk))) 63 | exp(arc) 64 | } -------------------------------------------------------------------------------- /src/table.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | Rcpp::IntegerVector tabulate_cpp(const Rcpp::IntegerVector & v, R_xlen_t nlevels) { 6 | // Using R_xlen_t to avoid checking for entries < 0 7 | std::vector table(nlevels); 8 | R_xlen_t n = v.size(); 9 | for (R_xlen_t i = 0; i < n; ++i) { 10 | table.at( v.at(i) - 1 ) ++; 11 | } 12 | // Wrapp may throw errors with R_xlen_t 13 | // return wrap(table); 14 | IntegerVector a(table.size()); 15 | std::copy(table.begin(), table.end(), a.begin()); 16 | return a; 17 | } 18 | 19 | // Based on table() 20 | // dataframe { 21 | // for each column get the num of dims.tfm 22 | // get size of the resulting talbe 23 | // finally tabulate by the bins meaning how many are there 24 | // each value will correspond to its index in the dim array. 25 | // this is just indexing by a set of values, then you go to there and find it. 
26 | // } 27 | // [[Rcpp::export]] 28 | Rcpp::IntegerVector table_cpp(const RObject & input, const RObject & columns) { 29 | if(!is(input)) stop("Must be a data frame."); 30 | DataFrame data = as(input); 31 | if(!is(columns)) stop("Must be character vector."); 32 | CharacterVector cols = as(columns); 33 | data = data[cols]; 34 | 35 | const R_xlen_t ncols = data.ncol(); 36 | if (ncols == 0) stop("No columns in data frame."); 37 | const IntegerVector & column = data.at(0); 38 | // There is a single entry for each row 39 | IntegerVector to_tabulate = no_init(column.size()); 40 | to_tabulate.fill(1); 41 | // The product of dimensions. 42 | R_xlen_t pd = 1; 43 | IntegerVector dims(ncols); 44 | List dimnames(ncols); 45 | dimnames.names() = data.names(); 46 | 47 | for (R_xlen_t i = 0; i < ncols; i++) { 48 | const IntegerVector & a = data.at(i); 49 | if(!Rf_isFactor(a)) stop("Not a factor."); 50 | const CharacterVector & factorLevels = a.attr("levels"); 51 | R_xlen_t nl = factorLevels.size(); 52 | to_tabulate = to_tabulate + pd * (a - 1L); 53 | pd = pd * nl ; 54 | dims.at(i) = nl; 55 | dimnames.at(i) = factorLevels; 56 | } 57 | 58 | to_tabulate = na_omit(to_tabulate); 59 | IntegerVector tbl = tabulate_cpp(to_tabulate, pd); 60 | tbl.attr("dim") = dims; 61 | tbl.attr("dimnames") = dimnames; 62 | tbl.attr("class") = "table"; 63 | 64 | return tbl; 65 | } 66 | 67 | 68 | /*** R 69 | kr <- foreign::read.arff('~/gd/phd/code/works-aug-semi-bayes/data/original/kr-vs-kp.arff') 70 | dbor <- kr 71 | fd <- dbor[, 1:3] 72 | microbenchmark::microbenchmark( a <- table_cpp(fd), tbl <- table(fd) ) 73 | */ 74 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-struct.R: -------------------------------------------------------------------------------- 1 | context("Learn struct") 2 | 3 | test_that("nb Nominal", { 4 | n <- nb('class', car) 5 | expect_identical('class', class_var(n)) 6 | expect_equal(graph_num_arcs(dag(n)), 6) 7 | }) 8 | 9 | test_that("nb Class not in dataset", { 10 | expect_error(nb('Class', car), 'disjoint') 11 | }) 12 | 13 | test_that("nb No features ", { 14 | n <- nb('class', car[, 7, drop = FALSE]) 15 | expect_equal(graph_num_arcs(dag(n)), 0) 16 | # Not numeric dataset 17 | }) 18 | 19 | test_that("fssj nominal", { 20 | skip_on_cran() 21 | suppressWarnings(RNGversion("3.5.0")) 22 | set.seed(0) 23 | f <- fssj('class', dataset = car, k = 10, epsilon = 0.01) 24 | expect_equal(features(f), character()) 25 | suppressWarnings(RNGversion("3.5.0")) 26 | set.seed(0) 27 | f <- fssj('class', dataset = car, k = 10, epsilon = 0) 28 | expect_true(is_perm(features(f), colnames(car)[-7])) 29 | }) 30 | 31 | test_that("bsej nominal", { 32 | skip_on_cran() 33 | suppressWarnings(RNGversion("3.5.0")) 34 | set.seed(0) 35 | f <- bsej('class', dataset = car, k = 10, epsilon = 0.01) 36 | expect_equal(features(f), colnames(car)[-7]) 37 | expect_equal(narcs(f), 6 + 1 + 2 + 3) 38 | expect_equal(length(f$.greedy_scores_log), 4) 39 | expect_equal(f$.greedy_scores_log, sort(f$.greedy_scores_log)) 40 | suppressWarnings(RNGversion("3.5.0")) 41 | set.seed(0) 42 | f <- bsej('class', dataset = car, k = 10, epsilon = 0) 43 | expect_equal(features(f), colnames(car)[-7]) 44 | expect_equal(narcs(f), 13) 45 | expect_equal(length(f$.greedy_scores_log), 5) 46 | expect_equal(f$.greedy_scores_log, sort(f$.greedy_scores_log)) 47 | }) 48 | 49 | test_that("tan_hc nominal", { 50 | skip_on_cran() 51 | suppressWarnings(RNGversion("3.5.0")) 52 | set.seed(0) 53 | t <- tan_hc('class', dataset = 
car, k = 10, epsilon = 0) 54 | expect_equal(length(features(t)), 6) 55 | expect_equal(narcs(t), 11) 56 | }) 57 | 58 | test_that("tanhc sp nominal", { 59 | skip_on_cran() 60 | suppressWarnings(RNGversion("3.5.0")) 61 | set.seed(0) 62 | t <- tan_hcsp('class', dataset = car, k = 10, epsilon = 0) 63 | expect_equal(length(features(t)), 6) 64 | expect_equal(narcs(t), 11) 65 | nfams <- sapply(families(t), length) 66 | expect_true(max(nfams) < 5) 67 | }) 68 | 69 | test_that("kdb nominal", { 70 | skip_on_cran() 71 | suppressWarnings(RNGversion("3.5.0")) 72 | set.seed(0) 73 | t <- kdb('class', dataset = car, kdb = 1, k = 10, epsilon = 0) 74 | suppressWarnings(RNGversion("3.5.0")) 75 | set.seed(0) 76 | to <- tan_hc('class', dataset = car, k = 10, epsilon = 0) 77 | expect_false(isTRUE(all.equal(t, to))) 78 | t$.call_struct <- NULL 79 | to$.call_struct <- NULL 80 | expect_true(isTRUE(all.equal(t, to))) 81 | }) -------------------------------------------------------------------------------- /R/basic-assert.R: -------------------------------------------------------------------------------- 1 | skip_env <- new.env(parent = emptyenv()) 2 | skip_env$skip_assert <- FALSE 3 | skip_env$skip_testing <- TRUE 4 | #' Whether to do checks or not. Set TRUE to speed up debugging or building. 5 | #' @keywords internal 6 | skip_assert <- function() { 7 | skip_env$skip_assert 8 | } 9 | #' Skip while testing to isolate errors 10 | #' @keywords internal 11 | skip_testing <- function() { 12 | skip_env$skip_testing 13 | } 14 | # Tests whether two character vectors are identical when sorted 15 | is_perm <- function(x, y) { 16 | is.character(x) && identical(sort(x), sort(y)) 17 | } 18 | # use this e.g., in (I think) check_dataset 19 | is_non_empty_complete <- function(x) { 20 | (length(x) > 0) && (!anyNA(x)) 21 | } 22 | # Compares two numerics ignoring attributes and class 23 | equivalent_num <- function(x, y) { 24 | stopifnot(is.numeric(x), is.numeric(y)) 25 | # Check.attributes = FALSE does not ignore class so remove it here 26 | class(x) <- NULL 27 | class(y) <- NULL 28 | all.equal(x, y, check.attributes = FALSE) 29 | } 30 | is_subset <- function(x, y) { 31 | all(x %in% y) 32 | } 33 | is_positive <- function(x) { 34 | assertthat::is.number(x) && (x > 0) 35 | } 36 | is_nonnegative <- function(x) { 37 | assertthat::is.number(x) && (x >= 0) 38 | } 39 | is_last <- function(element, vector) { 40 | # Basically does not work for numerics due to identical 41 | identical(element, get_last(vector)) 42 | } 43 | is_at_pos <- function(element, position, vector) { 44 | # Basically does not work for numerics due to identical 45 | identical(get_null_safe(vector, position), element) 46 | } 47 | are_all_equal <- function(x) { 48 | length(unique(x)) == 1L 49 | } 50 | # use this e.g., in check_dataset 51 | are_all_unique <- function(x) { 52 | length(x) == length(unique(x)) 53 | } 54 | #' Checks if all columns in a data frame are factors. 
55 | #' 56 | #' @param x a \code{data.frame} 57 | #' @keywords internal 58 | are_factors <- function(x) { 59 | # If x is not data.frame stop 60 | stopifnot(is.data.frame(x)) 61 | all(vapply(x, is.factor, FUN.VALUE = logical(1))) 62 | } 63 | is_just <- function(x, class) { 64 | identical(class(x), class) 65 | } 66 | ## Checks 67 | # Checks the object is non-empty and has no NA's 68 | check_non_empty_complete <- function(x) { 69 | stopifnot(is_non_empty_complete(x)) 70 | } 71 | are_complete_dimnames <- function(x) { 72 | if (skip_assert( )) return (TRUE) 73 | # Check x has non empty comlete dimames 74 | dnames <- dimnames(x) 75 | if (!is_non_empty_complete(dnames)) return (FALSE) 76 | # Check dimnames have non-empty complete names 77 | if (!is_non_empty_complete(names(dnames))) return (FALSE) 78 | # Check each dimension is non-empty complete 79 | all(vapply(dnames, is_non_empty_complete, FUN.VALUE = logical(1))) 80 | } -------------------------------------------------------------------------------- /tests/testthat/test-bnc-dag-operate.R: -------------------------------------------------------------------------------- 1 | context("bnc dag operate") 2 | 3 | test_that("not cci nominal naive Bayes", { 4 | nb <- nbcar() 5 | s <- not_cci(nb) 6 | expect_equal(unlist(s, use.names = FALSE), colnames(car)[1:6]) 7 | }) 8 | 9 | test_that("not cci nominal TAN", { 10 | tn <- lp(chowliu('class', car), dataset = car, smooth = 1) 11 | s <- not_cci(tn) 12 | expect_true(is.list(s)) 13 | expect_equal(length(s), 1) 14 | expect_equal(length(s[[1]]), length(features(tn))) 15 | 16 | }) 17 | 18 | test_that("not cci no features", { 19 | nb <- nbcarclass() 20 | s <- not_cci(nb) 21 | expect_null(s) 22 | }) 23 | 24 | test_that("is supernode single node", { 25 | nb <- nbcar() 26 | expect_true(is_supernode('safety', nb)) 27 | }) 28 | 29 | test_that("is supernode two nodes not supernode", { 30 | nb <- nbcar() 31 | expect_true(!is_supernode(c('safety', 'buying'), nb)) 32 | }) 33 | 34 | test_that("is supernode two nodes supernode", { 35 | t <- tan_cl('class', car) 36 | expect_true(is_supernode(c('safety', 'buying'), t)) 37 | }) 38 | 39 | test_that("is supernode class var", { 40 | nb <- nbcarclass() 41 | expect_error(is_supernode('class', nb), "class") 42 | }) 43 | 44 | test_that("is_semi_naive just class", { 45 | nb <- nbcarclass() 46 | expect_true(is_semi_naive(nb)) 47 | }) 48 | 49 | test_that("is_semi_naive naive Bayes", { 50 | nb <- nbcar() 51 | expect_true(is_semi_naive(nb)) 52 | }) 53 | 54 | test_that("include node nominal", { 55 | nb <- nbcarclass() 56 | a <- add_feature('safety', nb) 57 | expect_equal(features(a), 'safety') 58 | }) 59 | 60 | 61 | 62 | test_that("include node multiple nodes", { 63 | nb <- nbcarclass() 64 | expect_error(add_feature(c('safety', 'doors'), nb), "string") 65 | }) 66 | 67 | test_that("include node already included", { 68 | nb <- nbcar() 69 | expect_error(add_feature('safety', nb) , "already") 70 | }) 71 | 72 | test_that("remove feature nominal", { 73 | nb <- nbcar() 74 | rnb <- remove_feature('safety', nb) 75 | expect_equal(features(rnb), colnames(car)[1:5]) 76 | }) 77 | 78 | test_that("remove feature not in graph", { 79 | nb <- nbcarclass() 80 | expect_error(remove_feature('safety', nb), "not in") 81 | }) 82 | 83 | test_that("feature orphans nominal", { 84 | nb <- nbcar() 85 | o <- feature_orphans(nb) 86 | expect_equal(o, features(nb)) 87 | }) 88 | 89 | test_that("feature orphans no features", { 90 | nb <- nbcarclass() 91 | o <- feature_orphans(nb) 92 | expect_null(o) 93 | }) 94 | 95 | 
test_that("feature orphans ode", { 96 | nb <- nbcarp(car[, 5:7]) 97 | nb <- add_feature_parents('lug_boot', 'safety', nb) 98 | o <- feature_orphans(nb) 99 | expect_equal(o, 'lug_boot') 100 | }) 101 | 102 | -------------------------------------------------------------------------------- /tests/testthat/test-infer-anb.R: -------------------------------------------------------------------------------- 1 | context("infer anb") 2 | 3 | test_that("Nominal", { 4 | tn <- nbcar() 5 | a <- compute_joint(tn, car) 6 | expect_identical(colnames(a), levels(car$class)) 7 | }) 8 | 9 | test_that("Missing features", { 10 | tn <- nbcar() 11 | expect_error(compute_joint(tn, car[, 1:2]), 12 | "Some features missing from data set.") 13 | }) 14 | 15 | test_that("Single predictor", { 16 | tn <- lp(nb('class', car[, c(1,7)]), car, smooth = 0) 17 | pt <- compute_joint(tn, car[, 1:2]) 18 | expect_identical(dim(pt), c(nrow(car), 4L)) 19 | }) 20 | 21 | test_that("0 rows dataset", { 22 | tn <- nbcar() 23 | pt <- compute_joint(tn, car[FALSE, ]) 24 | expect_identical(dim(pt), c(0L, 4L)) 25 | }) 26 | 27 | test_that("No features", { 28 | nb <- bnc_dag(nb_dag('class', NULL), 'class') 29 | nb <- lp(nb, car, smooth = 1) 30 | pt <- compute_joint(nb, car) 31 | expect_equal(as.vector(pt[1, ]), as.vector(log(params(nb)[['class']]))) 32 | 33 | pt2 <- compute_joint(nb, car[, FALSE]) 34 | expect_equal(pt, pt2) 35 | }) 36 | 37 | test_that("matches grain", { 38 | 39 | # gRain implementation change 40 | # skip_on_cran() 41 | # skip_if_not_installed('gRain') 42 | # 43 | # tn <- nbcar() 44 | # b <- compute_joint(tn, car) 45 | # g <- as_grain(tn) 46 | # gp <- compute_grain_log_joint(grain = g, car[, -7], 'class') 47 | # expect_equal(b, gp) 48 | # 49 | # tn <- nbvotecomp() 50 | # b <- compute_joint(tn, v) 51 | # g <- as_grain(tn) 52 | # gp <- compute_grain_log_joint(grain = g, v[, -17], 'Class') 53 | # expect_equal(b, gp) 54 | # 55 | # tn <- bnc('tan_cl', class = 'class', smooth = 1, dataset = car) 56 | # b <- compute_joint(tn, car) 57 | # g <- as_grain(tn) 58 | # gp <- compute_grain_log_joint(grain = g, car[, -7], 'class') 59 | # expect_equal(b, gp) 60 | # gRain implementation change 61 | }) 62 | 63 | test_that("correct result", { 64 | carb <- car[, c(1,7)] 65 | tn <- nbcarp(carb) 66 | true_log_prob <- log(params(tn)$buying['vhigh', ]) + log(params(tn)$class) 67 | b <- compute_joint(tn, carb[1, , drop = FALSE]) 68 | expect_equal(as.vector(true_log_prob), as.vector(b[1, ])) 69 | }) 70 | 71 | test_that("different levels", { 72 | nb <- nbcar() 73 | ce <- car 74 | levels(ce$buying) <- rev(levels(ce$buying)) 75 | expect_error(compute_log_joint(nb, ce), 76 | "Levels in data set must match those in the CPTs ") 77 | }) 78 | 79 | test_that("fail with incomplete data", { 80 | v <- nbvote() 81 | expect_error(compute_joint(v, voting), "NA entries in data set.") 82 | }) 83 | 84 | 85 | # Not implemented to receive just CPTs 86 | # test_that("compute augnb lucp", { 87 | # df <- alphadb 88 | # vars <- list(letters[1:3], c(letters[4:6], letters[3])) 89 | # rct <- lapply(vars, extract_cpt, df, smooth = 0) 90 | # rcp <- extract_cpt('c', df, smooth = 0) 91 | # compute_augnb_lucp(rct, rcp, x = df) 92 | # }) -------------------------------------------------------------------------------- /R/wrap-mlr.R: -------------------------------------------------------------------------------- 1 | #' Convert to \code{mlr}. 2 | #' 3 | #' Convert a \code{\link{bnc_bn}} to a \code{\link[mlr]{Learner}} 4 | #' object. 
5 | #' 6 | #' @inheritParams cv 7 | #' @param x A \code{\link{bnc_bn}} object. 8 | #' @param id A character. 9 | #' @export 10 | #' @examples 11 | #' data(car) 12 | #' nb <- bnc('nb', 'class', car, smooth = 1) 13 | #' \dontrun{library(mlr)} 14 | #' \dontrun{nb_mlr <- as_mlr(nb, dag = FALSE, id = "ode_cl_aic")} 15 | #' \dontrun{nb_mlr} 16 | as_mlr <- function(x, dag, id = "1") { 17 | check_mlr_attached() 18 | check_bnc_bn(x) 19 | args <- bnc_get_update_args(x, dag) 20 | # Call make learner with the arguments 21 | mlr::makeLearner("bnc", id = id, par.vals = list(args=args)) 22 | } 23 | #' makeRLearner. Auxiliary mlr function. 24 | #' @export makeRLearner.bnc 25 | #' @keywords internal 26 | makeRLearner.bnc <- function() { 27 | if (!requireNamespace("mlr", quietly = TRUE)) { 28 | stop("Package mlr required for this functionality.") 29 | } 30 | mlr::makeRLearnerClassif( 31 | cl = "bnc", 32 | package = "bnclassify", 33 | par.set = ParamHelpers::makeParamSet( 34 | ParamHelpers::makeUntypedLearnerParam(id = "args", default = NULL) 35 | ), 36 | properties = retrieve_bnc_properties() 37 | ) 38 | } 39 | #' trainLearner. Auxiliary mlr function. 40 | #' @export trainLearner.bnc 41 | #' @keywords internal 42 | #' @param .learner,.task,.subset,.weights Internal. 43 | #' @param ... Internal. 44 | trainLearner.bnc = function(.learner, .task, .subset, .weights, ...) { 45 | if (!requireNamespace("mlr", quietly = TRUE)) { 46 | stop("Package mlr required for this functionality.") 47 | } 48 | # Check args contain struct, struct_call and params_call 49 | args <- .learner$par.vals$args 50 | dataset <- mlr::getTaskData(.task, .subset) 51 | bnc_update(args, dataset) 52 | } 53 | #' predictLearner. Auxiliary mlr function. 54 | #' @export predictLearner.bnc 55 | #' @keywords internal 56 | #' @param .learner,.model,.newdata Internal. 57 | #' @param ... Internal. 58 | predictLearner.bnc = function(.learner, .model, .newdata, ...) { 59 | if (!requireNamespace("mlr", quietly = TRUE)) { 60 | stop("Package mlr required for this functionality.") 61 | } 62 | prob = TRUE 63 | if(.learner$predict.type == "response") prob = FALSE 64 | predict(.model$learner.model, newdata = .newdata, prob = prob) 65 | } 66 | 67 | retrieve_bnc_properties <- function() { 68 | c("oneclass", "twoclass", "multiclass", "factors", "prob", "numerics", 69 | "missings") 70 | } 71 | #' Checks if mlr attached. 72 | #' 73 | #' mlr must be attached because otherwise `getMlrOptions()` in `makeLearner` will not be found. 74 | #' @keywords internal 75 | check_mlr_attached <- function() { 76 | mlr_loaded <- 'package:mlr' %in% search() 77 | if (!mlr_loaded) { 78 | stop("mlr package must be loaded (run, e.g., library(mlr)) in order to use this functionality. 
Install the package first if needed.") 79 | } 80 | } -------------------------------------------------------------------------------- /R/anb-cpts.R: -------------------------------------------------------------------------------- 1 | # Checks cpts ordered according to vars() and 1D names correspond to vars() 2 | cpts2families <- function(cpts) { 3 | lapply(cpts, cpt2family) 4 | } 5 | families2cpts <- function(families, dataset, smooth, .mem_cpts) { 6 | if (!is.null(.mem_cpts)) { 7 | lapply(families, call_memoised_char, cache = .mem_cpts) 8 | } 9 | else { 10 | check_dataset(dataset) 11 | lapply(families, extract_cpt, dataset, smooth = smooth) 12 | } 13 | } 14 | extract_cpt <- function(vars, dataset, smooth) { 15 | ctgt <- extract_ctgt(vars, dataset) 16 | ctgt2cpt(ctgt, smooth = smooth) 17 | } 18 | make_cpts_cache <- function(dataset, smooth) { 19 | check_dataset(dataset) 20 | extract_cpt <- function(vars) { 21 | ctgt <- extract_ctgt(vars, dataset) 22 | ctgt2cpt(ctgt, smooth = smooth) 23 | } 24 | memoise_char(extract_cpt) 25 | } 26 | # Turns a contingency table into a conditional probability table 27 | ctgt2cpt <- function(ctgt, smooth) { 28 | # Requiring ctgt be a table. That implies it is an array. 29 | stopifnot(smooth >= 0, is.table(ctgt), are_complete_dimnames(ctgt)) 30 | # Add smooth to ctgt 31 | ctgt <- smooth_sideeffect(ctgt, smooth) 32 | normalize_ctgt(ctgt) 33 | } 34 | #' Get just form first dimension in their own cpt, not checking for consistency 35 | #' in others. 36 | #' @keywords internal 37 | cpt_vars_values <- function(cpts) { 38 | # Check the names of cpts are equal to the name of their first dim 39 | vars <- vapply(cpts, cpt_1d_var, FUN.VALUE = character(1)) 40 | stopifnot(identical(unname(vars), names(cpts))) 41 | # Return the values 42 | lapply(cpts, cpt_1d_values) 43 | } 44 | # Returns the name of the first dimensions and the values in the dimension of 45 | # the table. 
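# A small sketch of the extraction helpers above, on the bundled car data
# (not run; exact values depend on the data set):
#   ctgt <- extract_ctgt(c('buying', 'class'), car)   # counts N(buying, class)
#   cpt  <- ctgt2cpt(ctgt, smooth = 1)                # each class column sums to 1: P(buying | class)
#   identical(cpt, extract_cpt(c('buying', 'class'), car, smooth = 1))   # should be TRUE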
46 | cpt_1d_values <- function(cpt) { 47 | # Get 1d cases and check not empty 48 | values <- get_cpt_values(cpt)[[1]] 49 | check_non_empty_complete(values) 50 | values 51 | } 52 | cpt_1d_var <- function(cpt) { 53 | var <- cpt2family(cpt)[[1]] 54 | stopifnot(assertthat::is.string(var)) 55 | var 56 | } 57 | cpt2family <- function(cpt) { 58 | # Check is a table 59 | stopifnot(is.table(cpt)) 60 | # Return names dimnames 61 | names(dimnames(cpt)) 62 | } 63 | get_cpt_values <- function(cpt) { 64 | stopifnot(is.table(cpt)) 65 | dimnames(cpt) 66 | } 67 | # Gets cpt entries using a list of indices 68 | # Returns a vector 69 | subset_cpt <- function(cpt, indices) { 70 | # check var non empty, in indices 71 | vars <- names(dimnames(cpt)) 72 | stopifnot(is_non_empty_complete(vars), is_subset(vars, names(indices))) 73 | # Get index matrix from df 74 | x_indices <- do.call('cbind', indices[vars]) 75 | cpt[x_indices] 76 | } 77 | exponentiate_cpt <- function(cpt, value) { 78 | cpt <- cpt ^ value 79 | normalize_ctgt(cpt) 80 | } 81 | get_cpt_id <- function(cpt) { 82 | make_family_id(cpt2family(cpt)) 83 | } 84 | count_cpt_free_params <- function(cpt) { 85 | d <- dim(cpt) 86 | stopifnot(length(d) >= 1) 87 | (d[1] - 1) * prod(d[-1]) 88 | } -------------------------------------------------------------------------------- /R/cv-update.R: -------------------------------------------------------------------------------- 1 | bnc_get_update_args <- function(x, dag) { 2 | stopifnot(is.logical(dag)) 3 | args <- list(lp_fargs = x$.call_bn) 4 | # lp_fargs must always be present 5 | stopifnot(!is.null(args$lp_fargs)) 6 | # If dag then include dag arguments 7 | if (dag) { 8 | args$dag_fargs <- x$.call_struct 9 | stopifnot(!is.null(args$dag_fargs)) 10 | } 11 | args 12 | } 13 | bnc_update <- function(args, dataset) { 14 | bnc_update_args(args$lp_fargs, dataset, args$dag_fargs) 15 | } 16 | bnc_update_args <- function(lp_fargs, dataset, dag_fargs = NULL) { 17 | # If dag needs to be called, call it first then feed it into lp arguments 18 | if (!is.null(dag_fargs)) { 19 | # dag_fargs contain both function name and arguments. 20 | dag <- do_bnc_call(dag_fargs, dataset) 21 | lp_fargs$x <- dag 22 | } 23 | # Wrap the result of lp before it's returned 24 | res <- do_bnc_call(lp_fargs, dataset) 25 | # bnc_wrap(res) TODO 26 | res 27 | } 28 | # Optionally updates the dag prior to updating the parameters. 29 | update <- function(x, dataset, dag) { 30 | stopifnot(is.logical(dag)) 31 | dg <- NULL 32 | if (dag) { 33 | dg <- update_dag(x, dataset) 34 | } 35 | else { 36 | dg <- bn2dag(x) 37 | } 38 | lp_args <- get_lp_update_args(x) 39 | update_lp(dag = dg, lp_fargs = lp_args, dataset = dataset) 40 | } 41 | save_bnc_call <- function(fun_name, call, env) { 42 | stopifnot(is.character(fun_name)) 43 | call[[1]] <- fun_name 44 | # To make sure that this dataset is not accidentaly used on updates. 
45 | call['dataset'] <- NULL 46 | lapply(call, eval, envir = env) 47 | } 48 | do_bnc_call <- function(fargs, dataset) { 49 | fargs$dataset <- dataset 50 | call <- make_call(fargs[[1]], fargs[-1]) 51 | eval(call) 52 | } 53 | add_dag_call_arg <- function(bnc_dag, fun_name, call, env, force = FALSE) { 54 | add_call_arg(bnc_dag, fun_name, call, env, arg = '.call_struct', force = force) 55 | } 56 | remove_dag_call_arg <- function(bnc_dag) { 57 | bnc_dag[['.call_struct']] <- NULL 58 | bnc_dag 59 | } 60 | add_params_call_arg <- function(bnc_bn, call, env, force = TRUE) { 61 | add_call_arg(bnc_bn, 'lp', call, env, arg = '.call_bn', force = force) 62 | } 63 | add_call_arg <- function(bnc_dag, fun_name, call, env, arg, force) { 64 | # stopifnot(inherits(bnc_dag, "bnc_dag")) 65 | # TODO Fix this for appropriate types 66 | stopifnot(inherits(bnc_dag, "bnc_dag") || inherits(bnc_dag, "bnc_base")) 67 | if (!force) { 68 | stopifnot(is.null(bnc_dag[[arg]])) 69 | } 70 | bnc_dag[[arg]] <- save_bnc_call(fun_name, call, env) 71 | bnc_dag 72 | } 73 | get_lp_update_args <- function(x) { 74 | stopifnot(!is.null(x$.call_bn)) 75 | x$.call_bn 76 | } 77 | get_dag_update_args <- function(x) { 78 | stopifnot(!is.null(x$.call_struct)) 79 | x$.call_struct 80 | } 81 | update_dag <- function(x, dataset) { 82 | do_bnc_call(get_dag_update_args(x), dataset) 83 | } 84 | update_lp <- function(dag, lp_fargs, dataset) { 85 | lp_fargs$x <- dag 86 | do_bnc_call(lp_fargs, dataset) 87 | } -------------------------------------------------------------------------------- /R/basic-misc.R: -------------------------------------------------------------------------------- 1 | #' Return all but last element of x. 2 | #' 3 | #' If x is NULL returns NA not NULL 4 | #' @keywords internal 5 | get_but_last <- function(x) { 6 | get_null_safe(x, -length(x)) 7 | } 8 | #' Return last element of x. 9 | #' 10 | #' If x is NULL returns NA not NULL 11 | #' @keywords internal 12 | get_last <- function(x) { 13 | get_null_safe(x, length(x)) 14 | } 15 | #' Get i-th element of x. 16 | #' 17 | #' If x is NULL returns NA not NULL 18 | #' @keywords internal 19 | get_null_safe <- function(x, i) { 20 | if (length(x) == 0) NA else x[i] 21 | } 22 | # Convert a factor to integer and then replicate reps times 23 | rep_factor_as_int <- function(f, reps) { 24 | attributes(f) <- NULL 25 | stopifnot(is.integer(f)) 26 | rep(f, reps) 27 | } 28 | #' Compares all elements in a to b 29 | #' 30 | #' @param b numeric. Must be length one but no check is performed. 31 | #' @keywords internal 32 | fast_equal <- function(a, b) { 33 | # stopifnot(length(b) == 1) No check for efficiency 34 | abs(a - b) < .Machine$double.eps ^ 0.5 35 | } 36 | #' Compute predictive accuracy. 37 | #' 38 | #' @param x A vector of predicted labels. 39 | #' @param y A vector of true labels. 
40 | #' @export 41 | #' 42 | #' @examples 43 | #' data(car) 44 | #' nb <- bnc('nb', 'class', car, smooth = 1) 45 | #' p <- predict(nb, car) 46 | #' accuracy(p, car$class) 47 | accuracy <- function(x, y) { 48 | count_equal(x, y) / length(x) 49 | } 50 | # a list to a matrix where the names are kept in the second column 51 | unlist_keepnames <- function(list) { 52 | lengths <- element_lengths(list) 53 | unname(cbind(unlist(list, use.names = FALSE), rep(names(list), lengths))) 54 | } 55 | element_lengths <- function(list) { 56 | vapply(list, length, FUN.VALUE = integer(1)) 57 | } 58 | max_random <- function(x) { 59 | ind <- which(fast_equal(x, max(x))) 60 | if (length(ind) > 1) { 61 | ind <- sample(ind, 1) 62 | } 63 | ind 64 | } 65 | #' Return a bootstrap sub-sample. 66 | #' 67 | #' @param dataset a \code{data.frame} 68 | #' @param proportion numeric given as fraction of \code{dataset} size 69 | #' @keywords internal 70 | bootstrap_ss <- function(dataset, proportion) { 71 | stopifnot(is_positive(proportion)) 72 | N <- nrow(dataset) 73 | stopifnot(N > 0) 74 | subsample_size <- trunc(N * proportion) 75 | dataset[sample(N, replace = T, size = subsample_size), , drop = FALSE] 76 | } 77 | make_call <- function(f, args) { 78 | f <- as.name(f) 79 | as.call(c(f, args)) 80 | } 81 | #' Subset a 2D structure by a vector of column names. 82 | #' 83 | #' Not all colnames are necessarily in the columns of data; in that case this 84 | #' returns NA. 85 | #' @param colnames a character vector 86 | #' @param data a matrix or data frame 87 | #' @keywords internal 88 | subset_by_colnames <- function(colnames, data) { 89 | stopifnot(is.character(colnames), length(colnames) == nrow(data)) 90 | ind_cols <- match(colnames, colnames(data)) 91 | ind_matrix <- cbind(seq_along(ind_cols), ind_cols) 92 | data[ind_matrix] 93 | } -------------------------------------------------------------------------------- /tests/testthat/test-data-statistics.R: -------------------------------------------------------------------------------- 1 | context("Statistics") 2 | 3 | test_that("Conditional mutual information", { 4 | # With Incomplete data. 5 | a <- cmi('physician_fee_freeze', 'Class', 6 | dataset = voting, z = 'water_project_cost_sharing') 7 | b <- cmi('physician_fee_freeze', 'Class', dataset = voting) 8 | expect_equal(a, b, tolerance = 0.001) 9 | # Mutual information 10 | a <- cmi('physician_fee_freeze', 'Class', 11 | dataset = voting, z = 'water_project_cost_sharing') 12 | b <- cmi('physician_fee_freeze', 'Class', voting) 13 | expect_equal(a, b, tolerance = 0.001) 14 | c <- cmi('buying', 'maint', dataset = car) 15 | expect_equal(c, 0) 16 | # the same as when removing missing directly. 
17 | a <- cmi('physician_fee_freeze', 'Class', 18 | dataset = voting, z = 'water_project_cost_sharing') 19 | v <- voting[,c('physician_fee_freeze', 'water_project_cost_sharing', 'Class')] 20 | b <- cmi('physician_fee_freeze', 'Class', v, z = 'water_project_cost_sharing') 21 | expect_equal(a, b, tolerance = 1e-10) 22 | # Log base 23 | a <- cmi('physician_fee_freeze', 'Class', dataset = voting, unit = "log2") 24 | b <- cmi('physician_fee_freeze', 'Class', dataset = voting) 25 | expect_true(abs(a - b) > 0.1) 26 | }) 27 | 28 | test_that("Contingency table", { 29 | t <- extract_ctgt('doors', car) 30 | expect_identical(dim(t), 4L) 31 | t <- extract_ctgt(c('buying', c('doors', 'maint')), car) 32 | expect_identical(dim(t), c(4L, 4L ,4L)) 33 | d <- dimnames(t) 34 | vars <- c('buying', 'doors', 'maint') 35 | expect_identical(names(d), vars) 36 | levs <- lapply(car[, vars], levels) 37 | expect_true(all(mapply(identical, levs, d, SIMPLIFY = TRUE))) 38 | t <- extract_ctgt('crime', voting) 39 | expect_identical(sum(t), 418L) 40 | }) 41 | 42 | test_that("Degrees freedom", { 43 | tbl <- extract_ctgt(c('persons', 'doors', 'class'), car) 44 | df <- cmi_degrees_freedom(freqs_table = tbl) 45 | expect_equal(df, 24L) 46 | 47 | tbl <- extract_ctgt(c('doors', 'class', 'persons'), car) 48 | df <- cmi_degrees_freedom(freqs_table = tbl) 49 | expect_equal(df, 27L) 50 | }) 51 | 52 | test_that("Contingency table to CPT", { 53 | # Nominal 54 | suppressWarnings(RNGversion("3.5.0")) 55 | set.seed(0) 56 | tc <- table(random_letters_vector(3, 20), random_letters_vector(4, 20)) 57 | tcpt <- ctgt2cpt(tc, 1) 58 | expect_equal(colnames(tcpt), letters[1:4]) 59 | expect_equal(tcpt[, 1], setNames(c(1, 3, 4) / 8, letters[1:3])) 60 | # No smooth 61 | suppressWarnings(RNGversion("3.5.0")) 62 | set.seed(0) 63 | tc <- table(random_letters_vector(3, 20), random_letters_vector(4, 20)) 64 | tcpt <- ctgt2cpt(tc, 0) 65 | expect_equal(colnames(tcpt), letters[1:4]) 66 | expect_equal(tcpt[, 1], setNames(c(0, 2, 3) / 5, letters[1:3])) 67 | # 1D table 68 | suppressWarnings(RNGversion("3.5.0")) 69 | set.seed(0) 70 | tc <- table(random_letters_vector(4, 200)) 71 | tcpt <- ctgt2cpt(tc, 0) 72 | expect_equal(names(tcpt), letters[1:4]) 73 | expect_equal(tcpt, tc / 200) 74 | }) 75 | 76 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-params-manb.R: -------------------------------------------------------------------------------- 1 | context("learn params manb") 2 | 3 | # The exact posterior probabilities compare to were obtained using the MANB implementation by Wei et al. 
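# A sketch of the assumed user-facing path to these internals (the manb_prior
# argument of lp(); the argument name is assumed here), not run:
#   n    <- nb('class', car)
#   manb <- lp(n, car, smooth = 1, manb_prior = 0.5)
#   params(manb)$doors   # CPT pulled towards the marginal P(doors) when the arc posterior is low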
4 | 5 | test_that("compute manb nominal", { 6 | nb <- nbcar() 7 | u <- lapply(families(nb), extract_ctgt, car)[features(nb)] 8 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1) 9 | expect_named(d, features(nb)) 10 | d <- as.vector(d) 11 | expect_equal(d, c(1, 1, 0.000026294701543, 1, 1, 1)) 12 | }) 13 | 14 | test_that("compute manb no features", { 15 | nb <- nbcarclass() 16 | a <- list() 17 | names(a) <- character() 18 | expect_equivalent(compute_manb_arc_posteriors(nb, a, smooth = 1), numeric()) 19 | }) 20 | 21 | test_that("compute manb prior", { 22 | nb <- nbcar() 23 | u <- lapply(families(nb), extract_ctgt, car)[features(nb)] 24 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1, prior = 0.00001) 25 | expect_named(d, features(nb)) 26 | d <- as.vector(d) 27 | expect_equal(d, c(1, 1, 0.000000000262957, 1, 0.999981436585993, 1)) 28 | 29 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1, prior = 0.03) 30 | expect_named(d, features(nb)) 31 | d <- as.vector(d) 32 | expect_equal(d, c(1, 1, 0.000000813258915, 1, 0.999999993997562, 1)) 33 | 34 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1, prior = 0.95) 35 | expect_named(d, features(nb)) 36 | d <- as.vector(d) 37 | expect_equal(d, c(1, 1, 0.000499362978504, 1, 0.999999999990223, 1)) 38 | }) 39 | 40 | test_that("compute manb smooth", { 41 | nb <- nbcar() 42 | u <- lapply(families(nb), extract_ctgt, car)[features(nb)] 43 | # No error for smooth not being integer. It is close to when smooth = 1 44 | d <- compute_manb_arc_posteriors(nb, u, smooth = 0.99) 45 | d1 <- compute_manb_arc_posteriors(nb, u, smooth = 1) 46 | expect_true(sum(abs(d - d1)) < 1e-5) 47 | expect_error(compute_manb_arc_posteriors(nb, u, smooth = 0), " > 0 is not TRUE") 48 | }) 49 | 50 | 51 | test_that("compute manb not nb", { 52 | tn <- tan_cl('class', car) 53 | ctgts <- lapply(families(tn), extract_ctgt, car)[features(tn)] 54 | expect_error(compute_manb_arc_posteriors(tn, ctgts, smooth = 1), 55 | "MANB can only be applied to naive Bayes") 56 | }) 57 | 58 | test_that("compute cpt", { 59 | a <- extract_ctgt(c('doors', 'class'), car) 60 | b <- compute_manb_cpt(a, 1, smooth = 0) 61 | a <- ctgt2cpt(a, 0) 62 | expect_equal(b, a) 63 | 64 | a <- extract_ctgt(c('doors', 'class'), car) 65 | b <- compute_manb_cpt(a, 1, smooth = 1) 66 | a <- ctgt2cpt(a, 1) 67 | expect_equal(b, a) 68 | 69 | p <- extract_ctgt(c('buying', 'class'), car) 70 | u <- compute_manb_cpt(p, 0, smooth = 1) 71 | t <- ctgt2cpt(p, smooth = 1) 72 | t[] <- ctgt2cpt(extract_ctgt(c('buying'), car), smooth = 1) 73 | expect_equal(u, t) 74 | 75 | p <- extract_ctgt(c('buying', 'class'), car) 76 | u <- compute_manb_cpt(p, 0.5, smooth = 1) 77 | t <- ctgt2cpt(p, smooth = 1) 78 | pt <- t 79 | pt[] <- ctgt2cpt(extract_ctgt(c('buying'), car), smooth = 1) 80 | pt[] <- (pt + t) / 2 81 | expect_equal(u, pt) 82 | }) -------------------------------------------------------------------------------- /R/learn-chowliu.R: -------------------------------------------------------------------------------- 1 | chowliu <- function(class, dataset, score='loglik', blacklist = NULL, 2 | root = NULL) { 3 | # Get pairwise scores 4 | pairwise_scores <- 5 | pairwise_ode_score_contribs(class = class, dataset = dataset, score = score) 6 | # Get the augmenting forest 7 | aug_forest <- max_weight_forest(pairwise_scores) 8 | # Direct the forest (TODO: test the forest is effectively directed) 9 | aug_forest <- direct_forest(aug_forest, root = root) 10 | # TODO: Add blacklisting. 
11 | ode <- superimpose_node(dag = aug_forest, node = class) 12 | bnc_dag(dag = ode, class = class) 13 | } 14 | pairwise_ode_score_contribs <- function(class, dataset, score) { 15 | # Check score in decomposable_ode_scores 16 | stopifnot(score %in% decomposable_ode_scores()) 17 | # Get features 18 | features = get_features(class = class, dataset = dataset) 19 | # If 0 features then return empty graph 20 | if (length(features) == 0) return(graph_empty_undirected()) 21 | # If 1 feature then return single node graph (no arcs) 22 | pairs <- complete_graph(features) 23 | if (length(features) == 1) return(pairs) 24 | # Get each pair of features 25 | edges <- pairs$edges 26 | from <- edges[, 1] 27 | to <- edges[, 2]; rm(edges) 28 | # For each get pairwise contribution to score 29 | pairwise_score <- mapply(local_ode_score_contrib, from, to, 30 | MoreArgs = list(class = class, dataset = dataset), 31 | SIMPLIFY = TRUE) 32 | stopifnot(identical(rownames(pairwise_score), decomposable_ode_scores())) 33 | # Select the score 34 | pairwise_score <- pairwise_score[score, ] 35 | # Remove negative scores (possible for BIC and AIC) and weight the edges 36 | # ...If I also removed 0 scores --which are possible for loglik-- the structure 37 | # ...may turn out a forest even when using loglik 38 | ind_keep <- pairwise_score >= 0 39 | from <- from[ind_keep] 40 | to <- to[ind_keep] 41 | pairwise_score <- pairwise_score[ind_keep] 42 | make_graph(features, from, to, pairwise_score) 43 | } 44 | #' Returns pairwise component of ODE (penalized) log-likelihood scores. 45 | #' In natural logarithms. 46 | #' @keywords internal 47 | local_ode_score_contrib <- function(x, y, class, dataset) { 48 | # If x and y and class do not have length one stop 49 | stopifnot(length(x) == 1) 50 | stopifnot(length(y) == 1) 51 | stopifnot(length(class) == 1) 52 | # Get contingency table 53 | freqs <- extract_ctgt(c(x, y, class), dataset) 54 | # Ignore dataset from here on 55 | rm(dataset) 56 | # Compute I(X;Y | Z) 57 | cmi <- cmi_table(freqs, unit = "log") 58 | # Get number of degrees of freedom 59 | df <- cmi_degrees_freedom(freqs_table = freqs) 60 | # Make sure it is non-negative 61 | stopifnot(df >= 0) 62 | # Get num. 
of observations in contingency table 63 | N <- sum(freqs) 64 | # Compute bic 65 | bic <- N * cmi - (log(N) / 2) * df 66 | # Compute aic 67 | aic <- N * cmi - df 68 | c(loglik = cmi, bic = bic, aic = aic) 69 | } 70 | decomposable_ode_scores <- function() { c('loglik', 'bic', 'aic') } -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-" 12 | ) 13 | ``` 14 | # bnclassify 15 | 16 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/bnclassify)](https://cran.r-project.org/package=bnclassify) 17 | ![](https://cranlogs.r-pkg.org/badges/bnclassify?color=yellow) 18 | ![](https://cranlogs.r-pkg.org/badges/grand-total/bnclassify?color=yellowgreen) 19 | [![Research software impact](http://depsy.org/api/package/cran/bnclassify/badge.svg)](http://depsy.org/package/r/bnclassify) 20 | [![Codecov test coverage](https://codecov.io/gh/bmihaljevic/bnclassify/branch/master/graph/badge.svg)](https://app.codecov.io/gh/bmihaljevic/bnclassify?branch=master) 21 | [![R-CMD-check](https://github.com/bmihaljevic/bnclassify/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/bmihaljevic/bnclassify/actions/workflows/R-CMD-check.yaml) 22 | 23 | Implements algorithms for learning discrete Bayesian network classifiers from data, as well as functions for using these classifiers for prediction, assessing their predictive performance, and inspecting and analyzing their properties. 24 | 25 | # Example 26 | 27 | Load a data set and learn a one-dependence estimator by maximizing Akaike's information criterion (AIC) score. 28 | ```{r} 29 | library(bnclassify) 30 | data(car) 31 | tn <- tan_cl('class', car, score = 'aic') 32 | tn 33 | plot(tn) 34 | ``` 35 | 36 | After learning the network's parameters, you can use it to classify data. 37 | ```{r} 38 | tn <- lp(tn, car, smooth = 0.01) 39 | p <- predict(tn, car, prob = TRUE) 40 | head(p) 41 | p <- predict(tn, car, prob = FALSE) 42 | head(p) 43 | ``` 44 | 45 | Estimate predictive accuracy with cross validation. 46 | ```{r} 47 | cv(tn, car, k = 10) 48 | ``` 49 | 50 | Or compute the log-likelihood 51 | ```{r} 52 | logLik(tn, car) 53 | ``` 54 | 55 | # Install 56 | 57 | Make sure you have at least version 3.2.0 of R. You can install `bnclassify` from CRAN: 58 | ```{r, eval = FALSE} 59 | install.packages('bnclassify') 60 | ``` 61 | 62 | Or get the current development version from Github: 63 | ```{r, eval = FALSE} 64 | # install.packages('devtools') 65 | devtools::install_github('bmihaljevic/bnclassify') 66 | # devtools::install_github('bmihaljevic/bnclassify', build_vignettes = TRUE) 67 | ``` 68 | 69 | Ideally, you would use the `build_vignettes = TRUE` version, and thus get the vignettes, but it requires programs such as texi2dvi to be installed on your side. 70 | 71 | # Overview 72 | 73 | See an overview of the package and examples of usage: 74 | 75 | ```{r} 76 | vignette('overview', package = 'bnclassify') 77 | ``` 78 | 79 | 80 | See the list of main functionalities. 81 | 82 | ```{r, eval = FALSE} 83 | ?bnclassify 84 | ``` 85 | 86 | Use the usage vignette for more details on the functions. 87 | 88 | ``` r 89 | vignette('usage', package = 'bnclassify') 90 | ``` 91 | 92 | Then have a look at the remaining vignettes. 
93 | 94 | ``` r 95 | browseVignettes("bnclassify") 96 | ``` 97 | -------------------------------------------------------------------------------- /src/infer-test-wrappers.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | IntegerVector test_dims2columns(const NumericVector cpt, const CharacterVector class_var, const CharacterVector columns_db) { 8 | const List & dimnames = cpt.attr("dimnames"); 9 | const CharacterVector & fam = dimnames.attr("names"); 10 | CharacterVector feature_fam = wrap(ordersetdiff(fam, class_var)); 11 | IntegerVector feature_fam_inds = match(feature_fam, columns_db); 12 | if (is_true(any(feature_fam_inds == 0))) stop("All features must be in the dataset."); 13 | feature_fam_inds = feature_fam_inds - 1; 14 | return feature_fam_inds; 15 | } 16 | 17 | // Delete? 18 | //[[Rcpp::export]] 19 | NumericVector get_row(List x, DataFrame df, int cptind) { 20 | // Model mod(x); 21 | // Evidence ds(df, mod.getFeatures()); 22 | // MappedCPT c = MappedCPT(mod.get_cpt(cptind), mod.getClassVar(), ds); 23 | // std::vector entries(mod.get_nclass()); 24 | // c.get_entries(1, entries); 25 | // return wrap(entries); 26 | return NumericVector::create(1); 27 | } 28 | 29 | //[[Rcpp::export]] 30 | NumericVector fill_vector(int size, int row, NumericVector rcpt, DataFrame df, CharacterVector features, std::string class_var) { 31 | CPT cpt(rcpt, class_var); 32 | Evidence evidence(df, features); 33 | MappedCPT m(cpt, evidence); 34 | std::vector output(size); 35 | // std::vector::iterator end = m.fill_instance_indices(row, output.begin()); 36 | // std::vector::iterator end = m.fill_instance_indices(row, output)); 37 | // NumericVector nv(std::distance(output.begin(), end)); 38 | // std::copy(output.begin(), end, nv.begin()); 39 | NumericVector nv = NumericVector::create(2); 40 | return nv; 41 | } 42 | 43 | //[[Rcpp::export]] 44 | void make_cpt_object(const NumericVector & x, std::string class_var) { 45 | CPT cpt(x, class_var); 46 | NumericVector nv = wrap(cpt.get_entries()); 47 | Rcout << nv << std::endl; 48 | 49 | IntegerVector iv = wrap(cpt.get_dimprod()); 50 | } 51 | 52 | 53 | /*** R 54 | # a <- aode('class', car) 55 | # a <- lp(a, car, smooth = 1) 56 | # cpt <- a$.models$persons$.params$buying 57 | # colnames(car) 58 | # names(dimnames(cpt)) 59 | # test_dims2columns(cpt,"class", columns_db = colnames(car)) 60 | 61 | # microbenchmark::microbenchmark( { g = do_mapped(t, dbor)} ) 62 | # microbenchmark::microbenchmark( { d = get_row(t$.params$bkblk, f, class_var(t), dbor) }) 63 | # microbenchmark::microbenchmark( { d = get_row(t$.params$bkblk, f, class_var(t), dbor) }) 64 | 65 | # simple_wrap <- function(x, dataset) { 66 | # if (!anyNA(dataset)) { 67 | # compute_joint(x, dataset) 68 | # } 69 | # } 70 | # microbenchmark::microbenchmark( { f = compute_joint(t, dbor)}, 71 | # { h = simple_wrap(t, dbor)}, 72 | # times = 1e3 ) 73 | 74 | sapply(t$.params, length) 75 | source('tests/infer-test-init.R') 76 | fill_vector(4, 99, t$.params$katri, dbor, features(t), class_var(t)) 77 | fill_vector(4, 409, t$.params[[3]], dbor, features(t), class_var(t)) 78 | */ 79 | 80 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-aode.r: -------------------------------------------------------------------------------- 1 | context("aode") 2 | 3 | test_that("spode", { 4 | u <- spode(sp='D', features=LETTERS[5:9], class = 'C') 5 | 
expect_equal(graph_num_arcs(dag(u)), 11) 6 | expect_equal(length(graph_get_adjacent('D', dag(u))), 6) 7 | 8 | u <- spode(sp='E', features=LETTERS[c(4, 6:9)], class = 'C') 9 | expect_equal(graph_num_arcs(dag(u)), 11) 10 | expect_equal(length(graph_get_adjacent('E', dag(u))), 6) 11 | }) 12 | 13 | test_that("aode str", { 14 | # with 1 feature is an nb 15 | u <- aode(class = 'a', alphadb[, 1:2, drop = FALSE]) 16 | expect_true(is_nb(u)) 17 | 18 | u <- aode(class = 'a', random_letters_db(10)) 19 | expect_true(is_aode(u)) 20 | expect_true(is_ode(models(u)[['c']])) 21 | expect_equal(length(models(u)), 9) 22 | expect_identical(class_var(u), 'a') 23 | expect_identical(features(u), letters[2:10]) 24 | 25 | d <- models(u)[[1]] 26 | expect_equal(graph_num_arcs(dag(d)), 9 + 8) 27 | expect_equal(length(graph_get_adjacent('b', dag(d))), 9) 28 | d <- models(u)[[9]] 29 | expect_equal(graph_num_arcs(dag(d)), 9 + 8) 30 | expect_equal(length(graph_get_adjacent('j', dag(d))), 9) 31 | }) 32 | 33 | test_that("aode bnc funs", { 34 | lets <- random_letters_db(10) 35 | u <- aode(class = 'a', lets) 36 | expect_equal(class_var(u), 'a') 37 | feats <- setdiff(colnames(lets), 'a') 38 | expect_equal(features(u), feats) 39 | expect_false(is_ode(u)) 40 | expect_false(is_semi_naive(u)) 41 | expect_false(is_nb(u)) 42 | expect_error(narcs(u)) 43 | expect_error(nparams(u)) 44 | expect_error(params(u)) 45 | expect_equal(length(models(u)), 9) 46 | expect_output(print(u), regexp = 'ensemble of 9') 47 | expect_output(print(u), regexp = 'learning algorithm: aode') 48 | }) 49 | 50 | test_that("fit aode and bnc", { 51 | a <- bnc('aode', 'class', car, smooth = 1) 52 | sapply(a$models, BIC, car) # No error 53 | }) 54 | 55 | test_that("log joint", { 56 | a <- aode('class', car) 57 | a <- lp(a, car, smooth = 1) 58 | lj <- compute_log_joint(a, car) 59 | expect_equal(sum(exp(lj)), 1) 60 | }) 61 | 62 | test_that("predict", { 63 | # currently not considering weights 64 | # a <- aode('class', car, m=10000) 65 | # a <- lp(a, car, smooth = 1) 66 | 67 | # a <- bnc('aode', 'class', car, dag_args = list(m=10000), smooth = 1) 68 | # p <- predict(a, car, prob = TRUE) 69 | # cp <- params(a$models[[1]])$class 70 | # expect_equal(sum(abs(apply(p, 1, '-', cp))), 0) 71 | 72 | a <- bnc('aode', 'class', car, smooth=1) 73 | p <- predict(a, car, prob = TRUE) 74 | expect_equal(p[12, 1], c(unacc=0.793), tolerance = 0.001) # Probability from Weka 75 | 76 | # dbreast <- foreign::read.arff('~/code/teach-asdm-c01/data/dbreast-cancer.arff') 77 | # a <- bnc('aode', 'Class', dbreast, smooth=1) 78 | # p <- predict(a, dbreast, prob = TRUE) 79 | # expect_equal(p[1, 1], c("no-recurrence-events"=0.494), tolerance = 0.01) # Weka has 0.494 80 | }) 81 | 82 | test_that("incomplete data", { 83 | # gRain implementation change 84 | # # no error 85 | # skip_if_not_installed("gRain") 86 | # vt <- voting[1:10, ] 87 | # a <- aode('Class', vt) 88 | # a <- lp(a, vt, smooth = 1) 89 | # p <- predict(a, vt, prob = TRUE) 90 | # gRain implementation change 91 | }) -------------------------------------------------------------------------------- /tests/testthat/test-update.R: -------------------------------------------------------------------------------- 1 | context("Update") 2 | 3 | test_that("bnc update bnc_dag", { 4 | dgcar <- nb('class', car) 5 | # Currently, parameter fitting is required for updating; dag learning is not 6 | expect_error(bnc_get_update_args(dgcar, dag = FALSE), "lp_fargs") 7 | }) 8 | 9 | test_that("bnc update bnc_bn", { 10 | dgcar <- lp(nb('class', car), car, smooth = 
1) 11 | ua <- bnc_get_update_args(dgcar, dag = FALSE) 12 | b <- bnc_update(ua, car[1:5, ]) 13 | diff <- sum(abs(params(b)[[2]] - params(dgcar)[[2]])) 14 | expect_equal(diff, 2.393358, tolerance = 1e-7) 15 | }) 16 | 17 | test_that("bnc update bnc_bn with struct learning", { 18 | dgcar <- lp(nb('class', car), car, smooth = 1) 19 | ua <- bnc_get_update_args(dgcar, dag = TRUE) 20 | b <- bnc_update(ua, car[1:5, 6:7]) 21 | expect_identical(features(b), "safety") 22 | diff <- sum(abs(params(b)[['safety']] - params(dgcar)[['safety']])) 23 | expect_equal(diff, 2.776258, tolerance = 1e-6) 24 | }) 25 | 26 | test_that("update nominal", { 27 | a <- lp(nb('class', car), car, smooth = 1e10) 28 | b <- update(a, car, dag = FALSE) 29 | identical_non_call(a, b) 30 | }) 31 | 32 | test_that("update data subset", { 33 | a <- lp(nb('class', car), car, smooth = 1) 34 | b <- update(a, car[1:5, ], dag = FALSE) 35 | diff <- sum(abs(params(a)[[2]] - params(b)[[2]])) 36 | expect_equal(diff, 2.393358, tolerance = 1e-7) 37 | }) 38 | 39 | test_that("update with dag", { 40 | t <- lp(tan_cl('class', car), car, smooth = 0.02) 41 | b <- update(t, car[1:5, ], dag = TRUE) 42 | expect_equal(narcs(b), narcs(t)) 43 | expect_true(!isTRUE(all.equal(families(b), families(t)))) 44 | }) 45 | 46 | test_that("update with dag 2", { 47 | a <- lp(nb('class', car), car, smooth = 1) 48 | b <- update(a, car[1:5, 6:7], dag = TRUE) 49 | expect_identical(features(b), "safety") 50 | diff <- sum(abs(params(b)[['safety']] - params(a)[['safety']])) 51 | expect_equal(diff, 2.776258, tolerance = 1e-6) 52 | }) 53 | 54 | test_that("Update dag", { 55 | t <- tan_cl('class', car) 56 | d <- update_dag(t, car[1, ]) 57 | expect_identical(narcs(d), narcs(t)) 58 | expect_true(!identical(families(d), families(t))) 59 | }) 60 | 61 | test_that("Update with awnb param learning", { 62 | # gRain implementation change 63 | # skip_on_cran() 64 | # skip_if_not_installed('gRain') 65 | # a <- nb('Class', voting) 66 | # suppressWarnings(RNGversion("3.5.0")) 67 | # set.seed(0) 68 | # b <- lp(a, voting, smooth = 1, awnb_trees = 1, awnb_bootstrap = 0.5) 69 | # c <- lp(a, voting, smooth = 1, awnb_trees = 45, awnb_bootstrap = 1) 70 | # d <- lp(b, voting, smooth = 1) 71 | # r <- cv(list(b, c, d), voting, k = 2, dag = FALSE) 72 | # # All three values are different 73 | # expect_equal(r, c(0.9517397, 0.9494462, 0.8988606), tolerance = 1e-6) 74 | # gRain implementation change 75 | }) 76 | 77 | test_that("Multi-update bnc_dag", { 78 | a <- nb('class', car) 79 | b <- lp(a, car, smooth = 1) 80 | expect_error(cv(list(a, b), car, k = 10, dag = FALSE), "must inherit") 81 | }) 82 | 83 | test_that("Update with non-name function", { 84 | dgcar <- lp(nb('class', car), car, smooth = 1) 85 | e <- lapply(list(dgcar), lp, car, smooth = 1) 86 | ua <- bnc_get_update_args(e[[1]], dag = FALSE) 87 | b <- bnc_update(ua, car[1:5, ]) 88 | diff <- sum(abs(params(b)[[2]] - params(dgcar)[[2]])) 89 | expect_equal(diff, 2.393358, tolerance = 1e-7) 90 | }) -------------------------------------------------------------------------------- /R/learn-params-wanbia.R: -------------------------------------------------------------------------------- 1 | # Using smooth = 1 by default 2 | fit_wanbia_nb <- function(class_var, dataset) { 3 | lp(nb(class_var, dataset), dataset, smooth = 1) 4 | } 5 | #' Returns a function to compute negative conditional log-likelihood given feature weights 6 | #' @keywords internal 7 | make_cll <- function(class_var, dataset) { 8 | check_class_in_dataset(class_var, dataset) 9 | function(w) 
{ 10 | nb <- fit_wanbia_nb(class_var, dataset) 11 | nb <- set_weights(nb, w) 12 | - compute_cll(nb, dataset) 13 | } 14 | } 15 | #' Returns a function to compute the gradient of negative conditional log-likelihood with respect to feature weights 16 | #' @keywords internal 17 | make_cll_gradient <- function(class_var, dataset) { 18 | check_class_in_dataset(class_var, dataset) 19 | function(w) { 20 | unweighted <- fit_wanbia_nb(class_var, dataset) 21 | compute_cll_gradients(unweighted, dataset, w) 22 | } 23 | } 24 | compute_cll_gradients <- function(unweighted, dataset, w) { 25 | stopifnot(is.null(unweighted$.weights)) 26 | features <- features(unweighted) 27 | db <- lapply(dataset, as.character) 28 | feats <- db[features] 29 | params <- params(unweighted)[features] 30 | 31 | weighted <- set_weights(unweighted, w) 32 | cp <- compute_cp(weighted, dataset = dataset) 33 | class_var <- class_var(weighted) 34 | grad <- mapply(cll_gradient_var, feats, params , MoreArgs = list(class = db[[class_var]], class_posterior = cp )) 35 | grad 36 | } 37 | #' Assuming that the cpt is a leaf, returns 1 instead of a CPT entry when value missing 38 | #' @param x a vector of values 39 | #' @keywords internal 40 | get_log_leaf_entries <- function(cpt, x) { 41 | stopifnot(is.character(x), length(dim(cpt)) == 2) 42 | entries <- matrix(numeric(length = length(x) * ncol(cpt)), ncol = ncol(cpt)) 43 | colnames(entries) <- colnames(cpt) 44 | for (i in seq_along(x)) { 45 | value <- x[i] 46 | if (is.na(value)) { 47 | entries[i, ] <- 1 48 | } 49 | else { 50 | entries[i, ] <- cpt[value, ] 51 | } 52 | } 53 | log(entries) 54 | } 55 | cll_gradient_var <- function(x, cpt, class, class_posterior) { 56 | log_theta <- get_log_leaf_entries(cpt, x) 57 | stopifnot(identical(dim(class_posterior), dim(log_theta ))) 58 | log_theta_class <- subset_by_colnames(class, log_theta) 59 | sum(- log_theta_class + rowSums(log_theta * class_posterior)) 60 | } 61 | #' Compute WANBIA weights. 62 | #' 63 | #' Computes feature weights by optimizing conditional log-likelihood. 64 | #' Weights are bounded to [0, 1]. Implementation based on the original paper 65 | #' and the code provided at \url{https://sourceforge.net/projects/rawnaivebayes}. 66 | #' 67 | #' @inheritParams nb 68 | #' @param dataset The data frame from which to learn feature weights 69 | #' @param return_optim_object Return full output of `optim` 70 | #' @return a named numeric vector 71 | #' @keywords internal 72 | compute_wanbia_weights <- function(class, dataset, return_optim_object = FALSE) { 73 | features <- get_features(class = class, dataset = dataset) 74 | # Initial weights are 1. 75 | w <- rep(1, length(features)) 76 | w <- setNames(w, features) 77 | cll <- make_cll(class, dataset) 78 | cll_gradient <- make_cll_gradient(class, dataset) 79 | w <- optim(w, cll, cll_gradient, method = 'L-BFGS-B', lower = 0, upper = 1) 80 | if (w$convergence != 0) warning(paste0("WANBIA did not converge correctly. ", w$message)) 81 | if (return_optim_object ) return (w) 82 | w$par 83 | } -------------------------------------------------------------------------------- /R/wrap-gRain.R: -------------------------------------------------------------------------------- 1 | # Accessors 2 | #' @export 3 | #' @describeIn grain_and_graph Convert to a grain. 
4 | as_grain <- function(x) { 5 | stopifnot(inherits(x, "bnc_bn")) 6 | if (is.null(x$.grain)) { 7 | compile_grain(params(x)) 8 | } 9 | else { 10 | x$.grain 11 | } 12 | } 13 | # Computes the log joint probability of the observed features for each of the classes 14 | compute_grain_log_joint <- function(grain, dataset, class) { 15 | if (!requireNamespace("gRain", quietly = TRUE)) { 16 | stop("gRain needed for this function to work. Please install it.", 17 | call. = FALSE) 18 | } 19 | # Check gRain compiled 20 | stopifnot(is_grain_compiled(grain)) 21 | # Check data set 22 | check_dataset(dataset) 23 | # Check N > 0 24 | stopifnot(nrow(dataset) > 0) 25 | # If class in dataset, remove it 26 | if (class %in% colnames(dataset)) { 27 | dataset <- dataset[, setdiff(colnames(dataset), class), drop = FALSE] 28 | } 29 | cp <- t(apply(dataset, 1, compute_grain_log_joint_instance, grain, class)) 30 | # Remove any rownames if they may have left over from dataset 31 | rownames(cp) <- NULL 32 | cp 33 | } 34 | # Computes the log joint probability of the observed features for each of the classes 35 | compute_grain_log_joint_instance <- function(instance, grain, class) { 36 | if (!requireNamespace("gRain", quietly = TRUE)) { 37 | stop("gRain needed for this function to work. Please install it.", 38 | call. = FALSE) 39 | } 40 | # Instance is character 41 | stopifnot(is.character(instance)) 42 | # Get non-NA nodes in instance 43 | instance <- instance[!is.na(instance)] 44 | vars <- intersect(names(instance), grain_nodes(grain)) 45 | instance <- instance[vars] 46 | # Check class is character and not in instance 47 | check_class(class) 48 | stopifnot(!(class %in% vars)) 49 | # Set them as evidence if they are more than 0 50 | if (length(instance) > 0) { 51 | grain <- gRain::setEvidence(grain, nodes = vars, states = instance) 52 | } 53 | # gRain has a bug and cannot return unnormalized class. Therefore, using the workaround: P(C | x_evidence) * P(x_evidence) 54 | cp <- gRain::querygrain(grain, nodes = class, normalize = TRUE)[[class]] 55 | cp <- log(cp) 56 | if (length(vars) > 0) { 57 | cp <- cp + log(gRain::pEvidence(grain)) 58 | } 59 | cp 60 | } 61 | # Compiles a grain from a a list of CPTs 62 | compile_grain <- function(cpts) { 63 | if (!requireNamespace("gRain", quietly = TRUE)) { 64 | stop("gRain needed for this function to work. Please install it.", 65 | call. = FALSE) 66 | } 67 | if (!requireNamespace("gRbase", quietly = TRUE)) { 68 | stop("gRbase needed for this function to work. Please install it.", 69 | call. = FALSE) 70 | } 71 | # Check cpts is a list 72 | stopifnot(is.list(cpts)) 73 | # TODO: Check each cpt. 74 | # Convert each cpt to parray 75 | pcpts <- lapply(cpts, gRbase::as.parray, normalize = "none", smooth = 0) 76 | # Check the probabilities are left unchanged 77 | all_eq <- mapply(equivalent_num, cpts, pcpts, SIMPLIFY = TRUE) 78 | stopifnot(vapply(all_eq, isTRUE, FUN.VALUE = logical(1))) 79 | # Assemble the grain 80 | gRain::grain.CPTspec(gRain::compileCPT(pcpts)) 81 | } 82 | is_grain_compiled <- function(g) { 83 | if (!requireNamespace("gRain", quietly = TRUE)) { 84 | stop("gRain needed for this function to work. Please install it.", 85 | call. 
= FALSE) 86 | } 87 | inherits(g, "grain") && g$isCompiled 88 | } 89 | grain_nodes <- function(g) { 90 | g$universe$nodes 91 | } -------------------------------------------------------------------------------- /R/anb-dag.R: -------------------------------------------------------------------------------- 1 | # A basic supertype of all bnc objects 2 | bnc_base <- function(class, features) { 3 | obj <- list(.class = unname(class)) 4 | obj$.features <- unname(features) 5 | class(obj) <- 'bnc_base' 6 | obj 7 | } 8 | # Creates an augmented naive Bayes with structure but no parameters. 9 | bnc_dag <- function(dag, class) { 10 | families <- graphNEL2families(dag, class) 11 | # Save dag, class, features, and call 12 | make_bnc_dag(class = class, families = families, dag = dag) 13 | } 14 | make_bnc_dag <- function(class, families, dag) { 15 | # Not checking families for efficiency; they are checked in bnc_dag anyway 16 | obj <- bnc_base(class = class, features = NULL) 17 | obj$.dag = dag 18 | obj$.families = families 19 | class(obj) <- c('bnc_dag', class(obj)) 20 | obj 21 | } 22 | # Checks that it is a valid bnc_dag object 23 | check_bnc_dag <- function(x) { 24 | check_bnc_dag_basic(x) 25 | # Check families 26 | check_anb_families(families(x), class_var(x)) 27 | } 28 | check_bnc_dag_basic <- function(x) { 29 | class <- class_var(x) 30 | features <- features(x) 31 | # This also checks for class. 32 | check_features(features = features, class = class) 33 | stopifnot(identical(vars(x), setNames(nm = c(features, class)))) 34 | } 35 | 36 | #' @export 37 | #' @describeIn grain_and_graph Convert to a graphNEL. 38 | as_igraph <- function(x) { 39 | stopifnot(inherits(x, "bnc_dag")) 40 | graph_internal2graph_NEL(dag(x)) 41 | } 42 | #' Get the underlying graph. This should be exported. 43 | #' @keywords internal 44 | #' @param x the bnc object 45 | dag <- function(x) { 46 | stopifnot(inherits(x, "bnc_dag")) 47 | x$.dag 48 | } 49 | #' @export 50 | #' @describeIn inspect_bnc_dag Returns the class variable. 51 | class_var <- function(x) { 52 | stopifnot(inherits(x, "bnc_base")) 53 | x$.class 54 | } 55 | #' @export 56 | #' @describeIn inspect_bnc_dag Returns the features. 57 | features <- function(x) { 58 | # Implementing a generic features did not allow me to document it in inspect_bnc_dag, so I dispatch by class within the function 59 | if (inherits(x, 'bnc_dag')) { 60 | return (setdiff(vars(x), class_var(x))) 61 | } 62 | else if (inherits(x, 'bnc_base')) { 63 | return(x$.features) 64 | } 65 | stop('Must be either bnc_dag or bnc_base') 66 | } 67 | #' @export 68 | #' @describeIn inspect_bnc_dag Returns all variables (i.e., features + class). 69 | vars <- function(x) { 70 | setNames(nm = get_family_vars(families(x))) 71 | } 72 | #' @export 73 | #' @describeIn inspect_bnc_dag Returns the family of each variable. 74 | families <- function(x) { 75 | stopifnot(inherits(x, "bnc_dag")) 76 | x$.families 77 | } 78 | #' @export 79 | #' @describeIn inspect_bnc_dag Returns the model string of the network in bnlearn format (with a space between consecutive families).
80 | modelstring <- function(x) { 81 | stopifnot(inherits(x, "bnc_dag")) 82 | fams <- families(x) 83 | order <- order_acyclic(families(x)) 84 | fams <- fams[order] 85 | paste(sapply(names(fams), function(node) { 86 | paste("[", node, ifelse(length(fams[[node]]) - 1 > 0, "|", ""), paste(fams[[node]][-1], sep = "", collapse = ":"), "]", sep = "") 87 | }), collapse = " ") 88 | } 89 | # # Returns all feature families excluding the class variable 90 | # # Returns all feature families excluding the class variable 91 | # feature_families <- function(x) { 92 | # feature_fams <- families(x)[features(x)] 93 | # lapply(feature_fams, family_features, class_var(x)) 94 | # } 95 | #' @export 96 | #' @describeIn inspect_bnc_dag Returns the family of each feature. 97 | feature_families <- function(x) { 98 | families(x)[features(x)] 99 | } --------------------------------------------------------------------------------
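Editor's sketch for compute_wanbia_weights() in R/learn-params-wanbia.R: the snippet below illustrates the bounded L-BFGS-B call that the function builds around make_cll() and make_cll_gradient(). The objective and gradient here (toy_negative_cll, toy_gradient) are hypothetical placeholders, not bnclassify code; only the optim() usage mirrors the source.

toy_negative_cll <- function(w) sum((w - 0.3)^2)   # placeholder for the negative conditional log-likelihood
toy_gradient <- function(w) 2 * (w - 0.3)          # exact gradient of the placeholder objective
w <- setNames(rep(1, 3), c("f1", "f2", "f3"))      # initial feature weights are all 1
fit <- optim(w, toy_negative_cll, toy_gradient,
             method = "L-BFGS-B", lower = 0, upper = 1)  # weights constrained to [0, 1]
if (fit$convergence != 0) warning("Optimization did not converge. ", fit$message)
fit$par                                            # named vector of learned weights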
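Editor's sketch for get_log_leaf_entries(): a toy feature-by-class CPT, made up for illustration, showing that a missing value contributes probability 1, i.e. log(1) = 0, to the log joint.

cpt <- matrix(c(0.7, 0.3, 0.2, 0.8), nrow = 2,
              dimnames = list(feat = c("a", "b"), class = c("yes", "no")))
x <- c("a", NA, "b")                         # one observation is missing
entries <- matrix(1, nrow = length(x), ncol = ncol(cpt),
                  dimnames = list(NULL, colnames(cpt)))
observed <- !is.na(x)
entries[observed, ] <- cpt[x[observed], ]    # CPT rows for the observed values
log(entries)                                 # the NA row is all zeros on the log scale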
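Editor's sketch for compute_grain_log_joint_instance() in R/wrap-gRain.R: the workaround log P(C | evidence) + log P(evidence) on a minimal two-node network. The network itself is invented for illustration; the gRain calls (setEvidence, querygrain, pEvidence) are the ones used in the source.

library(gRain)
cpt_c <- cptable(~class, values = c(60, 40), levels = c("yes", "no"))
cpt_f <- cptable(~feat | class, values = c(7, 3, 2, 8), levels = c("a", "b"))
net <- grain(compileCPT(list(cpt_c, cpt_f)))
net <- setEvidence(net, nodes = "feat", states = "a")
log_conditional <- log(querygrain(net, nodes = "class")[["class"]])  # log P(class | feat = "a")
log_joint <- log_conditional + log(pEvidence(net))                   # add log P(feat = "a")
log_joint                                                            # unnormalized log joint per class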
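Editor's sketch for modelstring() in R/anb-dag.R: how a family list (each element is the node followed by its parents, already in an acyclic order) maps to the bnlearn string format. The fams object below is a hypothetical three-node example, not output from bnclassify.

fams <- list(class = "class", f1 = c("f1", "class"), f2 = c("f2", "class", "f1"))
paste(sapply(names(fams), function(node) {
  parents <- fams[[node]][-1]                # drop the node itself, keep its parents
  paste0("[", node, if (length(parents) > 0) "|" else "", paste(parents, collapse = ":"), "]")
}), collapse = " ")
# "[class] [f1|class] [f2|class:f1]"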