├── .github ├── .gitignore └── workflows │ ├── test-coverage.yaml │ └── R-CMD-check.yaml ├── src ├── .gitignore ├── Makevars.win ├── Makevars ├── multidim-array.cpp ├── graph-internal.cpp ├── basic-probs.cpp ├── anb-operate.cpp ├── data.cpp ├── table.cpp └── infer-test-wrappers.cpp ├── data ├── car.RData └── voting.RData ├── tests ├── testthat.R └── testthat │ ├── test-anb-internal.R │ ├── test-predict.R │ ├── test-dag.R │ ├── test-basic-assert.R │ ├── test-memoise.R │ ├── test-anb-cpts.R │ ├── test-wrap-grain.R │ ├── test-basic-misc.R │ ├── test-hc-bsej.R │ ├── test-basic-probs.R │ ├── test-learn-params-wanbia.r │ ├── test-anb-bn.R │ ├── test-learn-params-awnb.R │ ├── test-anb-dag.R │ ├── test-wrap-igraph.R │ ├── test-cpp-table.R │ ├── test-data-input.R │ ├── helper-common.R │ ├── test-wrap-mlr.R │ ├── test-anb-bn-operate.R │ ├── test-infer-anb-cpp.R │ ├── test-graph-internal.R │ ├── test-hc-fssj.R │ ├── test-learn-struct.R │ ├── test-bnc-dag-operate.R │ ├── test-infer-anb.R │ ├── test-data-statistics.R │ ├── test-learn-params-manb.R │ ├── test-learn-aode.r │ └── test-update.R ├── vignettes ├── pg_0001.png ├── pg_0002.png ├── pg_0003.png ├── pg_0004.png ├── header.tex ├── macros-paper.tex ├── macros-bnclassify.tex ├── algorithms.tex ├── macros-math.tex ├── includes │ └── abstract.rmd ├── macros-rjournal.tex ├── overview.Rmd └── methods.bib ├── CRAN-SUBMISSION ├── man ├── figures │ └── README-unnamed-chunk-2-1.png ├── is_aode.Rd ├── bnc_aode_bns.Rd ├── get_last.Rd ├── nb_dag.Rd ├── new_cache.Rd ├── get_but_last.Rd ├── get_null_safe.Rd ├── is.memoised.Rd ├── forget.Rd ├── get_ancestors.Rd ├── skip_testing.Rd ├── makeRLearner.bnc.Rd ├── bnc_aode.Rd ├── augment_kdb.Rd ├── graph_is_adjacent.Rd ├── dag.Rd ├── print.bnc_base.Rd ├── are_pdists.Rd ├── graph_get_adjacent.Rd ├── cmi_table.Rd ├── check_mlr_attached.Rd ├── graph_add_edges.Rd ├── skip_assert.Rd ├── compute_cll.Rd ├── learn_unprunned_tree.Rd ├── fast_equal.Rd ├── graph_connected_components.Rd ├── spode.Rd ├── are_factors.Rd ├── direct_tree.Rd ├── augment_ode.Rd ├── graph_union.Rd ├── make_cll.Rd ├── car.Rd ├── graph_subgraph.Rd ├── order_acyclic.Rd ├── cpt_vars_values.Rd ├── extract_ctgt.Rd ├── log_normalize.Rd ├── voting.Rd ├── augment_ode_arcs.Rd ├── augment_kdb_arcs.Rd ├── bootstrap_ss.Rd ├── compute_ll.Rd ├── graph_named_edge_matrix.Rd ├── predictLearner.bnc.Rd ├── complete_graph.Rd ├── trainLearner.bnc.Rd ├── map.Rd ├── local_ode_score_contrib.Rd ├── superparent_children.Rd ├── get_log_leaf_entries.Rd ├── make_cll_gradient.Rd ├── accuracy.Rd ├── identify_min_testing_depths.Rd ├── subset_by_colnames.Rd ├── direct_forest.Rd ├── identify_all_testing_depths.Rd ├── memoise_char.Rd ├── aode.Rd ├── as_mlr.Rd ├── bnc_dag.Rd ├── nb.Rd ├── grain_and_graph.Rd ├── cmi.Rd ├── bnc_bn.Rd ├── max_weight_forest.Rd ├── compute_wanbia_weights.Rd ├── plot.bnc_dag.Rd ├── predict.bnc_fit.Rd ├── loglik.Rd ├── bnc.Rd ├── inspect_bnc_bn.Rd ├── tan_chowliu.Rd ├── cv.Rd ├── inspect_bnc_dag.Rd └── greedy_wrapper.Rd ├── .gitignore ├── .Rbuildignore ├── meta ├── bnclassify-caret.R ├── download-statistics.R ├── graph-partial.r ├── submit.R ├── methods.csv └── check.r ├── R ├── frontend-dag.R ├── learn-aode.r ├── release.R ├── cv-multi.r ├── learn-params-awnb.R ├── frontend-graph.r ├── bncs.R ├── wrap-igraph.R ├── anb-internal.R ├── anb-bn.R ├── wrap-rpart.R ├── basic-probs.R ├── anb-bn-operate.R ├── predict.R ├── data-input.R ├── memoise.R ├── learn-hc.R ├── frontend-anb.r ├── learn-params-manb.R ├── basic-assert.R ├── wrap-mlr.R ├── anb-cpts.R ├── cv-update.R 
├── basic-misc.R ├── learn-chowliu.R ├── learn-params-wanbia.R ├── wrap-gRain.R └── anb-dag.R ├── bnclassify.Rproj ├── inst ├── include │ ├── data.h │ ├── basic-misc.h │ └── multidim-array.h └── CITATION ├── DESCRIPTION ├── cran-comments.md ├── NAMESPACE ├── NEWS.md ├── Makefile └── README.Rmd /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS = -I../inst/include -------------------------------------------------------------------------------- /data/car.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/data/car.RData -------------------------------------------------------------------------------- /data/voting.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/data/voting.RData -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bnclassify) 3 | 4 | test_check("bnclassify") -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | # Disable long types from C99 or CPP11 extensions 2 | PKG_CPPFLAGS = -I../inst/include -------------------------------------------------------------------------------- /vignettes/pg_0001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0001.png -------------------------------------------------------------------------------- /vignettes/pg_0002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0002.png -------------------------------------------------------------------------------- /vignettes/pg_0003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0003.png -------------------------------------------------------------------------------- /vignettes/pg_0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/vignettes/pg_0004.png -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.4.8 2 | Date: 2024-03-13 10:57:19 UTC 3 | SHA: cd3bec7e91d5812d98d935b82da5f6e70d63554a 4 | -------------------------------------------------------------------------------- /vignettes/header.tex: -------------------------------------------------------------------------------- 1 | \input{macros-bnclassify} 2 | \input{macros-math} 3 | \input{macros-paper} 4 | \input{macros-rjournal} 5 | 
-------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bmihaljevic/bnclassify/HEAD/man/figures/README-unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /vignettes/macros-paper.tex: -------------------------------------------------------------------------------- 1 | \newcommand{\rtbl}[1]{Table~\ref{tbl:#1}} 2 | \newcommand{\req}[1]{Equation~\ref{eq:#1}} 3 | \newcommand{\rsec}[1]{Section~\ref{sec:#1}} 4 | \newcommand{\rfig}[1]{Figure~\ref{fig:#1}} 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .develop/ 2 | .design-and-manage/ 3 | .Rproj.user/ 4 | .Rproj.user 5 | .Rhistory 6 | .RData 7 | vignettes/bnclassify_cache/ 8 | .theory/ 9 | .paper 10 | inst/doc/ 11 | 12 | 13 | doc 14 | Meta 15 | /doc/ 16 | /Meta/ 17 | -------------------------------------------------------------------------------- /man/is_aode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bncs.R 3 | \name{is_aode} 4 | \alias{is_aode} 5 | \title{Is it an AODE?} 6 | \usage{ 7 | is_aode(x) 8 | } 9 | \description{ 10 | Is it an AODE? 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | .gitignore 4 | cran-comments.md 5 | meta 6 | Makefile 7 | ^\.travis\.yml$ 8 | ^README\.Rmd$ 9 | ^appveyor\.yml$ 10 | ^doc$ 11 | ^Meta$ 12 | ^CRAN-RELEASE$ 13 | ^Makefile$ 14 | ^Jenkinsfile$ 15 | ^\.github$ 16 | ^CRAN-SUBMISSION$ 17 | -------------------------------------------------------------------------------- /src/multidim-array.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | int entry_index(const std::vector & indices, const std::vector & dim_prod) { 8 | return entry_index( indices.begin(), dim_prod); 9 | } -------------------------------------------------------------------------------- /man/bnc_aode_bns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bncs.R 3 | \name{bnc_aode_bns} 4 | \alias{bnc_aode_bns} 5 | \title{Fits an AODE model.} 6 | \usage{ 7 | bnc_aode_bns(x, fit_models) 8 | } 9 | \description{ 10 | Fits an AODE model.
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/get_last.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{get_last} 4 | \alias{get_last} 5 | \title{Return last element of x.} 6 | \usage{ 7 | get_last(x) 8 | } 9 | \description{ 10 | If x is NULL returns NA not NULL 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/nb_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{nb_dag} 4 | \alias{nb_dag} 5 | \title{Returns a naive Bayes structure} 6 | \usage{ 7 | nb_dag(class, features) 8 | } 9 | \description{ 10 | Returns a naive Bayes structure 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/new_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{new_cache} 4 | \alias{new_cache} 5 | \title{Make a new cache.} 6 | \usage{ 7 | new_cache() 8 | } 9 | \description{ 10 | Make a new cache. 11 | } 12 | \author{ 13 | Hadley Wickham 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/get_but_last.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{get_but_last} 4 | \alias{get_but_last} 5 | \title{Return all but last element of x.} 6 | \usage{ 7 | get_but_last(x) 8 | } 9 | \description{ 10 | If x is NULL returns NA not NULL 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/get_null_safe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{get_null_safe} 4 | \alias{get_null_safe} 5 | \title{Get i-th element of x.} 6 | \usage{ 7 | get_null_safe(x, i) 8 | } 9 | \description{ 10 | If x is NULL returns NA not NULL 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/is.memoised.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{is.memoised} 4 | \alias{is.memoised} 5 | \title{Is it memoized?} 6 | \usage{ 7 | is.memoised(f) 8 | } 9 | \description{ 10 | Is it memoized? 11 | } 12 | \author{ 13 | Hadley Wickham 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/forget.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{forget} 4 | \alias{forget} 5 | \title{Forget a memoized function.} 6 | \usage{ 7 | forget(f) 8 | } 9 | \description{ 10 | Forget a memoized function. 
11 | } 12 | \author{ 13 | Hadley Wickham 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/get_ancestors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-families.R 3 | \name{get_ancestors} 4 | \alias{get_ancestors} 5 | \title{Based on gRbase::ancestors()} 6 | \usage{ 7 | get_ancestors(node, families) 8 | } 9 | \description{ 10 | Based on gRbase::ancestors() 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/skip_testing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-assert.R 3 | \name{skip_testing} 4 | \alias{skip_testing} 5 | \title{Skip while testing to isolate errors} 6 | \usage{ 7 | skip_testing() 8 | } 9 | \description{ 10 | Skip while testing to isolate errors 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /meta/bnclassify-caret.R: -------------------------------------------------------------------------------- 1 | library(caret) 2 | library(bnclassify) 3 | data("voting") 4 | fitControl <- trainControl( method = "repeatedcv", number = 10, repeats = 2) 5 | V <- na.omit(voting) 6 | data(car) 7 | set.seed(0) 8 | fit <- train(car[ , -ncol(car)], car[, 'class'], method = "nbDiscrete", 9 | trControl = fitControl ) 10 | fit 11 | -------------------------------------------------------------------------------- /man/makeRLearner.bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{makeRLearner.bnc} 4 | \alias{makeRLearner.bnc} 5 | \title{makeRLearner. Auxiliary mlr function.} 6 | \usage{ 7 | makeRLearner.bnc() 8 | } 9 | \description{ 10 | makeRLearner. Auxiliary mlr function. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/bnc_aode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bncs.R 3 | \name{bnc_aode} 4 | \alias{bnc_aode} 5 | \title{Returns a \code{c("bnc_aode", "bnc")} object.} 6 | \usage{ 7 | bnc_aode(models, class_var, features) 8 | } 9 | \description{ 10 | Returns a \code{c("bnc_aode", "bnc")} object. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/augment_kdb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_kdb} 4 | \alias{augment_kdb} 5 | \title{Arcs that do not invalidate the k-DB structure} 6 | \usage{ 7 | augment_kdb(kdbk) 8 | } 9 | \description{ 10 | Arcs that do not invalidate the k-DB structure 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/graph_is_adjacent.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_is_adjacent} 4 | \alias{graph_is_adjacent} 5 | \title{Checks whether nodes are adjacent} 6 | \usage{ 7 | graph_is_adjacent(from, to, g) 8 | } 9 | \description{ 10 | Checks whether nodes are adjacent 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-dag.R 3 | \name{dag} 4 | \alias{dag} 5 | \title{Get underlying graph. This should be exported.} 6 | \usage{ 7 | dag(x) 8 | } 9 | \arguments{ 10 | \item{x}{the bnc object} 11 | } 12 | \description{ 13 | Get underlying graph. This should be exported. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/print.bnc_base.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bnc-dag-operate.R 3 | \name{print.bnc_base} 4 | \alias{print.bnc_base} 5 | \title{Print basic information about a classifier.} 6 | \usage{ 7 | \method{print}{bnc_base}(x, ...) 8 | } 9 | \description{ 10 | Print basic information about a classifier. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/are_pdists.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-probs.R 3 | \name{are_pdists} 4 | \alias{are_pdists} 5 | \title{Returns \code{TRUE} if \code{x} is a valid probability distribution.} 6 | \usage{ 7 | are_pdists(x) 8 | } 9 | \description{ 10 | Returns \code{TRUE} if \code{x} is a valid probability distribution. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/graph_get_adjacent.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_get_adjacent} 4 | \alias{graph_get_adjacent} 5 | \title{Finds adjacent nodes. Has not been tested much} 6 | \usage{ 7 | graph_get_adjacent(node, g) 8 | } 9 | \description{ 10 | Finds adjacent nodes.
Has not been tested much 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /R/frontend-dag.R: -------------------------------------------------------------------------------- 1 | # TODO: merge with frontend anb 2 | 3 | # Adds arcs from parents to node 4 | condition_on <- function(parents, nodes, x) { 5 | # Replicate parents for each node 6 | wparents <- rep(parents, length(nodes)) 7 | wnodes <- rep(nodes, each = length(parents)) 8 | # Add edges 9 | g <- add_edges(wparents, wnodes, x) 10 | if (!skip_testing()) stopifnot(is_dag_graph(g)) 11 | g 12 | } -------------------------------------------------------------------------------- /man/cmi_table.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-statistics.R 3 | \name{cmi_table} 4 | \alias{cmi_table} 5 | \title{Returns the conditional mutual information of three variables.} 6 | \usage{ 7 | cmi_table(xyz_freqs, unit = "log") 8 | } 9 | \description{ 10 | Returns the conditional mutual information of three variables. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/check_mlr_attached.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{check_mlr_attached} 4 | \alias{check_mlr_attached} 5 | \title{Checks if mlr is attached.} 6 | \usage{ 7 | check_mlr_attached() 8 | } 9 | \description{ 10 | mlr must be attached because otherwise `getMlrOptions()` in `makeLearner` will not be found. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/graph_add_edges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_add_edges} 4 | \alias{graph_add_edges} 5 | \title{Add edges 6 | Does not allow edges among adjacent nodes} 7 | \usage{ 8 | graph_add_edges(from, to, g) 9 | } 10 | \description{ 11 | Add edges 12 | Does not allow edges among adjacent nodes 13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /man/skip_assert.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-assert.R 3 | \name{skip_assert} 4 | \alias{skip_assert} 5 | \title{Whether to do checks or not. Set TRUE to speed up debugging or building.} 6 | \usage{ 7 | skip_assert() 8 | } 9 | \description{ 10 | Whether to do checks or not. Set TRUE to speed up debugging or building.
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-anb-internal.R: -------------------------------------------------------------------------------- 1 | context("anb internal") 2 | 3 | test_that("nb_dag", { 4 | # Nominal 5 | d <- nb_dag('f', letters[1:5]) 6 | expect_equal(graph_num_arcs(d), 5) 7 | expect_equal(d$edgemode, "directed") 8 | # No features 9 | d <- nb_dag('f', NULL) 10 | expect_equal(graph_num_arcs(d), 0) 11 | expect_equal(graph_num_nodes(d), 1) 12 | expect_equal(d$edgemode, "directed") 13 | }) 14 | -------------------------------------------------------------------------------- /man/compute_cll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/infer.R 3 | \name{compute_cll} 4 | \alias{compute_cll} 5 | \title{Computes the conditional log-likelihood of the model on the provided data.} 6 | \usage{ 7 | compute_cll(x, dataset) 8 | } 9 | \description{ 10 | Computes the conditional log-likelihood of the model on the provided data. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/learn_unprunned_tree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-rpart.R 3 | \name{learn_unprunned_tree} 4 | \alias{learn_unprunned_tree} 5 | \title{Learns an unpruned \code{rpart} recursive partition.} 6 | \usage{ 7 | learn_unprunned_tree(dataset, class) 8 | } 9 | \description{ 10 | Learns an unpruned \code{rpart} recursive partition. 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /tests/testthat/test-predict.R: -------------------------------------------------------------------------------- 1 | context("predict") 2 | 3 | test_that("Maximum a posteriori", { 4 | # gRain implementation change 5 | # skip_if_not_installed('gRain') 6 | # h <- nbvote() 7 | # pred <- predict(h, voting, prob = TRUE) 8 | # p <- map(pred) 9 | # accu <- sum(p == voting$Class) / nrow(voting) 10 | # expect_equal(accu, 0.9034483, tolerance = 1e-7) 11 | # gRain implementation change 12 | }) -------------------------------------------------------------------------------- /bnclassify.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: No 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /man/fast_equal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{fast_equal} 4 | \alias{fast_equal} 5 | \title{Compares all elements in a to b} 6 | \usage{ 7 | fast_equal(a, b) 8 | } 9 | \arguments{ 10 | \item{b}{numeric.
Must be length one but no check is performed.} 11 | } 12 | \description{ 13 | Compares all elements in a to b 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/graph_connected_components.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_connected_components} 4 | \alias{graph_connected_components} 5 | \title{connected_components} 6 | \usage{ 7 | graph_connected_components(g) 8 | } 9 | \arguments{ 10 | \item{g}{graph_internal.} 11 | } 12 | \description{ 13 | connected_components 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/spode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-aode.r 3 | \name{spode} 4 | \alias{spode} 5 | \title{Returns a Superparent one-dependence estimator.} 6 | \usage{ 7 | spode(sp, features, class) 8 | } 9 | \arguments{ 10 | \item{sp}{character The superparent.} 11 | } 12 | \description{ 13 | Returns a Superparent one-dependence estimator. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/are_factors.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-assert.R 3 | \name{are_factors} 4 | \alias{are_factors} 5 | \title{Checks if all columns in a data frame are factors.} 6 | \usage{ 7 | are_factors(x) 8 | } 9 | \arguments{ 10 | \item{x}{a \code{data.frame}} 11 | } 12 | \description{ 13 | Checks if all columns in a data frame are factors. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/direct_tree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{direct_tree} 4 | \alias{direct_tree} 5 | \title{Direct an undirected graph.} 6 | \usage{ 7 | direct_tree(g, root = NULL) 8 | } 9 | \value{ 10 | A graph. The directed tree. 11 | } 12 | \description{ 13 | The graph must be connected and the function produces a directed tree. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/augment_ode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_ode} 4 | \alias{augment_ode} 5 | \title{Arcs that do not invalidate the tree-like structure} 6 | \usage{ 7 | augment_ode(bnc_dag, ...) 
8 | } 9 | \arguments{ 10 | \item{...}{Ignored.} 11 | } 12 | \description{ 13 | Arcs that do not invalidate the tree-like structure 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/graph_union.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{graph_union} 4 | \alias{graph_union} 5 | \title{Merges multiple disjoint graphs into a single one.} 6 | \usage{ 7 | graph_union(g) 8 | } 9 | \arguments{ 10 | \item{g}{A graph} 11 | } 12 | \value{ 13 | A graph 14 | } 15 | \description{ 16 | Merges multiple disjoint graphs into a single one. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/make_cll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{make_cll} 4 | \alias{make_cll} 5 | \title{Returns a function to compute negative conditional log-likelihood given feature weights} 6 | \usage{ 7 | make_cll(class_var, dataset) 8 | } 9 | \description{ 10 | Returns a function to compute negative conditional log-likelihood given feature weights 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/car.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \docType{data} 4 | \name{car} 5 | \alias{car} 6 | \title{Car Evaluation Data Set.} 7 | \format{ 8 | A \code{data.frame} with 7 columns and 1728 rows. 9 | } 10 | \source{ 11 | \url{https://goo.gl/GTXrCz} 12 | } 13 | \description{ 14 | Data set from the UCI repository: 15 | \url{https://archive.ics.uci.edu/ml/datasets/Car+Evaluation}. 16 | } 17 | -------------------------------------------------------------------------------- /man/graph_subgraph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_subgraph} 4 | \alias{graph_subgraph} 5 | \title{Subgraph. 6 | Only for a directed graph?} 7 | \usage{ 8 | graph_subgraph(nodes, g) 9 | } 10 | \arguments{ 11 | \item{nodes}{character} 12 | 13 | \item{g}{graph_internal.} 14 | } 15 | \description{ 16 | Subgraph. 17 | Only for a directed graph? 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/order_acyclic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-families.R 3 | \name{order_acyclic} 4 | \alias{order_acyclic} 5 | \title{Provide an acyclic ordering (i.e., a topological sort).} 6 | \usage{ 7 | order_acyclic(families) 8 | } 9 | \description{ 10 | Provide an acyclic ordering (i.e., a topological sort). 11 | } 12 | \references{ 13 | Bang-Jensen and Gutin, 2007, page 14.
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/cpt_vars_values.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/anb-cpts.R 3 | \name{cpt_vars_values} 4 | \alias{cpt_vars_values} 5 | \title{Get values just from the first dimension of each variable's own CPT, not checking for consistency 6 | in others.} 7 | \usage{ 8 | cpt_vars_values(cpts) 9 | } 10 | \description{ 11 | Get values just from the first dimension of each variable's own CPT, not checking for consistency 12 | in others. 13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /man/extract_ctgt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-statistics.R 3 | \name{extract_ctgt} 4 | \alias{extract_ctgt} 5 | \title{Returns a contingency table over the variables.} 6 | \usage{ 7 | extract_ctgt(cols, dataset) 8 | } 9 | \description{ 10 | Each variable may be a character vector. 11 | } 12 | \details{ 13 | Any rows with incomplete observations of the variables are ignored. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/log_normalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-probs.R 3 | \name{log_normalize} 4 | \alias{log_normalize} 5 | \title{Normalize log probabilities.} 6 | \usage{ 7 | log_normalize(lp) 8 | } 9 | \description{ 10 | Uses the log-sum-exp trick. 11 | } 12 | \references{ 13 | Murphy KP (2012). \emph{Machine learning: a probabilistic 14 | perspective}. The MIT Press. pp. 86-87. 15 | } 16 | \keyword{internal} 17 | -------------------------------------------------------------------------------- /man/voting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \docType{data} 4 | \name{voting} 5 | \alias{voting} 6 | \title{Congress Voting Data Set.} 7 | \format{ 8 | A \code{data.frame} with 17 columns and 435 rows. 9 | } 10 | \source{ 11 | \url{https://goo.gl/GTXrCz} 12 | } 13 | \description{ 14 | Data set from the UCI repository 15 | \url{https://archive.ics.uci.edu/ml/datasets/Congressional+Voting+Records}. 16 | } 17 | -------------------------------------------------------------------------------- /man/augment_ode_arcs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_ode_arcs} 4 | \alias{augment_ode_arcs} 5 | \title{Returns augmenting arcs that do not invalidate the ODE.} 6 | \usage{ 7 | augment_ode_arcs(bnc_dag) 8 | } 9 | \value{ 10 | a character matrix. NULL if no arcs can be added. 11 | } 12 | \description{ 13 | Returns augmenting arcs that do not invalidate the ODE.
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/augment_kdb_arcs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{augment_kdb_arcs} 4 | \alias{augment_kdb_arcs} 5 | \title{Returns augmenting arcs that do not invalidate the k-DB.} 6 | \usage{ 7 | augment_kdb_arcs(bnc_dag, k) 8 | } 9 | \value{ 10 | a character matrix. NULL if no arcs can be added. 11 | } 12 | \description{ 13 | Returns augmenting arcs that do not invalidate the k-DB. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/bootstrap_ss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{bootstrap_ss} 4 | \alias{bootstrap_ss} 5 | \title{Return a bootstrap sub-sample.} 6 | \usage{ 7 | bootstrap_ss(dataset, proportion) 8 | } 9 | \arguments{ 10 | \item{dataset}{a \code{data.frame}} 11 | 12 | \item{proportion}{numeric given as fraction of \code{dataset} size} 13 | } 14 | \description{ 15 | Return a bootstrap sub-sample. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/compute_ll.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/infer.R 3 | \name{compute_ll} 4 | \alias{compute_ll} 5 | \title{Computes log-likelihood of the model on the provided data.} 6 | \usage{ 7 | compute_ll(x, dataset) 8 | } 9 | \arguments{ 10 | \item{x}{A \code{\link{bnc_bn}} object.} 11 | 12 | \item{dataset}{A data frame.} 13 | } 14 | \description{ 15 | Computes log-likelihood of the model on the provided data. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/graph_named_edge_matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/graph-internal.r 3 | \name{graph_named_edge_matrix} 4 | \alias{graph_named_edge_matrix} 5 | \title{Returns an edge matrix with node names (instead of node indices).} 6 | \usage{ 7 | graph_named_edge_matrix(x) 8 | } 9 | \value{ 10 | A character matrix. 11 | } 12 | \description{ 13 | Returns an edge matrix with node names (instead of node indices). 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/predictLearner.bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{predictLearner.bnc} 4 | \alias{predictLearner.bnc} 5 | \title{predictLearner. Auxiliary mlr function.} 6 | \usage{ 7 | predictLearner.bnc(.learner, .model, .newdata, ...) 8 | } 9 | \arguments{ 10 | \item{.learner, .model, .newdata}{Internal.} 11 | 12 | \item{...}{Internal.} 13 | } 14 | \description{ 15 | predictLearner. Auxiliary mlr function. 
16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /inst/include/data.h: -------------------------------------------------------------------------------- 1 | #ifndef bnclassify_data_H 2 | #define bnclassify_data_H 3 | 4 | #include 5 | 6 | bool hasna(const Rcpp::DataFrame & newdata); 7 | bool hasna_features(const Rcpp::DataFrame & newdata, const SEXP & features); 8 | // TODO: This should be called at instance level, not data frame! This way, if the data set is complete, it goes through it a couple of times. 9 | Rcpp::DataFrame trim_dataset_cpp(const Rcpp::DataFrame & dataset, const Rcpp::CharacterVector & features); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /man/complete_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-graph.r 3 | \name{complete_graph} 4 | \alias{complete_graph} 5 | \title{Returns a complete unweighted graph with the given nodes.} 6 | \usage{ 7 | complete_graph(nodes) 8 | } 9 | \arguments{ 10 | \item{nodes}{A character vector.} 11 | } 12 | \value{ 13 | a \code{graphNEL} object. 14 | } 15 | \description{ 16 | Returns a complete unweighted graph with the given nodes. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/trainLearner.bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{trainLearner.bnc} 4 | \alias{trainLearner.bnc} 5 | \title{trainLearner. Auxiliary mlr function.} 6 | \usage{ 7 | trainLearner.bnc(.learner, .task, .subset, .weights, ...) 8 | } 9 | \arguments{ 10 | \item{.learner, .task, .subset, .weights}{Internal.} 11 | 12 | \item{...}{Internal.} 13 | } 14 | \description{ 15 | trainLearner. Auxiliary mlr function. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/map.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{map} 4 | \alias{map} 5 | \title{Assigns instances to the most likely class.} 6 | \usage{ 7 | map(pred) 8 | } 9 | \arguments{ 10 | \item{pred}{A numeric matrix. Each row corresponds to class posterior 11 | probabilities for an instance.} 12 | } 13 | \value{ 14 | a factor with the same levels as the class variable. 15 | } 16 | \description{ 17 | Ties are resolved randomly. 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/local_ode_score_contrib.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-chowliu.R 3 | \name{local_ode_score_contrib} 4 | \alias{local_ode_score_contrib} 5 | \title{Returns pairwise component of ODE (penalized) log-likelihood scores. 6 | In natural logarithms.} 7 | \usage{ 8 | local_ode_score_contrib(x, y, class, dataset) 9 | } 10 | \description{ 11 | Returns pairwise component of ODE (penalized) log-likelihood scores. 12 | In natural logarithms. 
13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /meta/download-statistics.R: -------------------------------------------------------------------------------- 1 | df <- cranlogs::cran_downloads('bnclassify', from = "2014-01-01", to = "2018-01-01") 2 | sum(df$count) 3 | 4 | 5 | df <- cranlogs::cran_downloads('bnclassify', from = "2014-01-01", to = "2019-09-25") 6 | sum(df$count) 7 | 8 | # install.packages('adjustedcranlogs') 9 | library(adjustedcranlogs) 10 | df <- adjustedcranlogs::adj_cran_downloads('bnclassify', from = "2014-01-01", to = "2019-09-25") 11 | head(df) 12 | tail(df) 13 | plot(df$total_downloads) 14 | plot(df$adjusted_total_downloads) 15 | -------------------------------------------------------------------------------- /man/superparent_children.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-hc-operators.R 3 | \name{superparent_children} 4 | \alias{superparent_children} 5 | \title{Return nodes which can be superparents along with their possible children.} 6 | \usage{ 7 | superparent_children(bnc_dag) 8 | } 9 | \value{ 10 | list of \code{search_state}. NULL if no orphans 11 | } 12 | \description{ 13 | Return nodes which can be superparents along with their possible children. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /R/learn-aode.r: -------------------------------------------------------------------------------- 1 | #' Returns a Superparent one-dependence estimator. 2 | #' 3 | #' @param sp character The superparent. 4 | #' @keywords internal 5 | spode <- function(sp, features, class) { 6 | stopifnot(length(sp) == 1) 7 | stopifnot(length(class) == 1) 8 | stopifnot(class != sp) 9 | stopifnot(!class %in% features) 10 | features <- setdiff(features, sp) 11 | features_graph <- nb_dag(class = sp, features = features) 12 | dag <- superimpose_node(node = class, dag = features_graph) 13 | bnc_dag(dag = dag, class = class) 14 | } -------------------------------------------------------------------------------- /man/get_log_leaf_entries.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{get_log_leaf_entries} 4 | \alias{get_log_leaf_entries} 5 | \title{Assuming that the cpt is a leaf, returns 1 instead of a CPT entry when value missing} 6 | \usage{ 7 | get_log_leaf_entries(cpt, x) 8 | } 9 | \arguments{ 10 | \item{x}{a vector of values} 11 | } 12 | \description{ 13 | Assuming that the cpt is a leaf, returns 1 instead of a CPT entry when value missing 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/make_cll_gradient.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{make_cll_gradient} 4 | \alias{make_cll_gradient} 5 | \title{Returns a function to compute the gradient of negative conditional log-likelihood with respect to feature weights} 6 | \usage{ 7 | make_cll_gradient(class_var, dataset) 8 | } 9 | \description{ 10 | Returns a function to compute the gradient of negative conditional log-likelihood with respect to feature weights 11 | } 12 | 
\keyword{internal} 13 | -------------------------------------------------------------------------------- /meta/graph-partial.r: -------------------------------------------------------------------------------- 1 | # 2 | # # I could currently use undirected maybe 3 | # 4 | # 5 | # # Partially directed graph with D and U edges 6 | # ## Data 7 | # - The to - from matrix 8 | # - The edge_mode vector of same length as matrix rows 9 | # 10 | # ## Funcs 11 | # - add edge. does not add node 12 | # - add arc. does not add node 13 | # 14 | # # Undirected 15 | # 16 | # ## Funs 17 | # - in_graph. used for discard reversed, for example, or similar. 18 | # - compare_graphs 19 | # 20 | # - subgraph. 21 | # - direct tree. This is a point here. -------------------------------------------------------------------------------- /src/graph-internal.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | LogicalVector graph_node_parents_inds(CharacterMatrix edges, CharacterVector node) { 6 | if (node.size() != 1) stop("Must be a single element."); 7 | // TODO: access by column name!!! 8 | const CharacterMatrix::Column & to = edges(_, 1); 9 | LogicalVector ind = to == node; 10 | // const CharacterMatrix::Column & from = edges(_, 0); 11 | // return from(ind); 12 | // unname(edges[ind, 'from']) 13 | return ind; 14 | } 15 | 16 | -------------------------------------------------------------------------------- /man/accuracy.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{accuracy} 4 | \alias{accuracy} 5 | \title{Compute predictive accuracy.} 6 | \usage{ 7 | accuracy(x, y) 8 | } 9 | \arguments{ 10 | \item{x}{A vector of predicted labels.} 11 | 12 | \item{y}{A vector of true labels.} 13 | } 14 | \description{ 15 | Compute predictive accuracy. 16 | } 17 | \examples{ 18 | 19 | data(car) 20 | nb <- bnc('nb', 'class', car, smooth = 1) 21 | p <- predict(nb, car) 22 | accuracy(p, car$class) 23 | } 24 | -------------------------------------------------------------------------------- /man/identify_min_testing_depths.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-rpart.R 3 | \name{identify_min_testing_depths} 4 | \alias{identify_min_testing_depths} 5 | \title{Identifies the lowest (closest to root) depths at which the features of a 6 | classification tree are tested.} 7 | \usage{ 8 | identify_min_testing_depths(tree) 9 | } 10 | \description{ 11 | Identifies the lowest (closest to root) depths at which the features of a 12 | classification tree are tested. 
13 | } 14 | \keyword{internal} 15 | -------------------------------------------------------------------------------- /man/subset_by_colnames.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/basic-misc.R 3 | \name{subset_by_colnames} 4 | \alias{subset_by_colnames} 5 | \title{Subset a 2D structure by a vector of column names.} 6 | \usage{ 7 | subset_by_colnames(colnames, data) 8 | } 9 | \arguments{ 10 | \item{colnames}{a character vector} 11 | 12 | \item{data}{a matrix or data frame} 13 | } 14 | \description{ 15 | Not all colnames are necessarily in the columns of data; in that case this 16 | returns NA. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /vignettes/macros-bnclassify.tex: -------------------------------------------------------------------------------- 1 | \usepackage{subcaption} 2 | \def\tecvin/{``methods'' vignette} 3 | \def\tecvindesc/{details on the underlying methods and documents implementation specifics, especially where they differ from or are undocumented in the original paper} 4 | \def\invin/{``usage'' vignette} 5 | \def\invindesc/{detailed usage examples and shows how to combine the functions} 6 | \def\overvindesc/{an overview of the package and background on the implemented methods} 7 | \def\pkghelp/{concise overview of the functionalities, with pointers to relevant functions and their documentation} -------------------------------------------------------------------------------- /R/release.R: -------------------------------------------------------------------------------- 1 | release_questions <- function() { 2 | c( 3 | "Have you checked all TODOs in code?", 4 | "Have you removed all commented-out, unused code? Also in tests.", 5 | "Have you checked on Travis?", 6 | "Does cran_comments.md show the notes from R-devel?", 7 | "Does cran_comments.md specify the correct versions of R where tested?", 8 | "Is dont_run set for long-running examples?", 9 | "Did you build vignettes?", 10 | "Have you checked spelling in vignettes and help files?", 11 | "Did you update news? Look at the commits since last version" 12 | ) 13 | } -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry( 2 | bibtype = "Article", 3 | title = "bnclassify: Learning Bayesian Network Classifiers", 4 | author = "Mihaljevi\'{c}, Bojan", "Bielza, Concha", "Larranaga, Pedro", 5 | header = "To cite bnclassify in publications, please use", 6 | journal = "The R Journal", 7 | year = 2018, 8 | volume = 10, 9 | number = 2, 10 | pages = "455--468", 11 | doi = "10.32614/RJ-2018-073", 12 | textVersion = "Mihaljevic, Bojan, Pedro Larranaga, and Concha Bielza. 2018. bnclassify: Learning Bayesian Network Classifiers. The R Journal 10 (2): 455-468" 13 | ) -------------------------------------------------------------------------------- /man/direct_forest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{direct_forest} 4 | \alias{direct_forest} 5 | \title{Direct an undirected graph.} 6 | \usage{ 7 | direct_forest(g, root = NULL) 8 | } 9 | \arguments{ 10 | \item{g}{An undirected graph.} 11 | 12 | \item{root}{A character.
Optional tree root.} 13 | } 14 | \value{ 15 | A directed graph 16 | } 17 | \description{ 18 | Starting from a \code{root} node, directs all arcs away from it and applies 19 | the same recursively to its children and descendants. Produces a directed 20 | forest. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/identify_all_testing_depths.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-rpart.R 3 | \name{identify_all_testing_depths} 4 | \alias{identify_all_testing_depths} 5 | \title{Identifies all depths at which the features of a classification tree are 6 | tested.} 7 | \usage{ 8 | identify_all_testing_depths(tree) 9 | } 10 | \arguments{ 11 | \item{tree}{an \code{rpart} object} 12 | } 13 | \value{ 14 | a numeric vector. The names are the names of the variables. 15 | } 16 | \description{ 17 | Identifies all depths at which the features of a classification tree are 18 | tested. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/memoise_char.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/memoise.R 3 | \name{memoise_char} 4 | \alias{memoise_char} 5 | \title{Memoise a function.} 6 | \usage{ 7 | memoise_char(f) 8 | } 9 | \arguments{ 10 | \item{f}{a function} 11 | } 12 | \description{ 13 | Based on Hadley Wickham's memoise package. Assumes that the argument to f is a 14 | character vector. 15 | } 16 | \details{ 17 | This function is a slightly modified version of 18 | \code{memoise} to avoid the use of digest. The rest of the functions are 19 | copied as is from memoise. 20 | } 21 | \author{ 22 | Hadley Wickham, Bojan Mihaljevic 23 | } 24 | \keyword{internal} 25 | -------------------------------------------------------------------------------- /man/aode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-struct.R 3 | \name{aode} 4 | \alias{aode} 5 | \title{Learn an AODE ensemble.} 6 | \usage{ 7 | aode(class, dataset, features = NULL) 8 | } 9 | \arguments{ 10 | \item{class}{A character. Name of the class variable.} 11 | 12 | \item{dataset}{The data frame from which to learn the classifier.} 13 | 14 | \item{features}{A character vector. The names of the features. This argument 15 | is ignored if \code{dataset} is provided.} 16 | } 17 | \value{ 18 | A \code{bnc_aode} or a \code{bnc_dag} (if returning a naive Bayes) 19 | } 20 | \description{ 21 | If there is a single predictor then returns a naive Bayes.
22 | } 23 | -------------------------------------------------------------------------------- /R/cv-multi.r: -------------------------------------------------------------------------------- 1 | # Check the class is common to all dags 2 | get_common_class <- function(x) { 3 | class <- unique(vapply(x, class_var, FUN.VALUE = character(1))) 4 | # Check it is unique 5 | assertthat::is.string(class) 6 | class 7 | } 8 | ensure_list <- function(x, type = NULL) { 9 | if (!is_just(x, "list")) { 10 | x <- list(x) 11 | } 12 | if (!is.null(type)) { 13 | all_type <- all(vapply(x, inherits, type, FUN.VALUE = logical(1))) 14 | if (!all_type) stop(paste0("All elements must inherit from ", type)) 15 | } 16 | x 17 | } 18 | # Unnamed so that it would pass no names to objects created by iterating on it 19 | ensure_multi_list <- function(x, type = NULL) { 20 | unname(ensure_list(x, type = type)) 21 | } -------------------------------------------------------------------------------- /man/as_mlr.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wrap-mlr.R 3 | \name{as_mlr} 4 | \alias{as_mlr} 5 | \title{Convert to \code{mlr}.} 6 | \usage{ 7 | as_mlr(x, dag, id = "1") 8 | } 9 | \arguments{ 10 | \item{x}{A \code{\link{bnc_bn}} object.} 11 | 12 | \item{dag}{A logical. Whether to learn structure on each training subsample. 13 | Parameters are always learned.} 14 | 15 | \item{id}{A character.} 16 | } 17 | \description{ 18 | Convert a \code{\link{bnc_bn}} to a \code{\link[mlr]{Learner}} 19 | object.
20 | } 21 | \examples{ 22 | data(car) 23 | nb <- bnc('nb', 'class', car, smooth = 1) 24 | \dontrun{library(mlr)} 25 | \dontrun{nb_mlr <- as_mlr(nb, dag = FALSE, id = "ode_cl_aic")} 26 | \dontrun{nb_mlr} 27 | } 28 | -------------------------------------------------------------------------------- /tests/testthat/test-basic-assert.R: -------------------------------------------------------------------------------- 1 | context("Assert") 2 | 3 | test_that("non-empty complete Nominal ", { 4 | stopifnot(is_non_empty_complete(letters)) 5 | }) 6 | test_that("non-empty complete empty", { 7 | expect_error(check_non_empty_complete(NULL), "complete") 8 | expect_error(check_non_empty_complete(character()), "complete") 9 | }) 10 | test_that("non-empty complete 1 NA", { 11 | expect_error(check_non_empty_complete(c(letters, NA)), "complete") 12 | expect_error(check_non_empty_complete(NA), "complete") 13 | expect_error(check_non_empty_complete(rep(NA, 1e2)), "complete") 14 | }) 15 | test_that("is just list",{ 16 | expect_true(is_just(list(), "list")) 17 | t <- structure(list(), class="test") 18 | expect_true(!is_just(t, "list")) 19 | }) -------------------------------------------------------------------------------- /man/bnc_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \name{bnc_dag} 4 | \alias{bnc_dag} 5 | \title{Bayesian network classifier structure.} 6 | \description{ 7 | A Bayesian network classifier structure, returned by functions such as 8 | \code{\link{nb}} and \code{\link{tan_cl}}. You can plot its structure (with 9 | \code{\link[=plot.bnc_dag]{plot}}), print a summary to console 10 | (\code{\link[=print.bnc_base]{print}}), inspect it with functions documented 11 | in \code{\link{inspect_bnc_dag}}, and convert it to a graph object with 12 | \code{\link{grain_and_graph}}. 
13 | } 14 | \examples{ 15 | data(car) 16 | nb <- tan_cl('class', car) 17 | nb 18 | \dontrun{plot(nb)} 19 | narcs(nb) 20 | } 21 | -------------------------------------------------------------------------------- /meta/submit.R: -------------------------------------------------------------------------------- 1 | ## ========================================= ## 2 | ## Submit ## 3 | ## ========================================= ## 4 | 5 | # must build vignettes first 6 | devtools::build_vignettes() 7 | # Must use the build args for the check to pass on Windows 8 | build_args <- c('--resave-data','--compact-vignettes=both') 9 | devtools::build(args = build_args ) 10 | 11 | check_args <- '--as-cran' 12 | # check_version = TRUE # seems this argument of devtools::check has been removed 13 | devtools::check(args = check_args , cran = TRUE, build_args = build_args ) 14 | 15 | # Full, slow release with questions 16 | # devtools::release(args = build_args) 17 | # Quick submission 18 | # devtools::submit_cran(args = build_args) -------------------------------------------------------------------------------- /meta/methods.csv: -------------------------------------------------------------------------------- 1 | Structure,Structure learning,,,,,Parameter learning,,,Other 2 | Structure,Search algorithm,Score,Feature selection,Function,Paper,Method Name,Paper,Feature selection, 3 | NB,-,-,-,nb,Minsky1961,MANB,Dash2002,Bayesian,Plotting* 4 | TAN/FAN,CL-ODE,"log-lik, AIC, BIC",-,tan_cl,Friedman1997,AWNB,Hall2007,Weighting,cross-validation 5 | TAN,TAN-HC,accuracy,-,tan_hc,Keogh2002,WANBIA,Zaidi2013,Weighting,"is_ode(), is_nb(), …" 6 | TAN,TAN-HCSP,accuracy,-,tan_hcsp,Keogh2002,,,,"features(), classes()" 7 | SNB ,FSSJ,accuracy,forward,fssj,Pazzani1996,,,,families() 8 | SNB ,BSEJ,accuracy,backward,bsej,Pazzani1996,,,,as_bnlearn() 9 | AODE,-,-,-,aode,Webb2005,,,,as_mlr() 10 | ODE-MNET,CL-MNET,"log-lik, AIC, BIC",-,mnet_cl,Friedman1997,,,,predict() 11 | kDB,kDB,accuracy,-,kdb,Blanco2005,,,, 12 | -------------------------------------------------------------------------------- /src/basic-probs.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | using namespace std::placeholders; 4 | 5 | // [[Rcpp::export]] 6 | NumericVector smooth_sideeffect(NumericVector ctgt, double smooth) { 7 | transform(ctgt.begin(), ctgt.end(), ctgt.begin(), 8 | bind(std::plus(), _1, smooth)); 9 | return ctgt; 10 | } 11 | 12 | // [[Rcpp::export]] 13 | NumericVector exp_sideeffect(NumericVector p) { 14 | double (*dexp)(double) = &std::exp; 15 | std::transform(p.begin(), p.end(), p.begin(), dexp); 16 | return p; 17 | } 18 | 19 | /*** R 20 | a <- c(0.2, 2, -1, 0) 21 | b <- c(0.2, 2, -1, 0) 22 | exp_sideeffect(a) 23 | stopifnot(all.equal(log(a), b)) 24 | 25 | a <- 1:5 26 | smooth_sideeffect(a, 10) 27 | smooth_sideeffect(a, -1) 28 | smooth_sideeffect(a, 1:5) 29 | */ 30 | -------------------------------------------------------------------------------- /man/nb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-struct.R 3 | \name{nb} 4 | \alias{nb} 5 | \title{Learn a naive Bayes network structure.} 6 | \usage{ 7 | nb(class, dataset = NULL, features = NULL) 8 | } 9 | \arguments{ 10 | \item{class}{A character. 
Name of the class variable.} 11 | 12 | \item{dataset}{The data frame from which to learn the classifier.} 13 | 14 | \item{features}{A character vector. The names of the features. This argument 15 | is ignored if \code{dataset} is provided.} 16 | } 17 | \value{ 18 | A \code{\link{bnc_dag}} object. 19 | } 20 | \description{ 21 | Learn a naive Bayes network structure. 22 | } 23 | \examples{ 24 | 25 | data(car) 26 | nb <- nb('class', car) 27 | nb2 <- nb('class', features = letters[1:10]) 28 | \dontrun{plot(nb2)} 29 | } 30 | -------------------------------------------------------------------------------- /vignettes/macros-math.tex: -------------------------------------------------------------------------------- 1 | \usepackage{centernot} 2 | \newcommand{\bigCI}{\mathrel{\text{\scalebox{1.07}{$\perp\mkern-10mu\perp$}}}} 3 | \newcommand{\nbigCI}{\centernot{\bigCI}} 4 | \newcommand{\CI}{\mathrel{\perp\mspace{-10mu}\perp}} 5 | \newcommand{\nCI}{\centernot{\CI}} 6 | 7 | \DeclareMathOperator*{\argmax}{arg\,max} 8 | 9 | \def\X/{\ensuremath{\mathbf{X}}} 10 | \def\x/{\ensuremath{\mathbf{x}}} 11 | \newcommand{\ith}[1]{\ensuremath{#1^{(i)}}} 12 | \def\coef/{\boldsymbol{\beta}} 13 | \def\mthetas/{\ensuremath{\boldsymbol{\theta}}} 14 | \def\gstuc/{\ensuremath{\mathcal{G}}} 15 | \def\pcgx/{\ensuremath{P(c\mid\mathbf{x})}} 16 | \def\pcxemp/{\ensuremath{\widehat P(c, \mathbf{x})}} 17 | \def\pcx/{\ensuremath{P(c, \mathbf{x})}} 18 | \def\PCX/{\ensuremath{P(C, \mathbf{X})}} 19 | \def\PCGX/{\ensuremath{P(C\mid\mathbf{X})}} 20 | -------------------------------------------------------------------------------- /tests/testthat/test-memoise.R: -------------------------------------------------------------------------------- 1 | context("memoise") 2 | 3 | test_that("memoise char nominal", { 4 | p <- function(x) { 5 | print(sample(letters, 1)) 6 | return() 7 | } 8 | set.seed(0) 9 | m <- memoise_char(p) 10 | expect_output({invisible(m(letters)); invisible(m(letters)) }, "x") 11 | set.seed(0) 12 | m <- memoise_char(p); 13 | expect_output({invisible(m("a")); invisible(m("a")) }, "x") 14 | m <- memoise_char(p) 15 | expect_error(m(NULL)) 16 | }) 17 | 18 | test_that("forget nominal", { 19 | p <- function(x) { 20 | print(sample(letters, 1)) 21 | return() 22 | } 23 | suppressWarnings(RNGversion("3.5.0")) 24 | set.seed(0) 25 | m <- memoise_char(p) 26 | expect_output({invisible(m(letters)); invisible(m(letters)) }, "x") 27 | forget(m) 28 | expect_output({invisible(m(letters)); }, "g") 29 | }) -------------------------------------------------------------------------------- /inst/include/basic-misc.h: -------------------------------------------------------------------------------- 1 | #ifndef bnclassify_basicmisc_H 2 | #define bnclassify_basicmisc_H 3 | 4 | #include 5 | 6 | bool are_disjoint(Rcpp::Nullable x, Rcpp::Nullable y); 7 | /** 8 | * A comparison that does not raise a compiler warning. 9 | */ 10 | bool safediff(unsigned int x, int y); 11 | /** 12 | * A set diff which preserves the order in the first vector. This is because rcpp setdiff does not preserve it. 13 | */ 14 | std::vector ordersetdiff(Rcpp::CharacterVector vector, Rcpp::CharacterVector remove); 15 | /** 16 | * 0-based match. (1 less than what Rcpp returns) 17 | * rcpp match was returning -2147483648 when not finding the value, and the any() test was failing, thus implemented without rcpp. 
18 | */ 19 | std::vector match_zero_based(const Rcpp::CharacterVector & subset, const Rcpp::CharacterVector & superset, const std::string error_message); 20 | 21 | #endif -------------------------------------------------------------------------------- /man/grain_and_graph.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-dag.R, R/wrap-gRain.R 3 | \name{grain_and_graph} 4 | \alias{grain_and_graph} 5 | \alias{as_igraph} 6 | \alias{as_grain} 7 | \title{Convert to igraph and gRain.} 8 | \usage{ 9 | as_igraph(x) 10 | 11 | as_grain(x) 12 | } 13 | \arguments{ 14 | \item{x}{The \code{\link{bnc_bn}} object. The Bayesian network classifier.} 15 | } 16 | \description{ 17 | Convert a \code{\link{bnc_dag}} to \code{igraph} and 18 | \code{\link[gRain]{grain}} objects. 19 | } 20 | \section{Functions}{ 21 | \itemize{ 22 | \item \code{as_igraph()}: Convert to a graphNEL. 23 | 24 | \item \code{as_grain()}: Convert to a grain. 25 | 26 | }} 27 | \examples{ 28 | data(car) 29 | nb <- bnc('nb', 'class', car, smooth = 1) 30 | # Requires the grain and igraph packages installed 31 | \dontrun{g <- as_grain(nb)} 32 | \dontrun{gRain::querygrain.grain(g)$buying} 33 | } 34 | -------------------------------------------------------------------------------- /R/learn-params-awnb.R: -------------------------------------------------------------------------------- 1 | awnb <- function(class, dataset, trees = NULL, bootstrap_size = NULL) { 2 | if (is.null(trees)) trees <- 10 3 | if (is.null(bootstrap_size)) bootstrap_size <- 0.5 4 | # For each tree, get a bootstrap subsample 5 | subsamples <- replicate(trees, bootstrap_ss(dataset, bootstrap_size), 6 | simplify = FALSE) 7 | # From each sample, learn tree 8 | Wtrees <- lapply(subsamples, learn_unprunned_tree, class) 9 | # For each tree get minimum testing depth 10 | depths <- lapply(Wtrees, identify_min_testing_depths) 11 | depths <- unlist(depths, use.names = TRUE) 12 | if (length(depths) == 0) stop("Only empty trees have been learned.") 13 | features <- get_features(class, dataset) 14 | unused_features <- features[!(features %in% names(depths))] 15 | depths[unused_features] <- Inf 16 | # Compute weights and average across the features 17 | tapply(depths, names(depths), 18 | function(x) sum(x ^ -0.5), simplify = TRUE) / trees 19 | } -------------------------------------------------------------------------------- /R/frontend-graph.r: -------------------------------------------------------------------------------- 1 | # TODO use only graph internal and remove these functions. 2 | 3 | # Gets the parents of a node in the graph 4 | # Eeach nodes' parents. 5 | # return Named list of characters. 6 | graphNEL_parents <- function(g) { 7 | graph_parents(g) 8 | } 9 | subgraph <- function(vars, x) { 10 | graph_subgraph(vars, x) 11 | } 12 | connected_components <- function(x) { 13 | graph_connected_components(x) 14 | } 15 | 16 | add_edges <- function(from, to, x) { 17 | graph_add_edges(from, to, x) 18 | } 19 | add_node <- function(node, x) { 20 | graph_add_node(node, x) 21 | } 22 | remove_node <- function(node, x) { 23 | graph_remove_node(node, x) 24 | } 25 | num_arcs <- function(x) { 26 | graph_num_arcs(x) 27 | } 28 | #' Returns a complete unweighted graph with the given nodes. 29 | #' 30 | #' @param nodes A character vector. 31 | #' @return a \code{graphNEL} object. 
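# A worked check of the AWNB weights computed in R/learn-params-awnb.R above:
# each feature's weight is the sum over bootstrap trees of 1 / sqrt(minimum
# testing depth), divided by the number of trees; features absent from a tree
# get depth Inf and so contribute 0. For a feature tested at depth 1 in one
# tree and depth 4 in another, with trees = 2:
(1 / sqrt(1) + 1 / sqrt(4)) / 2   # 0.75, i.e. sum(c(1, 4) ^ -0.5) / 2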
32 | #' @keywords internal 33 | complete_graph <- function(nodes) { 34 | graph_complete_undirected(nodes) 35 | } 36 | -------------------------------------------------------------------------------- /man/cmi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-statistics.R 3 | \name{cmi} 4 | \alias{cmi} 5 | \title{Compute the (conditional) mutual information between two variables.} 6 | \usage{ 7 | cmi(x, y, dataset, z = NULL, unit = "log") 8 | } 9 | \arguments{ 10 | \item{x}{A length one character.} 11 | 12 | \item{y}{A length one character.} 13 | 14 | \item{dataset}{A data frame. Must contain x, y and, optionally, z columns.} 15 | 16 | \item{z}{A character vector.} 17 | 18 | \item{unit}{A character. Logarithm base. See \code{entropy} package.} 19 | } 20 | \description{ 21 | Computes the (conditional) mutual information between two variables. If 22 | \code{z} is not \code{NULL} then returns the conditional mutual information, 23 | \eqn{I(X;Y|Z)}. Otherwise, returns mutual information, \eqn{I(X;Y)}. 24 | } 25 | \details{ 26 | \eqn{I(X;Y|Z) = H(X|Z) + H(Y|Z) - H(X,Y,Z) - H(Z)}, where \eqn{H()} is 27 | Shannon's entropy. 28 | } 29 | \examples{ 30 | data(car) 31 | cmi('maint', 'class', car) 32 | } 33 | -------------------------------------------------------------------------------- /vignettes/includes/abstract.rmd: -------------------------------------------------------------------------------- 1 | \abstract{The \pkg{bnclassify} package provides state-of-the art algorithms for learning Bayesian network classifiers from data. For structure learning it provides variants of the greedy hill-climbing search, a well-known adaptation of the Chow-Liu algorithm and averaged one-dependence estimators. It provides Bayesian and maximum likelihood parameter estimation, as well as three naive-Bayes-specific methods based on discriminative score optimization and Bayesian model averaging. The implementation is efficient enough to allow for time-consuming discriminative scores on medium-sized data sets. The \pkg{bnclassify} package provides utilities for model evaluation, such as cross-validated accuracy and penalized log-likelihood scores, and analysis of the underlying networks, including network plotting via the \pkg{igraph} package. It is extensively tested, with over 200 automated tests that give a code coverage of 94\%. Here we present the main functionalities, illustrate them with a number of data sets, and comment on related software.} -------------------------------------------------------------------------------- /man/bnc_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R 3 | \name{bnc_bn} 4 | \alias{bnc_bn} 5 | \title{Bayesian network classifier with structure and parameters.} 6 | \description{ 7 | A Bayesian network classifier with structure and parameters. Returned by 8 | \code{\link{lp}} and \code{\link{bnc}} functions. You can use it to classify 9 | data (with \code{\link[=predict.bnc_fit]{predict}}). 
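A short sketch of the z argument of cmi() documented above, which the example does not cover; class-conditional mutual information of this kind is also what the CL-ODE search behind tan_cl() uses as pairwise edge weights (see max_weight_forest further below):

data(car)
cmi('maint', 'buying', car)               # I(maint; buying)
cmi('maint', 'buying', car, z = 'class')  # I(maint; buying | class)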
Can estimate its 10 | predictive accuracy with \code{\link{cv}}, plot its structure (with 11 | \code{\link[=plot.bnc_dag]{plot}}), print a summary to console 12 | (\code{\link[=print.bnc_base]{print}}), inspect it with functions documented 13 | in \code{\link{inspect_bnc_bn}} and \code{\link{inspect_bnc_dag}}, and 14 | convert it to mlr, grain, and graph objects --see \code{\link{as_mlr}} and 15 | \code{\link{grain_and_graph}}. 16 | } 17 | \examples{ 18 | data(car) 19 | tan <- bnc('tan_cl', 'class', car, smooth = 1) 20 | tan 21 | p <- predict(tan, car) 22 | head(p) 23 | \dontrun{plot(tan)} 24 | nparams(tan) 25 | } 26 | -------------------------------------------------------------------------------- /R/bncs.R: -------------------------------------------------------------------------------- 1 | #' Returns a \code{c("bnc_aode", "bnc")} object. 2 | #' @keywords internal 3 | bnc_aode <- function(models, class_var, features) { 4 | stopifnot(length(models) > 0, identical(names(models), unname(features))) 5 | stopifnot(all(vapply(models, is_ode, FUN.VALUE = logical(1)))) 6 | bnc <- bnc_base(class = class_var, features = features) 7 | bnc$.models <- models 8 | class(bnc) <- c('bnc_aode', class(bnc)) 9 | bnc 10 | } 11 | #' Fits an AODE model. 12 | #' @keywords internal 13 | bnc_aode_bns <- function(x, fit_models) { 14 | stopifnot(inherits(x, 'bnc_aode')) 15 | x$.models <- fit_models 16 | class(x) <- c('bnc_aode_bns', class(x), 'bnc_fit') 17 | x 18 | } 19 | #' Is it en AODE? 20 | #' 21 | #' @keywords internal 22 | is_aode <- function(x) { 23 | if (!inherits(x, c('bnc_aode'))) return (FALSE) 24 | if (length(x$.models) < 2) return (FALSE) 25 | all(sapply(x$.models, is_ode)) # TODO Should be is spode 26 | } 27 | nmodels <- function(x) { 28 | stopifnot(inherits(x, 'bnc_aode')) 29 | length(x$.models) 30 | } 31 | models <- function(x) { 32 | stopifnot(inherits(x, 'bnc_aode')) 33 | x$.models 34 | } -------------------------------------------------------------------------------- /man/max_weight_forest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/frontend-anb.r 3 | \name{max_weight_forest} 4 | \alias{max_weight_forest} 5 | \title{Returns the undirected augmenting forest.} 6 | \usage{ 7 | max_weight_forest(g) 8 | } 9 | \arguments{ 10 | \item{g}{A graph. The undirected graph with pairwise 11 | weights.} 12 | } 13 | \value{ 14 | A graph. The maximum spanning forest. 15 | } 16 | \description{ 17 | Uses Kruskal's algorithm to find the augmenting forest that maximizes the sum 18 | of pairwise weights. When the weights are class-conditional mutual 19 | information this forest maximizes the likelihood of the tree-augmented naive 20 | Bayes network. 21 | } 22 | \details{ 23 | If \code{g} is not connected than this will return a forest; otherwise it is 24 | a tree. 25 | } 26 | \references{ 27 | Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network 28 | classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. 29 | 30 | Murphy KP (2012). \emph{Machine learning: a probabilistic perspective}. The 31 | MIT Press. pp. 912-914. 
32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/compute_wanbia_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params-wanbia.R 3 | \name{compute_wanbia_weights} 4 | \alias{compute_wanbia_weights} 5 | \title{Compute WANBIA weights. 6 | 7 | Computes feature weights by optimizing conditional log-likelihood. 8 | Weights are bounded to [0, 1]. Implementation based on the original paper 9 | and the code provided at \url{https://sourceforge.net/projects/rawnaivebayes}.} 10 | \usage{ 11 | compute_wanbia_weights(class, dataset, return_optim_object = FALSE) 12 | } 13 | \arguments{ 14 | \item{class}{A character. Name of the class variable.} 15 | 16 | \item{dataset}{The data frame from which to learn feature weights} 17 | 18 | \item{return_optim_object}{Return full output of `optim`} 19 | } 20 | \value{ 21 | a named numeric vector 22 | } 23 | \description{ 24 | Compute WANBIA weights. 25 | 26 | Computes feature weights by optimizing conditional log-likelihood. 27 | Weights are bounded to [0, 1]. Implementation based on the original paper 28 | and the code provided at \url{https://sourceforge.net/projects/rawnaivebayes}. 29 | } 30 | \keyword{internal} 31 | -------------------------------------------------------------------------------- /R/wrap-igraph.R: -------------------------------------------------------------------------------- 1 | graphNEL2_graph_internal <- function(x) { 2 | stopifnot(inherits(x, "igraph")) 3 | nodes <- igraph::V(x)$name 4 | edges <- igraph::as_edgelist(x) # Assuming 'x' is an igraph object 5 | colnames(edges) <- c("from", "to") 6 | # edges <- t(named_edge_matrix(x)) 7 | weights <- NULL 8 | tryCatch({ 9 | # Will fail if it does not have the attribute. 10 | weights <- igraph::E(x)$weight 11 | }, error = function(e) {}) 12 | edgemode <- ifelse(igraph::is_directed(x), "directed", "undirected") 13 | weights <- igraph::E(x)$weight 14 | graph_internal(nodes, edges, weights, edgemode) 15 | } 16 | graph_internal2graph_NEL <- function(x) { 17 | stopifnot(inherits( x, "bnc_graph_internal")) 18 | edges <- x$edges 19 | edgemode <- ifelse(x$edgemode == "directed", TRUE, FALSE) 20 | graph <- igraph::graph_from_edgelist(edges, directed = edgemode) 21 | weights <- x$weight 22 | if (length(weights ) > 0) { 23 | stopifnot(length(weights ) == length(igraph::E(graph))) 24 | igraph::set_edge_attr(graph, "weight", value = weights) 25 | } 26 | # TODO: handle undirected. If directed, then build directed graph in BH. 27 | graph 28 | } -------------------------------------------------------------------------------- /man/plot.bnc_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bnc-dag-operate.R 3 | \name{plot.bnc_dag} 4 | \alias{plot.bnc_dag} 5 | \title{Plot the structure.} 6 | \usage{ 7 | \method{plot}{bnc_dag}(x, y, layoutType = "dot", fontsize = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{x}{The \code{\link{bnc_dag}} object. The Bayesian network classifier 11 | structure.} 12 | 13 | \item{y}{Not used} 14 | 15 | \item{layoutType}{a character. Optional.} 16 | 17 | \item{fontsize}{integer Font size for node labels. 
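Returning to compute_wanbia_weights() documented above: it is internal, and the weights are normally obtained through the user-level interface instead. A sketch based on the wanbia argument of bnc() and lp(), with the call form taken from the package's tests:

data(car)
nb_w <- bnc('nb', 'class', car, smooth = 1, wanbia = TRUE)
head(predict(nb_w, car))
# equivalently, weight an already learned structure:
# lp(nb('class', car), car, smooth = 1, wanbia = TRUE)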
Optional.} 18 | 19 | \item{...}{Not used.} 20 | } 21 | \description{ 22 | If node labels are to small to be viewed properly, you may fix label fontsize 23 | with argument fontsize. Also, you may try multiple different layouts. 24 | } 25 | \examples{ 26 | 27 | 28 | # Requires the igraph package to be installed. 29 | data(car) 30 | nb <- nb('class', car) 31 | nb <- nb('class', car) 32 | \dontrun{plot(nb)} 33 | \dontrun{plot(nb, fontsize = 20)} 34 | \dontrun{plot(nb, layoutType = 'circo')} 35 | \dontrun{plot(nb, layoutType = 'fdp')} 36 | \dontrun{plot(nb, layoutType = 'osage')} 37 | \dontrun{plot(nb, layoutType = 'twopi')} 38 | \dontrun{plot(nb, layoutType = 'neato')} 39 | } 40 | -------------------------------------------------------------------------------- /src/anb-operate.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // Returns a subset of the edges so that any reversed ones are removed. 5 | // The last ones are kept, that is, those lower in the matrix 6 | // This should be a DAG function; 7 | // [[Rcpp::export]] 8 | LogicalVector find_non_reversed(CharacterMatrix x) { 9 | int n = x.nrow(); 10 | if (n == 0) { 11 | return LogicalVector(0); 12 | } 13 | std::vector unique(n, true); 14 | // Skip last element in the loop, consider it unique 15 | for (int row = n - 2; row >= 0; row--) { 16 | const CharacterMatrix::Row & this_row = x(row, _); 17 | // a row is unique, if non of those before is identical to it 18 | // in the reversed matrix 19 | int j = row; 20 | while (j < n - 1) { 21 | j++; 22 | // no need to check for those known to be duplicates, as that would be repeating 23 | if (unique.at(j)) { 24 | const CharacterMatrix::Row & reversed_row = x(j, _); 25 | if (reversed_row[0] == this_row[1] && reversed_row[1] == this_row[0]) { 26 | unique.at(row) = false ; 27 | // j <- n 28 | break; 29 | } 30 | } 31 | } 32 | } 33 | return wrap(unique); 34 | } -------------------------------------------------------------------------------- /src/data.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | using namespace Rcpp; 4 | 5 | // [[Rcpp::export]] 6 | bool hasna_features(const DataFrame & newdata, const SEXP & features) 7 | { 8 | // Shallow object so not a big problem with copy 9 | DataFrame data = newdata; 10 | if (!Rf_isNull(features)) { 11 | // CharacterVector feats(features); 12 | data = trim_dataset_cpp(data, features); 13 | } 14 | return hasna(data); 15 | } 16 | 17 | // [[Rcpp::export]] 18 | bool hasna(const DataFrame & newdata) 19 | { 20 | for (int i = 0; i < newdata.size(); i++) { 21 | const IntegerVector & vec = newdata.at(i); 22 | if (is_true(any(is_na(vec)))) return true; 23 | } 24 | return false; 25 | } 26 | 27 | // [[Rcpp::export]] 28 | DataFrame trim_dataset_cpp(const DataFrame & dataset, const CharacterVector & features) 29 | { 30 | const Rcpp::CharacterVector & columns = dataset.names(); 31 | if (!is_true(all(in(features, columns )))) { 32 | Rcpp::stop("Some features missing from data set."); 33 | } 34 | // Rcpp intersect may alter order of columns, but irrelevant here 35 | Rcpp::CharacterVector keep = Rcpp::intersect(columns, features); 36 | DataFrame data = dataset[keep]; 37 | return data; 38 | } -------------------------------------------------------------------------------- /vignettes/macros-rjournal.tex: -------------------------------------------------------------------------------- 1 | % implementation of rjournal commands 2 | % re-implement 
code to let is be used with default pandoc template 3 | 4 | \newcommand{\kbd}[1]{{\normalfont\texttt{#1}}} 5 | \newcommand{\key}[1]{{\normalfont\texttt{\uppercase{#1}}}} 6 | \DeclareRobustCommand\samp{`\bgroup\@noligs\@sampx} 7 | \def\@sampx#1{{\normalfont\texttt{#1}}\egroup'} 8 | \newcommand{\var}[1]{{\normalfont\textsl{#1}}} 9 | \newcommand{\file}[1]{{`\normalfont\textsf{#1}'}} 10 | \newcommand{\code}[1]{\texttt{#1}} 11 | \let\option=\samp 12 | \newcommand{\dfn}[1]{{\normalfont\textsl{#1}}} 13 | % \acronym is effectively disabled since not used consistently 14 | \newcommand{\acronym}[1]{#1} 15 | \newcommand{\strong}[1]{\texorpdfstring% 16 | {{\normalfont\fontseries{b}\selectfont #1}}% 17 | {#1}} 18 | \let\pkg=\strong 19 | \newcommand{\CRANpkg}[1]{\href{https://CRAN.R-project.org/package=#1}{\pkg{#1}}}% 20 | \let\cpkg=\CRANpkg 21 | \newcommand{\ctv}[1]{\href{https://CRAN.R-project.org/view=#1}{\emph{#1}}} 22 | \newcommand{\BIOpkg}[1]{\href{https://www.bioconductor.org/packages/release/bioc/html/#1.html}{\pkg{#1}}} 23 | 24 | 25 | \RequirePackage{fancyvrb} 26 | \RequirePackage{alltt} 27 | 28 | \DefineVerbatimEnvironment{example}{Verbatim}{} 29 | \renewenvironment{example*}{\begin{alltt}}{\end{alltt}} 30 | -------------------------------------------------------------------------------- /R/anb-internal.R: -------------------------------------------------------------------------------- 1 | # all functions with begin wit anb_ 2 | # dag2modelstring. modelstring2dag. 3 | # When buliding a DAG, I will need the parents list per variable. 4 | # dag is a type of adj list where each has only its parents, and also includes itself in the list. 5 | # IF the DAG is topologically sorted, then class cannot be the last CPT. This is not important anyway; not a requirement. 6 | # This is related to the anb-families. It is the anb class. 7 | 8 | # TODO 'families' is just a way to represent the anb that corresponds to the cpts. but could be internal to the anb object. if indeed doing that, i would need to transform from families to adjacency lists or similar. 9 | 10 | anb_internal <- function() { 11 | } 12 | # THIS SHOULD RETURN A c("anb", "dag"), as dag is the more general class. 13 | 14 | anb_make_nb <- function(class, features) { 15 | # Check class is character and length one, features is length 0 or character, 16 | # class is not in features. 
17 | check_features(features, class) 18 | # If > 0 features, add arc from class to each of them 19 | narcs <- length(features) 20 | arcs <- graph_from_to_to_edges(rep(class, narcs), features) 21 | # Set nodes as class + features 22 | nodes <- c(class, features) 23 | g <- graph_internal(nodes, arcs) 24 | g 25 | } -------------------------------------------------------------------------------- /R/anb-bn.R: -------------------------------------------------------------------------------- 1 | make_bnc_bn <- function(bnc_dag, params) { 2 | bnc_dag$.params <- params 3 | class(bnc_dag) <- c('bnc_bn', class(bnc_dag), 'bnc_fit') 4 | bnc_dag 5 | } 6 | bn2dag <- function(x) { 7 | stopifnot(inherits(x, "bnc_dag")) 8 | if (is_just(x, "bnc_dag")) { 9 | x 10 | } 11 | else { 12 | make_bnc_dag(class = class_var(x), families = families(x), 13 | dag = dag(x)) 14 | } 15 | } 16 | check_bnc_bn <- function(x) { 17 | check_bnc_dag(x) 18 | # Check CPT families match original families equal to the families of bnc 19 | params <- params(x) 20 | stopifnot(identical(cpts2families(params), families(x))) 21 | stopifnot(identical(names(values(x)), names(vars(x)))) 22 | } 23 | # Accessors 24 | 25 | #' @export 26 | #' @describeIn inspect_bnc_bn Returns the list of CPTs, in the same order as \code{\link{vars}}. 27 | params <- function(x) { 28 | stopifnot(inherits(x, "bnc_bn")) 29 | x$.params 30 | } 31 | #' @export 32 | #' @describeIn inspect_bnc_bn Returns the possible values of each variable, in the same order as \code{\link{vars}}. 33 | values <- function(x) { 34 | cpt_vars_values(params(x)) 35 | } 36 | #' @export 37 | #' @describeIn inspect_bnc_bn Returns the possible values of the class variable. 38 | classes <- function(x) { 39 | values(x)[[class_var(x)]] 40 | } -------------------------------------------------------------------------------- /tests/testthat/test-anb-cpts.R: -------------------------------------------------------------------------------- 1 | context("CPTs") 2 | 3 | test_that("subset_cpt single value", { 4 | cp <- extract_cpt(colnames(car)[5:7], car, smooth = 1) 5 | p <- subset_cpt(cp, list(lug_boot=1, safety=2, class=1)) 6 | expect_null(dim(p)) 7 | }) 8 | 9 | test_that("subset_cpt 1D cpt", { 10 | cp <- extract_cpt(colnames(car)[7], car, smooth = 1) 11 | obs <- vapply(car[, 5:6], as.integer, FUN.VALUE = integer(nrow(car))) 12 | expect_error(p <- subset_cpt(cp, obs), "vars") 13 | }) 14 | 15 | test_that("cpt cache nominal", { 16 | cache <- make_cpts_cache(car, smooth = 1) 17 | expect_identical(cache('class'), extract_cpt('class', car, smooth = 1)) 18 | expect_identical(cache(c('buying', 'class')), 19 | extract_cpt(c('buying', 'class'), car, smooth = 1)) 20 | }) 21 | 22 | test_that("cpt cache forget", { 23 | cache <- make_cpts_cache(car, smooth = 1) 24 | expect_identical(cache('class'), extract_cpt('class', car, smooth = 1)) 25 | expect_identical(cache(c('buying', 'class')), 26 | extract_cpt(c('buying', 'class'), car, smooth = 1)) 27 | expect_true(forget(cache)) 28 | expect_identical(cache('class'), extract_cpt('class', car, smooth = 1)) 29 | expect_identical(cache(c('buying', 'class')), 30 | extract_cpt(c('buying', 'class'), car, smooth = 1)) 31 | }) 32 | -------------------------------------------------------------------------------- /vignettes/overview.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "`bnclassify`: Learning Bayesian Network Classifiers" 3 | author: "Bojan Mihaljević, Concha Bielza and Pedro Larrañaga" 4 | date: "`r Sys.Date()`" 5 | 
output: 6 | rmarkdown::pdf_document: 7 | number_sections: true 8 | keep_tex: true 9 | includes: 10 | in_header: header.tex 11 | citation_package: natbib 12 | bibliography: bnclassify.bib 13 | fontsize: 11pt 14 | vignette: > 15 | %\VignetteIndexEntry{overview} 16 | %\VignetteEngine{knitr::rmarkdown} 17 | %\VignetteEncoding{UTF-8} 18 | --- 19 | 20 | ```{r child = 'includes/abstract.rmd'} 21 | ``` 22 | \tableofcontents 23 | ```{r child = 'includes/intro.rmd'} 24 | ``` 25 | The rest of this paper is structured as follows. Section \ref{sec:bcground} provides background on Bayesian network classifiers. Section \ref{sec:functionalities} describes the implemented functionalities. Section \ref{sec:usage} illustrates usage with a synthetic data set. Section \ref{sec:implementation} discusses implementation while Section \ref{sec:relatedsw} briefly surveys related software. Finally, Section \ref{sec:conclusion} concludes and outlines future work. 26 | ```{r child = 'includes/background.rmd'} 27 | ``` 28 | ```{r child = 'includes/functionalities.rmd'} 29 | ``` 30 | ```{r child = 'includes/usage.rmd'} 31 | ``` 32 | ```{r child = 'includes/impl-related-conclusion.rmd'} 33 | ``` 34 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bnclassify 2 | Title: Learning Discrete Bayesian Network Classifiers from Data 3 | Description: State-of-the art algorithms for learning discrete Bayesian network classifiers from data, including a number of those described in Bielza & Larranaga (2014) , with functions for prediction, model evaluation and inspection. 4 | Version: 0.4.8 5 | Authors@R: c(person("Mihaljevic","Bojan",email="boki.mihaljevic@gmail.com",role=c("aut","cre", "cph")), 6 | person("Bielza","Concha",email="mcbielza@fi.upm.es",role="aut"), 7 | person("Larranaga","Pedro",email="pedro.larranaga@fi.upm.es",role="aut"), 8 | person("Wickham", "Hadley", role="ctb", comment="some code extracted from memoise package")) 9 | URL: https://github.com/bmihaljevic/bnclassify 10 | BugReports: https://github.com/bmihaljevic/bnclassify/issues 11 | Depends: 12 | R (>= 3.2.0) 13 | Imports: 14 | assertthat (>= 0.1), 15 | entropy(>= 1.2.0), 16 | matrixStats(>= 0.14.0), 17 | rpart(>= 4.1-8), 18 | Rcpp, 19 | Suggests: 20 | igraph, 21 | gRain(>= 1.2-3), 22 | gRbase(>= 1.7-0.1), 23 | mlr(>= 2.2), 24 | testthat(>= 0.8.1), 25 | knitr(>= 1.10.5), 26 | ParamHelpers(>= 1.5), 27 | rmarkdown(>= 0.7), 28 | mlbench, 29 | covr 30 | Encoding: UTF-8 31 | License: GPL (>= 2) 32 | Maintainer: Mihaljevic Bojan 33 | VignetteBuilder: knitr 34 | LinkingTo: Rcpp, BH 35 | RoxygenNote: 7.3.1 36 | -------------------------------------------------------------------------------- /tests/testthat/test-wrap-grain.R: -------------------------------------------------------------------------------- 1 | context("grain") 2 | 3 | test_that("nominal joint prob instance", { 4 | # skip_if_not_installed('gRain') 5 | # a <- nbvote() 6 | # g <- as_grain(a) 7 | # vc <- as.matrix(voting) 8 | # cp <- compute_grain_log_joint_instance(vc[1, -17], g, 'Class') 9 | # expect_true(is.numeric(cp)) 10 | # expect_equal(names(cp), levels(voting$Class)) 11 | # expect_equal(exp(as.vector(cp)), c(1.956045e-09, 1.517449e-02), tolerance = 1e-6) 12 | }) 13 | 14 | test_that(" joint prob instance with no evidence", { 15 | # skip_if_not_installed('gRain') 16 | # a <- nbvote() 17 | # g <- as_grain(a) 18 | # vc <- as.matrix(voting) 19 | # inst <- vc[1, -17] 
20 | # inst[] <- NA 21 | # cp <- compute_grain_log_joint_instance(inst, g, 'Class') 22 | # expect_true(is.numeric(cp)) 23 | # expect_equal(names(cp), levels(voting$Class)) 24 | # expect_true(equivalent_num(exp(cp), params(a)[class_var(a)][[1]])) 25 | }) 26 | 27 | test_that("cpts to grain", { 28 | skip("gRain has removed the predict function") 29 | skip_if_not_installed('gRain') 30 | a <- nbvote() 31 | g <- compile_grain(params(a)) 32 | # Check compiled 33 | expect_true(g$isCompiled) 34 | # gRain cannot handle a row that has all missing features. remove 249. 35 | p <- gRain::predict.grain(g, response = 'Class', 36 | newdata = voting[-249, -17], type = "class") 37 | expect_true(all(p$pred[[1]] %in% levels(voting$Class[-249]))) 38 | }) -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | - ubuntu 22.04, R 4.3.3 (local) 3 | - macos-latest, r: release 4 | - windows-latest, r: release 5 | - ubuntu-latest, r: devel 6 | - ubuntu-latest, r: release 7 | - ubuntu-latest, r: oldrel-1 8 | - windows R-devel (win-builder) 9 | - windows R-release (win-builder) 10 | 11 | 12 | ## R CMD check results 13 | 0 errors | 0 warnings | 1 notes 14 | 15 | The not is related to the fact that this is a submission of an archived package. The words identified as possibly misspelled are last names and are not misspelled. The note is: 16 | 17 | Checking CRAN incoming feasibility ... [7s/18s] NOTE 18 | Maintainer: ‘Mihaljevic Bojan ’ 19 | 20 | New submission 21 | 22 | Package was archived on CRAN 23 | 24 | Possibly misspelled words in DESCRIPTION: 25 | Bielza (3:145) 26 | Larranaga (3:154) 27 | 28 | CRAN repository db overrides: 29 | X-CRAN-Comment: Archived on 2023-10-20 as check problems were not 30 | corrected in time. 31 | 32 | On Windows R-devel (win-builder), there was 1 WARNING and 2 NOTES. The WARNING was: 33 | 34 | * checking sizes of PDF files under 'inst/doc' ... WARNING 35 | 'gs+qpdf' made some significant size reductions: 36 | compacted 'methods.pdf' from 326Kb to 72Kb 37 | compacted 'overview.pdf' from 495Kb to 184Kb 38 | 39 | ## Reverse dependencies 40 | There are currently no downstream dependencies for this package. -------------------------------------------------------------------------------- /man/predict.bnc_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.R 3 | \name{predict.bnc_fit} 4 | \alias{predict.bnc_fit} 5 | \title{Predicts class labels or class posterior probability distributions.} 6 | \usage{ 7 | \method{predict}{bnc_fit}(object, newdata, prob = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{object}{A \code{\link{bnc_bn}} object.} 11 | 12 | \item{newdata}{A data frame containing observations whose class has to be 13 | predicted.} 14 | 15 | \item{prob}{A logical. Whether class posterior probability should be returned.} 16 | 17 | \item{...}{Ignored.} 18 | } 19 | \value{ 20 | If \code{prob=FALSE}, then returns a length-\eqn{N} factor with the 21 | same levels as the class variable in \code{x}, where \eqn{N} is the number 22 | of rows in \code{newdata}. Each element is the most likely 23 | class for the corresponding row in \code{newdata}. If \code{prob=TRUE}, 24 | returns a \eqn{N} by \eqn{C} numeric matrix, where \eqn{C} is the number of 25 | classes; each row corresponds to the class posterior of the instance. 
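A brief sketch expanding on the value description above; accuracy() is assumed to take the predicted labels first and the true labels second:

data(car)
nb <- bnc('nb', 'class', car, smooth = 1)
labels <- predict(nb, car)
accuracy(labels, car$class)
post <- predict(nb, car, prob = TRUE)   # N x C matrix of class posteriors
summary(rowSums(post))                  # each row should sum to 1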
26 | } 27 | \description{ 28 | Predicts class labels or class posterior probability distributions. 29 | } 30 | \details{ 31 | Ties are resolved randomly. Inference is much slower if 32 | \code{newdata} contains \code{NA}s. 33 | } 34 | \examples{ 35 | data(car) 36 | nb <- bnc('nb', 'class', car, smooth = 1) 37 | p <- predict(nb, car) 38 | head(p) 39 | p <- predict(nb, car, prob = TRUE) 40 | head(p) 41 | } 42 | -------------------------------------------------------------------------------- /meta/check.r: -------------------------------------------------------------------------------- 1 | ## ========================================= ## 2 | ## Run checks prior to submitting ## 3 | ## ========================================= ## 4 | 5 | - gha: not using manual currently, could change it 6 | 7 | 8 | ## for vignette size warning. Since vignettes are only built locally, I only need add this argument 9 | ## MUST use this in all my builds and checks!! 10 | # build_args <- c('--resave-data','--compact-vignettes=gs+qpdf', '--gs_quality=ebook') 11 | build_args <- c('--resave-data','--compact-vignettes=both') 12 | # check_args <- '--as-cran --use-valgrind' 13 | check_args <- '--as-cran' 14 | 15 | ## First to build win so I can proceed with local while it is tested remotely 16 | devtools::check_win_devel('.', args = build_args ) 17 | devtools::check_win_release('.', args = build_args ) 18 | devtools::check_mac_release( '.', args = build_args ) 19 | # devtools::build_win('.', version = 'R-release', args = build_args ) 20 | # devtools::build_win('.', version = 'R-devel', args = build_args ) 21 | 22 | # TODO rhub: can I set build args? 23 | rhub::check_for_cran() 24 | rhub::check_with_sanitizers() 25 | 26 | devtools::check(args = check_args , cran = TRUE, build_args = build_args, remote = TRUE, manual = TRUE ) 27 | ## cran = FALSE probably runs tests skipped on cran 28 | devtools::check(cran = FALSE, args = check_args, build_args = build_args ) 29 | devtools::check(cran = TRUE, build_args = build_args ) 30 | devtools::check(cran = FALSE, build_args = build_args ) 31 | 32 | 33 | # see submit.r -------------------------------------------------------------------------------- /tests/testthat/test-basic-misc.R: -------------------------------------------------------------------------------- 1 | context("Miscallaneous") 2 | 3 | test_that("make_last", { 4 | # Nominal 5 | # so to not modify letters 6 | e <- c(letters, NULL) 7 | e <- make_last_sideeffect(e, 'c') 8 | expect_equal(e, c(letters[-3], letters[3])) 9 | # # x not character 10 | # Currently not checking this 11 | # expect_error(make_last(1:10, 'c') , "character") 12 | # last not in x 13 | e <- c(letters, NULL) 14 | expect_error(make_last_sideeffect(e, 'A'), "not found") 15 | # last repeated in x 16 | # TODO: does not report a mistake. 
17 | # expect_error(make_last_sideeffect(rep('A', 5), 'A'), "length") 18 | }) 19 | 20 | test_that("Rep factor as int", { 21 | fi <- rep_factor_as_int(factor(letters, levels = letters), 10) 22 | expect_identical(length(fi), 260L) 23 | expect_true(all(fi == 1:26)) 24 | }) 25 | 26 | test_that("Random max nominal", { 27 | suppressWarnings(RNGversion("3.5.0")) 28 | set.seed(0) 29 | x <- c(1, runif(5), 1) 30 | a <- max_random(x) 31 | b <- max_random(x) 32 | expect_true(a != b) 33 | expect_equal(x[a], x[b]) 34 | }) 35 | 36 | test_that("Boostrap nominal", { 37 | d <- bootstrap_ss(dataset = car, proportion = 0.25) 38 | expect_equal(dim(d), c(432, ncol(car))) 39 | expect_equal(colnames(d), colnames(car)) 40 | 41 | d <- bootstrap_ss(dataset = voting, proportion = 0.2) 42 | expect_equal(dim(d), c(87, 17)) 43 | expect_equal(colnames(d), colnames(voting)) 44 | }) 45 | 46 | test_that("Boostrap 0 proportion", { 47 | expect_error(bootstrap_ss(dataset = car, proportion = 0), "positive") 48 | }) -------------------------------------------------------------------------------- /tests/testthat/test-hc-bsej.R: -------------------------------------------------------------------------------- 1 | context("HC bsej") 2 | 3 | test_that("Merge supernodes", { 4 | # A NB with 6 features yields 15 models 5 | nb <- nbcar() 6 | states <- merge_supernodes(nb) 7 | expect_equal(length(states), 15) 8 | m <- states[[1]] 9 | expect_equal(narcs(m), length(features(nb)) + 1) 10 | # A single predictor 11 | nb <- nbcarp(car[, 6:7]) 12 | states <- merge_supernodes(nb) 13 | expect_true(is.null(states)) 14 | # No predictors 15 | nb <- nbcarclass() 16 | states <- merge_supernodes(nb) 17 | expect_true(is.null(states)) 18 | 19 | # relating non-singleton feature subsets 20 | nb <- nbcar() 21 | states <- merge_supernodes(nb) 22 | e <- merge_supernodes(states[[1]]) 23 | expect_equal(length(e), 10) 24 | expect_equal(families(e[[1]])$buying, c("buying", "maint", "doors", "class")) 25 | f <- merge_supernodes(e[[10]]) 26 | expect_equal(length(f), 6) 27 | }) 28 | 29 | test_that("Excludes", { 30 | # One dag per feature 31 | nb <- nbcar() 32 | cands <- excludes(nb) 33 | expect_equal(length(cands), length(features(nb))) 34 | # single state for a single feature 35 | nb <- nbcarp(car[, c(1,7)]) 36 | cands <- excludes(nb) 37 | expect_equal(length(cands), 1) 38 | expect_equal(features(cands[[1]]), character()) 39 | 40 | # NULL for no features 41 | nb <- nbcarclass() 42 | states <- excludes(nb) 43 | expect_equal(length(states), 0) 44 | }) 45 | 46 | test_that("bsej step", { 47 | nb <- nbcar() 48 | e <- bsej_step(nb) 49 | expect_equal(length(e), 6 + 15) 50 | }) -------------------------------------------------------------------------------- /inst/include/multidim-array.h: -------------------------------------------------------------------------------- 1 | #ifndef bnclassify_multidimarray_H 2 | #define bnclassify_multidimarray_H 3 | 4 | #include 5 | using namespace Rcpp; 6 | 7 | 8 | /** 9 | * It returns the index for fully specified entries. 10 | * The entries must be 0-based. 11 | */ 12 | inline int entry_index(std::vector::const_iterator begin, const std::vector & dim_prod) { 13 | return std::inner_product(begin + 1, begin + dim_prod.size(), dim_prod.begin(), *begin); 14 | } 15 | 16 | int entry_index(const std::vector & indices, const std::vector & dim_prod); 17 | 18 | /** 19 | * Returns a subset of the array, when the last dimension of indices is not specified. 
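For reference, entry_index() above implements plain column-major indexing, the same layout R uses for arrays (and hence for CPTs); an R sketch of the equivalent computation, with 0-based indices and dim_prod holding the cumulative products of the lower dimensions:

A <- array(1:24, dim = c(2, 3, 4))
idx <- c(1, 2, 0)                          # 0-based indices, i.e. A[2, 3, 1]
dim_prod <- cumprod(dim(A))[-3]            # c(2, 6)
flat <- idx[1] + sum(idx[-1] * dim_prod)   # 0-based flat index, as in entry_index()
stopifnot(A[2, 3, 1] == as.vector(A)[flat + 1])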
20 | */ 21 | inline void subset_free_last_dim(const std::vector & array, const std::vector & dim_prod, std::vector::iterator indices_begin, 22 | std::vector & output) { 23 | // length indices = length dim prod minus one 24 | // dim prod cum prod = array length 25 | // otuput size = dim size 26 | int ndim = dim_prod.size(); 27 | std::vector::iterator last_dim = indices_begin + ndim - 1; 28 | // Start with first value for last dimension. 29 | *last_dim = 0; 30 | int sum = entry_index(indices_begin, dim_prod); 31 | // // Add an entry per each class 32 | int per_class_entries = dim_prod.at(ndim - 2); 33 | int ncpts = output.size(); 34 | // most of the time is spent in this loop. 35 | for (int i = 0; i < ncpts; i++ ) { 36 | output[i] = array.at(sum); 37 | sum += per_class_entries; 38 | } 39 | } 40 | 41 | 42 | #endif -------------------------------------------------------------------------------- /tests/testthat/test-basic-probs.R: -------------------------------------------------------------------------------- 1 | context("basic probs") 2 | 3 | make_factor <- function(n) { 4 | a <- runif(100) 5 | b <- 1 - a 6 | td <- matrix(c(a, b), ncol=2) 7 | colnames(td) <- letters[1:2] 8 | td 9 | } 10 | 11 | test_that("Multiply single factor", { 12 | td <- make_factor(1000) 13 | a <- sum_matrices(list(a=td)) 14 | expect_equal(td, a) 15 | }) 16 | 17 | test_that("Multiply Two factors", { 18 | td <- make_factor(1000) 19 | td2 <- make_factor(1000) 20 | a <- sum_matrices(list(a=td, b=td2)) 21 | expect_identical(colnames(a), letters[1:2]) 22 | expect_identical(dim(td), dim(a)) 23 | fr <- td[1, ] + td2[1, ] 24 | expect_equal(fr, a[1, ]) 25 | }) 26 | 27 | 28 | test_that("normalize", { 29 | # Nominal 30 | a <- setNames(1:5, letters[1:5]) 31 | a <- normalize(a) 32 | expect_equal(sum(a), 1) 33 | expect_equal(length(a), 5) 34 | expect_equal(unname(a), 1:5 / sum(1:5)) 35 | expect_equal(names(a), letters[1:5]) 36 | # Sum 0 37 | a <- normalize(setNames(rep(0, 10), letters[1:10])) 38 | expect_equal(sum(a), 1) 39 | expect_equal(length(a), 10) 40 | expect_equal(unname(a), rep(1 / 10, 10)) 41 | expect_equal(names(a), letters[1:10]) 42 | # Single 0. Degenerate prob. dist. 43 | a <- normalize(0) 44 | expect_equal(a, 1) 45 | }) 46 | 47 | test_that("Legal prob. distribution", { 48 | expect_true(are_pdists(matrix(1))) 49 | expect_true(!are_pdists(matrix(0))) 50 | expect_true(!are_pdists(matrix(NA, 1))) 51 | expect_true(!are_pdists(matrix(c(-1, 1), nrow=1, byrow = TRUE))) 52 | expect_true(are_pdists(matrix(1:5 / 15, nrow=1, byrow = TRUE))) 53 | }) -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v4 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-params-wanbia.r: -------------------------------------------------------------------------------- 1 | context("learn params wanbia") 2 | 3 | test_that('make cll', { 4 | w <- rep(1, 16) 5 | w <- setNames(w, colnames(v)[-ncol(v)]) 6 | cll <- make_cll('Class', v)(w) 7 | expect_equal(cll, 149.1442, tolerance = 1e-2) 8 | # Too few weights 9 | expect_error(make_cll('Class', v)(w[-16])) 10 | }) 11 | 12 | test_that("wanbia error", { 13 | w <- compute_wanbia_weights( 'Class', v, return_optim_object = TRUE) 14 | # There is an error. Not sure if this is critical. 15 | # It does not occur on Windows. 
Skipping the test for now 16 | # expect_equal(w$message, "ERROR: ABNORMAL_TERMINATION_IN_LNSRCH") 17 | }) 18 | 19 | test_that("with incomplete data", { 20 | skip_on_cran() 21 | skip("too slow currently") 22 | w <- compute_wanbia_weights( 'Class', voting) 23 | # just check results is consistent 24 | expect_equal(w[['physician_fee_freeze']], 0.7730736, tolerance = 1e-6) 25 | }) 26 | 27 | test_that("check consistent result ", { 28 | skip_if_not_installed('mlbench') 29 | skip_on_cran() 30 | skip("too slow currently") 31 | require(mlbench) 32 | data("DNA") 33 | w <- compute_wanbia_weights( 'Class', DNA) 34 | expect_equal(w[['V132']], 0.99039949, tolerance = 1e-6) 35 | }) 36 | 37 | test_that("with more than two classes ", { 38 | skip_if_not_installed('mlbench') 39 | skip_on_cran() 40 | skip("too slow currently") 41 | require(mlbench) 42 | data("Soybean") 43 | w <- compute_wanbia_weights( 'Class', Soybean) 44 | # just check results is consistent 45 | expect_equal(w[['seed.tmt']], 0.42133418, tolerance = 1e-6) 46 | }) -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(AIC,bnc_bn) 4 | S3method(BIC,bnc_bn) 5 | S3method(compute_log_joint_complete,bnc_aode) 6 | S3method(compute_log_joint_complete,bnc_bn) 7 | S3method(compute_log_joint_incomplete,bnc_aode) 8 | S3method(compute_log_joint_incomplete,bnc_bn) 9 | S3method(logLik,bnc_bn) 10 | S3method(lp_implement,bnc_aode) 11 | S3method(lp_implement,bnc_dag) 12 | S3method(plot,bnc_dag) 13 | S3method(predict,bnc_fit) 14 | S3method(print,bnc_base) 15 | export(accuracy) 16 | export(aode) 17 | export(as_grain) 18 | export(as_igraph) 19 | export(as_mlr) 20 | export(awnb_weights) 21 | export(bnc) 22 | export(bsej) 23 | export(cLogLik) 24 | export(class_var) 25 | export(classes) 26 | export(cmi) 27 | export(cv) 28 | export(families) 29 | export(feature_families) 30 | export(features) 31 | export(fssj) 32 | export(is_anb) 33 | export(is_nb) 34 | export(is_ode) 35 | export(is_semi_naive) 36 | export(kdb) 37 | export(lp) 38 | export(makeRLearner.bnc) 39 | export(manb_arc_posterior) 40 | export(modelstring) 41 | export(narcs) 42 | export(nb) 43 | export(nparams) 44 | export(params) 45 | export(predictLearner.bnc) 46 | export(tan_cl) 47 | export(tan_hc) 48 | export(tan_hcsp) 49 | export(trainLearner.bnc) 50 | export(values) 51 | export(vars) 52 | importFrom(Rcpp,sourceCpp) 53 | importFrom(graphics,plot) 54 | importFrom(stats,AIC) 55 | importFrom(stats,BIC) 56 | importFrom(stats,as.formula) 57 | importFrom(stats,complete.cases) 58 | importFrom(stats,logLik) 59 | importFrom(stats,nobs) 60 | importFrom(stats,optim) 61 | importFrom(stats,predict) 62 | importFrom(stats,setNames) 63 | importFrom(utils,combn) 64 | useDynLib(bnclassify) 65 | -------------------------------------------------------------------------------- /man/loglik.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-bn-operate.R 3 | \name{loglik} 4 | \alias{loglik} 5 | \alias{AIC.bnc_bn} 6 | \alias{BIC.bnc_bn} 7 | \alias{logLik.bnc_bn} 8 | \alias{cLogLik} 9 | \title{Compute (penalized) log-likelihood.} 10 | \usage{ 11 | \method{AIC}{bnc_bn}(object, ...) 12 | 13 | \method{BIC}{bnc_bn}(object, ...) 14 | 15 | \method{logLik}{bnc_bn}(object, ...) 16 | 17 | cLogLik(object, ...) 
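If the implementation matches the formulas given in the details below, the scores relate to logLik() and nparams() as in this informal sketch (a consistency check under that assumption, not a guaranteed identity):

data(car)
nb <- bnc('nb', 'class', car, smooth = 1)
ll <- as.numeric(logLik(nb, car))
all.equal(as.numeric(AIC(nb, car)), ll - nparams(nb) / 2)
all.equal(as.numeric(BIC(nb, car)), ll - log(nrow(car)) / 2 * nparams(nb))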
18 | } 19 | \arguments{ 20 | \item{object}{A \code{\link{bnc_bn}} object.} 21 | 22 | \item{...}{A data frame (\eqn{\mathcal{D}}{D}).} 23 | } 24 | \description{ 25 | Compute (penalized) log-likelihood and conditional log-likelihood score of a \code{\link{bnc_bn}} object on 26 | a data set. Requires a data frame argument in addition to \code{object}. 27 | } 28 | \details{ 29 | log-likelihood = \eqn{log P(\mathcal{D} \mid \theta)}{log P(D | \theta)}, 30 | 31 | Akaike's information criterion (AIC) = \eqn{log P(\mathcal{D} \mid \theta) - 32 | \frac{1}{2} |\theta|}{log P(D | \theta) - |\theta| / 2}, 33 | 34 | The Bayesian information criterion (BIC) score: = \eqn{log P(\mathcal{D} \mid 35 | \theta) - \frac{\log N}{2} |\theta|}{log P(D | \theta) - N |\theta| / 2}, 36 | 37 | where \eqn{|\theta|} is the number of free parameters in \code{object}, 38 | \eqn{\mathcal{D}}{D} is the data set and N is the number of instances in 39 | \eqn{\mathcal{D}}{D}. 40 | 41 | \code{cLogLik} computes the conditional log-likelihood of the model. 42 | } 43 | \examples{ 44 | data(car) 45 | nb <- bnc('nb', 'class', car, smooth = 1) 46 | logLik(nb, car) 47 | AIC(nb, car) 48 | BIC(nb, car) 49 | cLogLik(nb, car) 50 | } 51 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # bnclassify 0.4.8 2 | - Switch from graph and Rgraphviz to igraph 3 | 4 | # 0.4.7 5 | - Fix a WARN on r-devel clang 6 | 7 | # 0.4.6 8 | - Work-around for gRain changes 9 | - Skip a test that fails due to changes in R-devel 10 | 11 | # 0.4.5 12 | * Pass CRAN checks with stringsAsFactors = FALSE as default 13 | 14 | # 0.4.4 15 | * Removing gRain integration to pass CRAN checks 16 | 17 | # 0.4.3 18 | * Minor changes to pass CRAN checks 19 | 20 | # 0.4.2 21 | * Minor changes to pass CRAN checks 22 | 23 | # 0.4.1 24 | * Minor changes to pass CRAN checks 25 | 26 | # 0.4.0 27 | * Optimizing some functions with RCPP 28 | * Added AODE (averaged one-dependence estimators) 29 | * Added k-db classifier 30 | * Replaced the RBGL package with BH 31 | * Replaced the graph package with custom code and BH 32 | * Added the 'overview' vignette 33 | * Minor additions: cLogLik function 34 | 35 | # 0.3.4 36 | * Added WANBIA discriminative parameter learning for naive Bayes 37 | * Remove 'runtimes' vignette and microbenchmark dependency 38 | 39 | # 0.3.3 40 | * Fixed warnings for resubmission to CRAN 41 | 42 | # 0.3.2 43 | * Improved function documentation 44 | * Updated and re-organized tutorial vignette 45 | * Added 'techical information' and 'runtimes' vignettes 46 | * Added logLik, AIC, and BIC 47 | * Memoized cpt extraction during greedy search 48 | * Deprecated lpawnb() (use lp() instead) 49 | * Fixed a number of bugs, including an error in AWNB cross-validation 50 | 51 | # 0.3.1 52 | * Minor improvements in vignette 53 | * More detailed documentation for lp() 54 | * Fixing a test error due to a change in gRain 55 | 56 | # 0.3.0 57 | * First released version of the package. 
58 | -------------------------------------------------------------------------------- /tests/testthat/test-anb-bn.R: -------------------------------------------------------------------------------- 1 | context("bnc bn") 2 | 3 | test_that("nominal", { 4 | tbn <- bnc('nb', 'class', car, smooth = 0) 5 | texp <- prop.table(table(car$class, dnn = 'class') ) 6 | tout <- params(tbn)$class 7 | expect_equal(tout, texp) 8 | tvalues <- values(tbn) 9 | levs <- lapply(car, levels) 10 | expect_true(all(mapply(identical, levs, tvalues, SIMPLIFY = TRUE))) 11 | expect_identical(values(tbn)$buying, levels(car$buying)) 12 | }) 13 | 14 | test_that("nominal as grain", { 15 | skip_if_not_installed('gRain') 16 | tbn <- bnc('nb', 'class', car, smooth = 0) 17 | expect_is(as_grain(tbn), 'grain') 18 | }) 19 | 20 | 21 | test_that("bnc_bn no class in dataset ", { 22 | tbdag <- nb_dag('class', 'buying') 23 | tb <- bnc_dag(tbdag, class = 'class') 24 | expect_error(lp(tb, car[ , 1, drop = FALSE], smooth = 0), 25 | "not found") 26 | }) 27 | 28 | test_that("Just the class in dataset", { 29 | tbdag <- nb_dag('class', character()) 30 | tbdag <- bnc_dag(tbdag, class = 'class') 31 | tbn <- lp(tbdag, car, smooth = 0) 32 | expect_equal(class_var(tbn), 'class') 33 | }) 34 | 35 | test_that(" Wrong data set", { 36 | tbdag <- nb_dag('class', colnames(car)[-7]) 37 | tbdag <- bnc_dag(tbdag, class = 'class') 38 | expect_error(lp(tbdag, voting, smooth = 0) , "not found") 39 | }) 40 | 41 | test_that(" Classes", { 42 | a <- lp(nb('class', car), car, smooth = 0) 43 | expect_equal(classes(a), levels(car$class)) 44 | }) 45 | 46 | test_that("nominal wanbia", { 47 | a <- lp(nb('Class', v), v, smooth = 0, wanbia = TRUE) 48 | expect_equal(a$.weights[['handicapped_infants']], 0.00000000) 49 | expect_equal(a$.weights[['immigration']], 1.00000000) 50 | }) -------------------------------------------------------------------------------- /tests/testthat/test-learn-params-awnb.R: -------------------------------------------------------------------------------- 1 | context("AWNB") 2 | 3 | test_that("one tree", { 4 | suppressWarnings(RNGversion("3.5.0")) 5 | set.seed(0) 6 | a <- awnb('class', car, bootstrap_size = 0.5, trees = 1) 7 | expect_equal(as.vector(a['buying']), 0.5773503, tolerance = 1e-5) 8 | expect_equal(as.vector(a['doors']), 0.3779645, tolerance = 1e-5) 9 | expect_equal(as.vector(a['persons']), 1) 10 | suppressWarnings(RNGversion("3.5.0")) 11 | set.seed(0) 12 | }) 13 | 14 | test_that("one two trees", { 15 | a <- awnb('class', car, bootstrap_size = 0.5, trees = 2) 16 | expect_true(is_perm(names(a), colnames(car)[-7])) 17 | expect_equal(as.vector(a['buying']), 0.5773503, tolerance = 1e-5) 18 | expect_equal(as.vector(a['doors']), 0.3931064, tolerance = 1e-5) 19 | expect_equal(as.vector(a['persons']), 1) 20 | suppressWarnings(RNGversion("3.5.0")) 21 | set.seed(0) 22 | a <- awnb('Class', voting, bootstrap_size = 0.5, trees = 10) 23 | expect_true(is_perm(names(a), colnames(voting)[-17])) 24 | expect_equal(as.vector(a['superfund_right_to_sue']), 0.21019141, tolerance = 1e-5) 25 | expect_equal(as.vector(a['mx_missile']), 0.30487476, tolerance = 1e-5) 26 | expect_equal(as.vector(a['immigration']), 0.39963413) 27 | }) 28 | 29 | test_that("one no tree", { 30 | expect_error(awnb('Class', dataset = voting[1:2, ], 31 | bootstrap_size = 0.5, trees = 10), "empty") 32 | }) 33 | 34 | test_that("weights for features not in tree", { 35 | suppressWarnings(RNGversion("3.5.0")) 36 | set.seed(0) 37 | a <- awnb('class', car[sample(1:1000, 10), , drop = FALSE], 38 | 
bootstrap_size = 1, trees = 1) 39 | a 40 | expect_true(is_perm(names(a), colnames(car)[-7])) 41 | }) 42 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - uses: r-lib/actions/setup-tinytex@v2 35 | - run: tlmgr --version 36 | 37 | - uses: r-lib/actions/setup-pandoc@v2 38 | 39 | - uses: r-lib/actions/setup-r@v2 40 | with: 41 | r-version: ${{ matrix.config.r }} 42 | http-user-agent: ${{ matrix.config.http-user-agent }} 43 | use-public-rspm: true 44 | 45 | - uses: r-lib/actions/setup-r-dependencies@v2 46 | with: 47 | extra-packages: any::rcmdcheck 48 | needs: check 49 | 50 | - uses: r-lib/actions/check-r-package@v2 51 | with: 52 | upload-snapshots: true 53 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 54 | -------------------------------------------------------------------------------- /man/bnc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/learn-params.R 3 | \name{bnc} 4 | \alias{bnc} 5 | \title{Learn network structure and parameters.} 6 | \usage{ 7 | bnc( 8 | dag_learner, 9 | class, 10 | dataset, 11 | smooth, 12 | dag_args = NULL, 13 | awnb_trees = NULL, 14 | awnb_bootstrap = NULL, 15 | manb_prior = NULL, 16 | wanbia = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{dag_learner}{A character. Name of the structure learning function.} 21 | 22 | \item{class}{A character. Name of the class variable.} 23 | 24 | \item{dataset}{The data frame from which to learn network structure and 25 | parameters.} 26 | 27 | \item{smooth}{A numeric. The smoothing value (\eqn{\alpha}) for Bayesian 28 | parameter estimation. Nonnegative.} 29 | 30 | \item{dag_args}{A list. Optional additional arguments to \code{dag_learner}.} 31 | 32 | \item{awnb_trees}{An integer. The number (\eqn{M}) of bootstrap samples to 33 | generate.} 34 | 35 | \item{awnb_bootstrap}{A numeric. The size of the bootstrap subsample, 36 | relative to the size of \code{dataset} (given in [0,1]).} 37 | 38 | \item{manb_prior}{A numeric. The prior probability for an arc between the 39 | class and any feature.} 40 | 41 | \item{wanbia}{A logical. If \code{TRUE}, WANBIA feature weighting is 42 | performed.} 43 | } 44 | \description{ 45 | A convenience function to learn the structure and parameters in a single 46 | call. Must provide the name of the structure learning algorithm function; 47 | see \code{\link{bnclassify}} for the list. 
48 | } 49 | \examples{ 50 | data(car) 51 | nb <- bnc('nb', 'class', car, smooth = 1) 52 | nb_manb <- bnc('nb', 'class', car, smooth = 1, manb_prior = 0.3) 53 | ode_cl_aic <- bnc('tan_cl', 'class', car, smooth = 1, dag_args = list(score = 'aic')) 54 | } 55 | -------------------------------------------------------------------------------- /tests/testthat/test-anb-dag.R: -------------------------------------------------------------------------------- 1 | context("bnc dag") 2 | 3 | test_that("bnc_dag", { 4 | # Nominal 5 | g <- test_dag() 6 | bd <- bnc_dag(dag = g, class = 'A') 7 | expect_is(bd, 'bnc_dag') 8 | expect_identical(bd$.class, 'A') 9 | expect_identical(bd$.dag, g) 10 | expect_identical(features(bd), 'B') 11 | expect_identical(vars(bd), setNames(nm = LETTERS[2:1])) 12 | expect_identical(bd$.families, list(B = LETTERS[2:1], A = 'A')) 13 | 14 | # Just class 15 | g <- graph_internal(nodes = LETTERS[1], edgemode = "directed") 16 | bd <- bnc_dag(dag = g, class = 'A') 17 | expect_is(bd, 'bnc_dag') 18 | expect_identical(bd$.class, 'A') 19 | expect_identical(bd$.dag, g) 20 | expect_identical(features(bd), character()) 21 | expect_identical(vars(bd), setNames(nm = 'A')) 22 | expect_identical(bd$.families, list(A='A')) 23 | 24 | # Class not parent of all other nodes 25 | e <- list(A = 'B',B = NULL) 26 | edges <- graph_from_to_to_edges('A', 'B') 27 | g <- graph_internal(nodes = LETTERS[1:3], edges = edges, edgemode = "directed") 28 | # if (!skip_assert()) expect_error(bnc_dag(dag = g, class = 'A'), "fams_ok") 29 | # never use skip_assert here so that i always set it to false before publishing 30 | expect_error(bnc_dag(dag = g, class = 'A'), "fams_ok") 31 | }) 32 | 33 | test_that("Accessors", { 34 | # Nominal 35 | skip_if_not_installed('gRbase') 36 | suppressWarnings(RNGversion("3.5.0")) 37 | set.seed(0) 38 | ran <- random_aug_nb_dag('z', letters[-26], maxpar = 5, wgt = 0.8) 39 | dg <- bnc_dag(ran, class = 'z') 40 | expect_identical(unname(sort(vars(dg))), letters) 41 | }) 42 | 43 | test_that("feature families nominal", { 44 | n <- nbcar() 45 | a <- feature_families(n) 46 | expect_equal(length(a), 6) 47 | expect_equal(a[[1]], c('buying', 'class')) 48 | }) -------------------------------------------------------------------------------- /man/inspect_bnc_bn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-bn-operate.R, 3 | % R/anb-bn.R 4 | \name{inspect_bnc_bn} 5 | \alias{inspect_bnc_bn} 6 | \alias{nparams} 7 | \alias{manb_arc_posterior} 8 | \alias{awnb_weights} 9 | \alias{params} 10 | \alias{values} 11 | \alias{classes} 12 | \title{Inspect a Bayesian network classifier (with structure and parameters).} 13 | \usage{ 14 | nparams(x) 15 | 16 | manb_arc_posterior(x) 17 | 18 | awnb_weights(x) 19 | 20 | params(x) 21 | 22 | values(x) 23 | 24 | classes(x) 25 | } 26 | \arguments{ 27 | \item{x}{The \code{\link{bnc_bn}} object. The Bayesian network classifier.} 28 | } 29 | \description{ 30 | Functions for inspecting a \code{\link{bnc_bn}} object. In addition, you can 31 | query this object with the functions documented in 32 | \code{\link{inspect_bnc_dag}}. 33 | } 34 | \section{Functions}{ 35 | \itemize{ 36 | \item \code{nparams()}: Returns the number of free parameters in the model. 37 | 38 | \item \code{manb_arc_posterior()}: Returns the posterior of each arc from the class 39 | according to the MANB method. 
40 | 41 | \item \code{awnb_weights()}: Returns the AWNB feature weights. 42 | 43 | \item \code{params()}: Returns the list of CPTs, in the same order as \code{\link{vars}}. 44 | 45 | \item \code{values()}: Returns the possible values of each variable, in the same order as \code{\link{vars}}. 46 | 47 | \item \code{classes()}: Returns the possible values of the class variable. 48 | 49 | }} 50 | \examples{ 51 | 52 | data(car) 53 | nb <- bnc('nb', 'class', car, smooth = 1) 54 | nparams(nb) 55 | nb <- bnc('nb', 'class', car, smooth = 1, manb_prior = 0.5) 56 | manb_arc_posterior(nb) 57 | nb <- bnc('nb', 'class', car, smooth = 1, awnb_bootstrap = 0.5) 58 | awnb_weights(nb) 59 | } 60 | -------------------------------------------------------------------------------- /tests/testthat/test-wrap-igraph.R: -------------------------------------------------------------------------------- 1 | context("igraph") 2 | 3 | test_that("Make igraph", { 4 | skip_if_not_installed("igraph") 5 | expect_error(make_graph(nodes = LETTERS[1], from = LETTERS[1], to=1, 6 | weights = 1)) 7 | expect_error(make_graph(nodes = LETTERS[1], from =LETTERS[1],to=LETTERS[1], 8 | weights = NULL)) 9 | g <- make_graph(nodes = LETTERS[1:2], from =LETTERS[1],to=LETTERS[2], 10 | weights = 0.1) 11 | g <- make_graph(nodes = LETTERS[1], from = c(), to = c(), weights = c()) 12 | }) 13 | 14 | test_that("Complete graph", { 15 | skip_if_not_installed("igraph") 16 | g <- complete_graph(LETTERS[1:5]) 17 | g <- graph_internal2graph_NEL(g) 18 | expect_equal(length(igraph::E(g)), 10) 19 | }) 20 | 21 | test_that("Superimpose node", { 22 | skip_if_not_installed("igraph") 23 | # Nominal 24 | g <- igraph::graph(edges = c("A", "B"), directed = TRUE) 25 | sg <- superimpose_node(graphNEL2_graph_internal(g), 'C') 26 | sg <- graph_internal2graph_NEL(sg) 27 | expect_equal(sort(igraph::V(sg)$name), LETTERS[1:3]) 28 | expect_equal(igraph::ecount(sg), 3L) 29 | # Node already in dag 30 | expect_error(superimpose_node(graphNEL2_graph_internal(g), 'A'), 'nodes') 31 | }) 32 | 33 | 34 | test_that("Direct forest", { 35 | skip_if_not_installed("igraph") 36 | gr <- pairwise_ode_score_contribs(class = 'class', dataset = car, score = 'loglik') 37 | af <- max_weight_forest(gr) 38 | f <- direct_forest(g = af) 39 | # check labels, weights 40 | igf <- graph_internal2graph_NEL(f) 41 | igaf <- graph_internal2graph_NEL(af) 42 | expect_true(igraph::graph.isomorphic(igraph::as.undirected(igf), igaf)) 43 | expect_equal(sort(igraph::V(igf)$name), sort(igraph::V(igaf)$name)) 44 | # weights 45 | }) 46 | 47 | -------------------------------------------------------------------------------- /man/tan_chowliu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/learn-struct.R 3 | \name{tan_chowliu} 4 | \alias{tan_chowliu} 5 | \alias{tan_cl} 6 | \title{Learns a one-dependence estimator using Chow-Liu's algorithm.} 7 | \usage{ 8 | tan_cl(class, dataset, score = "loglik", root = NULL) 9 | } 10 | \arguments{ 11 | \item{class}{A character. Name of the class variable.} 12 | 13 | \item{dataset}{The data frame from which to learn the classifier.} 14 | 15 | \item{score}{A character. The score to be maximized. \code{'loglik'}, 16 | \code{'bic'}, and \code{'aic'} return the maximum likelihood, maximum BIC 17 | and maximum AIC tree/forest, respectively.} 18 | 19 | \item{root}{A character. The feature to be used as root of the augmenting 20 | tree. 
Only one feature can be supplied, even in the case of an augmenting 21 | forest. This argument is optional.} 22 | } 23 | \value{ 24 | A \code{\link{bnc_dag}} object. 25 | } 26 | \description{ 27 | Learns a one-dependence Bayesian classifier using Chow-Liu's algorithm, by 28 | maximizing either log-likelihood, the AIC or BIC scores; maximizing 29 | log-likelihood corresponds to the well-known tree augmented naive Bayes 30 | (Friedman et al., 1997). When maximizing AIC or BIC the output might be a 31 | forest-augmented rather than a tree-augmented naive Bayes. 32 | } 33 | \examples{ 34 | data(car) 35 | ll <- tan_cl('class', car, score = 'loglik') 36 | \dontrun{plot(ll)} 37 | ll <- tan_cl('class', car, score = 'loglik', root = 'maint') 38 | \dontrun{plot(ll)} 39 | aic <- tan_cl('class', car, score = 'aic') 40 | bic <- tan_cl('class', car, score = 'bic') 41 | } 42 | \references{ 43 | Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network 44 | classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. 45 | } 46 | -------------------------------------------------------------------------------- /R/wrap-rpart.R: -------------------------------------------------------------------------------- 1 | #' Learns an unpruned \code{rpart} recursive partition. 2 | #' @keywords internal 3 | learn_unprunned_tree <- function(dataset, class) { 4 | form <- as.formula(paste(class, '~ .')) 5 | # Save time by avoiding CV or surrogates 6 | control <- rpart::rpart.control(minsplit = 2, minbucket = 1, cp = 0, 7 | maxcompete = 0,maxsurrogate = 0, xval = 0) 8 | rpart::rpart(form, data = dataset, na.action = rpart::na.rpart, method 9 | = "class", parms = list(split = "information"), control = control) 10 | } 11 | #' Identifies all depths at which the features of a classification tree are 12 | #' tested. 13 | #' 14 | #' @param tree an \code{rpart} object 15 | #' @return a numeric vector. The names are the names of the variables. 16 | #' @keywords internal 17 | identify_all_testing_depths <- function(tree) { 18 | stopifnot(inherits(x = tree, what = 'rpart')) 19 | # Filter out leaves 20 | vars <- tree$frame[tree$frame$var != "" , 'var', drop = F] 21 | ordering <- as.integer(rownames(vars)) 22 | # if there are no splits in the tree - return 23 | if (length(ordering) == 0) return(NULL) 24 | names(ordering) <- as.matrix(vars)[,1] 25 | # Decode the depth from the ordering number. The order of a left-most child is 2*o_p, where o_p is the order of its parent. 26 | depths <- trunc(log(base = 2, ordering)) + 1 27 | # a small check 28 | stopifnot(min(depths) == 1) 29 | depths 30 | } 31 | #' Identifies the lowest (closest to root) depths at which the features of a 32 | #' classification tree are tested. 33 | #' 34 | #' @keywords internal 35 | identify_min_testing_depths <- function(tree) { 36 | depths <- identify_all_testing_depths(tree) 37 | if (length(depths) == 0) return(NULL) 38 | stopifnot(length(names(depths)) > 0) 39 | sort(tapply(depths, names(depths), min)) 40 | } -------------------------------------------------------------------------------- /R/basic-probs.R: -------------------------------------------------------------------------------- 1 | # Adds a list of matrices element-wise. 2 | # 3 | # @return A numeric matrix. Adding in log space corresponds to multiplying in probability space.
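# Illustrative sketch (added comment; not part of the original package, names and
# values are made up): two 1 x 2 matrices of log-probabilities with identical
# dimensions and column names can be combined as
#   m1 <- matrix(log(c(0.2, 0.8)), nrow = 1, dimnames = list(NULL, c("a", "b")))
#   m2 <- matrix(log(c(0.5, 0.5)), nrow = 1, dimnames = list(NULL, c("a", "b")))
#   sum_matrices(list(m1, m2))   # element-wise sum of logs = product of probabilities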
4 | sum_matrices <- function(matrices) { 5 | # Must have at least one member 6 | stopifnot(length(matrices) > 0) 7 | # Check all are numeric matrices of same size with same colnames 8 | n <- nrow(matrices[[1]]) 9 | nobs <- ncol(matrices[[1]]) 10 | values <- colnames(matrices[[1]]) 11 | valid <- vapply(matrices, valid_factor, n, nobs, values, 12 | FUN.VALUE = logical(1L)) 13 | stopifnot(all(valid)) 14 | sum <- Reduce('+', matrices) 15 | stopifnot(identical(colnames(sum), values)) 16 | sum 17 | } 18 | valid_factor <- function(x, nrow, ncol, colnames) { 19 | identical(dim(x), c(nrow, ncol)) && is.numeric(x) && identical(colnames, colnames(x)) 20 | } 21 | #' Normalize log probabilities. 22 | #' 23 | #' Uses the log-sum-exp trick. 24 | #' 25 | #' @references Murphy KP (2012). \emph{Machine learning: a probabilistic 26 | #' perspective}. The MIT Press. pp. 86-87. 27 | #' @keywords internal 28 | log_normalize <- function(lp) { 29 | stopifnot(is.matrix(lp)) 30 | # Check lp is a matrix of log probs (<= 0)? 31 | # Normalize with log sum exp 32 | log_probs <- lp - matrixStats::rowLogSumExps(lp) 33 | log_probs 34 | } 35 | exponentiate_probs <- function(p) { 36 | p <- exp_sideeffect(p) 37 | # rowAnys does not distinguish between NA and NaN. 38 | nans <- which(matrixStats::rowAnys(p, value = NaN)) 39 | if (length(nans) > 0) { 40 | p[nans, ] <- 1 / ncol(p) 41 | } 42 | p 43 | } 44 | #' Returns \code{TRUE} if \code{x} is a valid probability distribution. 45 | #' 46 | #'@keywords internal 47 | are_pdists <- function(x) { 48 | stopifnot(is.matrix(x)) 49 | are_probs(x) && all(fast_equal(rowSums(x), 1)) 50 | } 51 | are_probs <- function(x) { 52 | !anyNA(x) && all(x >= 0) && all(x <= 1) 53 | } -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | printl: 2 | $(eval TMP := $(du myprof* | sort -n -r | head -n 1 | cut -f 2)) 3 | $(eval TMP := $(ls)) 4 | @echo $(TMP) 5 | $(TMP) 6 | ls 7 | 8 | profile: 9 | rm -f myprof.log* 10 | export LD_PRELOAD=/usr/lib/libprofiler.so; CPUPROFILE="myprof.log" R -f run-test.r 11 | du -h myprof.log* 12 | FILE = du myprof* | sort -n -r | head -n 1 | cut -f 2 13 | google-pprof --text /usr/bin/R $(FILE) 14 | 15 | clean: 16 | rm src/*.o 17 | rm src/*.so 18 | 19 | clean-vignettes: 20 | find vignettes/ -type f -print | grep -Fvf vignettes/.install_extras | grep "tex" | xargs rm -f 21 | find vignettes/ -type f -print | grep -Fvf vignettes/.install_extras | grep "pdf" | xargs rm -f 22 | rm vignettes/*.log 23 | rm vignettes/*.R 24 | 25 | .PHONY: printl 26 | 27 | vignette-includes: ~/code-papers/tex-includes/macros-paper.tex ~/code-papers/tex-includes/macros-math.tex ~/code-papers/paper-bnclassify-rjournal/RJreferences.bib ~/code-papers/paper-bnclassify-rjournal/pg_*.png 28 | cp ~/code-papers/tex-includes/macros-paper.tex vignettes/ 29 | cp ~/code-papers/tex-includes/macros-math.tex vignettes/ 30 | cp ~/code-papers/tex-includes/macros-rjournal.tex vignettes/ 31 | cat vignettes/methods.bib > vignettes/bnclassify.bib 32 | cat ~/code-papers/paper-bnclassify-rjournal/RJreferences.bib >> vignettes/bnclassify.bib 33 | cp ~/code-papers/paper-bnclassify-rjournal/pg_*.png vignettes/ 34 | 35 | clean-technical: 36 | cd vignettes; latexmk -c technical.tex 37 | cd vignettes; latexmk -C technical.tex 38 | rm -f vignettes/technical-concordance.tex 39 | rm -f vignettes/technical.bbl 40 | rm -f vignettes/technical.synctex.gz 41 | rm -f vignettes/technical.tex 42 | 43 | docs: 44 | cd ..
; rm -f bnclassify.pdf; R CMD Rd2pdf bnclassify; 45 | cd .. ; xo bnclassify.pdf 46 | 47 | check: 48 | R < meta/check.r --no-save >> ~/Desktop/checks-output 49 | -------------------------------------------------------------------------------- /tests/testthat/test-cpp-table.R: -------------------------------------------------------------------------------- 1 | context("cpp table") 2 | 3 | check_unidim <- function(db, rows, cols) { 4 | return() # currently different behaviour between r devel and current r, so skip 5 | if (!is.character(cols)) { 6 | cols <- colnames(db)[cols] 7 | } 8 | a <- table_cpp(db, cols) 9 | db <- db[, cols, drop = FALSE] 10 | b <- table(db) 11 | if (ncol(db) == 1) { 12 | # table sets name of object 13 | names(dimnames(a)) = "db" 14 | } 15 | expect_equal(a, b) 16 | } 17 | 18 | test_that("1D", { 19 | check_unidim(car, TRUE, 1) 20 | check_unidim(car, 1:5, 1) 21 | check_unidim(car, FALSE, 1) 22 | 23 | check_unidim(v, TRUE, 1) 24 | check_unidim(v, 1:5, 1) 25 | check_unidim(v, FALSE, 1) 26 | }) 27 | 28 | test_that("3D", { 29 | check_unidim(car, TRUE, 1:3) 30 | check_unidim(car, 1:5, 1:3) 31 | check_unidim(car, FALSE, 1:3) 32 | 33 | check_unidim(v, TRUE, 1:3) 34 | check_unidim(v, 1:5, 1:3) 35 | check_unidim(v, FALSE, 1:3) 36 | }) 37 | 38 | 39 | test_that("large dim", { 40 | check_unidim(car, TRUE, 1:7) 41 | check_unidim(v, TRUE, 17) 42 | }) 43 | 44 | test_that("NA", { 45 | db <- car[, 1:3] 46 | db[1, 1] <- NA 47 | check_unidim(db, TRUE, TRUE) 48 | 49 | db[1:100, 1] <- NA 50 | check_unidim(db, TRUE, TRUE) 51 | }) 52 | 53 | test_that("random", { 54 | (x = sample(0:1, 1e5, replace = T)) 55 | x <- data.frame(u = factor(x)) 56 | a <- table_cpp(x, colnames(x)) 57 | b <- table(x) 58 | # don't know why table keeps x as name. 59 | expect_equal(unname(a), unname(b)) 60 | }) 61 | 62 | test_that("ordinal", { 63 | skip_if_not_installed("mlbench") 64 | library(mlbench) 65 | data(Soybean) 66 | Soybean <- na.omit(Soybean) 67 | check_unidim(Soybean, TRUE, c('precip', 'Class')) 68 | check_unidim(Soybean, TRUE, c('precip')) 69 | }) -------------------------------------------------------------------------------- /man/cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cv.R 3 | \name{cv} 4 | \alias{cv} 5 | \title{Estimate predictive accuracy with stratified cross validation.} 6 | \usage{ 7 | cv(x, dataset, k, dag = TRUE, mean = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{List of \code{\link{bnc_bn}} or a single 11 | \code{\link{bnc_bn}}. The classifiers to evaluate.} 12 | 13 | \item{dataset}{The data frame on which to evaluate the classifiers.} 14 | 15 | \item{k}{An integer. The number of folds.} 16 | 17 | \item{dag}{A logical. Whether to learn structure on each training subsample. 18 | Parameters are always learned.} 19 | 20 | \item{mean}{A logical. Whether to return mean accuracy for each classifier or 21 | to return a k-row matrix with accuracies per fold.} 22 | } 23 | \value{ 24 | A numeric vector of same length as \code{x}, giving the predictive 25 | accuracy of each classifier. If \code{mean = FALSE} then a matrix with k 26 | rows and a column per each classifier in \code{x}. 27 | } 28 | \description{ 29 | Estimate predictive accuracy of a classifier with stratified cross 30 | validation. It learns the models from the training subsamples by repeating 31 | the learning procedures used to obtain \code{x}. 
It can keep the network 32 | structure fixed and re-learn only the parameters, or re-learn both structure 33 | and parameters. 34 | } 35 | \examples{ 36 | data(car) 37 | nb <- bnc('nb', 'class', car, smooth = 1) 38 | # CV a single classifier 39 | cv(nb, car, k = 10) 40 | nb_manb <- bnc('nb', 'class', car, smooth = 1, manb_prior = 0.5) 41 | cv(list(nb=nb, manb=nb_manb), car, k = 10) 42 | # Get accuracies on each fold 43 | cv(list(nb=nb, manb=nb_manb), car, k = 10, mean = FALSE) 44 | ode <- bnc('tan_cl', 'class', car, smooth = 1, dag_args = list(score = 'aic')) 45 | # keep structure fixed across training subsamples 46 | cv(ode, car, k = 10, dag = FALSE) 47 | } 48 | -------------------------------------------------------------------------------- /tests/testthat/test-data-input.R: -------------------------------------------------------------------------------- 1 | context("Data input") 2 | 3 | test_that("Check class in dataset", { 4 | expect_error(check_class_in_dataset(class = 2, voting)) 5 | expect_error(check_class_in_dataset(class = 'class', voting)) 6 | check_class_in_dataset(class = 'Class', voting) 7 | expect_error(check_class_in_dataset(class = c('Class', 'Class'), voting), 8 | "string") 9 | }) 10 | 11 | test_that("Check dataset", { 12 | check_dataset(car) 13 | expect_error(check_dataset(as.matrix(car))) 14 | tm <- cbind(car, class=car$class) 15 | expect_error(check_dataset(tm), "unique") 16 | tm <- car; colnames(tm)[1] <- NA 17 | expect_error(check_dataset(tm), 18 | "is_non_empty_complete(cnames) is not TRUE", fixed = TRUE) 19 | tm <- car; tm[[1]] <- as.numeric(tm[[1]]) 20 | expect_error(check_dataset(tm), "factors") 21 | }) 22 | 23 | test_that("Get features", { 24 | expect_error(get_features('class', voting), "disjoint") 25 | ft <- get_features('Class', voting) 26 | expect_identical(ft, colnames(voting)[-17]) 27 | ft <- get_features('class', car) 28 | expect_identical(ft, colnames(car)[-7]) 29 | tm <- car; tm[[1]] <- as.numeric(tm[[1]]) 30 | expect_error(get_features('class', tm), "factors") 31 | }) 32 | 33 | test_that("check features", { 34 | # Nominal 35 | check_features(letters[1:5], 'f') 36 | check_features(NULL, 'f') 37 | check_features(character(), 'f') 38 | # Class in features 39 | expect_error(check_features(letters[1:5], 'e'), 'class') 40 | # Empty class 41 | expect_error(check_features(character(), NULL), 'string') 42 | }) 43 | 44 | test_that("trim dataset", { 45 | # Nominal 46 | a <- trim_dataset('buying', car) 47 | expect_identical(dim(a), c(nrow(car), 1L)) 48 | # Empty vars set 49 | expect_error(trim_dataset(character(), car), "complete") 50 | # Integer vars 51 | expect_error(trim_dataset(1, car), "character") 52 | }) -------------------------------------------------------------------------------- /R/anb-bn-operate.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | #' @rdname loglik 3 | AIC.bnc_bn <- function(object, ...) { 4 | ll <- logLik(object, ...) 5 | penalize_loglik(ll, k = 1) 6 | } 7 | #' @export 8 | #' @rdname loglik 9 | BIC.bnc_bn <- function(object, ...) { 10 | ll <- logLik(object, ...) 11 | penalize_loglik(ll, k = log(nobs(ll)) / 2) 12 | } 13 | #' @export 14 | #' @rdname loglik 15 | logLik.bnc_bn <- function(object, ...)
{ 16 | dataset <- list(...)[[1]] 17 | if (is.null(dataset) || nrow(dataset) == 0) stop("Must provide data instances.") 18 | loglik <- compute_ll(x = object, dataset = dataset) 19 | attr(loglik, "nobs") <- nrow(dataset) 20 | if (inherits(object, "bnc_bn")) attr(loglik, "df") <- nparams(object) 21 | class(loglik) <- "logLik" 22 | loglik 23 | } 24 | #' @export 25 | #' @rdname loglik 26 | cLogLik <- function(object, ...) { 27 | dataset <- list(...)[[1]] 28 | compute_cll(object, dataset) 29 | } 30 | penalize_loglik <- function(ll, k) { 31 | as.numeric(ll) - k * attr(ll, "df") 32 | } 33 | #' @export 34 | #' @describeIn inspect_bnc_bn Returns the number of free parameters in the model. 35 | nparams <- function(x) { 36 | sum(vapply(params(x), count_cpt_free_params, FUN.VALUE = numeric(1))) 37 | } 38 | #' @export 39 | #' @describeIn inspect_bnc_bn Returns the posterior of each arc from the class 40 | #' according to the MANB method. 41 | manb_arc_posterior <- function(x) { 42 | stopifnot(inherits(x, "bnc_bn")) 43 | if (!is.null(x$.manb)) { 44 | return(x$.manb) 45 | } 46 | warning("MANB arc posterior probabilities have not been computed for x.") 47 | NULL 48 | } 49 | #' @export 50 | #' @describeIn inspect_bnc_bn Returns the AWNB feature weights. 51 | awnb_weights <- function(x) { 52 | stopifnot(inherits(x, "bnc_bn")) 53 | if (!is.null(x$.weights)) { 54 | return(x$.weights) 55 | } 56 | warning("AWNB weights have not been computed for x.") 57 | NULL 58 | } -------------------------------------------------------------------------------- /tests/testthat/helper-common.R: -------------------------------------------------------------------------------- 1 | # Common testing functions 2 | 3 | # Smooth is always nonzero so that grain does not produce 0 probabilities 4 | 5 | nbvote <- function() { 6 | # data(voting, envir = parent.frame()) 7 | lp(nb('Class', voting), voting, smooth = 1) 8 | } 9 | 10 | nbvotecomp <- function() { 11 | # data(voting, envir = parent.frame()) 12 | # v <- na.omit(voting) 13 | # assign('v', v, envir = parent.frame()) 14 | lp(nb('Class', v), v, smooth = 1) 15 | } 16 | 17 | nbcar <- function() { 18 | # data(car, envir = parent.frame()) 19 | lp(nb('class', car), car, smooth = 1) 20 | } 21 | 22 | nbcarp <- function(cardata) { 23 | lp(nb('class', cardata), cardata, smooth = 1) 24 | } 25 | 26 | nbcarclass <- function() { 27 | lp(nb('class', car[, 'class', drop = FALSE]), car, smooth = 1) 28 | } 29 | 30 | random_letters_db <- function(nlet = 6, nrow = 100) { 31 | df <- replicate(nlet, random_letters_vector(nlet, nrow)) 32 | df <- as.data.frame(df, stringsAsFactors = TRUE) 33 | colnames(df) <- letters[seq_len(nlet)] 34 | df 35 | } 36 | 37 | random_letters_vector <- function(nletters, n) { 38 | sample(letters[1:nletters], n, replace = TRUE) 39 | } 40 | 41 | # Creates a random augmented NB DAG with `class` as the class variable.
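# Illustrative call (added comment; argument values are arbitrary and mirror the
# use in test-anb-dag.R):
#   random_aug_nb_dag('class', letters[1:5], maxpar = 2, wgt = 0.5)
# draws a random DAG over letters[1:5] with gRbase and then superimposes 'class'
# as a parent of every node.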
42 | random_aug_nb_dag <- function(class, V, maxpar, wgt) { 43 | dg <- gRbase::random_dag(V = V, maxpar = maxpar, wgt = wgt) 44 | dg <- graphNEL2_graph_internal(dg) 45 | superimpose_node(dag = dg, class) 46 | } 47 | 48 | identical_non_call <- function(x, y) { 49 | x$.call_struct <- y$.call_struct <- NULL 50 | x$.call_bn <- y$.call_bn <- NULL 51 | expect_identical(x, y) 52 | } 53 | 54 | test_dag <- function() { 55 | edges <- graph_from_to_to_edges('A', 'B') 56 | graph_internal(nodes = LETTERS[1:2], edges, weights = NULL, edgemode = "directed") 57 | } 58 | 59 | # Load data 60 | 61 | data(car, envir = environment()) 62 | data(voting, envir = environment()) 63 | v <- na.omit(voting) 64 | alphadb <- random_letters_db() -------------------------------------------------------------------------------- /R/predict.R: -------------------------------------------------------------------------------- 1 | #'Predicts class labels or class posterior probability distributions. 2 | #' 3 | #'@details Ties are resolved randomly. Inference is much slower if 4 | #'\code{newdata} contains \code{NA}s. 5 | #' 6 | #'@export 7 | #' 8 | #'@param object A \code{\link{bnc_bn}} object. 9 | #'@param newdata A data frame containing observations whose class has to be 10 | #' predicted. 11 | #'@param prob A logical. Whether class posterior probability should be returned. 12 | #'@param ... Ignored. 13 | #'@return If \code{prob=FALSE}, then returns a length-\eqn{N} factor with the 14 | #' same levels as the class variable in \code{x}, where \eqn{N} is the number 15 | #' of rows in \code{newdata}. Each element is the most likely 16 | #' class for the corresponding row in \code{newdata}. If \code{prob=TRUE}, 17 | #' returns a \eqn{N} by \eqn{C} numeric matrix, where \eqn{C} is the number of 18 | #' classes; each row corresponds to the class posterior of the instance. 19 | #' 20 | #'@examples 21 | #'data(car) 22 | #'nb <- bnc('nb', 'class', car, smooth = 1) 23 | #'p <- predict(nb, car) 24 | #'head(p) 25 | #'p <- predict(nb, car, prob = TRUE) 26 | #'head(p) 27 | predict.bnc_fit <- function(object, newdata, prob = FALSE, ...) { 28 | pred <- compute_cp(x = object, dataset = newdata, ...) 29 | if (!prob) { 30 | pred <- map(pred) 31 | } 32 | pred 33 | } 34 | #' Assigns instances to the most likely class. 35 | #' 36 | #' Ties are resolved randomly. 37 | #' 38 | #' @param pred A numeric matrix. Each row corresponds to class posterior 39 | #' probabilities for an instance. 40 | #' @return a factor with the same levels as the class variable. 
41 | #' @keywords internal 42 | map <- function(pred) { 43 | max_ind <- max.col(m = pred, ties.method = "random") 44 | classes <- colnames(pred) 45 | stopifnot(is_non_empty_complete(classes)) 46 | predicted <- classes[max_ind] 47 | # Return a factor with the levels of the class variable 48 | factor(predicted, levels = classes) 49 | } -------------------------------------------------------------------------------- /man/inspect_bnc_dag.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/anb-dag.R, 3 | % R/bnc-dag-operate.R 4 | \name{inspect_bnc_dag} 5 | \alias{inspect_bnc_dag} 6 | \alias{class_var} 7 | \alias{features} 8 | \alias{vars} 9 | \alias{families} 10 | \alias{modelstring} 11 | \alias{feature_families} 12 | \alias{narcs} 13 | \alias{is_semi_naive} 14 | \alias{is_anb} 15 | \alias{is_nb} 16 | \alias{is_ode} 17 | \title{Inspect a Bayesian network classifier structure.} 18 | \usage{ 19 | class_var(x) 20 | 21 | features(x) 22 | 23 | vars(x) 24 | 25 | families(x) 26 | 27 | modelstring(x) 28 | 29 | feature_families(x) 30 | 31 | narcs(x) 32 | 33 | is_semi_naive(x) 34 | 35 | is_anb(x) 36 | 37 | is_nb(x) 38 | 39 | is_ode(x) 40 | } 41 | \arguments{ 42 | \item{x}{The \code{\link{bnc_dag}} object. The Bayesian network classifier 43 | structure.} 44 | } 45 | \description{ 46 | Functions for inspecting a \code{\link{bnc_dag}} object. 47 | } 48 | \section{Functions}{ 49 | \itemize{ 50 | \item \code{class_var()}: Returns the class variable. 51 | 52 | \item \code{features()}: Returns the features. 53 | 54 | \item \code{vars()}: Returns all variables (i.e., features + class). 55 | 56 | \item \code{families()}: Returns the family of each variable. 57 | 58 | \item \code{modelstring()}: Returns the model string of the network in bnlearn format (adding a space in between two families). 59 | 60 | \item \code{feature_families()}: Returns the family of each feature. 61 | 62 | \item \code{narcs()}: Returns the number of arcs. 63 | 64 | \item \code{is_semi_naive()}: Returns TRUE if \code{x} is a semi-naive Bayes. 65 | 66 | \item \code{is_anb()}: Returns TRUE if \code{x} is an augmented naive Bayes. 67 | 68 | \item \code{is_nb()}: Returns TRUE if \code{x} is a naive Bayes. 69 | 70 | \item \code{is_ode()}: Returns TRUE if \code{x} is a one-dependence estimator. 71 | 72 | }} 73 | \examples{ 74 | data(car) 75 | nb <- bnc('nb', 'class', car, smooth = 1) 76 | narcs(nb) 77 | is_ode(nb) 78 | } 79 | -------------------------------------------------------------------------------- /R/data-input.R: -------------------------------------------------------------------------------- 1 | # Checks it is a data frame, with all named unique factor columns. 
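# Illustrative behaviour (added comment; examples taken from test-data-input.R):
#   check_dataset(car)                        # passes: named, unique, all-factor columns
#   tm <- car; tm[[1]] <- as.numeric(tm[[1]])
#   check_dataset(tm)                         # errors, since a column is not a factor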
2 | check_dataset <- function(dataset) { 3 | # Check dataset is a data frame 4 | stopifnot(is.data.frame(dataset)) 5 | # Check every column has a unique name 6 | cnames <- colnames(dataset) 7 | stopifnot(is_non_empty_complete(cnames), are_all_unique(cnames)) 8 | # Make sure they are all factors 9 | stopifnot(are_factors(dataset)) 10 | } 11 | # Checks class is a length 1 character 12 | check_class <- function(class) { 13 | # Check class is length 1 character 14 | stopifnot(assertthat::is.string(class)) 15 | } 16 | # Check the features are in correct format 17 | check_features <- function(features, class) { 18 | # Check class 19 | check_class(class) 20 | # Check features are NULL or character 21 | stopifnot(is.null(features) || is.character(features)) 22 | # Check class is not in features 23 | stopifnot(are_disjoint(class, features)) 24 | } 25 | # Checks class is a length 1 character found in the dataset 26 | check_class_in_dataset <- function(class, dataset) { 27 | check_class(class) 28 | # Check data set has unique column names 29 | check_dataset(dataset) 30 | # Check class in dataset 31 | stopifnot(!are_disjoint(class, colnames(dataset))) 32 | } 33 | # Gets the features in a data set 34 | get_features <- function(class, dataset) { 35 | # Check class 36 | check_class_in_dataset(class, dataset) 37 | # Return all column names other than class 38 | setdiff(colnames(dataset), class) 39 | } 40 | # Trim the dataset to the vars 41 | trim_dataset <- function(vars, dataset) { 42 | # Check dataset 43 | check_dataset(dataset) 44 | # Check vars are unique and non empty 45 | stopifnot(is_non_empty_complete(vars)) 46 | stopifnot(is.character(vars)) 47 | stopifnot(are_all_unique(vars)) 48 | # If the same return original dataset 49 | if (is_perm(vars, colnames(dataset))) { 50 | dataset 51 | } 52 | # Otherwise trim it 53 | else { 54 | dataset[ , vars, drop=FALSE] 55 | } 56 | } -------------------------------------------------------------------------------- /tests/testthat/test-wrap-mlr.R: -------------------------------------------------------------------------------- 1 | context("mlr") 2 | 3 | test_that("as mlr", { 4 | skip_on_cran() 5 | skip_if_not_installed('mlr') 6 | skip("because it now fails on r-devel") 7 | library(mlr) 8 | # Creates a learner just for fitting 9 | nf <- nbcar() 10 | ml <- as_mlr(nf, dag = FALSE) 11 | expect_identical(names(ml$par.vals$args), c('lp_fargs')) 12 | # Creates a learner for structure learning and fitting 13 | nf <- nbcar() 14 | ml <- as_mlr(nf, dag = TRUE) 15 | expect_identical(names(ml$par.vals$args), c('lp_fargs', 'dag_fargs')) 16 | }) 17 | 18 | test_that("train", { 19 | skip_on_cran() 20 | skip_if_not_installed('mlr') 21 | skip("because it now fails on r-devel") 22 | library(mlr) 23 | # mlr needs to be loaded for train() to work; otherwise it will fail because 24 | # it won't find learner options c("show.learner.output", "on.learner.error", 25 | # "on.learner.warning"). 
To have it working without mlr loaded maybe I must 26 | # specify these in as_mlr() 27 | t <- mlr::makeClassifTask(id = "compare", data = car, target = 'class', 28 | fixup.data = "no", check.data = FALSE) 29 | nf <- nbcar() 30 | # Train just with fitting 31 | ml <- as_mlr(nf, dag = FALSE) 32 | mod = mlr::train(ml, t, subset = sample(nrow(car), 100)) 33 | # Train with structure learning and fitting 34 | ml <- as_mlr(nf, dag = TRUE) 35 | mod = mlr::train(ml, t, subset = sample(nrow(car), 100)) 36 | 37 | detach('package:mlr') 38 | }) 39 | 40 | test_that("resample", { 41 | skip_on_cran() 42 | skip_if_not_installed('mlr') 43 | skip("because it now fails on r-devel") 44 | library(mlr) 45 | 46 | ctrl = makeFeatSelControlSequential(alpha = 0, method = "sfs") 47 | rdesc = makeResampleDesc(method = "Holdout") 48 | ct <- mlr::makeClassifTask(id = "compare", data = car, target = 'class', 49 | fixup.data = "no", check.data = FALSE) 50 | nf <- nbcar() 51 | bnl <- as_mlr(nf, dag = TRUE) 52 | sfeats = selectFeatures(learner = bnl, task = ct, resampling = rdesc, 53 | control = ctrl, show.info = FALSE) 54 | sfeats$x 55 | detach('package:mlr') 56 | }) -------------------------------------------------------------------------------- /tests/testthat/test-anb-bn-operate.R: -------------------------------------------------------------------------------- 1 | context("bnc bn operate") 2 | 3 | test_that("Free parameters", { 4 | u <- nbcar() 5 | b <- nparams(x = u) 6 | expect_equal(b, 63) 7 | 8 | u <- lp(nb('Class', voting), voting, smooth = 1) 9 | b <- nparams(u) 10 | expect_equal(b, 2 * 16 + 1) 11 | }) 12 | 13 | test_that("logLik AIC BIC", { 14 | nb <- lp(nb('class', car), car, smooth = 0) 15 | ll <- logLik(nb, car) 16 | expect_true(inherits(ll, "logLik")) 17 | expect_equal(attr(ll, "nobs"), nrow(car)) 18 | expect_equal(attr(ll, "df"), 63) 19 | expect_equal(as.vector(ll), -13503.69, tolerance = 1e-6) 20 | aic <- AIC(nb, car) 21 | expect_equal(as.vector(aic), -13566.69, tolerance = 1e-6) 22 | bic <- BIC(nb, car) 23 | expect_equal(as.vector(bic), -13738.51, tolerance = 1e-6) 24 | }) 25 | 26 | test_that("logLik AIC BIC as bnlearn", { 27 | nb <- nb('class', car[, c('buying', 'class')]) 28 | nb <- lp(nb, car, smooth = 0) 29 | ll <- logLik(nb, car) 30 | aic <- AIC(nb, car) 31 | bic <- BIC(nb, car) 32 | expect_equal(as.vector(ll), -3724.18038821656273) 33 | expect_equal( aic, -3739.18038821654636) 34 | expect_equal( bic, -3780.09078783677614) 35 | 36 | nb <- lp(nb('class', car), car, smooth = 0) 37 | ll <- logLik(nb, car) 38 | aic <- AIC(nb, car) 39 | bic <- BIC(nb, car) 40 | expect_equal(as.vector(ll), -13503.6883427047687) 41 | expect_equal( aic, -13566.6883427047687) 42 | expect_equal( bic, -13738.5120211097346) 43 | }) 44 | 45 | test_that("manb_arc_posterior", { 46 | a <- nbcar() 47 | expect_warning(manb_arc_posterior(a), "MANB arc posterior probabilities have not been computed for x.") 48 | a <- lp(a, car, smooth = 1, manb_prior = 0.1) 49 | b <- manb_arc_posterior(a) 50 | expect_equal(names(b), features(a)) 51 | expect_equal(as.vector(b["doors"]), 2.921702e-06, tolerance = 1e-7) 52 | }) 53 | 54 | test_that("awnb weights", { 55 | a <- nbcar() 56 | expect_warning(awnb_weights(a), "AWNB weights have not been computed for x.") 57 | a <- lp(a, car, smooth = 1, awnb_trees = 10) 58 | b <- awnb_weights(a) 59 | expect_equal(names(b), features(a)) 60 | expect_true(are_probs(b)) 61 | }) -------------------------------------------------------------------------------- /R/memoise.R: 
-------------------------------------------------------------------------------- 1 | #' Memoise a function. 2 | #' 3 | #' Based on Hadley Wickham's memoise package. Assumes that the argument to f is a 4 | #' character vector. 5 | #' 6 | #' This function is a slightly modified version of 7 | #' \code{memoise} to avoid the use of digest. The remaining functions 8 | #' are copied as is from memoise. 9 | #' 10 | #' @keywords internal 11 | #' @author Hadley Wickham, Bojan Mihaljevic 12 | #' @param f a function 13 | memoise_char <- memoize <- function(f) { 14 | cache <- new_cache() 15 | memo_f <- function(x) { 16 | hash <- paste0(x, collapse = ';') 17 | # stopifnot(length(hash) == 1L) 18 | if (cache$has_key(hash)) { 19 | cache$get(hash) 20 | } else { 21 | res <- f(x) 22 | cache$set(hash, res) 23 | res 24 | } 25 | } 26 | attr(memo_f, "memoised") <- TRUE 27 | return(memo_f) 28 | } 29 | call_memoised_char <- function(x, cache) { 30 | do.call(cache, list(x = x)) 31 | } 32 | #' Forget a memoized function. 33 | #' 34 | #' @keywords internal 35 | #' @author Hadley Wickham 36 | forget <- function(f) { 37 | if (!is.function(f)) return(FALSE) 38 | 39 | env <- environment(f) 40 | if (!exists("cache", env, inherits = FALSE)) return(FALSE) 41 | 42 | cache <- get("cache", env) 43 | cache$reset() 44 | 45 | TRUE 46 | } 47 | #' Is it memoized? 48 | #' 49 | #' @keywords internal 50 | #' @author Hadley Wickham 51 | is.memoised <- is.memoized <- function(f) { 52 | identical(attr(f, "memoised"), TRUE) 53 | } 54 | #' Make a new cache. 55 | #' 56 | #' @keywords internal 57 | #' @author Hadley Wickham 58 | new_cache <- function() { 59 | 60 | cache <- NULL 61 | cache_reset <- function() { 62 | cache <<- new.env(TRUE, emptyenv()) 63 | } 64 | 65 | cache_set <- function(key, value) { 66 | assign(key, value, envir = cache) 67 | } 68 | 69 | cache_get <- function(key) { 70 | get(key, envir = cache, inherits = FALSE) 71 | } 72 | 73 | cache_has_key <- function(key) { 74 | exists(key, envir = cache, inherits = FALSE) 75 | } 76 | 77 | cache_reset() 78 | list( 79 | reset = cache_reset, 80 | set = cache_set, 81 | get = cache_get, 82 | has_key = cache_has_key, 83 | keys = function() ls(cache) 84 | ) 85 | } -------------------------------------------------------------------------------- /tests/testthat/test-infer-anb-cpp.R: -------------------------------------------------------------------------------- 1 | context("infer-anb-cpp") 2 | 3 | test_that("Missing features", { 4 | tn <- nbcar() 5 | expect_error(compute_joint(tn, car[, 1:2]), 6 | "Some features missing from data set.") 7 | }) 8 | 9 | test_that("Single predictor", { 10 | tn <- lp(nb('class', car[, c(1,7)]), car, smooth = 0) 11 | pt <- compute_joint(tn, car[, 1:2]) 12 | expect_identical(dim(pt), c(nrow(car), 4L)) 13 | }) 14 | 15 | test_that("0 rows dataset", { 16 | tn <- nbcar() 17 | pt <- compute_joint(tn, car[FALSE, ]) 18 | expect_identical(dim(pt), c(0L, 4L)) 19 | }) 20 | 21 | test_that("No features", { 22 | nb <- bnc_dag(nb_dag('class', NULL), 'class') 23 | nb <- lp(nb, car, smooth = 1) 24 | pt <- compute_joint(nb, car) 25 | expect_equal(as.vector(pt[1, ]), as.vector(log(params(nb)[['class']]))) 26 | 27 | pt2 <- compute_joint(nb, car[, FALSE]) 28 | expect_equal(pt, pt2) 29 | }) 30 | 31 | test_that("Make CPT", { 32 | tn <- nbcar() 33 | make_cpt_object(tn$.params$buying, class_var = class_var(tn)) 34 | # todo: need to somehow test this 35 | }) 36 | 37 | test_that("Data and unmodified", { 38 | tn <- lp(nb('class', car[ , 5:7]), car[ , 5:7], smooth = 1) 39 | mod <- tn 40 | db <- car 41 | 
p <- predict(tn, db) 42 | expect_equal(db, car) 43 | expect_equal(db, car) 44 | expect_equal(tn, mod) 45 | }) 46 | 47 | test_that("Bug", { 48 | skip("Local rdata file") 49 | load('~/code/bnclassify-client/rdata/tmp-debug.rdata') 50 | gr <- candidate_dags[[7]] 51 | gr <- lp_implement(gr, .mem_cpts = train[[1]]) 52 | predict(gr, test[[1]]) 53 | compute_joint(gr, test[[1]]) 54 | compute_log_joint_complete(gr, test[[1]]) 55 | compute_anb_log_joint_per_class(gr, test[[1]]) 56 | exp(compute_anb_log_joint_per_class(gr, test[[1]])) 57 | }) 58 | 59 | test_that("cpt var values nominal", { 60 | test_ind <- function() { 61 | samp <- function(n) { 62 | sample(1:n, size = 1) 63 | } 64 | dim <- c(samp(10), samp(10) , samp(10) ) 65 | index <- c(samp(dim[1]), samp(dim[2]), 1) 66 | ind <- entry_index(index - 1, dim) 67 | target <- arrayInd(ind + 1, dim) 68 | expect_true(all(index == target)) 69 | } 70 | for (i in 1:1e2 ) { 71 | test_ind() 72 | } 73 | }) -------------------------------------------------------------------------------- /tests/testthat/test-graph-internal.R: -------------------------------------------------------------------------------- 1 | context("graph internal") 2 | 3 | test_that("empty graph", { 4 | a <- graph_internal() 5 | expect_equal(length(a$nodes), 0) 6 | expect_equal(length(graph_nodes(a)), 0) 7 | expect_is(a$edges, "matrix") 8 | expect_true(mode(a$edges) == "character") 9 | }) 10 | 11 | test_that("get adjacent", { 12 | e <- graph_from_to_to_edges('a', 'b') 13 | a <- graph_internal(letters[1:5], e) 14 | expect_equal(graph_get_adjacent("a", a), 'b') 15 | expect_equal(graph_get_adjacent("b", a), 'a') 16 | expect_equal(graph_get_adjacent("c", a), character()) 17 | expect_error(graph_get_adjacent("z", a)) 18 | }) 19 | 20 | test_that("connected components", { 21 | x <- nbcar() 22 | g <- graph_connected_components(x$.dag) 23 | expect_equal(length(g), 1) 24 | 25 | g <- graph_internal() 26 | g <- graph_connected_components(g) 27 | expect_null(g) 28 | }) 29 | 30 | test_that("graph union", { 31 | edges <- graph_from_to_to_edges('A', 'B') 32 | g <- graph_internal(LETTERS[1:3], edges = edges, edgemode = "directed" ); 33 | connected <- graph_connected_components(g) 34 | gs <- lapply(connected, subgraph, g) 35 | gu <- graph_union(gs) 36 | # Currently not identical because I am missing to set some attributes in graph_internal2graph_NEL 37 | # expect_identical(gu, g) 38 | expect_identical(graph_nodes(gu), graph_nodes(g)) 39 | expect_equal(gu$edges, g$edges) 40 | }) 41 | 42 | test_that("Direct tree", { 43 | g <- graph_internal() 44 | e <- direct_tree(g) 45 | expect_equal(e$edgemode, "directed") 46 | # expect_equal(graph::ugraph(graph_internal2graph_NEL(e)), graph_internal2graph_NEL(g)) 47 | 48 | gr <- pairwise_ode_score_contribs(class = 'class', dataset = car, score = 'loglik') 49 | af <- max_weight_forest(gr) 50 | d <- direct_tree(af) 51 | expect_equal(d$edgemode, "directed") 52 | d <- direct_tree(af, 'maint') 53 | expect_equal(d$edgemode, "directed") 54 | d <- direct_tree(af, 'safety') 55 | expect_equal(d$edgemode, "directed") 56 | }) 57 | 58 | test_that("node parents no parents", { 59 | g <- graph_internal() 60 | expect_error(graph_node_parents('A', g)) 61 | 62 | p <- graph_node_parents('A', graph_internal('A')) 63 | expect_equal(p, character()) 64 | }) 65 | -------------------------------------------------------------------------------- /R/learn-hc.R: -------------------------------------------------------------------------------- 1 | # Ties are resolved randomly. 
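# Outline of the search below (added descriptive comment):
#   1. score the candidate DAGs (initially just `init`) by stratified k-fold CV
#      accuracy, re-estimating parameters on each training fold via the CPT caches;
#   2. if the best candidate improves the current score by at least `epsilon`,
#      make it the current DAG; otherwise stop;
#   3. generate new candidates with `step()` and repeat, stopping early if
#      accuracy reaches 1.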
2 | greedy_search <- function(class, to_include, init, step, dataset, epsilon, k, 3 | smooth, cache_reset = NULL) { 4 | stopifnot(is_nonnegative(epsilon)) 5 | # Init loop variables 6 | scores_log <- numeric() 7 | current_dag <- NULL 8 | current_score <- -Inf 9 | candidate_dags <- list(init) 10 | # Get indices of training sets 11 | test_folds <- partition_dataset(dataset, class, k) 12 | train <- lapply(test_folds, function(x) dataset[-x, , drop = FALSE]) 13 | train <- lapply(train, make_cpts_cache, smooth = smooth) 14 | test <- lapply(test_folds, function(x) dataset[x, , drop = FALSE]) 15 | test <- lapply(test, make_evidence) 16 | # Start caches for training sets 17 | # TODO: smooth goes directly to cache. 18 | # skip asserts during greedy search 19 | skip_env$skip_assert <- TRUE 20 | while (length(candidate_dags) > 0) { 21 | # if max accuracy then break 22 | if (isTRUE(fast_equal(current_score, 1))) { break } 23 | # Score all candidate states 24 | # Update each candidate dag on the correct cache 25 | # Get the prediction for each ddag 26 | # evaluate 27 | scores <- cv_lp_partition(candidate_dags, train, test) 28 | # Stop if it is not better than current_score 29 | if (!is_improvement(scores, current_score, epsilon)) break 30 | # Make the best dag the current one 31 | best_ind <- max_random(scores) 32 | current_dag <- candidate_dags[[best_ind]] 33 | current_score <- scores[[best_ind]] 34 | scores_log <- c(scores_log, current_score) 35 | if (!is.null(cache_reset)) { 36 | if (length(scores_log) %% cache_reset == 0) lapply(train, forget) 37 | } 38 | # Generate all candidates from best state 39 | # ...tan_hcsp requires parameters dataset, smooth, and k; the rest do not 40 | candidate_dags <- step(bnc_dag = current_dag, 41 | features_to_include = to_include, 42 | train = train, test = test) 43 | } 44 | # Turn assert back on 45 | skip_env$skip_assert <- FALSE 46 | current_dag$.greedy_scores_log <- scores_log 47 | current_dag 48 | } 49 | is_improvement <- function(new_scores, current_score, epsilon) { 50 | (max(new_scores) - epsilon >= current_score) # TODO: Enable relative improvement 51 | } -------------------------------------------------------------------------------- /vignettes/methods.bib: -------------------------------------------------------------------------------- 1 | @Article{Efron1979, 2 | Title = {Bootstrap methods: Another look at the jackknife}, 3 | Author = {Efron, Bradley}, 4 | Journal = {The Annals of Statistics}, 5 | Year = {1979}, 6 | Number = {1}, 7 | Pages = {1--26}, 8 | Volume = {7}, 9 | 10 | Owner = {Bojan}, 11 | Publisher = {JSTOR}, 12 | Timestamp = {2015.09.04} 13 | } 14 | 15 | @Article{Pigott2001, 16 | Title = {A review of methods for missing data}, 17 | Author = {Pigott, Therese D}, 18 | Journal = {Educational research and evaluation}, 19 | Year = {2001}, 20 | Number = {4}, 21 | Pages = {353--383}, 22 | Volume = {7}, 23 | 24 | Publisher = {Taylor \& Francis} 25 | } 26 | 27 | @Article{Wei2011, 28 | Title = {The application of naive {B}ayes model averaging to predict {A}lzheimer's disease from genome-wide data}, 29 | Author = {Wei, Wei and Visweswaran, Shyam and Cooper, Gregory F}, 30 | Journal = {Journal of the American Medical Informatics Association}, 31 | Year = {2011}, 32 | Number = {4}, 33 | Pages = {370--375}, 34 | Volume = {18}, 35 | 36 | Owner = {Bojan}, 37 | Publisher = {The Oxford University Press}, 38 | Timestamp = {2015.09.10} 39 | } 40 | 41 | @article{Zhu1997algorithm, 42 | title={Algorithm 778: {L-BFGS-B}: {Fortran} subroutines for large-scale 
bound-constrained optimization}, 43 | author={Zhu, Ciyou and Byrd, Richard H and Lu, Peihuang and Nocedal, Jorge}, 44 | journal={ACM Transactions on Mathematical Software (TOMS)}, 45 | volume={23}, 46 | number={4}, 47 | pages={550--560}, 48 | year={1997}, 49 | publisher={ACM} 50 | } 51 | 52 | @Manual{Gentleman2015, 53 | Title = {graph: A package to handle graph data structures}, 54 | Author = {R. Gentleman and Elizabeth Whalen and W. Huber and S. Falcon}, 55 | Note = {R package version 1.46.0}, 56 | Year = {2015} 57 | } 58 | -------------------------------------------------------------------------------- /man/greedy_wrapper.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/0bnclassify-doc.R, R/learn-struct.R 3 | \name{greedy_wrapper} 4 | \alias{greedy_wrapper} 5 | \alias{fssj} 6 | \alias{bsej} 7 | \alias{tan_hc} 8 | \alias{kdb} 9 | \alias{tan_hcsp} 10 | \title{Learn Bayesian network classifiers in a greedy wrapper fashion.} 11 | \usage{ 12 | fssj(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 13 | 14 | bsej(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 15 | 16 | tan_hc(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 17 | 18 | kdb( 19 | class, 20 | dataset, 21 | k, 22 | kdbk = 2, 23 | epsilon = 0.01, 24 | smooth = 0, 25 | cache_reset = NULL 26 | ) 27 | 28 | tan_hcsp(class, dataset, k, epsilon = 0.01, smooth = 0, cache_reset = NULL) 29 | } 30 | \arguments{ 31 | \item{class}{A character. Name of the class variable.} 32 | 33 | \item{dataset}{The data frame from which to learn the classifier.} 34 | 35 | \item{k}{An integer. The number of folds.} 36 | 37 | \item{epsilon}{A numeric. Minimum absolute improvement in accuracy required 38 | to keep searching.} 39 | 40 | \item{smooth}{A numeric. The smoothing value (\eqn{\alpha}) for Bayesian 41 | parameter estimation. Nonnegative.} 42 | 43 | \item{cache_reset}{A numeric. Number of iterations after which to reset the 44 | cache of conditional probability tables. A small number reduces the amount 45 | of memory used. \code{NULL} means the cache is never reset (the default).} 46 | 47 | \item{kdbk}{An integer. The maximum number of feature parents per feature.} 48 | } 49 | \value{ 50 | A \code{\link{bnc_dag}} object. 51 | } 52 | \description{ 53 | Greedy wrapper algorithms for learning Bayesian network classifiers. All 54 | algorithms use a cross-validated estimate of predictive accuracy to evaluate 55 | candidate structures. 56 | } 57 | \examples{ 58 | data(car) 59 | tanhc <- tan_hc('class', car, k = 5, epsilon = 0) 60 | \dontrun{plot(tanhc)} 61 | 62 | } 63 | \references{ 64 | Pazzani M (1996). Constructive induction of Cartesian product 65 | attributes. In \emph{Proceedings of the Information, Statistics and 66 | Induction in Science Conference (ISIS-1996)}, pp. 66-77. 67 | 68 | Keogh E and Pazzani M (2002). Learning the structure of augmented Bayesian 69 | classifiers. In \emph{International Journal on Artificial Intelligence 70 | Tools}, \bold{11}(4), pp. 587-601. 71 | } 72 | -------------------------------------------------------------------------------- /R/frontend-anb.r: -------------------------------------------------------------------------------- 1 | 2 | #' Direct an undirected graph. 3 | #' 4 | #' Starting from a \code{root} node, directs all arcs away from it and applies 5 | #' the same, recursively, to its children and descendants. Produces a directed 6 | #' forest.
7 | #' 8 | #' @param g An undirected graph. 9 | #' @param root A character. Optional tree root. 10 | #' @return A directed graph 11 | #' @keywords internal 12 | direct_forest <- function(g, root = NULL) { 13 | graph_direct_forest(g, root = root) 14 | } 15 | #' Direct an undirected graph. 16 | #' 17 | #' The graph must be connected and the function produces a directed tree. 18 | #' @return A graph. The directed tree. 19 | #' @keywords internal 20 | direct_tree <- function(g, root = NULL) { 21 | graph_direct_tree(g, root) 22 | } 23 | direct_graph <- function(g) { 24 | graph_direct(g) 25 | } 26 | #' Returns the undirected augmenting forest. 27 | #' 28 | #' Uses Kruskal's algorithm to find the augmenting forest that maximizes the sum 29 | #' of pairwise weights. When the weights are class-conditional mutual 30 | #' information this forest maximizes the likelihood of the tree-augmented naive 31 | #' Bayes network. 32 | #' 33 | #' If \code{g} is not connected then this will return a forest; otherwise it is 34 | #' a tree. 35 | #' 36 | #' @param g A graph. The undirected graph with pairwise 37 | #' weights. 38 | #' @return A graph. The maximum spanning forest. 39 | #' @references Friedman N, Geiger D and Goldszmidt M (1997). Bayesian network 40 | #' classifiers. \emph{Machine Learning}, \bold{29}, pp. 131--163. 41 | #' 42 | #' Murphy KP (2012). \emph{Machine learning: a probabilistic perspective}. The 43 | #' MIT Press. pp. 912-914. 44 | #' @keywords internal 45 | max_weight_forest <- function(g) { 46 | graph_max_weight_forest(g) 47 | } 48 | #' Merges multiple disjoint graphs into a single one. 49 | #' 50 | #' @param g A graph 51 | #' @return A graph 52 | #' @keywords internal 53 | graph_union <- function(g) { 54 | graph_internal_union(g) 55 | } 56 | # Adds a node to a DAG as root and parent of all nodes.
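# Illustrative example (added comment; node names are arbitrary): given a DAG
# with nodes {A, B} and arc A -> B, superimpose_node(dag, 'class') returns a DAG
# with the additional arcs class -> A and class -> B.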
57 | superimpose_node <- function(dag, node) { 58 | graph_superimpose_node(dag, node) 59 | } 60 | is_dag_graph <- function(dag) { 61 | graph_is_dag(dag) 62 | } 63 | check_node <- function(node) { 64 | stopifnot(assertthat::is.string(node)) 65 | } 66 | #' Returns a naive Bayes structure 67 | #' 68 | #' @keywords internal 69 | nb_dag <- function(class, features) { 70 | anb_make_nb(class, features) 71 | } -------------------------------------------------------------------------------- /tests/testthat/test-hc-fssj.R: -------------------------------------------------------------------------------- 1 | context("HC fssj") 2 | 3 | test_that("Expand supernode nominal", { 4 | a <- nbcarp(car[, 2:7]) 5 | s <- not_cci(a) 6 | ex <- augment_supernodes(new_node = 'buying', supernodes = s, bnc_dag = a) 7 | expect_equal(length(ex), 5) 8 | maint <- ex[[1]] 9 | expect_equal(graph_num_arcs(dag(maint)), 7) 10 | expect_equal(families(maint)[['buying']], c('buying', 'maint', 'class')) 11 | 12 | a <- nbcar() 13 | expect_error(augment_supernodes(new_node = 'safety', 14 | supernodes = s, bnc_dag = a), 'already') 15 | }) 16 | 17 | test_that("includes_by_joins nominal", { 18 | a <- nbcarp(car[, 5:7]) 19 | # All possible including models 20 | # debugonce(includes_by_joins) 21 | cands <- includes_in_supernodes(a, colnames(car)[-7]) 22 | expect_equal(length(cands), 4 * 2) 23 | buying <- cands[[1]] 24 | expect_equal(families(buying)[['buying']], 25 | c('buying', 'lug_boot', 'class')) 26 | }) 27 | 28 | test_that("includes", { 29 | nb <- nb('Class') 30 | a <- includes(nb, c('crime','immigration')) 31 | expect_equal(length(a), 2) 32 | expect_equal(features(a[[1]]), 'crime') 33 | expect_equal(features(a[[2]]), 'immigration') 34 | }) 35 | 36 | test_that("includes no features", { 37 | nb <- nbcar() 38 | a <- includes(nb, character()) 39 | expect_equal(length(a), 0) 40 | }) 41 | 42 | test_that("augment supernodes", { 43 | nb <- nb('class', NULL) 44 | e <- includes_in_supernodes(nb, NULL) 45 | expect_equal(length(e), 0) 46 | 47 | nb <- nb('class', NULL) 48 | e <- includes_in_supernodes(nb, 'doors') 49 | expect_equal(length(e), 0) 50 | 51 | nb <- nb('class', NULL) 52 | e <- includes_in_supernodes(nb, c('doors', 'safety')) 53 | expect_equal(length(e), 0) 54 | 55 | nb <- nb('class', features = 'maint') 56 | e <- includes_in_supernodes(nb, c('doors', 'safety')) 57 | expect_equal(length(e), 2) 58 | 59 | nb <- nb('class', features = c('maint', 'buying')) 60 | e <- includes_in_supernodes(nb, c('doors', 'safety')) 61 | expect_equal(length(e), 4) 62 | 63 | nb <- nb('class', features = c('maint', 'buying')) 64 | e <- includes_in_supernodes(nb, c('doors', 'safety', 'lug_boot')) 65 | expect_equal(length(e), 6) 66 | 67 | nb <- nb('class', features = c('maint', 'buying', 'persons')) 68 | e <- includes_in_supernodes(nb, c('doors', 'safety', 'lug_boot')) 69 | expect_equal(length(e), 9) 70 | 71 | f <- includes_in_supernodes(e[[1]], c('doors', 'safety', 'lug_boot')) 72 | expect_equal(length(f), 6) 73 | }) -------------------------------------------------------------------------------- /R/learn-params-manb.R: -------------------------------------------------------------------------------- 1 | # Computes MANB local arc C->X posterior probabilities and uses them to compute 2 | # the MANB CPTs. 3 | # Formula for posterior assumes that smooth = 1. 4 | # @param manb_prior Currently ignored. 5 | # @return Returns a vector of probabilities of the same length as ctgts.
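# In words (added descriptive comment): for each feature X the code below computes
#   P(C -> X | D) = P(D | C -> X) P(C -> X) /
#                   [ P(D | C -> X) P(C -> X) + P(D | C ... X) P(C ... X) ]
# with P(C -> X) = prior and Dirichlet marginal likelihoods for the two terms;
# compute_manb_cpt() then mixes the conditional and marginal CPTs of X using this
# posterior as the weight.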
6 | compute_manb_arc_posteriors <- function(x, ctgts, smooth, prior = 0.5) { 7 | if (!is_nb(x)) stop("MANB can only be applied to naive Bayes.") 8 | stopifnot(smooth > 0) 9 | stopifnot(identical(features(x), names(ctgts))) 10 | vapply(ctgts, compute_manb_arc_posterior, smooth = smooth, prior = prior, 11 | FUN.VALUE = numeric(1)) 12 | } 13 | # Assuming class is the last dimension. 14 | # Assuming that all instances are labelled 15 | # Returns a singe probability. 16 | compute_manb_arc_posterior <- function(nijk, smooth, prior) { 17 | # If not 2D then it is not a naive Bayes 18 | stopifnot(length(dim(nijk)) == 2, are_probs(prior)) 19 | r <- nrow(nijk) 20 | rt <- ncol(nijk) 21 | nij <- colSums(nijk) 22 | nik <- rowSums(nijk) 23 | ni <- sum(nijk) 24 | # P(D | C -> X) 25 | lpa <- rt * lgamma(r * smooth) - sum(lgamma(r * smooth + nij)) + sum(lgamma(smooth + nijk) - lgamma(smooth)) 26 | # P(D | C ... X) 27 | lpna <- lgamma(r*smooth) - lgamma(r*smooth + ni) + sum(lgamma(smooth + nik) - lgamma(smooth)) 28 | # P(C -> X) 29 | denom <- matrixStats::logSumExp(c(lpna, lpa)) 30 | lpa_post <- lpa - denom 31 | # P(C ... X) 32 | lpna_post <- lpna - denom 33 | # Check valid probs 34 | stopifnot(all.equal(exp(lpa_post) + exp(lpna_post), 1)) 35 | exp(lpa_post) 36 | # Consider priors: 37 | lpa_prior <- log(prior) 38 | lnpa_prior <- log(1 - prior) 39 | lpa_num <- lpa + lpa_prior 40 | lpna_num <- lpna + lnpa_prior 41 | denom <- matrixStats::logSumExp(c(lpa_num, lpna_num)) 42 | lpa_post <- lpa_num - denom 43 | lpna_post <- lpna_num - denom 44 | exp(lpa_post) + exp(lpna_post) 45 | exp(lpa_post) 46 | } 47 | # @param nijk A contingency table of X and C 48 | compute_manb_cpt <- function(nijk, prob_arc, smooth) { 49 | stopifnot(length(dim(nijk)) == 2, are_probs(prob_arc), length(prob_arc) == 1) 50 | lnijk <- log(ctgt2cpt(nijk, smooth = smooth)) 51 | lp_arc <- log(prob_arc) 52 | arc <- lp_arc + lnijk 53 | # no arc 54 | lni <- rowSums(nijk) 55 | lni[] <- log(normalize(lni + smooth)) 56 | lp_noarc <- log(1 - prob_arc) 57 | no_arc <- lp_noarc + lni 58 | rt <- ncol(nijk) 59 | no_arc <- rep(no_arc, rt) 60 | entries <- matrix(c(as.numeric(arc), as.numeric(no_arc)), ncol = 2) 61 | arc[] <- matrixStats::rowLogSumExps(entries) 62 | stopifnot(identical(dimnames(arc), dimnames(nijk))) 63 | exp(arc) 64 | } -------------------------------------------------------------------------------- /src/table.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | // [[Rcpp::export]] 5 | Rcpp::IntegerVector tabulate_cpp(const Rcpp::IntegerVector & v, R_xlen_t nlevels) { 6 | // Using R_xlen_t to avoid checking for entries < 0 7 | std::vector table(nlevels); 8 | R_xlen_t n = v.size(); 9 | for (R_xlen_t i = 0; i < n; ++i) { 10 | table.at( v.at(i) - 1 ) ++; 11 | } 12 | // Wrapp may throw errors with R_xlen_t 13 | // return wrap(table); 14 | IntegerVector a(table.size()); 15 | std::copy(table.begin(), table.end(), a.begin()); 16 | return a; 17 | } 18 | 19 | // Based on table() 20 | // dataframe { 21 | // for each column get the num of dims.tfm 22 | // get size of the resulting talbe 23 | // finally tabulate by the bins meaning how many are there 24 | // each value will correspond to its index in the dim array. 25 | // this is just indexing by a set of values, then you go to there and find it. 
26 | // } 27 | // [[Rcpp::export]] 28 | Rcpp::IntegerVector table_cpp(const RObject & input, const RObject & columns) { 29 | if(!is(input)) stop("Must be a data frame."); 30 | DataFrame data = as(input); 31 | if(!is(columns)) stop("Must be character vector."); 32 | CharacterVector cols = as(columns); 33 | data = data[cols]; 34 | 35 | const R_xlen_t ncols = data.ncol(); 36 | if (ncols == 0) stop("No columns in data frame."); 37 | const IntegerVector & column = data.at(0); 38 | // There is a single entry for each row 39 | IntegerVector to_tabulate = no_init(column.size()); 40 | to_tabulate.fill(1); 41 | // The product of dimensions. 42 | R_xlen_t pd = 1; 43 | IntegerVector dims(ncols); 44 | List dimnames(ncols); 45 | dimnames.names() = data.names(); 46 | 47 | for (R_xlen_t i = 0; i < ncols; i++) { 48 | const IntegerVector & a = data.at(i); 49 | if(!Rf_isFactor(a)) stop("Not a factor."); 50 | const CharacterVector & factorLevels = a.attr("levels"); 51 | R_xlen_t nl = factorLevels.size(); 52 | to_tabulate = to_tabulate + pd * (a - 1L); 53 | pd = pd * nl ; 54 | dims.at(i) = nl; 55 | dimnames.at(i) = factorLevels; 56 | } 57 | 58 | to_tabulate = na_omit(to_tabulate); 59 | IntegerVector tbl = tabulate_cpp(to_tabulate, pd); 60 | tbl.attr("dim") = dims; 61 | tbl.attr("dimnames") = dimnames; 62 | tbl.attr("class") = "table"; 63 | 64 | return tbl; 65 | } 66 | 67 | 68 | /*** R 69 | kr <- foreign::read.arff('~/gd/phd/code/works-aug-semi-bayes/data/original/kr-vs-kp.arff') 70 | dbor <- kr 71 | fd <- dbor[, 1:3] 72 | microbenchmark::microbenchmark( a <- table_cpp(fd), tbl <- table(fd) ) 73 | */ 74 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-struct.R: -------------------------------------------------------------------------------- 1 | context("Learn struct") 2 | 3 | test_that("nb Nominal", { 4 | n <- nb('class', car) 5 | expect_identical('class', class_var(n)) 6 | expect_equal(graph_num_arcs(dag(n)), 6) 7 | }) 8 | 9 | test_that("nb Class not in dataset", { 10 | expect_error(nb('Class', car), 'disjoint') 11 | }) 12 | 13 | test_that("nb No features ", { 14 | n <- nb('class', car[, 7, drop = FALSE]) 15 | expect_equal(graph_num_arcs(dag(n)), 0) 16 | # Not numeric dataset 17 | }) 18 | 19 | test_that("fssj nominal", { 20 | skip_on_cran() 21 | suppressWarnings(RNGversion("3.5.0")) 22 | set.seed(0) 23 | f <- fssj('class', dataset = car, k = 10, epsilon = 0.01) 24 | expect_equal(features(f), character()) 25 | suppressWarnings(RNGversion("3.5.0")) 26 | set.seed(0) 27 | f <- fssj('class', dataset = car, k = 10, epsilon = 0) 28 | expect_true(is_perm(features(f), colnames(car)[-7])) 29 | }) 30 | 31 | test_that("bsej nominal", { 32 | skip_on_cran() 33 | suppressWarnings(RNGversion("3.5.0")) 34 | set.seed(0) 35 | f <- bsej('class', dataset = car, k = 10, epsilon = 0.01) 36 | expect_equal(features(f), colnames(car)[-7]) 37 | expect_equal(narcs(f), 6 + 1 + 2 + 3) 38 | expect_equal(length(f$.greedy_scores_log), 4) 39 | expect_equal(f$.greedy_scores_log, sort(f$.greedy_scores_log)) 40 | suppressWarnings(RNGversion("3.5.0")) 41 | set.seed(0) 42 | f <- bsej('class', dataset = car, k = 10, epsilon = 0) 43 | expect_equal(features(f), colnames(car)[-7]) 44 | expect_equal(narcs(f), 13) 45 | expect_equal(length(f$.greedy_scores_log), 5) 46 | expect_equal(f$.greedy_scores_log, sort(f$.greedy_scores_log)) 47 | }) 48 | 49 | test_that("tan_hc nominal", { 50 | skip_on_cran() 51 | suppressWarnings(RNGversion("3.5.0")) 52 | set.seed(0) 53 | t <- tan_hc('class', dataset = 
car, k = 10, epsilon = 0) 54 | expect_equal(length(features(t)), 6) 55 | expect_equal(narcs(t), 11) 56 | }) 57 | 58 | test_that("tanhc sp nominal", { 59 | skip_on_cran() 60 | suppressWarnings(RNGversion("3.5.0")) 61 | set.seed(0) 62 | t <- tan_hcsp('class', dataset = car, k = 10, epsilon = 0) 63 | expect_equal(length(features(t)), 6) 64 | expect_equal(narcs(t), 11) 65 | nfams <- sapply(families(t), length) 66 | expect_true(max(nfams) < 5) 67 | }) 68 | 69 | test_that("kdb nominal", { 70 | skip_on_cran() 71 | suppressWarnings(RNGversion("3.5.0")) 72 | set.seed(0) 73 | t <- kdb('class', dataset = car, kdb = 1, k = 10, epsilon = 0) 74 | suppressWarnings(RNGversion("3.5.0")) 75 | set.seed(0) 76 | to <- tan_hc('class', dataset = car, k = 10, epsilon = 0) 77 | expect_false(isTRUE(all.equal(t, to))) 78 | t$.call_struct <- NULL 79 | to$.call_struct <- NULL 80 | expect_true(isTRUE(all.equal(t, to))) 81 | }) -------------------------------------------------------------------------------- /R/basic-assert.R: -------------------------------------------------------------------------------- 1 | skip_env <- new.env(parent = emptyenv()) 2 | skip_env$skip_assert <- FALSE 3 | skip_env$skip_testing <- TRUE 4 | #' Whether to do checks or not. Set TRUE to speed up debugging or building. 5 | #' @keywords internal 6 | skip_assert <- function() { 7 | skip_env$skip_assert 8 | } 9 | #' Skip while testing to isolate errors 10 | #' @keywords internal 11 | skip_testing <- function() { 12 | skip_env$skip_testing 13 | } 14 | # Tests whether two character vectors are identical when sorted 15 | is_perm <- function(x, y) { 16 | is.character(x) && identical(sort(x), sort(y)) 17 | } 18 | # use this e.g., in (I think) check_dataset 19 | is_non_empty_complete <- function(x) { 20 | (length(x) > 0) && (!anyNA(x)) 21 | } 22 | # Compares two numerics ignoring attributes and class 23 | equivalent_num <- function(x, y) { 24 | stopifnot(is.numeric(x), is.numeric(y)) 25 | # Check.attributes = FALSE does not ignore class so remove it here 26 | class(x) <- NULL 27 | class(y) <- NULL 28 | all.equal(x, y, check.attributes = FALSE) 29 | } 30 | is_subset <- function(x, y) { 31 | all(x %in% y) 32 | } 33 | is_positive <- function(x) { 34 | assertthat::is.number(x) && (x > 0) 35 | } 36 | is_nonnegative <- function(x) { 37 | assertthat::is.number(x) && (x >= 0) 38 | } 39 | is_last <- function(element, vector) { 40 | # Basically does not work for numerics due to identical 41 | identical(element, get_last(vector)) 42 | } 43 | is_at_pos <- function(element, position, vector) { 44 | # Basically does not work for numerics due to identical 45 | identical(get_null_safe(vector, position), element) 46 | } 47 | are_all_equal <- function(x) { 48 | length(unique(x)) == 1L 49 | } 50 | # use this e.g., in check_dataset 51 | are_all_unique <- function(x) { 52 | length(x) == length(unique(x)) 53 | } 54 | #' Checks if all columns in a data frame are factors. 
55 | #' 56 | #' @param x a \code{data.frame} 57 | #' @keywords internal 58 | are_factors <- function(x) { 59 | # If x is not data.frame stop 60 | stopifnot(is.data.frame(x)) 61 | all(vapply(x, is.factor, FUN.VALUE = logical(1))) 62 | } 63 | is_just <- function(x, class) { 64 | identical(class(x), class) 65 | } 66 | ## Checks 67 | # Checks the object is non-empty and has no NA's 68 | check_non_empty_complete <- function(x) { 69 | stopifnot(is_non_empty_complete(x)) 70 | } 71 | are_complete_dimnames <- function(x) { 72 | if (skip_assert( )) return (TRUE) 73 | # Check x has non empty comlete dimames 74 | dnames <- dimnames(x) 75 | if (!is_non_empty_complete(dnames)) return (FALSE) 76 | # Check dimnames have non-empty complete names 77 | if (!is_non_empty_complete(names(dnames))) return (FALSE) 78 | # Check each dimension is non-empty complete 79 | all(vapply(dnames, is_non_empty_complete, FUN.VALUE = logical(1))) 80 | } -------------------------------------------------------------------------------- /tests/testthat/test-bnc-dag-operate.R: -------------------------------------------------------------------------------- 1 | context("bnc dag operate") 2 | 3 | test_that("not cci nominal naive Bayes", { 4 | nb <- nbcar() 5 | s <- not_cci(nb) 6 | expect_equal(unlist(s, use.names = FALSE), colnames(car)[1:6]) 7 | }) 8 | 9 | test_that("not cci nominal TAN", { 10 | tn <- lp(chowliu('class', car), dataset = car, smooth = 1) 11 | s <- not_cci(tn) 12 | expect_true(is.list(s)) 13 | expect_equal(length(s), 1) 14 | expect_equal(length(s[[1]]), length(features(tn))) 15 | 16 | }) 17 | 18 | test_that("not cci no features", { 19 | nb <- nbcarclass() 20 | s <- not_cci(nb) 21 | expect_null(s) 22 | }) 23 | 24 | test_that("is supernode single node", { 25 | nb <- nbcar() 26 | expect_true(is_supernode('safety', nb)) 27 | }) 28 | 29 | test_that("is supernode two nodes not supernode", { 30 | nb <- nbcar() 31 | expect_true(!is_supernode(c('safety', 'buying'), nb)) 32 | }) 33 | 34 | test_that("is supernode two nodes supernode", { 35 | t <- tan_cl('class', car) 36 | expect_true(is_supernode(c('safety', 'buying'), t)) 37 | }) 38 | 39 | test_that("is supernode class var", { 40 | nb <- nbcarclass() 41 | expect_error(is_supernode('class', nb), "class") 42 | }) 43 | 44 | test_that("is_semi_naive just class", { 45 | nb <- nbcarclass() 46 | expect_true(is_semi_naive(nb)) 47 | }) 48 | 49 | test_that("is_semi_naive naive Bayes", { 50 | nb <- nbcar() 51 | expect_true(is_semi_naive(nb)) 52 | }) 53 | 54 | test_that("include node nominal", { 55 | nb <- nbcarclass() 56 | a <- add_feature('safety', nb) 57 | expect_equal(features(a), 'safety') 58 | }) 59 | 60 | 61 | 62 | test_that("include node multiple nodes", { 63 | nb <- nbcarclass() 64 | expect_error(add_feature(c('safety', 'doors'), nb), "string") 65 | }) 66 | 67 | test_that("include node already included", { 68 | nb <- nbcar() 69 | expect_error(add_feature('safety', nb) , "already") 70 | }) 71 | 72 | test_that("remove feature nominal", { 73 | nb <- nbcar() 74 | rnb <- remove_feature('safety', nb) 75 | expect_equal(features(rnb), colnames(car)[1:5]) 76 | }) 77 | 78 | test_that("remove feature not in graph", { 79 | nb <- nbcarclass() 80 | expect_error(remove_feature('safety', nb), "not in") 81 | }) 82 | 83 | test_that("feature orphans nominal", { 84 | nb <- nbcar() 85 | o <- feature_orphans(nb) 86 | expect_equal(o, features(nb)) 87 | }) 88 | 89 | test_that("feature orphans no features", { 90 | nb <- nbcarclass() 91 | o <- feature_orphans(nb) 92 | expect_null(o) 93 | }) 94 | 95 | 
test_that("feature orphans ode", { 96 | nb <- nbcarp(car[, 5:7]) 97 | nb <- add_feature_parents('lug_boot', 'safety', nb) 98 | o <- feature_orphans(nb) 99 | expect_equal(o, 'lug_boot') 100 | }) 101 | 102 | -------------------------------------------------------------------------------- /tests/testthat/test-infer-anb.R: -------------------------------------------------------------------------------- 1 | context("infer anb") 2 | 3 | test_that("Nominal", { 4 | tn <- nbcar() 5 | a <- compute_joint(tn, car) 6 | expect_identical(colnames(a), levels(car$class)) 7 | }) 8 | 9 | test_that("Missing features", { 10 | tn <- nbcar() 11 | expect_error(compute_joint(tn, car[, 1:2]), 12 | "Some features missing from data set.") 13 | }) 14 | 15 | test_that("Single predictor", { 16 | tn <- lp(nb('class', car[, c(1,7)]), car, smooth = 0) 17 | pt <- compute_joint(tn, car[, 1:2]) 18 | expect_identical(dim(pt), c(nrow(car), 4L)) 19 | }) 20 | 21 | test_that("0 rows dataset", { 22 | tn <- nbcar() 23 | pt <- compute_joint(tn, car[FALSE, ]) 24 | expect_identical(dim(pt), c(0L, 4L)) 25 | }) 26 | 27 | test_that("No features", { 28 | nb <- bnc_dag(nb_dag('class', NULL), 'class') 29 | nb <- lp(nb, car, smooth = 1) 30 | pt <- compute_joint(nb, car) 31 | expect_equal(as.vector(pt[1, ]), as.vector(log(params(nb)[['class']]))) 32 | 33 | pt2 <- compute_joint(nb, car[, FALSE]) 34 | expect_equal(pt, pt2) 35 | }) 36 | 37 | test_that("matches grain", { 38 | 39 | # gRain implementation change 40 | # skip_on_cran() 41 | # skip_if_not_installed('gRain') 42 | # 43 | # tn <- nbcar() 44 | # b <- compute_joint(tn, car) 45 | # g <- as_grain(tn) 46 | # gp <- compute_grain_log_joint(grain = g, car[, -7], 'class') 47 | # expect_equal(b, gp) 48 | # 49 | # tn <- nbvotecomp() 50 | # b <- compute_joint(tn, v) 51 | # g <- as_grain(tn) 52 | # gp <- compute_grain_log_joint(grain = g, v[, -17], 'Class') 53 | # expect_equal(b, gp) 54 | # 55 | # tn <- bnc('tan_cl', class = 'class', smooth = 1, dataset = car) 56 | # b <- compute_joint(tn, car) 57 | # g <- as_grain(tn) 58 | # gp <- compute_grain_log_joint(grain = g, car[, -7], 'class') 59 | # expect_equal(b, gp) 60 | # gRain implementation change 61 | }) 62 | 63 | test_that("correct result", { 64 | carb <- car[, c(1,7)] 65 | tn <- nbcarp(carb) 66 | true_log_prob <- log(params(tn)$buying['vhigh', ]) + log(params(tn)$class) 67 | b <- compute_joint(tn, carb[1, , drop = FALSE]) 68 | expect_equal(as.vector(true_log_prob), as.vector(b[1, ])) 69 | }) 70 | 71 | test_that("different levels", { 72 | nb <- nbcar() 73 | ce <- car 74 | levels(ce$buying) <- rev(levels(ce$buying)) 75 | expect_error(compute_log_joint(nb, ce), 76 | "Levels in data set must match those in the CPTs ") 77 | }) 78 | 79 | test_that("fail with incomplete data", { 80 | v <- nbvote() 81 | expect_error(compute_joint(v, voting), "NA entries in data set.") 82 | }) 83 | 84 | 85 | # Not implemented to receive just CPTs 86 | # test_that("compute augnb lucp", { 87 | # df <- alphadb 88 | # vars <- list(letters[1:3], c(letters[4:6], letters[3])) 89 | # rct <- lapply(vars, extract_cpt, df, smooth = 0) 90 | # rcp <- extract_cpt('c', df, smooth = 0) 91 | # compute_augnb_lucp(rct, rcp, x = df) 92 | # }) -------------------------------------------------------------------------------- /R/wrap-mlr.R: -------------------------------------------------------------------------------- 1 | #' Convert to \code{mlr}. 2 | #' 3 | #' Convert a \code{\link{bnc_bn}} to a \code{\link[mlr]{Learner}} 4 | #' object. 
5 | #' 6 | #' @inheritParams cv 7 | #' @param x A \code{\link{bnc_bn}} object. 8 | #' @param id A character. 9 | #' @export 10 | #' @examples 11 | #' data(car) 12 | #' nb <- bnc('nb', 'class', car, smooth = 1) 13 | #' \dontrun{library(mlr)} 14 | #' \dontrun{nb_mlr <- as_mlr(nb, dag = FALSE, id = "ode_cl_aic")} 15 | #' \dontrun{nb_mlr} 16 | as_mlr <- function(x, dag, id = "1") { 17 | check_mlr_attached() 18 | check_bnc_bn(x) 19 | args <- bnc_get_update_args(x, dag) 20 | # Call make learner with the arguments 21 | mlr::makeLearner("bnc", id = id, par.vals = list(args=args)) 22 | } 23 | #' makeRLearner. Auxiliary mlr function. 24 | #' @export makeRLearner.bnc 25 | #' @keywords internal 26 | makeRLearner.bnc <- function() { 27 | if (!requireNamespace("mlr", quietly = TRUE)) { 28 | stop("Package mlr required for this functionality.") 29 | } 30 | mlr::makeRLearnerClassif( 31 | cl = "bnc", 32 | package = "bnclassify", 33 | par.set = ParamHelpers::makeParamSet( 34 | ParamHelpers::makeUntypedLearnerParam(id = "args", default = NULL) 35 | ), 36 | properties = retrieve_bnc_properties() 37 | ) 38 | } 39 | #' trainLearner. Auxiliary mlr function. 40 | #' @export trainLearner.bnc 41 | #' @keywords internal 42 | #' @param .learner,.task,.subset,.weights Internal. 43 | #' @param ... Internal. 44 | trainLearner.bnc = function(.learner, .task, .subset, .weights, ...) { 45 | if (!requireNamespace("mlr", quietly = TRUE)) { 46 | stop("Package mlr required for this functionality.") 47 | } 48 | # Check args contain struct, struct_call and params_call 49 | args <- .learner$par.vals$args 50 | dataset <- mlr::getTaskData(.task, .subset) 51 | bnc_update(args, dataset) 52 | } 53 | #' predictLearner. Auxiliary mlr function. 54 | #' @export predictLearner.bnc 55 | #' @keywords internal 56 | #' @param .learner,.model,.newdata Internal. 57 | #' @param ... Internal. 58 | predictLearner.bnc = function(.learner, .model, .newdata, ...) { 59 | if (!requireNamespace("mlr", quietly = TRUE)) { 60 | stop("Package mlr required for this functionality.") 61 | } 62 | prob = TRUE 63 | if(.learner$predict.type == "response") prob = FALSE 64 | predict(.model$learner.model, newdata = .newdata, prob = prob) 65 | } 66 | 67 | retrieve_bnc_properties <- function() { 68 | c("oneclass", "twoclass", "multiclass", "factors", "prob", "numerics", 69 | "missings") 70 | } 71 | #' Checks if mlr attached. 72 | #' 73 | #' mlr must be attached because otherwise `getMlrOptions()` in `makeLearner` will not be found. 74 | #' @keywords internal 75 | check_mlr_attached <- function() { 76 | mlr_loaded <- 'package:mlr' %in% search() 77 | if (!mlr_loaded) { 78 | stop("mlr package must be loaded (run, e.g., library(mlr)) in order to use this functionality. 
Install the package first if needed.") 79 | } 80 | } -------------------------------------------------------------------------------- /R/anb-cpts.R: -------------------------------------------------------------------------------- 1 | # Checks cpts ordered according to vars() and 1D names correspond to vars() 2 | cpts2families <- function(cpts) { 3 | lapply(cpts, cpt2family) 4 | } 5 | families2cpts <- function(families, dataset, smooth, .mem_cpts) { 6 | if (!is.null(.mem_cpts)) { 7 | lapply(families, call_memoised_char, cache = .mem_cpts) 8 | } 9 | else { 10 | check_dataset(dataset) 11 | lapply(families, extract_cpt, dataset, smooth = smooth) 12 | } 13 | } 14 | extract_cpt <- function(vars, dataset, smooth) { 15 | ctgt <- extract_ctgt(vars, dataset) 16 | ctgt2cpt(ctgt, smooth = smooth) 17 | } 18 | make_cpts_cache <- function(dataset, smooth) { 19 | check_dataset(dataset) 20 | extract_cpt <- function(vars) { 21 | ctgt <- extract_ctgt(vars, dataset) 22 | ctgt2cpt(ctgt, smooth = smooth) 23 | } 24 | memoise_char(extract_cpt) 25 | } 26 | # Turns a contingency table into a conditional probability table 27 | ctgt2cpt <- function(ctgt, smooth) { 28 | # Requiring ctgt be a table. That implies it is an array. 29 | stopifnot(smooth >= 0, is.table(ctgt), are_complete_dimnames(ctgt)) 30 | # Add smooth to ctgt 31 | ctgt <- smooth_sideeffect(ctgt, smooth) 32 | normalize_ctgt(ctgt) 33 | } 34 | #' Get just form first dimension in their own cpt, not checking for consistency 35 | #' in others. 36 | #' @keywords internal 37 | cpt_vars_values <- function(cpts) { 38 | # Check the names of cpts are equal to the name of their first dim 39 | vars <- vapply(cpts, cpt_1d_var, FUN.VALUE = character(1)) 40 | stopifnot(identical(unname(vars), names(cpts))) 41 | # Return the values 42 | lapply(cpts, cpt_1d_values) 43 | } 44 | # Returns the name of the first dimensions and the values in the dimension of 45 | # the table. 
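# A small sketch of the extraction helpers above, on the bundled car data
# (not run; exact values depend on the data set):
#   ctgt <- extract_ctgt(c('buying', 'class'), car)   # counts N(buying, class)
#   cpt  <- ctgt2cpt(ctgt, smooth = 1)                # each class column sums to 1: P(buying | class)
#   identical(cpt, extract_cpt(c('buying', 'class'), car, smooth = 1))   # should be TRUE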
46 | cpt_1d_values <- function(cpt) { 47 | # Get 1d cases and check not empty 48 | values <- get_cpt_values(cpt)[[1]] 49 | check_non_empty_complete(values) 50 | values 51 | } 52 | cpt_1d_var <- function(cpt) { 53 | var <- cpt2family(cpt)[[1]] 54 | stopifnot(assertthat::is.string(var)) 55 | var 56 | } 57 | cpt2family <- function(cpt) { 58 | # Check is a table 59 | stopifnot(is.table(cpt)) 60 | # Return names dimnames 61 | names(dimnames(cpt)) 62 | } 63 | get_cpt_values <- function(cpt) { 64 | stopifnot(is.table(cpt)) 65 | dimnames(cpt) 66 | } 67 | # Gets cpt entries using a list of indices 68 | # Returns a vector 69 | subset_cpt <- function(cpt, indices) { 70 | # check var non empty, in indices 71 | vars <- names(dimnames(cpt)) 72 | stopifnot(is_non_empty_complete(vars), is_subset(vars, names(indices))) 73 | # Get index matrix from df 74 | x_indices <- do.call('cbind', indices[vars]) 75 | cpt[x_indices] 76 | } 77 | exponentiate_cpt <- function(cpt, value) { 78 | cpt <- cpt ^ value 79 | normalize_ctgt(cpt) 80 | } 81 | get_cpt_id <- function(cpt) { 82 | make_family_id(cpt2family(cpt)) 83 | } 84 | count_cpt_free_params <- function(cpt) { 85 | d <- dim(cpt) 86 | stopifnot(length(d) >= 1) 87 | (d[1] - 1) * prod(d[-1]) 88 | } -------------------------------------------------------------------------------- /R/cv-update.R: -------------------------------------------------------------------------------- 1 | bnc_get_update_args <- function(x, dag) { 2 | stopifnot(is.logical(dag)) 3 | args <- list(lp_fargs = x$.call_bn) 4 | # lp_fargs must always be present 5 | stopifnot(!is.null(args$lp_fargs)) 6 | # If dag then include dag arguments 7 | if (dag) { 8 | args$dag_fargs <- x$.call_struct 9 | stopifnot(!is.null(args$dag_fargs)) 10 | } 11 | args 12 | } 13 | bnc_update <- function(args, dataset) { 14 | bnc_update_args(args$lp_fargs, dataset, args$dag_fargs) 15 | } 16 | bnc_update_args <- function(lp_fargs, dataset, dag_fargs = NULL) { 17 | # If dag needs to be called, call it first then feed it into lp arguments 18 | if (!is.null(dag_fargs)) { 19 | # dag_fargs contain both function name and arguments. 20 | dag <- do_bnc_call(dag_fargs, dataset) 21 | lp_fargs$x <- dag 22 | } 23 | # Wrap the result of lp before it's returned 24 | res <- do_bnc_call(lp_fargs, dataset) 25 | # bnc_wrap(res) TODO 26 | res 27 | } 28 | # Optionally updates the dag prior to updating the parameters. 29 | update <- function(x, dataset, dag) { 30 | stopifnot(is.logical(dag)) 31 | dg <- NULL 32 | if (dag) { 33 | dg <- update_dag(x, dataset) 34 | } 35 | else { 36 | dg <- bn2dag(x) 37 | } 38 | lp_args <- get_lp_update_args(x) 39 | update_lp(dag = dg, lp_fargs = lp_args, dataset = dataset) 40 | } 41 | save_bnc_call <- function(fun_name, call, env) { 42 | stopifnot(is.character(fun_name)) 43 | call[[1]] <- fun_name 44 | # To make sure that this dataset is not accidentaly used on updates. 
45 | call['dataset'] <- NULL 46 | lapply(call, eval, envir = env) 47 | } 48 | do_bnc_call <- function(fargs, dataset) { 49 | fargs$dataset <- dataset 50 | call <- make_call(fargs[[1]], fargs[-1]) 51 | eval(call) 52 | } 53 | add_dag_call_arg <- function(bnc_dag, fun_name, call, env, force = FALSE) { 54 | add_call_arg(bnc_dag, fun_name, call, env, arg = '.call_struct', force = force) 55 | } 56 | remove_dag_call_arg <- function(bnc_dag) { 57 | bnc_dag[['.call_struct']] <- NULL 58 | bnc_dag 59 | } 60 | add_params_call_arg <- function(bnc_bn, call, env, force = TRUE) { 61 | add_call_arg(bnc_bn, 'lp', call, env, arg = '.call_bn', force = force) 62 | } 63 | add_call_arg <- function(bnc_dag, fun_name, call, env, arg, force) { 64 | # stopifnot(inherits(bnc_dag, "bnc_dag")) 65 | # TODO Fix this for appropriate types 66 | stopifnot(inherits(bnc_dag, "bnc_dag") || inherits(bnc_dag, "bnc_base")) 67 | if (!force) { 68 | stopifnot(is.null(bnc_dag[[arg]])) 69 | } 70 | bnc_dag[[arg]] <- save_bnc_call(fun_name, call, env) 71 | bnc_dag 72 | } 73 | get_lp_update_args <- function(x) { 74 | stopifnot(!is.null(x$.call_bn)) 75 | x$.call_bn 76 | } 77 | get_dag_update_args <- function(x) { 78 | stopifnot(!is.null(x$.call_struct)) 79 | x$.call_struct 80 | } 81 | update_dag <- function(x, dataset) { 82 | do_bnc_call(get_dag_update_args(x), dataset) 83 | } 84 | update_lp <- function(dag, lp_fargs, dataset) { 85 | lp_fargs$x <- dag 86 | do_bnc_call(lp_fargs, dataset) 87 | } -------------------------------------------------------------------------------- /R/basic-misc.R: -------------------------------------------------------------------------------- 1 | #' Return all but last element of x. 2 | #' 3 | #' If x is NULL returns NA not NULL 4 | #' @keywords internal 5 | get_but_last <- function(x) { 6 | get_null_safe(x, -length(x)) 7 | } 8 | #' Return last element of x. 9 | #' 10 | #' If x is NULL returns NA not NULL 11 | #' @keywords internal 12 | get_last <- function(x) { 13 | get_null_safe(x, length(x)) 14 | } 15 | #' Get i-th element of x. 16 | #' 17 | #' If x is NULL returns NA not NULL 18 | #' @keywords internal 19 | get_null_safe <- function(x, i) { 20 | if (length(x) == 0) NA else x[i] 21 | } 22 | # Convert a factor to integer and then replicate reps times 23 | rep_factor_as_int <- function(f, reps) { 24 | attributes(f) <- NULL 25 | stopifnot(is.integer(f)) 26 | rep(f, reps) 27 | } 28 | #' Compares all elements in a to b 29 | #' 30 | #' @param b numeric. Must be length one but no check is performed. 31 | #' @keywords internal 32 | fast_equal <- function(a, b) { 33 | # stopifnot(length(b) == 1) No check for efficiency 34 | abs(a - b) < .Machine$double.eps ^ 0.5 35 | } 36 | #' Compute predictive accuracy. 37 | #' 38 | #' @param x A vector of predicted labels. 39 | #' @param y A vector of true labels. 
40 | #' @export 41 | #' 42 | #' @examples 43 | #' data(car) 44 | #' nb <- bnc('nb', 'class', car, smooth = 1) 45 | #' p <- predict(nb, car) 46 | #' accuracy(p, car$class) 47 | accuracy <- function(x, y) { 48 | count_equal(x, y) / length(x) 49 | } 50 | # a list to a matrix where the names are kept in the second column 51 | unlist_keepnames <- function(list) { 52 | lengths <- element_lengths(list) 53 | unname(cbind(unlist(list, use.names = FALSE), rep(names(list), lengths))) 54 | } 55 | element_lengths <- function(list) { 56 | vapply(list, length, FUN.VALUE = integer(1)) 57 | } 58 | max_random <- function(x) { 59 | ind <- which(fast_equal(x, max(x))) 60 | if (length(ind) > 1) { 61 | ind <- sample(ind, 1) 62 | } 63 | ind 64 | } 65 | #' Return a bootstrap sub-sample. 66 | #' 67 | #' @param dataset a \code{data.frame} 68 | #' @param proportion numeric given as fraction of \code{dataset} size 69 | #' @keywords internal 70 | bootstrap_ss <- function(dataset, proportion) { 71 | stopifnot(is_positive(proportion)) 72 | N <- nrow(dataset) 73 | stopifnot(N > 0) 74 | subsample_size <- trunc(N * proportion) 75 | dataset[sample(N, replace = T, size = subsample_size), , drop = FALSE] 76 | } 77 | make_call <- function(f, args) { 78 | f <- as.name(f) 79 | as.call(c(f, args)) 80 | } 81 | #' Subset a 2D structure by a vector of column names. 82 | #' 83 | #' Not all colnames are necessarily in the columns of data; in that case this 84 | #' returns NA. 85 | #' @param colnames a character vector 86 | #' @param data a matrix or data frame 87 | #' @keywords internal 88 | subset_by_colnames <- function(colnames, data) { 89 | stopifnot(is.character(colnames), length(colnames) == nrow(data)) 90 | ind_cols <- match(colnames, colnames(data)) 91 | ind_matrix <- cbind(seq_along(ind_cols), ind_cols) 92 | data[ind_matrix] 93 | } -------------------------------------------------------------------------------- /tests/testthat/test-data-statistics.R: -------------------------------------------------------------------------------- 1 | context("Statistics") 2 | 3 | test_that("Conditional mutual information", { 4 | # With Incomplete data. 5 | a <- cmi('physician_fee_freeze', 'Class', 6 | dataset = voting, z = 'water_project_cost_sharing') 7 | b <- cmi('physician_fee_freeze', 'Class', dataset = voting) 8 | expect_equal(a, b, tolerance = 0.001) 9 | # Mutual information 10 | a <- cmi('physician_fee_freeze', 'Class', 11 | dataset = voting, z = 'water_project_cost_sharing') 12 | b <- cmi('physician_fee_freeze', 'Class', voting) 13 | expect_equal(a, b, tolerance = 0.001) 14 | c <- cmi('buying', 'maint', dataset = car) 15 | expect_equal(c, 0) 16 | # the same as when removing missing directly. 
17 | a <- cmi('physician_fee_freeze', 'Class', 18 | dataset = voting, z = 'water_project_cost_sharing') 19 | v <- voting[,c('physician_fee_freeze', 'water_project_cost_sharing', 'Class')] 20 | b <- cmi('physician_fee_freeze', 'Class', v, z = 'water_project_cost_sharing') 21 | expect_equal(a, b, tolerance = 1e-10) 22 | # Log base 23 | a <- cmi('physician_fee_freeze', 'Class', dataset = voting, unit = "log2") 24 | b <- cmi('physician_fee_freeze', 'Class', dataset = voting) 25 | expect_true(abs(a - b) > 0.1) 26 | }) 27 | 28 | test_that("Contingency table", { 29 | t <- extract_ctgt('doors', car) 30 | expect_identical(dim(t), 4L) 31 | t <- extract_ctgt(c('buying', c('doors', 'maint')), car) 32 | expect_identical(dim(t), c(4L, 4L ,4L)) 33 | d <- dimnames(t) 34 | vars <- c('buying', 'doors', 'maint') 35 | expect_identical(names(d), vars) 36 | levs <- lapply(car[, vars], levels) 37 | expect_true(all(mapply(identical, levs, d, SIMPLIFY = TRUE))) 38 | t <- extract_ctgt('crime', voting) 39 | expect_identical(sum(t), 418L) 40 | }) 41 | 42 | test_that("Degrees freedom", { 43 | tbl <- extract_ctgt(c('persons', 'doors', 'class'), car) 44 | df <- cmi_degrees_freedom(freqs_table = tbl) 45 | expect_equal(df, 24L) 46 | 47 | tbl <- extract_ctgt(c('doors', 'class', 'persons'), car) 48 | df <- cmi_degrees_freedom(freqs_table = tbl) 49 | expect_equal(df, 27L) 50 | }) 51 | 52 | test_that("Contingency table to CPT", { 53 | # Nominal 54 | suppressWarnings(RNGversion("3.5.0")) 55 | set.seed(0) 56 | tc <- table(random_letters_vector(3, 20), random_letters_vector(4, 20)) 57 | tcpt <- ctgt2cpt(tc, 1) 58 | expect_equal(colnames(tcpt), letters[1:4]) 59 | expect_equal(tcpt[, 1], setNames(c(1, 3, 4) / 8, letters[1:3])) 60 | # No smooth 61 | suppressWarnings(RNGversion("3.5.0")) 62 | set.seed(0) 63 | tc <- table(random_letters_vector(3, 20), random_letters_vector(4, 20)) 64 | tcpt <- ctgt2cpt(tc, 0) 65 | expect_equal(colnames(tcpt), letters[1:4]) 66 | expect_equal(tcpt[, 1], setNames(c(0, 2, 3) / 5, letters[1:3])) 67 | # 1D table 68 | suppressWarnings(RNGversion("3.5.0")) 69 | set.seed(0) 70 | tc <- table(random_letters_vector(4, 200)) 71 | tcpt <- ctgt2cpt(tc, 0) 72 | expect_equal(names(tcpt), letters[1:4]) 73 | expect_equal(tcpt, tc / 200) 74 | }) 75 | 76 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-params-manb.R: -------------------------------------------------------------------------------- 1 | context("learn params manb") 2 | 3 | # The exact posterior probabilities compare to were obtained using the MANB implementation by Wei et al. 
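# A sketch of the assumed user-facing path to these internals (the manb_prior
# argument of lp(); the argument name is assumed here), not run:
#   n    <- nb('class', car)
#   manb <- lp(n, car, smooth = 1, manb_prior = 0.5)
#   params(manb)$doors   # CPT pulled towards the marginal P(doors) when the arc posterior is low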
4 | 5 | test_that("compute manb nominal", { 6 | nb <- nbcar() 7 | u <- lapply(families(nb), extract_ctgt, car)[features(nb)] 8 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1) 9 | expect_named(d, features(nb)) 10 | d <- as.vector(d) 11 | expect_equal(d, c(1, 1, 0.000026294701543, 1, 1, 1)) 12 | }) 13 | 14 | test_that("compute manb no features", { 15 | nb <- nbcarclass() 16 | a <- list() 17 | names(a) <- character() 18 | expect_equivalent(compute_manb_arc_posteriors(nb, a, smooth = 1), numeric()) 19 | }) 20 | 21 | test_that("compute manb prior", { 22 | nb <- nbcar() 23 | u <- lapply(families(nb), extract_ctgt, car)[features(nb)] 24 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1, prior = 0.00001) 25 | expect_named(d, features(nb)) 26 | d <- as.vector(d) 27 | expect_equal(d, c(1, 1, 0.000000000262957, 1, 0.999981436585993, 1)) 28 | 29 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1, prior = 0.03) 30 | expect_named(d, features(nb)) 31 | d <- as.vector(d) 32 | expect_equal(d, c(1, 1, 0.000000813258915, 1, 0.999999993997562, 1)) 33 | 34 | d <- compute_manb_arc_posteriors(nb, u, smooth = 1, prior = 0.95) 35 | expect_named(d, features(nb)) 36 | d <- as.vector(d) 37 | expect_equal(d, c(1, 1, 0.000499362978504, 1, 0.999999999990223, 1)) 38 | }) 39 | 40 | test_that("compute manb smooth", { 41 | nb <- nbcar() 42 | u <- lapply(families(nb), extract_ctgt, car)[features(nb)] 43 | # No error for smooth not being integer. It is close to when smooth = 1 44 | d <- compute_manb_arc_posteriors(nb, u, smooth = 0.99) 45 | d1 <- compute_manb_arc_posteriors(nb, u, smooth = 1) 46 | expect_true(sum(abs(d - d1)) < 1e-5) 47 | expect_error(compute_manb_arc_posteriors(nb, u, smooth = 0), " > 0 is not TRUE") 48 | }) 49 | 50 | 51 | test_that("compute manb not nb", { 52 | tn <- tan_cl('class', car) 53 | ctgts <- lapply(families(tn), extract_ctgt, car)[features(tn)] 54 | expect_error(compute_manb_arc_posteriors(tn, ctgts, smooth = 1), 55 | "MANB can only be applied to naive Bayes") 56 | }) 57 | 58 | test_that("compute cpt", { 59 | a <- extract_ctgt(c('doors', 'class'), car) 60 | b <- compute_manb_cpt(a, 1, smooth = 0) 61 | a <- ctgt2cpt(a, 0) 62 | expect_equal(b, a) 63 | 64 | a <- extract_ctgt(c('doors', 'class'), car) 65 | b <- compute_manb_cpt(a, 1, smooth = 1) 66 | a <- ctgt2cpt(a, 1) 67 | expect_equal(b, a) 68 | 69 | p <- extract_ctgt(c('buying', 'class'), car) 70 | u <- compute_manb_cpt(p, 0, smooth = 1) 71 | t <- ctgt2cpt(p, smooth = 1) 72 | t[] <- ctgt2cpt(extract_ctgt(c('buying'), car), smooth = 1) 73 | expect_equal(u, t) 74 | 75 | p <- extract_ctgt(c('buying', 'class'), car) 76 | u <- compute_manb_cpt(p, 0.5, smooth = 1) 77 | t <- ctgt2cpt(p, smooth = 1) 78 | pt <- t 79 | pt[] <- ctgt2cpt(extract_ctgt(c('buying'), car), smooth = 1) 80 | pt[] <- (pt + t) / 2 81 | expect_equal(u, pt) 82 | }) -------------------------------------------------------------------------------- /R/learn-chowliu.R: -------------------------------------------------------------------------------- 1 | chowliu <- function(class, dataset, score='loglik', blacklist = NULL, 2 | root = NULL) { 3 | # Get pairwise scores 4 | pairwise_scores <- 5 | pairwise_ode_score_contribs(class = class, dataset = dataset, score = score) 6 | # Get the augmenting forest 7 | aug_forest <- max_weight_forest(pairwise_scores) 8 | # Direct the forest (TODO: test the forest is effectively directed) 9 | aug_forest <- direct_forest(aug_forest, root = root) 10 | # TODO: Add blacklisting. 
11 | ode <- superimpose_node(dag = aug_forest, node = class) 12 | bnc_dag(dag = ode, class = class) 13 | } 14 | pairwise_ode_score_contribs <- function(class, dataset, score) { 15 | # Check score in decomposable_ode_scores 16 | stopifnot(score %in% decomposable_ode_scores()) 17 | # Get features 18 | features = get_features(class = class, dataset = dataset) 19 | # If 0 features then return empty graph 20 | if (length(features) == 0) return(graph_empty_undirected()) 21 | # If 1 feature then return single node graph (no arcs) 22 | pairs <- complete_graph(features) 23 | if (length(features) == 1) return(pairs) 24 | # Get each pair of features 25 | edges <- pairs$edges 26 | from <- edges[, 1] 27 | to <- edges[, 2]; rm(edges) 28 | # For each get pairwise contribution to score 29 | pairwise_score <- mapply(local_ode_score_contrib, from, to, 30 | MoreArgs = list(class = class, dataset = dataset), 31 | SIMPLIFY = TRUE) 32 | stopifnot(identical(rownames(pairwise_score), decomposable_ode_scores())) 33 | # Select the score 34 | pairwise_score <- pairwise_score[score, ] 35 | # Remove negative scores (possible for BIC and AIC) and weight the edges 36 | # ...If I also removed 0 scores --which are possible for loglik-- the structure 37 | # ...may turn out a forest even when using loglik 38 | ind_keep <- pairwise_score >= 0 39 | from <- from[ind_keep] 40 | to <- to[ind_keep] 41 | pairwise_score <- pairwise_score[ind_keep] 42 | make_graph(features, from, to, pairwise_score) 43 | } 44 | #' Returns pairwise component of ODE (penalized) log-likelihood scores. 45 | #' In natural logarithms. 46 | #' @keywords internal 47 | local_ode_score_contrib <- function(x, y, class, dataset) { 48 | # If x and y and class do not have length one stop 49 | stopifnot(length(x) == 1) 50 | stopifnot(length(y) == 1) 51 | stopifnot(length(class) == 1) 52 | # Get contingency table 53 | freqs <- extract_ctgt(c(x, y, class), dataset) 54 | # Ignore dataset from here on 55 | rm(dataset) 56 | # Compute I(X;Y | Z) 57 | cmi <- cmi_table(freqs, unit = "log") 58 | # Get number of degrees of freedom 59 | df <- cmi_degrees_freedom(freqs_table = freqs) 60 | # Make sure it is non-negative 61 | stopifnot(df >= 0) 62 | # Get num. 
of observations in contingency table 63 | N <- sum(freqs) 64 | # Compute bic 65 | bic <- N * cmi - (log(N) / 2) * df 66 | # Compute aic 67 | aic <- N * cmi - df 68 | c(loglik = cmi, bic = bic, aic = aic) 69 | } 70 | decomposable_ode_scores <- function() { c('loglik', 'bic', 'aic') } -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | md_document: 4 | variant: markdown_github 5 | --- 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-" 12 | ) 13 | ``` 14 | # bnclassify 15 | 16 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/bnclassify)](https://cran.r-project.org/package=bnclassify) 17 | ![](https://cranlogs.r-pkg.org/badges/bnclassify?color=yellow) 18 | ![](https://cranlogs.r-pkg.org/badges/grand-total/bnclassify?color=yellowgreen) 19 | [![Research software impact](http://depsy.org/api/package/cran/bnclassify/badge.svg)](http://depsy.org/package/r/bnclassify) 20 | [![Codecov test coverage](https://codecov.io/gh/bmihaljevic/bnclassify/branch/master/graph/badge.svg)](https://app.codecov.io/gh/bmihaljevic/bnclassify?branch=master) 21 | [![R-CMD-check](https://github.com/bmihaljevic/bnclassify/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/bmihaljevic/bnclassify/actions/workflows/R-CMD-check.yaml) 22 | 23 | Implements algorithms for learning discrete Bayesian network classifiers from data, as well as functions for using these classifiers for prediction, assessing their predictive performance, and inspecting and analyzing their properties. 24 | 25 | # Example 26 | 27 | Load a data set and learn a one-dependence estimator by maximizing Akaike's information criterion (AIC) score. 28 | ```{r} 29 | library(bnclassify) 30 | data(car) 31 | tn <- tan_cl('class', car, score = 'aic') 32 | tn 33 | plot(tn) 34 | ``` 35 | 36 | After learning the network's parameters, you can use it to classify data. 37 | ```{r} 38 | tn <- lp(tn, car, smooth = 0.01) 39 | p <- predict(tn, car, prob = TRUE) 40 | head(p) 41 | p <- predict(tn, car, prob = FALSE) 42 | head(p) 43 | ``` 44 | 45 | Estimate predictive accuracy with cross validation. 46 | ```{r} 47 | cv(tn, car, k = 10) 48 | ``` 49 | 50 | Or compute the log-likelihood 51 | ```{r} 52 | logLik(tn, car) 53 | ``` 54 | 55 | # Install 56 | 57 | Make sure you have at least version 3.2.0 of R. You can install `bnclassify` from CRAN: 58 | ```{r, eval = FALSE} 59 | install.packages('bnclassify') 60 | ``` 61 | 62 | Or get the current development version from Github: 63 | ```{r, eval = FALSE} 64 | # install.packages('devtools') 65 | devtools::install_github('bmihaljevic/bnclassify') 66 | # devtools::install_github('bmihaljevic/bnclassify', build_vignettes = TRUE) 67 | ``` 68 | 69 | Ideally, you would use the `build_vignettes = TRUE` version, and thus get the vignettes, but it requires programs such as texi2dvi to be installed on your side. 70 | 71 | # Overview 72 | 73 | See an overview of the package and examples of usage: 74 | 75 | ```{r} 76 | vignette('overview', package = 'bnclassify') 77 | ``` 78 | 79 | 80 | See the list of main functionalities. 81 | 82 | ```{r, eval = FALSE} 83 | ?bnclassify 84 | ``` 85 | 86 | Use the usage vignette for more details on the functions. 87 | 88 | ``` r 89 | vignette('usage', package = 'bnclassify') 90 | ``` 91 | 92 | Then have a look at the remaining vignettes. 
93 | 94 | ``` r 95 | browseVignettes("bnclassify") 96 | ``` 97 | -------------------------------------------------------------------------------- /src/infer-test-wrappers.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace Rcpp; 5 | 6 | // [[Rcpp::export]] 7 | IntegerVector test_dims2columns(const NumericVector cpt, const CharacterVector class_var, const CharacterVector columns_db) { 8 | const List & dimnames = cpt.attr("dimnames"); 9 | const CharacterVector & fam = dimnames.attr("names"); 10 | CharacterVector feature_fam = wrap(ordersetdiff(fam, class_var)); 11 | IntegerVector feature_fam_inds = match(feature_fam, columns_db); 12 | if (is_true(any(feature_fam_inds == 0))) stop("All features must be in the dataset."); 13 | feature_fam_inds = feature_fam_inds - 1; 14 | return feature_fam_inds; 15 | } 16 | 17 | // Delete? 18 | //[[Rcpp::export]] 19 | NumericVector get_row(List x, DataFrame df, int cptind) { 20 | // Model mod(x); 21 | // Evidence ds(df, mod.getFeatures()); 22 | // MappedCPT c = MappedCPT(mod.get_cpt(cptind), mod.getClassVar(), ds); 23 | // std::vector entries(mod.get_nclass()); 24 | // c.get_entries(1, entries); 25 | // return wrap(entries); 26 | return NumericVector::create(1); 27 | } 28 | 29 | //[[Rcpp::export]] 30 | NumericVector fill_vector(int size, int row, NumericVector rcpt, DataFrame df, CharacterVector features, std::string class_var) { 31 | CPT cpt(rcpt, class_var); 32 | Evidence evidence(df, features); 33 | MappedCPT m(cpt, evidence); 34 | std::vector output(size); 35 | // std::vector::iterator end = m.fill_instance_indices(row, output.begin()); 36 | // std::vector::iterator end = m.fill_instance_indices(row, output)); 37 | // NumericVector nv(std::distance(output.begin(), end)); 38 | // std::copy(output.begin(), end, nv.begin()); 39 | NumericVector nv = NumericVector::create(2); 40 | return nv; 41 | } 42 | 43 | //[[Rcpp::export]] 44 | void make_cpt_object(const NumericVector & x, std::string class_var) { 45 | CPT cpt(x, class_var); 46 | NumericVector nv = wrap(cpt.get_entries()); 47 | Rcout << nv << std::endl; 48 | 49 | IntegerVector iv = wrap(cpt.get_dimprod()); 50 | } 51 | 52 | 53 | /*** R 54 | # a <- aode('class', car) 55 | # a <- lp(a, car, smooth = 1) 56 | # cpt <- a$.models$persons$.params$buying 57 | # colnames(car) 58 | # names(dimnames(cpt)) 59 | # test_dims2columns(cpt,"class", columns_db = colnames(car)) 60 | 61 | # microbenchmark::microbenchmark( { g = do_mapped(t, dbor)} ) 62 | # microbenchmark::microbenchmark( { d = get_row(t$.params$bkblk, f, class_var(t), dbor) }) 63 | # microbenchmark::microbenchmark( { d = get_row(t$.params$bkblk, f, class_var(t), dbor) }) 64 | 65 | # simple_wrap <- function(x, dataset) { 66 | # if (!anyNA(dataset)) { 67 | # compute_joint(x, dataset) 68 | # } 69 | # } 70 | # microbenchmark::microbenchmark( { f = compute_joint(t, dbor)}, 71 | # { h = simple_wrap(t, dbor)}, 72 | # times = 1e3 ) 73 | 74 | sapply(t$.params, length) 75 | source('tests/infer-test-init.R') 76 | fill_vector(4, 99, t$.params$katri, dbor, features(t), class_var(t)) 77 | fill_vector(4, 409, t$.params[[3]], dbor, features(t), class_var(t)) 78 | */ 79 | 80 | -------------------------------------------------------------------------------- /tests/testthat/test-learn-aode.r: -------------------------------------------------------------------------------- 1 | context("aode") 2 | 3 | test_that("spode", { 4 | u <- spode(sp='D', features=LETTERS[5:9], class = 'C') 5 | 
expect_equal(graph_num_arcs(dag(u)), 11) 6 | expect_equal(length(graph_get_adjacent('D', dag(u))), 6) 7 | 8 | u <- spode(sp='E', features=LETTERS[c(4, 6:9)], class = 'C') 9 | expect_equal(graph_num_arcs(dag(u)), 11) 10 | expect_equal(length(graph_get_adjacent('E', dag(u))), 6) 11 | }) 12 | 13 | test_that("aode str", { 14 | # with 1 feature is an nb 15 | u <- aode(class = 'a', alphadb[, 1:2, drop = FALSE]) 16 | expect_true(is_nb(u)) 17 | 18 | u <- aode(class = 'a', random_letters_db(10)) 19 | expect_true(is_aode(u)) 20 | expect_true(is_ode(models(u)[['c']])) 21 | expect_equal(length(models(u)), 9) 22 | expect_identical(class_var(u), 'a') 23 | expect_identical(features(u), letters[2:10]) 24 | 25 | d <- models(u)[[1]] 26 | expect_equal(graph_num_arcs(dag(d)), 9 + 8) 27 | expect_equal(length(graph_get_adjacent('b', dag(d))), 9) 28 | d <- models(u)[[9]] 29 | expect_equal(graph_num_arcs(dag(d)), 9 + 8) 30 | expect_equal(length(graph_get_adjacent('j', dag(d))), 9) 31 | }) 32 | 33 | test_that("aode bnc funs", { 34 | lets <- random_letters_db(10) 35 | u <- aode(class = 'a', lets) 36 | expect_equal(class_var(u), 'a') 37 | feats <- setdiff(colnames(lets), 'a') 38 | expect_equal(features(u), feats) 39 | expect_false(is_ode(u)) 40 | expect_false(is_semi_naive(u)) 41 | expect_false(is_nb(u)) 42 | expect_error(narcs(u)) 43 | expect_error(nparams(u)) 44 | expect_error(params(u)) 45 | expect_equal(length(models(u)), 9) 46 | expect_output(print(u), regexp = 'ensemble of 9') 47 | expect_output(print(u), regexp = 'learning algorithm: aode') 48 | }) 49 | 50 | test_that("fit aode and bnc", { 51 | a <- bnc('aode', 'class', car, smooth = 1) 52 | sapply(a$models, BIC, car) # No error 53 | }) 54 | 55 | test_that("log joint", { 56 | a <- aode('class', car) 57 | a <- lp(a, car, smooth = 1) 58 | lj <- compute_log_joint(a, car) 59 | expect_equal(sum(exp(lj)), 1) 60 | }) 61 | 62 | test_that("predict", { 63 | # currently not considering weights 64 | # a <- aode('class', car, m=10000) 65 | # a <- lp(a, car, smooth = 1) 66 | 67 | # a <- bnc('aode', 'class', car, dag_args = list(m=10000), smooth = 1) 68 | # p <- predict(a, car, prob = TRUE) 69 | # cp <- params(a$models[[1]])$class 70 | # expect_equal(sum(abs(apply(p, 1, '-', cp))), 0) 71 | 72 | a <- bnc('aode', 'class', car, smooth=1) 73 | p <- predict(a, car, prob = TRUE) 74 | expect_equal(p[12, 1], c(unacc=0.793), tolerance = 0.001) # Probability from Weka 75 | 76 | # dbreast <- foreign::read.arff('~/code/teach-asdm-c01/data/dbreast-cancer.arff') 77 | # a <- bnc('aode', 'Class', dbreast, smooth=1) 78 | # p <- predict(a, dbreast, prob = TRUE) 79 | # expect_equal(p[1, 1], c("no-recurrence-events"=0.494), tolerance = 0.01) # Weka has 0.494 80 | }) 81 | 82 | test_that("incomplete data", { 83 | # gRain implementation change 84 | # # no error 85 | # skip_if_not_installed("gRain") 86 | # vt <- voting[1:10, ] 87 | # a <- aode('Class', vt) 88 | # a <- lp(a, vt, smooth = 1) 89 | # p <- predict(a, vt, prob = TRUE) 90 | # gRain implementation change 91 | }) -------------------------------------------------------------------------------- /tests/testthat/test-update.R: -------------------------------------------------------------------------------- 1 | context("Update") 2 | 3 | test_that("bnc update bnc_dag", { 4 | dgcar <- nb('class', car) 5 | # Currently, parameter fitting is required for updating; dag learning is not 6 | expect_error(bnc_get_update_args(dgcar, dag = FALSE), "lp_fargs") 7 | }) 8 | 9 | test_that("bnc update bnc_bn", { 10 | dgcar <- lp(nb('class', car), car, smooth = 
1) 11 | ua <- bnc_get_update_args(dgcar, dag = FALSE) 12 | b <- bnc_update(ua, car[1:5, ]) 13 | diff <- sum(abs(params(b)[[2]] - params(dgcar)[[2]])) 14 | expect_equal(diff, 2.393358, tolerance = 1e-7) 15 | }) 16 | 17 | test_that("bnc update bnc_bn with struct learning", { 18 | dgcar <- lp(nb('class', car), car, smooth = 1) 19 | ua <- bnc_get_update_args(dgcar, dag = TRUE) 20 | b <- bnc_update(ua, car[1:5, 6:7]) 21 | expect_identical(features(b), "safety") 22 | diff <- sum(abs(params(b)[['safety']] - params(dgcar)[['safety']])) 23 | expect_equal(diff, 2.776258, tolerance = 1e-6) 24 | }) 25 | 26 | test_that("update nominal", { 27 | a <- lp(nb('class', car), car, smooth = 1e10) 28 | b <- update(a, car, dag = FALSE) 29 | identical_non_call(a, b) 30 | }) 31 | 32 | test_that("update data subset", { 33 | a <- lp(nb('class', car), car, smooth = 1) 34 | b <- update(a, car[1:5, ], dag = FALSE) 35 | diff <- sum(abs(params(a)[[2]] - params(b)[[2]])) 36 | expect_equal(diff, 2.393358, tolerance = 1e-7) 37 | }) 38 | 39 | test_that("update with dag", { 40 | t <- lp(tan_cl('class', car), car, smooth = 0.02) 41 | b <- update(t, car[1:5, ], dag = TRUE) 42 | expect_equal(narcs(b), narcs(t)) 43 | expect_true(!isTRUE(all.equal(families(b), families(t)))) 44 | }) 45 | 46 | test_that("update with dag 2", { 47 | a <- lp(nb('class', car), car, smooth = 1) 48 | b <- update(a, car[1:5, 6:7], dag = TRUE) 49 | expect_identical(features(b), "safety") 50 | diff <- sum(abs(params(b)[['safety']] - params(a)[['safety']])) 51 | expect_equal(diff, 2.776258, tolerance = 1e-6) 52 | }) 53 | 54 | test_that("Update dag", { 55 | t <- tan_cl('class', car) 56 | d <- update_dag(t, car[1, ]) 57 | expect_identical(narcs(d), narcs(t)) 58 | expect_true(!identical(families(d), families(t))) 59 | }) 60 | 61 | test_that("Update with awnb param learning", { 62 | # gRain implementation change 63 | # skip_on_cran() 64 | # skip_if_not_installed('gRain') 65 | # a <- nb('Class', voting) 66 | # suppressWarnings(RNGversion("3.5.0")) 67 | # set.seed(0) 68 | # b <- lp(a, voting, smooth = 1, awnb_trees = 1, awnb_bootstrap = 0.5) 69 | # c <- lp(a, voting, smooth = 1, awnb_trees = 45, awnb_bootstrap = 1) 70 | # d <- lp(b, voting, smooth = 1) 71 | # r <- cv(list(b, c, d), voting, k = 2, dag = FALSE) 72 | # # All three values are different 73 | # expect_equal(r, c(0.9517397, 0.9494462, 0.8988606), tolerance = 1e-6) 74 | # gRain implementation change 75 | }) 76 | 77 | test_that("Multi-update bnc_dag", { 78 | a <- nb('class', car) 79 | b <- lp(a, car, smooth = 1) 80 | expect_error(cv(list(a, b), car, k = 10, dag = FALSE), "must inherit") 81 | }) 82 | 83 | test_that("Update with non-name function", { 84 | dgcar <- lp(nb('class', car), car, smooth = 1) 85 | e <- lapply(list(dgcar), lp, car, smooth = 1) 86 | ua <- bnc_get_update_args(e[[1]], dag = FALSE) 87 | b <- bnc_update(ua, car[1:5, ]) 88 | diff <- sum(abs(params(b)[[2]] - params(dgcar)[[2]])) 89 | expect_equal(diff, 2.393358, tolerance = 1e-7) 90 | }) -------------------------------------------------------------------------------- /R/learn-params-wanbia.R: -------------------------------------------------------------------------------- 1 | # Using smooth = 1 by default 2 | fit_wanbia_nb <- function(class_var, dataset) { 3 | lp(nb(class_var, dataset), dataset, smooth = 1) 4 | } 5 | #' Returns a function to compute negative conditional log-likelihood given feature weights 6 | #' @keywords internal 7 | make_cll <- function(class_var, dataset) { 8 | check_class_in_dataset(class_var, dataset) 9 | function(w) 
{ 10 | nb <- fit_wanbia_nb(class_var, dataset) 11 | nb <- set_weights(nb, w) 12 | - compute_cll(nb, dataset) 13 | } 14 | } 15 | #' Returns a function to compute the gradient of negative conditional log-likelihood with respect to feature weights 16 | #' @keywords internal 17 | make_cll_gradient <- function(class_var, dataset) { 18 | check_class_in_dataset(class_var, dataset) 19 | function(w) { 20 | unweighted <- fit_wanbia_nb(class_var, dataset) 21 | compute_cll_gradients(unweighted, dataset, w) 22 | } 23 | } 24 | compute_cll_gradients <- function(unweighted, dataset, w) { 25 | stopifnot(is.null(unweighted$.weights)) 26 | features <- features(unweighted) 27 | db <- lapply(dataset, as.character) 28 | feats <- db[features] 29 | params <- params(unweighted)[features] 30 | 31 | weighted <- set_weights(unweighted, w) 32 | cp <- compute_cp(weighted, dataset = dataset) 33 | class_var <- class_var(weighted) 34 | grad <- mapply(cll_gradient_var, feats, params , MoreArgs = list(class = db[[class_var]], class_posterior = cp )) 35 | grad 36 | } 37 | #' Assuming that the cpt is a leaf, returns 1 instead of a CPT entry when value missing 38 | #' @param x a vector of values 39 | #' @keywords internal 40 | get_log_leaf_entries <- function(cpt, x) { 41 | stopifnot(is.character(x), length(dim(cpt)) == 2) 42 | entries <- matrix(numeric(length = length(x) * ncol(cpt)), ncol = ncol(cpt)) 43 | colnames(entries) <- colnames(cpt) 44 | for (i in seq_along(x)) { 45 | value <- x[i] 46 | if (is.na(value)) { 47 | entries[i, ] <- 1 48 | } 49 | else { 50 | entries[i, ] <- cpt[value, ] 51 | } 52 | } 53 | log(entries) 54 | } 55 | cll_gradient_var <- function(x, cpt, class, class_posterior) { 56 | log_theta <- get_log_leaf_entries(cpt, x) 57 | stopifnot(identical(dim(class_posterior), dim(log_theta ))) 58 | log_theta_class <- subset_by_colnames(class, log_theta) 59 | sum(- log_theta_class + rowSums(log_theta * class_posterior)) 60 | } 61 | #' Compute WANBIA weights. 62 | #' 63 | #' Computes feature weights by optimizing conditional log-likelihood. 64 | #' Weights are bounded to [0, 1]. Implementation based on the original paper 65 | #' and the code provided at \url{https://sourceforge.net/projects/rawnaivebayes}. 66 | #' 67 | #' @inheritParams nb 68 | #' @param dataset The data frame from which to learn feature weights 69 | #' @param return_optim_object Return full output of `optim` 70 | #' @return a named numeric vector 71 | #' @keywords internal 72 | compute_wanbia_weights <- function(class, dataset, return_optim_object = FALSE) { 73 | features <- get_features(class = class, dataset = dataset) 74 | # Initial weights are 1. 75 | w <- rep(1, length(features)) 76 | w <- setNames(w, features) 77 | cll <- make_cll(class, dataset) 78 | cll_gradient <- make_cll_gradient(class, dataset) 79 | w <- optim(w, cll, cll_gradient, method = 'L-BFGS-B', lower = 0, upper = 1) 80 | if (w$convergence != 0) warning(paste0("WANBIA did not converge correctly. ", w$message)) 81 | if (return_optim_object ) return (w) 82 | w$par 83 | } -------------------------------------------------------------------------------- /R/wrap-gRain.R: -------------------------------------------------------------------------------- 1 | # Accessors 2 | #' @export 3 | #' @describeIn grain_and_graph Convert to a grain. 
4 | as_grain <- function(x) { 5 | stopifnot(inherits(x, "bnc_bn")) 6 | if (is.null(x$.grain)) { 7 | compile_grain(params(x)) 8 | } 9 | else { 10 | x$.grain 11 | } 12 | } 13 | # Computes the log joint probability of the observed features for each of the classes 14 | compute_grain_log_joint <- function(grain, dataset, class) { 15 | if (!requireNamespace("gRain", quietly = TRUE)) { 16 | stop("gRain needed for this function to work. Please install it.", 17 | call. = FALSE) 18 | } 19 | # Check gRain compiled 20 | stopifnot(is_grain_compiled(grain)) 21 | # Check data set 22 | check_dataset(dataset) 23 | # Check N > 0 24 | stopifnot(nrow(dataset) > 0) 25 | # If class in dataset, remove it 26 | if (class %in% colnames(dataset)) { 27 | dataset <- dataset[, setdiff(colnames(dataset), class), drop = FALSE] 28 | } 29 | cp <- t(apply(dataset, 1, compute_grain_log_joint_instance, grain, class)) 30 | # Remove any rownames if they may have left over from dataset 31 | rownames(cp) <- NULL 32 | cp 33 | } 34 | # Computes the log joint probability of the observed features for each of the classes 35 | compute_grain_log_joint_instance <- function(instance, grain, class) { 36 | if (!requireNamespace("gRain", quietly = TRUE)) { 37 | stop("gRain needed for this function to work. Please install it.", 38 | call. = FALSE) 39 | } 40 | # Instance is character 41 | stopifnot(is.character(instance)) 42 | # Get non-NA nodes in instance 43 | instance <- instance[!is.na(instance)] 44 | vars <- intersect(names(instance), grain_nodes(grain)) 45 | instance <- instance[vars] 46 | # Check class is character and not in instance 47 | check_class(class) 48 | stopifnot(!(class %in% vars)) 49 | # Set them as evidence if they are more than 0 50 | if (length(instance) > 0) { 51 | grain <- gRain::setEvidence(grain, nodes = vars, states = instance) 52 | } 53 | # gRain has a bug and cannot return unnormalized class. Therefore, using the workaround: P(C | x_evidence) * P(x_evidence) 54 | cp <- gRain::querygrain(grain, nodes = class, normalize = TRUE)[[class]] 55 | cp <- log(cp) 56 | if (length(vars) > 0) { 57 | cp <- cp + log(gRain::pEvidence(grain)) 58 | } 59 | cp 60 | } 61 | # Compiles a grain from a a list of CPTs 62 | compile_grain <- function(cpts) { 63 | if (!requireNamespace("gRain", quietly = TRUE)) { 64 | stop("gRain needed for this function to work. Please install it.", 65 | call. = FALSE) 66 | } 67 | if (!requireNamespace("gRbase", quietly = TRUE)) { 68 | stop("gRbase needed for this function to work. Please install it.", 69 | call. = FALSE) 70 | } 71 | # Check cpts is a list 72 | stopifnot(is.list(cpts)) 73 | # TODO: Check each cpt. 74 | # Convert each cpt to parray 75 | pcpts <- lapply(cpts, gRbase::as.parray, normalize = "none", smooth = 0) 76 | # Check the probabilities are left unchanged 77 | all_eq <- mapply(equivalent_num, cpts, pcpts, SIMPLIFY = TRUE) 78 | stopifnot(vapply(all_eq, isTRUE, FUN.VALUE = logical(1))) 79 | # Assemble the grain 80 | gRain::grain.CPTspec(gRain::compileCPT(pcpts)) 81 | } 82 | is_grain_compiled <- function(g) { 83 | if (!requireNamespace("gRain", quietly = TRUE)) { 84 | stop("gRain needed for this function to work. Please install it.", 85 | call. 
= FALSE) 86 | } 87 | inherits(g, "grain") && g$isCompiled 88 | } 89 | grain_nodes <- function(g) { 90 | g$universe$nodes 91 | } -------------------------------------------------------------------------------- /R/anb-dag.R: -------------------------------------------------------------------------------- 1 | # A basic supertype of all bnc objects 2 | bnc_base <- function(class, features) { 3 | obj <- list(.class = unname(class)) 4 | obj$.features <- unname(features) 5 | class(obj) <- 'bnc_base' 6 | obj 7 | } 8 | # Creates an augmented naive Bayes with structure but no parameters. 9 | bnc_dag <- function(dag, class) { 10 | families <- graphNEL2families(dag, class) 11 | # Save dag, class, features, and call 12 | make_bnc_dag(class = class, families = families, dag = dag) 13 | } 14 | make_bnc_dag <- function(class, families, dag) { 15 | # Not checking families for efficiency; they are checked in bnc_dag anyway 16 | obj <- bnc_base(class = class, features = NULL) 17 | obj$.dag = dag 18 | obj$.families = families 19 | class(obj) <- c('bnc_dag', class(obj)) 20 | obj 21 | } 22 | # Checks that it is a valid bnc_dag object 23 | check_bnc_dag <- function(x) { 24 | check_bnc_dag_basic(x) 25 | # Check families 26 | check_anb_families(families(x), class_var(x)) 27 | } 28 | check_bnc_dag_basic <- function(x) { 29 | class <- class_var(x) 30 | features <- features(x) 31 | # This also checks for class. 32 | check_features(features = features, class = class) 33 | stopifnot(identical(vars(x), setNames(nm = c(features, class)))) 34 | } 35 | 36 | #' @export 37 | #' @describeIn grain_and_graph Convert to a graphNEL. 38 | as_igraph <- function(x) { 39 | stopifnot(inherits(x, "bnc_dag")) 40 | graph_internal2graph_NEL(dag(x)) 41 | } 42 | #' Get the underlying graph. This should be exported. 43 | #' @keywords internal 44 | #' @param x the bnc object 45 | dag <- function(x) { 46 | stopifnot(inherits(x, "bnc_dag")) 47 | x$.dag 48 | } 49 | #' @export 50 | #' @describeIn inspect_bnc_dag Returns the class variable. 51 | class_var <- function(x) { 52 | stopifnot(inherits(x, "bnc_base")) 53 | x$.class 54 | } 55 | #' @export 56 | #' @describeIn inspect_bnc_dag Returns the features. 57 | features <- function(x) { 58 | # Implementing a generic features did not allow me to document it in inspect_bnc_dag, so I dispatch by class within the function 59 | if (inherits(x, 'bnc_dag')) { 60 | return (setdiff(vars(x), class_var(x))) 61 | } 62 | else if (inherits(x, 'bnc_base')) { 63 | return(x$.features) 64 | } 65 | stop('Must be either bnc_dag or bnc_base') 66 | } 67 | #' @export 68 | #' @describeIn inspect_bnc_dag Returns all variables (i.e., features + class). 69 | vars <- function(x) { 70 | setNames(nm = get_family_vars(families(x))) 71 | } 72 | #' @export 73 | #' @describeIn inspect_bnc_dag Returns the family of each variable. 74 | families <- function(x) { 75 | stopifnot(inherits(x, "bnc_dag")) 76 | x$.families 77 | } 78 | #' @export 79 | #' @describeIn inspect_bnc_dag Returns the model string of the network in bnlearn format (with a space between consecutive families).
80 | modelstring <- function(x) { 81 | stopifnot(inherits(x, "bnc_dag")) 82 | fams <- families(x) 83 | order <- order_acyclic(families(x)) 84 | fams <- fams[order] 85 | paste(sapply(names(fams), function(node) { 86 | paste("[", node, ifelse(length(fams[[node]]) - 1 > 0, "|", ""), paste(fams[[node]][-1], sep = "", collapse = ":"), "]", sep = "") 87 | }), collapse = " ") 88 | } 89 | # # Returns all feature families excluding the class variable 90 | # # Returns all feature families excluding the class variable 91 | # feature_families <- function(x) { 92 | # feature_fams <- families(x)[features(x)] 93 | # lapply(feature_fams, family_features, class_var(x)) 94 | # } 95 | #' @export 96 | #' @describeIn inspect_bnc_dag Returns the family of each feature. 97 | feature_families <- function(x) { 98 | families(x)[features(x)] 99 | } --------------------------------------------------------------------------------
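Editor's sketch for compute_wanbia_weights() in R/learn-params-wanbia.R: the snippet below illustrates the bounded L-BFGS-B call that the function builds around make_cll() and make_cll_gradient(). The objective and gradient here (toy_negative_cll, toy_gradient) are hypothetical placeholders, not bnclassify code; only the optim() usage mirrors the source.

toy_negative_cll <- function(w) sum((w - 0.3)^2)   # placeholder for the negative conditional log-likelihood
toy_gradient <- function(w) 2 * (w - 0.3)          # exact gradient of the placeholder objective
w <- setNames(rep(1, 3), c("f1", "f2", "f3"))      # initial feature weights are all 1
fit <- optim(w, toy_negative_cll, toy_gradient,
             method = "L-BFGS-B", lower = 0, upper = 1)  # weights constrained to [0, 1]
if (fit$convergence != 0) warning("Optimization did not converge. ", fit$message)
fit$par                                            # named vector of learned weights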
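Editor's sketch for get_log_leaf_entries(): a toy feature-by-class CPT, made up for illustration, showing that a missing value contributes probability 1, i.e. log(1) = 0, to the log joint.

cpt <- matrix(c(0.7, 0.3, 0.2, 0.8), nrow = 2,
              dimnames = list(feat = c("a", "b"), class = c("yes", "no")))
x <- c("a", NA, "b")                         # one observation is missing
entries <- matrix(1, nrow = length(x), ncol = ncol(cpt),
                  dimnames = list(NULL, colnames(cpt)))
observed <- !is.na(x)
entries[observed, ] <- cpt[x[observed], ]    # CPT rows for the observed values
log(entries)                                 # the NA row is all zeros on the log scale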
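Editor's sketch for compute_grain_log_joint_instance() in R/wrap-gRain.R: the workaround log P(C | evidence) + log P(evidence) on a minimal two-node network. The network itself is invented for illustration; the gRain calls (setEvidence, querygrain, pEvidence) are the ones used in the source.

library(gRain)
cpt_c <- cptable(~class, values = c(60, 40), levels = c("yes", "no"))
cpt_f <- cptable(~feat | class, values = c(7, 3, 2, 8), levels = c("a", "b"))
net <- grain(compileCPT(list(cpt_c, cpt_f)))
net <- setEvidence(net, nodes = "feat", states = "a")
log_conditional <- log(querygrain(net, nodes = "class")[["class"]])  # log P(class | feat = "a")
log_joint <- log_conditional + log(pEvidence(net))                   # add log P(feat = "a")
log_joint                                                            # unnormalized log joint per class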
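Editor's sketch for modelstring() in R/anb-dag.R: how a family list (each element is the node followed by its parents, already in an acyclic order) maps to the bnlearn string format. The fams object below is a hypothetical three-node example, not output from bnclassify.

fams <- list(class = "class", f1 = c("f1", "class"), f2 = c("f2", "class", "f1"))
paste(sapply(names(fams), function(node) {
  parents <- fams[[node]][-1]                # drop the node itself, keep its parents
  paste0("[", node, if (length(parents) > 0) "|" else "", paste(parents, collapse = ":"), "]")
}), collapse = " ")
# "[class] [f1|class] [f2|class:f1]"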