├── data ├── alarm.rda ├── asia.rda ├── marks.rda ├── coronary.rda ├── lizards.rda ├── hailfinder.rda ├── insurance.rda ├── gaussian.test.rda ├── learning.test.rda └── clgaussian.test.rda ├── src ├── minimal │ ├── table.h │ ├── unique.h │ ├── strings.h │ ├── data.frame.h │ ├── common.h │ ├── unique.c │ ├── strings.c │ └── tiers.c ├── include │ ├── learning.h │ ├── bn.h │ ├── sampling.h │ ├── globals.h │ ├── rcore.h │ └── graph.h ├── sanitization │ ├── data.h │ └── covariance.c ├── core │ ├── sort.h │ ├── math.functions.h │ ├── sets.h │ ├── correlation.h │ ├── sampling.h │ ├── moments.h │ ├── sort.c │ ├── allocations.h │ ├── covariance.matrix.h │ ├── uppertriangular.c │ ├── uppertriangular.h │ ├── allocations.c │ └── contingency.tables.h ├── preprocessing │ ├── enums.c │ └── preprocessing.h ├── inference │ ├── rinterface │ │ ├── likelihood.weighting.c │ │ ├── rbn.c │ │ └── cpdist.c │ ├── loglikelihood │ │ ├── loglikelihood.h │ │ └── common.c │ └── likelihood.weighting.c ├── tests │ ├── gaussian │ │ ├── df.adjust.c │ │ └── gaussian.tests.c │ ├── rinterface │ │ ├── indep.test.c │ │ └── htest.c │ ├── enums.c │ ├── omnibus │ │ └── custom.test.c │ └── discrete │ │ └── df.adjust.c ├── graphs │ ├── graphs.h │ ├── completely.directed.c │ ├── rinterface │ │ ├── path.matrix.c │ │ ├── connected.components.c │ │ └── sid.c │ └── connected.components.c ├── fitted │ ├── rinterface │ │ └── nparams.c │ ├── enums.c │ ├── nparams.c │ └── fitted.h ├── test.counter.c ├── parameters │ ├── parameters.h │ ├── discrete │ │ └── classic.discrete.c │ └── rinterface │ │ └── classic.discrete.c ├── foreign │ └── parse.c ├── arcs │ ├── bind.c │ └── arcs2amat.c ├── scores │ ├── enum.c │ ├── custom.score.c │ ├── nml.regret.c │ └── nml.regret.table.c ├── math │ └── linear.algebra.h └── bnlearn │ ├── nparams.c │ └── shd.c ├── R ├── utils-amat.R ├── likelihood.R ├── nparams.R ├── refit.lm.R ├── utils-elist.R ├── sanitization-formula.R ├── utils-tests.R ├── frontend-averaging.R ├── aracne.R ├── simulation.R ├── utils-plot.R ├── sanitization-mutual.R ├── scores.R ├── chow.liu.R ├── utils-cluster.R ├── utils-print.R ├── frontend-amat.R ├── backend-score.R ├── test.R ├── sanitization-enumeration.R ├── frontend-formula.R ├── data.preprocessing.R ├── sanitization-amat.R ├── sanitization-classifiers.R ├── frontend-queries.R ├── averaged.network.R ├── classifiers.R ├── sanitization-misc.R ├── frontend-causal.R ├── sanitization-bootstrap.R ├── bootstrap.R ├── frontend-mvnorm.R ├── graph-generation.R └── frontend-foreign.R ├── man ├── pcalg.Rd ├── test.counter.Rd ├── gaussian-test.Rd ├── clgaussian-test.Rd ├── ordering.Rd ├── configs.Rd ├── dsep.Rd ├── igraphpkg.Rd ├── plot.bn.strength.Rd ├── bayesian.network.classifiers.Rd ├── graphpkg.Rd ├── learning-test.Rd ├── alpha.star.Rd ├── statspkg.Rd ├── mvnorm.Rd ├── marks.Rd ├── mi.matrix.Rd ├── bn.strength-class.Rd ├── kl.Rd ├── plot.bn.Rd ├── nodeops.Rd ├── lizards.Rd ├── rbn.Rd ├── gRain.Rd ├── bf.Rd ├── foreign.Rd ├── bn.fit.plots.Rd ├── bnboot.Rd ├── bn.kcv.class.Rd ├── count.graphs.Rd ├── modelstring.Rd ├── blacklist.Rd ├── coronary.Rd ├── whitelists.and.blacklists.Rd ├── hybrid.Rd ├── asia.Rd ├── graph.Rd └── hc.Rd └── DESCRIPTION /data/alarm.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/alarm.rda -------------------------------------------------------------------------------- /data/asia.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/asia.rda -------------------------------------------------------------------------------- /data/marks.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/marks.rda -------------------------------------------------------------------------------- /data/coronary.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/coronary.rda -------------------------------------------------------------------------------- /data/lizards.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/lizards.rda -------------------------------------------------------------------------------- /data/hailfinder.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/hailfinder.rda -------------------------------------------------------------------------------- /data/insurance.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/insurance.rda -------------------------------------------------------------------------------- /data/gaussian.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/gaussian.test.rda -------------------------------------------------------------------------------- /data/learning.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/learning.test.rda -------------------------------------------------------------------------------- /data/clgaussian.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/clgaussian.test.rda -------------------------------------------------------------------------------- /src/minimal/table.h: -------------------------------------------------------------------------------- 1 | #ifndef TABLE_HEADER 2 | #define TABLE_HEADER 3 | 4 | SEXP minimal_table(SEXP dataframe, SEXP missing); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /src/minimal/unique.h: -------------------------------------------------------------------------------- 1 | #ifndef R_UNIQUE_HEADER 2 | #define R_UNIQUE_HEADER 3 | 4 | SEXP unique(SEXP array); 5 | SEXP dupe(SEXP array); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/include/learning.h: -------------------------------------------------------------------------------- 1 | 2 | /* shared between hill climbing and tabu search. 
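 * (note inferred from the signature below, not from the upstream sources: bestop_update() records the chosen operation and the arc it acts on, from -> to, in the bestop buffer.)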
*/ 3 | void bestop_update(SEXP bestop, char *op, const char *from, const char *to); 4 | 5 | -------------------------------------------------------------------------------- /src/sanitization/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_SANITIZATION_HEADER 2 | #define DATA_SANITIZATION_HEADER 3 | 4 | SEXP data_type(SEXP data); 5 | SEXP count_observed_values(SEXP data); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/core/sort.h: -------------------------------------------------------------------------------- 1 | #ifndef SORTING_HEADER 2 | #define SORTING_HEADER 3 | 4 | void i_sort(int *array, int *indexes, int length); 5 | void d_sort(double *array, int *indexes, int length); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/include/bn.h: -------------------------------------------------------------------------------- 1 | 2 | /* from cache.structure.c */ 3 | SEXP cache_structure(SEXP nodes, SEXP amat, SEXP debug); 4 | SEXP cache_node_structure(int cur, SEXP nodes, int *amat, int nrow, 5 | int *status, bool debugging); 6 | -------------------------------------------------------------------------------- /src/minimal/strings.h: -------------------------------------------------------------------------------- 1 | #ifndef R_STRINGS_HEADER 2 | #define R_STRINGS_HEADER 3 | 4 | SEXP string_delete(SEXP array, SEXP string, int *idx); 5 | SEXP string_setdiff(SEXP large, SEXP small); 6 | SEXP mkStringVec(int n, ...); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/include/sampling.h: -------------------------------------------------------------------------------- 1 | #include "../core/contingency.tables.h" 2 | 3 | /* from rbn.c */ 4 | void c_rbn_master(SEXP fitted, SEXP result, SEXP n, SEXP fix, bool add_metadata, 5 | bool debugging); 6 | 7 | /* from likelihood.weighting.c */ 8 | void c_lw_weights(SEXP fitted, SEXP data, int n, double *w, SEXP keep, 9 | bool debugging); 10 | -------------------------------------------------------------------------------- /src/minimal/data.frame.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_FRAME_HEADER 2 | #define DATA_FRAME_HEADER 3 | 4 | SEXP minimal_data_frame(SEXP obj); 5 | SEXP c_dataframe_column(SEXP dataframe, SEXP name, bool drop, bool keep_names); 6 | SEXP node2df(SEXP target, int n); 7 | SEXP fitnode2df(SEXP fitted, SEXP node, int n); 8 | SEXP fit2df(SEXP fitted, int n); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /src/core/math.functions.h: -------------------------------------------------------------------------------- 1 | #ifndef MATH_FUNCTIONS_HEADER 2 | #define MATH_FUNCTIONS_HEADER 3 | 4 | int imax(int x, int y); 5 | int all_max(double *array, int length, int *maxima, int *indexes, double *buf); 6 | int i_which_max(int *array, int length); 7 | int d_which_max(double *array, int length); 8 | int ld_which_max(long double *array, int length); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /src/preprocessing/enums.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "preprocessing.h" 3 | 4 | #define ENTRY(key, value) if (strcmp(label, key) == 0) return value; 5 | 6 | discretization_e 
discretization_to_enum(const char *label) { 7 | 8 | ENTRY("quantile", QUANTILE); 9 | ENTRY("interval", INTERVAL); 10 | ENTRY("hartemink", HARTEMINK); 11 | 12 | return ENOMETHOD; 13 | 14 | }/*DISCRETIZATION_TO_ENUM*/ 15 | -------------------------------------------------------------------------------- /R/utils-amat.R: -------------------------------------------------------------------------------- 1 | 2 | # convert a set of arcs to a (real) adjacency matrix. 3 | arcs2amat = function(arcs, nodes) { 4 | 5 | .Call(call_arcs2amat, 6 | arcs = as.character(arcs), 7 | nodes = as.character(nodes)) 8 | 9 | }#ARCS2AMAT 10 | 11 | # convert an adjacency matrix back to a set of arcs. 12 | amat2arcs = function(a, nodes) { 13 | 14 | .Call(call_amat2arcs, 15 | amat = a, 16 | nodes = nodes) 17 | 18 | }#AMAT2ARCS 19 | 20 | -------------------------------------------------------------------------------- /src/core/sets.h: -------------------------------------------------------------------------------- 1 | #ifndef SETS_HEADER 2 | #define SETS_HEADER 3 | 4 | void cfg(SEXP parents, int *configurations, int *nlevels); 5 | void c_fast_config(int **columns, int nrow, int ncol, int *levels, 6 | int *configurations, int *nlevels, int offset); 7 | SEXP c_configurations(SEXP parents, int factor, int all_levels); 8 | 9 | void first_subset(int *work, int n, int offset); 10 | int next_subset(int *work, int n, int max, int offset); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/core/correlation.h: -------------------------------------------------------------------------------- 1 | #ifndef CORRELATION_HEADER 2 | #define CORRELATION_HEADER 3 | 4 | #include "covariance.matrix.h" 5 | 6 | double c_fast_cor(double *xx, double *yy, int num, double xm, double ym, 7 | long double xsd, long double ysd); 8 | double c_cor_with_missing(double *x, double *y, int nobs, double *xm, 9 | double *ym, double *xsd, double *ysd, int *ncomplete); 10 | double c_fast_pcor(covariance cov, int v1, int v2, int *err, bool decomp); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /R/likelihood.R: -------------------------------------------------------------------------------- 1 | 2 | # compute the log-likelihood of some data for a given network. 
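# (illustrative note: with by.sample = TRUE the backend is expected to return one log-likelihood term per observation rather than their sum.)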
3 | loglikelihood = function(fitted, data, by.sample = FALSE, keep = names(fitted), 4 | propagate.missing = FALSE, debug = FALSE) { 5 | 6 | .Call(call_loglikelihood_function, 7 | fitted = fitted, 8 | data = data, 9 | by.sample = by.sample, 10 | keep.nodes = keep, 11 | propagate.missing = propagate.missing, 12 | debug = debug) 13 | 14 | }#LOGLIKELIHOOD 15 | -------------------------------------------------------------------------------- /src/core/sampling.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLING_HEADER 2 | #define SAMPLING_HEADER 3 | 4 | void SampleNoReplace(int k, int n, int *y, int *x); 5 | #define RandomPermutation(n, y, x) SampleNoReplace(n, n, y, x) 6 | void SampleReplace(int k, int n, int *y, int *x); 7 | void ProbSampleReplace(int n, double *probs, int *values, int ns, int *samples); 8 | void CondProbSampleReplace(int nprobs, int nconf, double *probs, int *conf, 9 | int *values, int ns, int *samples, bool *warn); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /R/nparams.R: -------------------------------------------------------------------------------- 1 | 2 | # number of parameters for a DAG and an associated data set. 3 | nparams.backend = function(x, data, debug = FALSE) { 4 | 5 | .Call(call_nparams_cgnet, 6 | graph = x, 7 | data = .data.frame.column(data, names(x$nodes)), 8 | debug = debug) 9 | 10 | }#NPARAMS.BACKEND 11 | 12 | # number of parameters for a fitted network. 13 | nparams.fitted = function(x, debug = FALSE) { 14 | 15 | .Call(call_nparams_fitted, 16 | bn = x, 17 | debug = debug) 18 | 19 | }#NPARAMS.FITTED 20 | 21 | -------------------------------------------------------------------------------- /src/inference/rinterface/likelihood.weighting.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../include/sampling.h" 3 | 4 | /* compute the weights of the likelihood weighting particles. */ 5 | SEXP lw_weights(SEXP fitted, SEXP data, SEXP keep, SEXP debug) { 6 | 7 | int n = length(VECTOR_ELT(data, 0)); 8 | SEXP weights; 9 | 10 | PROTECT(weights = allocVector(REALSXP, n)); 11 | 12 | c_lw_weights(fitted, data, n, REAL(weights), keep, isTRUE(debug)); 13 | 14 | UNPROTECT(1); 15 | 16 | return weights; 17 | 18 | }/*LW_WEIGHTS*/ 19 | 20 | -------------------------------------------------------------------------------- /src/core/moments.h: -------------------------------------------------------------------------------- 1 | #ifndef MOMENTS_HEADER 2 | #define MOMENTS_HEADER 3 | 4 | double c_sse(double *data, double mean, int nrow); 5 | double c_mean(double *data, int nrow); 6 | 7 | void c_meanvec(double **data, double *mean, int nrow, int ncol, int first); 8 | void c_ssevec(double **data, double *sse, double *means, int nrow, int ncol, 9 | int first); 10 | 11 | void c_sd(double *xx, int nobs, int p, double mean, double *sd); 12 | void c_cgsd(double *xx, int *z, int *nz, int nobs, int nstrata, int p, 13 | long double *means, double *sd); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /R/refit.lm.R: -------------------------------------------------------------------------------- 1 | 2 | # backend for the as.lm() functions. 3 | lm.refit.node = function(node, data) { 4 | 5 | # construct the model formula, inserting the intercept term as needed. 
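# (for example, a hypothetical node D with parents A and B gives "D ~ A+B", while a parentless node gives "D ~ 1".)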
6 | if (length(node$parents) == 0) 7 | model = paste(node$node, "~ 1") 8 | else 9 | model = paste(node$node, "~", paste(node$parents, collapse = "+")) 10 | # fit the model. 11 | lm.fit = lm(model, data = data) 12 | # replace the formula in the call to lm() for legibility. 13 | lm.fit$call$formula = formula(model) 14 | 15 | return(lm.fit) 16 | 17 | }#LM.REFIT.NODE 18 | 19 | -------------------------------------------------------------------------------- /R/utils-elist.R: -------------------------------------------------------------------------------- 1 | 2 | # convert a set of arcs to a (real) edge list. 3 | arcs2elist = function(arcs, nodes, weights = NULL, nid = TRUE, sublist = TRUE, 4 | parents = FALSE) { 5 | 6 | .Call(call_arcs2elist, 7 | arcs = arcs, 8 | nodes = nodes, 9 | weigths = weights, 10 | nid = nid, 11 | sublist = sublist, 12 | parents = parents) 13 | 14 | }#ARCS2ELIST 15 | 16 | # convert an edge list into an arc set. 17 | elist2arcs = function(elist) { 18 | 19 | .Call(call_elist2arcs, 20 | elist = elist) 21 | 22 | }#ELIST2ARCS 23 | 24 | -------------------------------------------------------------------------------- /src/tests/gaussian/df.adjust.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | double gaussian_cdf(test_e test, int num, int nz) { 5 | 6 | double df = 0; 7 | 8 | switch(test) { 9 | 10 | case COR: 11 | df = num - nz - 2; 12 | break; 13 | 14 | case MI_G: 15 | case MI_G_SH: 16 | df = 1; 17 | break; 18 | 19 | case ZF: 20 | df = num - nz - 3; 21 | break; 22 | 23 | default: 24 | error("no degrees of freedom for this test."); 25 | 26 | }/*SWITCH*/ 27 | 28 | return df; 29 | 30 | }/*GAUSSIAN_CDF*/ 31 | -------------------------------------------------------------------------------- /src/minimal/common.h: -------------------------------------------------------------------------------- 1 | #ifndef R_COMMON_HEADER 2 | #define R_COMMON_HEADER 3 | 4 | /* macro for the number of levels of a factor. */ 5 | #define NLEVELS(x) length(getAttrib(x, R_LevelsSymbol)) 6 | 7 | /* from common.c */ 8 | bool c_is(SEXP obj, const char *str); 9 | SEXP getListElement(SEXP list, char *str); 10 | void *DATAPTR(SEXP x); 11 | SEXP mkReal(double x); 12 | SEXP mkRealVec(int n, ...); 13 | SEXP int2fac(SEXP vector, int *nlevels); 14 | void setDimNames(SEXP obj, SEXP rownames, SEXP colnames); 15 | SEXP subset_by_name(SEXP vec, int n, ...); 16 | bool all_equal(SEXP vec, SEXP val); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/inference/rinterface/rbn.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../include/sampling.h" 3 | #include "../../minimal/data.frame.h" 4 | 5 | /* generate random observations from a bayesian network. */ 6 | SEXP rbn_master(SEXP fitted, SEXP n, SEXP fix, SEXP add_metadata, SEXP debug) { 7 | 8 | SEXP result; 9 | 10 | /* allocate the return value. */ 11 | PROTECT(result = fit2df(fitted, INT(n))); 12 | /* generate the random observations. 
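 * (presumably parents-first, following a topological ordering of the network, as in the logic sampling scheme referenced in R/simulation.R.)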
*/ 13 | c_rbn_master(fitted, result, n, fix, isTRUE(add_metadata), isTRUE(debug)); 14 | 15 | UNPROTECT(1); 16 | 17 | return result; 18 | 19 | }/*RBN_MASTER*/ 20 | 21 | -------------------------------------------------------------------------------- /src/tests/gaussian/gaussian.tests.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | 3 | /* correlation transform for the exact t-test. */ 4 | double cor_t_trans(double cor, double df) { 5 | 6 | return cor * sqrt(df) / sqrt(1 - cor * cor); 7 | 8 | }/*COR_T_TRANS*/ 9 | 10 | /* correlation transform for Fisher's Z test. */ 11 | double cor_zf_trans(double cor, double df) { 12 | 13 | return log((1 + cor)/(1 - cor)) / 2 * sqrt(df); 14 | 15 | }/*COR_ZF_TRANS*/ 16 | 17 | /* correlation transform for mutual information. */ 18 | double cor_mi_trans(double cor) { 19 | 20 | return - 0.5 * log(1 - cor * cor); 21 | 22 | }/*COR_MI_TRANS*/ 23 | 24 | -------------------------------------------------------------------------------- /src/graphs/graphs.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPHICAL_FUNCTIONS_HEADER 2 | #define GRAPHICAL_FUNCTIONS_HEADER 3 | 4 | int ug_connected_components(int *amat, char **labels, int nnodes, int **buffer, 5 | int *buflen, bool debugging); 6 | void dag_path_matrix(int *amat, int *pathmat, int nnodes, char **labels, 7 | int *root_ids, int nroots, bool debugging); 8 | void initialize_reachability_matrix(int *amat, int *pmat, int nnodes, 9 | int target, bool *cond_set, int *rmat, char **labels, bool debugging); 10 | void complete_reachability_matrix(int *initial_rmat, int *final_rmat, 11 | int nnodes, char **labels, bool debugging); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/include/globals.h: -------------------------------------------------------------------------------- 1 | 2 | /* the global test counter, now living in C-land. */ 3 | extern double test_counter; 4 | 5 | /* symbols used to set object attributes. */ 6 | extern SEXP BN_ModelstringSymbol; 7 | extern SEXP BN_NodesSymbol; 8 | extern SEXP BN_ProbSymbol; 9 | extern SEXP BN_MethodSymbol; 10 | extern SEXP BN_WeightsSymbol; 11 | extern SEXP BN_DsepsetSymbol; 12 | extern SEXP BN_MetaDataSymbol; 13 | extern SEXP BN_NobsSymbol; 14 | extern SEXP BN_DfSymbol; 15 | extern SEXP BN_CutpointsSymbol; 16 | 17 | /* shortcuts to boolean SEXPs. */ 18 | extern SEXP TRUESEXP, FALSESEXP; 19 | 20 | /* numerical constants */ 21 | #define MACHINE_TOL sqrt(DBL_EPSILON) 22 | 23 | -------------------------------------------------------------------------------- /R/sanitization-formula.R: -------------------------------------------------------------------------------- 1 | 2 | # check the string representation (aka the formula) of a network. 3 | check.modelstring = function(string) { 4 | 5 | # check the type. 6 | if (!is.string(string)) 7 | stop("string must be a character string.") 8 | 9 | # check the syntax (separate regexps for root nodes and non-root nodes).
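# (e.g. "[A][C|A][B|C]" is well-formed: "[A]" matches the root-node regexp and "[C|A]", "[B|C]" the non-root one.)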
10 | correct.format = paste("^(", 11 | "\\[[^\\[\\]\\|:]+?\\]", 12 | "|", 13 | "\\[[^\\[\\]\\|:]+?\\|[^\\[\\]\\|:]+?([:]{0,1}[^\\[\\]\\|:])*?\\]", 14 | ")+$", sep = "") 15 | 16 | if (!grepl(correct.format, string, perl = TRUE)) 17 | stop("malformed model string format (see ?modelstring).") 18 | 19 | }#CHECK.MODELSTRING 20 | 21 | -------------------------------------------------------------------------------- /src/include/rcore.h: -------------------------------------------------------------------------------- 1 | #define USE_FC_LEN_T 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | /* for backwards compatibility with older R versions. */ 11 | #ifndef MAYBE_REFERENCED 12 | #define MAYBE_REFERENCED(x) (NAMED(x) > 0) 13 | #endif 14 | 15 | /* utility macros. */ 16 | #define isTRUE(logical) ((LOGICAL(logical)[0]) == TRUE) 17 | #define INT(x) INTEGER(x)[0] 18 | #define NUM(x) REAL(x)[0] 19 | #define NODE(i) CHAR(STRING_ELT(nodes, i)) 20 | #define MIN(a,b) (((a)<(b))?(a):(b)) 21 | #define MAX(a,b) (((a)>(b))?(a):(b)) 22 | 23 | -------------------------------------------------------------------------------- /src/core/sort.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* quick sort of an integer vector, with or without indexes. */ 4 | void i_sort(int *array, int *indexes, int length) { 5 | 6 | if (length == 0) 7 | return; 8 | 9 | if (!indexes) 10 | R_qsort_int(array, 1, length); 11 | else 12 | R_qsort_int_I(array, indexes, 1, length); 13 | 14 | }/*I_SORT*/ 15 | 16 | /* quick sort of a double vector, with or without indexes. */ 17 | void d_sort(double *array, int *indexes, int length) { 18 | 19 | if (length == 0) 20 | return; 21 | 22 | if (!indexes) 23 | R_qsort(array, 1, length); 24 | else 25 | R_qsort_I(array, indexes, 1, length); 26 | 27 | }/*D_SORT*/ 28 | -------------------------------------------------------------------------------- /src/fitted/rinterface/nparams.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../fitted/fitted.h" 3 | 4 | /* compute the number of parameters of a fitted model. */ 5 | SEXP nparams_fitted(SEXP fitted, SEXP debug) { 6 | 7 | double node_params = 0, all_params = 0; 8 | fitted_bn bn = fitted_network_from_SEXP(fitted); 9 | 10 | for (int i = 0; i < bn.nnodes; i++) { 11 | 12 | node_params = nparams_fitted_node(bn.ldists[i], bn.node_types[i]); 13 | 14 | if (isTRUE(debug)) 15 | Rprintf("* node %s has %.0lf parameter(s).\n", bn.labels[i], node_params); 16 | 17 | all_params += node_params; 18 | 19 | }/*FOR*/ 20 | 21 | FreeFittedBN(bn); 22 | 23 | return ScalarReal(all_params); 24 | 25 | }/*NPARAMS_FITTED*/ 26 | 27 | -------------------------------------------------------------------------------- /man/pcalg.Rd: -------------------------------------------------------------------------------- 1 | \name{pcalg integration} 2 | \alias{pcalg integration} 3 | \alias{as.bn.pcAlgo} 4 | \title{Import and export networks from the pcalg package} 5 | \description{ 6 | 7 | Convert \code{pcAlgo} objects to \code{bn} objects. 8 | 9 | } 10 | \usage{ 11 | \method{as.bn}{pcAlgo}(x, ..., check.cycles = TRUE) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{pcAlgo}.} 15 | \item{\dots}{extra arguments from the generic method (currently ignored).} 16 | \item{check.cycles}{a boolean value. 
If \code{FALSE} the returned network will 17 | not be checked for cycles.} 18 | } 19 | \value{ 20 | 21 | An object of class \code{bn}. 22 | 23 | } 24 | \author{Marco Scutari} 25 | \keyword{utilities} 26 | \keyword{interfaces to other packages} 27 | -------------------------------------------------------------------------------- /man/test.counter.Rd: -------------------------------------------------------------------------------- 1 | \name{test counter} 2 | \alias{test.counter} 3 | \alias{increment.test.counter} 4 | \alias{reset.test.counter} 5 | \title{Manipulating the test counter} 6 | \description{ 7 | 8 | Check, increment or reset the test/score counter used in structure learning 9 | algorithms. 10 | 11 | } 12 | \usage{ 13 | test.counter() 14 | increment.test.counter(i = 1) 15 | reset.test.counter() 16 | } 17 | \arguments{ 18 | \item{i}{a numeric value, which is added to the test counter.} 19 | } 20 | \value{ 21 | 22 | A numeric value, the current value of the test counter. 23 | 24 | } 25 | \examples{ 26 | data(learning.test) 27 | hc(learning.test) 28 | test.counter() 29 | reset.test.counter() 30 | test.counter() 31 | } 32 | \author{Marco Scutari} 33 | \keyword{convenience functions} 34 | -------------------------------------------------------------------------------- /src/tests/rinterface/indep.test.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | /* independence tests, frontend to be used in R code. */ 5 | SEXP indep_test(SEXP x, SEXP y, SEXP sx, SEXP data, SEXP test, SEXP alpha, 6 | SEXP extra_args, SEXP learning, SEXP complete) { 7 | 8 | /* if either node to test is not provided, return a zero-length numeric 9 | * vector. */ 10 | if (length(x) == 0 || length(y) == 0) 11 | return allocVector(REALSXP, 0); 12 | 13 | /* filter for NULL and empty strings to make it easy to interface with R. */ 14 | if (length(sx) == 0 || sx == R_NilValue) 15 | return utest(x, y, data, test, alpha, extra_args, learning, complete); 16 | else 17 | return ctest(x, y, sx, data, test, alpha, extra_args, learning, complete); 18 | 19 | }/*INDEP_TEST*/ 20 | -------------------------------------------------------------------------------- /src/core/allocations.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLOCATIONS_HEADER 2 | #define ALLOCATIONS_HEADER 3 | 4 | /* memory allocation. 
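 * (Calloc1D/2D/3D below return 1-, 2- and 3-dimensional arrays; the matching Free1D/2D/3D macros also reset the freed pointer to NULL.)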
*/ 5 | void *Calloc1D(size_t R, size_t size); 6 | void **Calloc2D(size_t R, size_t C, size_t size); 7 | void ***Calloc3D(size_t R, size_t C, size_t L, size_t size); 8 | void *Realloc1D(void *p, size_t R, size_t size); 9 | void BN_Free1D(void *p); 10 | void BN_Free2D(void **p, size_t R); 11 | void BN_Free3D(void ***p, size_t R, size_t C); 12 | 13 | #define Free1D(p) \ 14 | do { \ 15 | BN_Free1D((void *)p); \ 16 | p = NULL; \ 17 | } while(0) 18 | #define Free2D(p, R) \ 19 | do { \ 20 | BN_Free2D((void **)p, R); \ 21 | p = NULL; \ 22 | } while (0) 23 | #define Free3D(p, R, C) \ 24 | do { \ 25 | BN_Free3D((void ***)p, R, C); \ 26 | p = NULL; \ 27 | } while (0) 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /man/gaussian-test.Rd: -------------------------------------------------------------------------------- 1 | \name{gaussian.test} 2 | \docType{data} 3 | \alias{gaussian.test} 4 | \title{Synthetic (continuous) data set to test learning algorithms} 5 | \description{ 6 | 7 | This is a synthetic data set used as a test case in the \pkg{bnlearn} package. 8 | 9 | } 10 | \usage{ 11 | data(gaussian.test) 12 | } 13 | \format{ 14 | 15 | The \code{gaussian.test} data set contains seven normal (Gaussian) variables. 16 | 17 | } 18 | \note{ 19 | 20 | The R script to generate data from this network is available from 21 | \url{https://www.bnlearn.com/documentation/networks/}. 22 | 23 | } 24 | \examples{ 25 | # load the data. 26 | data(gaussian.test) 27 | # create and plot the network structure. 28 | dag = model2network("[A][B][E][G][C|A:B][D|B][F|A:D:E:G]") 29 | \dontrun{graphviz.plot(dag)} 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /src/test.counter.c: -------------------------------------------------------------------------------- 1 | #include "include/rcore.h" 2 | 3 | /* initialize the global test counter. */ 4 | double test_counter = 0; 5 | 6 | /* increment the global test counter. */ 7 | SEXP increment_test_counter(SEXP n) { 8 | 9 | switch(TYPEOF(n)) { 10 | 11 | case REALSXP: 12 | test_counter += NUM(n); 13 | break; 14 | 15 | case INTSXP: 16 | test_counter += INT(n); 17 | break; 18 | 19 | }/*SWITCH*/ 20 | 21 | return R_NilValue; 22 | 23 | }/*INCREMENT_TEST_COUNTER*/ 24 | 25 | /* reset the global test counter. */ 26 | SEXP reset_test_counter(void) { 27 | 28 | test_counter = 0; 29 | 30 | return R_NilValue; 31 | 32 | }/*RESET_TEST_COUNTER*/ 33 | 34 | /* return the global test counter, for R to see. */ 35 | SEXP get_test_counter(void) { 36 | 37 | return ScalarReal(test_counter); 38 | 39 | }/*GET_TEST_COUNTER*/ 40 | 41 | -------------------------------------------------------------------------------- /man/clgaussian-test.Rd: -------------------------------------------------------------------------------- 1 | \name{clgaussian.test} 2 | \docType{data} 3 | \alias{clgaussian.test} 4 | \title{Synthetic (mixed) data set to test learning algorithms} 5 | \description{ 6 | 7 | This is a synthetic data set used as a test case in the \pkg{bnlearn} package. 8 | 9 | } 10 | \usage{ 11 | data(clgaussian.test) 12 | } 13 | \format{ 14 | 15 | The \code{clgaussian.test} data set contains one normal (Gaussian) variable, 16 | 4 discrete variables and 3 conditional Gaussian variables. 17 | 18 | } 19 | \note{ 20 | 21 | The R script to generate data from this network is available from 22 | \url{https://www.bnlearn.com/documentation/networks/}. 23 | 24 | } 25 | \examples{ 26 | # load the data.
27 | data(clgaussian.test) 28 | # create and plot the network structure. 29 | dag = model2network("[A][B][C][H][D|A:H][F|B:C][E|B:D][G|A:D:E:F]") 30 | \dontrun{graphviz.plot(dag)} 31 | } 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /src/parameters/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETER_LEARNING_HEADER 2 | #define PARAMETER_LEARNING_HEADER 3 | 4 | #include "../math/linear.algebra.h" 5 | 6 | /* bit-field tracking all possible errors in the hierarchical Dirichlet 7 | * parameter estimation. */ 8 | typedef struct { 9 | 10 | unsigned int outer_em_convergence_fail : 1; 11 | unsigned int kappa_tau_convergence_fail : 1; 12 | unsigned int tau_convergence_fail : 1; 13 | unsigned int tau_is_zero : 1; 14 | unsigned int kappa_convergence_fail : 1; 15 | unsigned int padding : 3; /* pad to 1 byte. */ 16 | 17 | } hdstatus; 18 | 19 | void c_classic_discrete_parameters(int *counts, double *cpt, int nrows, 20 | int ncols, double alpha, bool replace); 21 | hdstatus c_hierarchical_dirichlet_parameters(cmcmap counts, double alpha0, 22 | double s, bool debugging, double *nu); 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/inference/loglikelihood/loglikelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGLIKELIHOOD_FUNCTIONS_HEADER 2 | #define LOGLIKELIHOOD_FUNCTIONS_HEADER 3 | 4 | bool check_locally_incomplete_data(fitted_bn bn, meta m, bool debugging); 5 | 6 | void bysample_discrete_loglikelihood(fitted_bn bn, ddata dt, double *loglik, 7 | bool debugging); 8 | void bysample_gaussian_loglikelihood(fitted_bn bn, gdata dt, double *loglik, 9 | bool robust, bool debugging); 10 | void bysample_clgaussian_loglikelihood(fitted_bn bn, cgdata dt, double *loglik, 11 | bool robust, bool debugging); 12 | 13 | double data_discrete_loglikelihood(fitted_bn bn, ddata dt, bool propagate, 14 | bool debugging); 15 | double data_gaussian_loglikelihood(fitted_bn bn, gdata dt, double *scratch, 16 | bool propagate, bool debugging); 17 | double data_clgaussian_loglikelihood(fitted_bn bn, cgdata dt, double *scratch, 18 | bool propagate, bool debugging); 19 | 20 | #endif 21 | 22 | -------------------------------------------------------------------------------- /R/utils-tests.R: -------------------------------------------------------------------------------- 1 | 2 | # compute the sample size / CPT cells ratio. 3 | obs.per.cell = function(x, y, z = NULL, data) { 4 | 5 | opc = 0 6 | ndata = nrow(data) 7 | nlx = nlevels(data[, x]) 8 | nly = nlevels(data[, y]) 9 | 10 | # return +Inf for continuous data to bypass countermeasures 11 | # intended for sparse discrete data.
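# (worked example of the ratio below: 5000 rows and two 3-level factors give opc = 5000 / (3 * 3), about 556.)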
12 | if (data.type(data) == "continuous") 13 | return(Inf) 14 | 15 | if (is.null(z) || (length(z) == 0)) { 16 | 17 | opc = ndata / (nlx * nly) 18 | 19 | }#THEN 20 | else if (is.character(z)) { 21 | 22 | if (length(z) == 1) 23 | opc = ndata / (nlx * nly * nlevels(data[, z])) 24 | else if (length(z) > 1) 25 | opc = ndata / (nlx * nly * 26 | prod(sapply(z, function(col) { nlevels(data[, col]) } ))) 27 | 28 | }#THEN 29 | else if (is.factor(z)) { 30 | 31 | opc = ndata / (nlx * nly * nlevels(z)) 32 | 33 | }#ELSE 34 | 35 | return(opc) 36 | 37 | }#OBS.PER.CELL 38 | 39 | -------------------------------------------------------------------------------- /man/ordering.Rd: -------------------------------------------------------------------------------- 1 | \name{node ordering utilities} 2 | \alias{node ordering utilities} 3 | \alias{node.ordering} 4 | \title{Partial node orderings} 5 | \description{ 6 | 7 | Find the partial node ordering implied by a network. 8 | 9 | } 10 | \usage{ 11 | node.ordering(x, debug = FALSE) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn} or \code{bn.fit}.} 15 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 16 | printed; otherwise the function is completely silent.} 17 | } 18 | \value{ 19 | 20 | \code{node.ordering()} returns a vector of character strings, an ordered set 21 | of node labels. 22 | 23 | } 24 | \note{ 25 | 26 | \code{node.ordering()} supports only completely directed Bayesian networks. 27 | 28 | } 29 | \examples{ 30 | dag = random.graph(LETTERS[1:10]) 31 | ord = node.ordering(dag) 32 | ord 33 | } 34 | \author{Marco Scutari} 35 | \keyword{convenience functions} 36 | \keyword{graphs} 37 | -------------------------------------------------------------------------------- /src/fitted/enums.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../minimal/common.h" 3 | #include "fitted.h" 4 | 5 | fitted_node_e fitted_node_to_enum(SEXP object) { 6 | 7 | if (c_is(object, "bn.fit.dnode")) 8 | return DNODE; 9 | else if (c_is(object, "bn.fit.onode")) 10 | return ONODE; 11 | else if (c_is(object, "bn.fit.gnode")) 12 | return GNODE; 13 | else if (c_is(object, "bn.fit.cgnode")) 14 | return CGNODE; 15 | 16 | return ENOFIT; 17 | 18 | }/*FITTED_NODE_TO_ENUM*/ 19 | 20 | fitted_net_e fitted_net_to_enum(SEXP object) { 21 | 22 | if (c_is(object, "bn.fit.dnet")) 23 | return DNET; 24 | else if (c_is(object, "bn.fit.onet")) 25 | return ONET; 26 | else if (c_is(object, "bn.fit.donet")) 27 | return DONET; 28 | else if (c_is(object, "bn.fit.gnet")) 29 | return GNET; 30 | else if (c_is(object, "bn.fit.cgnet")) 31 | return CGNET; 32 | 33 | return ENONET; 34 | 35 | }/*FITTED_NET_TO_ENUM*/ 36 | -------------------------------------------------------------------------------- /man/configs.Rd: -------------------------------------------------------------------------------- 1 | \name{configs} 2 | \alias{configs} 3 | \title{Construct configurations of discrete variables} 4 | \description{ 5 | 6 | Create configurations of discrete variables, which can be used in modelling 7 | conditional probability tables. 8 | 9 | } 10 | \usage{ 11 | configs(data, all = TRUE) 12 | } 13 | \arguments{ 14 | \item{data}{a data frame containing factor columns.} 15 | \item{all}{a boolean value. 
If \code{TRUE} all configurations are included as 16 | levels in the return value; otherwise only configurations which are actually 17 | observed are considered.} 18 | } 19 | \value{ 20 | 21 | A factor with one element for each row of \code{data}, and levels as 22 | specified by \code{all}. 23 | 24 | } 25 | \examples{ 26 | data(learning.test) 27 | configs(learning.test, all = TRUE) 28 | configs(learning.test, all = FALSE) 29 | } 30 | \author{Marco Scutari} 31 | \keyword{data preprocessing} 32 | \keyword{convenience functions} 33 | -------------------------------------------------------------------------------- /src/sanitization/covariance.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/covariance.matrix.h" 3 | #include "../math/linear.algebra.h" 4 | #include "../include/globals.h" 5 | 6 | /* check that a matrix is symmetric and satisfies Cauchy-Schwarz. */ 7 | SEXP check_covariance(SEXP covmat) { 8 | 9 | int i = 0, j = 0, n = (int)sqrt(length(covmat)); 10 | double *cov = REAL(covmat); 11 | 12 | for (i = 0; i < n; i++) 13 | for (j = i + 1; j < n; j++) { 14 | 15 | /* firstly, check symmetry. */ 16 | if (fabs(cov[CMC(i, j, n)] - cov[CMC(j, i, n)]) > MACHINE_TOL) 17 | error("'covmat' (%d, %d) is not symmetric.", i + 1, j + 1); 18 | 19 | /* secondly, check Cauchy-Schwarz. */ 20 | if (cov[CMC(i, j, n)] > sqrt(cov[CMC(i, i, n)] * cov[CMC(j, j, n)])) 21 | error("'covmat' (%d, %d) does not satisfy the Cauchy-Schwarz " 22 | "inequality.", i + 1, j + 1); 23 | 24 | }/*FOR*/ 25 | 26 | return R_NilValue; 27 | 28 | }/*CHECK_COVARIANCE*/ 29 | -------------------------------------------------------------------------------- /R/frontend-averaging.R: -------------------------------------------------------------------------------- 1 | 2 | # build the averaged network structure using arc strengths and a 3 | # significance threshold. 4 | averaged.network = function(strength, threshold) { 5 | 6 | # check the main argument. 7 | check.bn.strength(strength, valid = c("bootstrap", "bayes-factor")) 8 | # check the strength threshold. 9 | threshold = check.threshold(threshold, strength) 10 | 11 | avg = averaged.network.backend(strength = strength, threshold = threshold) 12 | 13 | # add the metadata for the print() method. 14 | avg$learning$algo = "averaged" 15 | avg$learning$args = list(threshold = threshold) 16 | 17 | return(avg) 18 | 19 | }#AVERAGED.NETWORK 20 | 21 | # compute the inclusion threshold from a set of arc strengths. 22 | inclusion.threshold = function(strength) { 23 | 24 | # check the (only) argument. 25 | check.bn.strength(strength, valid = c("bootstrap", "bayes-factor")) 26 | 27 | threshold(strength = strength, method = "l1") 28 | 29 | }#INCLUSION.THRESHOLD 30 | -------------------------------------------------------------------------------- /man/dsep.Rd: -------------------------------------------------------------------------------- 1 | \name{dsep} 2 | \alias{dsep} 3 | \title{Test d-separation} 4 | \description{ 5 | 6 | Check whether two nodes are d-separated. 7 | 8 | } 9 | \usage{ 10 | dsep(bn, x, y, z) 11 | } 12 | \arguments{ 13 | \item{bn}{an object of class \code{bn}.} 14 | \item{x,y}{a character string, the label of a node.} 15 | \item{z}{an optional vector of character strings, the label of the 16 | (candidate) d-separating nodes.
It defaults to the empty set.} 17 | } 18 | \value{ 19 | 20 | \code{dsep()} returns \code{TRUE} if \code{x} and \code{y} are 21 | d-separated by \code{z}, and \code{FALSE} otherwise. 22 | 23 | } 24 | \references{ 25 | 26 | Koller D, Friedman N (2009). \emph{Probabilistic Graphical Models: Principles 27 | and Techniques}. MIT Press. 28 | 29 | } 30 | \examples{ 31 | bn = model2network("[A][C|A][B|C]") 32 | dsep(bn, "A", "B", "C") 33 | bn = model2network("[A][C][B|A:C]") 34 | dsep(bn, "A", "B", "C") 35 | } 36 | \author{Marco Scutari} 37 | \keyword{graphs} 38 | -------------------------------------------------------------------------------- /R/aracne.R: -------------------------------------------------------------------------------- 1 | 2 | aracne.backend = function(x, estimator, whitelist, blacklist, debug = FALSE) { 3 | 4 | # fix the whitelist and the blacklist to keep the C side simple. 5 | nodes = names(x) 6 | 7 | if (!is.null(blacklist)) { 8 | 9 | # arcs must be blacklisted in both directions, so keep only 10 | # the undirected ones. 11 | blacklist = blacklist[which.undirected(blacklist, nodes), , drop = TRUE] 12 | # keep only one direction for each blacklisted arc. 13 | blacklist = pdag2dag.backend(blacklist, nodes) 14 | 15 | }#THEN 16 | 17 | if (!is.null(whitelist)) { 18 | 19 | # keep only one direction for each whitelisted arc. 20 | whitelist = pdag2dag.backend(whitelist, nodes) 21 | 22 | }#THEN 23 | 24 | .Call(call_aracne, 25 | data = x, 26 | estimator = estimator, 27 | whitelist = whitelist, 28 | blacklist = blacklist, 29 | complete = attr(x, "metadata")$complete.nodes, 30 | debug = debug) 31 | 32 | }#ARACNE.BACKEND 33 | 34 | -------------------------------------------------------------------------------- /R/simulation.R: -------------------------------------------------------------------------------- 1 | # do a partial ordering of the nodes of a graph. 2 | topological.ordering = function(x, start = NULL, reverse = FALSE, 3 | debug = FALSE) { 4 | 5 | if (is.null(start)) 6 | roots = root.leaf.nodes(x, leaf = reverse) 7 | else 8 | roots = start 9 | 10 | to.do = .Call(call_topological_ordering, 11 | bn = x, 12 | root.nodes = roots, 13 | reverse = reverse, 14 | debug = debug) 15 | 16 | if (is.null(start)) 17 | return(names(sort(to.do))) 18 | else 19 | return(names(sort(to.do[to.do > 0]))) 20 | 21 | }#SCHEDULE 22 | 23 | # use the Logic Sampling (LS) algorithm as described in "Bayesian Artificial 24 | # Intelligence", Korb & Nicholson, chap 3.6.1. 25 | rbn.backend = function(x, n, fix = TRUE, debug = FALSE) { 26 | 27 | .Call(call_rbn_master, 28 | fitted = x, 29 | n = as.integer(n), 30 | fix = fix, 31 | add.metadata = FALSE, 32 | debug = debug) 33 | 34 | }#RBN.BACKEND 35 | 36 | -------------------------------------------------------------------------------- /src/preprocessing/preprocessing.h: -------------------------------------------------------------------------------- 1 | #ifndef PREPROCESSING_HEADER 2 | #define PREPROCESSING_HEADER 3 | 4 | #include "../core/data.table.h" 5 | 6 | /* enum for discretization methods, to be matched from the label string passed 7 | * down from R. */ 8 | typedef enum { 9 | ENOMETHOD = 0, /* error code, no such discretization method. */ 10 | INTERVAL = 1, /* interval discretization (marginal). */ 11 | QUANTILE = 2, /* quantile discretization (marginal). */ 12 | HARTEMINK = 3 /* Hartemink discretization (pairwise). 
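 * (roughly: start from a fine marginal discretization and collapse adjacent intervals while trying to preserve the pairwise mutual information between variables.)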
*/ 13 | } discretization_e; 14 | 15 | discretization_e discretization_to_enum(const char *label); 16 | 17 | int interval_discretization(double *orig, int *factor, int nbreaks, 18 | double *cutpoints, int nobs, bool debugging); 19 | int quantile_discretization(double *orig, int *factor, int nbreaks, 20 | double *cutpoints, int nobs, bool complete, bool debugging); 21 | void hartemink_discretization(ddata work, int *nbreaks, double **cutpoints, 22 | bool debugging); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/tests/enums.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "tests.h" 3 | 4 | #define ENTRY(key, value) if (strcmp(label, key) == 0) return value; 5 | 6 | test_e test_to_enum(const char *label) { 7 | 8 | ENTRY("mi", MI); 9 | ENTRY("mi-adf", MI_ADF); 10 | ENTRY("x2", X2); 11 | ENTRY("x2-adf", X2_ADF); 12 | ENTRY("jt", JT); 13 | ENTRY("cor", COR); 14 | ENTRY("zf", ZF); 15 | ENTRY("mi-g", MI_G); 16 | ENTRY("mi-cg", MI_CG); 17 | ENTRY("mi-sh", MI_SH); 18 | ENTRY("mi-g-sh", MI_G_SH); 19 | ENTRY("mc-mi", MC_MI); 20 | ENTRY("mc-x2", MC_X2); 21 | ENTRY("sp-mi", SP_MI); 22 | ENTRY("sp-x2", SP_X2); 23 | ENTRY("mc-jt", MC_JT); 24 | ENTRY("smc-mi", SMC_MI); 25 | ENTRY("smc-x2", SMC_X2); 26 | ENTRY("smc-jt", SMC_JT); 27 | ENTRY("mc-cor", MC_COR); 28 | ENTRY("mc-mi-g", MC_MI_G); 29 | ENTRY("mc-zf", MC_ZF); 30 | ENTRY("smc-cor", SMC_COR); 31 | ENTRY("smc-zf", SMC_ZF); 32 | ENTRY("smc-mi-g", SMC_MI_G); 33 | ENTRY("custom-test", CUSTOM_T); 34 | 35 | return ENOTEST; 36 | 37 | }/*TEST_TO_ENUM*/ 38 | 39 | 40 | -------------------------------------------------------------------------------- /R/utils-plot.R: -------------------------------------------------------------------------------- 1 | # print an underlined label in the plot. 2 | underlined = function(x, y, label, col){ 3 | 4 | text(x, y, label, col = col, font = 2) 5 | sw = strwidth(label) 6 | sh = strheight(label) 7 | lines(x + c(-sw/2, sw/2), rep(y - 1.5*sh/2, 2), col = col) 8 | 9 | }#UNDERLINED 10 | 11 | # compute the largest possible expansion factor for which a string fits the box. 12 | largest.cex = function(node, height, width, hfrac = 0.7, wfrac = 0.9) { 13 | 14 | # fit vertically. 15 | guess = hfrac * height / strheight(node, cex = 1) 16 | best = optimize(f = function(x) abs(strheight(node, cex = x) - hfrac * height), 17 | interval = guess * c(0.5, 2), tol = 0.025)$minimum 18 | 19 | # fit horizontally. 20 | best = best * min(wfrac * width / strwidth(node, cex = best), 1) 21 | 22 | return(best) 23 | 24 | }#LARGEST.CEX 25 | 26 | # create a lighter tint of a given colour. 27 | lighter.colour = function(col, offset = 0.25) { 28 | 29 | rgb = col2rgb(col)[, 1] 30 | do.call("rgb", as.list((rgb + (255 - rgb) * offset) / 255)) 31 | 32 | }#LIGHTER.COLOUR 33 | -------------------------------------------------------------------------------- /R/sanitization-mutual.R: -------------------------------------------------------------------------------- 1 | 2 | # check the estimator for the mutual information. 3 | check.mi.estimator = function(estimator, data) { 4 | 5 | # check which type of data we are dealing with. 6 | type = data.type(data) 7 | 8 | if (!missing(estimator) && !is.null(estimator)) { 9 | 10 | check.label(estimator, choices = available.mi, 11 | labels = mi.estimator.labels, argname = "mutual information estimator") 12 | 13 | # check if it's the right estimator for the data (discrete, continuous).
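# (e.g. "mi" with continuous data, or "mi-g" with discrete data, should be rejected by the checks below.)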
14 | if ((type %!in% discrete.data.types) && 15 | (estimator %in% available.discrete.mi)) 16 | stop("estimator '", estimator, "' may only be used with discrete data.") 17 | if ((type != "continuous") && (estimator %in% available.continuous.mi)) 18 | stop("estimator '", estimator, "' may only be used with continuous data.") 19 | 20 | return(estimator) 21 | 22 | }#THEN 23 | else { 24 | 25 | if (type %in% discrete.data.types) 26 | return("mi") 27 | else 28 | return("mi-g") 29 | 30 | }#ELSE 31 | 32 | }#CHECK.MI.ESTIMATOR 33 | 34 | -------------------------------------------------------------------------------- /src/foreign/parse.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* find the matching closed brace. */ 4 | SEXP match_brace(SEXP lines, SEXP start, SEXP open_brace, SEXP close_brace) { 5 | 6 | int depth = 0, open = 0, line_id = INT(start) - 1; 7 | const char *current = NULL; 8 | const char *op = CHAR(STRING_ELT(open_brace, 0)); 9 | const char *cl = CHAR(STRING_ELT(close_brace, 0)); 10 | 11 | do { 12 | 13 | /* dereference the current line. */ 14 | current = CHAR(STRING_ELT(lines, line_id)); 15 | 16 | /* increment the depth counter if an open brace is found. */ 17 | if (strstr(current, op)) { 18 | 19 | /* be sure not to exit from the do-while loop until an open curly brace 20 | * has been spotted. */ 21 | open = 1; 22 | depth++; 23 | 24 | }/*THEN*/ 25 | /* decrement the depth counter if a closed brace is found. */ 26 | if (strstr(current, cl)) 27 | depth--; 28 | 29 | /* increment the line id. */ 30 | line_id++; 31 | 32 | } while ((depth > 0) || (open == 0)); 33 | 34 | return ScalarInteger(line_id); 35 | 36 | }/*MATCH_BRACE*/ 37 | 38 | -------------------------------------------------------------------------------- /R/scores.R: -------------------------------------------------------------------------------- 1 | 2 | # compute individual node contributions to the network score. 3 | per.node.score = function(network, data, score, targets, extra.args, 4 | debug = FALSE) { 5 | 6 | .Call(call_per_node_score, 7 | network = network, 8 | data = data, 9 | score = score, 10 | targets = targets, 11 | extra.args = extra.args, 12 | debug = debug) 13 | 14 | }#PER.NODE.SCORE 15 | 16 | # complete a prior over arcs as per Castelo and Siebes. 17 | cs.completed.prior = function(beta, nodes, learning = FALSE) { 18 | 19 | beta = .Call(call_castelo_completion, 20 | prior = beta, 21 | nodes = nodes, 22 | learning = learning) 23 | 24 | class(beta) = c("prior", "prior.cs", "data.frame") 25 | attr(beta, "nodes") = nodes 26 | 27 | return(beta) 28 | 29 | }#CS.COMPLETED.PRIOR 30 | 31 | # compute the optimal imaginary sample size for a discrete network. 32 | alpha.star.backend = function(x, data, debug = FALSE) { 33 | 34 | .Call(call_alpha_star, 35 | x = x, 36 | data = data, 37 | debug = debug) 38 | 39 | }#ALPHA.STAR.BACKEND 40 | 41 | -------------------------------------------------------------------------------- /man/igraphpkg.Rd: -------------------------------------------------------------------------------- 1 | \name{igraph integration} 2 | \alias{igraph integration} 3 | \alias{as.bn.igraph} 4 | \alias{as.igraph} 5 | \alias{as.igraph.bn} 6 | \alias{as.igraph.bn.fit} 7 | \title{Import and export networks from the igraph package} 8 | \description{ 9 | 10 | Convert \code{bn} and \code{bn.fit} objects to \code{igraph} and vice versa.
11 | 12 | } 13 | \usage{ 14 | \method{as.bn}{igraph}(x, ..., check.cycles = TRUE) 15 | \method{as.igraph}{bn}(x, ...) 16 | \method{as.igraph}{bn.fit}(x, ...) 17 | } 18 | \arguments{ 19 | \item{x}{an object of class \code{bn}, \code{bn.fit}, or \code{igraph}.} 20 | \item{\dots}{extra arguments from the generic method (currently ignored).} 21 | \item{check.cycles}{a boolean value. If \code{FALSE} the returned network will 22 | not be checked for cycles.} 23 | } 24 | \value{ 25 | 26 | An object of the relevant class. 27 | 28 | } 29 | \examples{ 30 | \dontrun{ 31 | a = bn.fit(hc(learning.test), learning.test) 32 | b = as.igraph(a) 33 | plot(b, edge.arrow.mode = 2L * !igraph::which_mutual(b)) 34 | c = as.bn(b)}} 35 | \author{Marco Scutari} 36 | \keyword{utilities} 37 | \keyword{interfaces to other packages} 38 | -------------------------------------------------------------------------------- /man/plot.bn.strength.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.bn.strength} 2 | \alias{plot.bn.strength} 3 | \title{Plot arc strengths derived from bootstrap} 4 | \description{ 5 | 6 | Plot arc strengths derived from bootstrap resampling. 7 | 8 | } 9 | \usage{ 10 | \method{plot}{bn.strength}(x, draw.threshold = TRUE, main = NULL, 11 | xlab = "arc strengths", ylab = "CDF(arc strengths)", ...) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn.strength}.} 15 | \item{draw.threshold}{a boolean value. If \code{TRUE}, a dashed vertical line 16 | is drawn at the threshold.} 17 | \item{main,xlab,ylab}{character strings, the main title and the axes labels.} 18 | \item{\dots}{other graphical parameters to be passed through to plotting 19 | functions.} 20 | } 21 | \note{ 22 | The \code{xlim} and \code{ylim} arguments are always overridden. 23 | } 24 | \examples{ 25 | data(learning.test) 26 | 27 | start = random.graph(nodes = names(learning.test), num = 50) 28 | netlist = lapply(start, function(net) { 29 | hc(learning.test, score = "bde", iss = 10, start = net) }) 30 | arcs = custom.strength(netlist, nodes = names(learning.test), cpdag = FALSE) 31 | plot(arcs) 32 | } 33 | \author{Marco Scutari} 34 | \keyword{plots} 35 | -------------------------------------------------------------------------------- /man/bayesian.network.classifiers.Rd: -------------------------------------------------------------------------------- 1 | \name{network-classifiers} 2 | \alias{network-classifiers} 3 | \alias{network classifiers} 4 | \title{Bayesian network classifiers} 5 | \description{ 6 | 7 | Structure learning algorithms for Bayesian network classifiers. 8 | 9 | } 10 | \details{ 11 | 12 | The algorithms are aimed at classification, and favour predictive power over 13 | the ability to recover the correct network structure. The implementation in 14 | \pkg{bnlearn} assumes that all variables, including the classifiers, are 15 | discrete. 16 | 17 | \itemize{ 18 | 19 | \item \emph{Naive Bayes} (\code{\link{naive.bayes}}): a very simple 20 | algorithm assuming that all classifiers are independent and using the 21 | posterior probability of the target variable for classification. 22 | \item \emph{Tree-Augmented Naive Bayes} (\code{\link{tree.bayes}}): an 23 | improvement over naive Bayes, this algorithm uses Chow-Liu to approximate 24 | the dependence structure of the classifiers. 25 | 26 | Friedman N, Geiger D, Goldszmidt M (1997). "Bayesian Network Classifiers." 27 | \emph{Machine Learning}, \strong{29}:131--163.
28 | 29 | } 30 | 31 | } 32 | \keyword{package} 33 | -------------------------------------------------------------------------------- /src/core/covariance.matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef COVARIANCE_MATRIX_HEADER 2 | #define COVARIANCE_MATRIX_HEADER 3 | 4 | /* covariance matrix, with additional fields to carry around its own SVD 5 | * decomposition and dimension. */ 6 | typedef struct { 7 | 8 | int dim; /* dimension of the covariance matrix. */ 9 | double *mat; /* pointer to the matrix. */ 10 | double *u; /* SVD decomposition, U left matrix, for c_svd(). */ 11 | double *d; /* SVD decomposition, vector for U's diagonal, for c_svd(). */ 12 | double *vt; /* SVD decomposition, V^t right matrix, for c_svd(). */ 13 | 14 | } covariance; 15 | 16 | covariance new_covariance(int dim, bool decomp); 17 | void print_covariance(covariance cov); 18 | void copy_covariance(covariance *src, covariance *copy); 19 | void covariance_drop_variable(covariance *full, covariance *sub, int to_drop); 20 | void FreeCOV(covariance cov); 21 | 22 | void c_covmat(double **data, double *mean, int nrow, int ncol, covariance cov, 23 | int first); 24 | void c_update_covmat(double **data, double *mean, int update, int nrow, 25 | int ncol, double *mat); 26 | void c_covmat_with_missing(double **data, int nrow, int ncol, 27 | bool *missing_partial, bool *missing_all, double *mean, double *mat, 28 | int *ncomplete); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/inference/loglikelihood/common.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../fitted/fitted.h" 3 | #include "../../core/data.table.h" 4 | 5 | /* check whether the data are complete for all the local distributions of 6 | * interest. */ 7 | bool check_locally_incomplete_data(fitted_bn bn, meta m, bool debugging) { 8 | 9 | bool early_return = FALSE; 10 | 11 | for (int i = 0; i < m.ncols; i++) { 12 | 13 | if (!m.flag[i].fixed) 14 | continue; 15 | 16 | /* for the data to be locally complete, the node itself... */ 17 | if (!m.flag[i].complete) { 18 | 19 | early_return = TRUE; 20 | goto incomplete_data; 21 | 22 | }/*THEN*/ 23 | 24 | /* ... and its parents must be complete. 
*/ 25 | for (int j = 0; j < bn.ldists[i].nparents; j++) 26 | if (!m.flag[bn.ldists[i].parents[j]].complete) { 27 | 28 | early_return = TRUE; 29 | goto incomplete_data; 30 | 31 | }/*THEN*/ 32 | 33 | incomplete_data: 34 | if (early_return) { 35 | 36 | if (debugging) 37 | Rprintf("* incomplete data for node %s, the log-likelihood is NA.\n", 38 | bn.labels[i]); 39 | 40 | return TRUE; 41 | 42 | }/*THEN*/ 43 | 44 | }/*FOR*/ 45 | 46 | return FALSE; 47 | 48 | }/*CHECK_LOCALLY_INCOMPLETE_DATA*/ 49 | 50 | -------------------------------------------------------------------------------- /src/include/graph.h: -------------------------------------------------------------------------------- 1 | 2 | /* from filter.arcs.c */ 3 | SEXP which_undirected(SEXP arcs, SEXP nodes); 4 | 5 | /* from tiers.c */ 6 | SEXP tiers(SEXP nodes, SEXP debug); 7 | 8 | /* from filter.arcs.c */ 9 | SEXP c_unique_arcs(SEXP arcs, SEXP nodes, bool warnlevel); 10 | 11 | /* from fitted.c */ 12 | SEXP root_nodes(SEXP bn, SEXP leaves); 13 | 14 | /* from simulation.c */ 15 | SEXP topological_ordering(SEXP bn, SEXP root_nodes, SEXP reverse, SEXP debug); 16 | void topological_sort(SEXP fitted, int *poset, int nnodes); 17 | 18 | /* from path.c */ 19 | int c_has_path(int start, int stop, int *amat, int n, SEXP nodes, 20 | bool ugraph, bool notdirect, int *path, int *counter, bool debugging); 21 | bool c_directed_path(int start, int stop, int *amat, int n, SEXP nodes, 22 | int *path, int *counter, bool debugging); 23 | int c_uptri3_path(short int *uptri, int *depth, int from, int to, int nnodes, 24 | SEXP nodes, bool debugging); 25 | 26 | /* from hash.c */ 27 | SEXP arc_hash(SEXP arcs, SEXP nodes, bool uptri, bool sort); 28 | void c_arc_hash(int narcs, int nnodes, int *from, int *to, int *uptri, 29 | int *cmc, bool sort); 30 | SEXP c_amat_hash(int *amat, int nnodes); 31 | 32 | /* from arcs2amat.c */ 33 | SEXP arcs2amat(SEXP arcs, SEXP nodes); 34 | SEXP amat2arcs(SEXP amat, SEXP nodes); 35 | 36 | -------------------------------------------------------------------------------- /R/chow.liu.R: -------------------------------------------------------------------------------- 1 | 2 | chow.liu.backend = function(x, nodes, estimator, whitelist, blacklist, 3 | conditional = NULL, debug = FALSE) { 4 | 5 | # fix the whitelist and the blacklist to keep the C side simple. 6 | if (!is.null(blacklist)) { 7 | 8 | # arcs must be blacklisted in both directions, so keep only 9 | # the undirected ones. 10 | blacklist = blacklist[which.undirected(blacklist, nodes), , drop = TRUE] 11 | # keep only one direction for each blacklisted arc. 12 | blacklist = pdag2dag.backend(blacklist, nodes) 13 | 14 | }#THEN 15 | 16 | if (!is.null(whitelist)) { 17 | 18 | # keep only one direction for each whitelisted arc. 19 | whitelist = pdag2dag.backend(whitelist, nodes) 20 | 21 | # the chow-liu algorithms allows the selection of exactly length(nodes) arcs, 22 | # so the whitelist must contain less. 
23 | if (nrow(whitelist) > length(nodes)) 24 | stop("too many whitelisted arcs, there can be only ", length(nodes), ".") 25 | 26 | }#THEN 27 | 28 | .Call(call_chow_liu, 29 | data = x, 30 | nodes = nodes, 31 | estimator = estimator, 32 | whitelist = whitelist, 33 | blacklist = blacklist, 34 | complete = attr(x, "metadata")$complete.nodes, 35 | conditional = conditional, 36 | debug = debug) 37 | 38 | }#CHOW.LIU.BACKEND 39 | 40 | -------------------------------------------------------------------------------- /src/parameters/discrete/classic.discrete.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/contingency.tables.h" 3 | #include "../../math/linear.algebra.h" 4 | 5 | void c_classic_discrete_parameters(int *counts, double *cpt, int nrows, 6 | int ncols, double alpha, bool replace) { 7 | 8 | long double colsum = 0; 9 | 10 | /* add the imaginary sample size, if any, ... */ 11 | for (int i = 0; i < nrows * ncols; i++) 12 | cpt[i] = counts[i] + alpha / (nrows * ncols); 13 | 14 | /* ... and normalize the columns to sum up to 1. */ 15 | for (int j = 0; j < ncols; j++) { 16 | 17 | colsum = 0; 18 | for (int i = 0; i < nrows; i++) 19 | colsum += cpt[CMC(i, j, nrows)]; 20 | 21 | /* some columns cannot be normalized: either fill them with NaNs or with 22 | * a uniform distribution. */ 23 | if (colsum == 0) { 24 | 25 | if (replace) { 26 | 27 | for (int i = 0; i < nrows; i++) 28 | cpt[CMC(i, j, nrows)] = (double) 1 / nrows; 29 | 30 | }/*THEN*/ 31 | else { 32 | 33 | for (int i = 0; i < nrows; i++) 34 | cpt[CMC(i, j, nrows)] = NA_REAL; 35 | 36 | }/*ELSE*/ 37 | 38 | }/*THEN*/ 39 | else { 40 | 41 | for (int i = 0; i < nrows; i++) 42 | cpt[CMC(i, j, nrows)] /= colsum; 43 | 44 | }/*ELSE*/ 45 | 46 | }/*FOR*/ 47 | 48 | }/*C_CLASSIC_DISCRETE_PARAMETERS*/ 49 | 50 | -------------------------------------------------------------------------------- /man/graphpkg.Rd: -------------------------------------------------------------------------------- 1 | \name{graph integration} 2 | \alias{graph integration} 3 | \alias{as.bn.graphNEL} 4 | \alias{as.bn.graphAM} 5 | \alias{as.graphNEL} 6 | \alias{as.graphAM} 7 | \alias{as.graphNEL.bn} 8 | \alias{as.graphNEL.bn.fit} 9 | \alias{as.graphAM.bn} 10 | \alias{as.graphAM.bn.fit} 11 | \title{Import and export networks from the graph package} 12 | \description{ 13 | 14 | Convert \code{bn} and \code{bn.fit} objects to \code{graphNEL} and 15 | \code{graphAM} objects and vice versa. 16 | 17 | } 18 | \usage{ 19 | \method{as.bn}{graphNEL}(x, ..., check.cycles = TRUE) 20 | \method{as.bn}{graphAM}(x, ..., check.cycles = TRUE) 21 | \method{as.graphNEL}{bn}(x) 22 | \method{as.graphNEL}{bn.fit}(x) 23 | \method{as.graphAM}{bn}(x) 24 | \method{as.graphAM}{bn.fit}(x) 25 | } 26 | \arguments{ 27 | \item{x}{an object of class \code{bn}, \code{bn.fit}, \code{graphNEL}, 28 | \code{graphAM}.} 29 | \item{\dots}{extra arguments from the generic method (currently ignored).} 30 | \item{check.cycles}{a boolean value. If \code{FALSE} the returned network will 31 | not be checked for cycles.} 32 | } 33 | \value{ 34 | 35 | An object of the relevant class. 
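The column-wise normalisation performed by c_classic_discrete_parameters() above can be mirrored in R for testing purposes; a small sketch with a hypothetical helper (counts is a matrix with the node's levels on the rows and the parents' configurations on the columns, alpha is the imaginary sample size):

cpt.from.counts = function(counts, alpha = 0, replace = TRUE) {

  # spread the imaginary sample size uniformly over all the cells.
  cpt = counts + alpha / length(counts)

  for (j in seq_len(ncol(cpt))) {

    total = sum(cpt[, j])

    if (total == 0)
      # unidentifiable column: fill with a uniform distribution or with NAs.
      cpt[, j] = if (replace) 1 / nrow(cpt) else NA_real_
    else
      cpt[, j] = cpt[, j] / total

  }#FOR

  return(cpt)

}#CPT.FROM.COUNTS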
36 | 37 | } 38 | \examples{ 39 | \dontrun{ 40 | library(graph) 41 | a = bn.fit(hc(learning.test), learning.test) 42 | b = as.graphNEL(a) 43 | c = as.bn(b)}} 44 | \author{Marco Scutari} 45 | \keyword{utilities} 46 | \keyword{interfaces to other packages} 47 | -------------------------------------------------------------------------------- /src/core/uppertriangular.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "allocations.h" 3 | #include "uppertriangular.h" 4 | 5 | uppertriangular new_uppertriangular(int dim) { 6 | 7 | uppertriangular sym = { 0 }; 8 | 9 | sym.dim = dim; 10 | sym.mat = Calloc1D(dim * (dim - 1) / 2, sizeof(double)); 11 | 12 | return sym; 13 | 14 | }/*NEW_UPPERTRIANGULAR*/ 15 | 16 | void FreeUPPERTRIANGULAR(uppertriangular sym) { 17 | 18 | Free1D(sym.names); 19 | Free1D(sym.mat); 20 | 21 | }/*FREEUPPERTRIANGULAR*/ 22 | 23 | int uppertriangular_size(uppertriangular sym) { 24 | 25 | return sym.dim * (sym.dim - 1) / 2; 26 | 27 | }/*UPPTERTRIANGULAR_SIZE*/ 28 | 29 | void uppertriangular_copy_names(uppertriangular *sym, const char **names) { 30 | 31 | int i = 0; 32 | 33 | (*sym).names = Calloc1D((*sym).dim, sizeof(char *)); 34 | 35 | for (i = 0; i < (*sym).dim; i++) 36 | (*sym).names[i] = names[i]; 37 | 38 | }/*UPPERTRIANGULAR_COPY_NAMES*/ 39 | 40 | /* inverse function of the UPTRI3() macro. */ 41 | void INV_UPTRI3(int x, int n, int *res) { 42 | 43 | int c = 0, r = 0, cn = n - 1; 44 | 45 | for (r = 0; r < n; r++) { 46 | 47 | if (x < cn) { 48 | 49 | c = n - (cn - x); 50 | break; 51 | 52 | }/*THEN*/ 53 | else { 54 | 55 | cn += n - (r + 2); 56 | 57 | }/*ELSE*/ 58 | 59 | }/*FOR*/ 60 | 61 | res[0] = r; 62 | res[1] = c; 63 | 64 | }/*INV_UPTRI3*/ 65 | 66 | -------------------------------------------------------------------------------- /man/learning-test.Rd: -------------------------------------------------------------------------------- 1 | \name{learning.test} 2 | \docType{data} 3 | \alias{learning.test} 4 | \title{Synthetic (discrete) data set to test learning algorithms} 5 | \description{ 6 | 7 | This a synthetic data set used as a test case in the \pkg{bnlearn} package. 8 | 9 | } 10 | \usage{ 11 | data(learning.test) 12 | } 13 | \format{ 14 | 15 | The \code{learning.test} data set contains the following variables: 16 | \itemize{ 17 | 18 | \item \code{A}, a three-level factor with levels \code{a}, \code{b} and 19 | \code{c}. 20 | \item \code{B}, a three-level factor with levels \code{a}, \code{b} and 21 | \code{c}. 22 | \item \code{C}, a three-level factor with levels \code{a}, \code{b} and 23 | \code{c}. 24 | \item \code{D}, a three-level factor with levels \code{a}, \code{b} and 25 | \code{c}. 26 | \item \code{E}, a three-level factor with levels \code{a}, \code{b} and 27 | \code{c}. 28 | \item \code{F}, a two-level factor with levels \code{a} and \code{b}. 29 | 30 | } 31 | 32 | } 33 | \note{ 34 | 35 | The R script to generate data from this network is available from 36 | \url{https://www.bnlearn.com/documentation/networks/}. 37 | 38 | } 39 | \examples{ 40 | # load the data. 41 | data(learning.test) 42 | # create and plot the network structure. 43 | dag = model2network("[A][C][F][B|A][D|A:C][E|B:F]") 44 | \dontrun{graphviz.plot(dag)} 45 | } 46 | \keyword{datasets} 47 | -------------------------------------------------------------------------------- /R/utils-cluster.R: -------------------------------------------------------------------------------- 1 | # check whether the cluster is running. 
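The coordinate system implemented by the UPTRI3() macro and inverted by INV_UPTRI3() above can be reproduced in R to sanity-check the indexing; a sketch with a hypothetical helper (1-based row and column coordinates, 0-based position in the strictly-upper-triangular storage):

uptri3 = function(r, c, n) {

  # symmetric access: always index the upper half of the matrix.
  lo = pmin(r, c)
  hi = pmax(r, c)
  # row-major position, minus the missing lower half and the diagonal.
  (lo - 1) * n + hi - 1 - lo * (lo - 1) / 2 - lo

}#UPTRI3

# the six possible arcs of a 4-node graph map to positions 0 to 5.
uptri3(c(1, 1, 1, 2, 2, 3), c(2, 3, 4, 3, 4, 4), n = 4)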
2 | isClusterRunning = function(cl) { 3 | 4 | tryCatch(any(unlist(parallel::clusterEvalQ(cl, TRUE))), 5 | error = function(err) { FALSE }) 6 | 7 | }#ISCLUSTERRUNNING 8 | 9 | # check the status of the snow/parallel cluster. 10 | check.cluster = function(cluster) { 11 | 12 | if (missing(cluster) || is.null(cluster)) 13 | return(NULL) 14 | if (!is(cluster, "cluster")) 15 | stop("cluster is not a valid cluster object.") 16 | check.and.load.package("parallel") 17 | if (!isClusterRunning(cluster)) 18 | stop("the cluster is stopped.") 19 | 20 | return(cluster) 21 | 22 | }#CHECK.CLUSTER 23 | 24 | # get the number of slaves. 25 | nSlaves = function(cluster) { 26 | 27 | length(cluster) 28 | 29 | }#NSLAVES 30 | 31 | slaves.setup = function(cluster) { 32 | 33 | # set the test counter in all the cluster nodes. 34 | parallel::clusterEvalQ(cluster, library(bnlearn)) 35 | parallel::clusterEvalQ(cluster, reset.test.counter()) 36 | 37 | }#SLAVE.SETUP 38 | 39 | # smart parSapply() that falls back to standard sapply(), but with defaults to 40 | # simplify = FALSE. 41 | smartSapply = function(cl, ..., simplify = FALSE, USE.NAMES = TRUE) { 42 | 43 | if (is.null(cl)) 44 | sapply(..., simplify = simplify, USE.NAMES = USE.NAMES) 45 | else 46 | parallel::parSapplyLB(cl = cl, ..., simplify = simplify, USE.NAMES = USE.NAMES) 47 | 48 | }#SMARTSAPPLY 49 | 50 | -------------------------------------------------------------------------------- /src/arcs/bind.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/bn.h" 3 | #include "../minimal/strings.h" 4 | #include "../minimal/common.h" 5 | #include "../math/linear.algebra.h" 6 | 7 | /* faster rbind() implementation for arc sets. */ 8 | SEXP arcs_rbind (SEXP matrix1, SEXP matrix2, SEXP reverse2) { 9 | 10 | int i = 0, j = 0, m1 = length(matrix1)/2, m2 = length(matrix2)/2; 11 | SEXP res; 12 | 13 | /* allocate the return value. */ 14 | PROTECT(res = allocMatrix(STRSXP, m1 + m2, 2)); 15 | /* allocate and initialize the column names. */ 16 | setDimNames(res, R_NilValue, mkStringVec(2, "from", "to")); 17 | 18 | /* copy the elements of the first matrix. */ 19 | for (i = 0; i < m1; i++) 20 | for (j = 0; j < 2; j++) 21 | SET_STRING_ELT(res, CMC(i, j, m1 + m2), STRING_ELT(matrix1, CMC(i, j, m1))); 22 | 23 | /* copy the elements of the second matrix, reversing the order of the 24 | * columns as needed. */ 25 | if (isTRUE(reverse2)) { 26 | 27 | for (i = 0; i < m2; i++) 28 | for(j = 0; j < 2; j++) 29 | SET_STRING_ELT(res, CMC(i + m1, j, m1 + m2), STRING_ELT(matrix2, CMC(i, 1 - j, m2))); 30 | 31 | }/*THEN*/ 32 | else { 33 | 34 | for (i = 0; i < m2; i++) 35 | for(j = 0; j < 2; j++) 36 | SET_STRING_ELT(res, CMC(i + m1, j, m1 + m2), STRING_ELT(matrix2, CMC(i, j, m2))); 37 | 38 | }/*ELSE*/ 39 | 40 | UNPROTECT(1); 41 | 42 | return res; 43 | 44 | }/*ARCS_RBIND*/ 45 | 46 | -------------------------------------------------------------------------------- /R/utils-print.R: -------------------------------------------------------------------------------- 1 | # advanced cat which correctly handles ini-like lines and short line widths. 2 | wcat = function(header, value, indent = 2) { 3 | 4 | # get the number of available columns. 5 | columns = options("width") 6 | 7 | header = sprintf("%-40s", paste0(strrep(" ", indent), header, ":")) 8 | 9 | # blatantly ignore any line width shorter than 45, trying to support 10 | # that case is a losing proposition. 
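The cluster helpers above (check.cluster(), slaves.setup(), smartSapply()) back the optional cluster argument exposed by several frontend functions; a minimal sketch of how a user-created parallel cluster is expected to be passed in (assuming two local workers and the learning.test data):

library(parallel)
data(learning.test)
cl = makeCluster(2)
bs = boot.strength(learning.test, R = 100, algorithm = "hc", cluster = cl)
stopCluster(cl)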
11 | if ((columns >= nchar(header) + nchar(value) + 1) || (columns < 45)) { 12 | 13 | # if there are enough columns print the string as is. 14 | cat(paste(header, value, sep = " "), "\n") 15 | 16 | }#THEN 17 | else { 18 | 19 | # if there are not enough columns print the header on one row 20 | # (left-aligned) and the value on the following line (right-aligned). 21 | cat(header, "\n", sprintf(paste("%", columns, "s", sep = ""), value), "\n") 22 | 23 | }#ELSE 24 | 25 | }#WCAT 26 | 27 | # advanced cat handles model strings and short line widths. 28 | fcat = function(modelstr, indent = 0) { 29 | 30 | # measure the number of available columns. 31 | columns = options("width") 32 | 33 | if ((columns >= nchar(modelstr)) || columns < 45) { 34 | 35 | cat(strrep(" ", indent), modelstr, "\n") 36 | 37 | }#THEN 38 | else { 39 | 40 | cat(paste(strsplit(modelstr, "\\]")[[1]], "]", sep = ""), 41 | fill = TRUE, sep = "", labels = strrep(" ", indent)) 42 | 43 | }#ELSE 44 | 45 | }#FCAT 46 | -------------------------------------------------------------------------------- /src/scores/enum.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "scores.h" 3 | 4 | #define ENTRY(key, value) if (strcmp(label, key) == 0) return value; 5 | 6 | score_e score_to_enum(const char *label) { 7 | 8 | ENTRY("loglik", LOGLIK); 9 | ENTRY("aic", AIC); 10 | ENTRY("bic", BIC); 11 | ENTRY("ebic", EBIC); 12 | ENTRY("bde", BDE); 13 | ENTRY("bds", BDS); 14 | ENTRY("bdj", BDJ); 15 | ENTRY("k2", K2); 16 | ENTRY("mbde", MBDE); 17 | ENTRY("bdla", BDLA); 18 | ENTRY("pred-loglik", PRED_LOGLIK); 19 | ENTRY("fnml", FNML); 20 | ENTRY("qnml", QNML); 21 | ENTRY("nal", NAL); 22 | ENTRY("pnal", PNAL); 23 | ENTRY("loglik-g", LOGLIK_G); 24 | ENTRY("aic-g", AIC_G); 25 | ENTRY("bic-g", BIC_G); 26 | ENTRY("ebic-g", EBIC_G); 27 | ENTRY("bge", BGE); 28 | ENTRY("pred-loglik-g", PRED_LOGLIK_G); 29 | ENTRY("nal-g", NAL_G); 30 | ENTRY("pnal-g", PNAL_G); 31 | ENTRY("loglik-cg", LOGLIK_CG); 32 | ENTRY("aic-cg", AIC_CG); 33 | ENTRY("bic-cg", BIC_CG); 34 | ENTRY("ebic-cg", EBIC_CG); 35 | ENTRY("pred-loglik-cg", PRED_LOGLIK_CG); 36 | ENTRY("nal-cg", NAL_CG); 37 | ENTRY("pnal-cg", PNAL_CG); 38 | ENTRY("custom-score", CUSTOM); 39 | 40 | return ENOSCORE; 41 | 42 | }/*SCORE_TO_ENUM*/ 43 | 44 | gprior_e gprior_to_enum(const char *label) { 45 | 46 | ENTRY("uniform", UNIFORM); 47 | ENTRY("vsp", VSP); 48 | ENTRY("cs", CS); 49 | ENTRY("marginal", MU); 50 | 51 | return ENOPRIOR; 52 | 53 | }/*GPRIOR_TO_ENUM*/ 54 | 55 | -------------------------------------------------------------------------------- /src/math/linear.algebra.h: -------------------------------------------------------------------------------- 1 | #ifndef LINEAR_ALGEBRA_HEADER 2 | #define LINEAR_ALGEBRA_HEADER 3 | 4 | #include "../core/covariance.matrix.h" 5 | 6 | /* column-major coordinates conversion macro. */ 7 | #define CMC(i, j, nrow) ((i) + (j) * (nrow)) 8 | 9 | /* matrix with elements stored in column-major order. */ 10 | typedef struct{ 11 | 12 | int nrows; /* first dimension. */ 13 | int ncols; /* second dimension */ 14 | int *el; /* pointer to the column-major matrix. 
*/ 15 | 16 | } cmcmap; 17 | 18 | #define CMEL(matrix, i, j) matrix.el[CMC(i, j, matrix.nrows)] 19 | 20 | double c_logdet(double *matrix, int rows); 21 | void c_udvt(double **u, double **d, double **vt, int ncol); 22 | void c_svd(double *A, double *U, double *D, double *V, int *nrow, int *ncol, 23 | int *mindim, bool strict, int *errcode); 24 | void c_ginv(covariance cov, covariance mpinv); 25 | void c_qr(double *qr, double *y, int nrow, int ncol, double *fitted, 26 | double *resid, double *beta, double *sd); 27 | 28 | void c_ols(double **x, double *y, int nrow, int ncol, double *fitted, 29 | double *resid, double *beta, double *sd, int *nobs, bool missing); 30 | void c_cls(double **x, double *y, int *z, int nrow, int ncol, int ncond, 31 | double *fitted, double *resid, double *beta, double *sd, int *nobs, 32 | bool missing); 33 | 34 | void c_qr_matrix(double *qr, double **x, int nrow, int ncol, int *complete, 35 | int ncomplete); 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /man/alpha.star.Rd: -------------------------------------------------------------------------------- 1 | \name{alpha.star} 2 | \alias{alpha.star} 3 | \title{Estimate the optimal imaginary sample size for BDe(u)} 4 | \description{ 5 | 6 | Estimate the optimal value of the imaginary sample size for the BDe score, 7 | assuming a uniform prior and given a network structure and a data set. 8 | 9 | } 10 | \usage{ 11 | alpha.star(x, data, debug = FALSE) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn} (for \code{bn.fit} and \code{custom.fit}) 15 | or an object of class \code{bn.fit} (for \code{bn.net}).} 16 | \item{data}{a data frame containing the variables in the model.} 17 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 18 | printed; otherwise the function is completely silent.} 19 | } 20 | \value{ 21 | 22 | \code{alpha.star()} returns a positive number, the estimated optimal imaginary 23 | sample size value. 24 | 25 | } 26 | \examples{ 27 | data(learning.test) 28 | dag = hc(learning.test, score = "bic") 29 | 30 | for (i in 1:3) { 31 | 32 | a = alpha.star(dag, learning.test) 33 | dag = hc(learning.test, score = "bde", iss = a) 34 | 35 | }#FOR 36 | } 37 | \references{ 38 | 39 | Steck H (2008). "Learning the Bayesian Network Structure: Dirichlet Prior 40 | versus Data." \emph{Proceedings of the 24th Conference on Uncertainty in 41 | Artificial Intelligence}, 511--518. 42 | 43 | } 44 | \author{Marco Scutari} 45 | \keyword{structure learning} 46 | \keyword{network scores} 47 | -------------------------------------------------------------------------------- /man/statspkg.Rd: -------------------------------------------------------------------------------- 1 | \name{lm integration} 2 | \alias{lm integration} 3 | \alias{as.lm} 4 | \alias{as.lm.bn} 5 | \alias{as.lm.bn.fit} 6 | \alias{as.lm.bn.fit.gnode} 7 | \title{Produce lm objects from Bayesian networks} 8 | \description{ 9 | 10 | Take a \code{bn} object or \code{bn.fit} object encoding a Gaussian network 11 | and refit all the local distributions using \code{lm()}. This makes it 12 | possible to use all the functions provided by R for \code{lm} objects 13 | (\code{summary}, \code{anova}, etc.) to investigate the network. 14 | 15 | } 16 | \usage{ 17 | \method{as.lm}{bn}(x, data, ...) 18 | \method{as.lm}{bn.fit}(x, data, ...) 19 | \method{as.lm}{bn.fit.gnode}(x, data, ...) 
20 | } 21 | \arguments{ 22 | \item{x}{an object of class \code{bn}, \code{bn.fit} or \code{bn.fit.gnode}.} 23 | \item{data}{a data frame containing the variables in the model.} 24 | \item{\dots}{additional arguments, currently ignored.} 25 | } 26 | \value{ 27 | 28 | If \code{x} is an object of class \code{bn} or \code{bn.fit}, \code{as.lm()} 29 | returns a list of \code{lm} objects, one for each node in \code{x}. If 30 | \code{x} is an object of class \code{bn} or \code{bn.fit.gnode}, 31 | \code{as.lm()} returns a single \code{lm} object. 32 | 33 | } 34 | \examples{ 35 | dag = hc(gaussian.test) 36 | fitted = bn.fit(dag, gaussian.test) 37 | as.lm(dag, gaussian.test) 38 | as.lm(fitted, gaussian.test) 39 | as.lm(fitted$F, gaussian.test) 40 | } 41 | \author{Marco} 42 | \keyword{interfaces to other packages} 43 | -------------------------------------------------------------------------------- /man/mvnorm.Rd: -------------------------------------------------------------------------------- 1 | \name{multivariate normal distribution} 2 | \alias{mvnorm2gbn} 3 | \alias{gbn2mvnorm} 4 | \title{Gaussian Bayesian networks and multivariate normals} 5 | \description{ 6 | 7 | Convert a Gaussian Bayesian network into the multivariate normal distribution 8 | that is its global distribution, and vice versa. 9 | 10 | } 11 | \usage{ 12 | gbn2mvnorm(fitted) 13 | mvnorm2gbn(dag, mu, sigma) 14 | } 15 | \arguments{ 16 | \item{fitted}{an object of class \code{bn.fit}.} 17 | \item{dag}{an object of class \code{bn}, the structure of the network that 18 | will be returned.} 19 | \item{mu}{a numeric vector, the expectation of the multivariate normal.} 20 | \item{sigma}{a square numeric matrix, the covariance matrix of the 21 | multivariate normal.} 22 | } 23 | \value{ 24 | 25 | \code{gbn2mvnorm()} returns a list with elements \code{"mu"} (the vector of 26 | expectations) and \code{"sigma"} (the covariance matrix). 27 | 28 | \code{mvnorm2gbn()} returns an object of class \code{bn.fit}. 29 | 30 | } 31 | \references{ 32 | 33 | Pourahmadi M (2011). "Covariance Estimation: The GLM and Regularization 34 | Perspectives." \emph{Statistical Science}, 26(3), 369--387. 35 | 36 | } 37 | \examples{ 38 | data(gaussian.test) 39 | dag = model2network("[A][B][E][G][C|A:B][D|B][F|A:D:E:G]") 40 | bn = bn.fit(dag, gaussian.test) 41 | mvn = gbn2mvnorm(bn) 42 | bn2 = mvnorm2gbn(dag, mu = mvn$mu, sigma = mvn$sigma) 43 | all.equal(bn, bn2) 44 | } 45 | \author{Marco Scutari} 46 | \seealso{\link{bn.fit}.} 47 | \keyword{parameter learning} 48 | -------------------------------------------------------------------------------- /man/marks.Rd: -------------------------------------------------------------------------------- 1 | \name{marks} 2 | \docType{data} 3 | \alias{marks} 4 | \title{Examination marks data set} 5 | \description{ 6 | 7 | Examination marks of 88 students on five different topics, from Mardia (1979). 8 | 9 | } 10 | \usage{ 11 | data(marks) 12 | } 13 | \format{ 14 | 15 | The \code{marks} data set contains the following variables, one for each 16 | topic in the examination: 17 | \itemize{ 18 | 19 | \item \code{MECH} (\emph{mechanics}) 20 | \item \code{VECT} (\emph{vectors}) 21 | \item \code{ALG} (\emph{algebra}) 22 | \item \code{ANL} (\emph{analysis}) 23 | \item \code{STAT} (\emph{statistics}) 24 | 25 | } 26 | 27 | All are measured on the same scale (0-100). 28 | 29 | } 30 | \source{ 31 | 32 | Edwards DI (2000). \emph{Introduction to Graphical Modelling}. Springer, 2nd 33 | edition. 34 | 35 | Mardia KV, Kent JT, Bibby JM (1979). 
\emph{Multivariate Analysis}. Academic 36 | Press. 37 | 38 | Whittaker J (1990). \emph{Graphical Models in Applied Multivariate 39 | Statistics}. Wiley. 40 | 41 | } 42 | \examples{ 43 | # This is the undirected graphical model from Edwards (2000). 44 | data(marks) 45 | ug = empty.graph(names(marks)) 46 | arcs(ug, check.cycles = FALSE) = matrix( 47 | c("MECH", "VECT", "MECH", "ALG", "VECT", "MECH", "VECT", "ALG", 48 | "ALG", "MECH", "ALG", "VECT", "ALG", "ANL", "ALG", "STAT", 49 | "ANL", "ALG", "ANL", "STAT", "STAT", "ALG", "STAT", "ANL"), 50 | ncol = 2, byrow = TRUE, 51 | dimnames = list(c(), c("from", "to"))) 52 | \dontrun{graphviz.plot(ug)} 53 | } 54 | \keyword{datasets} 55 | -------------------------------------------------------------------------------- /src/graphs/completely.directed.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../core/uppertriangular.h" 4 | #include "../math/linear.algebra.h" 5 | 6 | /* Beware: this function is based on the assumption that each arc is unique 7 | * in the arc set; otherwise the counter may be wrong, leading to false 8 | * negatives. */ 9 | 10 | /* determine whether a graph is DAG or a PDAG/UG. */ 11 | SEXP completely_directed(SEXP arcs, SEXP nodes) { 12 | 13 | int i = 0, nrow = length(arcs)/2, n = LENGTH(nodes); 14 | int *a = NULL; 15 | short int *checklist = NULL; 16 | SEXP try; 17 | 18 | /* match the node labels in the arc set. */ 19 | PROTECT(try = match(nodes, arcs, 0)); 20 | a = INTEGER(try); 21 | 22 | /* allocate and initialize the checklist. */ 23 | checklist = Calloc1D(UPTRI_MATRIX(n), sizeof(short int)); 24 | 25 | for (i = 0; i < nrow; i++) { 26 | 27 | if (checklist[UPTRI(a[CMC(i, 0, nrow)], a[CMC(i, 1, nrow)], n)] == 0) { 28 | 29 | /* this arc is not present in the checklist; add it. */ 30 | checklist[UPTRI(a[CMC(i, 0, nrow)], a[CMC(i, 1, nrow)], n)] = 1; 31 | 32 | }/*THEN*/ 33 | else { 34 | 35 | /* this arc or its opposite already present in the checklist; the graph 36 | * has at least an undirected arc, so return FALSE. */ 37 | UNPROTECT(1); 38 | 39 | Free1D(checklist); 40 | 41 | return ScalarLogical(FALSE); 42 | 43 | }/*THEN*/ 44 | 45 | }/*FOR*/ 46 | 47 | UNPROTECT(1); 48 | 49 | Free1D(checklist); 50 | 51 | return ScalarLogical(TRUE); 52 | 53 | }/*COMPLETELY_DIRECTED*/ 54 | 55 | -------------------------------------------------------------------------------- /src/fitted/nparams.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../fitted/fitted.h" 3 | #include "../math/linear.algebra.h" 4 | 5 | /* compute the number of parameters of a fitted model. */ 6 | double nparams_fitted_node(ldist ld, fitted_node_e type) { 7 | 8 | int nrows = 0, nconfigs = 0; 9 | double node_params = 0; 10 | 11 | switch(type) { 12 | 13 | /* ... if it's a discrete node: */ 14 | case DNODE: 15 | case ONODE: 16 | 17 | nrows = ld.d.dims[0]; 18 | nconfigs = ld.d.nconfigs; 19 | 20 | /* the number of parameters for each node is (number of rows - 1), 21 | * either in total or for each parents configuration. */ 22 | node_params = nrows * nconfigs - nconfigs; 23 | 24 | break; 25 | 26 | /* ... if it's a Gaussian node: */ 27 | case GNODE: 28 | 29 | /* the number of parameters is (number of regression coefficients, one 30 | * per parent) plus two (intercept and standard error). */ 31 | node_params = ld.nparents + 2; 32 | 33 | break; 34 | 35 | /* ... 
if it's a conditional Gaussian node: */ 36 | case CGNODE: 37 | 38 | nconfigs = ld.cg.nconfigs; 39 | nrows = ld.cg.ncoefs; 40 | 41 | /* the number of parameters is (number of continuous parents + 2) for each 42 | * configuration of the discrete parents (of which there should be at 43 | * least one). */ 44 | node_params += nconfigs * (nrows + 1); 45 | 46 | break; 47 | 48 | case ENOFIT: 49 | default: 50 | break; 51 | 52 | }/*SWITCH*/ 53 | 54 | return node_params; 55 | 56 | }/*NPARAMS_FITTED_NODE*/ 57 | 58 | -------------------------------------------------------------------------------- /man/mi.matrix.Rd: -------------------------------------------------------------------------------- 1 | \name{local discovery algorithms} 2 | \alias{local discovery algorithms} 3 | \alias{aracne} 4 | \alias{chow.liu} 5 | \title{Local discovery structure learning algorithms} 6 | \description{ 7 | 8 | ARACNE and Chow-Liu learn simple graphs structures from data using pairwise 9 | mutual information coefficients. 10 | 11 | } 12 | \usage{ 13 | aracne(x, whitelist = NULL, blacklist = NULL, mi = NULL, debug = FALSE) 14 | chow.liu(x, whitelist = NULL, blacklist = NULL, mi = NULL, debug = FALSE) 15 | } 16 | \arguments{ 17 | \item{x}{a data frame containing the variables in the model.} 18 | \item{whitelist}{a data frame with two columns (optionally labeled "from" 19 | and "to"), containing a set of arcs to be included in the graph.} 20 | \item{blacklist}{a data frame with two columns (optionally labeled "from" 21 | and "to"), containing a set of arcs not to be included in the graph.} 22 | \item{mi}{a character string, the estimator used for the pairwise (i.e. 23 | unconditional) mutual information coefficients in the ARACNE and Chow-Liu 24 | algorithms. Possible values are \code{mi} (discrete mutual information) 25 | and \code{mi-g} (Gaussian mutual information).} 26 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 27 | printed; otherwise the function is completely silent.} 28 | } 29 | \value{ 30 | 31 | An object of class \code{bn}. See \code{\link{bn-class}} for details. 32 | 33 | } 34 | \author{Marco Scutari} 35 | \seealso{\link{constraint-based algorithms}, \link{score-based algorithms}, 36 | \link{hybrid algorithms}.} 37 | \keyword{local learning} 38 | -------------------------------------------------------------------------------- /man/bn.strength-class.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.strength class} 2 | \alias{bn.strength class} 3 | \alias{bn.strength-class} 4 | \alias{bn.strength} 5 | \title{The bn.strength class structure} 6 | \description{ 7 | 8 | The structure of an object of S3 class \code{bn.strength}. 9 | 10 | } 11 | \details{ 12 | 13 | An object of class \code{bn.strength} is a data frame with the following 14 | columns (one row for each arc): 15 | 16 | \itemize{ 17 | 18 | \item \code{from, to}: the nodes incident on the arc. 19 | \item \code{strength}: the strength of the arc. See 20 | \code{\link{arc.strength}}, \code{\link{boot.strength}}, 21 | \code{\link{custom.strength}} and \code{\link{strength.plot}} 22 | for details. 23 | 24 | } 25 | 26 | and some additional attributes: 27 | 28 | \itemize{ 29 | 30 | \item \code{nodes}: a vector of character strings, the labels of the nodes 31 | of the network(s) the strength were computed from. 32 | \item \code{method}: a character string, the method used to compute the 33 | strength coefficients. It can be equal to \code{test}, \code{score} or 34 | \code{bootstrap}. 
35 | \item \code{threshold}: a numeric value, the threshold used to determine 36 | if a strength coefficient is significant. 37 | 38 | } 39 | 40 | An optional column called \code{direction} may also be present, giving the 41 | probability of the direction of an arc given its presence in the graph. 42 | 43 | Only the \code{plot()} method is defined for this class; therefore, it can 44 | be manipulated as a standard data frame. 45 | 46 | } 47 | \author{Marco Scutari} 48 | \keyword{classes} 49 | -------------------------------------------------------------------------------- /man/kl.Rd: -------------------------------------------------------------------------------- 1 | \name{information theoretic quantities} 2 | \alias{H} 3 | \alias{KL} 4 | \title{Compute the distance between two fitted Bayesian networks} 5 | \description{ 6 | 7 | Compute Shannon's entropy of a fitted Bayesian network and the 8 | Kullback-Leibler divergence between two fitted Bayesian networks. 9 | 10 | } 11 | \usage{ 12 | H(P) 13 | KL(P, Q) 14 | } 15 | \arguments{ 16 | \item{P, Q}{objects of class \code{bn.fit}.} 17 | } 18 | \note{ 19 | 20 | Note that in the case of Gaussian and conditional Gaussian netwoks the 21 | divergence can be negative. Regardless of the type of network, if at least one 22 | of the two networks is singular the divergence can be infinite. 23 | 24 | If any of the parameters of the two networks are \code{NA}s, the divergence 25 | will also be \code{NA}. 26 | 27 | } 28 | \value{ 29 | 30 | \code{H()} and \code{KL()} return a single numeric value. 31 | 32 | } 33 | \examples{ 34 | \dontrun{ 35 | # discrete networks 36 | dag = model2network("[A][C][F][B|A][D|A:C][E|B:F]") 37 | fitted1 = bn.fit(dag, learning.test, method = "mle") 38 | fitted2 = bn.fit(dag, learning.test, method = "bayes", iss = 20) 39 | 40 | H(fitted1) 41 | H(fitted2) 42 | 43 | KL(fitted1, fitted1) 44 | KL(fitted2, fitted2) 45 | KL(fitted1, fitted2) 46 | } 47 | 48 | # continuous, singular networks. 49 | dag = model2network("[A][B][E][G][C|A:B][D|B][F|A:D:E:G]") 50 | singular = fitted1 = bn.fit(dag, gaussian.test) 51 | singular$A = list(coef = coef(fitted1[["A"]]) + runif(1), sd = 0) 52 | 53 | H(singular) 54 | H(fitted1) 55 | 56 | KL(singular, fitted1) 57 | KL(fitted1, singular) 58 | } 59 | \author{Marco Scutari} 60 | \keyword{parameter learning} 61 | \keyword{inference} 62 | -------------------------------------------------------------------------------- /src/core/uppertriangular.h: -------------------------------------------------------------------------------- 1 | #ifndef UPPER_TRIANGULAR_HEADER 2 | #define UPPER_TRIANGULAR_HEADER 3 | 4 | /* upper triangular matrix (not including the diagonal). */ 5 | typedef struct { 6 | 7 | int dim; /* dimension of the symmetrix matrix. */ 8 | const char **names; /* row and column names (assumed to be identical). */ 9 | double *mat; /* pointer to the matrix. */ 10 | 11 | } uppertriangular; 12 | 13 | /* 14 | * Coordinate system for an upper triangular matrix: 15 | * 16 | * [(row - 1) * ncols + ncols] - [row * (row - 1) / 2] 17 | * 18 | * the first term is the standard row major order coordinates; 19 | * the second one is an adjustment to account for the missing 20 | * lower half of the matrix. 21 | * 22 | */ 23 | 24 | /* this macro swaps its arguments to avoid "memory not mapped" errors. */ 25 | #define UPTRI(x, y, n) \ 26 | (((x) <= (y)) ? 
\ 27 | ((x) - 1) * n + (y) - 1 - ((x) * ((x) - 1)) / 2 : \ 28 | ((y) - 1) * n + (x) - 1 - ((y) * ((y) - 1)) / 2) 29 | 30 | /* coordinate system for an upper triangular matrix (not including 31 | * * the diagonal elements). */ 32 | #define UPTRI3(r, c, n) (UPTRI(r, c, n) - ((r > c) ? c : r)) 33 | void INV_UPTRI3(int x, int n, int *res); 34 | 35 | /* dimension of the upper triangular part of a n x n matrix. */ 36 | #define UPTRI_MATRIX(n) (n) * ((n) + 1) / 2 37 | 38 | /* upper triangular matrices. */ 39 | uppertriangular new_uppertriangular(int dim); 40 | void uppertriangular_copy_names(uppertriangular *sym, const char **names); 41 | void FreeUPPERTRIANGULAR(uppertriangular sym); 42 | int uppertriangular_size(uppertriangular sym); 43 | #define UTREL(sym, i, j) sym.mat[UPTRI3((i) + 1, (j) + 1, sym.dim)] 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/graphs/rinterface/path.matrix.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/allocations.h" 3 | #include "../../include/graph.h" 4 | #include "../../include/globals.h" 5 | #include "../../minimal/common.h" 6 | #include "../../minimal/strings.h" 7 | #include "../../math/linear.algebra.h" 8 | #include "../graphs.h" 9 | 10 | /* build a path matrix, telling whether each node is reachable from another. */ 11 | SEXP path_matrix(SEXP x, SEXP debug) { 12 | 13 | int nnodes = 0; 14 | char **labels = NULL; 15 | SEXP arcs, nodes, amat, roots, root_ids, pathmat; 16 | 17 | /* extract the relevant information from the network. */ 18 | arcs = getListElement(x, "arcs"); 19 | nodes = getListElement(x, "nodes"); 20 | nodes = getAttrib(nodes, R_NamesSymbol); 21 | nnodes = length(nodes); 22 | /* dereference the node labels. */ 23 | labels = Calloc1D(nnodes, sizeof(char *)); 24 | for (int i = 0; i < nnodes; i++) 25 | labels[i] = (char *) CHAR(STRING_ELT(nodes, i)); 26 | 27 | /* identify the root nodes. */ 28 | PROTECT(roots = root_nodes(x, FALSESEXP)); 29 | PROTECT(root_ids = match(nodes, roots, 0)); 30 | 31 | /* contruct the adjacency matrix. */ 32 | PROTECT(amat = arcs2amat(arcs, nodes)); 33 | 34 | /* allocate the path matrix. */ 35 | PROTECT(pathmat = allocMatrix(LGLSXP, nnodes, nnodes)); 36 | setAttrib(pathmat, R_DimNamesSymbol, getAttrib(amat, R_DimNamesSymbol)); 37 | memset(INTEGER(pathmat), '\0', nnodes * nnodes * sizeof(int)); 38 | 39 | dag_path_matrix(INTEGER(amat), LOGICAL(pathmat), nnodes, labels, 40 | INTEGER(root_ids), length(root_ids), isTRUE(debug)); 41 | 42 | UNPROTECT(4); 43 | 44 | Free1D(labels); 45 | 46 | return pathmat; 47 | 48 | }/*PATH_MATRIX*/ 49 | 50 | -------------------------------------------------------------------------------- /src/scores/custom.score.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../minimal/data.frame.h" 3 | #include "../minimal/common.h" 4 | 5 | /* evaluate a user-provided, custom decomposable score function. */ 6 | double custom_score_function(SEXP target, SEXP x, SEXP data, SEXP custom_fn, 7 | SEXP custom_args, bool debugging) { 8 | 9 | SEXP nodes_info, target_info, parents, call, args_iterator, result; 10 | 11 | /* allocate and populate the pairlist to be valuated. */ 12 | PROTECT(args_iterator = call = allocLang(5)); 13 | /* first slot, the function name. 
*/ 14 | SETCAR(args_iterator, custom_fn); 15 | args_iterator = CDR(args_iterator); 16 | /* second slot, the label of the target node. */ 17 | SETCAR(args_iterator, target); 18 | args_iterator = CDR(args_iterator); 19 | /* third slot, the labels of the parents. */ 20 | nodes_info = getListElement(x, "nodes"); 21 | target_info = getListElement(nodes_info, (char *)CHAR(STRING_ELT(target, 0))); 22 | parents = getListElement(target_info, "parents"); 23 | SETCAR(args_iterator, parents); 24 | args_iterator = CDR(args_iterator); 25 | /* fourth slot, the data. */ 26 | SETCAR(args_iterator, data); 27 | args_iterator = CDR(args_iterator); 28 | /* fifth slot, the optional arguments passed as a list. */ 29 | SETCAR(args_iterator, custom_args); 30 | /* evaluate the custom score function. */ 31 | PROTECT(result = eval(call, R_GlobalEnv)); 32 | 33 | /* the return value must be a scalar, real number. */ 34 | if ((TYPEOF(result) != REALSXP) || (length(result) != 1)) 35 | error("the score for node %s must be a scalar, real value.", 36 | CHAR(STRING_ELT(target, 0))); 37 | 38 | UNPROTECT(2); 39 | 40 | return NUM(result); 41 | 42 | }/*CUSTOM_SCORE_FUNCTION*/ 43 | -------------------------------------------------------------------------------- /src/scores/nml.regret.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../math/linear.algebra.h" 4 | #include "scores.h" 5 | 6 | /* regret approximation for large N */ 7 | double regret_fn_szp1(double N, double K) { 8 | 9 | double hK = 0.5 * K; 10 | double hK_h = hK - 0.5; 11 | double lgammahK = lgamma(hK); 12 | double lgammahK_h = lgamma(hK_h); 13 | double gamma_ratio = exp(lgammahK - lgammahK_h); 14 | double gamma_ratio_term = M_SQRT1_2 - gamma_ratio * K/3/sqrt(N); 15 | 16 | return (hK_h) * (log(N) - M_LN2) 17 | + M_LN_SQRT_PI - lgammahK 18 | + 0.5 - gamma_ratio_term * gamma_ratio_term 19 | + (3 + K * (K - 2) * (2 * K + 1)) / (36.0 * N); 20 | 21 | }/*REGRET_FN_SZP1*/ 22 | 23 | /* regret approximation for large K */ 24 | double regret_fn_szp2(double N, double K) { 25 | 26 | double a = K/N; 27 | double Ca = 0.5 * (1 + sqrt(1 + 4/a)); 28 | 29 | return N * (log(a) + (a + 2) * log(Ca) - 1/Ca) - 0.5 * log(Ca + 2/a); 30 | 31 | }/*REGRET_FN_SZP2*/ 32 | 33 | /* allocate and fill the regret table. */ 34 | double nml_regret(double n, double k) { 35 | 36 | if (n == 1) { 37 | 38 | return log(k); 39 | 40 | }/*THEN*/ 41 | else if (k == 1 || n == 0) { 42 | 43 | return 0.0; 44 | 45 | }/*THEN*/ 46 | else if (n <= MAX_REGRET_TABLE_N && k <= MAX_REGRET_TABLE_K) { 47 | 48 | if (regret_table == NULL) 49 | regret_table = get_regret_table(MAX_REGRET_TABLE_N, MAX_REGRET_TABLE_K); 50 | 51 | return regret_table[CMC((int) k, (int) n, MAX_REGRET_TABLE_K + 1)]; 52 | 53 | }/*THEN*/ 54 | else { 55 | 56 | double approx1 = regret_fn_szp1(n, k); 57 | double approx2 = regret_fn_szp2(n, k); 58 | 59 | return (approx1 < approx2) ? approx1 : approx2; 60 | 61 | }/*ELSE*/ 62 | 63 | }/*NML_REGRET*/ 64 | -------------------------------------------------------------------------------- /R/frontend-amat.R: -------------------------------------------------------------------------------- 1 | 2 | # build an adjacency matrix from a graph. 3 | amat = function(x) { 4 | 5 | check.bn.or.fit(x) 6 | 7 | if (is(x, "bn")) 8 | arcs2amat(x$arcs, names(x$nodes)) 9 | else 10 | arcs2amat(fit2arcs(x), names(x)) 11 | 12 | }#AMAT 13 | 14 | # rebuild the network structure using a new adjacency matrix. 
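custom_score_function() above evaluates an R callback with four arguments (the target node, its parents, the data and the optional argument list) and requires a scalar real return value. A hedged sketch of such a callback defined on the R side, assuming the "custom-score" label from score_to_enum() earlier and its fun/args arguments; the BIC-via-lm() rule is only an illustration, not the package's own scoring code:

my.score = function(node, parents, data, args) {

  # regress the node on its parents (or on the intercept alone)...
  if (length(parents) == 0)
    model = lm(data[, node] ~ 1)
  else
    model = lm(data[, node] ~ ., data = data[, parents, drop = FALSE])
  # ... and return a goodness-of-fit measure (higher is better).
  return(as.numeric(-BIC(model) / 2))

}#MY.SCORE

data(gaussian.test)
dag = hc(gaussian.test, score = "custom-score", fun = my.score, args = list())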
15 | "amat<-" = function(x, check.cycles = TRUE, check.illegal = TRUE, debug = FALSE, 16 | value) { 17 | 18 | check.bn(x) 19 | # a node is needed. 20 | if (missing(value)) 21 | stop("no adjacency matrix specified.") 22 | # check logical arguments. 23 | check.logical(check.cycles) 24 | check.logical(check.illegal) 25 | check.logical(debug) 26 | # check the adjacency matrix. 27 | value = check.amat(amat = value, nodes = names(x$nodes)) 28 | 29 | # update the arcs of the network. 30 | x$arcs = amat2arcs(value, names(x$nodes)) 31 | 32 | # check whether the the graph contains directed cycles. 33 | if (check.cycles) 34 | if (!is.acyclic(nodes = names(x$nodes), arcs = x$arcs, debug = debug, 35 | directed = TRUE)) 36 | stop("the specified network contains cycles.") 37 | # check whether any arc is illegal. 38 | if (check.illegal) { 39 | 40 | illegal = which.listed(x$arcs, x$learning$illegal) 41 | 42 | if (any(illegal)) { 43 | 44 | illegal = apply(x$arcs[illegal, , drop = FALSE], 1, 45 | function(x) { paste(" (", x[1], ", ", x[2], ")", sep = "") }) 46 | 47 | stop("the following arcs are not valid due to the parametric assumptions of the network:", 48 | illegal, ".") 49 | 50 | }#THEN 51 | 52 | }#THEN 53 | 54 | # update the network structure. 55 | x$nodes = cache.structure(names(x$nodes), 56 | amat = as.integer(value), debug = debug) 57 | 58 | return(x) 59 | 60 | }#AMAT<- 61 | 62 | -------------------------------------------------------------------------------- /man/plot.bn.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.bn} 2 | \alias{plot.bn} 3 | \title{Plot a Bayesian network} 4 | \description{ 5 | 6 | Plot the graph associated with a small Bayesian network. 7 | 8 | } 9 | \usage{ 10 | \method{plot}{bn}(x, ylim = c(0,600), xlim = ylim, radius = 250, 11 | arrow = 35, highlight = NULL, color = "red", ...) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn}.} 15 | \item{ylim}{a numeric vector with two components containing the range of the 16 | y-axis.} 17 | \item{xlim}{a numeric vector with two components containing the range of the 18 | x-axis.} 19 | \item{radius}{a numeric value containing the radius of the nodes.} 20 | \item{arrow}{a numeric value containing the length of the arrow heads.} 21 | \item{highlight}{a vector of character strings, representing the labels of 22 | the nodes (and corresponding arcs) to be highlighted.} 23 | \item{color}{an integer or character string (the highlight colour).} 24 | \item{\dots}{other graphical parameters to be passed through to plotting 25 | functions.} 26 | } 27 | \note{ 28 | The following arguments are always overridden: 29 | \itemize{ 30 | \item \code{axes} is set to \code{FALSE}. 31 | \item \code{xlab} is set to an empty string. 32 | \item \code{ylab} is set to an empty string. 33 | } 34 | } 35 | \examples{ 36 | data(learning.test) 37 | cpdag = pc.stable(learning.test) 38 | 39 | plot(cpdag) 40 | 41 | ## highlight node B and related arcs. 42 | plot(cpdag, highlight = "B") 43 | ## highlight B and its Markov blanket. 44 | plot(cpdag, highlight = c("B", mb(cpdag, "B"))) 45 | 46 | ## a more compact plot. 
47 | par(oma = rep(0, 4), mar = rep(0, 4), mai = rep(0, 4), 48 | plt = c(0.06, 0.94, 0.12, 0.88)) 49 | plot(cpdag) 50 | } 51 | \seealso{\code{\link{graphviz.plot}}.} 52 | \author{Marco Scutari} 53 | \keyword{plots} 54 | -------------------------------------------------------------------------------- /src/minimal/unique.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* return the unique elements from an input vector.*/ 4 | SEXP unique(SEXP array) { 5 | 6 | int *d = NULL, i = 0, k = 0, dup_counter = 0, n = length(array); 7 | int *res = NULL, *a = NULL; 8 | SEXP dup, result = R_NilValue; 9 | 10 | PROTECT(dup = duplicated(array, FALSE)); 11 | d = LOGICAL(dup); 12 | 13 | switch(TYPEOF(array)) { 14 | 15 | case INTSXP: 16 | 17 | a = INTEGER(array); 18 | 19 | for (i = 0; i < n; i++) 20 | if ((d[i] == 0) && (a[i] != NA_INTEGER)) 21 | dup_counter++; 22 | 23 | PROTECT(result = allocVector(INTSXP, dup_counter)); 24 | res = INTEGER(result); 25 | 26 | for (i = 0; i < n; i++) 27 | if ((d[i] == 0) && (a[i] != NA_INTEGER)) 28 | res[k++] = a[i]; 29 | 30 | break; 31 | 32 | case STRSXP: 33 | 34 | for (i = 0; i < n; i++) 35 | if (d[i] == 0) 36 | dup_counter++; 37 | 38 | PROTECT(result = allocVector(STRSXP, dup_counter)); 39 | 40 | for (i = 0; i < n; i++) 41 | if (d[i] == 0) 42 | SET_STRING_ELT(result, k++, STRING_ELT(array, i)); 43 | 44 | break; 45 | 46 | default: 47 | 48 | error("this SEXP type is not handled in unique()."); 49 | 50 | }/*SWITCH*/ 51 | 52 | UNPROTECT(2); 53 | 54 | return result; 55 | 56 | }/*UNIQUE*/ 57 | 58 | /* determine which elements are dupes. */ 59 | SEXP dupe(SEXP array) { 60 | 61 | int i = 0, n = length(array); 62 | int *res = NULL, *tmp = NULL; 63 | SEXP result, temp; 64 | 65 | PROTECT(result = duplicated(array, FALSE)); 66 | PROTECT(temp = duplicated(array, TRUE)); 67 | res = LOGICAL(result); 68 | tmp = LOGICAL(temp); 69 | 70 | for (i = 0; i < n; i++) 71 | res[i] = res[i] || tmp[i]; 72 | 73 | UNPROTECT(2); 74 | 75 | return result; 76 | 77 | }/*DUPE*/ 78 | 79 | -------------------------------------------------------------------------------- /R/backend-score.R: -------------------------------------------------------------------------------- 1 | 2 | # compare two network scores in an efficient way. 3 | score.delta = function(arc, network, data, score, score.delta, 4 | reference.score, op, extra, decomposable = TRUE) { 5 | 6 | .Call(call_score_delta, 7 | arc = arc, 8 | network = network, 9 | data = data, 10 | score = score, 11 | score.delta = score.delta, 12 | reference.score = reference.score, 13 | op = op, 14 | extra = extra, 15 | decomposable = decomposable) 16 | 17 | }#SCORE.DELTA 18 | 19 | # create a data frame or an adjacency matrix containing the arcs to be added. 20 | arcs.to.be.added = function(amat, nodes, blacklist = NULL, whitelist = NULL, 21 | nparents = NULL, maxp = Inf, arcs = TRUE) { 22 | 23 | .Call(call_hc_to_be_added, 24 | arcs = amat, 25 | blacklist = blacklist, 26 | whitelist = whitelist, 27 | nparents = nparents, 28 | maxp = maxp, 29 | nodes = nodes, 30 | convert = arcs) 31 | 32 | }#ARCS.TO.BE.ADDED 33 | 34 | # create a data frame containing the arcs to be dropped: 35 | # arcs arcs already in the graph. 36 | # !is.listed(whitelist) exclude whitelisted arcs. 
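score.delta() above compares two networks that differ by a single arc operation; the same comparison can be reproduced (far less efficiently) from the frontend, which may help when debugging — a sketch assuming the learning.test data and an arc that is absent from the learned structure:

data(learning.test)
dag = hc(learning.test)
before = score(dag, learning.test, type = "bic")
after = score(set.arc(dag, from = "A", to = "F"), learning.test, type = "bic")
after - before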
37 | arcs.to.be.dropped = function(arcs, whitelist) { 38 | 39 | if (!is.null(whitelist)) 40 | return(arcs[!which.listed(arcs, whitelist), , drop = FALSE]) 41 | else 42 | return(arcs) 43 | 44 | }#ARCS.TO.BE.DROPPED 45 | 46 | # create a data frame containing the arcs to be reversed: 47 | arcs.to.be.reversed = function(arcs, blacklist, nparents, maxp = Inf) { 48 | 49 | if (!is.null(blacklist)) 50 | arcs = arcs[!which.listed(arcs[, c(2, 1), drop = FALSE], blacklist), , drop = FALSE] 51 | 52 | if (!missing(nparents)) 53 | arcs = arcs[nparents[arcs[, 1]] < maxp, , drop = FALSE] 54 | 55 | return(arcs) 56 | 57 | }#ARCS.TO.BE.REVERSED 58 | 59 | -------------------------------------------------------------------------------- /man/nodeops.Rd: -------------------------------------------------------------------------------- 1 | \name{node operations} 2 | \alias{node operations} 3 | \alias{add.node} 4 | \alias{remove.node} 5 | \alias{rename.nodes} 6 | \alias{nodes<-} 7 | \alias{nodes<-,bn-method} 8 | \alias{nodes<-,bn.fit-method} 9 | \alias{nodes<-,bn.naive-method} 10 | \alias{nodes<-,bn.tan-method} 11 | \title{Manipulate nodes in a graph} 12 | \description{ 13 | 14 | Add, remove and rename nodes in a graph. 15 | 16 | } 17 | \usage{ 18 | # add and remove nodes. 19 | add.node(x, node) 20 | remove.node(x, node) 21 | 22 | # re-label nodes. 23 | rename.nodes(x, names) 24 | \S4method{nodes}{bn}(object) <- value 25 | \S4method{nodes}{bn.fit}(object) <- value 26 | } 27 | \arguments{ 28 | \item{x}{an object of class \code{bn} for \code{add.node()} and 29 | \code{remove.node()}; an object of class \code{bn} or \code{bn.fit} for 30 | \code{rename.nodes()}.} 31 | \item{object}{an object of class \code{bn} or \code{bn.fit}.} 32 | \item{node}{a character string, the label of a node.} 33 | \item{value, names}{a vector of character strings, the new set of labels 34 | that wll be used as to rename the nodes.} 35 | } 36 | \details{ 37 | 38 | \code{add.node()} adds a new (isolated) node to an existing \code{bn} object. 39 | 40 | \code{remove.node()} removes a node from a \code{bn} object. 41 | 42 | \code{rename.nodes()} replaces the node labels with new ones, relabelling the 43 | whole node set. The assignment method for \code{nodes()} is an alias of 44 | \code{rename.nodes()}. 45 | 46 | } 47 | \value{ 48 | 49 | \code{add.node()}, \code{remove.node()} and \code{rename.nodes()} return an 50 | updated \code{bn} object. 51 | 52 | } 53 | \examples{ 54 | dag = random.graph(LETTERS[1:5]) 55 | add.node(dag, "Z") 56 | remove.node(dag, "A") 57 | 58 | nodes(dag) 59 | nodes(dag) = LETTERS[6:10] 60 | nodes(dag) 61 | } 62 | \author{Marco Scutari} 63 | \keyword{graphs} 64 | -------------------------------------------------------------------------------- /R/test.R: -------------------------------------------------------------------------------- 1 | 2 | # conditional and unconditional independence tests (vectorized in x, scalar 3 | # in y). 4 | indep.test = function(x, y, sx, data, test, extra.args = list(), alpha = 1, 5 | learning = TRUE) { 6 | 7 | .Call(call_indep_test, 8 | x = x, 9 | y = y, 10 | sx = sx[sx != ""], 11 | data = data, 12 | test = test, 13 | alpha = alpha, 14 | extra.args = extra.args, 15 | learning = learning, 16 | complete = attr(data, "metadata")$complete.nodes) 17 | 18 | }#INDEP.TEST 19 | 20 | # test against all possible subsets of the conditioning set (scalar in both x 21 | # and y). 
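indep.test() above is the internal entry point behind ci.test(); a minimal frontend-level sketch of the same conditional independence test (assuming the learning.test data):

data(learning.test)
tst = ci.test("B", "F", c("A", "E"), data = learning.test, test = "x2")
tst$statistic
tst$p.value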
22 | allsubs.test = function(x, y, sx, fixed = character(0), data, test, 23 | extra.args = list(), alpha = 1, min = 0, max = length(sx), debug = FALSE) { 24 | 25 | .Call(call_allsubs_test, 26 | x = x, 27 | y = y, 28 | sx = c(fixed, sx), 29 | fixed = fixed, 30 | data = data, 31 | test = test, 32 | alpha = alpha, 33 | extra.args = extra.args, 34 | min = as.integer(min), 35 | max = as.integer(min(max, length(sx))), 36 | complete = attr(data, "metadata")$complete.nodes, 37 | debug = debug) 38 | 39 | }#ALLSUBS.TEST 40 | 41 | # test each variable in turn given the rest as a conditioning set. 42 | roundrobin.test = function(x, z, fixed, data, test, extra.args = list(), 43 | alpha = 1, debug = FALSE) { 44 | 45 | if (length(z) == 0) 46 | return(structure(numeric(0), names = character(0))) 47 | 48 | .Call(call_roundrobin_test, 49 | x = x, 50 | z = z, 51 | fixed = fixed, 52 | data = data, 53 | test = test, 54 | alpha = alpha, 55 | extra.args = extra.args, 56 | complete = attr(data, "metadata")$complete.nodes, 57 | debug = debug) 58 | 59 | }#ROUNDROBIN.TEST 60 | 61 | -------------------------------------------------------------------------------- /man/lizards.Rd: -------------------------------------------------------------------------------- 1 | \name{lizards} 2 | \docType{data} 3 | \alias{lizards} 4 | \title{Lizards' perching behaviour data set} 5 | \description{ 6 | 7 | Real-world data set about the perching behaviour of two species of lizards in 8 | the South Bimini island, from Shoener (1968). 9 | 10 | } 11 | \usage{ 12 | data(lizards) 13 | } 14 | \format{ 15 | 16 | The \code{lizards} data set contains the following variables: 17 | \itemize{ 18 | 19 | \item \code{Species} (\emph{the species of the lizard}): a two-level factor 20 | with levels \code{Sagrei} and \code{Distichus}. 21 | \item \code{Height} (\emph{perch height}): a two-level factor with levels 22 | \code{high} (greater than 4.75 feet) and \code{low} (lesser or equal to 23 | 4.75 feet). 24 | \item \code{Diameter} (\emph{perch diameter}): a two-level factor with 25 | levels \code{narrow} (greater than 4 inches) and \code{wide} (lesser or 26 | equal to 4 inches). 27 | 28 | } 29 | 30 | } 31 | \source{ 32 | 33 | Edwards DI (2000). \emph{Introduction to Graphical Modelling}. Springer, 2nd 34 | edition. 35 | 36 | Fienberg SE (1980). \emph{The Analysis of Cross-Classified Categorical Data}. 37 | Springer, 2nd edition. 38 | 39 | Schoener TW (1968). "The Anolis Lizards of Bimini: Resource Partitioning in a 40 | Complex Fauna." \emph{Ecology}, \strong{49}(4):704--726. 41 | 42 | } 43 | \examples{ 44 | # load the data. 45 | data(lizards) 46 | # create and plot the network structure. 47 | dag = model2network("[Species][Diameter|Species][Height|Species]") 48 | \dontrun{graphviz.plot(dag, shape = "ellipse")} 49 | 50 | # This data set is useful as it offers nominal values for 51 | # the conditional mutual information and X^2 tests. 
52 | ci.test("Height", "Diameter", "Species", test = "mi", data = lizards) 53 | ci.test("Height", "Diameter", "Species", test = "x2", data = lizards) 54 | } 55 | \keyword{datasets} 56 | -------------------------------------------------------------------------------- /src/scores/nml.regret.table.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../math/linear.algebra.h" 4 | #include "scores.h" 5 | 6 | double get_regret_k2(int N, double *logs, double *logfacs) { 7 | 8 | double res = 0.0; 9 | 10 | for (int n = 0; n <= N; ++n) { 11 | 12 | double term = logfacs[N] - logfacs[n] - logfacs[N - n]; 13 | 14 | if (n) 15 | term += n * (logs[n] - logs[N]); 16 | if (N - n) 17 | term += (N - n) * (logs[N - n] - logs[N]); 18 | 19 | res += exp(term); 20 | 21 | }/*FOR*/ 22 | 23 | return log(res); 24 | 25 | }/*GET_REGRET_K2*/ 26 | 27 | void fill_regrets_up_to_K(int K, int n, double *regret_table) { 28 | 29 | double rk_prev_prev = 1.0; 30 | double rk_prev = exp(regret_table[CMC(2, n, K + 1)]); 31 | double rk = 0; 32 | 33 | for (int k = 3; k <= K; ++k) { 34 | 35 | rk = rk_prev + rk_prev_prev / (k - 2) * n; 36 | regret_table[CMC(k, n, K + 1)] = log(rk); 37 | rk_prev_prev = rk_prev; 38 | rk_prev = rk; 39 | 40 | }/*FOR*/ 41 | 42 | }/*FILL_REGRETS_UP_TO_K*/ 43 | 44 | double *get_regret_table(int N, int K) { 45 | 46 | double *logs = (double *) Calloc1D((N + 1), sizeof(double)); 47 | double *logfacs = (double *) Calloc1D((N + 1), sizeof(double)); 48 | double *regret_table = (double *) Calloc1D((N + 1) * (K + 1), sizeof(double)); 49 | 50 | /* cache a table of logarithms. */ 51 | for (int n = 1; n <= N; ++n) 52 | logs[n] = log(n); 53 | 54 | /* cache a table of log-factorials (computed as log-Gammas). */ 55 | for (int n = 1; n <= N; ++n) 56 | logfacs[n] = lgammafn(n + 1); 57 | 58 | for (int n = 1; n <= N; ++n) { 59 | 60 | regret_table[CMC(2, n, K + 1)] = get_regret_k2(n, logs, logfacs); 61 | fill_regrets_up_to_K(K, n, regret_table); 62 | 63 | }/*FOR*/ 64 | 65 | Free1D(logfacs); 66 | Free1D(logs); 67 | 68 | return regret_table; 69 | 70 | }/*GET_REGRET_TABLE*/ 71 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bnlearn 2 | Type: Package 3 | Title: Bayesian Network Structure Learning, Parameter Learning and 4 | Inference 5 | Version: 5.1 6 | Date: 2025-08-18 7 | Depends: R (>= 4.4.0), methods 8 | Suggests: parallel, graph, Rgraphviz, igraph, lattice, gRbase, gRain 9 | (>= 1.3-3), Rmpfr, gmp 10 | Authors@R: c(person(given = "Marco", family = "Scutari", role = c("aut", "cre"), 11 | email = "scutari@bnlearn.com"), 12 | person(given = "Tomi", family = "Silander", role = "ctb")) 13 | Maintainer: Marco Scutari 14 | Description: Bayesian network structure learning, parameter learning and inference. 15 | This package implements constraint-based (PC, GS, IAMB, Inter-IAMB, Fast-IAMB, MMPC, 16 | Hiton-PC, HPC), pairwise (ARACNE and Chow-Liu), score-based (Hill-Climbing and Tabu 17 | Search) and hybrid (MMHC, RSMAX2, H2PC) structure learning algorithms for discrete, 18 | Gaussian and conditional Gaussian networks, along with many score functions and 19 | conditional independence tests. 20 | The Naive Bayes and the Tree-Augmented Naive Bayes (TAN) classifiers are also implemented. 
21 | Some utility functions (model comparison and manipulation, random data generation, arc 22 | orientation testing, simple and advanced plots) are included, as well as support for 23 | parameter estimation (maximum likelihood and Bayesian) and inference, conditional 24 | probability queries, cross-validation, bootstrap and model averaging. 25 | Development snapshots with the latest bugfixes are available from . 26 | URL: https://www.bnlearn.com/ 27 | SystemRequirements: USE_C17 28 | License: GPL (>= 2) 29 | LazyData: yes 30 | NeedsCompilation: yes 31 | Packaged: 2025-08-20 08:45:12 UTC; fizban 32 | Author: Marco Scutari [aut, cre], 33 | Tomi Silander [ctb] 34 | Repository: CRAN 35 | Date/Publication: 2025-08-20 12:50:13 UTC 36 | -------------------------------------------------------------------------------- /R/sanitization-enumeration.R: -------------------------------------------------------------------------------- 1 | 2 | # sanitize the extra arguments passed to the graph enumerations. 3 | check.enumeration.args = function(type, extra) { 4 | 5 | # check the number of nodes. 6 | if (has.argument(type, "nodes", enumerations.extra.args)) { 7 | 8 | if (!is.positive.vector(extra[["nodes"]])) 9 | stop("'nodes' must be positive integers, the number(s) of nodes in the graph.") 10 | 11 | }#THEN 12 | 13 | # check the number of root nodes. 14 | if (has.argument(type, "k", enumerations.extra.args)) 15 | check.graph.root.nodes(extra[["k"]], N = extra[["nodes"]]) 16 | 17 | # check the number of arcs. 18 | if (has.argument(type, "r", enumerations.extra.args)) 19 | check.graph.narcs(extra[["r"]], N = extra[["nodes"]]) 20 | 21 | # check the markov equivalence class. 22 | if (has.argument(type, "eqclass", enumerations.extra.args)) 23 | valid.cpdag.backend(extra[["eqclass"]]) 24 | 25 | # warn about and remove unused arguments. 26 | extra = check.unused.args(extra, enumerations.extra.args[[type]]) 27 | 28 | return(extra) 29 | 30 | }#CHECK.ENUMERATION.ARGS 31 | 32 | # check the number of root nodes. 33 | check.graph.root.nodes = function(k, N) { 34 | 35 | if (is.null(k)) 36 | stop("unspecified number of root nodes.") 37 | 38 | if (!is.positive.integer(k)) 39 | stop("the number of root nodes must be a positive integer.") 40 | if (k > max(N)) 41 | warning("the number of root nodes is larger than the number of nodes.") 42 | 43 | }#CHECK.GRAPH.ROOT.NODES 44 | 45 | # check the number of arcs. 46 | check.graph.narcs = function(r, N) { 47 | 48 | if (is.null(r)) 49 | stop("unspecified number of arcs.") 50 | 51 | if (!is.non.negative.integer(r)) 52 | stop("the number of arcs must be a positive integer.") 53 | if (r > choose(max(N), 2)) 54 | warning("the number of arcs is larger than the maximum possible number of arcs.") 55 | 56 | }#CHECK.GRAPH.NARCS 57 | -------------------------------------------------------------------------------- /src/parameters/rinterface/classic.discrete.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/contingency.tables.h" 3 | #include "../../minimal/data.frame.h" 4 | #include "../../minimal/table.h" 5 | #include "../parameters.h" 6 | 7 | SEXP classic_discrete_parameters(SEXP data, SEXP node, SEXP parents, SEXP iss, 8 | SEXP replace_unidentifiable, SEXP missing) { 9 | 10 | double *cpt = 0, alpha = 0; 11 | bool replace = isTRUE(replace_unidentifiable); 12 | SEXP nodes_in_order, relevant_data, counts, cptable; 13 | 14 | /* subset the data with the node labels in the right order. 
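 * The node itself is placed first, so the first dimension of the counts (and of
 * the conditional probability table below) indexes its levels: for a node A with
 * parents B and C the table has dimensions |A| x |B| x |C|, and
 * c_classic_discrete_parameters() normalises each block of |A| counts, one block
 * per configuration of the parents, possibly after adding the prior mass derived
 * from the imaginary sample size iss when a posterior estimate is requested.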
*/ 15 | PROTECT(nodes_in_order = allocVector(STRSXP, length(parents) + 1)); 16 | SET_STRING_ELT(nodes_in_order, 0, STRING_ELT(node, 0)); 17 | for (int i = 0; i < length(parents); i++) 18 | SET_STRING_ELT(nodes_in_order, i + 1, STRING_ELT(parents, i)); 19 | 20 | PROTECT(relevant_data = c_dataframe_column(data, nodes_in_order, FALSE, TRUE)); 21 | 22 | /* implement the maximum likelihood estimator as a particular case of the 23 | * posterior estimator with prior mass equal to zero. */ 24 | if (iss == R_NilValue) 25 | alpha = 0; 26 | else 27 | alpha = NUM(iss); 28 | 29 | /* compute the counts that are the sufficient statistic. */ 30 | PROTECT(counts = minimal_table(relevant_data, missing)); 31 | 32 | /* prepare the conditional probability table... */ 33 | PROTECT(cptable = allocVector(REALSXP, length(counts))); 34 | setAttrib(cptable, R_DimSymbol, getAttrib(counts, R_DimSymbol)); 35 | setAttrib(cptable, R_DimNamesSymbol, getAttrib(counts, R_DimNamesSymbol)); 36 | setAttrib(cptable, R_ClassSymbol, mkString("table")); 37 | cpt = REAL(cptable); 38 | 39 | /* ... and estimate it. */ 40 | c_classic_discrete_parameters(INTEGER(counts), cpt, nrows(cptable), 41 | length(cptable) / nrows(cptable), alpha, replace); 42 | 43 | UNPROTECT(4); 44 | 45 | return cptable; 46 | 47 | }/*CLASSIC_DISCRETE_PARAMETERS*/ 48 | -------------------------------------------------------------------------------- /src/minimal/strings.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* setdiff() for two vectors of character strings. */ 4 | SEXP string_setdiff(SEXP large, SEXP small) { 5 | 6 | int i = 0, k = 0, *t = NULL, nl = length(large), ns = length(small); 7 | SEXP try, diff; 8 | 9 | /* match the elements of the smaller set against the larger set. */ 10 | PROTECT(try = match(small, large, 0)); 11 | t = INTEGER(try); 12 | /* allocate the return value. */ 13 | PROTECT(diff = allocVector(STRSXP, nl - ns)); 14 | /* copy the elements that are not shared between te two. */ 15 | for (i = 0, k = 0; i < nl; i++) 16 | if (t[i] == 0) 17 | SET_STRING_ELT(diff, k++, STRING_ELT(large, i)); 18 | 19 | UNPROTECT(2); 20 | 21 | return diff; 22 | 23 | }/*STRING_SETDIFF*/ 24 | 25 | /* delete a string for a STRSXP array. */ 26 | SEXP string_delete(SEXP array, SEXP string, int *idx) { 27 | 28 | int i = 0, k = 0, *t = NULL, n = length(array); 29 | SEXP try, new_array; 30 | 31 | PROTECT(try = match(array, string, 0)); 32 | t = INTEGER(try); 33 | 34 | /* optional, save the index of the string. */ 35 | if (idx) 36 | *idx = *t; 37 | 38 | if (*t == 0) { 39 | 40 | UNPROTECT(1); 41 | 42 | return array; 43 | 44 | }/*THEN*/ 45 | 46 | /* allocate the new array. */ 47 | PROTECT(new_array = allocVector(STRSXP, n - 1)); 48 | 49 | for (i = 0, k = 0; i < n; i++) 50 | if (i != *t - 1) 51 | SET_STRING_ELT(new_array, k++, STRING_ELT(array, i)); 52 | 53 | UNPROTECT(2); 54 | 55 | return new_array; 56 | 57 | }/*STRING_DELETE*/ 58 | 59 | /* variadic version of mkString(). */ 60 | SEXP mkStringVec(int n, ...) 
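/* e.g. mkStringVec(2, "bn.cpdist", "data.frame") builds the character vector
 * c("bn.cpdist", "data.frame"); cpdist_lw() uses exactly this call to set the
 * class attribute of the data frame it returns. */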
{ 61 | 62 | va_list strings; 63 | int i = 0; 64 | SEXP vec; 65 | 66 | PROTECT(vec = allocVector(STRSXP, n)); 67 | va_start(strings, n); 68 | for (i = 0; i < n; i++) 69 | SET_STRING_ELT(vec, i, mkChar(va_arg(strings, char *))); 70 | va_end(strings); 71 | UNPROTECT(1); 72 | 73 | return vec; 74 | 75 | }/*MKSTRINGVEC*/ 76 | -------------------------------------------------------------------------------- /src/graphs/rinterface/connected.components.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/allocations.h" 3 | #include "../../include/graph.h" 4 | #include "../../minimal/common.h" 5 | #include "../graphs.h" 6 | 7 | /* identify the connected components in an undirected graph. */ 8 | SEXP connected_components(SEXP x, SEXP debug) { 9 | 10 | int nnodes = 0, ncomponents = 0; 11 | int **buffer = NULL, *buflen = NULL; 12 | char **labels = NULL; 13 | SEXP arcs, nodes, amat, temp, components; 14 | 15 | /* extract the relevant information from the network. */ 16 | arcs = getListElement(x, "arcs"); 17 | nodes = getListElement(x, "nodes"); 18 | nodes = getAttrib(nodes, R_NamesSymbol); 19 | nnodes = length(nodes); 20 | 21 | /* contruct the adjacency matrix. */ 22 | PROTECT(amat = arcs2amat(arcs, nodes)); 23 | 24 | /* buffer to store the connected components. */ 25 | buffer = Calloc1D(nnodes, sizeof(int **)); 26 | buflen = Calloc1D(nnodes, sizeof(int)); 27 | /* dereference the node labels. */ 28 | labels = Calloc1D(nnodes, sizeof(char *)); 29 | for (int i = 0; i < nnodes; i++) 30 | labels[i] = (char *) CHAR(STRING_ELT(nodes, i)); 31 | 32 | /* identify the connected components. */ 33 | ncomponents = ug_connected_components(INTEGER(amat), labels, nnodes, buffer, 34 | buflen, isTRUE(debug)); 35 | 36 | /* return the components and the respective sets of node labels. */ 37 | PROTECT(components = allocVector(VECSXP, ncomponents)); 38 | 39 | for (int i = 0; i < ncomponents; i++) { 40 | 41 | PROTECT(temp = allocVector(STRSXP, buflen[i])); 42 | for (int j = 0; j < buflen[i]; j++) 43 | SET_STRING_ELT(temp, j, STRING_ELT(nodes, buffer[i][j])); 44 | SET_VECTOR_ELT(components, i, temp); 45 | 46 | UNPROTECT(1); 47 | 48 | }/*FOR*/ 49 | 50 | UNPROTECT(2); 51 | 52 | Free2D(buffer, nnodes); 53 | Free1D(buflen); 54 | Free1D(labels); 55 | 56 | return components; 57 | 58 | }/*CONNECTED_COMPONENTS*/ 59 | -------------------------------------------------------------------------------- /R/frontend-formula.R: -------------------------------------------------------------------------------- 1 | 2 | # describe the network with a "model string". 3 | modelstring = function(x) { 4 | 5 | check.bn.or.fit(x) 6 | # no model string if the graph is partially directed. 7 | if (is(x, "bn")) 8 | if (!is.completely.directed(x)) 9 | stop("the graph is only partially directed.") 10 | 11 | modelstring.backend(x) 12 | 13 | }#MODELSTRING 14 | 15 | # set a specific network structure with the model string. 16 | "modelstring<-" = function(x, debug = FALSE, value) { 17 | 18 | check.modelstring(value) 19 | check.logical(debug) 20 | 21 | model2network.backend(value, node.order = names(x$nodes), debug = debug) 22 | 23 | }#MODELSTRING<- 24 | 25 | # bn-to-character (i.e. the model string) conversion function. 26 | # an alias of modelstring(). 27 | as.character.bn = function(x, ...) { 28 | 29 | # warn about unused arguments. 
30 | check.unused.args(list(...), character(0)) 31 | 32 | modelstring(x) 33 | 34 | }#AS.CHARACTER.BN 35 | 36 | # generate an object of class bn from a model string. 37 | model2network = function(string, ordering = NULL, debug = FALSE) { 38 | 39 | # check string's class and format. 40 | check.modelstring(string) 41 | # check the node ordering; NULL is ok this time, it lets the backend decide. 42 | if (!is.null(ordering)) 43 | check.nodes(ordering) 44 | 45 | check.logical(debug) 46 | 47 | result = model2network.backend(string, node.order = ordering, debug = debug) 48 | 49 | # check the node ordering again now that the graph is built. 50 | if (!is.null(ordering)) { 51 | 52 | check.nodes(ordering, graph = result, min.nodes = length(result$nodes), 53 | max.nodes = length(result$nodes)) 54 | 55 | }#THEN 56 | 57 | return(result) 58 | 59 | }#MODEL2NETWORK 60 | 61 | # model-string-to-bn conversion function. 62 | as.bn.character = function(x, ...) { 63 | 64 | # warn about unused arguments. 65 | check.unused.args(list(...), character(0)) 66 | 67 | model2network(x) 68 | 69 | }#AS.BN.CHARACTER 70 | 71 | -------------------------------------------------------------------------------- /src/inference/rinterface/cpdist.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../include/sampling.h" 3 | #include "../../minimal/data.frame.h" 4 | #include "../../minimal/strings.h" 5 | #include "../../include/globals.h" 6 | #include "../../core/math.functions.h" 7 | 8 | /* sample observations with the associated likelihood weights. */ 9 | SEXP cpdist_lw(SEXP fitted, SEXP nodes, SEXP n, SEXP fix, SEXP debug) { 10 | 11 | int nsims = INT(n), max_id = 0; 12 | double *weights = NULL; 13 | SEXP result, simulation, wgt, from; 14 | 15 | /* allocate the scratch space for the simulation. */ 16 | PROTECT(simulation = fit2df(fitted, nsims)); 17 | /* generate the random observations. */ 18 | c_rbn_master(fitted, simulation, n, fix, TRUE, FALSE); 19 | 20 | if (isTRUE(debug)) 21 | Rprintf("* generated %d samples from the bayesian network.\n", nsims); 22 | 23 | /* compute the weights. */ 24 | PROTECT(wgt = allocVector(REALSXP, nsims)); 25 | weights = REAL(wgt); 26 | PROTECT(from = getAttrib(fix, R_NamesSymbol)); 27 | c_lw_weights(fitted, simulation, nsims, weights, from, FALSE); 28 | 29 | /* if all weights are zero or NA, the evidence is making it impossible to 30 | * generate a set of valid random observations. */ 31 | max_id = d_which_max(weights, nsims); 32 | 33 | if (max_id == NA_INTEGER) 34 | error("all weights are NA, the probability of the evidence is impossible to compute."); 35 | if (weights[d_which_max(weights, nsims) - 1] == 0) 36 | error("all weights are zero, the evidence has probability zero."); 37 | 38 | /* prepare the return value with all the attributes. 
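 * The sampled particles are returned as a data frame with the likelihood
 * weights (BN_WeightsSymbol), the sampling method (BN_MethodSymbol, here "lw")
 * and the class c("bn.cpdist", "data.frame") attached, so that the R-level
 * cpdist() can post-process them.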
*/ 39 | PROTECT(result = c_dataframe_column(simulation, nodes, FALSE, TRUE)); 40 | minimal_data_frame(result); 41 | setAttrib(result, BN_WeightsSymbol, wgt); 42 | setAttrib(result, BN_MethodSymbol, mkString("lw")); 43 | setAttrib(result, R_ClassSymbol, mkStringVec(2, "bn.cpdist", "data.frame")); 44 | 45 | UNPROTECT(4); 46 | 47 | return result; 48 | 49 | }/*CPDIST_LW*/ 50 | 51 | -------------------------------------------------------------------------------- /man/rbn.Rd: -------------------------------------------------------------------------------- 1 | \name{rbn} 2 | \alias{rbn} 3 | \title{Simulate random samples from a given Bayesian network} 4 | \description{ 5 | 6 | Simulate random samples from a given Bayesian network. 7 | 8 | } 9 | \usage{ 10 | rbn(x, n = 1, \dots, debug = FALSE) 11 | } 12 | \arguments{ 13 | \item{x}{an object of class \code{bn.fit}.} 14 | \item{n}{a positive integer giving the number of observations to generate.} 15 | \item{...}{additional arguments for the parameter estimation procedure, see 16 | \code{\link{bn.fit}} for details.} 17 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 18 | printed; otherwise the function is completely silent.} 19 | } 20 | \details{ 21 | 22 | \code{rbn()} implements forward/logic sampling: values for the root nodes are 23 | sampled from their (unconditional) distribution, then those of their children 24 | conditional on the respective parent sets. This is done iteratively until 25 | values have been sampled for all nodes. 26 | 27 | If \code{x} contains \code{NA} parameter estimates (because of unobserved 28 | discrete parents' configurations in the data the parameters were learned from), 29 | \code{rbn} will produce samples that contain \code{NA}s when those parents' 30 | configurations appear in the simulated samples. See \code{\link{bn.fit}} for 31 | details on how to make sure \code{bn.fit} objects contain no \code{NA} 32 | parameter estimates. 33 | 34 | } 35 | \value{ 36 | 37 | A data frame with the same structure as the data originally used to fit the 38 | parameters of the Bayesian network. 39 | 40 | } 41 | \references{ 42 | 43 | Korb K, Nicholson AE (2010). \emph{Bayesian Artificial Intelligence}. 44 | Chapman & Hall/CRC, 2nd edition.
45 | 46 | } 47 | \examples{ 48 | data(learning.test) 49 | dag = hc(learning.test) 50 | fitted = bn.fit(dag, learning.test) 51 | rbn(fitted, 5) 52 | } 53 | \author{Marco Scutari} 54 | \seealso{\code{\link{cpdist}}.} 55 | \keyword{inference} 56 | \keyword{simulation} 57 | -------------------------------------------------------------------------------- /R/data.preprocessing.R: -------------------------------------------------------------------------------- 1 | 2 | dedup.backend = function(data, threshold, debug = FALSE) { 3 | 4 | .Call(call_dedup, 5 | data = data, 6 | threshold = threshold, 7 | complete = attr(data, "metadata")$complete.nodes, 8 | debug = debug) 9 | 10 | }#DEDUP.BACKEND 11 | 12 | discretize.backend = function(data, method, breaks, ordered = FALSE, extra.args, 13 | debug = FALSE) { 14 | 15 | if (method %in% c("quantile", "interval")) { 16 | 17 | discretized = marginal.discretize.backend(data = data, method = method, 18 | breaks = breaks, ordered = ordered, debug = debug) 19 | 20 | }#THEN 21 | else if (method == "hartemink") { 22 | 23 | discretized = joint.discretize.backend(data = data, method = method, 24 | breaks = breaks, ordered = ordered, 25 | initial.discretization = extra.args$idisc, 26 | initial.breaks = extra.args$ibreaks, debug = debug) 27 | 28 | }#ELSE 29 | 30 | # ensure that the attribute with the metadata set by check.data() is removed. 31 | attr(discretized, "metadata") = NULL 32 | 33 | return(discretized) 34 | 35 | }#DISCRETIZE.BACKEND 36 | 37 | marginal.discretize.backend = function(data, method, breaks, ordered = FALSE, 38 | debug = FALSE) { 39 | 40 | .Call(call_marginal_discretize, 41 | data = data, 42 | method = method, 43 | breaks = as.integer(breaks), 44 | ordered = ordered, 45 | debug = debug) 46 | 47 | }#MARGINAL.DISCRETIZE.BACKEND 48 | 49 | joint.discretize.backend = function(data, method, breaks, ordered = FALSE, 50 | initial.discretization = "quantile", initial.breaks, debug = FALSE) { 51 | 52 | .Call(call_joint_discretize, 53 | data = data, 54 | method = method, 55 | breaks = as.integer(breaks), 56 | ordered = ordered, 57 | initial.discretization = initial.discretization, 58 | initial.breaks = as.integer(initial.breaks), 59 | debug = debug) 60 | 61 | }#JOINT.DISCRETIZE.BACKEND 62 | -------------------------------------------------------------------------------- /R/sanitization-amat.R: -------------------------------------------------------------------------------- 1 | 2 | # sanity check adjacency matrices. 3 | check.amat = function(amat, nodes) { 4 | 5 | # a node is needed. 6 | if (missing(amat)) 7 | stop("no adjacency matrix specified.") 8 | # the adjacency matrix must, well, be a matrix. 9 | if (!is(amat, "matrix") || (ncol(amat) != nrow(amat)) || (length(dim(amat)) != 2)) 10 | stop("an adjacency matrix must be a 2-dimensional square matrix.") 11 | # check the dimensions against the number of nodes in the graph. 12 | if (any(dim(amat) != length(nodes))) 13 | stop("the dimensions of the adjacency matrix do not agree with the number of nodes in the graph.") 14 | # column names must be valid node labels. 15 | if (!is.null(colnames(amat))) 16 | if (any(colnames(amat) %!in% nodes)) 17 | stop("node (column label) not present in the graph.") 18 | # column names must be valid node labels. 19 | if (!is.null(rownames(amat))) 20 | if (any(rownames(amat) %!in% nodes)) 21 | stop("node (row label) not present in the graph.") 22 | # column names must match with row names. 
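# (illustrative sketch, assuming nodes = c("A", "B"): a well-formed input such as
# matrix(c(0L, 0L, 1L, 0L), 2L, 2L, dimnames = list(c("A", "B"), c("A", "B")))
# encodes the single arc A -> B and passes all of the checks in this function.)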
23 | if (!is.null(colnames(amat)) && !is.null(rownames(amat))) { 24 | 25 | if (!identical(colnames(amat), rownames(amat))) 26 | stop("row/column names mismatch in the adjacency matrix.") 27 | 28 | if (!identical(colnames(amat), nodes) || !identical(rownames(amat), nodes)) { 29 | 30 | warning("rearranging the rows/columns of the adjacency matrix.") 31 | 32 | amat = amat[nodes, nodes, drop = FALSE] 33 | 34 | }#THEN 35 | 36 | }#THEN 37 | # make really sure the adjacency matrix is made up of integers. 38 | if (storage.mode(amat) != "integer") 39 | storage.mode(amat) = "integer" 40 | # check the elements of the matrix. 41 | if (!all((amat == 0L) | (amat == 1L))) 42 | stop("all the elements of an adjacency matrix must be equal to either 0 or 1.") 43 | # no arcs from a node to itself. 44 | if (any(diag(amat) != 0)) 45 | stop("the elements on the diagonal must be zero.") 46 | 47 | return(amat) 48 | 49 | }#CHECK.AMAT 50 | 51 | -------------------------------------------------------------------------------- /src/core/allocations.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "allocations.h" 3 | 4 | void *Calloc1D(size_t R, size_t size) { 5 | 6 | void *p = NULL; 7 | 8 | if (R == 0) 9 | return NULL; 10 | 11 | p = calloc(R, size); 12 | 13 | if (!p) 14 | error("unable to allocate a %llu array.", (unsigned long long)R); 15 | 16 | return p; 17 | 18 | }/*CALLOC1D*/ 19 | 20 | void *Realloc1D(void *p, size_t R, size_t size) { 21 | 22 | p = realloc(p, R * size); 23 | 24 | if (!p) 25 | error("unable to reallocate a %llu array.", (unsigned long long)R); 26 | 27 | return p; 28 | 29 | }/*REALLOC1D*/ 30 | 31 | void BN_Free1D(void *p) { 32 | 33 | free(p); 34 | 35 | }/*FREE1D*/ 36 | 37 | void **Calloc2D(size_t R, size_t C, size_t size) { 38 | 39 | void **p = NULL; 40 | 41 | /* no corner cases, both dimensions required to be positive. */ 42 | if ((R == 0) || (C == 0)) 43 | error("trying to allocate a %llux%llu two-dimensional array.", 44 | (unsigned long long)R, (unsigned long long)C); 45 | 46 | p = Calloc1D(R, sizeof(void *)); 47 | 48 | for (int i = 0; i < R; i++) 49 | p[i] = Calloc1D(C, size); 50 | 51 | return p; 52 | 53 | }/*CALLOC2D*/ 54 | 55 | void BN_Free2D(void **p, size_t R) { 56 | 57 | for (int i = 0; i < R; i++) 58 | free(p[i]); 59 | free(p); 60 | 61 | }/*FREE2D*/ 62 | 63 | void ***Calloc3D(size_t R, size_t C, size_t L, size_t size) { 64 | 65 | void ***p = NULL; 66 | 67 | /* no corner cases, all three dimensions required to be positive. */ 68 | if ((R == 0) || (C == 0) || (L == 0)) 69 | error("trying to allocate a %llux%llux%llu three-dimensional array.", 70 | (unsigned long long)R, (unsigned long long)C, (unsigned long long)L); 71 | 72 | p = Calloc1D(R, sizeof(void *)); 73 | for (int i = 0; i < R; i++) 74 | p[i] = Calloc2D(C, L, size); 75 | 76 | return p; 77 | 78 | }/*CALLOC3D*/ 79 | 80 | void BN_Free3D(void ***p, size_t R, size_t C) { 81 | 82 | for (int i = 0; i < R; i++) 83 | BN_Free2D(p[i], C); 84 | free(p); 85 | 86 | }/*FREE3D*/ 87 | 88 | -------------------------------------------------------------------------------- /R/sanitization-classifiers.R: -------------------------------------------------------------------------------- 1 | 2 | # sanitize the extra arguments passed to Bayesian classifiers. 3 | check.classifier.args = function(method, data, training, explanatory, 4 | extra.args) { 5 | 6 | # check the label of the mutual information estimator. 
7 | if (has.argument(method, "estimator", learning.extra.args)) 8 | extra.args[["estimator"]] = 9 | check.mi.estimator(extra.args[["estimator"]], data = data) 10 | 11 | # check the node to use the root of the tree (if not specified pick the first 12 | # explanatory variable assuming natural ordering). 13 | if (has.argument(method, "root", learning.extra.args)) { 14 | 15 | if (!is.null(extra.args[["root"]])) 16 | check.nodes(extra.args[["root"]], graph = explanatory, max.nodes = 1) 17 | else 18 | extra.args[["root"]] = explanatory[1] 19 | 20 | }#THEN 21 | 22 | # warn about and remove unused arguments. 23 | extra.args = check.unused.args(extra.args, learning.extra.args[[method]]) 24 | 25 | return(extra.args) 26 | 27 | }#CHECK.CLASSIFIER.ARGS 28 | 29 | # check a prior distribution against the observed variable. 30 | check.classifier.prior = function(prior, training) { 31 | 32 | if (missing(prior) || is.null(prior)) { 33 | 34 | # use the empirical probabilities in the fitted network, or a flat prior 35 | # as a last resort. 36 | if (is(training, c("bn.fit.dnode", "bn.fit.onode"))) 37 | prior = training$prob 38 | else 39 | prior = rep(1, nlevels(training)) 40 | 41 | }#THEN 42 | else { 43 | 44 | if (is(training, c("bn.fit.dnode", "bn.fit.onode"))) 45 | nlvls = dim(training$prob)[1] 46 | else 47 | nlvls = nlevels(training) 48 | 49 | if (length(prior) != nlvls) 50 | stop("the prior distribution and the training variable have a different number of levels.") 51 | if (!is.nonnegative.vector(prior)) 52 | stop("the prior distribution must be expressed as a probability vector.") 53 | 54 | # make sure the prior probabilities sum to one. 55 | prior = prior / sum(prior) 56 | 57 | }#ELSE 58 | 59 | return(prior) 60 | 61 | }#CHECK.CLASSIFIER.PRIOR 62 | 63 | -------------------------------------------------------------------------------- /src/tests/omnibus/custom.test.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | double custom_test_function(SEXP x, SEXP y, SEXP z, SEXP data, SEXP custom_fn, 5 | SEXP custom_args, double *pvalue) { 6 | 7 | double statistic = 0, temp_pvalue = 0; 8 | SEXP call, args_iterator, result; 9 | 10 | /* allocate and populate the pairlist to be valuated. */ 11 | PROTECT(args_iterator = call = allocLang(6)); 12 | /* first slot, the function name. */ 13 | SETCAR(args_iterator, custom_fn); 14 | args_iterator = CDR(args_iterator); 15 | /* second slot, the label of the first node. */ 16 | SETCAR(args_iterator, x); 17 | args_iterator = CDR(args_iterator); 18 | /* third slot, the label of the second node. */ 19 | SETCAR(args_iterator, y); 20 | args_iterator = CDR(args_iterator); 21 | /* fourth slot, the labels of the conditioning set (NULL if empty). */ 22 | SETCAR(args_iterator, z); 23 | args_iterator = CDR(args_iterator); 24 | /* fifth slot, the data. */ 25 | SETCAR(args_iterator, data); 26 | args_iterator = CDR(args_iterator); 27 | /* sixth slot, the optional arguments passed as a list. */ 28 | SETCAR(args_iterator, custom_args); 29 | /* evaluate the custom score function. */ 30 | PROTECT(result = eval(call, R_GlobalEnv)); 31 | 32 | /* the return value must be a scalar, real number. */ 33 | if ((TYPEOF(result) != REALSXP) || (length(result) != 2)) 34 | error("the test for nodes %s and %s must return two scalar, real values.", 35 | CHAR(STRING_ELT(x, 0)), CHAR(STRING_ELT(y, 0))); 36 | 37 | /* the second element of the return value is a p-value. 
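 * Together with the checks above this fixes the contract for a user-supplied
 * test: it is called as fun(x, y, z, data, args) at the R level and must return
 * a two-element numeric vector, e.g. c(statistic = 1.23, p.value = 0.04), with
 * the p-value in [0, 1] and not NA.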
*/ 38 | temp_pvalue = REAL(result)[1]; 39 | if (ISNAN(temp_pvalue)) 40 | error("the test for nodes %s and %s has a NA p-value.", 41 | CHAR(STRING_ELT(x, 0)), CHAR(STRING_ELT(y, 0))); 42 | if ((temp_pvalue < 0) || (temp_pvalue > 1)) 43 | error("the test for nodes %s and %s has a p-value not in [0, 1].", 44 | CHAR(STRING_ELT(x, 0)), CHAR(STRING_ELT(y, 0))); 45 | 46 | statistic = REAL(result)[0]; 47 | *pvalue = temp_pvalue; 48 | 49 | UNPROTECT(2); 50 | 51 | return statistic; 52 | 53 | }/*CUSTOM_TEST_FUNCTION*/ 54 | 55 | 56 | -------------------------------------------------------------------------------- /man/gRain.Rd: -------------------------------------------------------------------------------- 1 | \name{gRain integration} 2 | \alias{gRain integration} 3 | \alias{as.bn.fit} 4 | \alias{as.bn.fit.grain} 5 | \alias{as.bn.grain} 6 | \alias{as.grain} 7 | \alias{as.grain.bn.fit} 8 | \alias{as.grain.bn} 9 | \title{Import and export networks from the gRain package} 10 | \description{ 11 | 12 | Convert \code{bn.fit} objects to \code{grain} objects and vice versa. 13 | 14 | } 15 | \usage{ 16 | \method{as.bn.fit}{grain}(x, including.evidence = FALSE, ...) 17 | \method{as.grain}{bn.fit}(x) 18 | \method{as.bn}{grain}(x, ...) 19 | } 20 | \arguments{ 21 | \item{x}{an object of class \code{grain(code)} (for \code{as.bn.fit}) or 22 | \code{bn.fit()} (for \code{as.grain}).} 23 | \item{including.evidence}{a boolean value. If \code{FALSE}, the \code{grain} 24 | object is converted without considering any evidence that has been set into 25 | it. If \code{TRUE}, any hard evidence is carried over into the \code{bn.fit} 26 | object as a zero-one probability distribution.} 27 | \item{\dots}{extra arguments from the generic method (currently ignored).} 28 | } 29 | \note{ 30 | 31 | Conditional probability tables in \code{grain} objects must be completely 32 | specified; on the other hand, \code{bn.fit} allows \code{NaN} values for 33 | unobserved parents' configurations. Such \code{bn.fit} objects will be 34 | converted to $m$ \code{grain} objects by replacing the missing conditional 35 | probability distributions with uniform distributions. 36 | 37 | Another solution to this problem is to fit another \code{bn.fit} with 38 | \code{method = "bayes"} and a low \code{iss} value, using the same data 39 | and network structure. 40 | 41 | Ordinal nodes will be treated as categorical by \code{as.grain}, 42 | disregarding the ordering of the levels. 43 | 44 | } 45 | \value{ 46 | 47 | An object of class \code{grain} (for \code{as.grain}), \code{bn.fit} (for 48 | \code{as.bn.fit}) or \code{bn} (for \code{as.bn}). 49 | 50 | } 51 | \examples{ 52 | \dontrun{ 53 | library(gRain) 54 | a = bn.fit(hc(learning.test), learning.test) 55 | b = as.grain(a) 56 | c = as.bn.fit(b)}} 57 | \author{Marco Scutari} 58 | \keyword{utilities} 59 | \keyword{interfaces to other packages} 60 | -------------------------------------------------------------------------------- /man/bf.Rd: -------------------------------------------------------------------------------- 1 | \name{BF} 2 | \alias{BF} 3 | \title{Bayes factor between two network structures} 4 | \description{ 5 | 6 | Compute the Bayes factor between the structures of two Bayesian networks.. 
7 | 8 | } 9 | \usage{ 10 | BF(num, den, data, score, ..., log = TRUE) 11 | } 12 | \arguments{ 13 | \item{num, den}{two objects of class \code{bn}, corresponding to the numerator 14 | and the denominator models in the Bayes factor.} 15 | \item{data}{a data frame containing the data to be used to compute the Bayes 16 | factor.} 17 | \item{score}{a character string, the label of a posterior network score or 18 | \code{custom} for the custom score. If none is specified, the default score 19 | is the \emph{Bayesian Dirichlet equivalent} score (\code{bde}) for discrete 20 | networks and the \emph{Bayesian Gaussian score} (\code{bge}) for Gaussian 21 | networks. Other kinds of Bayesian networks are not currently supported.} 22 | \item{\dots}{extra tuning arguments for the posterior scores. See 23 | \code{\link{score}} for details.} 24 | \item{log}{a boolean value. If \code{TRUE} the Bayes factor is given as 25 | log(BF).} 26 | } 27 | \note{ 28 | 29 | The Bayes factor for two network structures, by definition, is the ratio of 30 | the respective marginal likelihoods which is equivalent to the ration of 31 | the corresponding posterior probabilities if we assume the \code{uniform} 32 | prior over all possible DAGs. However, note that it is possible to specify 33 | different priors using the \dQuote{\code{...}} arguments of \code{BF()}; in 34 | that case the value returned by the function will not be the classic Bayes 35 | factor. 36 | 37 | } 38 | \value{ 39 | 40 | A single numeric value, the Bayes factor of the two network structures 41 | \code{num} and \code{den}. 42 | 43 | } 44 | \examples{ 45 | data(learning.test) 46 | 47 | dag1 = model2network("[A][B][F][C|B][E|B][D|A:B:C]") 48 | dag2 = model2network("[A][C][B|A][D|A][E|D][F|A:C:E]") 49 | BF(dag1, dag2, learning.test, score = "bds", iss = 1) 50 | } 51 | \seealso{\code{\link{score}}, \code{\link{compare}}, 52 | \code{\link{bf.strength}}.} 53 | \author{Marco Scutari} 54 | \keyword{network scores} 55 | \keyword{structure learning} 56 | -------------------------------------------------------------------------------- /R/frontend-queries.R: -------------------------------------------------------------------------------- 1 | 2 | # perform conditional probability queries. 3 | cpquery = function(fitted, event, evidence, cluster = NULL, method = "ls", ..., 4 | debug = FALSE) { 5 | 6 | check.fit(fitted) 7 | check.logical(debug) 8 | # check event and evidence. 9 | if (missing(event)) 10 | stop("the expression describing the event is missing.") 11 | if (missing(evidence)) 12 | stop("the expression describing the evidence is missing.") 13 | # check the generation method. 14 | check.label(method, choices = cpq.algorithms, labels = cpq.labels, 15 | argname = "query method", see = "cpquery") 16 | # check the cluster. 17 | cluster = check.cluster(cluster) 18 | 19 | if (!is.null(cluster)) { 20 | 21 | # disable debugging, the slaves do not cat() here. 22 | if (debug) { 23 | 24 | warning("disabling debugging output for parallel computing.") 25 | debug = FALSE 26 | 27 | }#THEN 28 | 29 | }#THEN 30 | 31 | extra.args = check.cpq.args(fitted = fitted, event = NULL, 32 | extra.args = list(...), method = method, action = "cpquery") 33 | 34 | # deparse the expression for the event before passing it to 35 | # the backend and beyond. 36 | event = substitute(event) 37 | 38 | # recheck event and evidence expression after deparsing. 
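# (illustrative sketch: in a call such as
# cpquery(fitted, event = (A == "a"), evidence = (B == "b"), method = "ls")
# the event and, for method = "ls", the evidence are captured unevaluated by
# substitute() and only evaluated later on the particles simulated from the
# network.)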
39 | if (!(is.language(event) || identical(event, TRUE))) 40 | stop("event must be an unevaluated expression or TRUE.") 41 | if (method == "ls") { 42 | 43 | if (missing(evidence)) 44 | stop("the expression describing the evidence is missing.") 45 | 46 | # deparse evidence expression before passing it to the backend and beyond. 47 | evidence = substitute(evidence) 48 | # recheck event and evidence expression after deparsing. 49 | if (!(is.language(evidence) || identical(evidence, TRUE))) 50 | stop("evidence must be an unevaluated expression or TRUE.") 51 | 52 | }#THEN 53 | else if (method == "lw") { 54 | 55 | evidence = check.evidence(evidence, fitted) 56 | 57 | }#THEN 58 | 59 | # special-case pointless queries. 60 | if (isTRUE(event)) 61 | return(1) 62 | 63 | conditional.probability.query(fitted = fitted, event = event, 64 | evidence = evidence, method = method, extra = extra.args, cluster = cluster, 65 | debug = debug) 66 | 67 | }#CPQUERY 68 | 69 | -------------------------------------------------------------------------------- /src/bnlearn/nparams.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../minimal/common.h" 4 | #include "../math/linear.algebra.h" 5 | 6 | /* get the number of parameters of the whole network (mixed case, also handles 7 | * discrete and Gaussian networks). */ 8 | SEXP nparams_cgnet(SEXP graph, SEXP data, SEXP debug) { 9 | 10 | int i = 0, j = 0, nnodes = 0; 11 | int *nlevels = NULL, *index = NULL, ngp = 0; 12 | double nconfig = 0, node_params = 0, all_params = 0; 13 | bool debugging = isTRUE(debug); 14 | SEXP nodes = R_NilValue, node_data, parents, temp; 15 | 16 | /* get nodes' number and data. */ 17 | node_data = getListElement(graph, "nodes"); 18 | nnodes = length(node_data); 19 | PROTECT(nodes = getAttrib(node_data, R_NamesSymbol)); 20 | /* cache the number of levels of each variables (zero = continuous). */ 21 | nlevels = Calloc1D(nnodes, sizeof(int)); 22 | for (i = 0; i < nnodes; i++) { 23 | 24 | temp = VECTOR_ELT(data, i); 25 | if (TYPEOF(temp) == INTSXP) 26 | nlevels[i] = NLEVELS(temp); 27 | 28 | }/*FOR*/ 29 | 30 | for (i = 0; i < nnodes; i++) { 31 | 32 | /* extract the parents of the node and match them. */ 33 | parents = getListElement(VECTOR_ELT(node_data, i), "parents"); 34 | PROTECT(temp = match(nodes, parents, 0)); 35 | index = INTEGER(temp); 36 | 37 | /* compute the number of regressors and of configurations. */ 38 | for (j = 0, ngp = 0, nconfig = 1; j < length(parents); j++) { 39 | 40 | if (nlevels[index[j] - 1] == 0) 41 | ngp++; 42 | else 43 | nconfig *= nlevels[index[j] - 1]; 44 | 45 | }/*FOR*/ 46 | /* compute the overall number of parameters as regressors plus intercept 47 | * and standard error (if continuous) or the number of levels - 1 (if 48 | * discrete) times the configurations of the discrete parents. */ 49 | node_params = nconfig * (nlevels[i] == 0 ? ngp + 2 : nlevels[i] - 1); 50 | 51 | if (debugging) 52 | Rprintf("* node %s has %.0lf parameter(s).\n", NODE(i), node_params); 53 | 54 | /* update the return value. 
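 * (For example, a discrete node with 3 levels and two binary discrete parents
 * contributes nconfig * (nlevels - 1) = 4 * (3 - 1) = 8 parameters, while a
 * Gaussian node with two continuous parents and no discrete parents contributes
 * 1 * (ngp + 2) = 4: the intercept, two regression coefficients and the
 * standard error.)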
*/ 55 | all_params += node_params; 56 | 57 | UNPROTECT(1); 58 | 59 | }/*FOR*/ 60 | 61 | Free1D(nlevels); 62 | UNPROTECT(1); 63 | 64 | return ScalarReal(all_params); 65 | 66 | }/*NPARAMS_CGNET*/ 67 | 68 | 69 | -------------------------------------------------------------------------------- /man/foreign.Rd: -------------------------------------------------------------------------------- 1 | \name{foreign files utilities} 2 | \alias{read.bif} 3 | \alias{write.bif} 4 | \alias{read.dsc} 5 | \alias{write.dsc} 6 | \alias{read.net} 7 | \alias{write.net} 8 | \alias{write.dot} 9 | \title{Read and write BIF, NET, DSC and DOT files} 10 | \description{ 11 | 12 | Read networks saved from other programs into \code{bn.fit} objects, and dump 13 | \code{bn} and \code{bn.fit} objects into files for other programs to read. 14 | 15 | } 16 | \usage{ 17 | # Old (non-XML) Bayesian Interchange format. 18 | read.bif(file, debug = FALSE) 19 | write.bif(file, fitted) 20 | 21 | # Microsoft Interchange format. 22 | read.dsc(file, debug = FALSE) 23 | write.dsc(file, fitted) 24 | 25 | # HUGIN flat network format. 26 | read.net(file, debug = FALSE) 27 | write.net(file, fitted) 28 | 29 | # Graphviz DOT format. 30 | write.dot(file, graph) 31 | } 32 | \arguments{ 33 | \item{file}{a connection object or a character string.} 34 | \item{fitted}{an object of class \code{bn.fit}.} 35 | \item{graph}{an object of class \code{bn} or \code{bn.fit}.} 36 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 37 | printed; otherwise the function is completely silent.} 38 | } 39 | \note{ 40 | 41 | All the networks present in the Bayesian Network Repository have associated 42 | BIF, DSC and NET files that can be imported with \code{read.bif()}, 43 | \code{read.dsc()} and \code{read.net()}. 44 | 45 | HUGIN can import and export NET files; Netica can read (but not write) DSC 46 | files; and GeNIe can read and write both DSC and NET files. 47 | 48 | DOT files can be read by Graphviz, Gephi and a variety of other programs. 49 | 50 | Please note that these functions work on a "best effort" basis, as the parsing 51 | of these formats have been implemented by reverse engineering the file format 52 | from publicly available examples. 53 | 54 | } 55 | \value{ 56 | 57 | \code{read.bif()}, \code{read.dsc()} and \code{read.net()} return an object of class 58 | \code{bn.fit}. 59 | 60 | \code{write.bif()}, \code{write.dsc()}, \code{write.net()} and \code{write.dot()} 61 | return \code{NULL} invisibly. 62 | 63 | } 64 | \references{ 65 | 66 | Bayesian Network Repository, \url{https://www.bnlearn.com/bnrepository/}. 67 | 68 | } 69 | \author{Marco Scutari} 70 | \keyword{import/export to file} 71 | -------------------------------------------------------------------------------- /R/averaged.network.R: -------------------------------------------------------------------------------- 1 | 2 | # model averaging for bootstrapped network structures. 3 | averaged.network.backend = function(strength, threshold) { 4 | 5 | nodes = attr(strength, "nodes") 6 | e = empty.graph(nodes) 7 | 8 | # arcs with a strength of one should always be selected, regardless of 9 | # the threshold. 10 | significant = (strength$strength > threshold) | (strength$strength == 1) 11 | 12 | # filter also the direction if present in the bn.strength object. 13 | if ("direction" %in% names(strength)) 14 | significant = significant & (strength$direction >= 0.5) 15 | 16 | # nothing to see, move along. 
17 | if (!any(significant)) 18 | return(e) 19 | 20 | candidate.arcs = as.matrix(strength[significant, c("from", "to"), drop = FALSE]) 21 | 22 | if (all(which.undirected(candidate.arcs))) { 23 | 24 | # update the arcs of the network, no cycles. 25 | e$arcs = candidate.arcs 26 | 27 | }#THEN 28 | else { 29 | 30 | # update the arcs of the network, minding cycles. 31 | e$arcs = .Call(call_smart_network_averaging, 32 | arcs = candidate.arcs, 33 | nodes = nodes, 34 | weights = strength$strength[significant]) 35 | 36 | }#ELSE 37 | 38 | # update the network structure. 39 | e$nodes = cache.structure(nodes, arcs = e$arcs) 40 | # add back illegal arcs, so that cpdag() works correctly. 41 | if ("illegal" %in% names(attributes(strength))) 42 | e$learning$illegal = attr(strength, "illegal") 43 | 44 | return(e) 45 | 46 | }#AVERAGED.NETWORK.BACKEND 47 | 48 | # compute the significance threshold for Friedman's confidence. 49 | threshold = function(strength, method = "l1") { 50 | 51 | # do not blow up with graphs with only 1 node. 52 | if (nrow(strength) == 0) 53 | return(0) 54 | 55 | e = ecdf(strength$strength) 56 | u = knots(e) 57 | 58 | if (method == "l1") { 59 | 60 | norm = function(p) 61 | sum( diff(unique(c(0, u, 1))) * abs(e(unique(c(0, u[u < 1]))) - p)) 62 | 63 | }#THEN 64 | 65 | p0 = optimize(f = norm, interval = c(0, 1))$minimum 66 | 67 | # double-check the boundaries, they are legal solutions but optimize() does 68 | # not check them. 69 | if (norm(1) < norm(p0)) 70 | p0 = 1 71 | if (norm(0) < norm(p0)) 72 | p0 = 0 73 | 74 | quantile(strength$strength, p0, type = 1, names = FALSE) 75 | 76 | }#THRESHOLD 77 | 78 | -------------------------------------------------------------------------------- /man/bn.fit.plots.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.fit plots} 2 | \alias{bn.fit plots} 3 | \alias{bn.fit.qqplot} 4 | \alias{bn.fit.xyplot} 5 | \alias{bn.fit.histogram} 6 | \alias{bn.fit.barchart} 7 | \alias{bn.fit.dotplot} 8 | \title{Plot fitted Bayesian networks} 9 | \description{ 10 | 11 | Plot functions for the \code{bn.fit}, \code{bn.fit.dnode} and 12 | \code{bn.fit.gnode} classes, based on the \pkg{lattice} package. 13 | 14 | } 15 | \usage{ 16 | ## for Gaussian Bayesian networks. 17 | bn.fit.qqplot(fitted, xlab = "Theoretical Quantiles", 18 | ylab = "Sample Quantiles", main, ...) 19 | bn.fit.histogram(fitted, density = TRUE, xlab = "Residuals", 20 | ylab = ifelse(density, "Density", ""), main, ...) 21 | bn.fit.xyplot(fitted, xlab = "Fitted values", ylab = "Residuals", main, ...) 22 | ## for discrete (multinomial and ordinal) Bayesian networks. 23 | bn.fit.barchart(fitted, xlab = "Probabilities", ylab = "Levels", main, ...) 24 | bn.fit.dotplot(fitted, xlab = "Probabilities", ylab = "Levels", main, ...) 25 | } 26 | \arguments{ 27 | \item{fitted}{an object of class \code{bn.fit}, \code{bn.fit.dnode} or 28 | \code{bn.fit.gnode}.} 29 | \item{xlab, ylab, main}{the label of the x axis, of the y axis, and the plot 30 | title.} 31 | \item{density}{a boolean value. If \code{TRUE} the histogram is plotted using 32 | relative frequencies, and the matching normal density is added to the plot.} 33 | \item{\dots}{additional arguments to be passed to \pkg{lattice} functions.} 34 | } 35 | \details{ 36 | 37 | \code{bn.fit.qqplot()} draws a quantile-quantile plot of the residuals. 38 | 39 | \code{bn.fit.histogram()} draws a histogram of the residuals, using either 40 | absolute or relative frequencies. 
41 | 42 | \code{bn.fit.xyplot()} plots the residuals versus the fitted values. 43 | 44 | \code{bn.fit.barchart()} and \code{bn.fit.dotplot} plot the probabilities in 45 | the conditional probability table associated with each node. 46 | 47 | } 48 | \value{ 49 | 50 | The \pkg{lattice} plot objects. Note that if auto-printing is turned off (for 51 | example when the code is loaded with the \code{source} function), the return 52 | value must be printed explicitly for the plot to be displayed. 53 | 54 | } 55 | \seealso{ 56 | \code{\link{bn.fit}}, \code{\link{bn.fit class}}. 57 | } 58 | \author{Marco Scutari} 59 | \keyword{interfaces to other packages} 60 | \keyword{plots} 61 | -------------------------------------------------------------------------------- /src/tests/discrete/df.adjust.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | double discrete_df(test_e test, int *ni, int llx, int *nj, int lly) { 5 | 6 | int i = 0, j = 0, alx = 0, aly = 0; 7 | double df = 0; 8 | 9 | switch(test) { 10 | 11 | /* usual degrees of freedom. */ 12 | case MI: 13 | case MI_SH: 14 | case X2: 15 | df = (llx - 1) * (lly - 1); 16 | break; 17 | 18 | /* adjust degrees of freedom: zeroes are considered structural if they 19 | * are part of a column or a row with a zero marginal, the rest are 20 | * considered sampling zeros. */ 21 | case MI_ADF: 22 | case X2_ADF: 23 | for (i = 0; i < llx; i++) 24 | alx += (ni[i] > 0); 25 | for (j = 0; j < lly; j++) 26 | aly += (nj[j] > 0); 27 | 28 | /* ensure the degrees of freedom will not be negative. */ 29 | alx = (alx >= 1) ? alx : 1; 30 | aly = (aly >= 1) ? aly : 1; 31 | 32 | df = (alx - 1) * (aly - 1); 33 | 34 | break; 35 | 36 | default: 37 | error("no degrees of freedom for this test."); 38 | 39 | }/*SWITCH*/ 40 | 41 | return df; 42 | 43 | }/*DISCRETE_DF*/ 44 | 45 | double discrete_cdf(test_e test, int **ni, int llx, int **nj, int lly, int llz) { 46 | 47 | int i = 0, j = 0, k = 0, alx = 0, aly = 0; 48 | double df = 0; 49 | 50 | switch(test) { 51 | 52 | /* usual degrees of freedom. */ 53 | case MI: 54 | case MI_SH: 55 | case X2: 56 | df = (llx - 1) * (lly - 1) * llz; 57 | break; 58 | 59 | /* adjust degrees of freedom: zeroes are considered structural if they 60 | * are part of a column or a row with a zero marginal, the rest are 61 | * considered sampling zeros. */ 62 | case MI_ADF: 63 | case X2_ADF: 64 | for (k = 0; k < llz; k++) { 65 | 66 | alx = aly = 0; 67 | 68 | for (i = 0; i < llx; i++) 69 | alx += (ni[k][i] > 0); 70 | for (j = 0; j < lly; j++) 71 | aly += (nj[k][j] > 0); 72 | 73 | /* ensure the degrees of freedom will not be negative. */ 74 | alx = (alx >= 1) ? alx : 1; 75 | aly = (aly >= 1) ? aly : 1; 76 | 77 | df += (alx - 1) * (aly - 1); 78 | 79 | }/*FOR*/ 80 | 81 | break; 82 | 83 | default: 84 | error("no degrees of freedom for this test."); 85 | 86 | }/*SWITCH*/ 87 | 88 | return df; 89 | 90 | }/*DISCRETE_CDF*/ 91 | 92 | -------------------------------------------------------------------------------- /man/bnboot.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.boot} 2 | \alias{bn.boot} 3 | \title{Nonparametric bootstrap of Bayesian networks} 4 | \description{ 5 | 6 | Apply a user-specified function to the Bayesian network structures learned 7 | from bootstrap samples of the original data. 
8 | 9 | } 10 | \usage{ 11 | bn.boot(data, statistic, R = 200, m = nrow(data), algorithm, 12 | algorithm.args = list(), statistic.args = list(), cluster, 13 | debug = FALSE) 14 | } 15 | \arguments{ 16 | \item{data}{a data frame containing the variables in the model.} 17 | \item{statistic}{a function or a character string (the name of a function) 18 | to be applied to each bootstrap replicate.} 19 | \item{R}{a positive integer, the number of bootstrap replicates.} 20 | \item{m}{a positive integer, the size of each bootstrap replicate.} 21 | \item{algorithm}{a character string, the learning algorithm to be applied 22 | to the bootstrap replicates. See \code{\link{structure learning}} and the 23 | documentation of each algorithm for details.} 24 | \item{algorithm.args}{a list of extra arguments to be passed to the learning 25 | algorithm.} 26 | \item{statistic.args}{a list of extra arguments to be passed to the function 27 | specified by \code{statistic}.} 28 | \item{cluster}{an optional cluster object from package \pkg{parallel}.} 29 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 30 | printed; otherwise the function is completely silent.} 31 | } 32 | \details{ 33 | 34 | The first argument of \code{statistic} is the \code{bn} object encoding the 35 | network structure learned from the bootstrap sample; the arguments specified 36 | in \code{statistics.args} are extracted from the list and passed to 37 | \code{statistics} as the 2nd, 3rd, etc. arguments. 38 | 39 | } 40 | \value{ 41 | 42 | A list containing the results of the calls to \code{statistic}. 43 | 44 | } 45 | \references{ 46 | 47 | Friedman N, Goldszmidt M, Wyner A (1999). "Data Analysis with Bayesian 48 | Networks: A Bootstrap Approach." \emph{Proceedings of the 15th Annual 49 | Conference on Uncertainty in Artificial Intelligence}, 196--201. 50 | 51 | } 52 | \examples{ 53 | \dontrun{ 54 | data(learning.test) 55 | bn.boot(data = learning.test, R = 2, m = 500, algorithm = "gs", 56 | statistic = arcs) 57 | } 58 | } 59 | \author{Marco Scutari} 60 | \seealso{\code{\link{bn.cv}}, \code{\link{rbn}}.} 61 | \keyword{resampling} 62 | -------------------------------------------------------------------------------- /man/bn.kcv.class.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.kcv class} 2 | \alias{bn.kcv class} 3 | \alias{bn.kcv-class} 4 | \alias{bn.kcv.list class} 5 | \alias{bn.kcv.list-class} 6 | \title{The bn.kcv class structure} 7 | \description{ 8 | 9 | The structure of an object of S3 class \code{bn.kcv} or \code{bn.kcv.list}. 10 | 11 | } 12 | \details{ 13 | 14 | An object of class \code{bn.kcv.list} is a list whose elements are objects 15 | of class \code{bn.kcv}. 16 | 17 | An object of class \code{bn.kcv} is a list whose elements correspond to the 18 | iterations of a k-fold cross-validation. Each element contains the following 19 | objects: 20 | 21 | \itemize{ 22 | 23 | \item \code{test}: an integer vector, the indexes of the observations 24 | used as a test set. 25 | \item \code{fitted}: an object of class \code{bn.fit}, the Bayesian network 26 | fitted from the training set. 27 | \item \code{learning}: the \code{learning} element of the \code{bn} object 28 | that was used for parameter learning from the training set (either learned 29 | from the training set as well or specified by the user). 30 | \item \code{loss}: the value of the loss function (for 31 | \code{method = "hold-out"} or \code{loss = "logl"}), or \code{NA} 32 | (otherwise). 
33 | 34 | } 35 | 36 | If the loss function requires to predict values from the test sets, each 37 | element also contains: 38 | 39 | \itemize{ 40 | 41 | \item \code{predicted}: a factor or a numeric vector, the predicted values 42 | for the target node in the test set. 43 | \item \code{observed}: a factor or a numeric vector, the observed values 44 | for the target node in the test set. 45 | 46 | } 47 | 48 | In addition, an object of class \code{bn.kcv} has the following attributes: 49 | 50 | \itemize{ 51 | 52 | \item \code{loss}: a character string, the label of the loss function. 53 | \item \code{mean}: the mean of the values of the loss function computed in 54 | the \code{k} iterations of the cross-validation, which is printed as the 55 | "expected loss" or averaged to compute the "average loss over the runs". 56 | \item \code{bn}: either a character string (the label of the learning 57 | algorithm to be applied to the training data in each iteration) or an 58 | object of class \code{bn} (a fixed network structure). 59 | 60 | } 61 | 62 | } 63 | \author{Marco Scutari} 64 | \keyword{classes} 65 | -------------------------------------------------------------------------------- /R/classifiers.R: -------------------------------------------------------------------------------- 1 | 2 | # backend of the naive Bayes classifier. 3 | naive.bayes.backend = function(data, training, explanatory) { 4 | 5 | # cache the node set. 6 | nodes = c(training, explanatory) 7 | # create the empty graph. 8 | res = empty.graph(nodes) 9 | # create the set of arcs outgoing from the training variable. 10 | res$arcs = matrix(c(rep(training, length(explanatory)), explanatory), 11 | ncol = 2, byrow = FALSE) 12 | # update the network structure. 13 | res$nodes = cache.structure(nodes, arcs = res$arcs) 14 | # set a second class "bn.naive" to reroute method dispatch as needed. 15 | class(res) = c("bn.naive", "bn") 16 | 17 | return(res) 18 | 19 | }#NAIVE.BAYES.BACKEND 20 | 21 | # backend of the TAN algorithm. 22 | tan.backend = function(data, training, explanatory, whitelist, blacklist, mi, 23 | root, debug = FALSE) { 24 | 25 | # cache the node set. 26 | nodes = c(training, explanatory) 27 | # create the empty graph. 28 | res = empty.graph(nodes) 29 | # create the set of arcs outgoing from the training variable. 30 | class.arcs = matrix(c(rep(training, length(explanatory)), explanatory), 31 | ncol = 2, byrow = FALSE) 32 | 33 | # separate features and target class variable in data and metadata. 34 | features.data = 35 | .data.frame.column(data, explanatory, drop = FALSE, keep.names = TRUE) 36 | attr(features.data, "metadata") = collect.metadata(features.data) 37 | class.data = .data.frame.column(data, training, drop = TRUE) 38 | 39 | # call chow-liu to build the rest of the network. 40 | chow.liu.arcs = 41 | chow.liu.backend(x = features.data, nodes = explanatory, estimator = mi, 42 | whitelist = whitelist, blacklist = blacklist, conditional = class.data, 43 | debug = debug) 44 | 45 | # set the directions of the arcs in the Chow-Liu tree. 46 | chow.liu.arcs = .Call(call_tree_directions, 47 | arcs = chow.liu.arcs, 48 | nodes = explanatory, 49 | root = root, 50 | debug = FALSE) 51 | 52 | # merge learned and predetermined arcs. 53 | res$arcs = arcs.rbind(class.arcs, chow.liu.arcs) 54 | # update the network structure. 55 | res$nodes = cache.structure(nodes, arcs = res$arcs) 56 | # set a second class "bn.tan" to reroute method dispatch as needed. 
57 | class(res) = c("bn.tan", "bn") 58 | 59 | return(res) 60 | 61 | }#TAN.BACKEND 62 | 63 | -------------------------------------------------------------------------------- /man/count.graphs.Rd: -------------------------------------------------------------------------------- 1 | \name{graph enumeration} 2 | \alias{count.graphs} 3 | \alias{graph enumeration} 4 | \title{Count graphs with specific characteristics} 5 | \description{ 6 | Count directed acyclic graphs of various sizes and/or with specific 7 | characteristics. 8 | } 9 | \usage{ 10 | count.graphs(type = "all.dags", ..., debug = FALSE) 11 | } 12 | \arguments{ 13 | \item{type}{a character string, the label describing the types of graphs to 14 | be counted (see below).} 15 | \item{\dots}{additional parameters (see below).} 16 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 17 | printed; otherwise the function is completely silent. Ignored in some 18 | generation methods.} 19 | } 20 | \details{ 21 | 22 | The types of graphs, and the associated additional parameters, are: 23 | 24 | \itemize{ 25 | 26 | \item \code{"all-dags"}: all directed acyclic graphs with \code{nodes} 27 | nodes. 28 | \item \code{"dags-given-ordering"}: all directed acyclic graphs 29 | with a specific topological ordering and \code{nodes} nodes. 30 | \item \code{"dags-with-k-roots"}: all directed acyclic graphs with \code{k} 31 | root nodes and \code{nodes} nodes. 32 | \item \code{"dags-with-r-arcs"}: all directed acyclic graphs with \code{r} 33 | arcs and \code{nodes} nodes. 34 | \item \code{"dags-in-equivalence-class"}: all directed acyclic arcs in the 35 | equivalence class described by \code{eqclass}. 36 | 37 | } 38 | 39 | } 40 | \value{ 41 | 42 | \code{count.graphs()} returns an object of class \code{bigz} from the 43 | \pkg{gmp} package, a vector with the graph counts. 44 | 45 | } 46 | \references{ 47 | 48 | Harary F, Palmer EM (1973). "Graphical Enumeration." Academic Press. 49 | 50 | Rodionov VI (1992). "On the Number of Labeled Acyclic Digraphs." 51 | \emph{Discrete Mathematics}, \strong{105}:319--321. 52 | 53 | Liskovets VA (1976). "On the Number of Maximal Vertices of a Random Acyclic 54 | Digraph." \emph{Theory of Probability and its Applications}, 55 | \strong{20}(2):401--409. 56 | 57 | Wienobst M, Luttermann M, Bannach M, Liskiewicz (2023). "Efficient Enumeration 58 | of Markov Equivalent DAGs." \emph{Proceedings of the 37th AAAI Conference on 59 | Artificial Intelligence}, 12313--12320. 60 | 61 | } 62 | \examples{ 63 | \dontrun{ 64 | count.graphs("dags.with.r.arcs", nodes = 3:6, r = 2) 65 | }} 66 | \author{Marco Scutari} 67 | \keyword{graphs} 68 | -------------------------------------------------------------------------------- /R/sanitization-misc.R: -------------------------------------------------------------------------------- 1 | 2 | # check labels for various arguments. 3 | check.label = function(arg, choices, labels, argname, see) { 4 | 5 | if (!is.string(arg)) 6 | stop("the ", argname, " must be a single character string.") 7 | 8 | if (arg %in% choices) 9 | return(invisible(NULL)) 10 | 11 | # concatenate valid values, optinally with labels. 
12 | if (missing(labels)) { 13 | 14 | choices = paste(paste('"', choices, '"', sep = ""), collapse = ", ") 15 | 16 | }#THEN 17 | else { 18 | 19 | labels = paste("(", labels[choices], ")", sep = "") 20 | choices = paste('"', choices, '"', sep = "") 21 | nl = length(labels) 22 | choices = paste(choices, labels, collapse = ", ") 23 | 24 | }#ELSE 25 | 26 | # mention the most relevant manual page. 27 | if (missing(see)) 28 | see = character(0) 29 | else 30 | see = paste(" See ?", see, " for details.", sep = "") 31 | 32 | # build the error message. 33 | errmsg = paste("valid ", argname, "(s) are ", choices, ".", see, sep = "") 34 | 35 | # make sure the error message is not truncated if possible at all. 36 | errlen = unlist(options("warning.length"), use.names = FALSE) 37 | options("warning.length" = max(1000, min(8170, nchar(errmsg) + 20))) 38 | on.exit(options("warning.length" = errlen)) 39 | 40 | stop(errmsg) 41 | 42 | }#CHECK.LABEL 43 | 44 | # warn about unused arguments and remove them. 45 | check.unused.args = function(dots, used.args) { 46 | 47 | if (is(dots, "list")) 48 | args = names(dots) 49 | else 50 | args = dots 51 | 52 | unused = args %!in% used.args 53 | 54 | if (any(unused)) 55 | warning("unused argument(s):", paste0(" '", args[unused], "'"), ".") 56 | 57 | # remove unused args to avoid problems later on (like conflicts in partial 58 | # name matching). 59 | dots = dots[used.args] 60 | 61 | return(dots) 62 | 63 | }#CHECK.UNUSED.ARGS 64 | 65 | # check whether a package is loaded. 66 | check.and.load.package = function(pkg) { 67 | 68 | # silence all warnings while looking for suggested packages. 69 | warning.level = as.numeric(options("warn")) 70 | options("warn" = -1) 71 | on.exit(options("warn" = warning.level)) 72 | 73 | if (!requireNamespace(pkg)) 74 | stop("this function requires the ", pkg, " package.") 75 | 76 | }#CHECK.AND.LOAD.PACKAGE 77 | 78 | # reverse lookup for optional arguments. 79 | has.argument = function(label, arg, lookup) { 80 | 81 | arg %in% lookup[[label]] 82 | 83 | }#HAS.ARGUMENT 84 | -------------------------------------------------------------------------------- /R/frontend-causal.R: -------------------------------------------------------------------------------- 1 | # mutilated network used for interventions and in likelihood weighting. 2 | mutilated = function(x, evidence) { 3 | 4 | check.bn.or.fit(x) 5 | # check the evidence, disallowing non-ideal interventions if needed. 6 | evidence = check.evidence(evidence, graph = x, 7 | ideal.only = is(x, "bn.fit.gnet")) 8 | 9 | if (is(x, "bn")) { 10 | 11 | # the network must be a directed acyclic graph to treat it as causal. 12 | if (!is.completely.directed(x)) 13 | stop("the graph is only partially directed.") 14 | if (!is.acyclic(x$arcs, names(x$nodes), directed = TRUE)) 15 | stop("the graph contains cycles.") 16 | 17 | return(mutilated.backend.bn(x, evidence)) 18 | 19 | }#THEN 20 | else { 21 | 22 | return(mutilated.backend.fitted(x, evidence)) 23 | 24 | }#ELSE 25 | 26 | }#MUTILATED 27 | 28 | # twin network used for counterfactuals. 29 | twin = function(x) { 30 | 31 | check.bn.or.fit(x) 32 | 33 | if (is(x, "bn.fit")) 34 | x = bn.net(x) 35 | else { 36 | 37 | # the network must be a directed acyclic graph to treat it as causal. 
38 | if (!is.completely.directed(x)) 39 | stop("the graph is only partially directed.") 40 | if (!is.acyclic(x$arcs, names(x$nodes), directed = TRUE)) 41 | stop("the graph contains cycles.") 42 | 43 | }#ELSE 44 | 45 | twin.backend.bn(x) 46 | 47 | }#TWIN 48 | 49 | # perform an intervention on a network or fitted network. 50 | intervention = function(x, evidence) { 51 | 52 | # the mutilation encodes the intervention into the network. 53 | mutilated(x = x, evidence = evidence) 54 | 55 | }#INTERVENTION 56 | 57 | # set up a counterfactual network. 58 | counterfactual = function(x, evidence, merging = TRUE) { 59 | 60 | check.bn.or.fit(x) 61 | check.logical(merging) 62 | 63 | # check the evidence, disallowing non-ideal interventions if needed. 64 | evidence = check.evidence(evidence, graph = x, 65 | ideal.only = is(x, "bn.fit.gnet")) 66 | 67 | if (is(x, "bn.fit")) 68 | x = bn.net(x) 69 | else { 70 | 71 | # the network must be a directed acyclic graph to treat it as causal. 72 | if (!is.completely.directed(x)) 73 | stop("the graph is only partially directed.") 74 | if (!is.acyclic(x$arcs, names(x$nodes), directed = TRUE)) 75 | stop("the graph contains cycles.") 76 | 77 | }#ELSE 78 | 79 | counterfactual.backend.bn(x = x, evidence = evidence, merging = merging) 80 | 81 | }#COUNTERFACTUAL 82 | 83 | -------------------------------------------------------------------------------- /src/inference/likelihood.weighting.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/globals.h" 3 | #include "../core/math.functions.h" 4 | #include "../fitted/fitted.h" 5 | #include "../core/data.table.h" 6 | #include "loglikelihood/loglikelihood.h" 7 | #include "../minimal/common.h" 8 | 9 | void c_lw_weights(SEXP fitted, SEXP data, int n, double *w, SEXP keep, 10 | bool debugging) { 11 | 12 | int i = 0, max_el = 0; 13 | double maxw = 0; 14 | fitted_bn bn = fitted_network_from_SEXP(fitted); 15 | SEXP metadata, complete_nodes, nodes_in_fitted, keep2; 16 | 17 | memset(w, '\0', n * sizeof(double)); 18 | /* find out which nodes to compute the log-likelihood for, zero-indexed. */ 19 | PROTECT(nodes_in_fitted = getAttrib(fitted, R_NamesSymbol)); 20 | PROTECT(keep2 = match(keep, nodes_in_fitted, 0)); 21 | 22 | /* extract the metadata from the data. */ 23 | PROTECT(metadata = getAttrib(data, BN_MetaDataSymbol)); 24 | PROTECT(complete_nodes = getListElement(metadata, "complete.nodes")); 25 | 26 | if (bn.type == DNET || bn.type == ONET || bn.type == DONET) { 27 | 28 | ddata dt = ddata_from_SEXP(data, 0); 29 | meta_copy_names(&(dt.m), 0, data); 30 | meta_init_flags(&(dt.m), 0, complete_nodes, keep2); 31 | 32 | bysample_discrete_loglikelihood(bn, dt, w, debugging); 33 | 34 | FreeDDT(dt); 35 | 36 | }/*THEN*/ 37 | else if (bn.type == GNET) { 38 | 39 | gdata dt = gdata_from_SEXP(data, 0); 40 | meta_copy_names(&(dt.m), 0, data); 41 | meta_init_flags(&(dt.m), 0, complete_nodes, keep2); 42 | 43 | bysample_gaussian_loglikelihood(bn, dt, w, TRUE, debugging); 44 | 45 | FreeGDT(dt); 46 | 47 | }/*THEN*/ 48 | else if (bn.type == CGNET) { 49 | 50 | cgdata dt = cgdata_from_SEXP(data, 0, 0); 51 | meta_copy_names(&(dt.m), 0, data); 52 | meta_init_flags(&(dt.m), 0, complete_nodes, keep2); 53 | 54 | bysample_clgaussian_loglikelihood(bn, dt, w, TRUE, debugging); 55 | 56 | FreeCGDT(dt); 57 | 58 | }/*THEN*/ 59 | 60 | FreeFittedBN(bn); 61 | 62 | UNPROTECT(4); 63 | 64 | /* rescale before exponentiating them into probabilities (if possible). 
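* subtracting the largest log-weight before calling exp() is the usual log-sum-exp safeguard: the shift prevents overflow and underflow, and the common factor cancels once the weights are used as relative weights.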
*/ 65 | max_el = d_which_max(w, n); 66 | 67 | if (max_el == NA_INTEGER) 68 | return; 69 | else if ((max_el == 1) && (w[0] == R_NegInf)) 70 | memset(w, '\0', n * sizeof(double)); 71 | else { 72 | 73 | maxw = w[max_el - 1]; 74 | for (i = 0; i < n; i++) 75 | w[i] = exp(w[i] - maxw); 76 | 77 | }/*ELSE*/ 78 | 79 | }/*C_LW_WEIGHTS*/ 80 | 81 | -------------------------------------------------------------------------------- /man/modelstring.Rd: -------------------------------------------------------------------------------- 1 | \name{model string utilities} 2 | \alias{model string utilities} 3 | \alias{modelstring} 4 | \alias{modelstring<-} 5 | \alias{model2network} 6 | \alias{as.character.bn} 7 | \alias{as.bn} 8 | \alias{as.bn.character} 9 | \title{Build a model string from a Bayesian network and vice versa} 10 | \description{ 11 | 12 | Build a model string from a Bayesian network and vice versa. 13 | 14 | } 15 | \usage{ 16 | modelstring(x) 17 | modelstring(x, debug = FALSE) <- value 18 | 19 | model2network(string, ordering = NULL, debug = FALSE) 20 | 21 | \method{as.character}{bn}(x, ...) 22 | \method{as.bn}{character}(x, ...) 23 | } 24 | \arguments{ 25 | \item{x}{an object of class \code{bn}. \code{modelstring()} (but not its 26 | replacement form) accepts also objects of class \code{bn.fit}.} 27 | \item{string}{a character string describing the Bayesian network.} 28 | \item{ordering}{the labels of all the nodes in the graph; their order is the 29 | node ordering used in the construction of the \code{bn} object. If 30 | \code{NULL} the nodes are sorted alphabetically.} 31 | \item{value}{a character string, the same as the \code{string}.} 32 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 33 | printed; otherwise the function is completely silent.} 34 | \item{\dots}{extra arguments from the generic method (currently ignored).} 35 | } 36 | \details{ 37 | 38 | The strings returned by \code{modelstringi()} have the same format as the ones 39 | returned by the \code{modelstring()} function in package \pkg{deal}; network 40 | structures may be easily exported to and imported from that package (via the 41 | \code{model2network} function). 42 | 43 | The format of the model strings is as follows. The local structure of each 44 | node is enclosed in square brackets ("\code{[]}"); the first string is the 45 | label of that node. The parents of the node (if any) are listed after a 46 | ("\code{|}") and separated by colons ("\code{:}"). All nodes (including 47 | isolated and root nodes) must be listed. 48 | 49 | } 50 | \value{ 51 | 52 | \code{model2network()} and \code{as.bn()} return an object of class \code{bn}; 53 | \code{modelstring()} and \code{as.character.bn()} return a character string. 54 | 55 | } 56 | \examples{ 57 | data(learning.test) 58 | dag = hc(learning.test) 59 | dag 60 | modelstring(dag) 61 | dag2 = model2network(modelstring(dag)) 62 | dag2 63 | all.equal(dag, dag2) 64 | } 65 | \author{Marco Scutari} 66 | \keyword{convenience functions} 67 | -------------------------------------------------------------------------------- /R/sanitization-bootstrap.R: -------------------------------------------------------------------------------- 1 | # check bootstrap arguments (when they are passed as variable length args). 2 | check.bootstrap.args = function(extra.args, network, data) { 3 | 4 | # check the number of bootstrap replicates. 5 | extra.args[["R"]] = check.replicates(extra.args[["R"]]) 6 | # check the size of each bootstrap sample. 
7 | extra.args[["m"]] = check.bootsize(extra.args[["m"]], data) 8 | # check the learning algorithm. 9 | algorithm = check.learning.algorithm(extra.args[["algorithm"]], bn = network) 10 | # check the extra arguments for the learning algorithm. 11 | algorithm.args = 12 | check.learning.algorithm.args(extra.args[["algorithm.args"]], 13 | algorithm = algorithm, bn = network) 14 | 15 | extra.args[["algorithm"]] = algorithm 16 | extra.args[["algorithm.args"]] = algorithm.args 17 | 18 | # remap additional arguments used in hybrid algorithms. 19 | if (algorithm %in% hybrid.algorithms) { 20 | 21 | # there's no need to sanitize these arguments, it's done either in 22 | # the calls to the structure learning algorithms. 23 | if (is.null(extra.args[["algorithm.args"]][["restrict"]])) 24 | extra.args[["algorithm.args"]][["restrict"]] = network$learning$restrict 25 | if (is.null(extra.args[["algorithm.args"]]$maximize)) 26 | extra.args[["algorithm.args"]][["maximize"]] = network$learning$maximize 27 | if (is.null(extra.args[["algorithm.args"]]$test)) 28 | extra.args[["algorithm.args"]][["test"]] = network$learning$rstest 29 | if (is.null(extra.args[["algorithm.args"]]$score)) 30 | extra.args[["algorithm.args"]][["score"]] = network$learning$maxscore 31 | 32 | }#THEN 33 | 34 | # warn about and remove unused arguments. 35 | extra.args = 36 | check.unused.args(extra.args, c("R", "m", "algorithm", "algorithm.args")) 37 | 38 | return(extra.args) 39 | 40 | }#CHECK.BOOTSTRAP.ARGS 41 | 42 | # check the number of bootstrap replicates. 43 | check.replicates = function(R, default = 200) { 44 | 45 | if (missing(R) || is.null(R)) 46 | R = default 47 | else if (!is.positive.integer(R)) 48 | stop("the number of bootstrap replicates must be a positive integer.") 49 | 50 | return(R) 51 | 52 | }#CHECK.REiPLICATES 53 | 54 | # check the size of bootstrap replicates. 55 | check.bootsize = function(m, data) { 56 | 57 | if (missing(m) || is.null(m)) 58 | m = nrow(data) 59 | else if (!is.positive.integer(m)) 60 | stop("bootstrap sample size must be a positive integer.") 61 | 62 | return(m) 63 | 64 | }#CHECK.BOOTSIZE 65 | -------------------------------------------------------------------------------- /src/core/contingency.tables.h: -------------------------------------------------------------------------------- 1 | #ifndef CONTINGENCY_TABLES_HEADER 2 | #define CONTINGENCY_TABLES_HEADER 3 | 4 | /* one-dimensional contingency table. */ 5 | typedef struct { 6 | 7 | int llx; /* first (and only) dimension. */ 8 | int nobs; /* total count over all cells. */ 9 | int *n; /* contingency table. */ 10 | 11 | } counts1d; 12 | 13 | /* two-dimensional contingency table. */ 14 | typedef struct{ 15 | 16 | int llx; /* first dimension. */ 17 | int lly; /* second dimension */ 18 | int nobs; /* total count over all cells. */ 19 | int **n; /* contingency table. */ 20 | int *ni; /* marginal counts for the first dimension. */ 21 | int *nj; /* marginal counts for the second dimension. */ 22 | 23 | } counts2d; 24 | 25 | /* three-dimensional contingency table, as an array of two-dimensional tables 26 | * spanning the third dimension. */ 27 | typedef struct { 28 | 29 | int llx; /* first dimension. */ 30 | int lly; /* second dimension */ 31 | int llz; /* third dimension. */ 32 | int nobs; /* total count over all cells. */ 33 | int ***n; /* contingency table. */ 34 | int **ni; /* marginal counts for the first dimension. */ 35 | int **nj; /* marginal counts for the second dimension. */ 36 | int *nk; /* marginal counts for the third dimension. 
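* (given the layout described above, the counts are presumably indexed as n[z][x][y], with each n[z] a complete two-dimensional slice and nk[z] its total count.)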
*/ 37 | 38 | } counts3d; 39 | 40 | counts1d new_1d_table(int llx); 41 | counts2d new_2d_table(int llx, int lly, bool margins); 42 | counts3d new_3d_table(int llx, int lly, int llz); 43 | 44 | void fill_1d_table(int *xx, counts1d *table, int num); 45 | void fill_2d_table(int *xx, int *yy, counts2d *table, int num); 46 | void fill_3d_table(int *xx, int *yy, int *zz, counts3d *table, int num); 47 | 48 | void refill_1d_table(int *xx, counts1d *table, int num); 49 | void refill_2d_table(int *xx, int *yy, counts2d *table, int num); 50 | void refill_3d_table(int *xx, int *yy, int *zz, counts3d *table, int num); 51 | 52 | void resize_1d_table(int llx, counts1d *table); 53 | void resize_2d_table(int llx, int lly, counts2d *table); 54 | void resize_3d_table(int llx, int lly, int llz, counts3d *table); 55 | 56 | void print_1d_table(counts1d table); 57 | void print_2d_table(counts2d table); 58 | void print_3d_table(counts3d table); 59 | 60 | void Free1DTAB(counts1d table); 61 | void Free2DTAB(counts2d table); 62 | void Free3DTAB(counts3d table); 63 | 64 | void rcounts2d(counts2d table, double *fact, int *workspace); 65 | void rcounts3d(counts3d table, double *fact, int *workspace); 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /man/blacklist.Rd: -------------------------------------------------------------------------------- 1 | \name{utilities for whitelists and blacklists} 2 | \alias{whitelist} 3 | \alias{blacklist} 4 | \alias{ordering2blacklist} 5 | \alias{tiers2blacklist} 6 | \alias{set2blacklist} 7 | \title{Get or create whitelists and blacklists} 8 | \description{ 9 | 10 | Extract whitelists and blacklists from an object of class \code{bn}, or create 11 | them for use in structure learning. 12 | 13 | } 14 | \usage{ 15 | whitelist(x) 16 | blacklist(x) 17 | 18 | ordering2blacklist(nodes) 19 | tiers2blacklist(tiers) 20 | set2blacklist(set) 21 | } 22 | \arguments{ 23 | \item{x}{an object of class \code{bn}.} 24 | \item{nodes,set}{a vector of character strings, the labels of the nodes.} 25 | \item{tiers}{a vector of character strings or a list, see below.} 26 | 27 | } 28 | \details{ 29 | 30 | \code{ordering2blacklist()} takes a vector of character strings (the labels 31 | of the nodes), which specifies a complete node ordering, or an object of class 32 | \code{bn} or \code{bn.fit}; in the latter case, the node ordering is derived from 33 | the graph. In both cases, the blacklist returned by \code{ordering2blacklist()} 34 | contains all the possible arcs that violate the specified node ordering. 35 | 36 | \code{tiers2blacklist()} takes (again) a vector of character strings (the 37 | labels of the nodes), which specifies a complete node ordering, or a list of 38 | character vectors, which specifies a partial node ordering. In the latter 39 | case, all arcs going from a node in a particular element of the list 40 | (sometimes known as \emph{tier}) to a node in one of the previous elements 41 | are blacklisted. Arcs between nodes in the same element are not blacklisted. 42 | 43 | \code{set2blacklist()} creates a blacklist containing all the arcs between any 44 | two of the nodes whose labels are passed as the argument \code{set}. 45 | 46 | } 47 | \value{ 48 | 49 | \code{whitelist()} and \code{blacklist()} return a matrix of character strings 50 | with two columns, named \code{from} and \code{to}, if a whitelist or a blacklist 51 | has been used to learn the \code{bn} object passed as their argument.
52 | 53 | \code{ordering2blacklist()}, \code{tiers2blacklist()} and 54 | \code{set2blacklist()} return a sanitized \code{blacklist} (a two-column 55 | matrix, whose columns are labeled \code{from} and \code{to}). 56 | 57 | } 58 | \examples{ 59 | tiers2blacklist(list(LETTERS[1:3], LETTERS[4:6])) 60 | set2blacklist(LETTERS[1:3]) 61 | ordering2blacklist(LETTERS[1:6]) 62 | } 63 | 64 | \author{Marco Scutari} 65 | \keyword{convenience functions} 66 | \keyword{structure learning} 67 | -------------------------------------------------------------------------------- /man/coronary.Rd: -------------------------------------------------------------------------------- 1 | \name{coronary} 2 | \docType{data} 3 | \alias{coronary} 4 | \title{Coronary heart disease data set} 5 | \description{ 6 | 7 | Probable risk factors for coronary thrombosis, comprising data from 1841 men. 8 | 9 | } 10 | \usage{ 11 | data(coronary) 12 | } 13 | \format{ 14 | 15 | The \code{coronary} data set contains the following 6 variables: 16 | \itemize{ 17 | 18 | \item \code{Smoking} (\emph{smoking}): a two-level factor with levels 19 | \code{no} and \code{yes}. 20 | \item \code{M. Work} (\emph{strenuous mental work}): a two-level factor 21 | with levels \code{no} and \code{yes}. 22 | \item \code{P. Work} (\emph{strenuous physical work}): a two-level factor 23 | with levels \code{no} and \code{yes}. 24 | \item \code{Pressure} (\emph{systolic blood pressure}): a two-level factor 25 | with levels \code{<140} and \code{>140}. 26 | \item \code{Proteins} (\emph{ratio of beta and alpha lipoproteins}): a 27 | two-level factor with levels \code{<3} and \code{>3}. 28 | \item \code{Family} (\emph{family anamnesis of coronary heart disease}): a 29 | two-level factor with levels \code{neg} and \code{pos}. 30 | 31 | } 32 | 33 | } 34 | \source{ 35 | 36 | Edwards DI (2000). \emph{Introduction to Graphical Modelling}. Springer, 2nd 37 | edition. 38 | 39 | Reinis Z, Pokorny J, Basika V, Tiserova J, Gorican K, Horakova D, Stuchlikova 40 | E, Havranek T, Hrabovsky F (1981). "Prognostic Significance of the Risk 41 | Profile in the Prevention of Coronary Heart Disease." \emph{Bratisl Lek 42 | Listy}, \strong{76}:137--150. Published on Bratislava Medical Journal, 43 | in Czech. 44 | 45 | Whittaker J (1990). \emph{Graphical Models in Applied Multivariate 46 | Statistics}. Wiley. 47 | 48 | } 49 | \examples{ 50 | 51 | # This is the undirected graphical model from Whittaker (1990). 52 | data(coronary) 53 | ug = empty.graph(names(coronary)) 54 | arcs(ug, check.cycles = FALSE) = matrix( 55 | c("Family", "M. Work", "M. Work", "Family", 56 | "M. Work", "P. Work", "P. Work", "M. Work", 57 | "M. Work", "Proteins", "Proteins", "M. Work", 58 | "M. Work", "Smoking", "Smoking", "M. Work", 59 | "P. Work", "Smoking", "Smoking", "P. Work", 60 | "P. Work", "Proteins", "Proteins", "P. 
Work", 61 | "Smoking", "Proteins", "Proteins", "Smoking", 62 | "Smoking", "Pressure", "Pressure", "Smoking", 63 | "Pressure", "Proteins", "Proteins", "Pressure"), 64 | ncol = 2, byrow = TRUE, 65 | dimnames = list(c(), c("from", "to"))) 66 | \dontrun{graphviz.plot(ug, shape = "ellipse")} 67 | } 68 | \keyword{datasets} 69 | -------------------------------------------------------------------------------- /man/whitelists.and.blacklists.Rd: -------------------------------------------------------------------------------- 1 | \name{whitelists-blacklists} 2 | \alias{whitelists-blacklists} 3 | \alias{whitelists and blacklists} 4 | \title{Whitelists and blacklists in structure learning} 5 | \description{ 6 | 7 | How whitelists and blacklists are used in structure learning. 8 | 9 | } 10 | \section{Constraint-based Algorithms}{ 11 | 12 | Constraint-based algorithms support arc whitelisting and blacklisting as 13 | follows: 14 | 15 | \itemize{ 16 | 17 | \item blacklisted arcs are never present in the learned graph. 18 | \item arcs whitelisted in one direction only (i.e. 19 | \eqn{A \rightarrow B}{A -> B} is whitelisted but 20 | \eqn{B \rightarrow A}{B -> A} is not) have the respective reverse arcs 21 | blacklisted, and are always present in the learned graph. 22 | \item arcs whitelisted in both directions (i.e. both 23 | \eqn{A \rightarrow B}{A -> B} and \eqn{B \rightarrow A}{B -> A} are 24 | whitelisted) are present in the learned graph, but their direction is set 25 | by the learning algorithm. 26 | 27 | } 28 | 29 | Any arc whitelisted and blacklisted at the same time is assumed to be 30 | whitelisted, and is thus removed from the blacklist. 31 | 32 | } 33 | \section{Score-based Algorithms}{ 34 | 35 | Score-based algorithms support arc whitelisting and blacklisting as follows: 36 | 37 | \itemize{ 38 | 39 | \item blacklisted arcs are never present in the learned graph. 40 | \item arcs can only be whitelisted in a single direction, and are always 41 | present in the learned graph; it is not possible to whitelist arcs in both 42 | directions. 43 | 44 | } 45 | 46 | } 47 | \section{Hybrid Algorithms}{ 48 | 49 | Hybrid algorithms use constraint-based (or pairwise mutual information) 50 | algorithms in the \emph{restrict phase} and score-based algorithms in the 51 | \emph{maximize phase}. Hence whitelists and blacklists are supported as 52 | follows: 53 | 54 | \itemize{ 55 | 56 | \item whitelists and blacklists should be specified for the algorithm used 57 | in the restrict phase. 58 | \item if the whitelist contains any undirected arc, its consistent extension 59 | is used instead in the maximize phase. 60 | 61 | } 62 | 63 | } 64 | \section{Pairwise Mutual Information Algorithms}{ 65 | 66 | In algorithms that learn undirected graphs, such as ARACNE and Chow-Liu, arcs 67 | are treated as being whitelisted or blacklisted in both directions even if 68 | only one direction is listed in the whitelist or blacklist. Again blacklisted 69 | arcs are never present in the learned graph and whitelisted arcs are 70 | guaranteed to be present in the learned graph. 
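
For instance, in the following sketch (which uses the \code{learning.test} data
set included in the package; the particular arcs are chosen arbitrarily, only
for illustration) the arc between \code{A} and \code{B} can never appear in the
learned graph, while the arc between \code{C} and \code{D} always does:

\preformatted{
bl = set2blacklist(c("A", "B"))
wl = data.frame(from = "C", to = "D")
chow.liu(learning.test, whitelist = wl, blacklist = bl)
}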
71 | 72 | } 73 | \keyword{package} 74 | -------------------------------------------------------------------------------- /src/tests/rinterface/htest.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../minimal/strings.h" 3 | #include "../../minimal/common.h" 4 | #include "../tests.h" 5 | 6 | SEXP c_create_htest(double stat, SEXP test, double pvalue, double df, 7 | SEXP extra_args) { 8 | 9 | test_e test_type = test_to_enum(CHAR(STRING_ELT(test, 0))); 10 | SEXP result, s, n, B, params; 11 | 12 | /* allocate the return value. */ 13 | PROTECT(result = allocVector(VECSXP, 7)); 14 | /* set the class. */ 15 | setAttrib(result, R_ClassSymbol, mkString("htest")); 16 | /* set the names of the elements. */ 17 | setAttrib(result, R_NamesSymbol, mkStringVec(7, "statistic", "p.value", 18 | "method", "null.value", "alternative", "data.name", "parameter")); 19 | 20 | /* set the test statistic. */ 21 | PROTECT(s = ScalarReal(stat)); 22 | setAttrib(s, R_NamesSymbol, test); 23 | SET_VECTOR_ELT(result, 0, s); 24 | 25 | /* set the p-value. */ 26 | SET_VECTOR_ELT(result, 1, ScalarReal(pvalue)); 27 | 28 | /* set the label of the test. */ 29 | SET_VECTOR_ELT(result, 2, mkString("")); 30 | 31 | /* set the value of the statistic under the null. */ 32 | PROTECT(n = ScalarReal(0)); 33 | setAttrib(n, R_NamesSymbol, mkString("value")); 34 | SET_VECTOR_ELT(result, 3, n); 35 | 36 | /* set the alternative hypothesis. */ 37 | if (test_type == CUSTOM_T) 38 | SET_VECTOR_ELT(result, 4, R_NilValue); 39 | else if (IS_TWO_SIDED(test_type)) 40 | SET_VECTOR_ELT(result, 4, mkString("two.sided")); 41 | else 42 | SET_VECTOR_ELT(result, 4, mkString("greater")); 43 | 44 | /* set the data description string. */ 45 | SET_VECTOR_ELT(result, 5, mkString("")); 46 | 47 | /* save the relevant parameters of the test. */ 48 | B = getListElement(extra_args, "B"); 49 | 50 | if (B != R_NilValue) { 51 | 52 | if (ISNAN(df)) { 53 | 54 | PROTECT(params = ScalarReal(INT(B))); 55 | setAttrib(params, R_NamesSymbol, mkString("Monte Carlo samples")); 56 | SET_VECTOR_ELT(result, 6, params); 57 | UNPROTECT(1); 58 | 59 | }/*THEN*/ 60 | else { 61 | 62 | PROTECT(params = allocVector(REALSXP, 2)); 63 | REAL(params)[0] = df; 64 | REAL(params)[1] = INT(B); 65 | setAttrib(params, R_NamesSymbol, mkStringVec(2, "df", "Monte Carlo samples")); 66 | SET_VECTOR_ELT(result, 6, params); 67 | UNPROTECT(1); 68 | 69 | }/*ELSE*/ 70 | 71 | }/*THEN*/ 72 | else { 73 | 74 | if (!ISNAN(df)) { 75 | 76 | PROTECT(params = ScalarReal(df)); 77 | setAttrib(params, R_NamesSymbol, mkString("df")); 78 | SET_VECTOR_ELT(result, 6, params); 79 | UNPROTECT(1); 80 | 81 | }/*THEN*/ 82 | 83 | }/*ELSE*/ 84 | 85 | UNPROTECT(3); 86 | 87 | return result; 88 | 89 | }/*C_CREATE_HTEST*/ 90 | 91 | -------------------------------------------------------------------------------- /R/bootstrap.R: -------------------------------------------------------------------------------- 1 | 2 | # simple nonparametric bootstrap implementation. 3 | bootstrap.backend = function(data, statistic, R, m, algorithm, 4 | algorithm.args = list(), statistic.args = list(), cluster = NULL, 5 | debug = FALSE) { 6 | 7 | # allocate the result list. 
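  # (one element per replicate, initially holding just the replicate index,
  # which is passed to bootstrap.replicate() below and replaced by the value
  # of the user-supplied statistic computed on that replicate.)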
8 | res = as.list(seq(R)) 9 | 10 | bootstrap.replicate = function(r, data, m, algorithm, algorithm.args, 11 | statistic, statistic.args, debug) { 12 | 13 | if (debug) { 14 | 15 | cat("----------------------------------------------------------------\n") 16 | cat("* bootstrap replicate", r, ".\n") 17 | 18 | }#THEN 19 | 20 | # generate the r-th bootstrap sample by resampling with replacement. 21 | resampling = sample(nrow(data), m, replace = TRUE) 22 | 23 | # user-provided lists of manipulated observations for the mbde score must 24 | # be remapped to match the bootstrap sample. 25 | if (!is.null(algorithm.args$score) && (algorithm.args$score == "mbde") && 26 | !is.null(algorithm.args$exp)) { 27 | 28 | algorithm.args$exp = lapply(algorithm.args$exp, function(x) { 29 | 30 | x = match(x, resampling) 31 | x = x[!is.na(x)] 32 | 33 | }) 34 | 35 | }#THEN 36 | 37 | # generate the bootstrap sample. 38 | replicate = data[resampling, , drop = FALSE] 39 | 40 | if (debug) 41 | cat("* learning bayesian network structure.\n") 42 | 43 | # learn the network structure from the bootstrap sample. 44 | bn = do.call(algorithm, c(list(x = replicate), algorithm.args)) 45 | 46 | if (debug) { 47 | 48 | print(bn) 49 | cat("* computing user-defined statistic.\n") 50 | 51 | }#THEN 52 | 53 | # apply the user-defined function to the newly-learned bayesian network; 54 | # the bayesian network is passed as the first argument hoping it will end 55 | # at the right place thanks to the positional matching. 56 | res = do.call(statistic, c(list(bn), statistic.args)) 57 | 58 | if (debug) { 59 | 60 | cat(" > the function returned:\n") 61 | print(res) 62 | 63 | }#THEN 64 | 65 | return(res) 66 | 67 | }#BOOTSTRAP.REPLICATE 68 | 69 | if (!is.null(cluster)) { 70 | 71 | res = parallel::parLapplyLB(cluster, res, bootstrap.replicate, data = data, 72 | m = m, algorithm = algorithm, 73 | algorithm.args = algorithm.args, statistic = statistic, 74 | statistic.args = statistic.args, debug = debug) 75 | 76 | }#THEN 77 | else { 78 | 79 | res = lapply(res, bootstrap.replicate, data = data, m = m, 80 | algorithm = algorithm, algorithm.args = algorithm.args, 81 | statistic = statistic, statistic.args = statistic.args, 82 | debug = debug) 83 | 84 | }#ELSE 85 | 86 | return(res) 87 | 88 | }#BOOTSTRAP.BACKEND 89 | 90 | -------------------------------------------------------------------------------- /src/bnlearn/shd.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/graph.h" 3 | #include "../minimal/common.h" 4 | #include "../math/linear.algebra.h" 5 | 6 | SEXP shd(SEXP learned, SEXP golden, SEXP debug) { 7 | 8 | int i = 0, j = 0, c1 = 0, c2 = 0, shd = 0, nnodes = 0; 9 | int *lrn = NULL, *ref = NULL; 10 | bool debugging = isTRUE(debug); 11 | SEXP temp, nodes, l, r; 12 | 13 | /* get the labels of the nodes. */ 14 | temp = getListElement(learned, "nodes"); 15 | PROTECT(nodes = getAttrib(temp, R_NamesSymbol)); 16 | nnodes = length(nodes); 17 | 18 | /* get the arcs of the learned network. */ 19 | temp = getListElement(learned, "arcs"); 20 | /* build the adjacency matrix. */ 21 | PROTECT(l = arcs2amat(temp, nodes)); 22 | lrn = INTEGER(l); 23 | 24 | /* get the arcs of the golden standard network. */ 25 | temp = getListElement(golden, "arcs"); 26 | /* build the adjacency matrix. 
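* (as above: amat[i, j] == 1 encodes the arc i -> j, and an undirected arc sets both amat[i, j] and amat[j, i], which is what the pairwise comparison below relies on to tell directed, undirected and missing arcs apart.)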
*/ 27 | PROTECT(r = arcs2amat(temp, nodes)); 28 | ref = INTEGER(r); 29 | 30 | for (i = 0; i < nnodes; i++) { 31 | 32 | for (j = i + 1; j < nnodes; j++) { 33 | 34 | /* compute coordinates only once per iteration. */ 35 | c1 = CMC(i, j, nnodes); 36 | c2 = CMC(j, i, nnodes); 37 | 38 | /* the two arcs are identical, nothing to do. */ 39 | if ((lrn[c1] == ref[c1]) && (lrn[c2] == ref[c2])) 40 | continue; 41 | 42 | if (debugging) { 43 | 44 | Rprintf("* arcs between %s and %s do not match.\n", NODE(i), NODE(j)); 45 | 46 | if ((lrn[c1] == 1) && (lrn[c2] == 1)) 47 | Rprintf(" > the learned network contains %s - %s.\n", NODE(i), NODE(j)); 48 | else if ((lrn[c1] == 0) && (lrn[c2] == 0)) 49 | Rprintf(" > the learned network contains no arc between %s and %s.\n", NODE(i), NODE(j)); 50 | else if ((lrn[c1] == 1) && (lrn[c2] == 0)) 51 | Rprintf(" > the learned network contains %s -> %s.\n", NODE(i), NODE(j)); 52 | else if ((lrn[c1] == 0) && (lrn[c2] == 1)) 53 | Rprintf(" > the learned network contains %s -> %s.\n", NODE(j), NODE(i)); 54 | 55 | if ((ref[c1] == 1) && (ref[c2] == 1)) 56 | Rprintf(" > the true network contains %s - %s.\n", NODE(i), NODE(j)); 57 | else if ((ref[c1] == 0) && (ref[c2] == 0)) 58 | Rprintf(" > the true network contains no arc between %s and %s.\n", NODE(i), NODE(j)); 59 | else if ((ref[c1] == 1) && (ref[c2] == 0)) 60 | Rprintf(" > the true network contains %s -> %s.\n", NODE(i), NODE(j)); 61 | else if ((ref[c1] == 0) && (ref[c2] == 1)) 62 | Rprintf(" > the true network contains %s -> %s.\n", NODE(j), NODE(i)); 63 | 64 | }/*THEN*/ 65 | 66 | /* increase the distance by one. */ 67 | shd++; 68 | 69 | }/*FOR*/ 70 | 71 | }/*FOR*/ 72 | 73 | UNPROTECT(3); 74 | 75 | return ScalarInteger(shd); 76 | 77 | }/*SHD*/ 78 | 79 | -------------------------------------------------------------------------------- /src/arcs/arcs2amat.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/bn.h" 3 | #include "../minimal/strings.h" 4 | #include "../minimal/common.h" 5 | #include "../math/linear.algebra.h" 6 | 7 | /* convert an arc set to an adjacency matrix. */ 8 | SEXP arcs2amat(SEXP arcs, SEXP nodes) { 9 | 10 | int k = 0, nrow = length(arcs) / 2, dims = length(nodes); 11 | int *res = NULL, *coords = NULL; 12 | SEXP result, try; 13 | 14 | /* allocate and initialize the adjacency matrix. */ 15 | PROTECT(result = allocMatrix(INTSXP, dims, dims)); 16 | res = INTEGER(result); 17 | memset(res, '\0', sizeof(int) * dims * dims); 18 | 19 | /* set rownames and colnames to the node labels. */ 20 | setDimNames(result, nodes, nodes); 21 | 22 | /* nothing to do if there are no arcs. */ 23 | if (nrow == 0) { 24 | 25 | UNPROTECT(1); 26 | return result; 27 | 28 | }/*THEN*/ 29 | 30 | /* match the node labels in the arc set. */ 31 | PROTECT(try = match(nodes, arcs, 0)); 32 | coords = INTEGER(try); 33 | 34 | /* iterate over the arcs. */ 35 | for (k = 0; k < nrow; k++) 36 | res[CMC(coords[k] - 1, coords[k + nrow] - 1, dims)] = 1; 37 | 38 | UNPROTECT(2); 39 | 40 | return result; 41 | 42 | }/*ARCS2AMAT*/ 43 | 44 | /* convert an adjacency matrix to an arc set. */ 45 | SEXP amat2arcs(SEXP amat, SEXP nodes) { 46 | 47 | int i = 0, j = 0, k = 0, nrow = length(nodes), narcs = 0; 48 | int *a = INTEGER(amat); 49 | SEXP arcs; 50 | 51 | /* count the number of arcs in the adjacency matrix. 
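* (every nonzero cell counts once, so an undirected arc stored as two symmetric cells yields two rows in the returned arc set.)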
*/ 52 | for (i = 0; i < nrow; i++) { 53 | 54 | for (j = 0; j < nrow; j++) { 55 | 56 | if (a[CMC(i, j, nrow)] == 1) narcs++; 57 | 58 | }/*FOR*/ 59 | 60 | }/*FOR*/ 61 | 62 | /* allocate the arc set and set the column names. */ 63 | PROTECT(arcs = allocMatrix(STRSXP, narcs, 2)); 64 | setDimNames(arcs, R_NilValue, mkStringVec(2, "from", "to")); 65 | 66 | /* if there are no arcs, return an empty arc set. */ 67 | if (narcs == 0) { 68 | 69 | UNPROTECT(1); 70 | return arcs; 71 | 72 | }/*THEN*/ 73 | 74 | /* fill the arc set from the adjacency matrix. */ 75 | for (i = 0; i < nrow; i++) { 76 | 77 | for (j = 0; j < nrow; j++) { 78 | 79 | /* colnames and rownames are completely ignored to avoid hitting some 80 | * corner cases present in the old R code. */ 81 | if (a[CMC(i, j, nrow)] == 1) { 82 | 83 | SET_STRING_ELT(arcs, k, STRING_ELT(nodes, i)); 84 | SET_STRING_ELT(arcs, k + 1 * narcs, STRING_ELT(nodes, j)); 85 | k++; 86 | 87 | }/*THEN*/ 88 | 89 | /* no more arcs, get out of both loops. */ 90 | if (k == narcs) goto end; 91 | 92 | }/*FOR*/ 93 | 94 | }/*FOR*/ 95 | 96 | end: 97 | 98 | UNPROTECT(1); 99 | 100 | return arcs; 101 | 102 | }/*AMAT2ARCS*/ 103 | 104 | -------------------------------------------------------------------------------- /src/graphs/rinterface/sid.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/allocations.h" 3 | #include "../../include/graph.h" 4 | #include "../../include/globals.h" 5 | #include "../../minimal/common.h" 6 | #include "../../minimal/strings.h" 7 | #include "../../math/linear.algebra.h" 8 | #include "../graphs.h" 9 | 10 | /* build a reachability matrix that takes non-causal paths into account. */ 11 | SEXP reachability_matrix(SEXP dag, SEXP path_matrix, SEXP target_node, 12 | SEXP adjustment_set, SEXP debug) { 13 | 14 | int nnodes = 0, target = 0; 15 | int *initial_rmat = NULL, *final_rmat = NULL; 16 | char **labels = NULL; 17 | bool *aset = NULL; 18 | SEXP nodes, arcs, amat, names, reachability, temp; 19 | 20 | /* dereference the node labels and the arcs. */ 21 | arcs = getListElement(dag, "arcs"); 22 | nodes = getListElement(dag, "nodes"); 23 | nodes = getAttrib(nodes, R_NamesSymbol); 24 | nnodes = length(nodes); 25 | /* for the node labels, duplicate them for use in the reachability matrix. */ 26 | labels = Calloc1D(2 * nnodes, sizeof(char *)); 27 | for (int i = 0; i < nnodes; i++) 28 | labels[i] = labels[i + nnodes] = (char *) CHAR(STRING_ELT(nodes, i)); 29 | 30 | /* contruct the adjacency matrix. */ 31 | PROTECT(amat = arcs2amat(arcs, nodes)); 32 | 33 | /* dereference the target node. */ 34 | PROTECT(temp = match(nodes, target_node, 0)); 35 | target = INT(temp) - 1; 36 | UNPROTECT(1); 37 | 38 | /* explode the adjustment set into a logical vector to reduce the amount of 39 | * nested loops later. */ 40 | PROTECT(temp = match(nodes, adjustment_set, 0)); 41 | aset = Calloc1D(nnodes, sizeof(bool)); 42 | for (int i = 0; i < length(adjustment_set); i++) 43 | aset[INTEGER(temp)[i] - 1] = TRUE; 44 | UNPROTECT(1); 45 | 46 | /* initialize the reachability matrix. */ 47 | initial_rmat = Calloc1D(4 * nnodes * nnodes, sizeof(int)); 48 | 49 | initialize_reachability_matrix(INTEGER(amat), INTEGER(path_matrix), nnodes, 50 | target, aset, initial_rmat, labels, isTRUE(debug)); 51 | 52 | /* allocate the final reachability matrix with all the attributes. 
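* (the matrix is (2 * nnodes) x (2 * nnodes), matching the duplicated node labels built at the top of this function, so that reachability can be tracked separately for the two ways a walk can pass through a node.)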
*/ 53 | PROTECT(reachability = allocMatrix(LGLSXP, 2 * nnodes, 2 * nnodes)); 54 | final_rmat = LOGICAL(reachability); 55 | memset(final_rmat, '\0', 4 * nnodes * nnodes * sizeof(int)); 56 | 57 | PROTECT(names = allocVector(STRSXP, 2 * nnodes)); 58 | for (int i = 0; i < 2 * nnodes; i++) 59 | SET_STRING_ELT(names, i, STRING_ELT(nodes, i % nnodes)); 60 | setDimNames(reachability, names, names); 61 | 62 | /* traverse the paths in the reachability matrix to complete it. */ 63 | complete_reachability_matrix(initial_rmat, final_rmat, 2 * nnodes, labels, 64 | isTRUE(debug)); 65 | 66 | Free1D(aset); 67 | Free1D(labels); 68 | Free1D(initial_rmat); 69 | 70 | UNPROTECT(3); 71 | 72 | return reachability; 73 | 74 | }/*REACHABILITY_MATRIX*/ 75 | -------------------------------------------------------------------------------- /src/graphs/connected.components.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../math/linear.algebra.h" 4 | 5 | /* identify the connected components in an undirected graph. */ 6 | int ug_connected_components(int *amat, char **labels, int nnodes, int **buffer, 7 | int *buflen, bool debugging) { 8 | 9 | int *elements = NULL, nelements = 0, ncomponents = 0; 10 | bool changed = TRUE, *visited = NULL; 11 | 12 | /* status vector for the nodes. */ 13 | visited = Calloc1D(nnodes, sizeof(bool)); 14 | /* depth of the nodes in the breadth-first search. */ 15 | elements = Calloc1D(nnodes, sizeof(int)); 16 | 17 | for (int i = 0; i < nnodes; i++) { 18 | 19 | /* for each node we have not visited already... */ 20 | if (visited[i]) 21 | continue; 22 | else if (debugging) 23 | Rprintf("* checking node %s.\n", labels[i]); 24 | 25 | /* ... mark it as visited. */ 26 | visited[i] = TRUE; 27 | 28 | /* reset the connected component... */ 29 | memset(elements, '\0', nnodes * sizeof(int)); 30 | nelements = 1; 31 | elements[i] = 1; 32 | changed = FALSE; 33 | 34 | /* in a breadth-first search... */ 35 | for (int depth = 2; depth < nnodes; depth++) { 36 | 37 | /* ... check the remaining nodes... */ 38 | for (int j = 0; j < nnodes; j++) { 39 | 40 | /* .. if we have visited them in the last iteration... */ 41 | if (elements[j] != depth - 1) 42 | continue; 43 | 44 | /* ... mark their neighbours. */ 45 | for (int k = 0; k < nnodes; k++) { 46 | 47 | if ((amat[CMC(j, k, nnodes)] != 0) && (elements[k] == 0)) { 48 | 49 | elements[k] = depth; 50 | visited[k] = TRUE; 51 | changed = TRUE; 52 | nelements++; 53 | 54 | }/*THEN*/ 55 | 56 | }/*FOR*/ 57 | 58 | }/*FOR*/ 59 | 60 | /* if we found no new nodes, we are done with this component. */ 61 | if (changed) 62 | changed = FALSE; 63 | else 64 | break; 65 | 66 | }/*FOR*/ 67 | 68 | /* save the elements of the connected component. */ 69 | buffer[ncomponents] = Calloc1D(nelements, sizeof(int)); 70 | buflen[ncomponents] = nelements; 71 | for (int k = 0, l = 0; k < nnodes; k++) 72 | if (elements[k] > 0) 73 | buffer[ncomponents][l++] = k; 74 | 75 | if (debugging) { 76 | 77 | Rprintf(" @ connected component %d, %d nodes:\n", 78 | ncomponents, nelements); 79 | for (int k = 0; k < buflen[ncomponents]; k++) 80 | Rprintf("%s ", labels[buffer[ncomponents][k]]); 81 | Rprintf("\n"); 82 | 83 | }/*THEN*/ 84 | 85 | /* move to the next component. 
*/ 86 | ncomponents++; 87 | 88 | }/*FOR*/ 89 | 90 | Free1D(visited); 91 | Free1D(elements); 92 | 93 | return ncomponents; 94 | 95 | }/*UG_CONNECTED_COMPONENTS*/ 96 | 97 | -------------------------------------------------------------------------------- /man/hybrid.Rd: -------------------------------------------------------------------------------- 1 | \name{hybrid algorithms} 2 | \alias{hybrid algorithms} 3 | \alias{rsmax2} 4 | \alias{mmhc} 5 | \alias{h2pc} 6 | \title{Hybrid structure learning algorithms} 7 | \description{ 8 | 9 | Learn the structure of a Bayesian network with Max-Min Hill Climbing (MMHC), 10 | Hybrid HPC (H2PC), and the more general 2-phase Restricted Maximization 11 | (RSMAX2) hybrid algorithms. 12 | 13 | } 14 | \usage{ 15 | rsmax2(x, whitelist = NULL, blacklist = NULL, restrict = "si.hiton.pc", 16 | maximize = "hc", restrict.args = list(), maximize.args = list(), debug = FALSE) 17 | mmhc(x, whitelist = NULL, blacklist = NULL, restrict.args = list(), 18 | maximize.args = list(), debug = FALSE) 19 | h2pc(x, whitelist = NULL, blacklist = NULL, restrict.args = list(), 20 | maximize.args = list(), debug = FALSE) 21 | } 22 | \arguments{ 23 | \item{x}{a data frame containing the variables in the model.} 24 | \item{whitelist}{a data frame with two columns (optionally labeled "from" and 25 | "to"), containing a set of arcs to be included in the graph.} 26 | \item{blacklist}{a data frame with two columns (optionally labeled "from" and 27 | "to"), containing a set of arcs not to be included in the graph.} 28 | \item{restrict}{a character string, the constraint-based or local search 29 | algorithm to be used in the \dQuote{restrict} phase. See 30 | \code{\link{structure learning}} and the documentation of each algorithm for 31 | details.} 32 | \item{maximize}{a character string, the score-based algorithm to be used in 33 | the \dQuote{maximize} phase. Possible values are \code{hc} and \code{tabu}. 34 | See \code{\link{structure learning}} for details.} 35 | \item{restrict.args}{a list of arguments to be passed to the algorithm 36 | specified by \code{restrict}, such as \code{test} or \code{alpha}.} 37 | \item{maximize.args}{a list of arguments to be passed to the algorithm 38 | specified by \code{maximize}, such as \code{restart} for hill-climbing or 39 | \code{tabu} for tabu search.} 40 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 41 | printed; otherwise the function is completely silent.} 42 | } 43 | \note{ 44 | 45 | \code{mmhc()} is simply \code{rsmax2()} with \code{restrict} set to 46 | \code{mmpc} and \code{maximize} set to \code{hc}. Similarly, \code{h2pc} is 47 | simply \code{rsmax2()} with \code{restrict} set to \code{hpc}and 48 | \code{maximize} set to \code{hc}. 49 | 50 | See \code{\link{structure learning}} for a complete list of structure learning 51 | algorithms with the respective references. 52 | 53 | } 54 | \value{ 55 | 56 | An object of class \code{bn}. See \code{\link{bn-class}} for details. 
57 | 58 | } 59 | \author{Marco Scutari} 60 | \seealso{\link{local discovery algorithms}, 61 | \link{score-based algorithms}, \link{constraint-based algorithms}.} 62 | \keyword{structure learning} 63 | -------------------------------------------------------------------------------- /man/asia.Rd: -------------------------------------------------------------------------------- 1 | \name{asia} 2 | \docType{data} 3 | \alias{asia} 4 | \title{Asia (synthetic) data set by Lauritzen and Spiegelhalter} 5 | \description{ 6 | 7 | Small synthetic data set from Lauritzen and Spiegelhalter (1988) about lung 8 | diseases (tuberculosis, lung cancer or bronchitis) and visits to Asia. 9 | 10 | } 11 | \usage{ 12 | data(asia) 13 | } 14 | \format{ 15 | 16 | The \code{asia} data set contains the following variables: 17 | \itemize{ 18 | 19 | \item \code{D} (\emph{dyspnoea}), a two-level factor with levels \code{yes} 20 | and \code{no}. 21 | \item \code{T} (\emph{tuberculosis}), a two-level factor with levels 22 | \code{yes} and \code{no}. 23 | \item \code{L} (\emph{lung cancer}), a two-level factor with levels 24 | \code{yes} and \code{no}. 25 | \item \code{B} (\emph{bronchitis}), a two-level factor with levels 26 | \code{yes} and \code{no}. 27 | \item \code{A} (\emph{visit to Asia}), a two-level factor with levels 28 | \code{yes} and \code{no}. 29 | \item \code{S} (\emph{smoking}), a two-level factor with levels \code{yes} 30 | and \code{no}. 31 | \item \code{X} (\emph{chest X-ray}), a two-level factor with levels 32 | \code{yes} and \code{no}. 33 | \item \code{E} (\emph{tuberculosis versus lung cancer/bronchitis}), a 34 | two-level factor with levels \code{yes} and \code{no}. 35 | 36 | } 37 | 38 | } 39 | \note{ 40 | 41 | Lauritzen and Spiegelhalter (1988) motivate this example as follows: 42 | 43 | \dQuote{Shortness-of-breath (dyspnoea) may be due to tuberculosis, lung 44 | cancer or bronchitis, or none of them, or more than one of them. A recent 45 | visit to Asia increases the chances of tuberculosis, while smoking is known 46 | to be a risk factor for both lung cancer and bronchitis. The results of a 47 | single chest X-ray do not discriminate between lung cancer and tuberculosis, 48 | as neither does the presence or absence of dyspnoea.} 49 | 50 | Standard learning algorithms are not able to recover the true structure of 51 | the network because of the presence of a node (\code{E}) with conditional 52 | probabilities equal to both 0 and 1. Monte Carlo tests seems to behave 53 | better than their parametric counterparts. 54 | 55 | The complete BN can be downloaded from 56 | \url{https://www.bnlearn.com/bnrepository/}. 57 | 58 | } 59 | \source{ 60 | 61 | Lauritzen S, Spiegelhalter D (1988). "Local Computation with Probabilities 62 | on Graphical Structures and their Application to Expert Systems (with 63 | discussion)." \emph{Journal of the Royal Statistical Society: Series B}, 64 | \strong{50}(2):157--224. 65 | 66 | } 67 | \examples{ 68 | # load the data. 69 | data(asia) 70 | # create and plot the network structure. 71 | dag = model2network("[A][S][T|A][L|S][B|S][D|B:E][E|T:L][X|E]") 72 | \dontrun{graphviz.plot(dag)} 73 | } 74 | \keyword{datasets} 75 | -------------------------------------------------------------------------------- /R/frontend-mvnorm.R: -------------------------------------------------------------------------------- 1 | 2 | # transform the local distributions in a Gaussian BN into the multivariate 3 | # normal that is the global distribution. 
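# a hypothetical round trip, assuming gbn2mvnorm() returns the global mean
# vector and covariance matrix as list elements named "mu" and "sigma":
#
#   fitted = bn.fit(hc(gaussian.test), gaussian.test)
#   mvn = gbn2mvnorm(fitted)
#   refit = mvnorm2gbn(bn.net(fitted), mu = mvn$mu, sigma = mvn$sigma)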
4 | gbn2mvnorm = function(fitted) { 5 | 6 | # the network must be a Gaussian BN. 7 | check.fit(fitted) 8 | if (!is(fitted, "bn.fit.gnet")) 9 | stop("'fitted' must be an object of class 'bn.fit.gnet'.") 10 | 11 | gbn2mvnorm.backend(fitted) 12 | 13 | }#GBN2MVNORM 14 | 15 | # factorize a multivariate normal distribution into the local distributions that 16 | # make up a Gaussian BN. 17 | mvnorm2gbn = function(dag, mu, sigma) { 18 | 19 | # the network must be completely directed and acyclic. 20 | check.bn(dag) 21 | if (!is.completely.directed(dag)) 22 | stop("'dag' is only partially directed.") 23 | if (!is.acyclic(dag$arcs, names(dag$nodes), directed = TRUE)) 24 | stop("'dag' contains cycles.") 25 | 26 | nodes = names(dag$nodes) 27 | ordnodes = topological.ordering(dag) 28 | nnodes = length(nodes) 29 | 30 | # check the covariance matrix. 31 | if (missing(sigma)) 32 | stop("'sigma' is missing.") 33 | if (!is(sigma, "matrix") || (ncol(sigma) != nrow(sigma)) || 34 | (length(dim(sigma)) != 2)) 35 | stop("'sigma' must be a 2-dimensional square matrix.") 36 | if ((nrow(sigma) != nnodes) || (ncol(sigma) != nnodes)) 37 | stop("'sigma' must be a square matrix of size ", nnodes, ".") 38 | if (!is.real.vector(sigma)) 39 | stop("the elements of 'sigma' must be real numbers.") 40 | check.covariance(sigma) 41 | 42 | # the nodes in the dag must match the variables in the covariance matrix. 43 | dims = dimnames(sigma) 44 | if (is.null(dims)) { 45 | 46 | dimnames(sigma) = list(nodes, nodes) 47 | 48 | }#THEN 49 | else { 50 | 51 | if (!setequal(dims[[1]], dims[[2]])) 52 | stop("the row names and the column names of 'sigma' are different.") 53 | if (!setequal(dims[[1]], nodes)) 54 | stop("the row names of 'sigma' do not match with the nodes in 'dag'.") 55 | if (!setequal(dims[[2]], nodes)) 56 | stop("the column names of 'sigma' do not match with the nodes in 'dag'.") 57 | 58 | }#ELSE 59 | 60 | # check the mean vector. 61 | if (missing(mu)) { 62 | 63 | mu = structure(rep(0, nnodes), names = ordnodes) 64 | warning("'mu' is missing, all variables are assumed to be centered.") 65 | 66 | }#THEN 67 | else { 68 | 69 | if (!is.real.vector(mu)) 70 | stop("'mu' must be a vector of real numbers.") 71 | if (length(mu) != nnodes) 72 | stop("'mu' has ", length(mu), " elements, but 'sigma' has ", nnodes, ".") 73 | 74 | if (is.null(names(mu))) 75 | names(mu) = nodes 76 | else if (!setequal(names(mu), nodes)) 77 | stop("the names of the elements of 'mu' do not match with the nodes in 'dag'.") 78 | 79 | }#ELSE 80 | 81 | mvnorm2gbn.backend(dag = dag, mu = mu, sigma = sigma) 82 | 83 | }#MVNORM2GBN 84 | 85 | -------------------------------------------------------------------------------- /R/graph-generation.R: -------------------------------------------------------------------------------- 1 | 2 | random.graph.backend = function(num, nodes, method, extra.args, debug = FALSE) { 3 | 4 | if (method == "ordered") { 5 | 6 | res = ordered.graph(num = num, nodes = nodes, prob = extra.args$prob) 7 | 8 | }#THEN 9 | else if (method == "ic-dag") { 10 | 11 | # adjust the number of graphs to generate with the stepping factor. 12 | num = num * extra.args$every 13 | 14 | res = ide.cozman.graph(num = num, nodes = nodes, 15 | burn.in = extra.args$burn.in, 16 | max.in.degree = extra.args$max.in.degree, 17 | max.out.degree = extra.args$max.out.degree, 18 | max.degree = extra.args$max.degree, 19 | connected = TRUE, debug = debug) 20 | 21 | # keep only every k-th network.
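    # (thinning: num * every networks were generated above, and only those in
    # positions every, 2 * every, ... are kept, so that consecutive graphs drawn
    # from the same sampler are less dependent on each other.)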
22 | if (num > 1) { 23 | 24 | res = res[seq(from = extra.args$every, to = num, by = extra.args$every)] 25 | 26 | }#THEN 27 | 28 | }#THEN 29 | else if (method == "melancon") { 30 | 31 | # adjust the number of graph to generate with the stepping factor. 32 | num = num * extra.args$every 33 | 34 | res = ide.cozman.graph(num = num, nodes = nodes, 35 | burn.in = extra.args$burn.in, 36 | max.in.degree = extra.args$max.in.degree, 37 | max.out.degree = extra.args$max.out.degree, 38 | max.degree = extra.args$max.degree, 39 | connected = FALSE, debug = debug) 40 | 41 | # keep only every k-th network. 42 | if (num > 1) { 43 | 44 | res = res[seq(from = extra.args$every, to = num, by = extra.args$every)] 45 | 46 | }#THEN 47 | 48 | }#THEN 49 | 50 | return(res) 51 | 52 | }#RANDOM.GRAPH.BACKEND 53 | 54 | # generate a random directed acyclic graph. 55 | ordered.graph = function(num, nodes, prob) { 56 | 57 | .Call(call_ordered_graph, 58 | nodes = nodes, 59 | num = as.integer(num), 60 | prob = prob) 61 | 62 | }#ORDERED.GRAPH 63 | 64 | # generate a random directed acyclic graph accordin to a uniform 65 | # probability distribution over the space of connected graphs (if 66 | # connected = TRUE) or the space of graphs (if connected = FALSE). 67 | ide.cozman.graph = function(num, nodes, burn.in, max.in.degree, 68 | max.out.degree, max.degree, connected, debug = FALSE) { 69 | 70 | .Call(call_ide_cozman_graph, 71 | nodes = nodes, 72 | num = as.integer(num), 73 | burn.in = as.integer(burn.in), 74 | max.in.degree = as.numeric(max.in.degree), 75 | max.out.degree = as.numeric(max.out.degree), 76 | max.degree = as.numeric(max.degree), 77 | connected = connected, 78 | debug = debug) 79 | 80 | }#IDE.COZMAN.GRAPH 81 | 82 | # generate an empty 'bn' object given a set of nodes. 83 | empty.graph.backend = function(nodes, num = 1) { 84 | 85 | .Call(call_empty_graph, 86 | nodes = nodes, 87 | num = as.integer(num)) 88 | 89 | }#EMPTY.GRAPH.BACKEND 90 | -------------------------------------------------------------------------------- /R/frontend-foreign.R: -------------------------------------------------------------------------------- 1 | 2 | # read a BIF file into a bn.fit object. 3 | read.bif = function(file, debug = FALSE) { 4 | 5 | # load the BIF file into memory. 6 | lines = readLines(file) 7 | 8 | read.foreign.backend(lines, format = "bif", filename = file, debug = debug) 9 | 10 | }#READ.BIF 11 | 12 | # dump a bn.fit object into a BIF file. 13 | write.bif = function(file, fitted) { 14 | 15 | check.fit(fitted) 16 | # only discrete bayesian networks are supported. 17 | if (!is(fitted, c("bn.fit.dnet", "bn.fit.onet", "bn.fit.donet"))) 18 | stop("only discrete Bayesian networks can be exported into BIF format.") 19 | 20 | # open the file for writing. 21 | fd = file(description = file, open = "w") 22 | 23 | write.foreign.backend(fd, fitted = fitted, format = "bif") 24 | 25 | close(fd) 26 | 27 | invisible(NULL) 28 | 29 | }#WRITE.BIF 30 | 31 | # read a DSC file into a bn.fit object. 32 | read.dsc = function(file, debug = FALSE) { 33 | 34 | # load the DSC file into memory. 35 | lines = readLines(file) 36 | 37 | read.foreign.backend(lines, format = "dsc", filename = file, debug = debug) 38 | 39 | }#READ.DSC 40 | 41 | # dump a bn.fit object into a DSC file. 42 | write.dsc = function(file, fitted) { 43 | 44 | check.fit(fitted) 45 | # only discrete bayesian networks are supported. 
46 | if (!is(fitted, c("bn.fit.dnet", "bn.fit.onet", "bn.fit.donet"))) 47 | stop("only discrete Bayesian networks can be exported into DSC format.") 48 | 49 | # open the file for writing. 50 | fd = file(description = file, open = "w") 51 | 52 | write.foreign.backend(fd, fitted = fitted, format = "dsc") 53 | 54 | close(fd) 55 | 56 | invisible(NULL) 57 | 58 | }#WRITE.DSC 59 | 60 | # read a NET file into a bn.fit object. 61 | read.net = function(file, debug = FALSE) { 62 | 63 | # load the NET file into memory. 64 | lines = readLines(file) 65 | 66 | read.foreign.backend(lines, format = "net", filename = file, debug = debug) 67 | 68 | }#READ.NET 69 | 70 | # dump a bn.fit object into a NET file. 71 | write.net = function(file, fitted) { 72 | 73 | check.fit(fitted) 74 | # only discrete bayesian networks are supported. 75 | if (!is(fitted, c("bn.fit.dnet", "bn.fit.onet", "bn.fit.donet"))) 76 | stop("only discrete Bayesian networks can be exported into DSC format.") 77 | 78 | # open the file for writing. 79 | fd = file(description = file, open = "w") 80 | 81 | write.foreign.backend(fd, fitted = fitted, format = "net") 82 | 83 | close(fd) 84 | 85 | invisible(NULL) 86 | 87 | }#WRITE.NET 88 | 89 | write.dot = function(file, graph) { 90 | 91 | check.bn.or.fit(graph) 92 | 93 | # open the file for writing. 94 | fd = file(description = file, open = "w") 95 | 96 | write.dot.backend(fd, graph = graph) 97 | 98 | close(fd) 99 | 100 | invisible(NULL) 101 | 102 | }#WRITE.DOT 103 | 104 | -------------------------------------------------------------------------------- /src/minimal/tiers.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../include/bn.h" 4 | #include "strings.h" 5 | #include "common.h" 6 | 7 | SEXP tiers(SEXP nodes, SEXP debug) { 8 | 9 | int i = 0, j = 0, k = 0, narcs = 0, nnodes = 0, ntiers = length(nodes); 10 | int *tier_size = NULL, tier_start = 0, cur = 0; 11 | bool debugging = isTRUE(debug); 12 | SEXP flattened, blacklist, temp; 13 | 14 | /* allocate the counters for tiers' sizes. */ 15 | tier_size = Calloc1D(ntiers, sizeof(int)); 16 | 17 | if (!isString(nodes)) { 18 | 19 | /* "node" is a list, each tier is an element. */ 20 | for (i = ntiers - 1; i >= 0; i--) { 21 | 22 | temp = VECTOR_ELT(nodes, i); 23 | tier_size[i] = length(temp); 24 | nnodes += tier_size[i]; 25 | narcs += (nnodes - tier_size[i]) * tier_size[i]; 26 | 27 | }/*FOR*/ 28 | 29 | /* flatten the tiers to keep manipulation later on simple. */ 30 | PROTECT(flattened = allocVector(STRSXP, nnodes)); 31 | 32 | for (i = 0, k = 0; i < ntiers; i++) { 33 | 34 | temp = VECTOR_ELT(nodes, i); 35 | 36 | for (j = 0; j < tier_size[i]; j++) 37 | SET_STRING_ELT(flattened, k++, STRING_ELT(temp, j)); 38 | 39 | }/*FOR*/ 40 | 41 | }/*THEN*/ 42 | else { 43 | 44 | /* "node" is a character vector, which means that each node is in its own tier 45 | * and that there is no need to flatten it. */ 46 | flattened = nodes; 47 | nnodes = length(nodes); 48 | for (i = 0; i < ntiers; i++) 49 | tier_size[i] = 1; 50 | 51 | /* the blacklist is the one resulting from a complete node ordering. */ 52 | narcs = ntiers * (ntiers - 1) / 2; 53 | 54 | }/*ELSE*/ 55 | 56 | /* allocate the return value. 
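* (a two-column character matrix of blacklisted arcs; the first column holds the tail ("from") and the second the head ("to"), with the column names set at the end of this function.)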
*/ 57 | PROTECT(blacklist = allocMatrix(STRSXP, narcs, 2)); 58 | 59 | for (k = 0, i = 0; k < nnodes; k++) { 60 | 61 | temp = STRING_ELT(flattened, k); 62 | 63 | if (debugging) 64 | Rprintf("* current node is %s in tier %d, position %d of %d.\n", 65 | CHAR(temp), i + 1, k + 1, nnodes); 66 | 67 | for (j = tier_start + tier_size[i]; j < nnodes; j++) { 68 | 69 | if (debugging) 70 | Rprintf(" > blacklisting %s -> %s\n", CHAR(STRING_ELT(flattened, j)), CHAR(temp)); 71 | 72 | SET_STRING_ELT(blacklist, cur, STRING_ELT(flattened, j)); 73 | SET_STRING_ELT(blacklist, cur + narcs, temp); 74 | cur++; 75 | 76 | }/*FOR*/ 77 | 78 | while (k >= tier_start + tier_size[i] - 1) { 79 | 80 | tier_start += tier_size[i++]; 81 | 82 | if (i == ntiers) 83 | break; 84 | 85 | }/*WHILE*/ 86 | 87 | if (i == ntiers) 88 | break; 89 | 90 | }/*FOR*/ 91 | 92 | /* set the column names. */ 93 | setDimNames(blacklist, R_NilValue, mkStringVec(2, "from", "to")); 94 | 95 | Free1D(tier_size); 96 | 97 | if (!isString(nodes)) 98 | UNPROTECT(2); 99 | else 100 | UNPROTECT(1); 101 | 102 | return blacklist; 103 | 104 | }/*TIERS*/ 105 | 106 | -------------------------------------------------------------------------------- /man/graph.Rd: -------------------------------------------------------------------------------- 1 | \name{graph utilities} 2 | \alias{graph utilities} 3 | \alias{acyclic} 4 | \alias{directed} 5 | \alias{valid.dag} 6 | \alias{valid.cpdag} 7 | \alias{valid.ug} 8 | \alias{path.exists} 9 | \alias{path} 10 | \alias{path,bn-method} 11 | \alias{path,bn.fit-method} 12 | \alias{path,bn.naive-method} 13 | \alias{path,bn.tan-method} 14 | \alias{skeleton} 15 | \alias{pdag2dag} 16 | \alias{subgraph} 17 | \title{Utilities to manipulate graphs} 18 | \description{ 19 | 20 | Check and manipulate graph-related properties of an object of class \code{bn}. 21 | 22 | } 23 | \usage{ 24 | # check whether the graph is acyclic/completely directed. 25 | acyclic(x, directed = FALSE, debug = FALSE) 26 | directed(x) 27 | valid.dag(x, debug = FALSE) 28 | valid.cpdag(x, debug = FALSE) 29 | valid.ug(x, debug = FALSE) 30 | # check whether there is a path between two nodes. 31 | path.exists(x, from, to, direct = TRUE, underlying.graph = FALSE, debug = FALSE) 32 | # build the skeleton or a complete orientation of the graph. 33 | skeleton(x) 34 | pdag2dag(x, ordering) 35 | # build a subgraph spanning a subset of nodes. 36 | subgraph(x, nodes) 37 | } 38 | \arguments{ 39 | \item{x}{an object of class \code{bn}. \code{skeleton()}, \code{acyclic()}, 40 | \code{directed()} and \code{path.exixsts()} also accept objects of class 41 | \code{bn.fit}.} 42 | \item{from}{a character string, the label of a node.} 43 | \item{to}{a character string, the label of a node (different from 44 | \code{from}).} 45 | \item{direct}{a boolean value. If \code{FALSE} ignore any arc between 46 | \code{from} and \code{to} when looking for a path.} 47 | \item{underlying.graph}{a boolean value. If \code{TRUE} the underlying 48 | undirected graph is used instead of the (directed) one from the \code{x} 49 | argument.} 50 | \item{ordering}{the labels of all the nodes in the graph; their order is the 51 | node ordering used to set the direction of undirected arcs.} 52 | \item{nodes}{the labels of the nodes that induce the subgraph.} 53 | \item{directed}{a boolean value. If \code{TRUE} only completely directed 54 | cycles are considered; otherwise undirected arcs will also be considered 55 | and treated as arcs present in both directions.} 56 | \item{debug}{a boolean value. 
If \code{TRUE} a lot of debugging output is 57 | printed; otherwise the function is completely silent.} 58 | } 59 | \value{ 60 | 61 | \code{acyclic()}, \code{path.exists()} and \code{directed()} return a boolean value. \cr 62 | \code{skeleton()}, \code{pdag2dag()} and \code{subgraph()} return an object of 63 | class \code{bn}. 64 | 65 | \code{valid.dag()}, \code{valid.cpdag()} and \code{valid.ug()} return a 66 | logical value. 67 | 68 | } 69 | \references{ 70 | 71 | Bang-Jensen J, Gutin G (2009). \emph{Digraphs: Theory, Algorithms and 72 | Applications}. Springer, 2nd edition. 73 | 74 | } 75 | \examples{ 76 | data(learning.test) 77 | cpdag = pc.stable(learning.test) 78 | 79 | acyclic(cpdag) 80 | directed(cpdag) 81 | dag = pdag2dag(cpdag, ordering = LETTERS[1:6]) 82 | dag 83 | directed(dag) 84 | skeleton(dag) 85 | } 86 | \author{Marco Scutari} 87 | \keyword{graphs} 88 | -------------------------------------------------------------------------------- /man/hc.Rd: -------------------------------------------------------------------------------- 1 | \name{score-based algorithms} 2 | \alias{score-based algorithms} 3 | \alias{hc} 4 | \alias{tabu} 5 | \title{Score-based structure learning algorithms} 6 | \description{ 7 | 8 | Learn the structure of a Bayesian network using a hill-climbing (HC) or a 9 | Tabu search (TABU) greedy search. 10 | 11 | } 12 | \usage{ 13 | hc(x, start = NULL, whitelist = NULL, blacklist = NULL, score = NULL, ..., 14 | debug = FALSE, restart = 0, perturb = 1, max.iter = Inf, maxp = Inf, optimized = TRUE) 15 | tabu(x, start = NULL, whitelist = NULL, blacklist = NULL, score = NULL, ..., 16 | debug = FALSE, tabu = 10, max.tabu = tabu, max.iter = Inf, maxp = Inf, optimized = TRUE) 17 | } 18 | \arguments{ 19 | \item{x}{a data frame containing the variables in the model.} 20 | \item{start}{an object of class \code{bn}, the preseeded directed acyclic 21 | graph used to initialize the algorithm. If none is specified, an empty one 22 | (i.e. without any arc) is used.} 23 | \item{whitelist}{a data frame with two columns (optionally labeled "from" and 24 | "to"), containing a set of arcs to be included in the graph.} 25 | \item{blacklist}{a data frame with two columns (optionally labeled "from" and 26 | "to"), containing a set of arcs not to be included in the graph.} 27 | \item{score}{a character string, the label of the network score to be used in 28 | the algorithm. If none is specified, the default score is the \emph{Bayesian 29 | Information Criterion} for both discrete and continuous data sets. See 30 | \code{\link{network scores}} for details.} 31 | \item{\dots}{additional tuning parameters for the network score. See 32 | \code{\link{score}} for details.} 33 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 34 | printed; otherwise the function is completely silent.} 35 | \item{restart}{an integer, the number of random restarts.} 36 | \item{tabu}{a positive integer number, the length of the tabu list used in the 37 | \code{tabu} function.} 38 | \item{max.tabu}{a positive integer number, the maximum number of iterations tabu search can 39 | perform without improving the best network score.} 40 | \item{perturb}{an integer, the number of attempts to randomly 41 | insert/remove/reverse an arc on every random restart.} 42 | \item{max.iter}{an integer, the maximum number of iterations.} 43 | \item{maxp}{the maximum number of parents allowed for a node in any network 44 | that is considered in the search, including the one that is returned.
The 45 | default value is \code{Inf}.} 46 | \item{optimized}{a boolean value. If \code{TRUE} (the default), score caching 47 | is used to speed up structure learning.} 48 | } 49 | \note{ 50 | 51 | See \code{\link{structure learning}} for a complete list of structure learning 52 | algorithms with the respective references. 53 | 54 | } 55 | \value{ 56 | 57 | An object of class \code{bn}. See \code{\link{bn-class}} for details. 58 | 59 | } 60 | \author{Marco Scutari} 61 | \seealso{\code{\link{network scores}}, \link{constraint-based algorithms}, 62 | \link{hybrid algorithms}, \link{local discovery algorithms}, 63 | \link{alpha.star}.} 64 | \keyword{structure learning} 65 | -------------------------------------------------------------------------------- /src/fitted/fitted.h: -------------------------------------------------------------------------------- 1 | #ifndef BN_FIT_OBJECTS_HEADER 2 | #define BN_FIT_OBJECTS_HEADER 3 | 4 | /* enum for fitted node types, to match the class in the R objects. */ 5 | typedef enum { 6 | ENOFIT = 0, /* error code, no such node type. */ 7 | DNODE = 1, /* categorical node. */ 8 | ONODE = 2, /* ordinal node. */ 9 | GNODE = 3, /* Gaussian node. */ 10 | CGNODE = 4 /* conditional Gaussian node. */ 11 | } fitted_node_e; 12 | 13 | /* enum for fitted network types, to match the class in the R objects. */ 14 | typedef enum { 15 | ENONET = 0, /* error code, no such network type. */ 16 | DNET = 1, /* discrete Bayesian networks. */ 17 | ONET = 2, /* ordinal Bayesian networks. */ 18 | DONET = 3, /* mixed categorical and ordinal nodes. */ 19 | GNET = 4, /* Gaussian Bayesian networks. */ 20 | CGNET = 5 /* conditional Gaussian networks. */ 21 | } fitted_net_e; 22 | 23 | fitted_node_e fitted_node_to_enum(SEXP object); 24 | fitted_net_e fitted_net_to_enum(SEXP object); 25 | 26 | /* a local distribution, meant to be embedded in the fitted_bn struct below. */ 27 | typedef struct { 28 | 29 | int nparents; /* number of parents. */ 30 | int *parents; /* indexes of the parent nodes. */ 31 | 32 | union { 33 | 34 | struct { 35 | 36 | int ndims; /* number of dimensions of the CPT. */ 37 | int *dims; /* dimensions of the CPT. */ 38 | int nconfigs; /* number of parents configurations. */ 39 | double *cpt; /* conditional probability table. */ 40 | 41 | } d; 42 | 43 | struct { 44 | 45 | int ncoefs; /* number of regression coefficients. */ 46 | double *coefs; /* regression coefficients. */ 47 | double sd; /* standard error of the residuals. */ 48 | 49 | } g; 50 | 51 | struct { 52 | 53 | int ndparents; /* number of discrete parents. */ 54 | int ngparents; /* number of continuous parents. */ 55 | int *dparents; /* indexes of the discrete parents in the parents array. */ 56 | int *gparents; /* indexes of the continuous parents in the parents array. */ 57 | int ncoefs; /* number of regression coefficients. */ 58 | int nconfigs; /* number of discrete parents configurations. */ 59 | double *coefs; /* regression coefficients. */ 60 | double *sd; /* standard errors of the residuals. */ 61 | 62 | } cg; 63 | 64 | }; 65 | 66 | } ldist; 67 | 68 | /* a fitted Bayesian network, mapping to the bn.fit class in R code. */ 69 | typedef struct { 70 | 71 | fitted_net_e type; /* network type (discrete, Gaussian, etc.). */ 72 | int nnodes; /* number of nodes in the network. */ 73 | const char **labels; /* node labels. */ 74 | fitted_node_e *node_types; /* node types (discrete, Gaussian, etc.). */ 75 | ldist *ldists; /* local distributions for the nodes. 
*/ 76 | 77 | } fitted_bn; 78 | 79 | fitted_bn fitted_network_from_SEXP(SEXP fitted); 80 | void print_fitted_network(fitted_bn); 81 | void FreeFittedBN(fitted_bn bn); 82 | 83 | double nparams_fitted_node(ldist ld, fitted_node_e type); 84 | 85 | #endif 86 | --------------------------------------------------------------------------------
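The header above only declares nparams_fitted_node(); its implementation lives in src/fitted/nparams.c, which is not reproduced here. The following is a minimal sketch of how such a counter could be written against the ldist and fitted_node_e declarations, in the repo's own C style. It assumes that dims[0] is the cardinality of the node itself, that ncoefs already includes the intercept, that cg.ncoefs counts the coefficients of all discrete parents configurations together, and that residual standard deviations are counted as free parameters; none of these conventions are confirmed by the sources shown, and sketch_nparams_fitted_node is a hypothetical name, not a package function.

#include "../include/rcore.h"
#include "fitted.h"

/* hypothetical helper (not part of the package): count the free parameters of
 * a single local distribution under the assumptions stated above. */
double sketch_nparams_fitted_node(ldist ld, fitted_node_e type) {

  switch (type) {

    case DNODE:
    case ONODE:
      /* CPT: (cardinality - 1) free probabilities per parents configuration,
       * assuming dims[0] is the node's own number of levels. */
      return (double)(ld.d.dims[0] - 1) * ld.d.nconfigs;

    case GNODE:
      /* regression coefficients (intercept assumed included) plus the
       * residual standard deviation. */
      return (double)(ld.g.ncoefs + 1);

    case CGNODE:
      /* coefficients of all conditional regressions (assumed pooled in
       * ncoefs) plus one residual standard deviation per discrete parents
       * configuration. */
      return (double)(ld.cg.ncoefs + ld.cg.nconfigs);

    default:
      /* ENOFIT or an unknown node type: nothing to count. */
      return 0;

  }/*SWITCH*/

}/*SKETCH_NPARAMS_FITTED_NODE*/

Keeping the per-type fields in an anonymous union, as fitted.h does, means each node only stores the representation its type needs, and a single switch on fitted_node_e is enough to dispatch on the layout.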