├── data ├── alarm.rda ├── asia.rda ├── marks.rda ├── coronary.rda ├── lizards.rda ├── hailfinder.rda ├── insurance.rda ├── gaussian.test.rda ├── learning.test.rda └── clgaussian.test.rda ├── src ├── minimal │ ├── table.h │ ├── unique.h │ ├── strings.h │ ├── data.frame.h │ ├── common.h │ ├── unique.c │ ├── strings.c │ └── tiers.c ├── include │ ├── learning.h │ ├── bn.h │ ├── sampling.h │ ├── globals.h │ ├── rcore.h │ └── graph.h ├── sanitization │ ├── data.h │ └── covariance.c ├── core │ ├── sort.h │ ├── math.functions.h │ ├── sets.h │ ├── correlation.h │ ├── sampling.h │ ├── moments.h │ ├── sort.c │ ├── allocations.h │ ├── covariance.matrix.h │ ├── uppertriangular.c │ ├── uppertriangular.h │ ├── allocations.c │ └── contingency.tables.h ├── preprocessing │ ├── enums.c │ └── preprocessing.h ├── inference │ ├── rinterface │ │ ├── likelihood.weighting.c │ │ ├── rbn.c │ │ └── cpdist.c │ ├── loglikelihood │ │ ├── loglikelihood.h │ │ └── common.c │ └── likelihood.weighting.c ├── tests │ ├── gaussian │ │ ├── df.adjust.c │ │ └── gaussian.tests.c │ ├── rinterface │ │ ├── indep.test.c │ │ └── htest.c │ ├── enums.c │ ├── omnibus │ │ └── custom.test.c │ └── discrete │ │ └── df.adjust.c ├── graphs │ ├── graphs.h │ ├── completely.directed.c │ ├── rinterface │ │ ├── path.matrix.c │ │ ├── connected.components.c │ │ └── sid.c │ └── connected.components.c ├── fitted │ ├── rinterface │ │ └── nparams.c │ ├── enums.c │ ├── nparams.c │ └── fitted.h ├── test.counter.c ├── parameters │ ├── parameters.h │ ├── discrete │ │ └── classic.discrete.c │ └── rinterface │ │ └── classic.discrete.c ├── foreign │ └── parse.c ├── arcs │ ├── bind.c │ └── arcs2amat.c ├── scores │ ├── enum.c │ ├── custom.score.c │ ├── nml.regret.c │ └── nml.regret.table.c ├── math │ └── linear.algebra.h └── bnlearn │ ├── nparams.c │ └── shd.c ├── R ├── utils-amat.R ├── likelihood.R ├── nparams.R ├── refit.lm.R ├── utils-elist.R ├── sanitization-formula.R ├── utils-tests.R ├── frontend-averaging.R ├── aracne.R ├── simulation.R ├── utils-plot.R ├── sanitization-mutual.R ├── scores.R ├── chow.liu.R ├── utils-cluster.R ├── utils-print.R ├── frontend-amat.R ├── backend-score.R ├── test.R ├── sanitization-enumeration.R ├── frontend-formula.R ├── data.preprocessing.R ├── sanitization-amat.R ├── sanitization-classifiers.R ├── frontend-queries.R ├── averaged.network.R ├── classifiers.R ├── sanitization-misc.R ├── frontend-causal.R ├── sanitization-bootstrap.R ├── bootstrap.R ├── frontend-mvnorm.R ├── graph-generation.R └── frontend-foreign.R ├── man ├── pcalg.Rd ├── test.counter.Rd ├── gaussian-test.Rd ├── clgaussian-test.Rd ├── ordering.Rd ├── configs.Rd ├── dsep.Rd ├── igraphpkg.Rd ├── plot.bn.strength.Rd ├── bayesian.network.classifiers.Rd ├── graphpkg.Rd ├── learning-test.Rd ├── alpha.star.Rd ├── statspkg.Rd ├── mvnorm.Rd ├── marks.Rd ├── mi.matrix.Rd ├── bn.strength-class.Rd ├── kl.Rd ├── plot.bn.Rd ├── nodeops.Rd ├── lizards.Rd ├── rbn.Rd ├── gRain.Rd ├── bf.Rd ├── foreign.Rd ├── bn.fit.plots.Rd ├── bnboot.Rd ├── bn.kcv.class.Rd ├── count.graphs.Rd ├── modelstring.Rd ├── blacklist.Rd ├── coronary.Rd ├── whitelists.and.blacklists.Rd ├── hybrid.Rd ├── asia.Rd ├── graph.Rd └── hc.Rd └── DESCRIPTION /data/alarm.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/alarm.rda -------------------------------------------------------------------------------- /data/asia.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/asia.rda -------------------------------------------------------------------------------- /data/marks.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/marks.rda -------------------------------------------------------------------------------- /data/coronary.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/coronary.rda -------------------------------------------------------------------------------- /data/lizards.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/lizards.rda -------------------------------------------------------------------------------- /data/hailfinder.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/hailfinder.rda -------------------------------------------------------------------------------- /data/insurance.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/insurance.rda -------------------------------------------------------------------------------- /data/gaussian.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/gaussian.test.rda -------------------------------------------------------------------------------- /data/learning.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/learning.test.rda -------------------------------------------------------------------------------- /data/clgaussian.test.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/bnlearn/HEAD/data/clgaussian.test.rda -------------------------------------------------------------------------------- /src/minimal/table.h: -------------------------------------------------------------------------------- 1 | #ifndef TABLE_HEADER 2 | #define TABLE_HEADER 3 | 4 | SEXP minimal_table(SEXP dataframe, SEXP missing); 5 | 6 | #endif 7 | -------------------------------------------------------------------------------- /src/minimal/unique.h: -------------------------------------------------------------------------------- 1 | #ifndef R_UNIQUE_HEADER 2 | #define R_UNIQUE_HEADER 3 | 4 | SEXP unique(SEXP array); 5 | SEXP dupe(SEXP array); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/include/learning.h: -------------------------------------------------------------------------------- 1 | 2 | /* shared between hill climbing and tabu search. 
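 * (note inferred from the signature below, not from the upstream sources: bestop_update() records the chosen operation and the arc it acts on, from -> to, in the bestop buffer.)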
*/ 3 | void bestop_update(SEXP bestop, char *op, const char *from, const char *to); 4 | 5 | -------------------------------------------------------------------------------- /src/sanitization/data.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_SANITIZATION_HEADER 2 | #define DATA_SANITIZATION_HEADER 3 | 4 | SEXP data_type(SEXP data); 5 | SEXP count_observed_values(SEXP data); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/core/sort.h: -------------------------------------------------------------------------------- 1 | #ifndef SORTING_HEADER 2 | #define SORTING_HEADER 3 | 4 | void i_sort(int *array, int *indexes, int length); 5 | void d_sort(double *array, int *indexes, int length); 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/include/bn.h: -------------------------------------------------------------------------------- 1 | 2 | /* from cache.structure.c */ 3 | SEXP cache_structure(SEXP nodes, SEXP amat, SEXP debug); 4 | SEXP cache_node_structure(int cur, SEXP nodes, int *amat, int nrow, 5 | int *status, bool debugging); 6 | -------------------------------------------------------------------------------- /src/minimal/strings.h: -------------------------------------------------------------------------------- 1 | #ifndef R_STRINGS_HEADER 2 | #define R_STRINGS_HEADER 3 | 4 | SEXP string_delete(SEXP array, SEXP string, int *idx); 5 | SEXP string_setdiff(SEXP large, SEXP small); 6 | SEXP mkStringVec(int n, ...); 7 | 8 | #endif 9 | -------------------------------------------------------------------------------- /src/include/sampling.h: -------------------------------------------------------------------------------- 1 | #include "../core/contingency.tables.h" 2 | 3 | /* from rbn.c */ 4 | void c_rbn_master(SEXP fitted, SEXP result, SEXP n, SEXP fix, bool add_metadata, 5 | bool debugging); 6 | 7 | /* from likelihood.weighting.c */ 8 | void c_lw_weights(SEXP fitted, SEXP data, int n, double *w, SEXP keep, 9 | bool debugging); 10 | -------------------------------------------------------------------------------- /src/minimal/data.frame.h: -------------------------------------------------------------------------------- 1 | #ifndef DATA_FRAME_HEADER 2 | #define DATA_FRAME_HEADER 3 | 4 | SEXP minimal_data_frame(SEXP obj); 5 | SEXP c_dataframe_column(SEXP dataframe, SEXP name, bool drop, bool keep_names); 6 | SEXP node2df(SEXP target, int n); 7 | SEXP fitnode2df(SEXP fitted, SEXP node, int n); 8 | SEXP fit2df(SEXP fitted, int n); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /src/core/math.functions.h: -------------------------------------------------------------------------------- 1 | #ifndef MATH_FUNCTIONS_HEADER 2 | #define MATH_FUNCTIONS_HEADER 3 | 4 | int imax(int x, int y); 5 | int all_max(double *array, int length, int *maxima, int *indexes, double *buf); 6 | int i_which_max(int *array, int length); 7 | int d_which_max(double *array, int length); 8 | int ld_which_max(long double *array, int length); 9 | 10 | #endif 11 | -------------------------------------------------------------------------------- /src/preprocessing/enums.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "preprocessing.h" 3 | 4 | #define ENTRY(key, value) if (strcmp(label, key) == 0) return value; 5 | 6 | discretization_e 
discretization_to_enum(const char *label) { 7 | 8 | ENTRY("quantile", QUANTILE); 9 | ENTRY("interval", INTERVAL); 10 | ENTRY("hartemink", HARTEMINK); 11 | 12 | return ENOMETHOD; 13 | 14 | }/*DISCRETIZATION_TO_ENUM*/ 15 | -------------------------------------------------------------------------------- /R/utils-amat.R: -------------------------------------------------------------------------------- 1 | 2 | # convert a set of arcs to a (real) adjacency matrix. 3 | arcs2amat = function(arcs, nodes) { 4 | 5 | .Call(call_arcs2amat, 6 | arcs = as.character(arcs), 7 | nodes = as.character(nodes)) 8 | 9 | }#ARCS2AMAT 10 | 11 | # convert an adjacency matrix back to a set of arcs. 12 | amat2arcs = function(a, nodes) { 13 | 14 | .Call(call_amat2arcs, 15 | amat = a, 16 | nodes = nodes) 17 | 18 | }#AMAT2ARCS 19 | 20 | -------------------------------------------------------------------------------- /src/core/sets.h: -------------------------------------------------------------------------------- 1 | #ifndef SETS_HEADER 2 | #define SETS_HEADER 3 | 4 | void cfg(SEXP parents, int *configurations, int *nlevels); 5 | void c_fast_config(int **columns, int nrow, int ncol, int *levels, 6 | int *configurations, int *nlevels, int offset); 7 | SEXP c_configurations(SEXP parents, int factor, int all_levels); 8 | 9 | void first_subset(int *work, int n, int offset); 10 | int next_subset(int *work, int n, int max, int offset); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /src/core/correlation.h: -------------------------------------------------------------------------------- 1 | #ifndef CORRELATION_HEADER 2 | #define CORRELATION_HEADER 3 | 4 | #include "covariance.matrix.h" 5 | 6 | double c_fast_cor(double *xx, double *yy, int num, double xm, double ym, 7 | long double xsd, long double ysd); 8 | double c_cor_with_missing(double *x, double *y, int nobs, double *xm, 9 | double *ym, double *xsd, double *ysd, int *ncomplete); 10 | double c_fast_pcor(covariance cov, int v1, int v2, int *err, bool decomp); 11 | 12 | #endif 13 | -------------------------------------------------------------------------------- /R/likelihood.R: -------------------------------------------------------------------------------- 1 | 2 | # compute the log-likelihood of some data for a given network. 
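# (illustrative note: with by.sample = TRUE the backend is expected to return one log-likelihood term per observation rather than their sum.)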
3 | loglikelihood = function(fitted, data, by.sample = FALSE, keep = names(fitted), 4 | propagate.missing = FALSE, debug = FALSE) { 5 | 6 | .Call(call_loglikelihood_function, 7 | fitted = fitted, 8 | data = data, 9 | by.sample = by.sample, 10 | keep.nodes = keep, 11 | propagate.missing = propagate.missing, 12 | debug = debug) 13 | 14 | }#LOGLIKELIHOOD 15 | -------------------------------------------------------------------------------- /src/core/sampling.h: -------------------------------------------------------------------------------- 1 | #ifndef SAMPLING_HEADER 2 | #define SAMPLING_HEADER 3 | 4 | void SampleNoReplace(int k, int n, int *y, int *x); 5 | #define RandomPermutation(n, y, x) SampleNoReplace(n, n, y, x) 6 | void SampleReplace(int k, int n, int *y, int *x); 7 | void ProbSampleReplace(int n, double *probs, int *values, int ns, int *samples); 8 | void CondProbSampleReplace(int nprobs, int nconf, double *probs, int *conf, 9 | int *values, int ns, int *samples, bool *warn); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /R/nparams.R: -------------------------------------------------------------------------------- 1 | 2 | # number of parameters for a DAG and an associated data set. 3 | nparams.backend = function(x, data, debug = FALSE) { 4 | 5 | .Call(call_nparams_cgnet, 6 | graph = x, 7 | data = .data.frame.column(data, names(x$nodes)), 8 | debug = debug) 9 | 10 | }#NPARAMS.BACKEND 11 | 12 | # number of parameters for a fitted network. 13 | nparams.fitted = function(x, debug = FALSE) { 14 | 15 | .Call(call_nparams_fitted, 16 | bn = x, 17 | debug = debug) 18 | 19 | }#NPARAMS.FITTED 20 | 21 | -------------------------------------------------------------------------------- /src/inference/rinterface/likelihood.weighting.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../include/sampling.h" 3 | 4 | /* compute the weights of the likelihood weighting particles. */ 5 | SEXP lw_weights(SEXP fitted, SEXP data, SEXP keep, SEXP debug) { 6 | 7 | int n = length(VECTOR_ELT(data, 0)); 8 | SEXP weights; 9 | 10 | PROTECT(weights = allocVector(REALSXP, n)); 11 | 12 | c_lw_weights(fitted, data, n, REAL(weights), keep, isTRUE(debug)); 13 | 14 | UNPROTECT(1); 15 | 16 | return weights; 17 | 18 | }/*LW_WEIGHTS*/ 19 | 20 | -------------------------------------------------------------------------------- /src/core/moments.h: -------------------------------------------------------------------------------- 1 | #ifndef MOMENTS_HEADER 2 | #define MOMENTS_HEADER 3 | 4 | double c_sse(double *data, double mean, int nrow); 5 | double c_mean(double *data, int nrow); 6 | 7 | void c_meanvec(double **data, double *mean, int nrow, int ncol, int first); 8 | void c_ssevec(double **data, double *sse, double *means, int nrow, int ncol, 9 | int first); 10 | 11 | void c_sd(double *xx, int nobs, int p, double mean, double *sd); 12 | void c_cgsd(double *xx, int *z, int *nz, int nobs, int nstrata, int p, 13 | long double *means, double *sd); 14 | 15 | #endif 16 | -------------------------------------------------------------------------------- /R/refit.lm.R: -------------------------------------------------------------------------------- 1 | 2 | # backend for the as.lm() functions. 3 | lm.refit.node = function(node, data) { 4 | 5 | # construct the model formula, inserting the intercept term as needed. 
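# (for example, a hypothetical node D with parents A and B gives "D ~ A+B", while a parentless node gives "D ~ 1".)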
6 | if (length(node$parents) == 0) 7 | model = paste(node$node, "~ 1") 8 | else 9 | model = paste(node$node, "~", paste(node$parents, collapse = "+")) 10 | # fit the model. 11 | lm.fit = lm(model, data = data) 12 | # replace the formula in the call to lm() for legibility. 13 | lm.fit$call$formula = formula(model) 14 | 15 | return(lm.fit) 16 | 17 | }#LM.REFIT.NODE 18 | 19 | -------------------------------------------------------------------------------- /R/utils-elist.R: -------------------------------------------------------------------------------- 1 | 2 | # convert a set of arcs to a (real) edge list. 3 | arcs2elist = function(arcs, nodes, weights = NULL, nid = TRUE, sublist = TRUE, 4 | parents = FALSE) { 5 | 6 | .Call(call_arcs2elist, 7 | arcs = arcs, 8 | nodes = nodes, 9 | weigths = weights, 10 | nid = nid, 11 | sublist = sublist, 12 | parents = parents) 13 | 14 | }#ARCS2ELIST 15 | 16 | # convert an edge list into an arc set. 17 | elist2arcs = function(elist) { 18 | 19 | .Call(call_elist2arcs, 20 | elist = elist) 21 | 22 | }#ELIST2ARCS 23 | 24 | -------------------------------------------------------------------------------- /src/tests/gaussian/df.adjust.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | double gaussian_cdf(test_e test, int num, int nz) { 5 | 6 | double df = 0; 7 | 8 | switch(test) { 9 | 10 | case COR: 11 | df = num - nz - 2; 12 | break; 13 | 14 | case MI_G: 15 | case MI_G_SH: 16 | df = 1; 17 | break; 18 | 19 | case ZF: 20 | df = num - nz - 3; 21 | break; 22 | 23 | default: 24 | error("no degrees of freedom for this test."); 25 | 26 | }/*SWITCH*/ 27 | 28 | return df; 29 | 30 | }/*GAUSSIAN_CDF*/ 31 | -------------------------------------------------------------------------------- /src/minimal/common.h: -------------------------------------------------------------------------------- 1 | #ifndef R_COMMON_HEADER 2 | #define R_COMMON_HEADER 3 | 4 | /* macro for the number of levels of a factor. */ 5 | #define NLEVELS(x) length(getAttrib(x, R_LevelsSymbol)) 6 | 7 | /* from common.c */ 8 | bool c_is(SEXP obj, const char *str); 9 | SEXP getListElement(SEXP list, char *str); 10 | void *DATAPTR(SEXP x); 11 | SEXP mkReal(double x); 12 | SEXP mkRealVec(int n, ...); 13 | SEXP int2fac(SEXP vector, int *nlevels); 14 | void setDimNames(SEXP obj, SEXP rownames, SEXP colnames); 15 | SEXP subset_by_name(SEXP vec, int n, ...); 16 | bool all_equal(SEXP vec, SEXP val); 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/inference/rinterface/rbn.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../include/sampling.h" 3 | #include "../../minimal/data.frame.h" 4 | 5 | /* generate random observations from a bayesian network. */ 6 | SEXP rbn_master(SEXP fitted, SEXP n, SEXP fix, SEXP add_metadata, SEXP debug) { 7 | 8 | SEXP result; 9 | 10 | /* allocate the return value. */ 11 | PROTECT(result = fit2df(fitted, INT(n))); 12 | /* generate the random observations. 
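 * (presumably parents-first, following a topological ordering of the network, as in the logic sampling scheme referenced in R/simulation.R.)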
*/ 13 | c_rbn_master(fitted, result, n, fix, isTRUE(add_metadata), isTRUE(debug)); 14 | 15 | UNPROTECT(1); 16 | 17 | return result; 18 | 19 | }/*RBN_MASTER*/ 20 | 21 | -------------------------------------------------------------------------------- /src/tests/gaussian/gaussian.tests.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | 3 | /* correlation transform for the exact t-test. */ 4 | double cor_t_trans(double cor, double df) { 5 | 6 | return cor * sqrt(df) / sqrt(1 - cor * cor); 7 | 8 | }/*COR_T_TRANS*/ 9 | 10 | /* correlation transform for Fisher's Z test. */ 11 | double cor_zf_trans(double cor, double df) { 12 | 13 | return log((1 + cor)/(1 - cor)) / 2 * sqrt(df); 14 | 15 | }/*COR_ZF_TRANS*/ 16 | 17 | /* correlation transform for mutual information. */ 18 | double cor_mi_trans(double cor) { 19 | 20 | return - 0.5 * log(1 - cor * cor); 21 | 22 | }/*COR_MI_TRANS*/ 23 | 24 | -------------------------------------------------------------------------------- /src/graphs/graphs.h: -------------------------------------------------------------------------------- 1 | #ifndef GRAPHICAL_FUNCTIONS_HEADER 2 | #define GRAPHICAL_FUNCTIONS_HEADER 3 | 4 | int ug_connected_components(int *amat, char **labels, int nnodes, int **buffer, 5 | int *buflen, bool debugging); 6 | void dag_path_matrix(int *amat, int *pathmat, int nnodes, char **labels, 7 | int *root_ids, int nroots, bool debugging); 8 | void initialize_reachability_matrix(int *amat, int *pmat, int nnodes, 9 | int target, bool *cond_set, int *rmat, char **labels, bool debugging); 10 | void complete_reachability_matrix(int *initial_rmat, int *final_rmat, 11 | int nnodes, char **labels, bool debugging); 12 | 13 | #endif 14 | -------------------------------------------------------------------------------- /src/include/globals.h: -------------------------------------------------------------------------------- 1 | 2 | /* the global test counter, now living in C-land. */ 3 | extern double test_counter; 4 | 5 | /* symbols used to set object attributes. */ 6 | extern SEXP BN_ModelstringSymbol; 7 | extern SEXP BN_NodesSymbol; 8 | extern SEXP BN_ProbSymbol; 9 | extern SEXP BN_MethodSymbol; 10 | extern SEXP BN_WeightsSymbol; 11 | extern SEXP BN_DsepsetSymbol; 12 | extern SEXP BN_MetaDataSymbol; 13 | extern SEXP BN_NobsSymbol; 14 | extern SEXP BN_DfSymbol; 15 | extern SEXP BN_CutpointsSymbol; 16 | 17 | /* shortcuts to boolean SEXPs. */ 18 | extern SEXP TRUESEXP, FALSESEXP; 19 | 20 | /* numerical constants */ 21 | #define MACHINE_TOL sqrt(DBL_EPSILON) 22 | 23 | -------------------------------------------------------------------------------- /R/sanitization-formula.R: -------------------------------------------------------------------------------- 1 | 2 | # check the string representation (aka the formula) of a network. 3 | check.modelstring = function(string) { 4 | 5 | # check the type. 6 | if (!is.string(string)) 7 | stop("string must be a character string.") 8 | 9 | # check the syntax (separate regexps for root nodes and non-root nodes).
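# (e.g. "[A][C|A][B|C]" is well-formed: "[A]" matches the root-node regexp and "[C|A]", "[B|C]" the non-root one.)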
10 | correct.format = paste("^(", 11 | "\\[[^\\[\\]\\|:]+?\\]", 12 | "|", 13 | "\\[[^\\[\\]\\|:]+?\\|[^\\[\\]\\|:]+?([:]{0,1}[^\\[\\]\\|:])*?\\]", 14 | ")+$", sep = "") 15 | 16 | if (!grepl(correct.format, string, perl = TRUE)) 17 | stop("malformed model string format (see ?modelstring).") 18 | 19 | }#CHECK.MODELSTRING 20 | 21 | -------------------------------------------------------------------------------- /src/include/rcore.h: -------------------------------------------------------------------------------- 1 | #define USE_FC_LEN_T 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | /* for backwards compatibility with older R versions. */ 11 | #ifndef MAYBE_REFERENCED 12 | #define MAYBE_REFERENCED(x) (NAMED(x) > 0) 13 | #endif 14 | 15 | /* utility macros. */ 16 | #define isTRUE(logical) ((LOGICAL(logical)[0]) == TRUE) 17 | #define INT(x) INTEGER(x)[0] 18 | #define NUM(x) REAL(x)[0] 19 | #define NODE(i) CHAR(STRING_ELT(nodes, i)) 20 | #define MIN(a,b) (((a)<(b))?(a):(b)) 21 | #define MAX(a,b) (((a)>(b))?(a):(b)) 22 | 23 | -------------------------------------------------------------------------------- /src/core/sort.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* quick sort of an integer vector, with or without indexes. */ 4 | void i_sort(int *array, int *indexes, int length) { 5 | 6 | if (length == 0) 7 | return; 8 | 9 | if (!indexes) 10 | R_qsort_int(array, 1, length); 11 | else 12 | R_qsort_int_I(array, indexes, 1, length); 13 | 14 | }/*I_SORT*/ 15 | 16 | /* quick sort of a double vector, with or without indexes. */ 17 | void d_sort(double *array, int *indexes, int length) { 18 | 19 | if (length == 0) 20 | return; 21 | 22 | if (!indexes) 23 | R_qsort(array, 1, length); 24 | else 25 | R_qsort_I(array, indexes, 1, length); 26 | 27 | }/*D_SORT*/ 28 | -------------------------------------------------------------------------------- /src/fitted/rinterface/nparams.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../fitted/fitted.h" 3 | 4 | /* compute the number of parameters of a fitted model. */ 5 | SEXP nparams_fitted(SEXP fitted, SEXP debug) { 6 | 7 | double node_params = 0, all_params = 0; 8 | fitted_bn bn = fitted_network_from_SEXP(fitted); 9 | 10 | for (int i = 0; i < bn.nnodes; i++) { 11 | 12 | node_params = nparams_fitted_node(bn.ldists[i], bn.node_types[i]); 13 | 14 | if (isTRUE(debug)) 15 | Rprintf("* node %s has %.0lf parameter(s).\n", bn.labels[i], node_params); 16 | 17 | all_params += node_params; 18 | 19 | }/*FOR*/ 20 | 21 | FreeFittedBN(bn); 22 | 23 | return ScalarReal(all_params); 24 | 25 | }/*NPARAMS_FITTED*/ 26 | 27 | -------------------------------------------------------------------------------- /man/pcalg.Rd: -------------------------------------------------------------------------------- 1 | \name{pcalg integration} 2 | \alias{pcalg integration} 3 | \alias{as.bn.pcAlgo} 4 | \title{Import and export networks from the pcalg package} 5 | \description{ 6 | 7 | Convert \code{pcAlgo} objects to \code{bn} objects. 8 | 9 | } 10 | \usage{ 11 | \method{as.bn}{pcAlgo}(x, ..., check.cycles = TRUE) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{pcAlgo}.} 15 | \item{\dots}{extra arguments from the generic method (currently ignored).} 16 | \item{check.cycles}{a boolean value. 
If \code{FALSE} the returned network will 17 | not be checked for cycles.} 18 | } 19 | \value{ 20 | 21 | An object of class \code{bn}. 22 | 23 | } 24 | \author{Marco Scutari} 25 | \keyword{utilities} 26 | \keyword{interfaces to other packages} 27 | -------------------------------------------------------------------------------- /man/test.counter.Rd: -------------------------------------------------------------------------------- 1 | \name{test counter} 2 | \alias{test.counter} 3 | \alias{increment.test.counter} 4 | \alias{reset.test.counter} 5 | \title{Manipulating the test counter} 6 | \description{ 7 | 8 | Check, increment or reset the test/score counter used in structure learning 9 | algorithms. 10 | 11 | } 12 | \usage{ 13 | test.counter() 14 | increment.test.counter(i = 1) 15 | reset.test.counter() 16 | } 17 | \arguments{ 18 | \item{i}{a numeric value, which is added to the test counter.} 19 | } 20 | \value{ 21 | 22 | A numeric value, the current value of the test counter. 23 | 24 | } 25 | \examples{ 26 | data(learning.test) 27 | hc(learning.test) 28 | test.counter() 29 | reset.test.counter() 30 | test.counter() 31 | } 32 | \author{Marco Scutari} 33 | \keyword{convenience functions} 34 | -------------------------------------------------------------------------------- /src/tests/rinterface/indep.test.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | /* independence tests, frontend to be used in R code. */ 5 | SEXP indep_test(SEXP x, SEXP y, SEXP sx, SEXP data, SEXP test, SEXP alpha, 6 | SEXP extra_args, SEXP learning, SEXP complete) { 7 | 8 | /* if either node to test is not provided, return a zero-length numeric 9 | * vector. */ 10 | if (length(x) == 0 || length(y) == 0) 11 | return allocVector(REALSXP, 0); 12 | 13 | /* filter for NULL and empty strings to make it easy to interface with R. */ 14 | if (length(sx) == 0 || sx == R_NilValue) 15 | return utest(x, y, data, test, alpha, extra_args, learning, complete); 16 | else 17 | return ctest(x, y, sx, data, test, alpha, extra_args, learning, complete); 18 | 19 | }/*INDEP_TEST*/ 20 | -------------------------------------------------------------------------------- /src/core/allocations.h: -------------------------------------------------------------------------------- 1 | #ifndef ALLOCATIONS_HEADER 2 | #define ALLOCATIONS_HEADER 3 | 4 | /* memory allocation. 
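 * (Calloc1D/2D/3D below return 1-, 2- and 3-dimensional arrays; the matching Free1D/2D/3D macros also reset the freed pointer to NULL.)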
*/ 5 | void *Calloc1D(size_t R, size_t size); 6 | void **Calloc2D(size_t R, size_t C, size_t size); 7 | void ***Calloc3D(size_t R, size_t C, size_t L, size_t size); 8 | void *Realloc1D(void *p, size_t R, size_t size); 9 | void BN_Free1D(void *p); 10 | void BN_Free2D(void **p, size_t R); 11 | void BN_Free3D(void ***p, size_t R, size_t C); 12 | 13 | #define Free1D(p) \ 14 | do { \ 15 | BN_Free1D((void *)p); \ 16 | p = NULL; \ 17 | } while(0) 18 | #define Free2D(p, R) \ 19 | do { \ 20 | BN_Free2D((void **)p, R); \ 21 | p = NULL; \ 22 | } while (0) 23 | #define Free3D(p, R, C) \ 24 | do { \ 25 | BN_Free3D((void ***)p, R, C); \ 26 | p = NULL; \ 27 | } while (0) 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /man/gaussian-test.Rd: -------------------------------------------------------------------------------- 1 | \name{gaussian.test} 2 | \docType{data} 3 | \alias{gaussian.test} 4 | \title{Synthetic (continuous) data set to test learning algorithms} 5 | \description{ 6 | 7 | This is a synthetic data set used as a test case in the \pkg{bnlearn} package. 8 | 9 | } 10 | \usage{ 11 | data(gaussian.test) 12 | } 13 | \format{ 14 | 15 | The \code{gaussian.test} data set contains seven normal (Gaussian) variables. 16 | 17 | } 18 | \note{ 19 | 20 | The R script to generate data from this network is available from 21 | \url{https://www.bnlearn.com/documentation/networks/}. 22 | 23 | } 24 | \examples{ 25 | # load the data. 26 | data(gaussian.test) 27 | # create and plot the network structure. 28 | dag = model2network("[A][B][E][G][C|A:B][D|B][F|A:D:E:G]") 29 | \dontrun{graphviz.plot(dag)} 30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /src/test.counter.c: -------------------------------------------------------------------------------- 1 | #include "include/rcore.h" 2 | 3 | /* initialize the global test counter. */ 4 | double test_counter = 0; 5 | 6 | /* increment the global test counter. */ 7 | SEXP increment_test_counter(SEXP n) { 8 | 9 | switch(TYPEOF(n)) { 10 | 11 | case REALSXP: 12 | test_counter += NUM(n); 13 | break; 14 | 15 | case INTSXP: 16 | test_counter += INT(n); 17 | break; 18 | 19 | }/*SWITCH*/ 20 | 21 | return R_NilValue; 22 | 23 | }/*INCREMENT_TEST_COUNTER*/ 24 | 25 | /* reset the global test counter. */ 26 | SEXP reset_test_counter(void) { 27 | 28 | test_counter = 0; 29 | 30 | return R_NilValue; 31 | 32 | }/*RESET_TEST_COUNTER*/ 33 | 34 | /* return the global test counter, for R to see. */ 35 | SEXP get_test_counter(void) { 36 | 37 | return ScalarReal(test_counter); 38 | 39 | }/*GET_TEST_COUNTER*/ 40 | 41 | -------------------------------------------------------------------------------- /man/clgaussian-test.Rd: -------------------------------------------------------------------------------- 1 | \name{clgaussian.test} 2 | \docType{data} 3 | \alias{clgaussian.test} 4 | \title{Synthetic (mixed) data set to test learning algorithms} 5 | \description{ 6 | 7 | This is a synthetic data set used as a test case in the \pkg{bnlearn} package. 8 | 9 | } 10 | \usage{ 11 | data(clgaussian.test) 12 | } 13 | \format{ 14 | 15 | The \code{clgaussian.test} data set contains one normal (Gaussian) variable, 16 | 4 discrete variables and 3 conditional Gaussian variables. 17 | 18 | } 19 | \note{ 20 | 21 | The R script to generate data from this network is available from 22 | \url{https://www.bnlearn.com/documentation/networks/}. 23 | 24 | } 25 | \examples{ 26 | # load the data.
27 | data(clgaussian.test) 28 | # create and plot the network structure. 29 | dag = model2network("[A][B][C][H][D|A:H][F|B:C][E|B:D][G|A:D:E:F]") 30 | \dontrun{graphviz.plot(dag)} 31 | } 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /src/parameters/parameters.h: -------------------------------------------------------------------------------- 1 | #ifndef PARAMETER_LEARNING_HEADER 2 | #define PARAMETER_LEARNING_HEADER 3 | 4 | #include "../math/linear.algebra.h" 5 | 6 | /* bit-field tracking all possible errors in the hierarchical Dirichlet 7 | * parameter estimation. */ 8 | typedef struct { 9 | 10 | unsigned int outer_em_convergence_fail : 1; 11 | unsigned int kappa_tau_convergence_fail : 1; 12 | unsigned int tau_convergence_fail : 1; 13 | unsigned int tau_is_zero : 1; 14 | unsigned int kappa_convergence_fail : 1; 15 | unsigned int padding : 3; /* pad to 1 byte. */ 16 | 17 | } hdstatus; 18 | 19 | void c_classic_discrete_parameters(int *counts, double *cpt, int nrows, 20 | int ncols, double alpha, bool replace); 21 | hdstatus c_hierarchical_dirichlet_parameters(cmcmap counts, double alpha0, 22 | double s, bool debugging, double *nu); 23 | 24 | #endif 25 | 26 | -------------------------------------------------------------------------------- /src/inference/loglikelihood/loglikelihood.h: -------------------------------------------------------------------------------- 1 | #ifndef LOGLIKELIHOOD_FUNCTIONS_HEADER 2 | #define LOGLIKELIHOOD_FUNCTIONS_HEADER 3 | 4 | bool check_locally_incomplete_data(fitted_bn bn, meta m, bool debugging); 5 | 6 | void bysample_discrete_loglikelihood(fitted_bn bn, ddata dt, double *loglik, 7 | bool debugging); 8 | void bysample_gaussian_loglikelihood(fitted_bn bn, gdata dt, double *loglik, 9 | bool robust, bool debugging); 10 | void bysample_clgaussian_loglikelihood(fitted_bn bn, cgdata dt, double *loglik, 11 | bool robust, bool debugging); 12 | 13 | double data_discrete_loglikelihood(fitted_bn bn, ddata dt, bool propagate, 14 | bool debugging); 15 | double data_gaussian_loglikelihood(fitted_bn bn, gdata dt, double *scratch, 16 | bool propagate, bool debugging); 17 | double data_clgaussian_loglikelihood(fitted_bn bn, cgdata dt, double *scratch, 18 | bool propagate, bool debugging); 19 | 20 | #endif 21 | 22 | -------------------------------------------------------------------------------- /R/utils-tests.R: -------------------------------------------------------------------------------- 1 | 2 | # compute the sample size / CPT cells ratio. 3 | obs.per.cell = function(x, y, z = NULL, data) { 4 | 5 | opc = 0 6 | ndata = nrow(data) 7 | nlx = nlevels(data[, x]) 8 | nly = nlevels(data[, y]) 9 | 10 | # return +Inf for continuous data to bypass countermeasures 11 | # intended for sparse discrete data.
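# (worked example of the ratio below: 5000 rows and two 3-level factors give opc = 5000 / (3 * 3), about 556.)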
12 | if (data.type(data) == "continuous") 13 | return(Inf) 14 | 15 | if (is.null(z) || (length(z) == 0)) { 16 | 17 | opc = ndata / (nlx * nly) 18 | 19 | }#THEN 20 | else if (is.character(z)) { 21 | 22 | if (length(z) == 1) 23 | opc = ndata / (nlx * nly * nlevels(data[, z])) 24 | else if (length(z) > 1) 25 | opc = ndata / (nlx * nly * 26 | prod(sapply(z, function(col) { nlevels(data[, col]) } ))) 27 | 28 | }#THEN 29 | else if (is.factor(z)) { 30 | 31 | opc = ndata / (nlx * nly * nlevels(z)) 32 | 33 | }#ELSE 34 | 35 | return(opc) 36 | 37 | }#OBS.PER.CELL 38 | 39 | -------------------------------------------------------------------------------- /man/ordering.Rd: -------------------------------------------------------------------------------- 1 | \name{node ordering utilities} 2 | \alias{node ordering utilities} 3 | \alias{node.ordering} 4 | \title{Partial node orderings} 5 | \description{ 6 | 7 | Find the partial node ordering implied by a network. 8 | 9 | } 10 | \usage{ 11 | node.ordering(x, debug = FALSE) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn} or \code{bn.fit}.} 15 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 16 | printed; otherwise the function is completely silent.} 17 | } 18 | \value{ 19 | 20 | \code{node.ordering()} returns a vector of character strings, an ordered set 21 | of node labels. 22 | 23 | } 24 | \note{ 25 | 26 | \code{node.ordering()} supports only completely directed Bayesian networks. 27 | 28 | } 29 | \examples{ 30 | dag = random.graph(LETTERS[1:10]) 31 | ord = node.ordering(dag) 32 | ord 33 | } 34 | \author{Marco Scutari} 35 | \keyword{convenience functions} 36 | \keyword{graphs} 37 | -------------------------------------------------------------------------------- /src/fitted/enums.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../minimal/common.h" 3 | #include "fitted.h" 4 | 5 | fitted_node_e fitted_node_to_enum(SEXP object) { 6 | 7 | if (c_is(object, "bn.fit.dnode")) 8 | return DNODE; 9 | else if (c_is(object, "bn.fit.onode")) 10 | return ONODE; 11 | else if (c_is(object, "bn.fit.gnode")) 12 | return GNODE; 13 | else if (c_is(object, "bn.fit.cgnode")) 14 | return CGNODE; 15 | 16 | return ENOFIT; 17 | 18 | }/*FITTED_NODE_TO_ENUM*/ 19 | 20 | fitted_net_e fitted_net_to_enum(SEXP object) { 21 | 22 | if (c_is(object, "bn.fit.dnet")) 23 | return DNET; 24 | else if (c_is(object, "bn.fit.onet")) 25 | return ONET; 26 | else if (c_is(object, "bn.fit.donet")) 27 | return DONET; 28 | else if (c_is(object, "bn.fit.gnet")) 29 | return GNET; 30 | else if (c_is(object, "bn.fit.cgnet")) 31 | return CGNET; 32 | 33 | return ENONET; 34 | 35 | }/*FITTED_NET_TO_ENUM*/ 36 | -------------------------------------------------------------------------------- /man/configs.Rd: -------------------------------------------------------------------------------- 1 | \name{configs} 2 | \alias{configs} 3 | \title{Construct configurations of discrete variables} 4 | \description{ 5 | 6 | Create configurations of discrete variables, which can be used in modelling 7 | conditional probability tables. 8 | 9 | } 10 | \usage{ 11 | configs(data, all = TRUE) 12 | } 13 | \arguments{ 14 | \item{data}{a data frame containing factor columns.} 15 | \item{all}{a boolean value. 
If \code{TRUE} all configurations are included as 16 | levels in the return value; otherwise only configurations which are actually 17 | observed are considered.} 18 | } 19 | \value{ 20 | 21 | A factor with one element for each row of \code{data}, and levels as 22 | specified by \code{all}. 23 | 24 | } 25 | \examples{ 26 | data(learning.test) 27 | configs(learning.test, all = TRUE) 28 | configs(learning.test, all = FALSE) 29 | } 30 | \author{Marco Scutari} 31 | \keyword{data preprocessing} 32 | \keyword{convenience functions} 33 | -------------------------------------------------------------------------------- /src/sanitization/covariance.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/covariance.matrix.h" 3 | #include "../math/linear.algebra.h" 4 | #include "../include/globals.h" 5 | 6 | /* check that a matrix is symmetric and satisfies Cauchy-Schwarz. */ 7 | SEXP check_covariance(SEXP covmat) { 8 | 9 | int i = 0, j = 0, n = (int)sqrt(length(covmat)); 10 | double *cov = REAL(covmat); 11 | 12 | for (i = 0; i < n; i++) 13 | for (j = i + 1; j < n; j++) { 14 | 15 | /* firstly, check symmetry. */ 16 | if (fabs(cov[CMC(i, j, n)] - cov[CMC(j, i, n)]) > MACHINE_TOL) 17 | error("'covmat' (%d, %d) is not symmetric.", i + 1, j + 1); 18 | 19 | /* secondly, check Cauchy-Schwarz. */ 20 | if (cov[CMC(i, j, n)] > sqrt(cov[CMC(i, i, n)] * cov[CMC(j, j, n)])) 21 | error("'covmat' (%d, %d) does not satisfy the Cauchy-Schwarz " 22 | "inequality.", i + 1, j + 1); 23 | 24 | }/*FOR*/ 25 | 26 | return R_NilValue; 27 | 28 | }/*CHECK_COVARIANCE*/ 29 | -------------------------------------------------------------------------------- /R/frontend-averaging.R: -------------------------------------------------------------------------------- 1 | 2 | # build the averaged network structure using arc strengths and a 3 | # significance threshold. 4 | averaged.network = function(strength, threshold) { 5 | 6 | # check the main argument. 7 | check.bn.strength(strength, valid = c("bootstrap", "bayes-factor")) 8 | # check the strength threshold. 9 | threshold = check.threshold(threshold, strength) 10 | 11 | avg = averaged.network.backend(strength = strength, threshold = threshold) 12 | 13 | # add the metadata for the print() method. 14 | avg$learning$algo = "averaged" 15 | avg$learning$args = list(threshold = threshold) 16 | 17 | return(avg) 18 | 19 | }#AVERAGED.NETWORK 20 | 21 | # compute the inclusion threshold from a set of arc strengths. 22 | inclusion.threshold = function(strength) { 23 | 24 | # check the (only) argument. 25 | check.bn.strength(strength, valid = c("bootstrap", "bayes-factor")) 26 | 27 | threshold(strength = strength, method = "l1") 28 | 29 | }#INCLUSION.THRESHOLD 30 | -------------------------------------------------------------------------------- /man/dsep.Rd: -------------------------------------------------------------------------------- 1 | \name{dsep} 2 | \alias{dsep} 3 | \title{Test d-separation} 4 | \description{ 5 | 6 | Check whether two nodes are d-separated. 7 | 8 | } 9 | \usage{ 10 | dsep(bn, x, y, z) 11 | } 12 | \arguments{ 13 | \item{bn}{an object of class \code{bn}.} 14 | \item{x,y}{a character string, the label of a node.} 15 | \item{z}{an optional vector of character strings, the label of the 16 | (candidate) d-separating nodes.
It defaults to the empty set.} 17 | } 18 | \value{ 19 | 20 | \code{dsep()} returns \code{TRUE} if \code{x} and \code{y} are 21 | d-separated by \code{z}, and \code{FALSE} otherwise. 22 | 23 | } 24 | \references{ 25 | 26 | Koller D, Friedman N (2009). \emph{Probabilistic Graphical Models: Principles 27 | and Techniques}. MIT Press. 28 | 29 | } 30 | \examples{ 31 | bn = model2network("[A][C|A][B|C]") 32 | dsep(bn, "A", "B", "C") 33 | bn = model2network("[A][C][B|A:C]") 34 | dsep(bn, "A", "B", "C") 35 | } 36 | \author{Marco Scutari} 37 | \keyword{graphs} 38 | -------------------------------------------------------------------------------- /R/aracne.R: -------------------------------------------------------------------------------- 1 | 2 | aracne.backend = function(x, estimator, whitelist, blacklist, debug = FALSE) { 3 | 4 | # fix the whitelist and the blacklist to keep the C side simple. 5 | nodes = names(x) 6 | 7 | if (!is.null(blacklist)) { 8 | 9 | # arcs must be blacklisted in both directions, so keep only 10 | # the undirected ones. 11 | blacklist = blacklist[which.undirected(blacklist, nodes), , drop = TRUE] 12 | # keep only one direction for each blacklisted arc. 13 | blacklist = pdag2dag.backend(blacklist, nodes) 14 | 15 | }#THEN 16 | 17 | if (!is.null(whitelist)) { 18 | 19 | # keep only one direction for each whitelisted arc. 20 | whitelist = pdag2dag.backend(whitelist, nodes) 21 | 22 | }#THEN 23 | 24 | .Call(call_aracne, 25 | data = x, 26 | estimator = estimator, 27 | whitelist = whitelist, 28 | blacklist = blacklist, 29 | complete = attr(x, "metadata")$complete.nodes, 30 | debug = debug) 31 | 32 | }#ARACNE.BACKEND 33 | 34 | -------------------------------------------------------------------------------- /R/simulation.R: -------------------------------------------------------------------------------- 1 | # do a partial ordering of the nodes of a graph. 2 | topological.ordering = function(x, start = NULL, reverse = FALSE, 3 | debug = FALSE) { 4 | 5 | if (is.null(start)) 6 | roots = root.leaf.nodes(x, leaf = reverse) 7 | else 8 | roots = start 9 | 10 | to.do = .Call(call_topological_ordering, 11 | bn = x, 12 | root.nodes = roots, 13 | reverse = reverse, 14 | debug = debug) 15 | 16 | if (is.null(start)) 17 | return(names(sort(to.do))) 18 | else 19 | return(names(sort(to.do[to.do > 0]))) 20 | 21 | }#SCHEDULE 22 | 23 | # use the Logic Sampling (LS) algorithm as described in "Bayesian Artificial 24 | # Intelligence", Korb & Nicholson, chap 3.6.1. 25 | rbn.backend = function(x, n, fix = TRUE, debug = FALSE) { 26 | 27 | .Call(call_rbn_master, 28 | fitted = x, 29 | n = as.integer(n), 30 | fix = fix, 31 | add.metadata = FALSE, 32 | debug = debug) 33 | 34 | }#RBN.BACKEND 35 | 36 | -------------------------------------------------------------------------------- /src/preprocessing/preprocessing.h: -------------------------------------------------------------------------------- 1 | #ifndef PREPROCESSING_HEADER 2 | #define PREPROCESSING_HEADER 3 | 4 | #include "../core/data.table.h" 5 | 6 | /* enum for discretization methods, to be matched from the label string passed 7 | * down from R. */ 8 | typedef enum { 9 | ENOMETHOD = 0, /* error code, no such discretization method. */ 10 | INTERVAL = 1, /* interval discretization (marginal). */ 11 | QUANTILE = 2, /* quantile discretization (marginal). */ 12 | HARTEMINK = 3 /* Hartemink discretization (pairwise). 
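 * (roughly: start from a fine marginal discretization and collapse adjacent intervals while trying to preserve the pairwise mutual information between variables.)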
*/ 13 | } discretization_e; 14 | 15 | discretization_e discretization_to_enum(const char *label); 16 | 17 | int interval_discretization(double *orig, int *factor, int nbreaks, 18 | double *cutpoints, int nobs, bool debugging); 19 | int quantile_discretization(double *orig, int *factor, int nbreaks, 20 | double *cutpoints, int nobs, bool complete, bool debugging); 21 | void hartemink_discretization(ddata work, int *nbreaks, double **cutpoints, 22 | bool debugging); 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/tests/enums.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "tests.h" 3 | 4 | #define ENTRY(key, value) if (strcmp(label, key) == 0) return value; 5 | 6 | test_e test_to_enum(const char *label) { 7 | 8 | ENTRY("mi", MI); 9 | ENTRY("mi-adf", MI_ADF); 10 | ENTRY("x2", X2); 11 | ENTRY("x2-adf", X2_ADF); 12 | ENTRY("jt", JT); 13 | ENTRY("cor", COR); 14 | ENTRY("zf", ZF); 15 | ENTRY("mi-g", MI_G); 16 | ENTRY("mi-cg", MI_CG); 17 | ENTRY("mi-sh", MI_SH); 18 | ENTRY("mi-g-sh", MI_G_SH); 19 | ENTRY("mc-mi", MC_MI); 20 | ENTRY("mc-x2", MC_X2); 21 | ENTRY("sp-mi", SP_MI); 22 | ENTRY("sp-x2", SP_X2); 23 | ENTRY("mc-jt", MC_JT); 24 | ENTRY("smc-mi", SMC_MI); 25 | ENTRY("smc-x2", SMC_X2); 26 | ENTRY("smc-jt", SMC_JT); 27 | ENTRY("mc-cor", MC_COR); 28 | ENTRY("mc-mi-g", MC_MI_G); 29 | ENTRY("mc-zf", MC_ZF); 30 | ENTRY("smc-cor", SMC_COR); 31 | ENTRY("smc-zf", SMC_ZF); 32 | ENTRY("smc-mi-g", SMC_MI_G); 33 | ENTRY("custom-test", CUSTOM_T); 34 | 35 | return ENOTEST; 36 | 37 | }/*TEST_TO_ENUM*/ 38 | 39 | 40 | -------------------------------------------------------------------------------- /R/utils-plot.R: -------------------------------------------------------------------------------- 1 | # print an underlined label in the plot. 2 | underlined = function(x, y, label, col){ 3 | 4 | text(x, y, label, col = col, font = 2) 5 | sw = strwidth(label) 6 | sh = strheight(label) 7 | lines(x + c(-sw/2, sw/2), rep(y - 1.5*sh/2, 2), col = col) 8 | 9 | }#UNDERLINED 10 | 11 | # compute the largest possible expansion factor for which a string fits the box. 12 | largest.cex = function(node, height, width, hfrac = 0.7, wfrac = 0.9) { 13 | 14 | # fit vertically. 15 | guess = hfrac * height / strheight(node, cex = 1) 16 | best = optimize(f = function(x) abs(strheight(node, cex = x) - hfrac * height), 17 | interval = guess * c(0.5, 2), tol = 0.025)$minimum 18 | 19 | # fit horizontally. 20 | best = best * min(wfrac * width / strwidth(node, cex = best), 1) 21 | 22 | return(best) 23 | 24 | }#LARGEST.CEX 25 | 26 | # create a lighter tint of a given colour. 27 | lighter.colour = function(col, offset = 0.25) { 28 | 29 | rgb = col2rgb(col)[, 1] 30 | do.call("rgb", as.list((rgb + (255 - rgb) * offset) / 255)) 31 | 32 | }#LIGHTER.COLOUR 33 | -------------------------------------------------------------------------------- /R/sanitization-mutual.R: -------------------------------------------------------------------------------- 1 | 2 | # check the estimator for the mutual information. 3 | check.mi.estimator = function(estimator, data) { 4 | 5 | # check which type of data we are dealing with. 6 | type = data.type(data) 7 | 8 | if (!missing(estimator) && !is.null(estimator)) { 9 | 10 | check.label(estimator, choices = available.mi, 11 | labels = mi.estimator.labels, argname = "mutual information estimator") 12 | 13 | # check if it's the right estimator for the data (discrete, continuous).
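# (e.g. "mi" with continuous data, or "mi-g" with discrete data, should be rejected by the checks below.)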
14 | if ((type %!in% discrete.data.types) && 15 | (estimator %in% available.discrete.mi)) 16 | stop("estimator '", estimator, "' may only be used with discrete data.") 17 | if ((type != "continuous") && (estimator %in% available.continuous.mi)) 18 | stop("estimator '", estimator, "' may only be used with continuous data.") 19 | 20 | return(estimator) 21 | 22 | }#THEN 23 | else { 24 | 25 | if (type %in% discrete.data.types) 26 | return("mi") 27 | else 28 | return("mi-g") 29 | 30 | }#ELSE 31 | 32 | }#CHECK.MI.ESTIMATOR 33 | 34 | -------------------------------------------------------------------------------- /src/foreign/parse.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* find the matching closed brace. */ 4 | SEXP match_brace(SEXP lines, SEXP start, SEXP open_brace, SEXP close_brace) { 5 | 6 | int depth = 0, open = 0, line_id = INT(start) - 1; 7 | const char *current = NULL; 8 | const char *op = CHAR(STRING_ELT(open_brace, 0)); 9 | const char *cl = CHAR(STRING_ELT(close_brace, 0)); 10 | 11 | do { 12 | 13 | /* dereference the current line. */ 14 | current = CHAR(STRING_ELT(lines, line_id)); 15 | 16 | /* increment the depth counter if an open brace is found. */ 17 | if (strstr(current, op)) { 18 | 19 | /* be sure not to exit from the do-while loop until an open curly brace 20 | * has been spotted. */ 21 | open = 1; 22 | depth++; 23 | 24 | }/*THEN*/ 25 | /* decrement the depth counter if a closed brace is found. */ 26 | if (strstr(current, cl)) 27 | depth--; 28 | 29 | /* increment the line id. */ 30 | line_id++; 31 | 32 | } while ((depth > 0) || (open == 0)); 33 | 34 | return ScalarInteger(line_id); 35 | 36 | }/*MATCH_BRACE*/ 37 | 38 | -------------------------------------------------------------------------------- /R/scores.R: -------------------------------------------------------------------------------- 1 | 2 | # compute individual node contributions to the network score. 3 | per.node.score = function(network, data, score, targets, extra.args, 4 | debug = FALSE) { 5 | 6 | .Call(call_per_node_score, 7 | network = network, 8 | data = data, 9 | score = score, 10 | targets = targets, 11 | extra.args = extra.args, 12 | debug = debug) 13 | 14 | }#PER.NODE.SCORE 15 | 16 | # complete a prior over arcs as per Castelo and Siebes. 17 | cs.completed.prior = function(beta, nodes, learning = FALSE) { 18 | 19 | beta = .Call(call_castelo_completion, 20 | prior = beta, 21 | nodes = nodes, 22 | learning = learning) 23 | 24 | class(beta) = c("prior", "prior.cs", "data.frame") 25 | attr(beta, "nodes") = nodes 26 | 27 | return(beta) 28 | 29 | }#CS.COMPLETED.PRIOR 30 | 31 | # compute the optimal imaginary sample size for a discrete network. 32 | alpha.star.backend = function(x, data, debug = FALSE) { 33 | 34 | .Call(call_alpha_star, 35 | x = x, 36 | data = data, 37 | debug = debug) 38 | 39 | }#ALPHA.STAR.BACKEND 40 | 41 | -------------------------------------------------------------------------------- /man/igraphpkg.Rd: -------------------------------------------------------------------------------- 1 | \name{igraph integration} 2 | \alias{igraph integration} 3 | \alias{as.bn.igraph} 4 | \alias{as.igraph} 5 | \alias{as.igraph.bn} 6 | \alias{as.igraph.bn.fit} 7 | \title{Import and export networks from the igraph package} 8 | \description{ 9 | 10 | Convert \code{bn} and \code{bn.fit} objects to \code{igraph} and vice versa.
11 | 12 | } 13 | \usage{ 14 | \method{as.bn}{igraph}(x, ..., check.cycles = TRUE) 15 | \method{as.igraph}{bn}(x, ...) 16 | \method{as.igraph}{bn.fit}(x, ...) 17 | } 18 | \arguments{ 19 | \item{x}{an object of class \code{bn}, \code{bn.fit}, or \code{igraph}.} 20 | \item{\dots}{extra arguments from the generic method (currently ignored).} 21 | \item{check.cycles}{a boolean value. If \code{FALSE} the returned network will 22 | not be checked for cycles.} 23 | } 24 | \value{ 25 | 26 | An object of the relevant class. 27 | 28 | } 29 | \examples{ 30 | \dontrun{ 31 | a = bn.fit(hc(learning.test), learning.test) 32 | b = as.igraph(a) 33 | plot(b, edge.arrow.mode = 2L * !igraph::which_mutual(b)) 34 | c = as.bn(b)}} 35 | \author{Marco Scutari} 36 | \keyword{utilities} 37 | \keyword{interfaces to other packages} 38 | -------------------------------------------------------------------------------- /man/plot.bn.strength.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.bn.strength} 2 | \alias{plot.bn.strength} 3 | \title{Plot arc strengths derived from bootstrap} 4 | \description{ 5 | 6 | Plot arc strengths derived from bootstrap resampling. 7 | 8 | } 9 | \usage{ 10 | \method{plot}{bn.strength}(x, draw.threshold = TRUE, main = NULL, 11 | xlab = "arc strengths", ylab = "CDF(arc strengths)", ...) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn.strength}.} 15 | \item{draw.threshold}{a boolean value. If \code{TRUE}, a dashed vertical line 16 | is drawn at the threshold.} 17 | \item{main,xlab,ylab}{character strings, the main title and the axes labels.} 18 | \item{\dots}{other graphical parameters to be passed through to plotting 19 | functions.} 20 | } 21 | \note{ 22 | The \code{xlim} and \code{ylim} arguments are always overridden. 23 | } 24 | \examples{ 25 | data(learning.test) 26 | 27 | start = random.graph(nodes = names(learning.test), num = 50) 28 | netlist = lapply(start, function(net) { 29 | hc(learning.test, score = "bde", iss = 10, start = net) }) 30 | arcs = custom.strength(netlist, nodes = names(learning.test), cpdag = FALSE) 31 | plot(arcs) 32 | } 33 | \author{Marco Scutari} 34 | \keyword{plots} 35 | -------------------------------------------------------------------------------- /man/bayesian.network.classifiers.Rd: -------------------------------------------------------------------------------- 1 | \name{network-classifiers} 2 | \alias{network-classifiers} 3 | \alias{network classifiers} 4 | \title{Bayesian network classifiers} 5 | \description{ 6 | 7 | Structure learning algorithms for Bayesian network classifiers. 8 | 9 | } 10 | \details{ 11 | 12 | The algorithms are aimed at classification, and favour predictive power over 13 | the ability to recover the correct network structure. The implementation in 14 | \pkg{bnlearn} assumes that all variables, including the classifiers, are 15 | discrete. 16 | 17 | \itemize{ 18 | 19 | \item \emph{Naive Bayes} (\code{\link{naive.bayes}}): a very simple 20 | algorithm assuming that all classifiers are independent and using the 21 | posterior probability of the target variable for classification. 22 | \item \emph{Tree-Augmented Naive Bayes} (\code{\link{tree.bayes}}): an 23 | improvement over naive Bayes, this algorithm uses Chow-Liu to approximate 24 | the dependence structure of the classifiers. 25 | 26 | Friedman N, Geiger D, Goldszmidt M (1997). "Bayesian Network Classifiers." 27 | \emph{Machine Learning}, \strong{29}:131--163.
28 | 29 | } 30 | 31 | } 32 | \keyword{package} 33 | -------------------------------------------------------------------------------- /src/core/covariance.matrix.h: -------------------------------------------------------------------------------- 1 | #ifndef COVARIANCE_MATRIX_HEADER 2 | #define COVARIANCE_MATRIX_HEADER 3 | 4 | /* covariance matrix, with additional fields to carry around its own SVD 5 | * decomposition and dimension. */ 6 | typedef struct { 7 | 8 | int dim; /* dimension of the covariance matrix. */ 9 | double *mat; /* pointer to the matrix. */ 10 | double *u; /* SVD decomposition, U left matrix, for c_svd(). */ 11 | double *d; /* SVD decomposition, vector for U's diagonal, for c_svd(). */ 12 | double *vt; /* SVD decomposition, V^t right matrix, for c_svd(). */ 13 | 14 | } covariance; 15 | 16 | covariance new_covariance(int dim, bool decomp); 17 | void print_covariance(covariance cov); 18 | void copy_covariance(covariance *src, covariance *copy); 19 | void covariance_drop_variable(covariance *full, covariance *sub, int to_drop); 20 | void FreeCOV(covariance cov); 21 | 22 | void c_covmat(double **data, double *mean, int nrow, int ncol, covariance cov, 23 | int first); 24 | void c_update_covmat(double **data, double *mean, int update, int nrow, 25 | int ncol, double *mat); 26 | void c_covmat_with_missing(double **data, int nrow, int ncol, 27 | bool *missing_partial, bool *missing_all, double *mean, double *mat, 28 | int *ncomplete); 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/inference/loglikelihood/common.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../fitted/fitted.h" 3 | #include "../../core/data.table.h" 4 | 5 | /* check whether the data are complete for all the local distributions of 6 | * interest. */ 7 | bool check_locally_incomplete_data(fitted_bn bn, meta m, bool debugging) { 8 | 9 | bool early_return = FALSE; 10 | 11 | for (int i = 0; i < m.ncols; i++) { 12 | 13 | if (!m.flag[i].fixed) 14 | continue; 15 | 16 | /* for the data to be locally complete, the node itself... */ 17 | if (!m.flag[i].complete) { 18 | 19 | early_return = TRUE; 20 | goto incomplete_data; 21 | 22 | }/*THEN*/ 23 | 24 | /* ... and its parents must be complete. 
*/ 25 | for (int j = 0; j < bn.ldists[i].nparents; j++) 26 | if (!m.flag[bn.ldists[i].parents[j]].complete) { 27 | 28 | early_return = TRUE; 29 | goto incomplete_data; 30 | 31 | }/*THEN*/ 32 | 33 | incomplete_data: 34 | if (early_return) { 35 | 36 | if (debugging) 37 | Rprintf("* incomplete data for node %s, the log-likelihood is NA.\n", 38 | bn.labels[i]); 39 | 40 | return TRUE; 41 | 42 | }/*THEN*/ 43 | 44 | }/*FOR*/ 45 | 46 | return FALSE; 47 | 48 | }/*CHECK_LOCALLY_INCOMPLETE_DATA*/ 49 | 50 | -------------------------------------------------------------------------------- /src/include/graph.h: -------------------------------------------------------------------------------- 1 | 2 | /* from filter.arcs.c */ 3 | SEXP which_undirected(SEXP arcs, SEXP nodes); 4 | 5 | /* from tiers.c */ 6 | SEXP tiers(SEXP nodes, SEXP debug); 7 | 8 | /* from filter.arcs.c */ 9 | SEXP c_unique_arcs(SEXP arcs, SEXP nodes, bool warnlevel); 10 | 11 | /* from fitted.c */ 12 | SEXP root_nodes(SEXP bn, SEXP leaves); 13 | 14 | /* from simulation.c */ 15 | SEXP topological_ordering(SEXP bn, SEXP root_nodes, SEXP reverse, SEXP debug); 16 | void topological_sort(SEXP fitted, int *poset, int nnodes); 17 | 18 | /* from path.c */ 19 | int c_has_path(int start, int stop, int *amat, int n, SEXP nodes, 20 | bool ugraph, bool notdirect, int *path, int *counter, bool debugging); 21 | bool c_directed_path(int start, int stop, int *amat, int n, SEXP nodes, 22 | int *path, int *counter, bool debugging); 23 | int c_uptri3_path(short int *uptri, int *depth, int from, int to, int nnodes, 24 | SEXP nodes, bool debugging); 25 | 26 | /* from hash.c */ 27 | SEXP arc_hash(SEXP arcs, SEXP nodes, bool uptri, bool sort); 28 | void c_arc_hash(int narcs, int nnodes, int *from, int *to, int *uptri, 29 | int *cmc, bool sort); 30 | SEXP c_amat_hash(int *amat, int nnodes); 31 | 32 | /* from arcs2amat.c */ 33 | SEXP arcs2amat(SEXP arcs, SEXP nodes); 34 | SEXP amat2arcs(SEXP amat, SEXP nodes); 35 | 36 | -------------------------------------------------------------------------------- /R/chow.liu.R: -------------------------------------------------------------------------------- 1 | 2 | chow.liu.backend = function(x, nodes, estimator, whitelist, blacklist, 3 | conditional = NULL, debug = FALSE) { 4 | 5 | # fix the whitelist and the blacklist to keep the C side simple. 6 | if (!is.null(blacklist)) { 7 | 8 | # arcs must be blacklisted in both directions, so keep only 9 | # the undirected ones. 10 | blacklist = blacklist[which.undirected(blacklist, nodes), , drop = TRUE] 11 | # keep only one direction for each blacklisted arc. 12 | blacklist = pdag2dag.backend(blacklist, nodes) 13 | 14 | }#THEN 15 | 16 | if (!is.null(whitelist)) { 17 | 18 | # keep only one direction for each whitelisted arc. 19 | whitelist = pdag2dag.backend(whitelist, nodes) 20 | 21 | # the chow-liu algorithms allows the selection of exactly length(nodes) arcs, 22 | # so the whitelist must contain less. 
23 | if (nrow(whitelist) > length(nodes)) 24 | stop("too many whitelisted arcs, there can be only ", length(nodes), ".") 25 | 26 | }#THEN 27 | 28 | .Call(call_chow_liu, 29 | data = x, 30 | nodes = nodes, 31 | estimator = estimator, 32 | whitelist = whitelist, 33 | blacklist = blacklist, 34 | complete = attr(x, "metadata")$complete.nodes, 35 | conditional = conditional, 36 | debug = debug) 37 | 38 | }#CHOW.LIU.BACKEND 39 | 40 | -------------------------------------------------------------------------------- /src/parameters/discrete/classic.discrete.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/contingency.tables.h" 3 | #include "../../math/linear.algebra.h" 4 | 5 | void c_classic_discrete_parameters(int *counts, double *cpt, int nrows, 6 | int ncols, double alpha, bool replace) { 7 | 8 | long double colsum = 0; 9 | 10 | /* add the imaginary sample size, if any, ... */ 11 | for (int i = 0; i < nrows * ncols; i++) 12 | cpt[i] = counts[i] + alpha / (nrows * ncols); 13 | 14 | /* ... and normalize the columns to sum up to 1. */ 15 | for (int j = 0; j < ncols; j++) { 16 | 17 | colsum = 0; 18 | for (int i = 0; i < nrows; i++) 19 | colsum += cpt[CMC(i, j, nrows)]; 20 | 21 | /* some columns cannot be normalized: either fill them with NaNs or with 22 | * a uniform distribution. */ 23 | if (colsum == 0) { 24 | 25 | if (replace) { 26 | 27 | for (int i = 0; i < nrows; i++) 28 | cpt[CMC(i, j, nrows)] = (double) 1 / nrows; 29 | 30 | }/*THEN*/ 31 | else { 32 | 33 | for (int i = 0; i < nrows; i++) 34 | cpt[CMC(i, j, nrows)] = NA_REAL; 35 | 36 | }/*ELSE*/ 37 | 38 | }/*THEN*/ 39 | else { 40 | 41 | for (int i = 0; i < nrows; i++) 42 | cpt[CMC(i, j, nrows)] /= colsum; 43 | 44 | }/*ELSE*/ 45 | 46 | }/*FOR*/ 47 | 48 | }/*C_CLASSIC_DISCRETE_PARAMETERS*/ 49 | 50 | -------------------------------------------------------------------------------- /man/graphpkg.Rd: -------------------------------------------------------------------------------- 1 | \name{graph integration} 2 | \alias{graph integration} 3 | \alias{as.bn.graphNEL} 4 | \alias{as.bn.graphAM} 5 | \alias{as.graphNEL} 6 | \alias{as.graphAM} 7 | \alias{as.graphNEL.bn} 8 | \alias{as.graphNEL.bn.fit} 9 | \alias{as.graphAM.bn} 10 | \alias{as.graphAM.bn.fit} 11 | \title{Import and export networks from the graph package} 12 | \description{ 13 | 14 | Convert \code{bn} and \code{bn.fit} objects to \code{graphNEL} and 15 | \code{graphAM} objects and vice versa. 16 | 17 | } 18 | \usage{ 19 | \method{as.bn}{graphNEL}(x, ..., check.cycles = TRUE) 20 | \method{as.bn}{graphAM}(x, ..., check.cycles = TRUE) 21 | \method{as.graphNEL}{bn}(x) 22 | \method{as.graphNEL}{bn.fit}(x) 23 | \method{as.graphAM}{bn}(x) 24 | \method{as.graphAM}{bn.fit}(x) 25 | } 26 | \arguments{ 27 | \item{x}{an object of class \code{bn}, \code{bn.fit}, \code{graphNEL}, 28 | \code{graphAM}.} 29 | \item{\dots}{extra arguments from the generic method (currently ignored).} 30 | \item{check.cycles}{a boolean value. If \code{FALSE} the returned network will 31 | not be checked for cycles.} 32 | } 33 | \value{ 34 | 35 | An object of the relevant class. 
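The column-wise normalisation performed by c_classic_discrete_parameters() above can be mirrored in R for testing purposes; a small sketch with a hypothetical helper (counts is a matrix with the node's levels on the rows and the parents' configurations on the columns, alpha is the imaginary sample size):

cpt.from.counts = function(counts, alpha = 0, replace = TRUE) {

  # spread the imaginary sample size uniformly over all the cells.
  cpt = counts + alpha / length(counts)

  for (j in seq_len(ncol(cpt))) {

    total = sum(cpt[, j])

    if (total == 0)
      # unidentifiable column: fill with a uniform distribution or with NAs.
      cpt[, j] = if (replace) 1 / nrow(cpt) else NA_real_
    else
      cpt[, j] = cpt[, j] / total

  }#FOR

  return(cpt)

}#CPT.FROM.COUNTS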
36 | 37 | } 38 | \examples{ 39 | \dontrun{ 40 | library(graph) 41 | a = bn.fit(hc(learning.test), learning.test) 42 | b = as.graphNEL(a) 43 | c = as.bn(b)}} 44 | \author{Marco Scutari} 45 | \keyword{utilities} 46 | \keyword{interfaces to other packages} 47 | -------------------------------------------------------------------------------- /src/core/uppertriangular.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "allocations.h" 3 | #include "uppertriangular.h" 4 | 5 | uppertriangular new_uppertriangular(int dim) { 6 | 7 | uppertriangular sym = { 0 }; 8 | 9 | sym.dim = dim; 10 | sym.mat = Calloc1D(dim * (dim - 1) / 2, sizeof(double)); 11 | 12 | return sym; 13 | 14 | }/*NEW_UPPERTRIANGULAR*/ 15 | 16 | void FreeUPPERTRIANGULAR(uppertriangular sym) { 17 | 18 | Free1D(sym.names); 19 | Free1D(sym.mat); 20 | 21 | }/*FREEUPPERTRIANGULAR*/ 22 | 23 | int uppertriangular_size(uppertriangular sym) { 24 | 25 | return sym.dim * (sym.dim - 1) / 2; 26 | 27 | }/*UPPTERTRIANGULAR_SIZE*/ 28 | 29 | void uppertriangular_copy_names(uppertriangular *sym, const char **names) { 30 | 31 | int i = 0; 32 | 33 | (*sym).names = Calloc1D((*sym).dim, sizeof(char *)); 34 | 35 | for (i = 0; i < (*sym).dim; i++) 36 | (*sym).names[i] = names[i]; 37 | 38 | }/*UPPERTRIANGULAR_COPY_NAMES*/ 39 | 40 | /* inverse function of the UPTRI3() macro. */ 41 | void INV_UPTRI3(int x, int n, int *res) { 42 | 43 | int c = 0, r = 0, cn = n - 1; 44 | 45 | for (r = 0; r < n; r++) { 46 | 47 | if (x < cn) { 48 | 49 | c = n - (cn - x); 50 | break; 51 | 52 | }/*THEN*/ 53 | else { 54 | 55 | cn += n - (r + 2); 56 | 57 | }/*ELSE*/ 58 | 59 | }/*FOR*/ 60 | 61 | res[0] = r; 62 | res[1] = c; 63 | 64 | }/*INV_UPTRI3*/ 65 | 66 | -------------------------------------------------------------------------------- /man/learning-test.Rd: -------------------------------------------------------------------------------- 1 | \name{learning.test} 2 | \docType{data} 3 | \alias{learning.test} 4 | \title{Synthetic (discrete) data set to test learning algorithms} 5 | \description{ 6 | 7 | This a synthetic data set used as a test case in the \pkg{bnlearn} package. 8 | 9 | } 10 | \usage{ 11 | data(learning.test) 12 | } 13 | \format{ 14 | 15 | The \code{learning.test} data set contains the following variables: 16 | \itemize{ 17 | 18 | \item \code{A}, a three-level factor with levels \code{a}, \code{b} and 19 | \code{c}. 20 | \item \code{B}, a three-level factor with levels \code{a}, \code{b} and 21 | \code{c}. 22 | \item \code{C}, a three-level factor with levels \code{a}, \code{b} and 23 | \code{c}. 24 | \item \code{D}, a three-level factor with levels \code{a}, \code{b} and 25 | \code{c}. 26 | \item \code{E}, a three-level factor with levels \code{a}, \code{b} and 27 | \code{c}. 28 | \item \code{F}, a two-level factor with levels \code{a} and \code{b}. 29 | 30 | } 31 | 32 | } 33 | \note{ 34 | 35 | The R script to generate data from this network is available from 36 | \url{https://www.bnlearn.com/documentation/networks/}. 37 | 38 | } 39 | \examples{ 40 | # load the data. 41 | data(learning.test) 42 | # create and plot the network structure. 43 | dag = model2network("[A][C][F][B|A][D|A:C][E|B:F]") 44 | \dontrun{graphviz.plot(dag)} 45 | } 46 | \keyword{datasets} 47 | -------------------------------------------------------------------------------- /R/utils-cluster.R: -------------------------------------------------------------------------------- 1 | # check whether the cluster is running. 
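The coordinate system implemented by the UPTRI3() macro and inverted by INV_UPTRI3() above can be reproduced in R to sanity-check the indexing; a sketch with a hypothetical helper (1-based row and column coordinates, 0-based position in the strictly-upper-triangular storage):

uptri3 = function(r, c, n) {

  # symmetric access: always index the upper half of the matrix.
  lo = pmin(r, c)
  hi = pmax(r, c)
  # row-major position, minus the missing lower half and the diagonal.
  (lo - 1) * n + hi - 1 - lo * (lo - 1) / 2 - lo

}#UPTRI3

# the six possible arcs of a 4-node graph map to positions 0 to 5.
uptri3(c(1, 1, 1, 2, 2, 3), c(2, 3, 4, 3, 4, 4), n = 4)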
2 | isClusterRunning = function(cl) { 3 | 4 | tryCatch(any(unlist(parallel::clusterEvalQ(cl, TRUE))), 5 | error = function(err) { FALSE }) 6 | 7 | }#ISCLUSTERRUNNING 8 | 9 | # check the status of the snow/parallel cluster. 10 | check.cluster = function(cluster) { 11 | 12 | if (missing(cluster) || is.null(cluster)) 13 | return(NULL) 14 | if (!is(cluster, "cluster")) 15 | stop("cluster is not a valid cluster object.") 16 | check.and.load.package("parallel") 17 | if (!isClusterRunning(cluster)) 18 | stop("the cluster is stopped.") 19 | 20 | return(cluster) 21 | 22 | }#CHECK.CLUSTER 23 | 24 | # get the number of slaves. 25 | nSlaves = function(cluster) { 26 | 27 | length(cluster) 28 | 29 | }#NSLAVES 30 | 31 | slaves.setup = function(cluster) { 32 | 33 | # set the test counter in all the cluster nodes. 34 | parallel::clusterEvalQ(cluster, library(bnlearn)) 35 | parallel::clusterEvalQ(cluster, reset.test.counter()) 36 | 37 | }#SLAVE.SETUP 38 | 39 | # smart parSapply() that falls back to standard sapply(), but with defaults to 40 | # simplify = FALSE. 41 | smartSapply = function(cl, ..., simplify = FALSE, USE.NAMES = TRUE) { 42 | 43 | if (is.null(cl)) 44 | sapply(..., simplify = simplify, USE.NAMES = USE.NAMES) 45 | else 46 | parallel::parSapplyLB(cl = cl, ..., simplify = simplify, USE.NAMES = USE.NAMES) 47 | 48 | }#SMARTSAPPLY 49 | 50 | -------------------------------------------------------------------------------- /src/arcs/bind.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/bn.h" 3 | #include "../minimal/strings.h" 4 | #include "../minimal/common.h" 5 | #include "../math/linear.algebra.h" 6 | 7 | /* faster rbind() implementation for arc sets. */ 8 | SEXP arcs_rbind (SEXP matrix1, SEXP matrix2, SEXP reverse2) { 9 | 10 | int i = 0, j = 0, m1 = length(matrix1)/2, m2 = length(matrix2)/2; 11 | SEXP res; 12 | 13 | /* allocate the return value. */ 14 | PROTECT(res = allocMatrix(STRSXP, m1 + m2, 2)); 15 | /* allocate and initialize the column names. */ 16 | setDimNames(res, R_NilValue, mkStringVec(2, "from", "to")); 17 | 18 | /* copy the elements of the first matrix. */ 19 | for (i = 0; i < m1; i++) 20 | for (j = 0; j < 2; j++) 21 | SET_STRING_ELT(res, CMC(i, j, m1 + m2), STRING_ELT(matrix1, CMC(i, j, m1))); 22 | 23 | /* copy the elements of the second matrix, reversing the order of the 24 | * columns as needed. */ 25 | if (isTRUE(reverse2)) { 26 | 27 | for (i = 0; i < m2; i++) 28 | for(j = 0; j < 2; j++) 29 | SET_STRING_ELT(res, CMC(i + m1, j, m1 + m2), STRING_ELT(matrix2, CMC(i, 1 - j, m2))); 30 | 31 | }/*THEN*/ 32 | else { 33 | 34 | for (i = 0; i < m2; i++) 35 | for(j = 0; j < 2; j++) 36 | SET_STRING_ELT(res, CMC(i + m1, j, m1 + m2), STRING_ELT(matrix2, CMC(i, j, m2))); 37 | 38 | }/*ELSE*/ 39 | 40 | UNPROTECT(1); 41 | 42 | return res; 43 | 44 | }/*ARCS_RBIND*/ 45 | 46 | -------------------------------------------------------------------------------- /R/utils-print.R: -------------------------------------------------------------------------------- 1 | # advanced cat which correctly handles ini-like lines and short line widths. 2 | wcat = function(header, value, indent = 2) { 3 | 4 | # get the number of available columns. 5 | columns = options("width") 6 | 7 | header = sprintf("%-40s", paste0(strrep(" ", indent), header, ":")) 8 | 9 | # blatantly ignore any line width shorter than 45, trying to support 10 | # that case is a losing proposition. 
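The cluster helpers above (check.cluster(), slaves.setup(), smartSapply()) back the optional cluster argument exposed by several frontend functions; a minimal sketch of how a user-created parallel cluster is expected to be passed in (assuming two local workers and the learning.test data):

library(parallel)
data(learning.test)
cl = makeCluster(2)
bs = boot.strength(learning.test, R = 100, algorithm = "hc", cluster = cl)
stopCluster(cl)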
11 | if ((columns >= nchar(header) + nchar(value) + 1) || (columns < 45)) { 12 | 13 | # if there are enough columns print the string as is. 14 | cat(paste(header, value, sep = " "), "\n") 15 | 16 | }#THEN 17 | else { 18 | 19 | # if there are not enough columns print the header on one row 20 | # (left-aligned) and the value on the following line (right-aligned). 21 | cat(header, "\n", sprintf(paste("%", columns, "s", sep = ""), value), "\n") 22 | 23 | }#ELSE 24 | 25 | }#WCAT 26 | 27 | # advanced cat handles model strings and short line widths. 28 | fcat = function(modelstr, indent = 0) { 29 | 30 | # measure the number of available columns. 31 | columns = options("width") 32 | 33 | if ((columns >= nchar(modelstr)) || columns < 45) { 34 | 35 | cat(strrep(" ", indent), modelstr, "\n") 36 | 37 | }#THEN 38 | else { 39 | 40 | cat(paste(strsplit(modelstr, "\\]")[[1]], "]", sep = ""), 41 | fill = TRUE, sep = "", labels = strrep(" ", indent)) 42 | 43 | }#ELSE 44 | 45 | }#FCAT 46 | -------------------------------------------------------------------------------- /src/scores/enum.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "scores.h" 3 | 4 | #define ENTRY(key, value) if (strcmp(label, key) == 0) return value; 5 | 6 | score_e score_to_enum(const char *label) { 7 | 8 | ENTRY("loglik", LOGLIK); 9 | ENTRY("aic", AIC); 10 | ENTRY("bic", BIC); 11 | ENTRY("ebic", EBIC); 12 | ENTRY("bde", BDE); 13 | ENTRY("bds", BDS); 14 | ENTRY("bdj", BDJ); 15 | ENTRY("k2", K2); 16 | ENTRY("mbde", MBDE); 17 | ENTRY("bdla", BDLA); 18 | ENTRY("pred-loglik", PRED_LOGLIK); 19 | ENTRY("fnml", FNML); 20 | ENTRY("qnml", QNML); 21 | ENTRY("nal", NAL); 22 | ENTRY("pnal", PNAL); 23 | ENTRY("loglik-g", LOGLIK_G); 24 | ENTRY("aic-g", AIC_G); 25 | ENTRY("bic-g", BIC_G); 26 | ENTRY("ebic-g", EBIC_G); 27 | ENTRY("bge", BGE); 28 | ENTRY("pred-loglik-g", PRED_LOGLIK_G); 29 | ENTRY("nal-g", NAL_G); 30 | ENTRY("pnal-g", PNAL_G); 31 | ENTRY("loglik-cg", LOGLIK_CG); 32 | ENTRY("aic-cg", AIC_CG); 33 | ENTRY("bic-cg", BIC_CG); 34 | ENTRY("ebic-cg", EBIC_CG); 35 | ENTRY("pred-loglik-cg", PRED_LOGLIK_CG); 36 | ENTRY("nal-cg", NAL_CG); 37 | ENTRY("pnal-cg", PNAL_CG); 38 | ENTRY("custom-score", CUSTOM); 39 | 40 | return ENOSCORE; 41 | 42 | }/*SCORE_TO_ENUM*/ 43 | 44 | gprior_e gprior_to_enum(const char *label) { 45 | 46 | ENTRY("uniform", UNIFORM); 47 | ENTRY("vsp", VSP); 48 | ENTRY("cs", CS); 49 | ENTRY("marginal", MU); 50 | 51 | return ENOPRIOR; 52 | 53 | }/*GPRIOR_TO_ENUM*/ 54 | 55 | -------------------------------------------------------------------------------- /src/math/linear.algebra.h: -------------------------------------------------------------------------------- 1 | #ifndef LINEAR_ALGEBRA_HEADER 2 | #define LINEAR_ALGEBRA_HEADER 3 | 4 | #include "../core/covariance.matrix.h" 5 | 6 | /* column-major coordinates conversion macro. */ 7 | #define CMC(i, j, nrow) ((i) + (j) * (nrow)) 8 | 9 | /* matrix with elements stored in column-major order. */ 10 | typedef struct{ 11 | 12 | int nrows; /* first dimension. */ 13 | int ncols; /* second dimension */ 14 | int *el; /* pointer to the column-major matrix. 
*/ 15 | 16 | } cmcmap; 17 | 18 | #define CMEL(matrix, i, j) matrix.el[CMC(i, j, matrix.nrows)] 19 | 20 | double c_logdet(double *matrix, int rows); 21 | void c_udvt(double **u, double **d, double **vt, int ncol); 22 | void c_svd(double *A, double *U, double *D, double *V, int *nrow, int *ncol, 23 | int *mindim, bool strict, int *errcode); 24 | void c_ginv(covariance cov, covariance mpinv); 25 | void c_qr(double *qr, double *y, int nrow, int ncol, double *fitted, 26 | double *resid, double *beta, double *sd); 27 | 28 | void c_ols(double **x, double *y, int nrow, int ncol, double *fitted, 29 | double *resid, double *beta, double *sd, int *nobs, bool missing); 30 | void c_cls(double **x, double *y, int *z, int nrow, int ncol, int ncond, 31 | double *fitted, double *resid, double *beta, double *sd, int *nobs, 32 | bool missing); 33 | 34 | void c_qr_matrix(double *qr, double **x, int nrow, int ncol, int *complete, 35 | int ncomplete); 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /man/alpha.star.Rd: -------------------------------------------------------------------------------- 1 | \name{alpha.star} 2 | \alias{alpha.star} 3 | \title{Estimate the optimal imaginary sample size for BDe(u)} 4 | \description{ 5 | 6 | Estimate the optimal value of the imaginary sample size for the BDe score, 7 | assuming a uniform prior and given a network structure and a data set. 8 | 9 | } 10 | \usage{ 11 | alpha.star(x, data, debug = FALSE) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn} (for \code{bn.fit} and \code{custom.fit}) 15 | or an object of class \code{bn.fit} (for \code{bn.net}).} 16 | \item{data}{a data frame containing the variables in the model.} 17 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 18 | printed; otherwise the function is completely silent.} 19 | } 20 | \value{ 21 | 22 | \code{alpha.star()} returns a positive number, the estimated optimal imaginary 23 | sample size value. 24 | 25 | } 26 | \examples{ 27 | data(learning.test) 28 | dag = hc(learning.test, score = "bic") 29 | 30 | for (i in 1:3) { 31 | 32 | a = alpha.star(dag, learning.test) 33 | dag = hc(learning.test, score = "bde", iss = a) 34 | 35 | }#FOR 36 | } 37 | \references{ 38 | 39 | Steck H (2008). "Learning the Bayesian Network Structure: Dirichlet Prior 40 | versus Data." \emph{Proceedings of the 24th Conference on Uncertainty in 41 | Artificial Intelligence}, 511--518. 42 | 43 | } 44 | \author{Marco Scutari} 45 | \keyword{structure learning} 46 | \keyword{network scores} 47 | -------------------------------------------------------------------------------- /man/statspkg.Rd: -------------------------------------------------------------------------------- 1 | \name{lm integration} 2 | \alias{lm integration} 3 | \alias{as.lm} 4 | \alias{as.lm.bn} 5 | \alias{as.lm.bn.fit} 6 | \alias{as.lm.bn.fit.gnode} 7 | \title{Produce lm objects from Bayesian networks} 8 | \description{ 9 | 10 | Take a \code{bn} object or \code{bn.fit} object encoding a Gaussian network 11 | and refit all the local distributions using \code{lm()}. This makes it 12 | possible to use all the functions provided by R for \code{lm} objects 13 | (\code{summary}, \code{anova}, etc.) to investigate the network. 14 | 15 | } 16 | \usage{ 17 | \method{as.lm}{bn}(x, data, ...) 18 | \method{as.lm}{bn.fit}(x, data, ...) 19 | \method{as.lm}{bn.fit.gnode}(x, data, ...) 
20 | } 21 | \arguments{ 22 | \item{x}{an object of class \code{bn}, \code{bn.fit} or \code{bn.fit.gnode}.} 23 | \item{data}{a data frame containing the variables in the model.} 24 | \item{\dots}{additional arguments, currently ignored.} 25 | } 26 | \value{ 27 | 28 | If \code{x} is an object of class \code{bn} or \code{bn.fit}, \code{as.lm()} 29 | returns a list of \code{lm} objects, one for each node in \code{x}. If 30 | \code{x} is an object of class \code{bn} or \code{bn.fit.gnode}, 31 | \code{as.lm()} returns a single \code{lm} object. 32 | 33 | } 34 | \examples{ 35 | dag = hc(gaussian.test) 36 | fitted = bn.fit(dag, gaussian.test) 37 | as.lm(dag, gaussian.test) 38 | as.lm(fitted, gaussian.test) 39 | as.lm(fitted$F, gaussian.test) 40 | } 41 | \author{Marco} 42 | \keyword{interfaces to other packages} 43 | -------------------------------------------------------------------------------- /man/mvnorm.Rd: -------------------------------------------------------------------------------- 1 | \name{multivariate normal distribution} 2 | \alias{mvnorm2gbn} 3 | \alias{gbn2mvnorm} 4 | \title{Gaussian Bayesian networks and multivariate normals} 5 | \description{ 6 | 7 | Convert a Gaussian Bayesian network into the multivariate normal distribution 8 | that is its global distribution, and vice versa. 9 | 10 | } 11 | \usage{ 12 | gbn2mvnorm(fitted) 13 | mvnorm2gbn(dag, mu, sigma) 14 | } 15 | \arguments{ 16 | \item{fitted}{an object of class \code{bn.fit}.} 17 | \item{dag}{an object of class \code{bn}, the structure of the network that 18 | will be returned.} 19 | \item{mu}{a numeric vector, the expectation of the multivariate normal.} 20 | \item{sigma}{a square numeric matrix, the covariance matrix of the 21 | multivariate normal.} 22 | } 23 | \value{ 24 | 25 | \code{gbn2mvnorm()} returns a list with elements \code{"mu"} (the vector of 26 | expectations) and \code{"sigma"} (the covariance matrix). 27 | 28 | \code{mvnorm2gbn()} returns an object of class \code{bn.fit}. 29 | 30 | } 31 | \references{ 32 | 33 | Pourahmadi M (2011). "Covariance Estimation: The GLM and Regularization 34 | Perspectives." \emph{Statistical Science}, 26(3), 369--387. 35 | 36 | } 37 | \examples{ 38 | data(gaussian.test) 39 | dag = model2network("[A][B][E][G][C|A:B][D|B][F|A:D:E:G]") 40 | bn = bn.fit(dag, gaussian.test) 41 | mvn = gbn2mvnorm(bn) 42 | bn2 = mvnorm2gbn(dag, mu = mvn$mu, sigma = mvn$sigma) 43 | all.equal(bn, bn2) 44 | } 45 | \author{Marco Scutari} 46 | \seealso{\link{bn.fit}.} 47 | \keyword{parameter learning} 48 | -------------------------------------------------------------------------------- /man/marks.Rd: -------------------------------------------------------------------------------- 1 | \name{marks} 2 | \docType{data} 3 | \alias{marks} 4 | \title{Examination marks data set} 5 | \description{ 6 | 7 | Examination marks of 88 students on five different topics, from Mardia (1979). 8 | 9 | } 10 | \usage{ 11 | data(marks) 12 | } 13 | \format{ 14 | 15 | The \code{marks} data set contains the following variables, one for each 16 | topic in the examination: 17 | \itemize{ 18 | 19 | \item \code{MECH} (\emph{mechanics}) 20 | \item \code{VECT} (\emph{vectors}) 21 | \item \code{ALG} (\emph{algebra}) 22 | \item \code{ANL} (\emph{analysis}) 23 | \item \code{STAT} (\emph{statistics}) 24 | 25 | } 26 | 27 | All are measured on the same scale (0-100). 28 | 29 | } 30 | \source{ 31 | 32 | Edwards DI (2000). \emph{Introduction to Graphical Modelling}. Springer, 2nd 33 | edition. 34 | 35 | Mardia KV, Kent JT, Bibby JM (1979). 
\emph{Multivariate Analysis}. Academic 36 | Press. 37 | 38 | Whittaker J (1990). \emph{Graphical Models in Applied Multivariate 39 | Statistics}. Wiley. 40 | 41 | } 42 | \examples{ 43 | # This is the undirected graphical model from Edwards (2000). 44 | data(marks) 45 | ug = empty.graph(names(marks)) 46 | arcs(ug, check.cycles = FALSE) = matrix( 47 | c("MECH", "VECT", "MECH", "ALG", "VECT", "MECH", "VECT", "ALG", 48 | "ALG", "MECH", "ALG", "VECT", "ALG", "ANL", "ALG", "STAT", 49 | "ANL", "ALG", "ANL", "STAT", "STAT", "ALG", "STAT", "ANL"), 50 | ncol = 2, byrow = TRUE, 51 | dimnames = list(c(), c("from", "to"))) 52 | \dontrun{graphviz.plot(ug)} 53 | } 54 | \keyword{datasets} 55 | -------------------------------------------------------------------------------- /src/graphs/completely.directed.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../core/uppertriangular.h" 4 | #include "../math/linear.algebra.h" 5 | 6 | /* Beware: this function is based on the assumption that each arc is unique 7 | * in the arc set; otherwise the counter may be wrong, leading to false 8 | * negatives. */ 9 | 10 | /* determine whether a graph is DAG or a PDAG/UG. */ 11 | SEXP completely_directed(SEXP arcs, SEXP nodes) { 12 | 13 | int i = 0, nrow = length(arcs)/2, n = LENGTH(nodes); 14 | int *a = NULL; 15 | short int *checklist = NULL; 16 | SEXP try; 17 | 18 | /* match the node labels in the arc set. */ 19 | PROTECT(try = match(nodes, arcs, 0)); 20 | a = INTEGER(try); 21 | 22 | /* allocate and initialize the checklist. */ 23 | checklist = Calloc1D(UPTRI_MATRIX(n), sizeof(short int)); 24 | 25 | for (i = 0; i < nrow; i++) { 26 | 27 | if (checklist[UPTRI(a[CMC(i, 0, nrow)], a[CMC(i, 1, nrow)], n)] == 0) { 28 | 29 | /* this arc is not present in the checklist; add it. */ 30 | checklist[UPTRI(a[CMC(i, 0, nrow)], a[CMC(i, 1, nrow)], n)] = 1; 31 | 32 | }/*THEN*/ 33 | else { 34 | 35 | /* this arc or its opposite already present in the checklist; the graph 36 | * has at least an undirected arc, so return FALSE. */ 37 | UNPROTECT(1); 38 | 39 | Free1D(checklist); 40 | 41 | return ScalarLogical(FALSE); 42 | 43 | }/*THEN*/ 44 | 45 | }/*FOR*/ 46 | 47 | UNPROTECT(1); 48 | 49 | Free1D(checklist); 50 | 51 | return ScalarLogical(TRUE); 52 | 53 | }/*COMPLETELY_DIRECTED*/ 54 | 55 | -------------------------------------------------------------------------------- /src/fitted/nparams.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../fitted/fitted.h" 3 | #include "../math/linear.algebra.h" 4 | 5 | /* compute the number of parameters of a fitted model. */ 6 | double nparams_fitted_node(ldist ld, fitted_node_e type) { 7 | 8 | int nrows = 0, nconfigs = 0; 9 | double node_params = 0; 10 | 11 | switch(type) { 12 | 13 | /* ... if it's a discrete node: */ 14 | case DNODE: 15 | case ONODE: 16 | 17 | nrows = ld.d.dims[0]; 18 | nconfigs = ld.d.nconfigs; 19 | 20 | /* the number of parameters for each node is (number of rows - 1), 21 | * either in total or for each parents configuration. */ 22 | node_params = nrows * nconfigs - nconfigs; 23 | 24 | break; 25 | 26 | /* ... if it's a Gaussian node: */ 27 | case GNODE: 28 | 29 | /* the number of parameters is (number of regression coefficients, one 30 | * per parent) plus two (intercept and standard error). */ 31 | node_params = ld.nparents + 2; 32 | 33 | break; 34 | 35 | /* ... 
if it's a conditional Gaussian node: */ 36 | case CGNODE: 37 | 38 | nconfigs = ld.cg.nconfigs; 39 | nrows = ld.cg.ncoefs; 40 | 41 | /* the number of parameters is (number of continuous parents + 2) for each 42 | * configuration of the discrete parents (of which there should be at 43 | * least one). */ 44 | node_params += nconfigs * (nrows + 1); 45 | 46 | break; 47 | 48 | case ENOFIT: 49 | default: 50 | break; 51 | 52 | }/*SWITCH*/ 53 | 54 | return node_params; 55 | 56 | }/*NPARAMS_FITTED_NODE*/ 57 | 58 | -------------------------------------------------------------------------------- /man/mi.matrix.Rd: -------------------------------------------------------------------------------- 1 | \name{local discovery algorithms} 2 | \alias{local discovery algorithms} 3 | \alias{aracne} 4 | \alias{chow.liu} 5 | \title{Local discovery structure learning algorithms} 6 | \description{ 7 | 8 | ARACNE and Chow-Liu learn simple graphs structures from data using pairwise 9 | mutual information coefficients. 10 | 11 | } 12 | \usage{ 13 | aracne(x, whitelist = NULL, blacklist = NULL, mi = NULL, debug = FALSE) 14 | chow.liu(x, whitelist = NULL, blacklist = NULL, mi = NULL, debug = FALSE) 15 | } 16 | \arguments{ 17 | \item{x}{a data frame containing the variables in the model.} 18 | \item{whitelist}{a data frame with two columns (optionally labeled "from" 19 | and "to"), containing a set of arcs to be included in the graph.} 20 | \item{blacklist}{a data frame with two columns (optionally labeled "from" 21 | and "to"), containing a set of arcs not to be included in the graph.} 22 | \item{mi}{a character string, the estimator used for the pairwise (i.e. 23 | unconditional) mutual information coefficients in the ARACNE and Chow-Liu 24 | algorithms. Possible values are \code{mi} (discrete mutual information) 25 | and \code{mi-g} (Gaussian mutual information).} 26 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 27 | printed; otherwise the function is completely silent.} 28 | } 29 | \value{ 30 | 31 | An object of class \code{bn}. See \code{\link{bn-class}} for details. 32 | 33 | } 34 | \author{Marco Scutari} 35 | \seealso{\link{constraint-based algorithms}, \link{score-based algorithms}, 36 | \link{hybrid algorithms}.} 37 | \keyword{local learning} 38 | -------------------------------------------------------------------------------- /man/bn.strength-class.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.strength class} 2 | \alias{bn.strength class} 3 | \alias{bn.strength-class} 4 | \alias{bn.strength} 5 | \title{The bn.strength class structure} 6 | \description{ 7 | 8 | The structure of an object of S3 class \code{bn.strength}. 9 | 10 | } 11 | \details{ 12 | 13 | An object of class \code{bn.strength} is a data frame with the following 14 | columns (one row for each arc): 15 | 16 | \itemize{ 17 | 18 | \item \code{from, to}: the nodes incident on the arc. 19 | \item \code{strength}: the strength of the arc. See 20 | \code{\link{arc.strength}}, \code{\link{boot.strength}}, 21 | \code{\link{custom.strength}} and \code{\link{strength.plot}} 22 | for details. 23 | 24 | } 25 | 26 | and some additional attributes: 27 | 28 | \itemize{ 29 | 30 | \item \code{nodes}: a vector of character strings, the labels of the nodes 31 | of the network(s) the strength were computed from. 32 | \item \code{method}: a character string, the method used to compute the 33 | strength coefficients. It can be equal to \code{test}, \code{score} or 34 | \code{bootstrap}. 
35 | \item \code{threshold}: a numeric value, the threshold used to determine 36 | if a strength coefficient is significant. 37 | 38 | } 39 | 40 | An optional column called \code{direction} may also be present, giving the 41 | probability of the direction of an arc given its presence in the graph. 42 | 43 | Only the \code{plot()} method is defined for this class; therefore, it can 44 | be manipulated as a standard data frame. 45 | 46 | } 47 | \author{Marco Scutari} 48 | \keyword{classes} 49 | -------------------------------------------------------------------------------- /man/kl.Rd: -------------------------------------------------------------------------------- 1 | \name{information theoretic quantities} 2 | \alias{H} 3 | \alias{KL} 4 | \title{Compute the distance between two fitted Bayesian networks} 5 | \description{ 6 | 7 | Compute Shannon's entropy of a fitted Bayesian network and the 8 | Kullback-Leibler divergence between two fitted Bayesian networks. 9 | 10 | } 11 | \usage{ 12 | H(P) 13 | KL(P, Q) 14 | } 15 | \arguments{ 16 | \item{P, Q}{objects of class \code{bn.fit}.} 17 | } 18 | \note{ 19 | 20 | Note that in the case of Gaussian and conditional Gaussian netwoks the 21 | divergence can be negative. Regardless of the type of network, if at least one 22 | of the two networks is singular the divergence can be infinite. 23 | 24 | If any of the parameters of the two networks are \code{NA}s, the divergence 25 | will also be \code{NA}. 26 | 27 | } 28 | \value{ 29 | 30 | \code{H()} and \code{KL()} return a single numeric value. 31 | 32 | } 33 | \examples{ 34 | \dontrun{ 35 | # discrete networks 36 | dag = model2network("[A][C][F][B|A][D|A:C][E|B:F]") 37 | fitted1 = bn.fit(dag, learning.test, method = "mle") 38 | fitted2 = bn.fit(dag, learning.test, method = "bayes", iss = 20) 39 | 40 | H(fitted1) 41 | H(fitted2) 42 | 43 | KL(fitted1, fitted1) 44 | KL(fitted2, fitted2) 45 | KL(fitted1, fitted2) 46 | } 47 | 48 | # continuous, singular networks. 49 | dag = model2network("[A][B][E][G][C|A:B][D|B][F|A:D:E:G]") 50 | singular = fitted1 = bn.fit(dag, gaussian.test) 51 | singular$A = list(coef = coef(fitted1[["A"]]) + runif(1), sd = 0) 52 | 53 | H(singular) 54 | H(fitted1) 55 | 56 | KL(singular, fitted1) 57 | KL(fitted1, singular) 58 | } 59 | \author{Marco Scutari} 60 | \keyword{parameter learning} 61 | \keyword{inference} 62 | -------------------------------------------------------------------------------- /src/core/uppertriangular.h: -------------------------------------------------------------------------------- 1 | #ifndef UPPER_TRIANGULAR_HEADER 2 | #define UPPER_TRIANGULAR_HEADER 3 | 4 | /* upper triangular matrix (not including the diagonal). */ 5 | typedef struct { 6 | 7 | int dim; /* dimension of the symmetrix matrix. */ 8 | const char **names; /* row and column names (assumed to be identical). */ 9 | double *mat; /* pointer to the matrix. */ 10 | 11 | } uppertriangular; 12 | 13 | /* 14 | * Coordinate system for an upper triangular matrix: 15 | * 16 | * [(row - 1) * ncols + ncols] - [row * (row - 1) / 2] 17 | * 18 | * the first term is the standard row major order coordinates; 19 | * the second one is an adjustment to account for the missing 20 | * lower half of the matrix. 21 | * 22 | */ 23 | 24 | /* this macro swaps its arguments to avoid "memory not mapped" errors. */ 25 | #define UPTRI(x, y, n) \ 26 | (((x) <= (y)) ? 
\ 27 | ((x) - 1) * n + (y) - 1 - ((x) * ((x) - 1)) / 2 : \ 28 | ((y) - 1) * n + (x) - 1 - ((y) * ((y) - 1)) / 2) 29 | 30 | /* coordinate system for an upper triangular matrix (not including 31 | * * the diagonal elements). */ 32 | #define UPTRI3(r, c, n) (UPTRI(r, c, n) - ((r > c) ? c : r)) 33 | void INV_UPTRI3(int x, int n, int *res); 34 | 35 | /* dimension of the upper triangular part of a n x n matrix. */ 36 | #define UPTRI_MATRIX(n) (n) * ((n) + 1) / 2 37 | 38 | /* upper triangular matrices. */ 39 | uppertriangular new_uppertriangular(int dim); 40 | void uppertriangular_copy_names(uppertriangular *sym, const char **names); 41 | void FreeUPPERTRIANGULAR(uppertriangular sym); 42 | int uppertriangular_size(uppertriangular sym); 43 | #define UTREL(sym, i, j) sym.mat[UPTRI3((i) + 1, (j) + 1, sym.dim)] 44 | 45 | #endif 46 | -------------------------------------------------------------------------------- /src/graphs/rinterface/path.matrix.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/allocations.h" 3 | #include "../../include/graph.h" 4 | #include "../../include/globals.h" 5 | #include "../../minimal/common.h" 6 | #include "../../minimal/strings.h" 7 | #include "../../math/linear.algebra.h" 8 | #include "../graphs.h" 9 | 10 | /* build a path matrix, telling whether each node is reachable from another. */ 11 | SEXP path_matrix(SEXP x, SEXP debug) { 12 | 13 | int nnodes = 0; 14 | char **labels = NULL; 15 | SEXP arcs, nodes, amat, roots, root_ids, pathmat; 16 | 17 | /* extract the relevant information from the network. */ 18 | arcs = getListElement(x, "arcs"); 19 | nodes = getListElement(x, "nodes"); 20 | nodes = getAttrib(nodes, R_NamesSymbol); 21 | nnodes = length(nodes); 22 | /* dereference the node labels. */ 23 | labels = Calloc1D(nnodes, sizeof(char *)); 24 | for (int i = 0; i < nnodes; i++) 25 | labels[i] = (char *) CHAR(STRING_ELT(nodes, i)); 26 | 27 | /* identify the root nodes. */ 28 | PROTECT(roots = root_nodes(x, FALSESEXP)); 29 | PROTECT(root_ids = match(nodes, roots, 0)); 30 | 31 | /* contruct the adjacency matrix. */ 32 | PROTECT(amat = arcs2amat(arcs, nodes)); 33 | 34 | /* allocate the path matrix. */ 35 | PROTECT(pathmat = allocMatrix(LGLSXP, nnodes, nnodes)); 36 | setAttrib(pathmat, R_DimNamesSymbol, getAttrib(amat, R_DimNamesSymbol)); 37 | memset(INTEGER(pathmat), '\0', nnodes * nnodes * sizeof(int)); 38 | 39 | dag_path_matrix(INTEGER(amat), LOGICAL(pathmat), nnodes, labels, 40 | INTEGER(root_ids), length(root_ids), isTRUE(debug)); 41 | 42 | UNPROTECT(4); 43 | 44 | Free1D(labels); 45 | 46 | return pathmat; 47 | 48 | }/*PATH_MATRIX*/ 49 | 50 | -------------------------------------------------------------------------------- /src/scores/custom.score.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../minimal/data.frame.h" 3 | #include "../minimal/common.h" 4 | 5 | /* evaluate a user-provided, custom decomposable score function. */ 6 | double custom_score_function(SEXP target, SEXP x, SEXP data, SEXP custom_fn, 7 | SEXP custom_args, bool debugging) { 8 | 9 | SEXP nodes_info, target_info, parents, call, args_iterator, result; 10 | 11 | /* allocate and populate the pairlist to be valuated. */ 12 | PROTECT(args_iterator = call = allocLang(5)); 13 | /* first slot, the function name. 
*/ 14 | SETCAR(args_iterator, custom_fn); 15 | args_iterator = CDR(args_iterator); 16 | /* second slot, the label of the target node. */ 17 | SETCAR(args_iterator, target); 18 | args_iterator = CDR(args_iterator); 19 | /* third slot, the labels of the parents. */ 20 | nodes_info = getListElement(x, "nodes"); 21 | target_info = getListElement(nodes_info, (char *)CHAR(STRING_ELT(target, 0))); 22 | parents = getListElement(target_info, "parents"); 23 | SETCAR(args_iterator, parents); 24 | args_iterator = CDR(args_iterator); 25 | /* fourth slot, the data. */ 26 | SETCAR(args_iterator, data); 27 | args_iterator = CDR(args_iterator); 28 | /* fifth slot, the optional arguments passed as a list. */ 29 | SETCAR(args_iterator, custom_args); 30 | /* evaluate the custom score function. */ 31 | PROTECT(result = eval(call, R_GlobalEnv)); 32 | 33 | /* the return value must be a scalar, real number. */ 34 | if ((TYPEOF(result) != REALSXP) || (length(result) != 1)) 35 | error("the score for node %s must be a scalar, real value.", 36 | CHAR(STRING_ELT(target, 0))); 37 | 38 | UNPROTECT(2); 39 | 40 | return NUM(result); 41 | 42 | }/*CUSTOM_SCORE_FUNCTION*/ 43 | -------------------------------------------------------------------------------- /src/scores/nml.regret.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../math/linear.algebra.h" 4 | #include "scores.h" 5 | 6 | /* regret approximation for large N */ 7 | double regret_fn_szp1(double N, double K) { 8 | 9 | double hK = 0.5 * K; 10 | double hK_h = hK - 0.5; 11 | double lgammahK = lgamma(hK); 12 | double lgammahK_h = lgamma(hK_h); 13 | double gamma_ratio = exp(lgammahK - lgammahK_h); 14 | double gamma_ratio_term = M_SQRT1_2 - gamma_ratio * K/3/sqrt(N); 15 | 16 | return (hK_h) * (log(N) - M_LN2) 17 | + M_LN_SQRT_PI - lgammahK 18 | + 0.5 - gamma_ratio_term * gamma_ratio_term 19 | + (3 + K * (K - 2) * (2 * K + 1)) / (36.0 * N); 20 | 21 | }/*REGRET_FN_SZP1*/ 22 | 23 | /* regret approximation for large K */ 24 | double regret_fn_szp2(double N, double K) { 25 | 26 | double a = K/N; 27 | double Ca = 0.5 * (1 + sqrt(1 + 4/a)); 28 | 29 | return N * (log(a) + (a + 2) * log(Ca) - 1/Ca) - 0.5 * log(Ca + 2/a); 30 | 31 | }/*REGRET_FN_SZP2*/ 32 | 33 | /* allocate and fill the regret table. */ 34 | double nml_regret(double n, double k) { 35 | 36 | if (n == 1) { 37 | 38 | return log(k); 39 | 40 | }/*THEN*/ 41 | else if (k == 1 || n == 0) { 42 | 43 | return 0.0; 44 | 45 | }/*THEN*/ 46 | else if (n <= MAX_REGRET_TABLE_N && k <= MAX_REGRET_TABLE_K) { 47 | 48 | if (regret_table == NULL) 49 | regret_table = get_regret_table(MAX_REGRET_TABLE_N, MAX_REGRET_TABLE_K); 50 | 51 | return regret_table[CMC((int) k, (int) n, MAX_REGRET_TABLE_K + 1)]; 52 | 53 | }/*THEN*/ 54 | else { 55 | 56 | double approx1 = regret_fn_szp1(n, k); 57 | double approx2 = regret_fn_szp2(n, k); 58 | 59 | return (approx1 < approx2) ? approx1 : approx2; 60 | 61 | }/*ELSE*/ 62 | 63 | }/*NML_REGRET*/ 64 | -------------------------------------------------------------------------------- /R/frontend-amat.R: -------------------------------------------------------------------------------- 1 | 2 | # build an adjacency matrix from a graph. 3 | amat = function(x) { 4 | 5 | check.bn.or.fit(x) 6 | 7 | if (is(x, "bn")) 8 | arcs2amat(x$arcs, names(x$nodes)) 9 | else 10 | arcs2amat(fit2arcs(x), names(x)) 11 | 12 | }#AMAT 13 | 14 | # rebuild the network structure using a new adjacency matrix. 
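custom_score_function() above evaluates an R callback with four arguments (the target node, its parents, the data and the optional argument list) and requires a scalar real return value. A hedged sketch of such a callback defined on the R side, assuming the "custom-score" label from score_to_enum() earlier and its fun/args arguments; the BIC-via-lm() rule is only an illustration, not the package's own scoring code:

my.score = function(node, parents, data, args) {

  # regress the node on its parents (or on the intercept alone)...
  if (length(parents) == 0)
    model = lm(data[, node] ~ 1)
  else
    model = lm(data[, node] ~ ., data = data[, parents, drop = FALSE])
  # ... and return a goodness-of-fit measure (higher is better).
  return(as.numeric(-BIC(model) / 2))

}#MY.SCORE

data(gaussian.test)
dag = hc(gaussian.test, score = "custom-score", fun = my.score, args = list())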
15 | "amat<-" = function(x, check.cycles = TRUE, check.illegal = TRUE, debug = FALSE, 16 | value) { 17 | 18 | check.bn(x) 19 | # a node is needed. 20 | if (missing(value)) 21 | stop("no adjacency matrix specified.") 22 | # check logical arguments. 23 | check.logical(check.cycles) 24 | check.logical(check.illegal) 25 | check.logical(debug) 26 | # check the adjacency matrix. 27 | value = check.amat(amat = value, nodes = names(x$nodes)) 28 | 29 | # update the arcs of the network. 30 | x$arcs = amat2arcs(value, names(x$nodes)) 31 | 32 | # check whether the the graph contains directed cycles. 33 | if (check.cycles) 34 | if (!is.acyclic(nodes = names(x$nodes), arcs = x$arcs, debug = debug, 35 | directed = TRUE)) 36 | stop("the specified network contains cycles.") 37 | # check whether any arc is illegal. 38 | if (check.illegal) { 39 | 40 | illegal = which.listed(x$arcs, x$learning$illegal) 41 | 42 | if (any(illegal)) { 43 | 44 | illegal = apply(x$arcs[illegal, , drop = FALSE], 1, 45 | function(x) { paste(" (", x[1], ", ", x[2], ")", sep = "") }) 46 | 47 | stop("the following arcs are not valid due to the parametric assumptions of the network:", 48 | illegal, ".") 49 | 50 | }#THEN 51 | 52 | }#THEN 53 | 54 | # update the network structure. 55 | x$nodes = cache.structure(names(x$nodes), 56 | amat = as.integer(value), debug = debug) 57 | 58 | return(x) 59 | 60 | }#AMAT<- 61 | 62 | -------------------------------------------------------------------------------- /man/plot.bn.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.bn} 2 | \alias{plot.bn} 3 | \title{Plot a Bayesian network} 4 | \description{ 5 | 6 | Plot the graph associated with a small Bayesian network. 7 | 8 | } 9 | \usage{ 10 | \method{plot}{bn}(x, ylim = c(0,600), xlim = ylim, radius = 250, 11 | arrow = 35, highlight = NULL, color = "red", ...) 12 | } 13 | \arguments{ 14 | \item{x}{an object of class \code{bn}.} 15 | \item{ylim}{a numeric vector with two components containing the range of the 16 | y-axis.} 17 | \item{xlim}{a numeric vector with two components containing the range of the 18 | x-axis.} 19 | \item{radius}{a numeric value containing the radius of the nodes.} 20 | \item{arrow}{a numeric value containing the length of the arrow heads.} 21 | \item{highlight}{a vector of character strings, representing the labels of 22 | the nodes (and corresponding arcs) to be highlighted.} 23 | \item{color}{an integer or character string (the highlight colour).} 24 | \item{\dots}{other graphical parameters to be passed through to plotting 25 | functions.} 26 | } 27 | \note{ 28 | The following arguments are always overridden: 29 | \itemize{ 30 | \item \code{axes} is set to \code{FALSE}. 31 | \item \code{xlab} is set to an empty string. 32 | \item \code{ylab} is set to an empty string. 33 | } 34 | } 35 | \examples{ 36 | data(learning.test) 37 | cpdag = pc.stable(learning.test) 38 | 39 | plot(cpdag) 40 | 41 | ## highlight node B and related arcs. 42 | plot(cpdag, highlight = "B") 43 | ## highlight B and its Markov blanket. 44 | plot(cpdag, highlight = c("B", mb(cpdag, "B"))) 45 | 46 | ## a more compact plot. 
47 | par(oma = rep(0, 4), mar = rep(0, 4), mai = rep(0, 4), 48 | plt = c(0.06, 0.94, 0.12, 0.88)) 49 | plot(cpdag) 50 | } 51 | \seealso{\code{\link{graphviz.plot}}.} 52 | \author{Marco Scutari} 53 | \keyword{plots} 54 | -------------------------------------------------------------------------------- /src/minimal/unique.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* return the unique elements from an input vector.*/ 4 | SEXP unique(SEXP array) { 5 | 6 | int *d = NULL, i = 0, k = 0, dup_counter = 0, n = length(array); 7 | int *res = NULL, *a = NULL; 8 | SEXP dup, result = R_NilValue; 9 | 10 | PROTECT(dup = duplicated(array, FALSE)); 11 | d = LOGICAL(dup); 12 | 13 | switch(TYPEOF(array)) { 14 | 15 | case INTSXP: 16 | 17 | a = INTEGER(array); 18 | 19 | for (i = 0; i < n; i++) 20 | if ((d[i] == 0) && (a[i] != NA_INTEGER)) 21 | dup_counter++; 22 | 23 | PROTECT(result = allocVector(INTSXP, dup_counter)); 24 | res = INTEGER(result); 25 | 26 | for (i = 0; i < n; i++) 27 | if ((d[i] == 0) && (a[i] != NA_INTEGER)) 28 | res[k++] = a[i]; 29 | 30 | break; 31 | 32 | case STRSXP: 33 | 34 | for (i = 0; i < n; i++) 35 | if (d[i] == 0) 36 | dup_counter++; 37 | 38 | PROTECT(result = allocVector(STRSXP, dup_counter)); 39 | 40 | for (i = 0; i < n; i++) 41 | if (d[i] == 0) 42 | SET_STRING_ELT(result, k++, STRING_ELT(array, i)); 43 | 44 | break; 45 | 46 | default: 47 | 48 | error("this SEXP type is not handled in unique()."); 49 | 50 | }/*SWITCH*/ 51 | 52 | UNPROTECT(2); 53 | 54 | return result; 55 | 56 | }/*UNIQUE*/ 57 | 58 | /* determine which elements are dupes. */ 59 | SEXP dupe(SEXP array) { 60 | 61 | int i = 0, n = length(array); 62 | int *res = NULL, *tmp = NULL; 63 | SEXP result, temp; 64 | 65 | PROTECT(result = duplicated(array, FALSE)); 66 | PROTECT(temp = duplicated(array, TRUE)); 67 | res = LOGICAL(result); 68 | tmp = LOGICAL(temp); 69 | 70 | for (i = 0; i < n; i++) 71 | res[i] = res[i] || tmp[i]; 72 | 73 | UNPROTECT(2); 74 | 75 | return result; 76 | 77 | }/*DUPE*/ 78 | 79 | -------------------------------------------------------------------------------- /R/backend-score.R: -------------------------------------------------------------------------------- 1 | 2 | # compare two network scores in an efficient way. 3 | score.delta = function(arc, network, data, score, score.delta, 4 | reference.score, op, extra, decomposable = TRUE) { 5 | 6 | .Call(call_score_delta, 7 | arc = arc, 8 | network = network, 9 | data = data, 10 | score = score, 11 | score.delta = score.delta, 12 | reference.score = reference.score, 13 | op = op, 14 | extra = extra, 15 | decomposable = decomposable) 16 | 17 | }#SCORE.DELTA 18 | 19 | # create a data frame or an adjacency matrix containing the arcs to be added. 20 | arcs.to.be.added = function(amat, nodes, blacklist = NULL, whitelist = NULL, 21 | nparents = NULL, maxp = Inf, arcs = TRUE) { 22 | 23 | .Call(call_hc_to_be_added, 24 | arcs = amat, 25 | blacklist = blacklist, 26 | whitelist = whitelist, 27 | nparents = nparents, 28 | maxp = maxp, 29 | nodes = nodes, 30 | convert = arcs) 31 | 32 | }#ARCS.TO.BE.ADDED 33 | 34 | # create a data frame containing the arcs to be dropped: 35 | # arcs arcs already in the graph. 36 | # !is.listed(whitelist) exclude whitelisted arcs. 
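score.delta() above compares two networks that differ by a single arc operation; the same comparison can be reproduced (far less efficiently) from the frontend, which may help when debugging — a sketch assuming the learning.test data and an arc that is absent from the learned structure:

data(learning.test)
dag = hc(learning.test)
before = score(dag, learning.test, type = "bic")
after = score(set.arc(dag, from = "A", to = "F"), learning.test, type = "bic")
after - before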
37 | arcs.to.be.dropped = function(arcs, whitelist) { 38 | 39 | if (!is.null(whitelist)) 40 | return(arcs[!which.listed(arcs, whitelist), , drop = FALSE]) 41 | else 42 | return(arcs) 43 | 44 | }#ARCS.TO.BE.DROPPED 45 | 46 | # create a data frame containing the arcs to be reversed: 47 | arcs.to.be.reversed = function(arcs, blacklist, nparents, maxp = Inf) { 48 | 49 | if (!is.null(blacklist)) 50 | arcs = arcs[!which.listed(arcs[, c(2, 1), drop = FALSE], blacklist), , drop = FALSE] 51 | 52 | if (!missing(nparents)) 53 | arcs = arcs[nparents[arcs[, 1]] < maxp, , drop = FALSE] 54 | 55 | return(arcs) 56 | 57 | }#ARCS.TO.BE.REVERSED 58 | 59 | -------------------------------------------------------------------------------- /man/nodeops.Rd: -------------------------------------------------------------------------------- 1 | \name{node operations} 2 | \alias{node operations} 3 | \alias{add.node} 4 | \alias{remove.node} 5 | \alias{rename.nodes} 6 | \alias{nodes<-} 7 | \alias{nodes<-,bn-method} 8 | \alias{nodes<-,bn.fit-method} 9 | \alias{nodes<-,bn.naive-method} 10 | \alias{nodes<-,bn.tan-method} 11 | \title{Manipulate nodes in a graph} 12 | \description{ 13 | 14 | Add, remove and rename nodes in a graph. 15 | 16 | } 17 | \usage{ 18 | # add and remove nodes. 19 | add.node(x, node) 20 | remove.node(x, node) 21 | 22 | # re-label nodes. 23 | rename.nodes(x, names) 24 | \S4method{nodes}{bn}(object) <- value 25 | \S4method{nodes}{bn.fit}(object) <- value 26 | } 27 | \arguments{ 28 | \item{x}{an object of class \code{bn} for \code{add.node()} and 29 | \code{remove.node()}; an object of class \code{bn} or \code{bn.fit} for 30 | \code{rename.nodes()}.} 31 | \item{object}{an object of class \code{bn} or \code{bn.fit}.} 32 | \item{node}{a character string, the label of a node.} 33 | \item{value, names}{a vector of character strings, the new set of labels 34 | that wll be used as to rename the nodes.} 35 | } 36 | \details{ 37 | 38 | \code{add.node()} adds a new (isolated) node to an existing \code{bn} object. 39 | 40 | \code{remove.node()} removes a node from a \code{bn} object. 41 | 42 | \code{rename.nodes()} replaces the node labels with new ones, relabelling the 43 | whole node set. The assignment method for \code{nodes()} is an alias of 44 | \code{rename.nodes()}. 45 | 46 | } 47 | \value{ 48 | 49 | \code{add.node()}, \code{remove.node()} and \code{rename.nodes()} return an 50 | updated \code{bn} object. 51 | 52 | } 53 | \examples{ 54 | dag = random.graph(LETTERS[1:5]) 55 | add.node(dag, "Z") 56 | remove.node(dag, "A") 57 | 58 | nodes(dag) 59 | nodes(dag) = LETTERS[6:10] 60 | nodes(dag) 61 | } 62 | \author{Marco Scutari} 63 | \keyword{graphs} 64 | -------------------------------------------------------------------------------- /R/test.R: -------------------------------------------------------------------------------- 1 | 2 | # conditional and unconditional independence tests (vectorized in x, scalar 3 | # in y). 4 | indep.test = function(x, y, sx, data, test, extra.args = list(), alpha = 1, 5 | learning = TRUE) { 6 | 7 | .Call(call_indep_test, 8 | x = x, 9 | y = y, 10 | sx = sx[sx != ""], 11 | data = data, 12 | test = test, 13 | alpha = alpha, 14 | extra.args = extra.args, 15 | learning = learning, 16 | complete = attr(data, "metadata")$complete.nodes) 17 | 18 | }#INDEP.TEST 19 | 20 | # test against all possible subsets of the conditioning set (scalar in both x 21 | # and y). 
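indep.test() above is the internal entry point behind ci.test(); a minimal frontend-level sketch of the same conditional independence test (assuming the learning.test data):

data(learning.test)
tst = ci.test("B", "F", c("A", "E"), data = learning.test, test = "x2")
tst$statistic
tst$p.value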
22 | allsubs.test = function(x, y, sx, fixed = character(0), data, test, 23 | extra.args = list(), alpha = 1, min = 0, max = length(sx), debug = FALSE) { 24 | 25 | .Call(call_allsubs_test, 26 | x = x, 27 | y = y, 28 | sx = c(fixed, sx), 29 | fixed = fixed, 30 | data = data, 31 | test = test, 32 | alpha = alpha, 33 | extra.args = extra.args, 34 | min = as.integer(min), 35 | max = as.integer(min(max, length(sx))), 36 | complete = attr(data, "metadata")$complete.nodes, 37 | debug = debug) 38 | 39 | }#ALLSUBS.TEST 40 | 41 | # test each variable in turn given the rest as a conditioning set. 42 | roundrobin.test = function(x, z, fixed, data, test, extra.args = list(), 43 | alpha = 1, debug = FALSE) { 44 | 45 | if (length(z) == 0) 46 | return(structure(numeric(0), names = character(0))) 47 | 48 | .Call(call_roundrobin_test, 49 | x = x, 50 | z = z, 51 | fixed = fixed, 52 | data = data, 53 | test = test, 54 | alpha = alpha, 55 | extra.args = extra.args, 56 | complete = attr(data, "metadata")$complete.nodes, 57 | debug = debug) 58 | 59 | }#ROUNDROBIN.TEST 60 | 61 | -------------------------------------------------------------------------------- /man/lizards.Rd: -------------------------------------------------------------------------------- 1 | \name{lizards} 2 | \docType{data} 3 | \alias{lizards} 4 | \title{Lizards' perching behaviour data set} 5 | \description{ 6 | 7 | Real-world data set about the perching behaviour of two species of lizards in 8 | the South Bimini island, from Shoener (1968). 9 | 10 | } 11 | \usage{ 12 | data(lizards) 13 | } 14 | \format{ 15 | 16 | The \code{lizards} data set contains the following variables: 17 | \itemize{ 18 | 19 | \item \code{Species} (\emph{the species of the lizard}): a two-level factor 20 | with levels \code{Sagrei} and \code{Distichus}. 21 | \item \code{Height} (\emph{perch height}): a two-level factor with levels 22 | \code{high} (greater than 4.75 feet) and \code{low} (lesser or equal to 23 | 4.75 feet). 24 | \item \code{Diameter} (\emph{perch diameter}): a two-level factor with 25 | levels \code{narrow} (greater than 4 inches) and \code{wide} (lesser or 26 | equal to 4 inches). 27 | 28 | } 29 | 30 | } 31 | \source{ 32 | 33 | Edwards DI (2000). \emph{Introduction to Graphical Modelling}. Springer, 2nd 34 | edition. 35 | 36 | Fienberg SE (1980). \emph{The Analysis of Cross-Classified Categorical Data}. 37 | Springer, 2nd edition. 38 | 39 | Schoener TW (1968). "The Anolis Lizards of Bimini: Resource Partitioning in a 40 | Complex Fauna." \emph{Ecology}, \strong{49}(4):704--726. 41 | 42 | } 43 | \examples{ 44 | # load the data. 45 | data(lizards) 46 | # create and plot the network structure. 47 | dag = model2network("[Species][Diameter|Species][Height|Species]") 48 | \dontrun{graphviz.plot(dag, shape = "ellipse")} 49 | 50 | # This data set is useful as it offers nominal values for 51 | # the conditional mutual information and X^2 tests. 
52 | ci.test("Height", "Diameter", "Species", test = "mi", data = lizards) 53 | ci.test("Height", "Diameter", "Species", test = "x2", data = lizards) 54 | } 55 | \keyword{datasets} 56 | -------------------------------------------------------------------------------- /src/scores/nml.regret.table.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../math/linear.algebra.h" 4 | #include "scores.h" 5 | 6 | double get_regret_k2(int N, double *logs, double *logfacs) { 7 | 8 | double res = 0.0; 9 | 10 | for (int n = 0; n <= N; ++n) { 11 | 12 | double term = logfacs[N] - logfacs[n] - logfacs[N - n]; 13 | 14 | if (n) 15 | term += n * (logs[n] - logs[N]); 16 | if (N - n) 17 | term += (N - n) * (logs[N - n] - logs[N]); 18 | 19 | res += exp(term); 20 | 21 | }/*FOR*/ 22 | 23 | return log(res); 24 | 25 | }/*GET_REGRET_K2*/ 26 | 27 | void fill_regrets_up_to_K(int K, int n, double *regret_table) { 28 | 29 | double rk_prev_prev = 1.0; 30 | double rk_prev = exp(regret_table[CMC(2, n, K + 1)]); 31 | double rk = 0; 32 | 33 | for (int k = 3; k <= K; ++k) { 34 | 35 | rk = rk_prev + rk_prev_prev / (k - 2) * n; 36 | regret_table[CMC(k, n, K + 1)] = log(rk); 37 | rk_prev_prev = rk_prev; 38 | rk_prev = rk; 39 | 40 | }/*FOR*/ 41 | 42 | }/*FILL_REGRETS_UP_TO_K*/ 43 | 44 | double *get_regret_table(int N, int K) { 45 | 46 | double *logs = (double *) Calloc1D((N + 1), sizeof(double)); 47 | double *logfacs = (double *) Calloc1D((N + 1), sizeof(double)); 48 | double *regret_table = (double *) Calloc1D((N + 1) * (K + 1), sizeof(double)); 49 | 50 | /* cache a table of logarithms. */ 51 | for (int n = 1; n <= N; ++n) 52 | logs[n] = log(n); 53 | 54 | /* cache a table of log-factorials (computed as log-Gammas). */ 55 | for (int n = 1; n <= N; ++n) 56 | logfacs[n] = lgammafn(n + 1); 57 | 58 | for (int n = 1; n <= N; ++n) { 59 | 60 | regret_table[CMC(2, n, K + 1)] = get_regret_k2(n, logs, logfacs); 61 | fill_regrets_up_to_K(K, n, regret_table); 62 | 63 | }/*FOR*/ 64 | 65 | Free1D(logfacs); 66 | Free1D(logs); 67 | 68 | return regret_table; 69 | 70 | }/*GET_REGRET_TABLE*/ 71 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bnlearn 2 | Type: Package 3 | Title: Bayesian Network Structure Learning, Parameter Learning and 4 | Inference 5 | Version: 5.1 6 | Date: 2025-08-18 7 | Depends: R (>= 4.4.0), methods 8 | Suggests: parallel, graph, Rgraphviz, igraph, lattice, gRbase, gRain 9 | (>= 1.3-3), Rmpfr, gmp 10 | Authors@R: c(person(given = "Marco", family = "Scutari", role = c("aut", "cre"), 11 | email = "scutari@bnlearn.com"), 12 | person(given = "Tomi", family = "Silander", role = "ctb")) 13 | Maintainer: Marco Scutari 14 | Description: Bayesian network structure learning, parameter learning and inference. 15 | This package implements constraint-based (PC, GS, IAMB, Inter-IAMB, Fast-IAMB, MMPC, 16 | Hiton-PC, HPC), pairwise (ARACNE and Chow-Liu), score-based (Hill-Climbing and Tabu 17 | Search) and hybrid (MMHC, RSMAX2, H2PC) structure learning algorithms for discrete, 18 | Gaussian and conditional Gaussian networks, along with many score functions and 19 | conditional independence tests. 20 | The Naive Bayes and the Tree-Augmented Naive Bayes (TAN) classifiers are also implemented. 
21 | Some utility functions (model comparison and manipulation, random data generation, arc 22 | orientation testing, simple and advanced plots) are included, as well as support for 23 | parameter estimation (maximum likelihood and Bayesian) and inference, conditional 24 | probability queries, cross-validation, bootstrap and model averaging. 25 | Development snapshots with the latest bugfixes are available from . 26 | URL: https://www.bnlearn.com/ 27 | SystemRequirements: USE_C17 28 | License: GPL (>= 2) 29 | LazyData: yes 30 | NeedsCompilation: yes 31 | Packaged: 2025-08-20 08:45:12 UTC; fizban 32 | Author: Marco Scutari [aut, cre], 33 | Tomi Silander [ctb] 34 | Repository: CRAN 35 | Date/Publication: 2025-08-20 12:50:13 UTC 36 | -------------------------------------------------------------------------------- /R/sanitization-enumeration.R: -------------------------------------------------------------------------------- 1 | 2 | # sanitize the extra arguments passed to the graph enumerations. 3 | check.enumeration.args = function(type, extra) { 4 | 5 | # check the number of nodes. 6 | if (has.argument(type, "nodes", enumerations.extra.args)) { 7 | 8 | if (!is.positive.vector(extra[["nodes"]])) 9 | stop("'nodes' must be positive integers, the number(s) of nodes in the graph.") 10 | 11 | }#THEN 12 | 13 | # check the number of root nodes. 14 | if (has.argument(type, "k", enumerations.extra.args)) 15 | check.graph.root.nodes(extra[["k"]], N = extra[["nodes"]]) 16 | 17 | # check the number of arcs. 18 | if (has.argument(type, "r", enumerations.extra.args)) 19 | check.graph.narcs(extra[["r"]], N = extra[["nodes"]]) 20 | 21 | # check the markov equivalence class. 22 | if (has.argument(type, "eqclass", enumerations.extra.args)) 23 | valid.cpdag.backend(extra[["eqclass"]]) 24 | 25 | # warn about and remove unused arguments. 26 | extra = check.unused.args(extra, enumerations.extra.args[[type]]) 27 | 28 | return(extra) 29 | 30 | }#CHECK.ENUMERATION.ARGS 31 | 32 | # check the number of root nodes. 33 | check.graph.root.nodes = function(k, N) { 34 | 35 | if (is.null(k)) 36 | stop("unspecified number of root nodes.") 37 | 38 | if (!is.positive.integer(k)) 39 | stop("the number of root nodes must be a positive integer.") 40 | if (k > max(N)) 41 | warning("the number of root nodes is larger than the number of nodes.") 42 | 43 | }#CHECK.GRAPH.ROOT.NODES 44 | 45 | # check the number of arcs. 46 | check.graph.narcs = function(r, N) { 47 | 48 | if (is.null(r)) 49 | stop("unspecified number of arcs.") 50 | 51 | if (!is.non.negative.integer(r)) 52 | stop("the number of arcs must be a positive integer.") 53 | if (r > choose(max(N), 2)) 54 | warning("the number of arcs is larger than the maximum possible number of arcs.") 55 | 56 | }#CHECK.GRAPH.NARCS 57 | -------------------------------------------------------------------------------- /src/parameters/rinterface/classic.discrete.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/contingency.tables.h" 3 | #include "../../minimal/data.frame.h" 4 | #include "../../minimal/table.h" 5 | #include "../parameters.h" 6 | 7 | SEXP classic_discrete_parameters(SEXP data, SEXP node, SEXP parents, SEXP iss, 8 | SEXP replace_unidentifiable, SEXP missing) { 9 | 10 | double *cpt = 0, alpha = 0; 11 | bool replace = isTRUE(replace_unidentifiable); 12 | SEXP nodes_in_order, relevant_data, counts, cptable; 13 | 14 | /* subset the data with the node labels in the right order. 
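 * The node itself is placed first, so the first dimension of the counts (and of
 * the conditional probability table below) indexes its levels: for a node A with
 * parents B and C the table has dimensions |A| x |B| x |C|, and
 * c_classic_discrete_parameters() normalises each block of |A| counts, one block
 * per configuration of the parents, possibly after adding the prior mass derived
 * from the imaginary sample size iss when a posterior estimate is requested.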
*/ 15 | PROTECT(nodes_in_order = allocVector(STRSXP, length(parents) + 1)); 16 | SET_STRING_ELT(nodes_in_order, 0, STRING_ELT(node, 0)); 17 | for (int i = 0; i < length(parents); i++) 18 | SET_STRING_ELT(nodes_in_order, i + 1, STRING_ELT(parents, i)); 19 | 20 | PROTECT(relevant_data = c_dataframe_column(data, nodes_in_order, FALSE, TRUE)); 21 | 22 | /* implement the maximum likelihood estimator as a particular case of the 23 | * posterior estimator with prior mass equal to zero. */ 24 | if (iss == R_NilValue) 25 | alpha = 0; 26 | else 27 | alpha = NUM(iss); 28 | 29 | /* compute the counts that are the sufficient statistic. */ 30 | PROTECT(counts = minimal_table(relevant_data, missing)); 31 | 32 | /* prepare the conditional probability table... */ 33 | PROTECT(cptable = allocVector(REALSXP, length(counts))); 34 | setAttrib(cptable, R_DimSymbol, getAttrib(counts, R_DimSymbol)); 35 | setAttrib(cptable, R_DimNamesSymbol, getAttrib(counts, R_DimNamesSymbol)); 36 | setAttrib(cptable, R_ClassSymbol, mkString("table")); 37 | cpt = REAL(cptable); 38 | 39 | /* ... and estimate it. */ 40 | c_classic_discrete_parameters(INTEGER(counts), cpt, nrows(cptable), 41 | length(cptable) / nrows(cptable), alpha, replace); 42 | 43 | UNPROTECT(4); 44 | 45 | return cptable; 46 | 47 | }/*CLASSIC_DISCRETE_PARAMETERS*/ 48 | -------------------------------------------------------------------------------- /src/minimal/strings.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | 3 | /* setdiff() for two vectors of character strings. */ 4 | SEXP string_setdiff(SEXP large, SEXP small) { 5 | 6 | int i = 0, k = 0, *t = NULL, nl = length(large), ns = length(small); 7 | SEXP try, diff; 8 | 9 | /* match the elements of the smaller set against the larger set. */ 10 | PROTECT(try = match(small, large, 0)); 11 | t = INTEGER(try); 12 | /* allocate the return value. */ 13 | PROTECT(diff = allocVector(STRSXP, nl - ns)); 14 | /* copy the elements that are not shared between te two. */ 15 | for (i = 0, k = 0; i < nl; i++) 16 | if (t[i] == 0) 17 | SET_STRING_ELT(diff, k++, STRING_ELT(large, i)); 18 | 19 | UNPROTECT(2); 20 | 21 | return diff; 22 | 23 | }/*STRING_SETDIFF*/ 24 | 25 | /* delete a string for a STRSXP array. */ 26 | SEXP string_delete(SEXP array, SEXP string, int *idx) { 27 | 28 | int i = 0, k = 0, *t = NULL, n = length(array); 29 | SEXP try, new_array; 30 | 31 | PROTECT(try = match(array, string, 0)); 32 | t = INTEGER(try); 33 | 34 | /* optional, save the index of the string. */ 35 | if (idx) 36 | *idx = *t; 37 | 38 | if (*t == 0) { 39 | 40 | UNPROTECT(1); 41 | 42 | return array; 43 | 44 | }/*THEN*/ 45 | 46 | /* allocate the new array. */ 47 | PROTECT(new_array = allocVector(STRSXP, n - 1)); 48 | 49 | for (i = 0, k = 0; i < n; i++) 50 | if (i != *t - 1) 51 | SET_STRING_ELT(new_array, k++, STRING_ELT(array, i)); 52 | 53 | UNPROTECT(2); 54 | 55 | return new_array; 56 | 57 | }/*STRING_DELETE*/ 58 | 59 | /* variadic version of mkString(). */ 60 | SEXP mkStringVec(int n, ...) 
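/* e.g. mkStringVec(2, "bn.cpdist", "data.frame") builds the character vector
 * c("bn.cpdist", "data.frame"); cpdist_lw() uses exactly this call to set the
 * class attribute of the data frame it returns. */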
{ 61 | 62 | va_list strings; 63 | int i = 0; 64 | SEXP vec; 65 | 66 | PROTECT(vec = allocVector(STRSXP, n)); 67 | va_start(strings, n); 68 | for (i = 0; i < n; i++) 69 | SET_STRING_ELT(vec, i, mkChar(va_arg(strings, char *))); 70 | va_end(strings); 71 | UNPROTECT(1); 72 | 73 | return vec; 74 | 75 | }/*MKSTRINGVEC*/ 76 | -------------------------------------------------------------------------------- /src/graphs/rinterface/connected.components.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/allocations.h" 3 | #include "../../include/graph.h" 4 | #include "../../minimal/common.h" 5 | #include "../graphs.h" 6 | 7 | /* identify the connected components in an undirected graph. */ 8 | SEXP connected_components(SEXP x, SEXP debug) { 9 | 10 | int nnodes = 0, ncomponents = 0; 11 | int **buffer = NULL, *buflen = NULL; 12 | char **labels = NULL; 13 | SEXP arcs, nodes, amat, temp, components; 14 | 15 | /* extract the relevant information from the network. */ 16 | arcs = getListElement(x, "arcs"); 17 | nodes = getListElement(x, "nodes"); 18 | nodes = getAttrib(nodes, R_NamesSymbol); 19 | nnodes = length(nodes); 20 | 21 | /* contruct the adjacency matrix. */ 22 | PROTECT(amat = arcs2amat(arcs, nodes)); 23 | 24 | /* buffer to store the connected components. */ 25 | buffer = Calloc1D(nnodes, sizeof(int **)); 26 | buflen = Calloc1D(nnodes, sizeof(int)); 27 | /* dereference the node labels. */ 28 | labels = Calloc1D(nnodes, sizeof(char *)); 29 | for (int i = 0; i < nnodes; i++) 30 | labels[i] = (char *) CHAR(STRING_ELT(nodes, i)); 31 | 32 | /* identify the connected components. */ 33 | ncomponents = ug_connected_components(INTEGER(amat), labels, nnodes, buffer, 34 | buflen, isTRUE(debug)); 35 | 36 | /* return the components and the respective sets of node labels. */ 37 | PROTECT(components = allocVector(VECSXP, ncomponents)); 38 | 39 | for (int i = 0; i < ncomponents; i++) { 40 | 41 | PROTECT(temp = allocVector(STRSXP, buflen[i])); 42 | for (int j = 0; j < buflen[i]; j++) 43 | SET_STRING_ELT(temp, j, STRING_ELT(nodes, buffer[i][j])); 44 | SET_VECTOR_ELT(components, i, temp); 45 | 46 | UNPROTECT(1); 47 | 48 | }/*FOR*/ 49 | 50 | UNPROTECT(2); 51 | 52 | Free2D(buffer, nnodes); 53 | Free1D(buflen); 54 | Free1D(labels); 55 | 56 | return components; 57 | 58 | }/*CONNECTED_COMPONENTS*/ 59 | -------------------------------------------------------------------------------- /R/frontend-formula.R: -------------------------------------------------------------------------------- 1 | 2 | # describe the network with a "model string". 3 | modelstring = function(x) { 4 | 5 | check.bn.or.fit(x) 6 | # no model string if the graph is partially directed. 7 | if (is(x, "bn")) 8 | if (!is.completely.directed(x)) 9 | stop("the graph is only partially directed.") 10 | 11 | modelstring.backend(x) 12 | 13 | }#MODELSTRING 14 | 15 | # set a specific network structure with the model string. 16 | "modelstring<-" = function(x, debug = FALSE, value) { 17 | 18 | check.modelstring(value) 19 | check.logical(debug) 20 | 21 | model2network.backend(value, node.order = names(x$nodes), debug = debug) 22 | 23 | }#MODELSTRING<- 24 | 25 | # bn-to-character (i.e. the model string) conversion function. 26 | # an alias of modelstring(). 27 | as.character.bn = function(x, ...) { 28 | 29 | # warn about unused arguments. 
30 | check.unused.args(list(...), character(0)) 31 | 32 | modelstring(x) 33 | 34 | }#AS.CHARACTER.BN 35 | 36 | # generate an object of class bn from a model string. 37 | model2network = function(string, ordering = NULL, debug = FALSE) { 38 | 39 | # check string's class and format. 40 | check.modelstring(string) 41 | # check the node ordering; NULL is ok this time, it lets the backend decide. 42 | if (!is.null(ordering)) 43 | check.nodes(ordering) 44 | 45 | check.logical(debug) 46 | 47 | result = model2network.backend(string, node.order = ordering, debug = debug) 48 | 49 | # check the node ordering again now that the graph is built. 50 | if (!is.null(ordering)) { 51 | 52 | check.nodes(ordering, graph = result, min.nodes = length(result$nodes), 53 | max.nodes = length(result$nodes)) 54 | 55 | }#THEN 56 | 57 | return(result) 58 | 59 | }#MODEL2NETWORK 60 | 61 | # model-string-to-bn conversion function. 62 | as.bn.character = function(x, ...) { 63 | 64 | # warn about unused arguments. 65 | check.unused.args(list(...), character(0)) 66 | 67 | model2network(x) 68 | 69 | }#AS.BN.CHARACTER 70 | 71 | -------------------------------------------------------------------------------- /src/inference/rinterface/cpdist.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../include/sampling.h" 3 | #include "../../minimal/data.frame.h" 4 | #include "../../minimal/strings.h" 5 | #include "../../include/globals.h" 6 | #include "../../core/math.functions.h" 7 | 8 | /* sample observations with the associated likelihood weights. */ 9 | SEXP cpdist_lw(SEXP fitted, SEXP nodes, SEXP n, SEXP fix, SEXP debug) { 10 | 11 | int nsims = INT(n), max_id = 0; 12 | double *weights = NULL; 13 | SEXP result, simulation, wgt, from; 14 | 15 | /* allocate the scratch space for the simulation. */ 16 | PROTECT(simulation = fit2df(fitted, nsims)); 17 | /* generate the random observations. */ 18 | c_rbn_master(fitted, simulation, n, fix, TRUE, FALSE); 19 | 20 | if (isTRUE(debug)) 21 | Rprintf("* generated %d samples from the bayesian network.\n", nsims); 22 | 23 | /* compute the weights. */ 24 | PROTECT(wgt = allocVector(REALSXP, nsims)); 25 | weights = REAL(wgt); 26 | PROTECT(from = getAttrib(fix, R_NamesSymbol)); 27 | c_lw_weights(fitted, simulation, nsims, weights, from, FALSE); 28 | 29 | /* if all weights are zero or NA, the evidence is making it impossible to 30 | * generate a set of valid random observations. */ 31 | max_id = d_which_max(weights, nsims); 32 | 33 | if (max_id == NA_INTEGER) 34 | error("all weights are NA, the probability of the evidence is impossible to compute."); 35 | if (weights[d_which_max(weights, nsims) - 1] == 0) 36 | error("all weights are zero, the evidence has probability zero."); 37 | 38 | /* prepare the return value with all the attributes. 
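 * The sampled particles are returned as a data frame with the likelihood
 * weights (BN_WeightsSymbol), the sampling method (BN_MethodSymbol, here "lw")
 * and the class c("bn.cpdist", "data.frame") attached, so that the R-level
 * cpdist() can post-process them.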
*/ 39 | PROTECT(result = c_dataframe_column(simulation, nodes, FALSE, TRUE)); 40 | minimal_data_frame(result); 41 | setAttrib(result, BN_WeightsSymbol, wgt); 42 | setAttrib(result, BN_MethodSymbol, mkString("lw")); 43 | setAttrib(result, R_ClassSymbol, mkStringVec(2, "bn.cpdist", "data.frame")); 44 | 45 | UNPROTECT(4); 46 | 47 | return result; 48 | 49 | }/*CPDIST_LW*/ 50 | 51 | -------------------------------------------------------------------------------- /man/rbn.Rd: -------------------------------------------------------------------------------- 1 | \name{rbn} 2 | \alias{rbn} 3 | \title{Simulate random samples from a given Bayesian network} 4 | \description{ 5 | 6 | Simulate random samples from a given Bayesian network. 7 | 8 | } 9 | \usage{ 10 | rbn(x, n = 1, \dots, debug = FALSE) 11 | } 12 | \arguments{ 13 | \item{x}{an object of class \code{bn.fit}.} 14 | \item{n}{a positive integer giving the number of observations to generate.} 15 | \item{...}{additional arguments for the parameter estimation procedure, see 16 | \code{\link{bn.fit}} for details.} 17 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 18 | printed; otherwise the function is completely silent.} 19 | } 20 | \details{ 21 | 22 | \code{rbn()} implements forward/logic sampling: values for the root nodes are 23 | sampled from their (unconditional) distribution, then those of their children 24 | conditional on the respective parent sets. This is done iteratively until 25 | values have been sampled for all nodes. 26 | 27 | If \code{x} contains \code{NA} parameter estimates (because of unobserved 28 | discrete parents' configurations in the data the parameters were learned from), 29 | \code{rbn} will produce samples that contain \code{NA}s when those parents' 30 | configurations appear in the simulated samples. See \code{\link{bn.fit}} for 31 | details on how to make sure \code{bn.fit} objects contain no \code{NA} 32 | parameter estimates. 33 | 34 | } 35 | \value{ 36 | 37 | A data frame with the same structure as the data originally used to fit the 38 | parameters of the Bayesian network. 39 | 40 | } 41 | \references{ 42 | 43 | Korb K, Nicholson AE (2010). \emph{Bayesian Artificial Intelligence}. 44 | Chapman & Hall/CRC, 2nd edition.
45 | 46 | } 47 | \examples{ 48 | data(learning.test) 49 | dag = hc(learning.test) 50 | fitted = bn.fit(dag, learning.test) 51 | rbn(fitted, 5) 52 | } 53 | \author{Marco Scutari} 54 | \seealso{\code{\link{cpdist}}.} 55 | \keyword{inference} 56 | \keyword{simulation} 57 | -------------------------------------------------------------------------------- /R/data.preprocessing.R: -------------------------------------------------------------------------------- 1 | 2 | dedup.backend = function(data, threshold, debug = FALSE) { 3 | 4 | .Call(call_dedup, 5 | data = data, 6 | threshold = threshold, 7 | complete = attr(data, "metadata")$complete.nodes, 8 | debug = debug) 9 | 10 | }#DEDUP.BACKEND 11 | 12 | discretize.backend = function(data, method, breaks, ordered = FALSE, extra.args, 13 | debug = FALSE) { 14 | 15 | if (method %in% c("quantile", "interval")) { 16 | 17 | discretized = marginal.discretize.backend(data = data, method = method, 18 | breaks = breaks, ordered = ordered, debug = debug) 19 | 20 | }#THEN 21 | else if (method == "hartemink") { 22 | 23 | discretized = joint.discretize.backend(data = data, method = method, 24 | breaks = breaks, ordered = ordered, 25 | initial.discretization = extra.args$idisc, 26 | initial.breaks = extra.args$ibreaks, debug = debug) 27 | 28 | }#ELSE 29 | 30 | # ensure that the attribute with the metadata set by check.data() is removed. 31 | attr(discretized, "metadata") = NULL 32 | 33 | return(discretized) 34 | 35 | }#DISCRETIZE.BACKEND 36 | 37 | marginal.discretize.backend = function(data, method, breaks, ordered = FALSE, 38 | debug = FALSE) { 39 | 40 | .Call(call_marginal_discretize, 41 | data = data, 42 | method = method, 43 | breaks = as.integer(breaks), 44 | ordered = ordered, 45 | debug = debug) 46 | 47 | }#MARGINAL.DISCRETIZE.BACKEND 48 | 49 | joint.discretize.backend = function(data, method, breaks, ordered = FALSE, 50 | initial.discretization = "quantile", initial.breaks, debug = FALSE) { 51 | 52 | .Call(call_joint_discretize, 53 | data = data, 54 | method = method, 55 | breaks = as.integer(breaks), 56 | ordered = ordered, 57 | initial.discretization = initial.discretization, 58 | initial.breaks = as.integer(initial.breaks), 59 | debug = debug) 60 | 61 | }#JOINT.DISCRETIZE.BACKEND 62 | -------------------------------------------------------------------------------- /R/sanitization-amat.R: -------------------------------------------------------------------------------- 1 | 2 | # sanity check adjacency matrices. 3 | check.amat = function(amat, nodes) { 4 | 5 | # a node is needed. 6 | if (missing(amat)) 7 | stop("no adjacency matrix specified.") 8 | # the adjacency matrix must, well, be a matrix. 9 | if (!is(amat, "matrix") || (ncol(amat) != nrow(amat)) || (length(dim(amat)) != 2)) 10 | stop("an adjacency matrix must be a 2-dimensional square matrix.") 11 | # check the dimensions against the number of nodes in the graph. 12 | if (any(dim(amat) != length(nodes))) 13 | stop("the dimensions of the adjacency matrix do not agree with the number of nodes in the graph.") 14 | # column names must be valid node labels. 15 | if (!is.null(colnames(amat))) 16 | if (any(colnames(amat) %!in% nodes)) 17 | stop("node (column label) not present in the graph.") 18 | # column names must be valid node labels. 19 | if (!is.null(rownames(amat))) 20 | if (any(rownames(amat) %!in% nodes)) 21 | stop("node (row label) not present in the graph.") 22 | # column names must match with row names. 
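# (illustrative sketch, assuming nodes = c("A", "B"): a well-formed input such as
# matrix(c(0L, 0L, 1L, 0L), 2L, 2L, dimnames = list(c("A", "B"), c("A", "B")))
# encodes the single arc A -> B and passes all of the checks in this function.)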
23 | if (!is.null(colnames(amat)) && !is.null(rownames(amat))) { 24 | 25 | if (!identical(colnames(amat), rownames(amat))) 26 | stop("row/column names mismatch in the adjacency matrix.") 27 | 28 | if (!identical(colnames(amat), nodes) || !identical(rownames(amat), nodes)) { 29 | 30 | warning("rearranging the rows/columns of the adjacency matrix.") 31 | 32 | amat = amat[nodes, nodes, drop = FALSE] 33 | 34 | }#THEN 35 | 36 | }#THEN 37 | # make really sure the adjacency matrix is made up of integers. 38 | if (storage.mode(amat) != "integer") 39 | storage.mode(amat) = "integer" 40 | # check the elements of the matrix. 41 | if (!all((amat == 0L) | (amat == 1L))) 42 | stop("all the elements of an adjacency matrix must be equal to either 0 or 1.") 43 | # no arcs from a node to itself. 44 | if (any(diag(amat) != 0)) 45 | stop("the elements on the diagonal must be zero.") 46 | 47 | return(amat) 48 | 49 | }#CHECK.AMAT 50 | 51 | -------------------------------------------------------------------------------- /src/core/allocations.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "allocations.h" 3 | 4 | void *Calloc1D(size_t R, size_t size) { 5 | 6 | void *p = NULL; 7 | 8 | if (R == 0) 9 | return NULL; 10 | 11 | p = calloc(R, size); 12 | 13 | if (!p) 14 | error("unable to allocate a %llu array.", (unsigned long long)R); 15 | 16 | return p; 17 | 18 | }/*CALLOC1D*/ 19 | 20 | void *Realloc1D(void *p, size_t R, size_t size) { 21 | 22 | p = realloc(p, R * size); 23 | 24 | if (!p) 25 | error("unable to reallocate a %llu array.", (unsigned long long)R); 26 | 27 | return p; 28 | 29 | }/*REALLOC1D*/ 30 | 31 | void BN_Free1D(void *p) { 32 | 33 | free(p); 34 | 35 | }/*FREE1D*/ 36 | 37 | void **Calloc2D(size_t R, size_t C, size_t size) { 38 | 39 | void **p = NULL; 40 | 41 | /* no corner cases, both dimensions required to be positive. */ 42 | if ((R == 0) || (C == 0)) 43 | error("trying to allocate a %llux%llu two-dimensional array.", 44 | (unsigned long long)R, (unsigned long long)C); 45 | 46 | p = Calloc1D(R, sizeof(void *)); 47 | 48 | for (int i = 0; i < R; i++) 49 | p[i] = Calloc1D(C, size); 50 | 51 | return p; 52 | 53 | }/*CALLOC2D*/ 54 | 55 | void BN_Free2D(void **p, size_t R) { 56 | 57 | for (int i = 0; i < R; i++) 58 | free(p[i]); 59 | free(p); 60 | 61 | }/*FREE2D*/ 62 | 63 | void ***Calloc3D(size_t R, size_t C, size_t L, size_t size) { 64 | 65 | void ***p = NULL; 66 | 67 | /* no corner cases, all three dimensions required to be positive. */ 68 | if ((R == 0) || (C == 0) || (L == 0)) 69 | error("trying to allocate a %llux%llux%llu three-dimensional array.", 70 | (unsigned long long)R, (unsigned long long)C, (unsigned long long)L); 71 | 72 | p = Calloc1D(R, sizeof(void *)); 73 | for (int i = 0; i < R; i++) 74 | p[i] = Calloc2D(C, L, size); 75 | 76 | return p; 77 | 78 | }/*CALLOC3D*/ 79 | 80 | void BN_Free3D(void ***p, size_t R, size_t C) { 81 | 82 | for (int i = 0; i < R; i++) 83 | BN_Free2D(p[i], C); 84 | free(p); 85 | 86 | }/*FREE3D*/ 87 | 88 | -------------------------------------------------------------------------------- /R/sanitization-classifiers.R: -------------------------------------------------------------------------------- 1 | 2 | # sanitize the extra arguments passed to Bayesian classifiers. 3 | check.classifier.args = function(method, data, training, explanatory, 4 | extra.args) { 5 | 6 | # check the label of the mutual information estimator. 
7 | if (has.argument(method, "estimator", learning.extra.args)) 8 | extra.args[["estimator"]] = 9 | check.mi.estimator(extra.args[["estimator"]], data = data) 10 | 11 | # check the node to use the root of the tree (if not specified pick the first 12 | # explanatory variable assuming natural ordering). 13 | if (has.argument(method, "root", learning.extra.args)) { 14 | 15 | if (!is.null(extra.args[["root"]])) 16 | check.nodes(extra.args[["root"]], graph = explanatory, max.nodes = 1) 17 | else 18 | extra.args[["root"]] = explanatory[1] 19 | 20 | }#THEN 21 | 22 | # warn about and remove unused arguments. 23 | extra.args = check.unused.args(extra.args, learning.extra.args[[method]]) 24 | 25 | return(extra.args) 26 | 27 | }#CHECK.CLASSIFIER.ARGS 28 | 29 | # check a prior distribution against the observed variable. 30 | check.classifier.prior = function(prior, training) { 31 | 32 | if (missing(prior) || is.null(prior)) { 33 | 34 | # use the empirical probabilities in the fitted network, or a flat prior 35 | # as a last resort. 36 | if (is(training, c("bn.fit.dnode", "bn.fit.onode"))) 37 | prior = training$prob 38 | else 39 | prior = rep(1, nlevels(training)) 40 | 41 | }#THEN 42 | else { 43 | 44 | if (is(training, c("bn.fit.dnode", "bn.fit.onode"))) 45 | nlvls = dim(training$prob)[1] 46 | else 47 | nlvls = nlevels(training) 48 | 49 | if (length(prior) != nlvls) 50 | stop("the prior distribution and the training variable have a different number of levels.") 51 | if (!is.nonnegative.vector(prior)) 52 | stop("the prior distribution must be expressed as a probability vector.") 53 | 54 | # make sure the prior probabilities sum to one. 55 | prior = prior / sum(prior) 56 | 57 | }#ELSE 58 | 59 | return(prior) 60 | 61 | }#CHECK.CLASSIFIER.PRIOR 62 | 63 | -------------------------------------------------------------------------------- /src/tests/omnibus/custom.test.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | double custom_test_function(SEXP x, SEXP y, SEXP z, SEXP data, SEXP custom_fn, 5 | SEXP custom_args, double *pvalue) { 6 | 7 | double statistic = 0, temp_pvalue = 0; 8 | SEXP call, args_iterator, result; 9 | 10 | /* allocate and populate the pairlist to be valuated. */ 11 | PROTECT(args_iterator = call = allocLang(6)); 12 | /* first slot, the function name. */ 13 | SETCAR(args_iterator, custom_fn); 14 | args_iterator = CDR(args_iterator); 15 | /* second slot, the label of the first node. */ 16 | SETCAR(args_iterator, x); 17 | args_iterator = CDR(args_iterator); 18 | /* third slot, the label of the second node. */ 19 | SETCAR(args_iterator, y); 20 | args_iterator = CDR(args_iterator); 21 | /* fourth slot, the labels of the conditioning set (NULL if empty). */ 22 | SETCAR(args_iterator, z); 23 | args_iterator = CDR(args_iterator); 24 | /* fifth slot, the data. */ 25 | SETCAR(args_iterator, data); 26 | args_iterator = CDR(args_iterator); 27 | /* sixth slot, the optional arguments passed as a list. */ 28 | SETCAR(args_iterator, custom_args); 29 | /* evaluate the custom score function. */ 30 | PROTECT(result = eval(call, R_GlobalEnv)); 31 | 32 | /* the return value must be a scalar, real number. */ 33 | if ((TYPEOF(result) != REALSXP) || (length(result) != 2)) 34 | error("the test for nodes %s and %s must return two scalar, real values.", 35 | CHAR(STRING_ELT(x, 0)), CHAR(STRING_ELT(y, 0))); 36 | 37 | /* the second element of the return value is a p-value. 
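 * Together with the checks above this fixes the contract for a user-supplied
 * test: it is called as fun(x, y, z, data, args) at the R level and must return
 * a two-element numeric vector, e.g. c(statistic = 1.23, p.value = 0.04), with
 * the p-value in [0, 1] and not NA.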
*/ 38 | temp_pvalue = REAL(result)[1]; 39 | if (ISNAN(temp_pvalue)) 40 | error("the test for nodes %s and %s has a NA p-value.", 41 | CHAR(STRING_ELT(x, 0)), CHAR(STRING_ELT(y, 0))); 42 | if ((temp_pvalue < 0) || (temp_pvalue > 1)) 43 | error("the test for nodes %s and %s has a p-value not in [0, 1].", 44 | CHAR(STRING_ELT(x, 0)), CHAR(STRING_ELT(y, 0))); 45 | 46 | statistic = REAL(result)[0]; 47 | *pvalue = temp_pvalue; 48 | 49 | UNPROTECT(2); 50 | 51 | return statistic; 52 | 53 | }/*CUSTOM_TEST_FUNCTION*/ 54 | 55 | 56 | -------------------------------------------------------------------------------- /man/gRain.Rd: -------------------------------------------------------------------------------- 1 | \name{gRain integration} 2 | \alias{gRain integration} 3 | \alias{as.bn.fit} 4 | \alias{as.bn.fit.grain} 5 | \alias{as.bn.grain} 6 | \alias{as.grain} 7 | \alias{as.grain.bn.fit} 8 | \alias{as.grain.bn} 9 | \title{Import and export networks from the gRain package} 10 | \description{ 11 | 12 | Convert \code{bn.fit} objects to \code{grain} objects and vice versa. 13 | 14 | } 15 | \usage{ 16 | \method{as.bn.fit}{grain}(x, including.evidence = FALSE, ...) 17 | \method{as.grain}{bn.fit}(x) 18 | \method{as.bn}{grain}(x, ...) 19 | } 20 | \arguments{ 21 | \item{x}{an object of class \code{grain(code)} (for \code{as.bn.fit}) or 22 | \code{bn.fit()} (for \code{as.grain}).} 23 | \item{including.evidence}{a boolean value. If \code{FALSE}, the \code{grain} 24 | object is converted without considering any evidence that has been set into 25 | it. If \code{TRUE}, any hard evidence is carried over into the \code{bn.fit} 26 | object as a zero-one probability distribution.} 27 | \item{\dots}{extra arguments from the generic method (currently ignored).} 28 | } 29 | \note{ 30 | 31 | Conditional probability tables in \code{grain} objects must be completely 32 | specified; on the other hand, \code{bn.fit} allows \code{NaN} values for 33 | unobserved parents' configurations. Such \code{bn.fit} objects will be 34 | converted to $m$ \code{grain} objects by replacing the missing conditional 35 | probability distributions with uniform distributions. 36 | 37 | Another solution to this problem is to fit another \code{bn.fit} with 38 | \code{method = "bayes"} and a low \code{iss} value, using the same data 39 | and network structure. 40 | 41 | Ordinal nodes will be treated as categorical by \code{as.grain}, 42 | disregarding the ordering of the levels. 43 | 44 | } 45 | \value{ 46 | 47 | An object of class \code{grain} (for \code{as.grain}), \code{bn.fit} (for 48 | \code{as.bn.fit}) or \code{bn} (for \code{as.bn}). 49 | 50 | } 51 | \examples{ 52 | \dontrun{ 53 | library(gRain) 54 | a = bn.fit(hc(learning.test), learning.test) 55 | b = as.grain(a) 56 | c = as.bn.fit(b)}} 57 | \author{Marco Scutari} 58 | \keyword{utilities} 59 | \keyword{interfaces to other packages} 60 | -------------------------------------------------------------------------------- /man/bf.Rd: -------------------------------------------------------------------------------- 1 | \name{BF} 2 | \alias{BF} 3 | \title{Bayes factor between two network structures} 4 | \description{ 5 | 6 | Compute the Bayes factor between the structures of two Bayesian networks.. 
7 | 8 | } 9 | \usage{ 10 | BF(num, den, data, score, ..., log = TRUE) 11 | } 12 | \arguments{ 13 | \item{num, den}{two objects of class \code{bn}, corresponding to the numerator 14 | and the denominator models in the Bayes factor.} 15 | \item{data}{a data frame containing the data to be used to compute the Bayes 16 | factor.} 17 | \item{score}{a character string, the label of a posterior network score or 18 | \code{custom} for the custom score. If none is specified, the default score 19 | is the \emph{Bayesian Dirichlet equivalent} score (\code{bde}) for discrete 20 | networks and the \emph{Bayesian Gaussian score} (\code{bge}) for Gaussian 21 | networks. Other kinds of Bayesian networks are not currently supported.} 22 | \item{\dots}{extra tuning arguments for the posterior scores. See 23 | \code{\link{score}} for details.} 24 | \item{log}{a boolean value. If \code{TRUE} the Bayes factor is given as 25 | log(BF).} 26 | } 27 | \note{ 28 | 29 | The Bayes factor for two network structures, by definition, is the ratio of 30 | the respective marginal likelihoods which is equivalent to the ration of 31 | the corresponding posterior probabilities if we assume the \code{uniform} 32 | prior over all possible DAGs. However, note that it is possible to specify 33 | different priors using the \dQuote{\code{...}} arguments of \code{BF()}; in 34 | that case the value returned by the function will not be the classic Bayes 35 | factor. 36 | 37 | } 38 | \value{ 39 | 40 | A single numeric value, the Bayes factor of the two network structures 41 | \code{num} and \code{den}. 42 | 43 | } 44 | \examples{ 45 | data(learning.test) 46 | 47 | dag1 = model2network("[A][B][F][C|B][E|B][D|A:B:C]") 48 | dag2 = model2network("[A][C][B|A][D|A][E|D][F|A:C:E]") 49 | BF(dag1, dag2, learning.test, score = "bds", iss = 1) 50 | } 51 | \seealso{\code{\link{score}}, \code{\link{compare}}, 52 | \code{\link{bf.strength}}.} 53 | \author{Marco Scutari} 54 | \keyword{network scores} 55 | \keyword{structure learning} 56 | -------------------------------------------------------------------------------- /R/frontend-queries.R: -------------------------------------------------------------------------------- 1 | 2 | # perform conditional probability queries. 3 | cpquery = function(fitted, event, evidence, cluster = NULL, method = "ls", ..., 4 | debug = FALSE) { 5 | 6 | check.fit(fitted) 7 | check.logical(debug) 8 | # check event and evidence. 9 | if (missing(event)) 10 | stop("the expression describing the event is missing.") 11 | if (missing(evidence)) 12 | stop("the expression describing the evidence is missing.") 13 | # check the generation method. 14 | check.label(method, choices = cpq.algorithms, labels = cpq.labels, 15 | argname = "query method", see = "cpquery") 16 | # check the cluster. 17 | cluster = check.cluster(cluster) 18 | 19 | if (!is.null(cluster)) { 20 | 21 | # disable debugging, the slaves do not cat() here. 22 | if (debug) { 23 | 24 | warning("disabling debugging output for parallel computing.") 25 | debug = FALSE 26 | 27 | }#THEN 28 | 29 | }#THEN 30 | 31 | extra.args = check.cpq.args(fitted = fitted, event = NULL, 32 | extra.args = list(...), method = method, action = "cpquery") 33 | 34 | # deparse the expression for the event before passing it to 35 | # the backend and beyond. 36 | event = substitute(event) 37 | 38 | # recheck event and evidence expression after deparsing. 
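# (illustrative sketch: in a call such as
# cpquery(fitted, event = (A == "a"), evidence = (B == "b"), method = "ls")
# the event and, for method = "ls", the evidence are captured unevaluated by
# substitute() and only evaluated later on the particles simulated from the
# network.)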
39 | if (!(is.language(event) || identical(event, TRUE))) 40 | stop("event must be an unevaluated expression or TRUE.") 41 | if (method == "ls") { 42 | 43 | if (missing(evidence)) 44 | stop("the expression describing the evidence is missing.") 45 | 46 | # deparse evidence expression before passing it to the backend and beyond. 47 | evidence = substitute(evidence) 48 | # recheck event and evidence expression after deparsing. 49 | if (!(is.language(evidence) || identical(evidence, TRUE))) 50 | stop("evidence must be an unevaluated expression or TRUE.") 51 | 52 | }#THEN 53 | else if (method == "lw") { 54 | 55 | evidence = check.evidence(evidence, fitted) 56 | 57 | }#THEN 58 | 59 | # special-case pointless queries. 60 | if (isTRUE(event)) 61 | return(1) 62 | 63 | conditional.probability.query(fitted = fitted, event = event, 64 | evidence = evidence, method = method, extra = extra.args, cluster = cluster, 65 | debug = debug) 66 | 67 | }#CPQUERY 68 | 69 | -------------------------------------------------------------------------------- /src/bnlearn/nparams.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../minimal/common.h" 4 | #include "../math/linear.algebra.h" 5 | 6 | /* get the number of parameters of the whole network (mixed case, also handles 7 | * discrete and Gaussian networks). */ 8 | SEXP nparams_cgnet(SEXP graph, SEXP data, SEXP debug) { 9 | 10 | int i = 0, j = 0, nnodes = 0; 11 | int *nlevels = NULL, *index = NULL, ngp = 0; 12 | double nconfig = 0, node_params = 0, all_params = 0; 13 | bool debugging = isTRUE(debug); 14 | SEXP nodes = R_NilValue, node_data, parents, temp; 15 | 16 | /* get nodes' number and data. */ 17 | node_data = getListElement(graph, "nodes"); 18 | nnodes = length(node_data); 19 | PROTECT(nodes = getAttrib(node_data, R_NamesSymbol)); 20 | /* cache the number of levels of each variables (zero = continuous). */ 21 | nlevels = Calloc1D(nnodes, sizeof(int)); 22 | for (i = 0; i < nnodes; i++) { 23 | 24 | temp = VECTOR_ELT(data, i); 25 | if (TYPEOF(temp) == INTSXP) 26 | nlevels[i] = NLEVELS(temp); 27 | 28 | }/*FOR*/ 29 | 30 | for (i = 0; i < nnodes; i++) { 31 | 32 | /* extract the parents of the node and match them. */ 33 | parents = getListElement(VECTOR_ELT(node_data, i), "parents"); 34 | PROTECT(temp = match(nodes, parents, 0)); 35 | index = INTEGER(temp); 36 | 37 | /* compute the number of regressors and of configurations. */ 38 | for (j = 0, ngp = 0, nconfig = 1; j < length(parents); j++) { 39 | 40 | if (nlevels[index[j] - 1] == 0) 41 | ngp++; 42 | else 43 | nconfig *= nlevels[index[j] - 1]; 44 | 45 | }/*FOR*/ 46 | /* compute the overall number of parameters as regressors plus intercept 47 | * and standard error (if continuous) or the number of levels - 1 (if 48 | * discrete) times the configurations of the discrete parents. */ 49 | node_params = nconfig * (nlevels[i] == 0 ? ngp + 2 : nlevels[i] - 1); 50 | 51 | if (debugging) 52 | Rprintf("* node %s has %.0lf parameter(s).\n", NODE(i), node_params); 53 | 54 | /* update the return value. 
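 * (For example, a discrete node with 3 levels and two binary discrete parents
 * contributes nconfig * (nlevels - 1) = 4 * (3 - 1) = 8 parameters, while a
 * Gaussian node with two continuous parents and no discrete parents contributes
 * 1 * (ngp + 2) = 4: the intercept, two regression coefficients and the
 * standard error.)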
*/ 55 | all_params += node_params; 56 | 57 | UNPROTECT(1); 58 | 59 | }/*FOR*/ 60 | 61 | Free1D(nlevels); 62 | UNPROTECT(1); 63 | 64 | return ScalarReal(all_params); 65 | 66 | }/*NPARAMS_CGNET*/ 67 | 68 | 69 | -------------------------------------------------------------------------------- /man/foreign.Rd: -------------------------------------------------------------------------------- 1 | \name{foreign files utilities} 2 | \alias{read.bif} 3 | \alias{write.bif} 4 | \alias{read.dsc} 5 | \alias{write.dsc} 6 | \alias{read.net} 7 | \alias{write.net} 8 | \alias{write.dot} 9 | \title{Read and write BIF, NET, DSC and DOT files} 10 | \description{ 11 | 12 | Read networks saved from other programs into \code{bn.fit} objects, and dump 13 | \code{bn} and \code{bn.fit} objects into files for other programs to read. 14 | 15 | } 16 | \usage{ 17 | # Old (non-XML) Bayesian Interchange format. 18 | read.bif(file, debug = FALSE) 19 | write.bif(file, fitted) 20 | 21 | # Microsoft Interchange format. 22 | read.dsc(file, debug = FALSE) 23 | write.dsc(file, fitted) 24 | 25 | # HUGIN flat network format. 26 | read.net(file, debug = FALSE) 27 | write.net(file, fitted) 28 | 29 | # Graphviz DOT format. 30 | write.dot(file, graph) 31 | } 32 | \arguments{ 33 | \item{file}{a connection object or a character string.} 34 | \item{fitted}{an object of class \code{bn.fit}.} 35 | \item{graph}{an object of class \code{bn} or \code{bn.fit}.} 36 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 37 | printed; otherwise the function is completely silent.} 38 | } 39 | \note{ 40 | 41 | All the networks present in the Bayesian Network Repository have associated 42 | BIF, DSC and NET files that can be imported with \code{read.bif()}, 43 | \code{read.dsc()} and \code{read.net()}. 44 | 45 | HUGIN can import and export NET files; Netica can read (but not write) DSC 46 | files; and GeNIe can read and write both DSC and NET files. 47 | 48 | DOT files can be read by Graphviz, Gephi and a variety of other programs. 49 | 50 | Please note that these functions work on a "best effort" basis, as the parsing 51 | of these formats have been implemented by reverse engineering the file format 52 | from publicly available examples. 53 | 54 | } 55 | \value{ 56 | 57 | \code{read.bif()}, \code{read.dsc()} and \code{read.net()} return an object of class 58 | \code{bn.fit}. 59 | 60 | \code{write.bif()}, \code{write.dsc()}, \code{write.net()} and \code{write.dot()} 61 | return \code{NULL} invisibly. 62 | 63 | } 64 | \references{ 65 | 66 | Bayesian Network Repository, \url{https://www.bnlearn.com/bnrepository/}. 67 | 68 | } 69 | \author{Marco Scutari} 70 | \keyword{import/export to file} 71 | -------------------------------------------------------------------------------- /R/averaged.network.R: -------------------------------------------------------------------------------- 1 | 2 | # model averaging for bootstrapped network structures. 3 | averaged.network.backend = function(strength, threshold) { 4 | 5 | nodes = attr(strength, "nodes") 6 | e = empty.graph(nodes) 7 | 8 | # arcs with a strength of one should always be selected, regardless of 9 | # the threshold. 10 | significant = (strength$strength > threshold) | (strength$strength == 1) 11 | 12 | # filter also the direction if present in the bn.strength object. 13 | if ("direction" %in% names(strength)) 14 | significant = significant & (strength$direction >= 0.5) 15 | 16 | # nothing to see, move along. 
17 | if (!any(significant)) 18 | return(e) 19 | 20 | candidate.arcs = as.matrix(strength[significant, c("from", "to"), drop = FALSE]) 21 | 22 | if (all(which.undirected(candidate.arcs))) { 23 | 24 | # update the arcs of the network, no cycles. 25 | e$arcs = candidate.arcs 26 | 27 | }#THEN 28 | else { 29 | 30 | # update the arcs of the network, minding cycles. 31 | e$arcs = .Call(call_smart_network_averaging, 32 | arcs = candidate.arcs, 33 | nodes = nodes, 34 | weights = strength$strength[significant]) 35 | 36 | }#ELSE 37 | 38 | # update the network structure. 39 | e$nodes = cache.structure(nodes, arcs = e$arcs) 40 | # add back illegal arcs, so that cpdag() works correctly. 41 | if ("illegal" %in% names(attributes(strength))) 42 | e$learning$illegal = attr(strength, "illegal") 43 | 44 | return(e) 45 | 46 | }#AVERAGED.NETWORK.BACKEND 47 | 48 | # compute the significance threshold for Friedman's confidence. 49 | threshold = function(strength, method = "l1") { 50 | 51 | # do not blow up with graphs with only 1 node. 52 | if (nrow(strength) == 0) 53 | return(0) 54 | 55 | e = ecdf(strength$strength) 56 | u = knots(e) 57 | 58 | if (method == "l1") { 59 | 60 | norm = function(p) 61 | sum( diff(unique(c(0, u, 1))) * abs(e(unique(c(0, u[u < 1]))) - p)) 62 | 63 | }#THEN 64 | 65 | p0 = optimize(f = norm, interval = c(0, 1))$minimum 66 | 67 | # double-check the boundaries, they are legal solutions but optimize() does 68 | # not check them. 69 | if (norm(1) < norm(p0)) 70 | p0 = 1 71 | if (norm(0) < norm(p0)) 72 | p0 = 0 73 | 74 | quantile(strength$strength, p0, type = 1, names = FALSE) 75 | 76 | }#THRESHOLD 77 | 78 | -------------------------------------------------------------------------------- /man/bn.fit.plots.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.fit plots} 2 | \alias{bn.fit plots} 3 | \alias{bn.fit.qqplot} 4 | \alias{bn.fit.xyplot} 5 | \alias{bn.fit.histogram} 6 | \alias{bn.fit.barchart} 7 | \alias{bn.fit.dotplot} 8 | \title{Plot fitted Bayesian networks} 9 | \description{ 10 | 11 | Plot functions for the \code{bn.fit}, \code{bn.fit.dnode} and 12 | \code{bn.fit.gnode} classes, based on the \pkg{lattice} package. 13 | 14 | } 15 | \usage{ 16 | ## for Gaussian Bayesian networks. 17 | bn.fit.qqplot(fitted, xlab = "Theoretical Quantiles", 18 | ylab = "Sample Quantiles", main, ...) 19 | bn.fit.histogram(fitted, density = TRUE, xlab = "Residuals", 20 | ylab = ifelse(density, "Density", ""), main, ...) 21 | bn.fit.xyplot(fitted, xlab = "Fitted values", ylab = "Residuals", main, ...) 22 | ## for discrete (multinomial and ordinal) Bayesian networks. 23 | bn.fit.barchart(fitted, xlab = "Probabilities", ylab = "Levels", main, ...) 24 | bn.fit.dotplot(fitted, xlab = "Probabilities", ylab = "Levels", main, ...) 25 | } 26 | \arguments{ 27 | \item{fitted}{an object of class \code{bn.fit}, \code{bn.fit.dnode} or 28 | \code{bn.fit.gnode}.} 29 | \item{xlab, ylab, main}{the label of the x axis, of the y axis, and the plot 30 | title.} 31 | \item{density}{a boolean value. If \code{TRUE} the histogram is plotted using 32 | relative frequencies, and the matching normal density is added to the plot.} 33 | \item{\dots}{additional arguments to be passed to \pkg{lattice} functions.} 34 | } 35 | \details{ 36 | 37 | \code{bn.fit.qqplot()} draws a quantile-quantile plot of the residuals. 38 | 39 | \code{bn.fit.histogram()} draws a histogram of the residuals, using either 40 | absolute or relative frequencies. 
41 | 42 | \code{bn.fit.xyplot()} plots the residuals versus the fitted values. 43 | 44 | \code{bn.fit.barchart()} and \code{bn.fit.dotplot} plot the probabilities in 45 | the conditional probability table associated with each node. 46 | 47 | } 48 | \value{ 49 | 50 | The \pkg{lattice} plot objects. Note that if auto-printing is turned off (for 51 | example when the code is loaded with the \code{source} function), the return 52 | value must be printed explicitly for the plot to be displayed. 53 | 54 | } 55 | \seealso{ 56 | \code{\link{bn.fit}}, \code{\link{bn.fit class}}. 57 | } 58 | \author{Marco Scutari} 59 | \keyword{interfaces to other packages} 60 | \keyword{plots} 61 | -------------------------------------------------------------------------------- /src/tests/discrete/df.adjust.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../tests.h" 3 | 4 | double discrete_df(test_e test, int *ni, int llx, int *nj, int lly) { 5 | 6 | int i = 0, j = 0, alx = 0, aly = 0; 7 | double df = 0; 8 | 9 | switch(test) { 10 | 11 | /* usual degrees of freedom. */ 12 | case MI: 13 | case MI_SH: 14 | case X2: 15 | df = (llx - 1) * (lly - 1); 16 | break; 17 | 18 | /* adjust degrees of freedom: zeroes are considered structural if they 19 | * are part of a column or a row with a zero marginal, the rest are 20 | * considered sampling zeros. */ 21 | case MI_ADF: 22 | case X2_ADF: 23 | for (i = 0; i < llx; i++) 24 | alx += (ni[i] > 0); 25 | for (j = 0; j < lly; j++) 26 | aly += (nj[j] > 0); 27 | 28 | /* ensure the degrees of freedom will not be negative. */ 29 | alx = (alx >= 1) ? alx : 1; 30 | aly = (aly >= 1) ? aly : 1; 31 | 32 | df = (alx - 1) * (aly - 1); 33 | 34 | break; 35 | 36 | default: 37 | error("no degrees of freedom for this test."); 38 | 39 | }/*SWITCH*/ 40 | 41 | return df; 42 | 43 | }/*DISCRETE_DF*/ 44 | 45 | double discrete_cdf(test_e test, int **ni, int llx, int **nj, int lly, int llz) { 46 | 47 | int i = 0, j = 0, k = 0, alx = 0, aly = 0; 48 | double df = 0; 49 | 50 | switch(test) { 51 | 52 | /* usual degrees of freedom. */ 53 | case MI: 54 | case MI_SH: 55 | case X2: 56 | df = (llx - 1) * (lly - 1) * llz; 57 | break; 58 | 59 | /* adjust degrees of freedom: zeroes are considered structural if they 60 | * are part of a column or a row with a zero marginal, the rest are 61 | * considered sampling zeros. */ 62 | case MI_ADF: 63 | case X2_ADF: 64 | for (k = 0; k < llz; k++) { 65 | 66 | alx = aly = 0; 67 | 68 | for (i = 0; i < llx; i++) 69 | alx += (ni[k][i] > 0); 70 | for (j = 0; j < lly; j++) 71 | aly += (nj[k][j] > 0); 72 | 73 | /* ensure the degrees of freedom will not be negative. */ 74 | alx = (alx >= 1) ? alx : 1; 75 | aly = (aly >= 1) ? aly : 1; 76 | 77 | df += (alx - 1) * (aly - 1); 78 | 79 | }/*FOR*/ 80 | 81 | break; 82 | 83 | default: 84 | error("no degrees of freedom for this test."); 85 | 86 | }/*SWITCH*/ 87 | 88 | return df; 89 | 90 | }/*DISCRETE_CDF*/ 91 | 92 | -------------------------------------------------------------------------------- /man/bnboot.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.boot} 2 | \alias{bn.boot} 3 | \title{Nonparametric bootstrap of Bayesian networks} 4 | \description{ 5 | 6 | Apply a user-specified function to the Bayesian network structures learned 7 | from bootstrap samples of the original data. 
8 | 9 | } 10 | \usage{ 11 | bn.boot(data, statistic, R = 200, m = nrow(data), algorithm, 12 | algorithm.args = list(), statistic.args = list(), cluster, 13 | debug = FALSE) 14 | } 15 | \arguments{ 16 | \item{data}{a data frame containing the variables in the model.} 17 | \item{statistic}{a function or a character string (the name of a function) 18 | to be applied to each bootstrap replicate.} 19 | \item{R}{a positive integer, the number of bootstrap replicates.} 20 | \item{m}{a positive integer, the size of each bootstrap replicate.} 21 | \item{algorithm}{a character string, the learning algorithm to be applied 22 | to the bootstrap replicates. See \code{\link{structure learning}} and the 23 | documentation of each algorithm for details.} 24 | \item{algorithm.args}{a list of extra arguments to be passed to the learning 25 | algorithm.} 26 | \item{statistic.args}{a list of extra arguments to be passed to the function 27 | specified by \code{statistic}.} 28 | \item{cluster}{an optional cluster object from package \pkg{parallel}.} 29 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 30 | printed; otherwise the function is completely silent.} 31 | } 32 | \details{ 33 | 34 | The first argument of \code{statistic} is the \code{bn} object encoding the 35 | network structure learned from the bootstrap sample; the arguments specified 36 | in \code{statistics.args} are extracted from the list and passed to 37 | \code{statistics} as the 2nd, 3rd, etc. arguments. 38 | 39 | } 40 | \value{ 41 | 42 | A list containing the results of the calls to \code{statistic}. 43 | 44 | } 45 | \references{ 46 | 47 | Friedman N, Goldszmidt M, Wyner A (1999). "Data Analysis with Bayesian 48 | Networks: A Bootstrap Approach." \emph{Proceedings of the 15th Annual 49 | Conference on Uncertainty in Artificial Intelligence}, 196--201. 50 | 51 | } 52 | \examples{ 53 | \dontrun{ 54 | data(learning.test) 55 | bn.boot(data = learning.test, R = 2, m = 500, algorithm = "gs", 56 | statistic = arcs) 57 | } 58 | } 59 | \author{Marco Scutari} 60 | \seealso{\code{\link{bn.cv}}, \code{\link{rbn}}.} 61 | \keyword{resampling} 62 | -------------------------------------------------------------------------------- /man/bn.kcv.class.Rd: -------------------------------------------------------------------------------- 1 | \name{bn.kcv class} 2 | \alias{bn.kcv class} 3 | \alias{bn.kcv-class} 4 | \alias{bn.kcv.list class} 5 | \alias{bn.kcv.list-class} 6 | \title{The bn.kcv class structure} 7 | \description{ 8 | 9 | The structure of an object of S3 class \code{bn.kcv} or \code{bn.kcv.list}. 10 | 11 | } 12 | \details{ 13 | 14 | An object of class \code{bn.kcv.list} is a list whose elements are objects 15 | of class \code{bn.kcv}. 16 | 17 | An object of class \code{bn.kcv} is a list whose elements correspond to the 18 | iterations of a k-fold cross-validation. Each element contains the following 19 | objects: 20 | 21 | \itemize{ 22 | 23 | \item \code{test}: an integer vector, the indexes of the observations 24 | used as a test set. 25 | \item \code{fitted}: an object of class \code{bn.fit}, the Bayesian network 26 | fitted from the training set. 27 | \item \code{learning}: the \code{learning} element of the \code{bn} object 28 | that was used for parameter learning from the training set (either learned 29 | from the training set as well or specified by the user). 30 | \item \code{loss}: the value of the loss function (for 31 | \code{method = "hold-out"} or \code{loss = "logl"}), or \code{NA} 32 | (otherwise). 
33 | 34 | } 35 | 36 | If the loss function requires to predict values from the test sets, each 37 | element also contains: 38 | 39 | \itemize{ 40 | 41 | \item \code{predicted}: a factor or a numeric vector, the predicted values 42 | for the target node in the test set. 43 | \item \code{observed}: a factor or a numeric vector, the observed values 44 | for the target node in the test set. 45 | 46 | } 47 | 48 | In addition, an object of class \code{bn.kcv} has the following attributes: 49 | 50 | \itemize{ 51 | 52 | \item \code{loss}: a character string, the label of the loss function. 53 | \item \code{mean}: the mean of the values of the loss function computed in 54 | the \code{k} iterations of the cross-validation, which is printed as the 55 | "expected loss" or averaged to compute the "average loss over the runs". 56 | \item \code{bn}: either a character string (the label of the learning 57 | algorithm to be applied to the training data in each iteration) or an 58 | object of class \code{bn} (a fixed network structure). 59 | 60 | } 61 | 62 | } 63 | \author{Marco Scutari} 64 | \keyword{classes} 65 | -------------------------------------------------------------------------------- /R/classifiers.R: -------------------------------------------------------------------------------- 1 | 2 | # backend of the naive Bayes classifier. 3 | naive.bayes.backend = function(data, training, explanatory) { 4 | 5 | # cache the node set. 6 | nodes = c(training, explanatory) 7 | # create the empty graph. 8 | res = empty.graph(nodes) 9 | # create the set of arcs outgoing from the training variable. 10 | res$arcs = matrix(c(rep(training, length(explanatory)), explanatory), 11 | ncol = 2, byrow = FALSE) 12 | # update the network structure. 13 | res$nodes = cache.structure(nodes, arcs = res$arcs) 14 | # set a second class "bn.naive" to reroute method dispatch as needed. 15 | class(res) = c("bn.naive", "bn") 16 | 17 | return(res) 18 | 19 | }#NAIVE.BAYES.BACKEND 20 | 21 | # backend of the TAN algorithm. 22 | tan.backend = function(data, training, explanatory, whitelist, blacklist, mi, 23 | root, debug = FALSE) { 24 | 25 | # cache the node set. 26 | nodes = c(training, explanatory) 27 | # create the empty graph. 28 | res = empty.graph(nodes) 29 | # create the set of arcs outgoing from the training variable. 30 | class.arcs = matrix(c(rep(training, length(explanatory)), explanatory), 31 | ncol = 2, byrow = FALSE) 32 | 33 | # separate features and target class variable in data and metadata. 34 | features.data = 35 | .data.frame.column(data, explanatory, drop = FALSE, keep.names = TRUE) 36 | attr(features.data, "metadata") = collect.metadata(features.data) 37 | class.data = .data.frame.column(data, training, drop = TRUE) 38 | 39 | # call chow-liu to build the rest of the network. 40 | chow.liu.arcs = 41 | chow.liu.backend(x = features.data, nodes = explanatory, estimator = mi, 42 | whitelist = whitelist, blacklist = blacklist, conditional = class.data, 43 | debug = debug) 44 | 45 | # set the directions of the arcs in the Chow-Liu tree. 46 | chow.liu.arcs = .Call(call_tree_directions, 47 | arcs = chow.liu.arcs, 48 | nodes = explanatory, 49 | root = root, 50 | debug = FALSE) 51 | 52 | # merge learned and predetermined arcs. 53 | res$arcs = arcs.rbind(class.arcs, chow.liu.arcs) 54 | # update the network structure. 55 | res$nodes = cache.structure(nodes, arcs = res$arcs) 56 | # set a second class "bn.tan" to reroute method dispatch as needed. 
57 | class(res) = c("bn.tan", "bn") 58 | 59 | return(res) 60 | 61 | }#TAN.BACKEND 62 | 63 | -------------------------------------------------------------------------------- /man/count.graphs.Rd: -------------------------------------------------------------------------------- 1 | \name{graph enumeration} 2 | \alias{count.graphs} 3 | \alias{graph enumeration} 4 | \title{Count graphs with specific characteristics} 5 | \description{ 6 | Count directed acyclic graphs of various sizes and/or with specific 7 | characteristics. 8 | } 9 | \usage{ 10 | count.graphs(type = "all.dags", ..., debug = FALSE) 11 | } 12 | \arguments{ 13 | \item{type}{a character string, the label describing the types of graphs to 14 | be counted (see below).} 15 | \item{\dots}{additional parameters (see below).} 16 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 17 | printed; otherwise the function is completely silent. Ignored in some 18 | generation methods.} 19 | } 20 | \details{ 21 | 22 | The types of graphs, and the associated additional parameters, are: 23 | 24 | \itemize{ 25 | 26 | \item \code{"all-dags"}: all directed acyclic graphs with \code{nodes} 27 | nodes. 28 | \item \code{"dags-given-ordering"}: all directed acyclic graphs 29 | with a specific topological ordering and \code{nodes} nodes. 30 | \item \code{"dags-with-k-roots"}: all directed acyclic graphs with \code{k} 31 | root nodes and \code{nodes} nodes. 32 | \item \code{"dags-with-r-arcs"}: all directed acyclic graphs with \code{r} 33 | arcs and \code{nodes} nodes. 34 | \item \code{"dags-in-equivalence-class"}: all directed acyclic arcs in the 35 | equivalence class described by \code{eqclass}. 36 | 37 | } 38 | 39 | } 40 | \value{ 41 | 42 | \code{count.graphs()} returns an object of class \code{bigz} from the 43 | \pkg{gmp} package, a vector with the graph counts. 44 | 45 | } 46 | \references{ 47 | 48 | Harary F, Palmer EM (1973). "Graphical Enumeration." Academic Press. 49 | 50 | Rodionov VI (1992). "On the Number of Labeled Acyclic Digraphs." 51 | \emph{Discrete Mathematics}, \strong{105}:319--321. 52 | 53 | Liskovets VA (1976). "On the Number of Maximal Vertices of a Random Acyclic 54 | Digraph." \emph{Theory of Probability and its Applications}, 55 | \strong{20}(2):401--409. 56 | 57 | Wienobst M, Luttermann M, Bannach M, Liskiewicz (2023). "Efficient Enumeration 58 | of Markov Equivalent DAGs." \emph{Proceedings of the 37th AAAI Conference on 59 | Artificial Intelligence}, 12313--12320. 60 | 61 | } 62 | \examples{ 63 | \dontrun{ 64 | count.graphs("dags.with.r.arcs", nodes = 3:6, r = 2) 65 | }} 66 | \author{Marco Scutari} 67 | \keyword{graphs} 68 | -------------------------------------------------------------------------------- /R/sanitization-misc.R: -------------------------------------------------------------------------------- 1 | 2 | # check labels for various arguments. 3 | check.label = function(arg, choices, labels, argname, see) { 4 | 5 | if (!is.string(arg)) 6 | stop("the ", argname, " must be a single character string.") 7 | 8 | if (arg %in% choices) 9 | return(invisible(NULL)) 10 | 11 | # concatenate valid values, optinally with labels. 
12 | if (missing(labels)) { 13 | 14 | choices = paste(paste('"', choices, '"', sep = ""), collapse = ", ") 15 | 16 | }#THEN 17 | else { 18 | 19 | labels = paste("(", labels[choices], ")", sep = "") 20 | choices = paste('"', choices, '"', sep = "") 21 | nl = length(labels) 22 | choices = paste(choices, labels, collapse = ", ") 23 | 24 | }#ELSE 25 | 26 | # mention the most relevant manual page. 27 | if (missing(see)) 28 | see = character(0) 29 | else 30 | see = paste(" See ?", see, " for details.", sep = "") 31 | 32 | # build the error message. 33 | errmsg = paste("valid ", argname, "(s) are ", choices, ".", see, sep = "") 34 | 35 | # make sure the error message is not truncated if possible at all. 36 | errlen = unlist(options("warning.length"), use.names = FALSE) 37 | options("warning.length" = max(1000, min(8170, nchar(errmsg) + 20))) 38 | on.exit(options("warning.length" = errlen)) 39 | 40 | stop(errmsg) 41 | 42 | }#CHECK.LABEL 43 | 44 | # warn about unused arguments and remove them. 45 | check.unused.args = function(dots, used.args) { 46 | 47 | if (is(dots, "list")) 48 | args = names(dots) 49 | else 50 | args = dots 51 | 52 | unused = args %!in% used.args 53 | 54 | if (any(unused)) 55 | warning("unused argument(s):", paste0(" '", args[unused], "'"), ".") 56 | 57 | # remove unused args to avoid problems later on (like conflicts in partial 58 | # name matching). 59 | dots = dots[used.args] 60 | 61 | return(dots) 62 | 63 | }#CHECK.UNUSED.ARGS 64 | 65 | # check whether a package is loaded. 66 | check.and.load.package = function(pkg) { 67 | 68 | # silence all warnings while looking for suggested packages. 69 | warning.level = as.numeric(options("warn")) 70 | options("warn" = -1) 71 | on.exit(options("warn" = warning.level)) 72 | 73 | if (!requireNamespace(pkg)) 74 | stop("this function requires the ", pkg, " package.") 75 | 76 | }#CHECK.AND.LOAD.PACKAGE 77 | 78 | # reverse lookup for optional arguments. 79 | has.argument = function(label, arg, lookup) { 80 | 81 | arg %in% lookup[[label]] 82 | 83 | }#HAS.ARGUMENT 84 | -------------------------------------------------------------------------------- /R/frontend-causal.R: -------------------------------------------------------------------------------- 1 | # mutilated network used for interventions and in likelihood weighting. 2 | mutilated = function(x, evidence) { 3 | 4 | check.bn.or.fit(x) 5 | # check the evidence, disallowing non-ideal interventions if needed. 6 | evidence = check.evidence(evidence, graph = x, 7 | ideal.only = is(x, "bn.fit.gnet")) 8 | 9 | if (is(x, "bn")) { 10 | 11 | # the network must be a directed acyclic graph to treat it as causal. 12 | if (!is.completely.directed(x)) 13 | stop("the graph is only partially directed.") 14 | if (!is.acyclic(x$arcs, names(x$nodes), directed = TRUE)) 15 | stop("the graph contains cycles.") 16 | 17 | return(mutilated.backend.bn(x, evidence)) 18 | 19 | }#THEN 20 | else { 21 | 22 | return(mutilated.backend.fitted(x, evidence)) 23 | 24 | }#ELSE 25 | 26 | }#MUTILATED 27 | 28 | # twin network used for counterfactuals. 29 | twin = function(x) { 30 | 31 | check.bn.or.fit(x) 32 | 33 | if (is(x, "bn.fit")) 34 | x = bn.net(x) 35 | else { 36 | 37 | # the network must be a directed acyclic graph to treat it as causal. 
38 | if (!is.completely.directed(x)) 39 | stop("the graph is only partially directed.") 40 | if (!is.acyclic(x$arcs, names(x$nodes), directed = TRUE)) 41 | stop("the graph contains cycles.") 42 | 43 | }#ELSE 44 | 45 | twin.backend.bn(x) 46 | 47 | }#TWIN 48 | 49 | # perform an intervention on a network or fitted network. 50 | intervention = function(x, evidence) { 51 | 52 | # the mutilation encodes the intervention into the network. 53 | mutilated(x = x, evidence = evidence) 54 | 55 | }#INTERVENTION 56 | 57 | # set up a counterfactual network. 58 | counterfactual = function(x, evidence, merging = TRUE) { 59 | 60 | check.bn.or.fit(x) 61 | check.logical(merging) 62 | 63 | # check the evidence, disallowing non-ideal interventions if needed. 64 | evidence = check.evidence(evidence, graph = x, 65 | ideal.only = is(x, "bn.fit.gnet")) 66 | 67 | if (is(x, "bn.fit")) 68 | x = bn.net(x) 69 | else { 70 | 71 | # the network must be a directed acyclic graph to treat it as causal. 72 | if (!is.completely.directed(x)) 73 | stop("the graph is only partially directed.") 74 | if (!is.acyclic(x$arcs, names(x$nodes), directed = TRUE)) 75 | stop("the graph contains cycles.") 76 | 77 | }#ELSE 78 | 79 | counterfactual.backend.bn(x = x, evidence = evidence, merging = merging) 80 | 81 | }#COUNTERFACTUAL 82 | 83 | -------------------------------------------------------------------------------- /src/inference/likelihood.weighting.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/globals.h" 3 | #include "../core/math.functions.h" 4 | #include "../fitted/fitted.h" 5 | #include "../core/data.table.h" 6 | #include "loglikelihood/loglikelihood.h" 7 | #include "../minimal/common.h" 8 | 9 | void c_lw_weights(SEXP fitted, SEXP data, int n, double *w, SEXP keep, 10 | bool debugging) { 11 | 12 | int i = 0, max_el = 0; 13 | double maxw = 0; 14 | fitted_bn bn = fitted_network_from_SEXP(fitted); 15 | SEXP metadata, complete_nodes, nodes_in_fitted, keep2; 16 | 17 | memset(w, '\0', n * sizeof(double)); 18 | /* find out which nodes to compute the log-likelihood for, zero-indexed. */ 19 | PROTECT(nodes_in_fitted = getAttrib(fitted, R_NamesSymbol)); 20 | PROTECT(keep2 = match(keep, nodes_in_fitted, 0)); 21 | 22 | /* extract the metadata from the data. */ 23 | PROTECT(metadata = getAttrib(data, BN_MetaDataSymbol)); 24 | PROTECT(complete_nodes = getListElement(metadata, "complete.nodes")); 25 | 26 | if (bn.type == DNET || bn.type == ONET || bn.type == DONET) { 27 | 28 | ddata dt = ddata_from_SEXP(data, 0); 29 | meta_copy_names(&(dt.m), 0, data); 30 | meta_init_flags(&(dt.m), 0, complete_nodes, keep2); 31 | 32 | bysample_discrete_loglikelihood(bn, dt, w, debugging); 33 | 34 | FreeDDT(dt); 35 | 36 | }/*THEN*/ 37 | else if (bn.type == GNET) { 38 | 39 | gdata dt = gdata_from_SEXP(data, 0); 40 | meta_copy_names(&(dt.m), 0, data); 41 | meta_init_flags(&(dt.m), 0, complete_nodes, keep2); 42 | 43 | bysample_gaussian_loglikelihood(bn, dt, w, TRUE, debugging); 44 | 45 | FreeGDT(dt); 46 | 47 | }/*THEN*/ 48 | else if (bn.type == CGNET) { 49 | 50 | cgdata dt = cgdata_from_SEXP(data, 0, 0); 51 | meta_copy_names(&(dt.m), 0, data); 52 | meta_init_flags(&(dt.m), 0, complete_nodes, keep2); 53 | 54 | bysample_clgaussian_loglikelihood(bn, dt, w, TRUE, debugging); 55 | 56 | FreeCGDT(dt); 57 | 58 | }/*THEN*/ 59 | 60 | FreeFittedBN(bn); 61 | 62 | UNPROTECT(4); 63 | 64 | /* rescale before exponentiating them into probabilities (if possible). 
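* subtracting the largest log-weight before calling exp() is the usual log-sum-exp safeguard: the shift prevents overflow and underflow, and the common factor cancels once the weights are used as relative weights.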
*/ 65 | max_el = d_which_max(w, n); 66 | 67 | if (max_el == NA_INTEGER) 68 | return; 69 | else if ((max_el == 1) && (w[0] == R_NegInf)) 70 | memset(w, '\0', n * sizeof(double)); 71 | else { 72 | 73 | maxw = w[max_el - 1]; 74 | for (i = 0; i < n; i++) 75 | w[i] = exp(w[i] - maxw); 76 | 77 | }/*ELSE*/ 78 | 79 | }/*C_LW_WEIGHTS*/ 80 | 81 | -------------------------------------------------------------------------------- /man/modelstring.Rd: -------------------------------------------------------------------------------- 1 | \name{model string utilities} 2 | \alias{model string utilities} 3 | \alias{modelstring} 4 | \alias{modelstring<-} 5 | \alias{model2network} 6 | \alias{as.character.bn} 7 | \alias{as.bn} 8 | \alias{as.bn.character} 9 | \title{Build a model string from a Bayesian network and vice versa} 10 | \description{ 11 | 12 | Build a model string from a Bayesian network and vice versa. 13 | 14 | } 15 | \usage{ 16 | modelstring(x) 17 | modelstring(x, debug = FALSE) <- value 18 | 19 | model2network(string, ordering = NULL, debug = FALSE) 20 | 21 | \method{as.character}{bn}(x, ...) 22 | \method{as.bn}{character}(x, ...) 23 | } 24 | \arguments{ 25 | \item{x}{an object of class \code{bn}. \code{modelstring()} (but not its 26 | replacement form) accepts also objects of class \code{bn.fit}.} 27 | \item{string}{a character string describing the Bayesian network.} 28 | \item{ordering}{the labels of all the nodes in the graph; their order is the 29 | node ordering used in the construction of the \code{bn} object. If 30 | \code{NULL} the nodes are sorted alphabetically.} 31 | \item{value}{a character string, the same as the \code{string}.} 32 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 33 | printed; otherwise the function is completely silent.} 34 | \item{\dots}{extra arguments from the generic method (currently ignored).} 35 | } 36 | \details{ 37 | 38 | The strings returned by \code{modelstringi()} have the same format as the ones 39 | returned by the \code{modelstring()} function in package \pkg{deal}; network 40 | structures may be easily exported to and imported from that package (via the 41 | \code{model2network} function). 42 | 43 | The format of the model strings is as follows. The local structure of each 44 | node is enclosed in square brackets ("\code{[]}"); the first string is the 45 | label of that node. The parents of the node (if any) are listed after a 46 | ("\code{|}") and separated by colons ("\code{:}"). All nodes (including 47 | isolated and root nodes) must be listed. 48 | 49 | } 50 | \value{ 51 | 52 | \code{model2network()} and \code{as.bn()} return an object of class \code{bn}; 53 | \code{modelstring()} and \code{as.character.bn()} return a character string. 54 | 55 | } 56 | \examples{ 57 | data(learning.test) 58 | dag = hc(learning.test) 59 | dag 60 | modelstring(dag) 61 | dag2 = model2network(modelstring(dag)) 62 | dag2 63 | all.equal(dag, dag2) 64 | } 65 | \author{Marco Scutari} 66 | \keyword{convenience functions} 67 | -------------------------------------------------------------------------------- /R/sanitization-bootstrap.R: -------------------------------------------------------------------------------- 1 | # check bootstrap arguments (when they are passed as variable length args). 2 | check.bootstrap.args = function(extra.args, network, data) { 3 | 4 | # check the number of bootstrap replicates. 5 | extra.args[["R"]] = check.replicates(extra.args[["R"]]) 6 | # check the size of each bootstrap sample. 
7 | extra.args[["m"]] = check.bootsize(extra.args[["m"]], data) 8 | # check the learning algorithm. 9 | algorithm = check.learning.algorithm(extra.args[["algorithm"]], bn = network) 10 | # check the extra arguments for the learning algorithm. 11 | algorithm.args = 12 | check.learning.algorithm.args(extra.args[["algorithm.args"]], 13 | algorithm = algorithm, bn = network) 14 | 15 | extra.args[["algorithm"]] = algorithm 16 | extra.args[["algorithm.args"]] = algorithm.args 17 | 18 | # remap additional arguments used in hybrid algorithms. 19 | if (algorithm %in% hybrid.algorithms) { 20 | 21 | # there's no need to sanitize these arguments, it's done either in 22 | # the calls to the structure learning algorithms. 23 | if (is.null(extra.args[["algorithm.args"]][["restrict"]])) 24 | extra.args[["algorithm.args"]][["restrict"]] = network$learning$restrict 25 | if (is.null(extra.args[["algorithm.args"]]$maximize)) 26 | extra.args[["algorithm.args"]][["maximize"]] = network$learning$maximize 27 | if (is.null(extra.args[["algorithm.args"]]$test)) 28 | extra.args[["algorithm.args"]][["test"]] = network$learning$rstest 29 | if (is.null(extra.args[["algorithm.args"]]$score)) 30 | extra.args[["algorithm.args"]][["score"]] = network$learning$maxscore 31 | 32 | }#THEN 33 | 34 | # warn about and remove unused arguments. 35 | extra.args = 36 | check.unused.args(extra.args, c("R", "m", "algorithm", "algorithm.args")) 37 | 38 | return(extra.args) 39 | 40 | }#CHECK.BOOTSTRAP.ARGS 41 | 42 | # check the number of bootstrap replicates. 43 | check.replicates = function(R, default = 200) { 44 | 45 | if (missing(R) || is.null(R)) 46 | R = default 47 | else if (!is.positive.integer(R)) 48 | stop("the number of bootstrap replicates must be a positive integer.") 49 | 50 | return(R) 51 | 52 | }#CHECK.REiPLICATES 53 | 54 | # check the size of bootstrap replicates. 55 | check.bootsize = function(m, data) { 56 | 57 | if (missing(m) || is.null(m)) 58 | m = nrow(data) 59 | else if (!is.positive.integer(m)) 60 | stop("bootstrap sample size must be a positive integer.") 61 | 62 | return(m) 63 | 64 | }#CHECK.BOOTSIZE 65 | -------------------------------------------------------------------------------- /src/core/contingency.tables.h: -------------------------------------------------------------------------------- 1 | #ifndef CONTINGENCY_TABLES_HEADER 2 | #define CONTINGENCY_TABLES_HEADER 3 | 4 | /* one-dimensional contingency table. */ 5 | typedef struct { 6 | 7 | int llx; /* first (and only) dimension. */ 8 | int nobs; /* total count over all cells. */ 9 | int *n; /* contingency table. */ 10 | 11 | } counts1d; 12 | 13 | /* two-dimensional contingency table. */ 14 | typedef struct{ 15 | 16 | int llx; /* first dimension. */ 17 | int lly; /* second dimension */ 18 | int nobs; /* total count over all cells. */ 19 | int **n; /* contingency table. */ 20 | int *ni; /* marginal counts for the first dimension. */ 21 | int *nj; /* marginal counts for the second dimension. */ 22 | 23 | } counts2d; 24 | 25 | /* three-dimensional contingency table, as an array of two-dimensional tables 26 | * spanning the third dimension. */ 27 | typedef struct { 28 | 29 | int llx; /* first dimension. */ 30 | int lly; /* second dimension */ 31 | int llz; /* third dimension. */ 32 | int nobs; /* total count over all cells. */ 33 | int ***n; /* contingency table. */ 34 | int **ni; /* marginal counts for the first dimension. */ 35 | int **nj; /* marginal counts for the second dimension. */ 36 | int *nk; /* marginal counts for the third dimension. 
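* (given the layout described above, the counts are presumably indexed as n[z][x][y], with each n[z] a complete two-dimensional slice and nk[z] its total count.)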
*/ 37 | 38 | } counts3d; 39 | 40 | counts1d new_1d_table(int llx); 41 | counts2d new_2d_table(int llx, int lly, bool margins); 42 | counts3d new_3d_table(int llx, int lly, int llz); 43 | 44 | void fill_1d_table(int *xx, counts1d *table, int num); 45 | void fill_2d_table(int *xx, int *yy, counts2d *table, int num); 46 | void fill_3d_table(int *xx, int *yy, int *zz, counts3d *table, int num); 47 | 48 | void refill_1d_table(int *xx, counts1d *table, int num); 49 | void refill_2d_table(int *xx, int *yy, counts2d *table, int num); 50 | void refill_3d_table(int *xx, int *yy, int *zz, counts3d *table, int num); 51 | 52 | void resize_1d_table(int llx, counts1d *table); 53 | void resize_2d_table(int llx, int lly, counts2d *table); 54 | void resize_3d_table(int llx, int lly, int llz, counts3d *table); 55 | 56 | void print_1d_table(counts1d table); 57 | void print_2d_table(counts2d table); 58 | void print_3d_table(counts3d table); 59 | 60 | void Free1DTAB(counts1d table); 61 | void Free2DTAB(counts2d table); 62 | void Free3DTAB(counts3d table); 63 | 64 | void rcounts2d(counts2d table, double *fact, int *workspace); 65 | void rcounts3d(counts3d table, double *fact, int *workspace); 66 | 67 | #endif 68 | -------------------------------------------------------------------------------- /man/blacklist.Rd: -------------------------------------------------------------------------------- 1 | \name{utilities for whitelists and blacklists} 2 | \alias{whitelist} 3 | \alias{blacklist} 4 | \alias{ordering2blacklist} 5 | \alias{tiers2blacklist} 6 | \alias{set2blacklist} 7 | \title{Get or create whitelists and blacklists} 8 | \description{ 9 | 10 | Extract whitelists and blacklists from an object of class \code{bn}, or create 11 | them for use in structure learning. 12 | 13 | } 14 | \usage{ 15 | whitelist(x) 16 | blacklist(x) 17 | 18 | ordering2blacklist(nodes) 19 | tiers2blacklist(tiers) 20 | set2blacklist(set) 21 | } 22 | \arguments{ 23 | \item{x}{an object of class \code{bn}.} 24 | \item{nodes,set}{a vector of character strings, the labels of the nodes.} 25 | \item{tiers}{a vector of character strings or a list, see below.} 26 | 27 | } 28 | \details{ 29 | 30 | \code{ordering2blacklist()} takes a vector of character strings (the labels 31 | of the nodes), which specifies a complete node ordering, or an object of class 32 | \code{bn} or \code{bn.fit}; in the latter case, the node ordering is derived from 33 | the graph. In both cases, the blacklist returned by \code{ordering2blacklist()} 34 | contains all the possible arcs that violate the specified node ordering. 35 | 36 | \code{tiers2blacklist()} takes (again) a vector of character strings (the 37 | labels of the nodes), which specifies a complete node ordering, or a list of 38 | character vectors, which specifies a partial node ordering. In the latter 39 | case, all arcs going from a node in a particular element of the list 40 | (sometimes known as \emph{tier}) to a node in one of the previous elements 41 | are blacklisted. Arcs between nodes in the same element are not blacklisted. 42 | 43 | \code{set2blacklist()} creates a blacklist containing all the arcs between any 44 | two of the nodes whose labels are passed as the argument \code{set}. 45 | 46 | } 47 | \value{ 48 | 49 | \code{whitelist()} and \code{blacklist()} return a matrix of character strings 50 | with two columns, named \code{from} and \code{to}, if a whitelist or a blacklist 51 | has been used to learn the \code{bn} object passed as their argument.
52 | 53 | \code{ordering2blacklist()}, \code{tiers2blacklist()} and 54 | \code{set2blacklist()} return a sanitized \code{blacklist} (a two-column 55 | matrix, whose columns are labeled \code{from} and \code{to}). 56 | 57 | } 58 | \examples{ 59 | tiers2blacklist(list(LETTERS[1:3], LETTERS[4:6])) 60 | set2blacklist(LETTERS[1:3]) 61 | ordering2blacklist(LETTERS[1:6]) 62 | } 63 | 64 | \author{Marco Scutari} 65 | \keyword{convenience functions} 66 | \keyword{structure learning} 67 | -------------------------------------------------------------------------------- /man/coronary.Rd: -------------------------------------------------------------------------------- 1 | \name{coronary} 2 | \docType{data} 3 | \alias{coronary} 4 | \title{Coronary heart disease data set} 5 | \description{ 6 | 7 | Probable risk factors for coronary thrombosis, comprising data from 1841 men. 8 | 9 | } 10 | \usage{ 11 | data(coronary) 12 | } 13 | \format{ 14 | 15 | The \code{coronary} data set contains the following 6 variables: 16 | \itemize{ 17 | 18 | \item \code{Smoking} (\emph{smoking}): a two-level factor with levels 19 | \code{no} and \code{yes}. 20 | \item \code{M. Work} (\emph{strenuous mental work}): a two-level factor 21 | with levels \code{no} and \code{yes}. 22 | \item \code{P. Work} (\emph{strenuous physical work}): a two-level factor 23 | with levels \code{no} and \code{yes}. 24 | \item \code{Pressure} (\emph{systolic blood pressure}): a two-level factor 25 | with levels \code{<140} and \code{>140}. 26 | \item \code{Proteins} (\emph{ratio of beta and alpha lipoproteins}): a 27 | two-level factor with levels \code{<3} and \code{>3}. 28 | \item \code{Family} (\emph{family anamnesis of coronary heart disease}): a 29 | two-level factor with levels \code{neg} and \code{pos}. 30 | 31 | } 32 | 33 | } 34 | \source{ 35 | 36 | Edwards DI (2000). \emph{Introduction to Graphical Modelling}. Springer, 2nd 37 | edition. 38 | 39 | Reinis Z, Pokorny J, Basika V, Tiserova J, Gorican K, Horakova D, Stuchlikova 40 | E, Havranek T, Hrabovsky F (1981). "Prognostic Significance of the Risk 41 | Profile in the Prevention of Coronary Heart Disease." \emph{Bratisl Lek 42 | Listy}, \strong{76}:137--150. Published on Bratislava Medical Journal, 43 | in Czech. 44 | 45 | Whittaker J (1990). \emph{Graphical Models in Applied Multivariate 46 | Statistics}. Wiley. 47 | 48 | } 49 | \examples{ 50 | 51 | # This is the undirected graphical model from Whittaker (1990). 52 | data(coronary) 53 | ug = empty.graph(names(coronary)) 54 | arcs(ug, check.cycles = FALSE) = matrix( 55 | c("Family", "M. Work", "M. Work", "Family", 56 | "M. Work", "P. Work", "P. Work", "M. Work", 57 | "M. Work", "Proteins", "Proteins", "M. Work", 58 | "M. Work", "Smoking", "Smoking", "M. Work", 59 | "P. Work", "Smoking", "Smoking", "P. Work", 60 | "P. Work", "Proteins", "Proteins", "P. 
Work", 61 | "Smoking", "Proteins", "Proteins", "Smoking", 62 | "Smoking", "Pressure", "Pressure", "Smoking", 63 | "Pressure", "Proteins", "Proteins", "Pressure"), 64 | ncol = 2, byrow = TRUE, 65 | dimnames = list(c(), c("from", "to"))) 66 | \dontrun{graphviz.plot(ug, shape = "ellipse")} 67 | } 68 | \keyword{datasets} 69 | -------------------------------------------------------------------------------- /man/whitelists.and.blacklists.Rd: -------------------------------------------------------------------------------- 1 | \name{whitelists-blacklists} 2 | \alias{whitelists-blacklists} 3 | \alias{whitelists and blacklists} 4 | \title{Whitelists and blacklists in structure learning} 5 | \description{ 6 | 7 | How whitelists and blacklists are used in structure learning. 8 | 9 | } 10 | \section{Constraint-based Algorithms}{ 11 | 12 | Constraint-based algorithms support arc whitelisting and blacklisting as 13 | follows: 14 | 15 | \itemize{ 16 | 17 | \item blacklisted arcs are never present in the learned graph. 18 | \item arcs whitelisted in one direction only (i.e. 19 | \eqn{A \rightarrow B}{A -> B} is whitelisted but 20 | \eqn{B \rightarrow A}{B -> A} is not) have the respective reverse arcs 21 | blacklisted, and are always present in the learned graph. 22 | \item arcs whitelisted in both directions (i.e. both 23 | \eqn{A \rightarrow B}{A -> B} and \eqn{B \rightarrow A}{B -> A} are 24 | whitelisted) are present in the learned graph, but their direction is set 25 | by the learning algorithm. 26 | 27 | } 28 | 29 | Any arc whitelisted and blacklisted at the same time is assumed to be 30 | whitelisted, and is thus removed from the blacklist. 31 | 32 | } 33 | \section{Score-based Algorithms}{ 34 | 35 | Score-based algorithms support arc whitelisting and blacklisting as follows: 36 | 37 | \itemize{ 38 | 39 | \item blacklisted arcs are never present in the learned graph. 40 | \item arcs can only be whitelisted in a single direction, and are always 41 | present in the learned graph; it is not possible to whitelist arcs in both 42 | directions. 43 | 44 | } 45 | 46 | } 47 | \section{Hybrid Algorithms}{ 48 | 49 | Hybrid algorithms use constraint-based (or pairwise mutual information) 50 | algorithms in the \emph{restrict phase} and score-based algorithms in the 51 | \emph{maximize phase}. Hence whitelists and blacklists are supported as 52 | follows: 53 | 54 | \itemize{ 55 | 56 | \item whitelists and blacklists should be specified for the algorithm used 57 | in the restrict phase. 58 | \item if the whitelist contains any undirected arc, its consistent extension 59 | is used instead in the maximize phase. 60 | 61 | } 62 | 63 | } 64 | \section{Pairwise Mutual Information Algorithms}{ 65 | 66 | In algorithms that learn undirected graphs, such as ARACNE and Chow-Liu, arcs 67 | are treated as being whitelisted or blacklisted in both directions even if 68 | only one direction is listed in the whitelist or blacklist. Again blacklisted 69 | arcs are never present in the learned graph and whitelisted arcs are 70 | guaranteed to be present in the learned graph. 
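
For instance, in the following sketch (which uses the \code{learning.test} data
set included in the package; the particular arcs are chosen arbitrarily, only
for illustration) the arc between \code{A} and \code{B} can never appear in the
learned graph, while the arc between \code{C} and \code{D} always does:

\preformatted{
bl = set2blacklist(c("A", "B"))
wl = data.frame(from = "C", to = "D")
chow.liu(learning.test, whitelist = wl, blacklist = bl)
}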
71 | 72 | } 73 | \keyword{package} 74 | -------------------------------------------------------------------------------- /src/tests/rinterface/htest.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../minimal/strings.h" 3 | #include "../../minimal/common.h" 4 | #include "../tests.h" 5 | 6 | SEXP c_create_htest(double stat, SEXP test, double pvalue, double df, 7 | SEXP extra_args) { 8 | 9 | test_e test_type = test_to_enum(CHAR(STRING_ELT(test, 0))); 10 | SEXP result, s, n, B, params; 11 | 12 | /* allocate the return value. */ 13 | PROTECT(result = allocVector(VECSXP, 7)); 14 | /* set the class. */ 15 | setAttrib(result, R_ClassSymbol, mkString("htest")); 16 | /* set the names of the elements. */ 17 | setAttrib(result, R_NamesSymbol, mkStringVec(7, "statistic", "p.value", 18 | "method", "null.value", "alternative", "data.name", "parameter")); 19 | 20 | /* set the test statistic. */ 21 | PROTECT(s = ScalarReal(stat)); 22 | setAttrib(s, R_NamesSymbol, test); 23 | SET_VECTOR_ELT(result, 0, s); 24 | 25 | /* set the p-value. */ 26 | SET_VECTOR_ELT(result, 1, ScalarReal(pvalue)); 27 | 28 | /* set the label of the test. */ 29 | SET_VECTOR_ELT(result, 2, mkString("")); 30 | 31 | /* set the value of the statistic under the null. */ 32 | PROTECT(n = ScalarReal(0)); 33 | setAttrib(n, R_NamesSymbol, mkString("value")); 34 | SET_VECTOR_ELT(result, 3, n); 35 | 36 | /* set the alternative hypothesis. */ 37 | if (test_type == CUSTOM_T) 38 | SET_VECTOR_ELT(result, 4, R_NilValue); 39 | else if (IS_TWO_SIDED(test_type)) 40 | SET_VECTOR_ELT(result, 4, mkString("two.sided")); 41 | else 42 | SET_VECTOR_ELT(result, 4, mkString("greater")); 43 | 44 | /* set the data description string. */ 45 | SET_VECTOR_ELT(result, 5, mkString("")); 46 | 47 | /* save the relevant parameters of the test. */ 48 | B = getListElement(extra_args, "B"); 49 | 50 | if (B != R_NilValue) { 51 | 52 | if (ISNAN(df)) { 53 | 54 | PROTECT(params = ScalarReal(INT(B))); 55 | setAttrib(params, R_NamesSymbol, mkString("Monte Carlo samples")); 56 | SET_VECTOR_ELT(result, 6, params); 57 | UNPROTECT(1); 58 | 59 | }/*THEN*/ 60 | else { 61 | 62 | PROTECT(params = allocVector(REALSXP, 2)); 63 | REAL(params)[0] = df; 64 | REAL(params)[1] = INT(B); 65 | setAttrib(params, R_NamesSymbol, mkStringVec(2, "df", "Monte Carlo samples")); 66 | SET_VECTOR_ELT(result, 6, params); 67 | UNPROTECT(1); 68 | 69 | }/*ELSE*/ 70 | 71 | }/*THEN*/ 72 | else { 73 | 74 | if (!ISNAN(df)) { 75 | 76 | PROTECT(params = ScalarReal(df)); 77 | setAttrib(params, R_NamesSymbol, mkString("df")); 78 | SET_VECTOR_ELT(result, 6, params); 79 | UNPROTECT(1); 80 | 81 | }/*THEN*/ 82 | 83 | }/*ELSE*/ 84 | 85 | UNPROTECT(3); 86 | 87 | return result; 88 | 89 | }/*C_CREATE_HTEST*/ 90 | 91 | -------------------------------------------------------------------------------- /R/bootstrap.R: -------------------------------------------------------------------------------- 1 | 2 | # simple nonparametric bootstrap implementation. 3 | bootstrap.backend = function(data, statistic, R, m, algorithm, 4 | algorithm.args = list(), statistic.args = list(), cluster = NULL, 5 | debug = FALSE) { 6 | 7 | # allocate the result list. 
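  # (one element per replicate, initially holding just the replicate index,
  # which is passed to bootstrap.replicate() below and replaced by the value
  # of the user-supplied statistic computed on that replicate.)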
8 | res = as.list(seq(R)) 9 | 10 | bootstrap.replicate = function(r, data, m, algorithm, algorithm.args, 11 | statistic, statistic.args, debug) { 12 | 13 | if (debug) { 14 | 15 | cat("----------------------------------------------------------------\n") 16 | cat("* bootstrap replicate", r, ".\n") 17 | 18 | }#THEN 19 | 20 | # generate the r-th bootstrap sample by resampling with replacement. 21 | resampling = sample(nrow(data), m, replace = TRUE) 22 | 23 | # user-provided lists of manipulated observations for the mbde score must 24 | # be remapped to match the bootstrap sample. 25 | if (!is.null(algorithm.args$score) && (algorithm.args$score == "mbde") && 26 | !is.null(algorithm.args$exp)) { 27 | 28 | algorithm.args$exp = lapply(algorithm.args$exp, function(x) { 29 | 30 | x = match(x, resampling) 31 | x = x[!is.na(x)] 32 | 33 | }) 34 | 35 | }#THEN 36 | 37 | # generate the bootstrap sample. 38 | replicate = data[resampling, , drop = FALSE] 39 | 40 | if (debug) 41 | cat("* learning bayesian network structure.\n") 42 | 43 | # learn the network structure from the bootstrap sample. 44 | bn = do.call(algorithm, c(list(x = replicate), algorithm.args)) 45 | 46 | if (debug) { 47 | 48 | print(bn) 49 | cat("* computing user-defined statistic.\n") 50 | 51 | }#THEN 52 | 53 | # apply the user-defined function to the newly-learned bayesian network; 54 | # the bayesian network is passed as the first argument hoping it will end 55 | # at the right place thanks to the positional matching. 56 | res = do.call(statistic, c(list(bn), statistic.args)) 57 | 58 | if (debug) { 59 | 60 | cat(" > the function returned:\n") 61 | print(res) 62 | 63 | }#THEN 64 | 65 | return(res) 66 | 67 | }#BOOTSTRAP.REPLICATE 68 | 69 | if (!is.null(cluster)) { 70 | 71 | res = parallel::parLapplyLB(cluster, res, bootstrap.replicate, data = data, 72 | m = m, algorithm = algorithm, 73 | algorithm.args = algorithm.args, statistic = statistic, 74 | statistic.args = statistic.args, debug = debug) 75 | 76 | }#THEN 77 | else { 78 | 79 | res = lapply(res, bootstrap.replicate, data = data, m = m, 80 | algorithm = algorithm, algorithm.args = algorithm.args, 81 | statistic = statistic, statistic.args = statistic.args, 82 | debug = debug) 83 | 84 | }#ELSE 85 | 86 | return(res) 87 | 88 | }#BOOTSTRAP.BACKEND 89 | 90 | -------------------------------------------------------------------------------- /src/bnlearn/shd.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/graph.h" 3 | #include "../minimal/common.h" 4 | #include "../math/linear.algebra.h" 5 | 6 | SEXP shd(SEXP learned, SEXP golden, SEXP debug) { 7 | 8 | int i = 0, j = 0, c1 = 0, c2 = 0, shd = 0, nnodes = 0; 9 | int *lrn = NULL, *ref = NULL; 10 | bool debugging = isTRUE(debug); 11 | SEXP temp, nodes, l, r; 12 | 13 | /* get the labels of the nodes. */ 14 | temp = getListElement(learned, "nodes"); 15 | PROTECT(nodes = getAttrib(temp, R_NamesSymbol)); 16 | nnodes = length(nodes); 17 | 18 | /* get the arcs of the learned network. */ 19 | temp = getListElement(learned, "arcs"); 20 | /* build the adjacency matrix. */ 21 | PROTECT(l = arcs2amat(temp, nodes)); 22 | lrn = INTEGER(l); 23 | 24 | /* get the arcs of the golden standard network. */ 25 | temp = getListElement(golden, "arcs"); 26 | /* build the adjacency matrix. 
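* (as above: amat[i, j] == 1 encodes the arc i -> j, and an undirected arc sets both amat[i, j] and amat[j, i], which is what the pairwise comparison below relies on to tell directed, undirected and missing arcs apart.)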
*/ 27 | PROTECT(r = arcs2amat(temp, nodes)); 28 | ref = INTEGER(r); 29 | 30 | for (i = 0; i < nnodes; i++) { 31 | 32 | for (j = i + 1; j < nnodes; j++) { 33 | 34 | /* compute coordinates only once per iteration. */ 35 | c1 = CMC(i, j, nnodes); 36 | c2 = CMC(j, i, nnodes); 37 | 38 | /* the two arcs are identical, nothing to do. */ 39 | if ((lrn[c1] == ref[c1]) && (lrn[c2] == ref[c2])) 40 | continue; 41 | 42 | if (debugging) { 43 | 44 | Rprintf("* arcs between %s and %s do not match.\n", NODE(i), NODE(j)); 45 | 46 | if ((lrn[c1] == 1) && (lrn[c2] == 1)) 47 | Rprintf(" > the learned network contains %s - %s.\n", NODE(i), NODE(j)); 48 | else if ((lrn[c1] == 0) && (lrn[c2] == 0)) 49 | Rprintf(" > the learned network contains no arc between %s and %s.\n", NODE(i), NODE(j)); 50 | else if ((lrn[c1] == 1) && (lrn[c2] == 0)) 51 | Rprintf(" > the learned network contains %s -> %s.\n", NODE(i), NODE(j)); 52 | else if ((lrn[c1] == 0) && (lrn[c2] == 1)) 53 | Rprintf(" > the learned network contains %s -> %s.\n", NODE(j), NODE(i)); 54 | 55 | if ((ref[c1] == 1) && (ref[c2] == 1)) 56 | Rprintf(" > the true network contains %s - %s.\n", NODE(i), NODE(j)); 57 | else if ((ref[c1] == 0) && (ref[c2] == 0)) 58 | Rprintf(" > the true network contains no arc between %s and %s.\n", NODE(i), NODE(j)); 59 | else if ((ref[c1] == 1) && (ref[c2] == 0)) 60 | Rprintf(" > the true network contains %s -> %s.\n", NODE(i), NODE(j)); 61 | else if ((ref[c1] == 0) && (ref[c2] == 1)) 62 | Rprintf(" > the true network contains %s -> %s.\n", NODE(j), NODE(i)); 63 | 64 | }/*THEN*/ 65 | 66 | /* increase the distance by one. */ 67 | shd++; 68 | 69 | }/*FOR*/ 70 | 71 | }/*FOR*/ 72 | 73 | UNPROTECT(3); 74 | 75 | return ScalarInteger(shd); 76 | 77 | }/*SHD*/ 78 | 79 | -------------------------------------------------------------------------------- /src/arcs/arcs2amat.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../include/bn.h" 3 | #include "../minimal/strings.h" 4 | #include "../minimal/common.h" 5 | #include "../math/linear.algebra.h" 6 | 7 | /* convert an arc set to an adjacency matrix. */ 8 | SEXP arcs2amat(SEXP arcs, SEXP nodes) { 9 | 10 | int k = 0, nrow = length(arcs) / 2, dims = length(nodes); 11 | int *res = NULL, *coords = NULL; 12 | SEXP result, try; 13 | 14 | /* allocate and initialize the adjacency matrix. */ 15 | PROTECT(result = allocMatrix(INTSXP, dims, dims)); 16 | res = INTEGER(result); 17 | memset(res, '\0', sizeof(int) * dims * dims); 18 | 19 | /* set rownames and colnames to the node labels. */ 20 | setDimNames(result, nodes, nodes); 21 | 22 | /* nothing to do if there are no arcs. */ 23 | if (nrow == 0) { 24 | 25 | UNPROTECT(1); 26 | return result; 27 | 28 | }/*THEN*/ 29 | 30 | /* match the node labels in the arc set. */ 31 | PROTECT(try = match(nodes, arcs, 0)); 32 | coords = INTEGER(try); 33 | 34 | /* iterate over the arcs. */ 35 | for (k = 0; k < nrow; k++) 36 | res[CMC(coords[k] - 1, coords[k + nrow] - 1, dims)] = 1; 37 | 38 | UNPROTECT(2); 39 | 40 | return result; 41 | 42 | }/*ARCS2AMAT*/ 43 | 44 | /* convert an adjacency matrix to an arc set. */ 45 | SEXP amat2arcs(SEXP amat, SEXP nodes) { 46 | 47 | int i = 0, j = 0, k = 0, nrow = length(nodes), narcs = 0; 48 | int *a = INTEGER(amat); 49 | SEXP arcs; 50 | 51 | /* count the number of arcs in the adjacency matrix. 
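* (every nonzero cell counts once, so an undirected arc stored as two symmetric cells yields two rows in the returned arc set.)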
*/ 52 | for (i = 0; i < nrow; i++) { 53 | 54 | for (j = 0; j < nrow; j++) { 55 | 56 | if (a[CMC(i, j, nrow)] == 1) narcs++; 57 | 58 | }/*FOR*/ 59 | 60 | }/*FOR*/ 61 | 62 | /* allocate the arc set and set the column names. */ 63 | PROTECT(arcs = allocMatrix(STRSXP, narcs, 2)); 64 | setDimNames(arcs, R_NilValue, mkStringVec(2, "from", "to")); 65 | 66 | /* if there are no arcs, return an empty arc set. */ 67 | if (narcs == 0) { 68 | 69 | UNPROTECT(1); 70 | return arcs; 71 | 72 | }/*THEN*/ 73 | 74 | /* fill the arc set from the adjacency matrix. */ 75 | for (i = 0; i < nrow; i++) { 76 | 77 | for (j = 0; j < nrow; j++) { 78 | 79 | /* colnames and rownames are completely ignored to avoid hitting some 80 | * corner cases present in the old R code. */ 81 | if (a[CMC(i, j, nrow)] == 1) { 82 | 83 | SET_STRING_ELT(arcs, k, STRING_ELT(nodes, i)); 84 | SET_STRING_ELT(arcs, k + 1 * narcs, STRING_ELT(nodes, j)); 85 | k++; 86 | 87 | }/*THEN*/ 88 | 89 | /* no more arcs, get out of both loops. */ 90 | if (k == narcs) goto end; 91 | 92 | }/*FOR*/ 93 | 94 | }/*FOR*/ 95 | 96 | end: 97 | 98 | UNPROTECT(1); 99 | 100 | return arcs; 101 | 102 | }/*AMAT2ARCS*/ 103 | 104 | -------------------------------------------------------------------------------- /src/graphs/rinterface/sid.c: -------------------------------------------------------------------------------- 1 | #include "../../include/rcore.h" 2 | #include "../../core/allocations.h" 3 | #include "../../include/graph.h" 4 | #include "../../include/globals.h" 5 | #include "../../minimal/common.h" 6 | #include "../../minimal/strings.h" 7 | #include "../../math/linear.algebra.h" 8 | #include "../graphs.h" 9 | 10 | /* build a reachability matrix that takes non-causal paths into account. */ 11 | SEXP reachability_matrix(SEXP dag, SEXP path_matrix, SEXP target_node, 12 | SEXP adjustment_set, SEXP debug) { 13 | 14 | int nnodes = 0, target = 0; 15 | int *initial_rmat = NULL, *final_rmat = NULL; 16 | char **labels = NULL; 17 | bool *aset = NULL; 18 | SEXP nodes, arcs, amat, names, reachability, temp; 19 | 20 | /* dereference the node labels and the arcs. */ 21 | arcs = getListElement(dag, "arcs"); 22 | nodes = getListElement(dag, "nodes"); 23 | nodes = getAttrib(nodes, R_NamesSymbol); 24 | nnodes = length(nodes); 25 | /* for the node labels, duplicate them for use in the reachability matrix. */ 26 | labels = Calloc1D(2 * nnodes, sizeof(char *)); 27 | for (int i = 0; i < nnodes; i++) 28 | labels[i] = labels[i + nnodes] = (char *) CHAR(STRING_ELT(nodes, i)); 29 | 30 | /* contruct the adjacency matrix. */ 31 | PROTECT(amat = arcs2amat(arcs, nodes)); 32 | 33 | /* dereference the target node. */ 34 | PROTECT(temp = match(nodes, target_node, 0)); 35 | target = INT(temp) - 1; 36 | UNPROTECT(1); 37 | 38 | /* explode the adjustment set into a logical vector to reduce the amount of 39 | * nested loops later. */ 40 | PROTECT(temp = match(nodes, adjustment_set, 0)); 41 | aset = Calloc1D(nnodes, sizeof(bool)); 42 | for (int i = 0; i < length(adjustment_set); i++) 43 | aset[INTEGER(temp)[i] - 1] = TRUE; 44 | UNPROTECT(1); 45 | 46 | /* initialize the reachability matrix. */ 47 | initial_rmat = Calloc1D(4 * nnodes * nnodes, sizeof(int)); 48 | 49 | initialize_reachability_matrix(INTEGER(amat), INTEGER(path_matrix), nnodes, 50 | target, aset, initial_rmat, labels, isTRUE(debug)); 51 | 52 | /* allocate the final reachability matrix with all the attributes. 
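* (the matrix is (2 * nnodes) x (2 * nnodes), matching the duplicated node labels built at the top of this function, so that reachability can be tracked separately for the two ways a walk can pass through a node.)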
*/ 53 | PROTECT(reachability = allocMatrix(LGLSXP, 2 * nnodes, 2 * nnodes)); 54 | final_rmat = LOGICAL(reachability); 55 | memset(final_rmat, '\0', 4 * nnodes * nnodes * sizeof(int)); 56 | 57 | PROTECT(names = allocVector(STRSXP, 2 * nnodes)); 58 | for (int i = 0; i < 2 * nnodes; i++) 59 | SET_STRING_ELT(names, i, STRING_ELT(nodes, i % nnodes)); 60 | setDimNames(reachability, names, names); 61 | 62 | /* traverse the paths in the reachability matrix to complete it. */ 63 | complete_reachability_matrix(initial_rmat, final_rmat, 2 * nnodes, labels, 64 | isTRUE(debug)); 65 | 66 | Free1D(aset); 67 | Free1D(labels); 68 | Free1D(initial_rmat); 69 | 70 | UNPROTECT(3); 71 | 72 | return reachability; 73 | 74 | }/*REACHABILITY_MATRIX*/ 75 | -------------------------------------------------------------------------------- /src/graphs/connected.components.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../math/linear.algebra.h" 4 | 5 | /* identify the connected components in an undirected graph. */ 6 | int ug_connected_components(int *amat, char **labels, int nnodes, int **buffer, 7 | int *buflen, bool debugging) { 8 | 9 | int *elements = NULL, nelements = 0, ncomponents = 0; 10 | bool changed = TRUE, *visited = NULL; 11 | 12 | /* status vector for the nodes. */ 13 | visited = Calloc1D(nnodes, sizeof(bool)); 14 | /* depth of the nodes in the breadth-first search. */ 15 | elements = Calloc1D(nnodes, sizeof(int)); 16 | 17 | for (int i = 0; i < nnodes; i++) { 18 | 19 | /* for each node we have not visited already... */ 20 | if (visited[i]) 21 | continue; 22 | else if (debugging) 23 | Rprintf("* checking node %s.\n", labels[i]); 24 | 25 | /* ... mark it as visited. */ 26 | visited[i] = TRUE; 27 | 28 | /* reset the connected component... */ 29 | memset(elements, '\0', nnodes * sizeof(int)); 30 | nelements = 1; 31 | elements[i] = 1; 32 | changed = FALSE; 33 | 34 | /* in a breadth-first search... */ 35 | for (int depth = 2; depth < nnodes; depth++) { 36 | 37 | /* ... check the remaining nodes... */ 38 | for (int j = 0; j < nnodes; j++) { 39 | 40 | /* .. if we have visited them in the last iteration... */ 41 | if (elements[j] != depth - 1) 42 | continue; 43 | 44 | /* ... mark their neighbours. */ 45 | for (int k = 0; k < nnodes; k++) { 46 | 47 | if ((amat[CMC(j, k, nnodes)] != 0) && (elements[k] == 0)) { 48 | 49 | elements[k] = depth; 50 | visited[k] = TRUE; 51 | changed = TRUE; 52 | nelements++; 53 | 54 | }/*THEN*/ 55 | 56 | }/*FOR*/ 57 | 58 | }/*FOR*/ 59 | 60 | /* if we found no new nodes, we are done with this component. */ 61 | if (changed) 62 | changed = FALSE; 63 | else 64 | break; 65 | 66 | }/*FOR*/ 67 | 68 | /* save the elements of the connected component. */ 69 | buffer[ncomponents] = Calloc1D(nelements, sizeof(int)); 70 | buflen[ncomponents] = nelements; 71 | for (int k = 0, l = 0; k < nnodes; k++) 72 | if (elements[k] > 0) 73 | buffer[ncomponents][l++] = k; 74 | 75 | if (debugging) { 76 | 77 | Rprintf(" @ connected component %d, %d nodes:\n", 78 | ncomponents, nelements); 79 | for (int k = 0; k < buflen[ncomponents]; k++) 80 | Rprintf("%s ", labels[buffer[ncomponents][k]]); 81 | Rprintf("\n"); 82 | 83 | }/*THEN*/ 84 | 85 | /* move to the next component. 
*/ 86 | ncomponents++; 87 | 88 | }/*FOR*/ 89 | 90 | Free1D(visited); 91 | Free1D(elements); 92 | 93 | return ncomponents; 94 | 95 | }/*UG_CONNECTED_COMPONENTS*/ 96 | 97 | -------------------------------------------------------------------------------- /man/hybrid.Rd: -------------------------------------------------------------------------------- 1 | \name{hybrid algorithms} 2 | \alias{hybrid algorithms} 3 | \alias{rsmax2} 4 | \alias{mmhc} 5 | \alias{h2pc} 6 | \title{Hybrid structure learning algorithms} 7 | \description{ 8 | 9 | Learn the structure of a Bayesian network with Max-Min Hill Climbing (MMHC), 10 | Hybrid HPC (H2PC), and the more general 2-phase Restricted Maximization 11 | (RSMAX2) hybrid algorithms. 12 | 13 | } 14 | \usage{ 15 | rsmax2(x, whitelist = NULL, blacklist = NULL, restrict = "si.hiton.pc", 16 | maximize = "hc", restrict.args = list(), maximize.args = list(), debug = FALSE) 17 | mmhc(x, whitelist = NULL, blacklist = NULL, restrict.args = list(), 18 | maximize.args = list(), debug = FALSE) 19 | h2pc(x, whitelist = NULL, blacklist = NULL, restrict.args = list(), 20 | maximize.args = list(), debug = FALSE) 21 | } 22 | \arguments{ 23 | \item{x}{a data frame containing the variables in the model.} 24 | \item{whitelist}{a data frame with two columns (optionally labeled "from" and 25 | "to"), containing a set of arcs to be included in the graph.} 26 | \item{blacklist}{a data frame with two columns (optionally labeled "from" and 27 | "to"), containing a set of arcs not to be included in the graph.} 28 | \item{restrict}{a character string, the constraint-based or local search 29 | algorithm to be used in the \dQuote{restrict} phase. See 30 | \code{\link{structure learning}} and the documentation of each algorithm for 31 | details.} 32 | \item{maximize}{a character string, the score-based algorithm to be used in 33 | the \dQuote{maximize} phase. Possible values are \code{hc} and \code{tabu}. 34 | See \code{\link{structure learning}} for details.} 35 | \item{restrict.args}{a list of arguments to be passed to the algorithm 36 | specified by \code{restrict}, such as \code{test} or \code{alpha}.} 37 | \item{maximize.args}{a list of arguments to be passed to the algorithm 38 | specified by \code{maximize}, such as \code{restart} for hill-climbing or 39 | \code{tabu} for tabu search.} 40 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 41 | printed; otherwise the function is completely silent.} 42 | } 43 | \note{ 44 | 45 | \code{mmhc()} is simply \code{rsmax2()} with \code{restrict} set to 46 | \code{mmpc} and \code{maximize} set to \code{hc}. Similarly, \code{h2pc} is 47 | simply \code{rsmax2()} with \code{restrict} set to \code{hpc}and 48 | \code{maximize} set to \code{hc}. 49 | 50 | See \code{\link{structure learning}} for a complete list of structure learning 51 | algorithms with the respective references. 52 | 53 | } 54 | \value{ 55 | 56 | An object of class \code{bn}. See \code{\link{bn-class}} for details. 
57 | 58 | } 59 | \author{Marco Scutari} 60 | \seealso{\link{local discovery algorithms}, 61 | \link{score-based algorithms}, \link{constraint-based algorithms}.} 62 | \keyword{structure learning} 63 | -------------------------------------------------------------------------------- /man/asia.Rd: -------------------------------------------------------------------------------- 1 | \name{asia} 2 | \docType{data} 3 | \alias{asia} 4 | \title{Asia (synthetic) data set by Lauritzen and Spiegelhalter} 5 | \description{ 6 | 7 | Small synthetic data set from Lauritzen and Spiegelhalter (1988) about lung 8 | diseases (tuberculosis, lung cancer or bronchitis) and visits to Asia. 9 | 10 | } 11 | \usage{ 12 | data(asia) 13 | } 14 | \format{ 15 | 16 | The \code{asia} data set contains the following variables: 17 | \itemize{ 18 | 19 | \item \code{D} (\emph{dyspnoea}), a two-level factor with levels \code{yes} 20 | and \code{no}. 21 | \item \code{T} (\emph{tuberculosis}), a two-level factor with levels 22 | \code{yes} and \code{no}. 23 | \item \code{L} (\emph{lung cancer}), a two-level factor with levels 24 | \code{yes} and \code{no}. 25 | \item \code{B} (\emph{bronchitis}), a two-level factor with levels 26 | \code{yes} and \code{no}. 27 | \item \code{A} (\emph{visit to Asia}), a two-level factor with levels 28 | \code{yes} and \code{no}. 29 | \item \code{S} (\emph{smoking}), a two-level factor with levels \code{yes} 30 | and \code{no}. 31 | \item \code{X} (\emph{chest X-ray}), a two-level factor with levels 32 | \code{yes} and \code{no}. 33 | \item \code{E} (\emph{tuberculosis versus lung cancer/bronchitis}), a 34 | two-level factor with levels \code{yes} and \code{no}. 35 | 36 | } 37 | 38 | } 39 | \note{ 40 | 41 | Lauritzen and Spiegelhalter (1988) motivate this example as follows: 42 | 43 | \dQuote{Shortness-of-breath (dyspnoea) may be due to tuberculosis, lung 44 | cancer or bronchitis, or none of them, or more than one of them. A recent 45 | visit to Asia increases the chances of tuberculosis, while smoking is known 46 | to be a risk factor for both lung cancer and bronchitis. The results of a 47 | single chest X-ray do not discriminate between lung cancer and tuberculosis, 48 | as neither does the presence or absence of dyspnoea.} 49 | 50 | Standard learning algorithms are not able to recover the true structure of 51 | the network because of the presence of a node (\code{E}) with conditional 52 | probabilities equal to both 0 and 1. Monte Carlo tests seems to behave 53 | better than their parametric counterparts. 54 | 55 | The complete BN can be downloaded from 56 | \url{https://www.bnlearn.com/bnrepository/}. 57 | 58 | } 59 | \source{ 60 | 61 | Lauritzen S, Spiegelhalter D (1988). "Local Computation with Probabilities 62 | on Graphical Structures and their Application to Expert Systems (with 63 | discussion)." \emph{Journal of the Royal Statistical Society: Series B}, 64 | \strong{50}(2):157--224. 65 | 66 | } 67 | \examples{ 68 | # load the data. 69 | data(asia) 70 | # create and plot the network structure. 71 | dag = model2network("[A][S][T|A][L|S][B|S][D|B:E][E|T:L][X|E]") 72 | \dontrun{graphviz.plot(dag)} 73 | } 74 | \keyword{datasets} 75 | -------------------------------------------------------------------------------- /R/frontend-mvnorm.R: -------------------------------------------------------------------------------- 1 | 2 | # transform the local distributions in a Gaussian BN into the multivariate 3 | # normal that is the global distribution. 
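# a hypothetical round trip, assuming gbn2mvnorm() returns the global mean
# vector and covariance matrix as list elements named "mu" and "sigma":
#
#   fitted = bn.fit(hc(gaussian.test), gaussian.test)
#   mvn = gbn2mvnorm(fitted)
#   refit = mvnorm2gbn(bn.net(fitted), mu = mvn$mu, sigma = mvn$sigma)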
4 | gbn2mvnorm = function(fitted) { 5 | 6 | # the network must be a Gaussian BN. 7 | check.fit(fitted) 8 | if (!is(fitted, "bn.fit.gnet")) 9 | stop("'fitted' must be an object of class 'bn.fit.gnet'.") 10 | 11 | gbn2mvnorm.backend(fitted) 12 | 13 | }#GBN2MVNORM 14 | 15 | # factorize a multivariate normal distribution into the local distributions that 16 | # make up a Gaussian BN. 17 | mvnorm2gbn = function(dag, mu, sigma) { 18 | 19 | # the network must be completely directed and acyclic. 20 | check.bn(dag) 21 | if (!is.completely.directed(dag)) 22 | stop("'dag' is only partially directed.") 23 | if (!is.acyclic(dag$arcs, names(dag$nodes), directed = TRUE)) 24 | stop("'dag' contains cycles.") 25 | 26 | nodes = names(dag$nodes) 27 | ordnodes = topological.ordering(dag) 28 | nnodes = length(nodes) 29 | 30 | # check the covariance matrix. 31 | if (missing(sigma)) 32 | stop("'sigma' is missing.") 33 | if (!is(sigma, "matrix") || (ncol(sigma) != nrow(sigma)) || 34 | (length(dim(sigma)) != 2)) 35 | stop("'sigma' must be a 2-dimensional square matrix.") 36 | if ((nrow(sigma) != nnodes) || (ncol(sigma) != nnodes)) 37 | stop("'sigma' must be a square matrix of size ", nnodes, ".") 38 | if (!is.real.vector(sigma)) 39 | stop("the elements of 'sigma' must be real numbers.") 40 | check.covariance(sigma) 41 | 42 | # the nodes in the dag must match the variables in the covariance matrix. 43 | dims = dimnames(sigma) 44 | if (is.null(dims)) { 45 | 46 | dimnames(sigma) = list(nodes, nodes) 47 | 48 | }#THEN 49 | else { 50 | 51 | if (!setequal(dims[[1]], dims[[2]])) 52 | stop("the row names and the column names of 'sigma' are different.") 53 | if (!setequal(dims[[1]], nodes)) 54 | stop("the row names of 'sigma' do not match with the nodes in 'dag'.") 55 | if (!setequal(dims[[2]], nodes)) 56 | stop("the column names of 'sigma' do not match with the nodes in 'dag'.") 57 | 58 | }#ELSE 59 | 60 | # check the mean vector. 61 | if (missing(mu)) { 62 | 63 | mu = structure(rep(0, nnodes), names = ordnodes) 64 | warning("'mu' is missing, all variables are assumed to be centered.") 65 | 66 | }#THEN 67 | else { 68 | 69 | if (!is.real.vector(mu)) 70 | stop("'mu' must be a vector of real numbers.") 71 | if (length(mu) != nnodes) 72 | stop("'mu' has ", length(mu), " elements, but 'sigma' has ", nnodes, ".") 73 | 74 | if (is.null(names(mu))) 75 | names(mu) = nodes 76 | else if (!setequal(names(mu), nodes)) 77 | stop("the names of the elements of 'mu' do not match with the nodes in 'dag'.") 78 | 79 | }#ELSE 80 | 81 | mvnorm2gbn.backend(dag = dag, mu = mu, sigma = sigma) 82 | 83 | }#MVNORM2GBN 84 | 85 | -------------------------------------------------------------------------------- /R/graph-generation.R: -------------------------------------------------------------------------------- 1 | 2 | random.graph.backend = function(num, nodes, method, extra.args, debug = FALSE) { 3 | 4 | if (method == "ordered") { 5 | 6 | res = ordered.graph(num = num, nodes = nodes, prob = extra.args$prob) 7 | 8 | }#THEN 9 | else if (method == "ic-dag") { 10 | 11 | # adjust the number of graphs to generate with the stepping factor. 12 | num = num * extra.args$every 13 | 14 | res = ide.cozman.graph(num = num, nodes = nodes, 15 | burn.in = extra.args$burn.in, 16 | max.in.degree = extra.args$max.in.degree, 17 | max.out.degree = extra.args$max.out.degree, 18 | max.degree = extra.args$max.degree, 19 | connected = TRUE, debug = debug) 20 | 21 | # keep only every k-th network.
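    # (thinning: num * every networks were generated above, and only those in
    # positions every, 2 * every, ... are kept, so that consecutive graphs drawn
    # from the same sampler are less dependent on each other.)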
22 | if (num > 1) { 23 | 24 | res = res[seq(from = extra.args$every, to = num, by = extra.args$every)] 25 | 26 | }#THEN 27 | 28 | }#THEN 29 | else if (method == "melancon") { 30 | 31 | # adjust the number of graph to generate with the stepping factor. 32 | num = num * extra.args$every 33 | 34 | res = ide.cozman.graph(num = num, nodes = nodes, 35 | burn.in = extra.args$burn.in, 36 | max.in.degree = extra.args$max.in.degree, 37 | max.out.degree = extra.args$max.out.degree, 38 | max.degree = extra.args$max.degree, 39 | connected = FALSE, debug = debug) 40 | 41 | # keep only every k-th network. 42 | if (num > 1) { 43 | 44 | res = res[seq(from = extra.args$every, to = num, by = extra.args$every)] 45 | 46 | }#THEN 47 | 48 | }#THEN 49 | 50 | return(res) 51 | 52 | }#RANDOM.GRAPH.BACKEND 53 | 54 | # generate a random directed acyclic graph. 55 | ordered.graph = function(num, nodes, prob) { 56 | 57 | .Call(call_ordered_graph, 58 | nodes = nodes, 59 | num = as.integer(num), 60 | prob = prob) 61 | 62 | }#ORDERED.GRAPH 63 | 64 | # generate a random directed acyclic graph accordin to a uniform 65 | # probability distribution over the space of connected graphs (if 66 | # connected = TRUE) or the space of graphs (if connected = FALSE). 67 | ide.cozman.graph = function(num, nodes, burn.in, max.in.degree, 68 | max.out.degree, max.degree, connected, debug = FALSE) { 69 | 70 | .Call(call_ide_cozman_graph, 71 | nodes = nodes, 72 | num = as.integer(num), 73 | burn.in = as.integer(burn.in), 74 | max.in.degree = as.numeric(max.in.degree), 75 | max.out.degree = as.numeric(max.out.degree), 76 | max.degree = as.numeric(max.degree), 77 | connected = connected, 78 | debug = debug) 79 | 80 | }#IDE.COZMAN.GRAPH 81 | 82 | # generate an empty 'bn' object given a set of nodes. 83 | empty.graph.backend = function(nodes, num = 1) { 84 | 85 | .Call(call_empty_graph, 86 | nodes = nodes, 87 | num = as.integer(num)) 88 | 89 | }#EMPTY.GRAPH.BACKEND 90 | -------------------------------------------------------------------------------- /R/frontend-foreign.R: -------------------------------------------------------------------------------- 1 | 2 | # read a BIF file into a bn.fit object. 3 | read.bif = function(file, debug = FALSE) { 4 | 5 | # load the BIF file into memory. 6 | lines = readLines(file) 7 | 8 | read.foreign.backend(lines, format = "bif", filename = file, debug = debug) 9 | 10 | }#READ.BIF 11 | 12 | # dump a bn.fit object into a BIF file. 13 | write.bif = function(file, fitted) { 14 | 15 | check.fit(fitted) 16 | # only discrete bayesian networks are supported. 17 | if (!is(fitted, c("bn.fit.dnet", "bn.fit.onet", "bn.fit.donet"))) 18 | stop("only discrete Bayesian networks can be exported into BIF format.") 19 | 20 | # open the file for writing. 21 | fd = file(description = file, open = "w") 22 | 23 | write.foreign.backend(fd, fitted = fitted, format = "bif") 24 | 25 | close(fd) 26 | 27 | invisible(NULL) 28 | 29 | }#WRITE.BIF 30 | 31 | # read a DSC file into a bn.fit object. 32 | read.dsc = function(file, debug = FALSE) { 33 | 34 | # load the DSC file into memory. 35 | lines = readLines(file) 36 | 37 | read.foreign.backend(lines, format = "dsc", filename = file, debug = debug) 38 | 39 | }#READ.DSC 40 | 41 | # dump a bn.fit object into a DSC file. 42 | write.dsc = function(file, fitted) { 43 | 44 | check.fit(fitted) 45 | # only discrete bayesian networks are supported. 
46 | if (!is(fitted, c("bn.fit.dnet", "bn.fit.onet", "bn.fit.donet"))) 47 | stop("only discrete Bayesian networks can be exported into DSC format.") 48 | 49 | # open the file for writing. 50 | fd = file(description = file, open = "w") 51 | 52 | write.foreign.backend(fd, fitted = fitted, format = "dsc") 53 | 54 | close(fd) 55 | 56 | invisible(NULL) 57 | 58 | }#WRITE.DSC 59 | 60 | # read a NET file into a bn.fit object. 61 | read.net = function(file, debug = FALSE) { 62 | 63 | # load the NET file into memory. 64 | lines = readLines(file) 65 | 66 | read.foreign.backend(lines, format = "net", filename = file, debug = debug) 67 | 68 | }#READ.NET 69 | 70 | # dump a bn.fit object into a NET file. 71 | write.net = function(file, fitted) { 72 | 73 | check.fit(fitted) 74 | # only discrete bayesian networks are supported. 75 | if (!is(fitted, c("bn.fit.dnet", "bn.fit.onet", "bn.fit.donet"))) 76 | stop("only discrete Bayesian networks can be exported into DSC format.") 77 | 78 | # open the file for writing. 79 | fd = file(description = file, open = "w") 80 | 81 | write.foreign.backend(fd, fitted = fitted, format = "net") 82 | 83 | close(fd) 84 | 85 | invisible(NULL) 86 | 87 | }#WRITE.NET 88 | 89 | write.dot = function(file, graph) { 90 | 91 | check.bn.or.fit(graph) 92 | 93 | # open the file for writing. 94 | fd = file(description = file, open = "w") 95 | 96 | write.dot.backend(fd, graph = graph) 97 | 98 | close(fd) 99 | 100 | invisible(NULL) 101 | 102 | }#WRITE.DOT 103 | 104 | -------------------------------------------------------------------------------- /src/minimal/tiers.c: -------------------------------------------------------------------------------- 1 | #include "../include/rcore.h" 2 | #include "../core/allocations.h" 3 | #include "../include/bn.h" 4 | #include "strings.h" 5 | #include "common.h" 6 | 7 | SEXP tiers(SEXP nodes, SEXP debug) { 8 | 9 | int i = 0, j = 0, k = 0, narcs = 0, nnodes = 0, ntiers = length(nodes); 10 | int *tier_size = NULL, tier_start = 0, cur = 0; 11 | bool debugging = isTRUE(debug); 12 | SEXP flattened, blacklist, temp; 13 | 14 | /* allocate the counters for tiers' sizes. */ 15 | tier_size = Calloc1D(ntiers, sizeof(int)); 16 | 17 | if (!isString(nodes)) { 18 | 19 | /* "node" is a list, each tier is an element. */ 20 | for (i = ntiers - 1; i >= 0; i--) { 21 | 22 | temp = VECTOR_ELT(nodes, i); 23 | tier_size[i] = length(temp); 24 | nnodes += tier_size[i]; 25 | narcs += (nnodes - tier_size[i]) * tier_size[i]; 26 | 27 | }/*FOR*/ 28 | 29 | /* flatten the tiers to keep manipulation later on simple. */ 30 | PROTECT(flattened = allocVector(STRSXP, nnodes)); 31 | 32 | for (i = 0, k = 0; i < ntiers; i++) { 33 | 34 | temp = VECTOR_ELT(nodes, i); 35 | 36 | for (j = 0; j < tier_size[i]; j++) 37 | SET_STRING_ELT(flattened, k++, STRING_ELT(temp, j)); 38 | 39 | }/*FOR*/ 40 | 41 | }/*THEN*/ 42 | else { 43 | 44 | /* "node" is a character vector, which means that each node is in its own tier 45 | * and that there is no need to flatten it. */ 46 | flattened = nodes; 47 | nnodes = length(nodes); 48 | for (i = 0; i < ntiers; i++) 49 | tier_size[i] = 1; 50 | 51 | /* the blacklist is the one resulting from a complete node ordering. */ 52 | narcs = ntiers * (ntiers - 1) / 2; 53 | 54 | }/*ELSE*/ 55 | 56 | /* allocate the return value. 
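* (a two-column character matrix of blacklisted arcs; the first column holds the tail ("from") and the second the head ("to"), with the column names set at the end of this function.)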
*/ 57 | PROTECT(blacklist = allocMatrix(STRSXP, narcs, 2)); 58 | 59 | for (k = 0, i = 0; k < nnodes; k++) { 60 | 61 | temp = STRING_ELT(flattened, k); 62 | 63 | if (debugging) 64 | Rprintf("* current node is %s in tier %d, position %d of %d.\n", 65 | CHAR(temp), i + 1, k + 1, nnodes); 66 | 67 | for (j = tier_start + tier_size[i]; j < nnodes; j++) { 68 | 69 | if (debugging) 70 | Rprintf(" > blacklisting %s -> %s\n", CHAR(STRING_ELT(flattened, j)), CHAR(temp)); 71 | 72 | SET_STRING_ELT(blacklist, cur, STRING_ELT(flattened, j)); 73 | SET_STRING_ELT(blacklist, cur + narcs, temp); 74 | cur++; 75 | 76 | }/*FOR*/ 77 | 78 | while (k >= tier_start + tier_size[i] - 1) { 79 | 80 | tier_start += tier_size[i++]; 81 | 82 | if (i == ntiers) 83 | break; 84 | 85 | }/*WHILE*/ 86 | 87 | if (i == ntiers) 88 | break; 89 | 90 | }/*FOR*/ 91 | 92 | /* set the column names. */ 93 | setDimNames(blacklist, R_NilValue, mkStringVec(2, "from", "to")); 94 | 95 | Free1D(tier_size); 96 | 97 | if (!isString(nodes)) 98 | UNPROTECT(2); 99 | else 100 | UNPROTECT(1); 101 | 102 | return blacklist; 103 | 104 | }/*TIERS*/ 105 | 106 | -------------------------------------------------------------------------------- /man/graph.Rd: -------------------------------------------------------------------------------- 1 | \name{graph utilities} 2 | \alias{graph utilities} 3 | \alias{acyclic} 4 | \alias{directed} 5 | \alias{valid.dag} 6 | \alias{valid.cpdag} 7 | \alias{valid.ug} 8 | \alias{path.exists} 9 | \alias{path} 10 | \alias{path,bn-method} 11 | \alias{path,bn.fit-method} 12 | \alias{path,bn.naive-method} 13 | \alias{path,bn.tan-method} 14 | \alias{skeleton} 15 | \alias{pdag2dag} 16 | \alias{subgraph} 17 | \title{Utilities to manipulate graphs} 18 | \description{ 19 | 20 | Check and manipulate graph-related properties of an object of class \code{bn}. 21 | 22 | } 23 | \usage{ 24 | # check whether the graph is acyclic/completely directed. 25 | acyclic(x, directed = FALSE, debug = FALSE) 26 | directed(x) 27 | valid.dag(x, debug = FALSE) 28 | valid.cpdag(x, debug = FALSE) 29 | valid.ug(x, debug = FALSE) 30 | # check whether there is a path between two nodes. 31 | path.exists(x, from, to, direct = TRUE, underlying.graph = FALSE, debug = FALSE) 32 | # build the skeleton or a complete orientation of the graph. 33 | skeleton(x) 34 | pdag2dag(x, ordering) 35 | # build a subgraph spanning a subset of nodes. 36 | subgraph(x, nodes) 37 | } 38 | \arguments{ 39 | \item{x}{an object of class \code{bn}. \code{skeleton()}, \code{acyclic()}, 40 | \code{directed()} and \code{path.exixsts()} also accept objects of class 41 | \code{bn.fit}.} 42 | \item{from}{a character string, the label of a node.} 43 | \item{to}{a character string, the label of a node (different from 44 | \code{from}).} 45 | \item{direct}{a boolean value. If \code{FALSE} ignore any arc between 46 | \code{from} and \code{to} when looking for a path.} 47 | \item{underlying.graph}{a boolean value. If \code{TRUE} the underlying 48 | undirected graph is used instead of the (directed) one from the \code{x} 49 | argument.} 50 | \item{ordering}{the labels of all the nodes in the graph; their order is the 51 | node ordering used to set the direction of undirected arcs.} 52 | \item{nodes}{the labels of the nodes that induce the subgraph.} 53 | \item{directed}{a boolean value. If \code{TRUE} only completely directed 54 | cycles are considered; otherwise undirected arcs will also be considered 55 | and treated as arcs present in both directions.} 56 | \item{debug}{a boolean value. 
If \code{TRUE} a lot of debugging output is 57 | printed; otherwise the function is completely silent.} 58 | } 59 | \value{ 60 | 61 | \code{acyclic()}, \code{path.exists()} and \code{directed()} return a boolean value. \cr 62 | \code{skeleton()}, \code{pdag2dag()} and \code{subgraph()} return an object of 63 | class \code{bn}. 64 | 65 | \code{valid.dag()}, \code{valid.cpdag()} and \code{valid.ug()} return a 66 | logical value. 67 | 68 | } 69 | \references{ 70 | 71 | Bang-Jensen J, Gutin G (2009). \emph{Digraphs: Theory, Algorithms and 72 | Applications}. Springer, 2nd edition. 73 | 74 | } 75 | \examples{ 76 | data(learning.test) 77 | cpdag = pc.stable(learning.test) 78 | 79 | acyclic(cpdag) 80 | directed(cpdag) 81 | dag = pdag2dag(cpdag, ordering = LETTERS[1:6]) 82 | dag 83 | directed(dag) 84 | skeleton(dag) 85 | } 86 | \author{Marco Scutari} 87 | \keyword{graphs} 88 | -------------------------------------------------------------------------------- /man/hc.Rd: -------------------------------------------------------------------------------- 1 | \name{score-based algorithms} 2 | \alias{score-based algorithms} 3 | \alias{hc} 4 | \alias{tabu} 5 | \title{Score-based structure learning algorithms} 6 | \description{ 7 | 8 | Learn the structure of a Bayesian network using a hill-climbing (HC) or a 9 | Tabu search (TABU) greedy search. 10 | 11 | } 12 | \usage{ 13 | hc(x, start = NULL, whitelist = NULL, blacklist = NULL, score = NULL, ..., 14 | debug = FALSE, restart = 0, perturb = 1, max.iter = Inf, maxp = Inf, optimized = TRUE) 15 | tabu(x, start = NULL, whitelist = NULL, blacklist = NULL, score = NULL, ..., 16 | debug = FALSE, tabu = 10, max.tabu = tabu, max.iter = Inf, maxp = Inf, optimized = TRUE) 17 | } 18 | \arguments{ 19 | \item{x}{a data frame containing the variables in the model.} 20 | \item{start}{an object of class \code{bn}, the preseeded directed acyclic 21 | graph used to initialize the algorithm. If none is specified, an empty one 22 | (i.e. without any arc) is used.} 23 | \item{whitelist}{a data frame with two columns (optionally labeled "from" and 24 | "to"), containing a set of arcs to be included in the graph.} 25 | \item{blacklist}{a data frame with two columns (optionally labeled "from" and 26 | "to"), containing a set of arcs not to be included in the graph.} 27 | \item{score}{a character string, the label of the network score to be used in 28 | the algorithm. If none is specified, the default score is the \emph{Bayesian 29 | Information Criterion} for both discrete and continuous data sets. See 30 | \code{\link{network scores}} for details.} 31 | \item{\dots}{additional tuning parameters for the network score. See 32 | \code{\link{score}} for details.} 33 | \item{debug}{a boolean value. If \code{TRUE} a lot of debugging output is 34 | printed; otherwise the function is completely silent.} 35 | \item{restart}{an integer, the number of random restarts.} 36 | \item{tabu}{a positive integer number, the length of the tabu list used in the 37 | \code{tabu} function.} 38 | \item{max.tabu}{a positive integer number, the maximum number of iterations tabu search can 39 | perform without improving the best network score.} 40 | \item{perturb}{an integer, the number of attempts to randomly 41 | insert/remove/reverse an arc on every random restart.} 42 | \item{max.iter}{an integer, the maximum number of iterations.} 43 | \item{maxp}{the maximum number of parents allowed for a node in any network 44 | that is considered in the search, including the one that is returned.
The 45 | default value is \code{Inf}.} 46 | \item{optimized}{a boolean value. If \code{TRUE} (the default), score caching 47 | is used to speed up structure learning.} 48 | } 49 | \note{ 50 | 51 | See \code{\link{structure learning}} for a complete list of structure learning 52 | algorithms with the respective references. 53 | 54 | } 55 | \value{ 56 | 57 | An object of class \code{bn}. See \code{\link{bn-class}} for details. 58 | 59 | } 60 | \author{Marco Scutari} 61 | \seealso{\code{\link{network scores}}, \link{constraint-based algorithms}, 62 | \link{hybrid algorithms}, \link{local discovery algorithms}, 63 | \link{alpha.star}.} 64 | \keyword{structure learning} 65 | -------------------------------------------------------------------------------- /src/fitted/fitted.h: -------------------------------------------------------------------------------- 1 | #ifndef BN_FIT_OBJECTS_HEADER 2 | #define BN_FIT_OBJECTS_HEADER 3 | 4 | /* enum for fitted node types, to match the class in the R objects. */ 5 | typedef enum { 6 | ENOFIT = 0, /* error code, no such node type. */ 7 | DNODE = 1, /* categorical node. */ 8 | ONODE = 2, /* ordinal node. */ 9 | GNODE = 3, /* Gaussian node. */ 10 | CGNODE = 4 /* conditional Gaussian node. */ 11 | } fitted_node_e; 12 | 13 | /* enum for fitted network types, to match the class in the R objects. */ 14 | typedef enum { 15 | ENONET = 0, /* error code, no such network type. */ 16 | DNET = 1, /* discrete Bayesian networks. */ 17 | ONET = 2, /* ordinal Bayesian networks. */ 18 | DONET = 3, /* mixed categorical and ordinal nodes. */ 19 | GNET = 4, /* Gaussian Bayesian networks. */ 20 | CGNET = 5 /* conditional Gaussian networks. */ 21 | } fitted_net_e; 22 | 23 | fitted_node_e fitted_node_to_enum(SEXP object); 24 | fitted_net_e fitted_net_to_enum(SEXP object); 25 | 26 | /* a local distribution, meant to be embedded in the fitted_bn struct below. */ 27 | typedef struct { 28 | 29 | int nparents; /* number of parents. */ 30 | int *parents; /* indexes of the parent nodes. */ 31 | 32 | union { 33 | 34 | struct { 35 | 36 | int ndims; /* number of dimensions of the CPT. */ 37 | int *dims; /* dimensions of the CPT. */ 38 | int nconfigs; /* number of parents configurations. */ 39 | double *cpt; /* conditional probability table. */ 40 | 41 | } d; 42 | 43 | struct { 44 | 45 | int ncoefs; /* number of regression coefficients. */ 46 | double *coefs; /* regression coefficients. */ 47 | double sd; /* standard error of the residuals. */ 48 | 49 | } g; 50 | 51 | struct { 52 | 53 | int ndparents; /* number of discrete parents. */ 54 | int ngparents; /* number of continuous parents. */ 55 | int *dparents; /* indexes of the discrete parents in the parents array. */ 56 | int *gparents; /* indexes of the continuous parents in the parents array. */ 57 | int ncoefs; /* number of regression coefficients. */ 58 | int nconfigs; /* number of discrete parents configurations. */ 59 | double *coefs; /* regression coefficients. */ 60 | double *sd; /* standard errors of the residuals. */ 61 | 62 | } cg; 63 | 64 | }; 65 | 66 | } ldist; 67 | 68 | /* a fitted Bayesian network, mapping to the bn.fit class in R code. */ 69 | typedef struct { 70 | 71 | fitted_net_e type; /* network type (discrete, Gaussian, etc.). */ 72 | int nnodes; /* number of nodes in the network. */ 73 | const char **labels; /* node labels. */ 74 | fitted_node_e *node_types; /* node types (discrete, Gaussian, etc.). */ 75 | ldist *ldists; /* local distributions for the nodes. 
*/ 76 | 77 | } fitted_bn; 78 | 79 | fitted_bn fitted_network_from_SEXP(SEXP fitted); 80 | void print_fitted_network(fitted_bn); 81 | void FreeFittedBN(fitted_bn bn); 82 | 83 | double nparams_fitted_node(ldist ld, fitted_node_e type); 84 | 85 | #endif 86 | --------------------------------------------------------------------------------
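The header above only declares nparams_fitted_node(); its implementation lives in src/fitted/nparams.c, which is not reproduced here. The following is a minimal sketch of how such a counter could be written against the ldist and fitted_node_e declarations, in the repo's own C style. It assumes that dims[0] is the cardinality of the node itself, that ncoefs already includes the intercept, that cg.ncoefs counts the coefficients of all discrete parents configurations together, and that residual standard deviations are counted as free parameters; none of these conventions are confirmed by the sources shown, and sketch_nparams_fitted_node is a hypothetical name, not a package function.

#include "../include/rcore.h"
#include "fitted.h"

/* hypothetical helper (not part of the package): count the free parameters of
 * a single local distribution under the assumptions stated above. */
double sketch_nparams_fitted_node(ldist ld, fitted_node_e type) {

  switch (type) {

    case DNODE:
    case ONODE:
      /* CPT: (cardinality - 1) free probabilities per parents configuration,
       * assuming dims[0] is the node's own number of levels. */
      return (double)(ld.d.dims[0] - 1) * ld.d.nconfigs;

    case GNODE:
      /* regression coefficients (intercept assumed included) plus the
       * residual standard deviation. */
      return (double)(ld.g.ncoefs + 1);

    case CGNODE:
      /* coefficients of all conditional regressions (assumed pooled in
       * ncoefs) plus one residual standard deviation per discrete parents
       * configuration. */
      return (double)(ld.cg.ncoefs + ld.cg.nconfigs);

    default:
      /* ENOFIT or an unknown node type: nothing to count. */
      return 0;

  }/*SWITCH*/

}/*SKETCH_NPARAMS_FITTED_NODE*/

Keeping the per-type fields in an anonymous union, as fitted.h does, means each node only stores the representation its type needs, and a single switch on fitted_node_e is enough to dispatch on the layout.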