├── lighthergm
    ├── src
    │   ├── .gitignore
    │   ├── Makevars
    │   ├── Makevars.win
    │   ├── helper.h
    │   ├── compute_eigenvector.cpp
    │   ├── compute_yule_coef.cpp
    │   ├── simulate_between_network.cpp
    │   ├── helper_function.cpp
    │   ├── create_feature_matrix.cpp
    │   └── RcppExports.cpp
    ├── vignettes
    │   ├── .gitignore
    │   └── intro-lighthergm.Rmd
    ├── .gitignore
    ├── .Rbuildignore
    ├── tests
    │   ├── testthat.R
    │   └── testthat
    │   │   ├── test-compute_linear_term.R
    │   │   ├── test-get_induced_subgraph.R
    │   │   ├── test-compute_yule_coef.R
    │   │   ├── test-get_elementwise_multiplied_matrices.R
    │   │   ├── test-get_sparse_feature_adjmat_cpp.R
    │   │   ├── test-cache.R
    │   │   ├── test-start-from-given-cluster.R
    │   │   ├── test-compute_quadratic_term.R
    │   │   ├── test-compute_pi_with_features.R
    │   │   ├── test-estimate-params.R
    │   │   ├── test-compute_pi_d1x0.R
    │   │   ├── test-compute_quadratic_term_with_features.R
    │   │   ├── test-within-network-stats.R
    │   │   ├── test-estimate_within_param.R
    │   │   ├── test-EM-checkpoint.R
    │   │   ├── test-compute_lower_bound.R
    │   │   └── test-gof.R
    ├── data
    │   └── toyNet.rda
    ├── man
    │   ├── as_sparse_adj.Rd
    │   ├── compute_yule_coef.Rd
    │   ├── to_degree_dist_df.Rd
    │   ├── to_edgewise_shared_partners_df.Rd
    │   ├── to_geodesic_dist_df.Rd
    │   ├── spec_clust_sparse.Rd
    │   ├── arrange_edgelist.Rd
    │   ├── estimate_between_param.Rd
    │   ├── extract_covariate_names.Rd
    │   ├── get_list_sparse_feature_adjmat.Rd
    │   ├── get_between_stats.Rd
    │   ├── edgelist_to_stats.Rd
    │   ├── separate_formulas.Rd
    │   ├── compute_multiplied_feature_matrices.Rd
    │   ├── swap_formula_network.Rd
    │   ├── toyNet.Rd
    │   ├── generate_network_for_output.Rd
    │   ├── sort_block_membership.Rd
    │   ├── combine_within_between_edges.Rd
    │   ├── generate_seed_network.Rd
    │   ├── get_gof_stats.Rd
    │   ├── estimate_within_params.Rd
    │   ├── draw_within_block_connection.Rd
    │   ├── simulate_hergm_within.Rd
    │   ├── gof_lighthergm.Rd
    │   ├── draw_between_block_connection.Rd
    │   ├── simulate_hergm.Rd
    │   └── hergm.Rd
    ├── lighthergm.Rproj
    ├── R
    │   ├── data.R
    │   ├── get_list_sparse_feature_adjmat.R
    │   ├── estimate_between_param.R
    │   ├── RcppExports.R
    │   ├── estimate_within_params.R
    │   ├── gof_lighthergm.R
    │   └── hergm.R
    ├── NAMESPACE
    └── DESCRIPTION
├── lighthergm.jpg
├── doc
    └── intro-lighthergm.pdf
├── docker-compose.yml
├── .gitignore
├── Dockerfile
├── .github
    └── workflows
    │   └── check-standard.yml
└── README.md


/lighthergm/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 | 


--------------------------------------------------------------------------------
/lighthergm/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | 


--------------------------------------------------------------------------------
/lighthergm/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | inst/doc
3 | /doc/
4 | /Meta/
5 | 


--------------------------------------------------------------------------------
/lighthergm.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sansan-inc/lighthergm/HEAD/lighthergm.jpg


--------------------------------------------------------------------------------
/doc/intro-lighthergm.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sansan-inc/lighthergm/HEAD/doc/intro-lighthergm.pdf


--------------------------------------------------------------------------------
/lighthergm/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^lighthergm\.Rproj$
2 | ^\.Rproj\.user$
3 | ^doc$
4 | ^LICENSE\.md$
5 | ^Meta$
6 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(lighthergm)
3 | 
4 | test_check("lighthergm")
5 | 


--------------------------------------------------------------------------------
/lighthergm/data/toyNet.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sansan-inc/lighthergm/HEAD/lighthergm/data/toyNet.rda


--------------------------------------------------------------------------------
/lighthergm/src/Makevars:
--------------------------------------------------------------------------------
1 | CXX_STD = CXX11
2 | PKG_CXXFLAGS = -DARMA_64BIT_WORD=1 $(SHLIB_OPENMP_CXXFLAGS)
3 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
4 | 


--------------------------------------------------------------------------------
/lighthergm/src/Makevars.win:
--------------------------------------------------------------------------------
1 | CXX_STD = CXX11
2 | PKG_CXXFLAGS = -DARMA_64BIT_WORD=1 $(SHLIB_OPENMP_CXXFLAGS)
3 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS)
4 | 


--------------------------------------------------------------------------------
/lighthergm/man/as_sparse_adj.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_within_params.R
 3 | \name{as_sparse_adj}
 4 | \alias{as_sparse_adj}
 5 | \title{Get a sparse adjacency matrix from a network object}
 6 | \usage{
 7 | as_sparse_adj(net)
 8 | }
 9 | \arguments{
10 | \item{net}{a network object}
11 | }
12 | \description{
13 | Get a sparse adjacency matrix from a network object
14 | }
15 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: '3'
 2 | 
 3 | services:
 4 |   rstudio:
 5 |     build:
 6 |       context: .
 7 |       dockerfile: Dockerfile
 8 |     container_name: lighthergm
 9 |     ports:
10 |       - 8787:8787
11 |     environment:
12 |       - PASSWORD=${PASSWORD}
13 |       #- USERID=${USERID}
14 |       #- GROUPID=${GROUPID}
15 |     #volumes:
16 |     #  - ${WORKSPACE_PATH}:/home/rstudio/workspace
17 |     tty: true
18 |     stdin_open: true
19 | 


--------------------------------------------------------------------------------
/lighthergm/man/compute_yule_coef.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/RcppExports.R
 3 | \name{compute_yule_coef}
 4 | \alias{compute_yule_coef}
 5 | \title{Compute Yule's Φ-coefficient}
 6 | \usage{
 7 | compute_yule_coef(z_star, z)
 8 | }
 9 | \arguments{
10 | \item{z_star}{a true block membership}
11 | 
12 | \item{z}{an estimated block membership}
13 | }
14 | \description{
15 | Compute Yule's Φ-coefficient
16 | }
17 | 


--------------------------------------------------------------------------------
/lighthergm/src/helper.h:
--------------------------------------------------------------------------------
 1 | #ifndef __HELPER__
 2 | #define __HELPER__
 3 | 
 4 | arma::vec decimal_to_binary_vector(
 5 |     int decimal,
 6 |     int vec_length);
 7 | 
 8 | arma::mat compute_sumTaus(
 9 |     int numOfVertices,
10 |     int numOfClasses,
11 |     const arma::mat& tau,
12 |     int verbose = 0);
13 | 
14 | void normalizeTau(
15 |     arma::mat& tau,
16 |     double minValue);
17 | 
18 | arma::rowvec sumDoubleMatrixByRow(
19 |     const arma::mat& matrix);
20 | 
21 | #endif // __HELPER__
22 | 


--------------------------------------------------------------------------------
/lighthergm/lighthergm.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 | 
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 | 


--------------------------------------------------------------------------------
/lighthergm/man/to_degree_dist_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gof_lighthergm.R
 3 | \name{to_degree_dist_df}
 4 | \alias{to_degree_dist_df}
 5 | \title{Extracts the degree distribution from a network and returns it as a data frame.}
 6 | \usage{
 7 | to_degree_dist_df(net)
 8 | }
 9 | \arguments{
10 | \item{net}{a statnet network object}
11 | }
12 | \value{
13 | a data frame
14 | }
15 | \description{
16 | Extracts the degree distribution from a network and returns it as a data frame.
17 | }
18 | 


--------------------------------------------------------------------------------
/lighthergm/man/to_edgewise_shared_partners_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gof_lighthergm.R
 3 | \name{to_edgewise_shared_partners_df}
 4 | \alias{to_edgewise_shared_partners_df}
 5 | \title{Extracts the edgewise shared partners distribution (undirected).}
 6 | \usage{
 7 | to_edgewise_shared_partners_df(net)
 8 | }
 9 | \arguments{
10 | \item{net}{a statnet network object}
11 | }
12 | \value{
13 | a data frame
14 | }
15 | \description{
16 | Extracts the edgewise shared partners distribution (undirected).
17 | }
18 | 


--------------------------------------------------------------------------------
/lighthergm/man/to_geodesic_dist_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gof_lighthergm.R
 3 | \name{to_geodesic_dist_df}
 4 | \alias{to_geodesic_dist_df}
 5 | \title{Extracts the geodesic distance distribution from a network and returns it as a dataframe.}
 6 | \usage{
 7 | to_geodesic_dist_df(net)
 8 | }
 9 | \arguments{
10 | \item{net}{a statnet network object}
11 | }
12 | \value{
13 | a data frame
14 | }
15 | \description{
16 | Extracts the geodesic distance distribution from a network and returns it as a dataframe.
17 | }
18 | 


--------------------------------------------------------------------------------
/lighthergm/man/spec_clust_sparse.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/EM_wrapper.R
 3 | \name{spec_clust_sparse}
 4 | \alias{spec_clust_sparse}
 5 | \title{function for spectral clustering}
 6 | \usage{
 7 | spec_clust_sparse(network, n_clusters, eigenvectors_fn)
 8 | }
 9 | \arguments{
10 | \item{network}{a sparse adjacency matrix}
11 | 
12 | \item{n_clusters}{number of specified clusters}
13 | 
14 | \item{eigenvectors_fn}{a function that performs eigenvector decomposition}
15 | }
16 | \description{
17 | function for spectral clustering
18 | }
19 | 


--------------------------------------------------------------------------------
/lighthergm/src/compute_eigenvector.cpp:
--------------------------------------------------------------------------------
 1 | // Files to look to get MM version: ReciprocityModel for basic functions,
 2 | // BinaryReciprocityModel.cpp for version written already,
 3 | // MMBinaryReciprocityModel.cpp for more eleborate version
 4 | // #define ARMA_64BIT_WORD 1;
 5 | #include <RcppArmadillo.h>
 6 | // [[Rcpp::depends(RcppArmadillo)]]
 7 | 
 8 | // [[Rcpp::export]]
 9 | arma::mat eigenvectors_sparse(
10 |     const arma::sp_mat& X,
11 |     int n_vec) {
12 |   arma::vec eigval;
13 |   arma::mat eigvec;
14 | 
15 |   arma::eigs_sym(eigval, eigvec, X, n_vec);
16 |   return(eigvec);
17 | }
18 | 


--------------------------------------------------------------------------------
/lighthergm/R/data.R:
--------------------------------------------------------------------------------
 1 | #' A toy network to play `lighthergm` with.
 2 | #'
 3 | #' This network has a clear cluster structure.
 4 | #' The number of clusters is four, and which cluster each node belongs to is defined in the variable "block".
 5 | #'
 6 | #' @format A `statnet`'s network class object. It has three nodal features.
 7 | #' \describe{
 8 | #'   \item{block}{block membership of each node}
 9 | #'   \item{x}{a covariate. It has 10 labels.}
10 | #'   \item{y}{a covariate. It has 10 labels.}
11 | #'   ...
12 | #' }
13 | #' `x` and `y` are not variables with any particular meaning.
14 | #'
15 | "toyNet"
16 | 


--------------------------------------------------------------------------------
/lighthergm/man/arrange_edgelist.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{arrange_edgelist}
 4 | \alias{arrange_edgelist}
 5 | \title{Make the given edgelist consistent with the data frame that contains vertex info.}
 6 | \usage{
 7 | arrange_edgelist(edgelist, sorted_dataframe)
 8 | }
 9 | \arguments{
10 | \item{edgelist}{an edgelist to be arranged}
11 | 
12 | \item{sorted_dataframe}{a data frame sorted by \code{sort_block_membership}}
13 | }
14 | \description{
15 | Make the given edgelist consistent with the data frame that contains vertex info.
16 | }
17 | 


--------------------------------------------------------------------------------
/lighthergm/man/estimate_between_param.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_between_param.R
 3 | \name{estimate_between_param}
 4 | \alias{estimate_between_param}
 5 | \title{Estimate between-block parameters by logit}
 6 | \usage{
 7 | estimate_between_param(formula, network, block)
 8 | }
 9 | \arguments{
10 | \item{formula}{formula for estimating between-block parameters}
11 | 
12 | \item{network}{network object}
13 | 
14 | \item{block}{a vector that represents which node belongs to which node}
15 | }
16 | \description{
17 | Estimate between-block parameters by logit
18 | }
19 | 


--------------------------------------------------------------------------------
/lighthergm/man/extract_covariate_names.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{extract_covariate_names}
 4 | \alias{extract_covariate_names}
 5 | \title{Extract Covariate Names
 6 | Extracts the names of covariates used in the formula}
 7 | \usage{
 8 | extract_covariate_names(formula_for_simulation)
 9 | }
10 | \arguments{
11 | \item{formula_for_simulation}{the formula to check}
12 | }
13 | \value{
14 | A list of covariate names (can be empty)
15 | }
16 | \description{
17 | Extract Covariate Names
18 | Extracts the names of covariates used in the formula
19 | }
20 | 


--------------------------------------------------------------------------------
/lighthergm/man/get_list_sparse_feature_adjmat.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_list_sparse_feature_adjmat.R
 3 | \name{get_list_sparse_feature_adjmat}
 4 | \alias{get_list_sparse_feature_adjmat}
 5 | \title{Get a list of sparse feature adjacency matrix from a formula}
 6 | \usage{
 7 | get_list_sparse_feature_adjmat(network, formula)
 8 | }
 9 | \arguments{
10 | \item{network}{a network object from which nodal covariates are extracted.}
11 | 
12 | \item{formula}{a network model to be considered}
13 | }
14 | \description{
15 | Get a list of sparse feature adjacency matrix from a formula
16 | }
17 | 


--------------------------------------------------------------------------------
/lighthergm/man/get_between_stats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{get_between_stats}
 4 | \alias{get_between_stats}
 5 | \title{Converts a list of edgelists into a data frame of network statistics}
 6 | \usage{
 7 | get_between_stats(edgelists, between_formula)
 8 | }
 9 | \arguments{
10 | \item{edgelists}{the list of edgelists}
11 | 
12 | \item{between_formula}{the formula for the between connections}
13 | }
14 | \value{
15 | a data frame of sufficient network statistics
16 | }
17 | \description{
18 | Converts a list of edgelists into a data frame of network statistics
19 | }
20 | 


--------------------------------------------------------------------------------
/lighthergm/man/edgelist_to_stats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{edgelist_to_stats}
 4 | \alias{edgelist_to_stats}
 5 | \title{Converts an edgelist into a matrix of sufficient network statistics}
 6 | \usage{
 7 | edgelist_to_stats(net, edgelist, between_formula)
 8 | }
 9 | \arguments{
10 | \item{net}{the net to extract the covariates from}
11 | 
12 | \item{edgelist}{the edgelist}
13 | 
14 | \item{between_formula}{the formula for the between connections}
15 | }
16 | \value{
17 | a matrix of sufficient network statistics
18 | }
19 | \description{
20 | Converts an edgelist into a matrix of sufficient network statistics
21 | }
22 | 


--------------------------------------------------------------------------------
/lighthergm/man/separate_formulas.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gof_lighthergm.R
 3 | \name{separate_formulas}
 4 | \alias{separate_formulas}
 5 | \title{Separates a formula into its between and within components. The between component excludes
 6 | terms which introduce dyadic dependence.}
 7 | \usage{
 8 | separate_formulas(target_formula)
 9 | }
10 | \arguments{
11 | \item{target_formula}{a target formula}
12 | }
13 | \value{
14 | a list containing the between and within formulas
15 | }
16 | \description{
17 | Separates a formula into its between and within components. The between component excludes
18 | terms which introduce dyadic dependence.
19 | }
20 | 


--------------------------------------------------------------------------------
/lighthergm/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(compute_multiplied_feature_matrices)
 4 | export(compute_yule_coef)
 5 | export(estimate_between_param)
 6 | export(estimate_within_params)
 7 | export(get_list_sparse_feature_adjmat)
 8 | export(gof_lighthergm)
 9 | export(hergm)
10 | export(simulate_hergm)
11 | export(simulate_hergm_within)
12 | importFrom(Rcpp,sourceCpp)
13 | importFrom(ergm,ergm)
14 | importFrom(ergm,ergm.getnetwork)
15 | importFrom(foreach,"%do%")
16 | importFrom(foreach,"%dopar%")
17 | importFrom(foreach,foreach)
18 | importFrom(magrittr,"%<>%")
19 | importFrom(magrittr,"%>%")
20 | importFrom(parallel,mclapply)
21 | importFrom(rlang,"%||%")
22 | useDynLib(lighthergm)
23 | 


--------------------------------------------------------------------------------
/lighthergm/man/compute_multiplied_feature_matrices.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_list_sparse_feature_adjmat.R
 3 | \name{compute_multiplied_feature_matrices}
 4 | \alias{compute_multiplied_feature_matrices}
 5 | \title{Get a list of sparse feature adjacency matrix from a formula}
 6 | \usage{
 7 | compute_multiplied_feature_matrices(net, list_feature_matrices)
 8 | }
 9 | \arguments{
10 | \item{net}{a network object from which nodal covariates are extracted.}
11 | 
12 | \item{list_feature_matrices}{a list of feature adjacency matrices generated by \code{get_list_sparse_feature_adjmat()}.}
13 | }
14 | \description{
15 | Get a list of sparse feature adjacency matrix from a formula
16 | }
17 | 


--------------------------------------------------------------------------------
/lighthergm/man/swap_formula_network.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gof_lighthergm.R
 3 | \name{swap_formula_network}
 4 | \alias{swap_formula_network}
 5 | \title{Swaps the network on the lhs of a formula for a new one with the given environment}
 6 | \usage{
 7 | swap_formula_network(new_net, net_formula, env)
 8 | }
 9 | \arguments{
10 | \item{new_net}{A network object to be inserted into the lhs of the formula}
11 | 
12 | \item{net_formula}{The target formula}
13 | 
14 | \item{env}{The environment to assign to the formula}
15 | }
16 | \value{
17 | A new formula with the lhs swapped
18 | }
19 | \description{
20 | Swaps the network on the lhs of a formula for a new one with the given environment
21 | }
22 | 


--------------------------------------------------------------------------------
/lighthergm/man/toyNet.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data.R
 3 | \docType{data}
 4 | \name{toyNet}
 5 | \alias{toyNet}
 6 | \title{A toy network to play \code{lighthergm} with.}
 7 | \format{
 8 | A \code{statnet}'s network class object. It has three nodal features.
 9 | \describe{
10 | \item{block}{block membership of each node}
11 | \item{x}{a covariate. It has 10 labels.}
12 | \item{y}{a covariate. It has 10 labels.}
13 | ...
14 | }
15 | \code{x} and \code{y} are not variables with any particular meaning.
16 | }
17 | \usage{
18 | toyNet
19 | }
20 | \description{
21 | This network has a clear cluster structure.
22 | The number of clusters is four, and which cluster each node belongs to is defined in the variable "block".
23 | }
24 | \keyword{datasets}
25 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # User-specific files
 9 | .Ruserdata
10 | 
11 | # Example code in package build process
12 | *-Ex.R
13 | 
14 | # Output files from R CMD build
15 | /*.tar.gz
16 | 
17 | # Output files from R CMD check
18 | /*.Rcheck/
19 | 
20 | # RStudio files
21 | .Rproj.user/
22 | 
23 | # produced vignettes
24 | vignettes/*.html
25 | vignettes/*.pdf
26 | 
27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
28 | .httr-oauth
29 | 
30 | # knitr and R markdown default cache directories
31 | *_cache/
32 | /cache/
33 | 
34 | # Temporary files created by R markdown
35 | *.utf8.md
36 | *.knit.md
37 | 
38 | # R Environment Variables
39 | .Renviron
40 | .Rproj.user
41 | 
42 | # Environment variables
43 | .env
44 | 


--------------------------------------------------------------------------------
/lighthergm/man/generate_network_for_output.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{generate_network_for_output}
 4 | \alias{generate_network_for_output}
 5 | \title{Create a final network}
 6 | \usage{
 7 | generate_network_for_output(
 8 |   formula_for_simulation_within,
 9 |   formula_for_simulation_between,
10 |   sorted_dataframe,
11 |   edgelist
12 | )
13 | }
14 | \arguments{
15 | \item{formula_for_simulation_within}{formula for simulating a within network}
16 | 
17 | \item{formula_for_simulation_between}{formula for simulating a between network}
18 | 
19 | \item{sorted_dataframe}{a data frame generated by \code{sort_block_membership}}
20 | 
21 | \item{edgelist}{an edgelist that contain both within- and between-block edges without duplication}
22 | }
23 | \description{
24 | Create a final network
25 | }
26 | 


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | # If you would like to utilize parallelized computation, the R version should be less than 4.0.0.
 2 | # There seems some problem with OpenMP for R >= 4.0.0 when you use macOS.
 3 | # https://github.com/RcppCore/RcppArmadillo/issues/290
 4 | FROM rocker/verse:3.6.3
 5 | 
 6 | # Install Python for installing Python's infomap library.
 7 | RUN apt-get update && apt-get install -y python3-dev python3-pip
 8 | 
 9 | # Install infomap by pip3.
10 | RUN pip3 install -U infomap
11 | 
12 | # Make it possible to install latest packages
13 | # https://www.rocker-project.org/use/extending/
14 | RUN echo "options(repos = c(REPO_NAME = 'https://packagemanager.rstudio.com/all/__linux__/centos7/latest'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site
15 | 
16 | # Install lighthergm.
17 | COPY ./lighthergm /lighthergm
18 | RUN Rscript -e "devtools::install('lighthergm')"
19 | 


--------------------------------------------------------------------------------
/lighthergm/man/sort_block_membership.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{sort_block_membership}
 4 | \alias{sort_block_membership}
 5 | \title{Create a data frame from block memberships, vertex ids, and vertex covariates, sorted by block ids.}
 6 | \usage{
 7 | sort_block_membership(
 8 |   data_for_simulation,
 9 |   colname_vertex_id,
10 |   colname_block_membership
11 | )
12 | }
13 | \arguments{
14 | \item{data_for_simulation}{a data frame that contains vertex id, block membership, and vertex features.}
15 | 
16 | \item{colname_vertex_id}{a column name in the data frame for the vertex id}
17 | 
18 | \item{colname_block_membership}{a column name in the data frame for the block membership.}
19 | }
20 | \description{
21 | Create a data frame from block memberships, vertex ids, and vertex covariates, sorted by block ids.
22 | }
23 | 


--------------------------------------------------------------------------------
/lighthergm/man/combine_within_between_edges.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{combine_within_between_edges}
 4 | \alias{combine_within_between_edges}
 5 | \title{Combine within- and between-block edges while removing duplicated links.}
 6 | \usage{
 7 | combine_within_between_edges(
 8 |   edgelist_within,
 9 |   edgelist_between,
10 |   use_fast_between_simulation
11 | )
12 | }
13 | \arguments{
14 | \item{edgelist_within}{an within-block edgelist}
15 | 
16 | \item{edgelist_between}{a between-block edgelist (Potentially, there are edges that also appear in the within-block edgelist)}
17 | 
18 | \item{use_fast_between_simulation}{If \code{TRUE}, this function uses an effcient way to simulate a between-block network. If the network is very large, you should consider using this option.}
19 | }
20 | \description{
21 | Combine within- and between-block edges while removing duplicated links.
22 | }
23 | 


--------------------------------------------------------------------------------
/lighthergm/man/generate_seed_network.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{generate_seed_network}
 4 | \alias{generate_seed_network}
 5 | \title{Create a seed network from which a network will be simulated.}
 6 | \usage{
 7 | generate_seed_network(
 8 |   formula_for_simulation,
 9 |   sorted_dataframe,
10 |   edgelist = NULL,
11 |   directed
12 | )
13 | }
14 | \arguments{
15 | \item{formula_for_simulation}{formula for simulating a network}
16 | 
17 | \item{sorted_dataframe}{a data frame generated by \code{sort_block_membership}}
18 | 
19 | \item{edgelist}{an edgelist from which a seed network is generated. The class of the edgelist should be "edgelist", i.e. it should contain as attributes the number of nodes and vertex names.}
20 | 
21 | \item{directed}{a boolean of whether the network is directed.}
22 | }
23 | \description{
24 | Create a seed network from which a network will be simulated.
25 | }
26 | 


--------------------------------------------------------------------------------
/lighthergm/man/get_gof_stats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gof_lighthergm.R
 3 | \name{get_gof_stats}
 4 | \alias{get_gof_stats}
 5 | \title{Gets the GOF stats for a formula
 6 | If a network is passed, that one is used to obtain the network statistics,
 7 | otherwise the netwok in the formula is used.}
 8 | \usage{
 9 | get_gof_stats(
10 |   sim_formula,
11 |   net = NULL,
12 |   sim_number = NULL,
13 |   compute_geodesic_distance = FALSE
14 | )
15 | }
16 | \arguments{
17 | \item{sim_formula}{a formula}
18 | 
19 | \item{net}{a statnet network object}
20 | 
21 | \item{sim_number}{the ID of the current simulation}
22 | 
23 | \item{compute_geodesic_distance}{if TRUE, includes the geodesic distance in the result object}
24 | }
25 | \value{
26 | a list with the goodness-of-fit statistics
27 | }
28 | \description{
29 | Gets the GOF stats for a formula
30 | If a network is passed, that one is used to obtain the network statistics,
31 | otherwise the netwok in the formula is used.
32 | }
33 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-compute_linear_term.R:
--------------------------------------------------------------------------------
 1 | test_that("linear term computation without features works", {
 2 |   # Number of nodes
 3 |   N <- 12
 4 |   # Number of clusters
 5 |   K <- 3
 6 | 
 7 |   # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k.
 8 |   tau <-
 9 |     matrix(c(
10 |       0.2, 0.5, 0.3,
11 |       0.4, 0.4, 0.2,
12 |       0.1, 0.4, 0.5,
13 |       0.4, 0.4, 0.2,
14 |       0.1, 0.1, 0.8,
15 |       0.05, 0.05, 0.9,
16 |       0.8, 0.1, 0.1,
17 |       0.3, 0.4, 0.3,
18 |       0.1, 0.8, 0.1,
19 |       0.5, 0.4, 0.1,
20 |       0.3, 0.3, 0.4,
21 |       0.8, 0.1, 0.1
22 |     ),
23 |     nrow = K, ncol = N
24 |     )
25 |   tau <- t(tau)
26 | 
27 |   # Compute gamma (parameter of multinomial distribution)
28 |   alpha <- colSums(tau)
29 | 
30 |   # Compute the true linear term in a naive way
31 |   s_true <- matrix(0, nrow = N, ncol = K)
32 | 
33 |   for (i in 1:N) {
34 |     for (k in 1:K) {
35 |       s_ik <- 1 + log(alpha[k]) - log(tau[i, k])
36 |       s_true[i, k] <- s_true[i, k] + s_ik
37 |     }
38 |   }
39 | 
40 |   s <- compute_linear_term(N, K, alpha, tau, 0)
41 |   expect_equal(s, s_true, check.attributes = FALSE, tolerance = 1e-10)
42 | })
43 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-get_induced_subgraph.R:
--------------------------------------------------------------------------------
 1 | test_that("getting a subgraph from a network object works", {
 2 |   # Which node belongs to which block
 3 |   df <-
 4 |     tibble::tibble(
 5 |       block = c(1, 1, 1, 1, 2, 2, 2, 2),
 6 |       node_id = c("A", "B", "C", "D", "E", "F", "G", "H")
 7 |     )
 8 | 
 9 |   # Edgelist
10 |   edgelist <-
11 |     tibble::tibble(
12 |       source_id = c("A", "A", "A", "B"),
13 |       target_id = c("B", "C", "E", "F")
14 |     )
15 | 
16 |   # When not all nodes are isolated in a block
17 |   subgraph1 <- get_induced_subgraph(block_structure = df, edgelist = edgelist, searched_block = 1)
18 |   adj_true <- matrix(c(0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), nrow = 4, ncol = 4)
19 |   expect_equal(c("A", "B", "C", "D"), network::network.vertex.names(subgraph1))
20 |   expect_equal(adj_true, network::as.matrix.network.adjacency(subgraph1), check.attributes = FALSE)
21 | 
22 |   # When all nodes are isolated in a block
23 |   subgraph2 <- get_induced_subgraph(block_structure = df, edgelist = edgelist, searched_block = 2)
24 |   adj_true <- matrix(0, nrow = 4, ncol = 4)
25 |   expect_equal(c("E", "F", "G", "H"), network::network.vertex.names(subgraph2))
26 |   expect_equal(adj_true, network::as.matrix.network.adjacency(subgraph2), check.attributes = FALSE)
27 | })
28 | 


--------------------------------------------------------------------------------
/lighthergm/man/estimate_within_params.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/estimate_within_params.R
 3 | \name{estimate_within_params}
 4 | \alias{estimate_within_params}
 5 | \title{Estimate a within-block network model.}
 6 | \usage{
 7 | estimate_within_params(
 8 |   formula,
 9 |   network,
10 |   z_memb,
11 |   number_cores = 1,
12 |   verbose = 1,
13 |   seeds = NULL,
14 |   method_second_step = c("MPLE", "MLE"),
15 |   offset_coef = NULL,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{formula}{a within network formula}
21 | 
22 | \item{network}{a network object}
23 | 
24 | \item{z_memb}{block memberships for each node}
25 | 
26 | \item{number_cores}{The number of CPU cores to use.}
27 | 
28 | \item{verbose}{A logical or an integer: if this is TRUE/1,
29 | the program will print out additional information about the progress of estimation and simulation.}
30 | 
31 | \item{seeds}{seed value (integer) for the random number generator}
32 | 
33 | \item{method_second_step}{If "MPLE" (the default), then the maximum pseudolikelihood estimator is returned.
34 | If "MLE", then an approximate maximum likelihood estimator is returned.}
35 | 
36 | \item{offset_coef}{a vector of model parameters to be fixed when estimation.(i.e., not estimated).}
37 | 
38 | \item{...}{Additional arguments, to be passed to lower-level functions}
39 | }
40 | \description{
41 | Estimate a within-block network model.
42 | }
43 | 


--------------------------------------------------------------------------------
/lighthergm/man/draw_within_block_connection.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{draw_within_block_connection}
 4 | \alias{draw_within_block_connection}
 5 | \title{Draw within-block connections}
 6 | \usage{
 7 | draw_within_block_connection(
 8 |   seed_network,
 9 |   formula_for_simulation,
10 |   coef_within_block,
11 |   ergm_control,
12 |   output,
13 |   seed,
14 |   n_sim,
15 |   verbose,
16 |   ...
17 | )
18 | }
19 | \arguments{
20 | \item{seed_network}{a seed network from which a network will be simulated.}
21 | 
22 | \item{formula_for_simulation}{formula for simulating a network}
23 | 
24 | \item{coef_within_block}{a vector of within-block parameters. The order of the parameters should match that of the formula.}
25 | 
26 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation}
27 | 
28 | \item{output}{Normally character, one of "network" (default), "stats", "edgelist", to determine the output format.}
29 | 
30 | \item{seed}{seed value (integer) for the random number generator.}
31 | 
32 | \item{n_sim}{Number of networks to be randomly drawn from the given distribution on the set of all networks.}
33 | 
34 | \item{verbose}{If this is TRUE/1, the program will print out additionalinformation about the progress of simulation.}
35 | 
36 | \item{...}{Additional arguments, to be passed to lower-level functions}
37 | }
38 | \description{
39 | Draw within-block connections
40 | }
41 | 


--------------------------------------------------------------------------------
/lighthergm/src/compute_yule_coef.cpp:
--------------------------------------------------------------------------------
 1 | #include <RcppArmadillo.h>
 2 | // [[Rcpp::depends(RcppArmadillo)]]
 3 | 
 4 | //' Compute Yule's Φ-coefficient
 5 | //' @param z_star a true block membership
 6 | //' @param z an estimated block membership
 7 | //' @export
 8 | // [[Rcpp::export]]
 9 | double compute_yule_coef(
10 |     const arma::vec& z_star,
11 |     const arma::vec& z) {
12 | 
13 |   double n_00 = 0;
14 |   double n_01 = 0;
15 |   double n_10 = 0;
16 |   double n_11 = 0;
17 | 
18 |   int numOfVertices = z.n_elem;
19 | 
20 |   // Remove missing values from z_star and z
21 |   arma::vec z_star_new = z_star;
22 |   arma::vec z_new = z;
23 |   for (int i = 0; i < numOfVertices; i++) {
24 |     if (std::isnan(z_star_new(i))) {
25 |       z_star_new[i] = -100;
26 |     }
27 |     if (std::isnan(z_new(i))) {
28 |       z_new[i] = -1;
29 |     }
30 |   }
31 | 
32 |   for (int i = 0; i < numOfVertices; i++) {
33 |     for (int j = i+1; j < numOfVertices; j++) {
34 |       if (z_star_new(i) == z_star_new(j)) {
35 |         if (z_new(i) == z_new(j)) {
36 |           n_11 += 1;
37 |         } else {
38 |           n_10 += 1;
39 |         }
40 |       } else {
41 |         if (z_new(i) == z_new(j)) {
42 |           n_01 += 1;
43 |         } else {
44 |           n_00 += 1;
45 |         }
46 |       }
47 |     }
48 |   }
49 | 
50 |   // Compute Yule's φ-coefficnent
51 |   double num = (n_00 * n_11) - (n_01 * n_10);
52 |   double denom = sqrt((n_00 + n_01) * (n_10 + n_11) * (n_00 + n_10) * (n_01 + n_11));
53 |   double phi = num / denom;
54 | 
55 |   // Return the output
56 |   return phi;
57 | }
58 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-compute_yule_coef.R:
--------------------------------------------------------------------------------
 1 | test_that("computing Yule's phi-coefficient works", {
 2 |   # Number of nodes
 3 |   N <- 1000
 4 |   # Number of clusters
 5 |   K <- 100
 6 | 
 7 |   # An extreme case
 8 |   z1 <- rep(1:K, each = N / K)
 9 |   z_star <- z1
10 | 
11 |   # Check if it equals one.
12 |   expect_equal(compute_yule_coef(z_star, z1), 1)
13 | 
14 |   # Another case
15 |   z2 <- sample(1:K, size = N, replace = TRUE)
16 | 
17 |   # Compute Yule's phi-coefficient naively.
18 |   n00 <- 0
19 |   n01 <- 0
20 |   n10 <- 0
21 |   n11 <- 0
22 | 
23 |   for (i in 1:(N - 1)) {
24 |     for (j in (i + 1):N) {
25 |       if (z_star[i] == z_star[j] & z2[i] == z2[j]) {
26 |         n11 <- n11 + 1
27 |       }
28 |       if (z_star[i] == z_star[j] & z2[i] != z2[j]) {
29 |         n10 <- n10 + 1
30 |       }
31 |       if (z_star[i] != z_star[j] & z2[i] == z2[j]) {
32 |         n01 <- n01 + 1
33 |       }
34 |       if (z_star[i] != z_star[j] & z2[i] != z2[j]) {
35 |         n00 <- n00 + 1
36 |       }
37 |     }
38 |   }
39 | 
40 |   phi_naive <- (n00 * n11 - n01 * n10) / sqrt((n00 + n01) * (n10 + n11) * (n00 + n10) * (n01 + n11))
41 | 
42 |   # Check if it works
43 |   expect_equal(compute_yule_coef(z_star, z2), phi_naive, tolerance = 1e-10)
44 | })
45 | 
46 | 
47 | test_that("Removing missing values works", {
48 |   # Number of nodes
49 |   N <- 1000
50 |   # Number of clusters
51 |   K <- 100
52 | 
53 |   # An extreme case
54 |   z1 <- rep(1:K, each = N / K)
55 |   z_star <- z1
56 |   z1[1:4] <- NA
57 | 
58 |   # Check if it equals one.
59 |   compute_yule_coef(z_star, z1)
60 |   expect_silent(compute_yule_coef(z_star, z1))
61 | 
62 |   z_star[50:56] <- NA
63 |   expect_silent(compute_yule_coef(z_star, z1))
64 | })
65 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-get_elementwise_multiplied_matrices.R:
--------------------------------------------------------------------------------
 1 | test_that("element-wise matrix multiplication works", {
 2 |   # Prepare matrices
 3 |   N <- 50
 4 |   G <- matrix(as.integer(unlist(purrr::rbernoulli(n = N * N, p = 0.2))), nrow = N, ncol = N)
 5 |   diag(G) <- 0
 6 |   G <- as(G, "sparseMatrix")
 7 |   G <- Matrix::forceSymmetric(G)
 8 | 
 9 |   S <- matrix(as.integer(unlist(purrr::rbernoulli(n = N * N, p = 0.2))), nrow = N, ncol = N)
10 |   diag(S) <- 0
11 |   S <- as(S, "sparseMatrix")
12 |   S <- Matrix::forceSymmetric(S)
13 | 
14 |   V <- matrix(as.integer(unlist(purrr::rbernoulli(n = N * N, p = 0.2))), nrow = N, ncol = N)
15 |   diag(V) <- 0
16 |   V <- as(V, "sparseMatrix")
17 |   V <- Matrix::forceSymmetric(V)
18 | 
19 |   # Prepare true results
20 |   output_true <- list()
21 |   # The first element of the list is filled with - (S + V) + (S % V), which will be used to compute pi_d0x0.
22 |   output_true[[1]] <- -(S + V) + (S * V)
23 |   # N = 1: (1, 0, 0)
24 |   output_true[[2]] <- Matrix::drop0(G * (1 - S) * (1 - V))
25 |   # N = 2: (0, 1, 0)
26 |   output_true[[3]] <- Matrix::drop0((1 - G) * S * (1 - V))
27 |   # N = 3: (1, 1, 0)
28 |   output_true[[4]] <- Matrix::drop0(G * S * (1 - V))
29 |   # N = 4: (0, 0, 1)
30 |   output_true[[5]] <- Matrix::drop0((1 - G) * (1 - S) * V)
31 |   # N = 5: (1, 0, 1)
32 |   output_true[[6]] <- Matrix::drop0(G * (1 - S) * V)
33 |   # N = 6: (0, 1, 1)
34 |   output_true[[7]] <- Matrix::drop0((1 - G) * S * V)
35 |   # N = 7: (1, 1, 1)
36 |   output_true[[8]] <- Matrix::drop0(G * S * V)
37 | 
38 |   # Compute element-wise mutiplied matrices
39 |   output <- get_elementwise_multiplied_matrices(G, list(S, V))
40 | 
41 |   for (i in 1:8) {
42 |     expect_equal(output[[i]], output_true[[i]], check.attributes = FALSE, )
43 |   }
44 | })
45 | 


--------------------------------------------------------------------------------
/lighthergm/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: lighthergm
 2 | Title: Fit, Simulate, and Diagnose Hierarchical Exponential-Family Models for Networks in A Scalable Way
 3 | Version: 1.1.0
 4 | Authors@R: c(
 5 |     person(given = "Shota", family = "Komatsu", role = c("aut", "cre"), email = "first.last@example.com"),
 6 |     person(given = "Juan Nelson", family = "Martínez Dahbura", role = c("aut"), email = "first.last@example.com"),
 7 |     person(given = "Takanori", family = "Nishida", role = c("aut"), email = "first.last@example.com"),
 8 |     person(given = "Angelo", family = "Mele", role = c("aut"), email = "first.last@example.com"))
 9 | Description: `lighthergm` is an R library for estimating hierarchical exponential random graph models (HERGMs) efficiently on large networks. It also contains tools for simulating networks with local dependence and for assessing the goodness-of-fit of the estimates.
10 | License: GPL-3
11 | Encoding: UTF-8
12 | LazyData: true
13 | Roxygen: list(markdown = TRUE)
14 | RoxygenNote: 7.1.1
15 | Depends:
16 |     R (>= 3.5.0)
17 | LinkingTo: 
18 |     Rcpp,
19 |     RcppArmadillo (>= 0.10.5)
20 | Imports: 
21 |     Rcpp,
22 |     ergm (>= 3.11.0),
23 |     network (>= 1.16.0),
24 |     Matrix,
25 |     stringr,
26 |     intergraph,
27 |     igraph,
28 |     parallel,
29 |     magrittr,
30 |     purrr,
31 |     dplyr,
32 |     tibble,
33 |     glue,
34 |     readr,
35 |     foreach,
36 |     rlang,
37 |     reticulate,
38 |     tidyr,
39 |     statnet.common,
40 |     doParallel,
41 |     memoise,
42 |     cachem
43 | Suggests: 
44 |     rmarkdown,
45 |     knitr,
46 |     testthat,
47 |     ggplot2,
48 |     GGally,
49 |     sna
50 | Config/reticulate:
51 |   list(
52 |     packages = list(
53 |       list(package = "infomap", version = "1.3.0", pip = TRUE)
54 |     )
55 |   )
56 | VignetteBuilder: knitr
57 | 


--------------------------------------------------------------------------------
/lighthergm/man/simulate_hergm_within.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{simulate_hergm_within}
 4 | \alias{simulate_hergm_within}
 5 | \title{Obtains network statistics based on MCMC simulations including only the
 6 | within-blocks connections.}
 7 | \usage{
 8 | simulate_hergm_within(
 9 |   formula_for_simulation,
10 |   data_for_simulation,
11 |   colname_vertex_id,
12 |   colname_block_membership,
13 |   coef_within_block,
14 |   seed_edgelist = NULL,
15 |   output = "stats",
16 |   ergm_control = ergm::control.simulate.formula(),
17 |   seed = NULL,
18 |   n_sim = 1,
19 |   verbose = 0,
20 |   ...
21 | )
22 | }
23 | \arguments{
24 | \item{formula_for_simulation}{formula for simulating a network}
25 | 
26 | \item{data_for_simulation}{a data frame that contains vertex id, block membership, and vertex features.}
27 | 
28 | \item{colname_vertex_id}{a column name in the data frame for the vertex ids}
29 | 
30 | \item{colname_block_membership}{a column name in the data frame for the block membership}
31 | 
32 | \item{coef_within_block}{a vector of within-block parameters. The order of the parameters should match that of the formula.}
33 | 
34 | \item{seed_edgelist}{an edgelist used for creating a seed network. It should have the "edgelist" class}
35 | 
36 | \item{output}{The desired output of the simulation (any of \code{stats}, \code{network} or \code{edgelist}). Defaults to \code{stats}}
37 | 
38 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation}
39 | 
40 | \item{seed}{seed value (integer) for network simulation.}
41 | 
42 | \item{n_sim}{number of networks generated}
43 | 
44 | \item{verbose}{If this is TRUE/1, the program will print out additional information about the progress of simulation.}
45 | 
46 | \item{...}{arguments to be passed to low level functions}
47 | }
48 | \description{
49 | Obtains network statistics based on MCMC simulations including only the
50 | within-blocks connections.
51 | }
52 | 


--------------------------------------------------------------------------------
/lighthergm/R/get_list_sparse_feature_adjmat.R:
--------------------------------------------------------------------------------
 1 | #' Get a list of sparse feature adjacency matrix from a formula
 2 | #' @importFrom foreach foreach %do%
 3 | #' @param network a network object from which nodal covariates are extracted.
 4 | #' @param formula a network model to be considered
 5 | #' @export
 6 | get_list_sparse_feature_adjmat <- function(network, formula) {
 7 |   # Get variable names from formula (extract strings sandwiched by double quotes)
 8 |   list_varname <- as.character(formula)[3]
 9 |   list_varname <- unlist(stringr::str_extract_all(string = list_varname, pattern = '"[^"]*"'))
10 |   list_varname <- stringr::str_remove_all(string = list_varname, pattern = '\"')
11 | 
12 |   # Extract variable from a network object
13 |   list_var <-
14 |     foreach(i = 1:length(list_varname)) %do% {
15 |       feature <- network::get.vertex.attribute(x = network, list_varname[i])
16 |       return(feature)
17 |     }
18 | 
19 |   # Create a list of sparse feature adjacency matrices
20 |   list_sparse_feature_adjmat <-
21 |     foreach(i = 1:length(list_var)) %do% {
22 |       if (is.numeric(list_var[[i]])) {
23 |         output <- get_sparse_feature_adjmat(list_var[[i]])
24 |       } else {
25 |         output <- get_sparse_feature_adjmat_from_string(list_var[[i]])
26 |       }
27 |     }
28 | 
29 |   # Attach variable names to each matrix
30 |   names(list_sparse_feature_adjmat) <- list_varname
31 | 
32 |   # Return the output
33 |   return(list_sparse_feature_adjmat)
34 | }
35 | 
36 | #' Get a list of sparse feature adjacency matrix from a formula
37 | #' @param net a network object from which nodal covariates are extracted.
38 | #' @param list_feature_matrices a list of feature adjacency matrices generated by `get_list_sparse_feature_adjmat()`.
39 | #' @export
40 | compute_multiplied_feature_matrices <- function(net, list_feature_matrices) {
41 |   adj <- network::as.edgelist(net)
42 |   N <- net$gal$n
43 |   adj <- Matrix::sparseMatrix(i = adj[, 1], j = adj[, 2], x = 1, dims = c(N, N), symmetric = TRUE)
44 |   get_elementwise_multiplied_matrices(adj, list_feature_matrices)
45 | }
46 | 


--------------------------------------------------------------------------------
/lighthergm/man/gof_lighthergm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gof_lighthergm.R
 3 | \name{gof_lighthergm}
 4 | \alias{gof_lighthergm}
 5 | \title{Goodness of fit statistics for HERGM}
 6 | \usage{
 7 | gof_lighthergm(
 8 |   net,
 9 |   data_for_simulation,
10 |   list_feature_matrices,
11 |   colname_vertex_id,
12 |   colname_block_membership,
13 |   lighthergm_results,
14 |   type = "full",
15 |   ergm_control = ergm::control.simulate.formula(),
16 |   seed = NULL,
17 |   n_sim = 1,
18 |   prevent_duplicate = TRUE,
19 |   compute_geodesic_distance = FALSE,
20 |   start_from_observed = FALSE,
21 |   ...
22 | )
23 | }
24 | \arguments{
25 | \item{net}{the target network}
26 | 
27 | \item{data_for_simulation}{a dataframe with node-level covariates}
28 | 
29 | \item{list_feature_matrices}{a list of feature adjacency matrices}
30 | 
31 | \item{colname_vertex_id}{the name of the column that contains the node id}
32 | 
33 | \item{colname_block_membership}{the name o the column that contains the block affiliation of each node}
34 | 
35 | \item{lighthergm_results}{a lighthergm results object}
36 | 
37 | \item{type}{the type of evaluation to perform. Can take the values \code{full} or \code{within}. \code{full} performs the evaluation on all edges, and \code{within} only considers within-block edges.}
38 | 
39 | \item{ergm_control}{MCMC parameters as an instance of ergm.control}
40 | 
41 | \item{seed}{the seed to be passed to simulate_hergm}
42 | 
43 | \item{n_sim}{the number of simulations to employ for calculating goodness of fit}
44 | 
45 | \item{prevent_duplicate}{see \code{simulate_hergm}}
46 | 
47 | \item{compute_geodesic_distance}{if \code{TRUE}, the distribution of geodesic distances is also computed (considerably increases computation time on large networks. \code{FALSE} by default.)}
48 | 
49 | \item{start_from_observed}{if \code{TRUE}, MCMC uses the observed network as a starting point}
50 | 
51 | \item{...}{Additional arguments, to be passed to lower-level functions}
52 | }
53 | \description{
54 | Goodness of fit statistics for HERGM
55 | }
56 | 


--------------------------------------------------------------------------------
/lighthergm/src/simulate_between_network.cpp:
--------------------------------------------------------------------------------
 1 | #ifdef _OPENMP
 2 |     #include <omp.h>
 3 | #else
 4 |     #define omp_get_max_threads() 0
 5 | #endif
 6 | #include <RcppArmadillo.h>
 7 | // [[Rcpp::depends(RcppArmadillo)]]
 8 | // [[Rcpp::plugins(openmp)]]
 9 | 
10 | // Function that simulates a between-block network.
11 | // The first element of `coef_between` must be the edges parameter.
12 | // [[Rcpp::export]]
13 | arma::sp_mat simulate_between_network
14 |   (int numOfVertices,
15 |    const Rcpp::List& list_feature_adjmat,
16 |    const arma::vec& coef_between,
17 |    const arma::vec& block_membership,
18 |    bool directed
19 |    )
20 | {
21 |   // Number of covariates
22 |   int numOfCovariates = list_feature_adjmat.length();
23 |   // Initialize a sparse adjacency matrix for the between-block network
24 |   arma::sp_mat between_adjmat = arma::sp_mat(numOfVertices, numOfVertices);
25 |   // Prepare a sparse adjacency cube
26 |   arma::field<arma::sp_mat> feature_cube(numOfCovariates);
27 |   for (int p = 0; p < numOfCovariates; p++) {
28 |     feature_cube(p) = Rcpp::as<arma::sp_mat>(list_feature_adjmat[p]);
29 |   }
30 |   // Necessary for R random number generator
31 |   GetRNGstate();
32 | 
33 |   #pragma omp parallel
34 |   {
35 |     // Simulate between-block links
36 |     #pragma omp for
37 |     for (int j = 0; j < numOfVertices; j++) {
38 |       for (int i = 0; i < numOfVertices; i++) {
39 |         // Skip as many unnecessary calculations as possible in this nested loop, which makes the computation faster.
40 |         if (block_membership[i] != block_membership[j] && ((directed && i != j) || (!directed && i < j))) {
41 |           double x = unif_rand();
42 |           double u = coef_between[0];
43 |           for (int p = 0; p < numOfCovariates; p++) {
44 |             double elem = feature_cube(p)(i, j);
45 |             double elem_coef = coef_between[p+1];
46 |             u += elem_coef * elem;
47 |           }
48 |           //std::printf("x: %f, Thread: %d, Loop: (%d, %d), u: %f\n", x, omp_get_thread_num(), i, j, u);
49 |           if (u > log(x/(1-x))) {
50 |             between_adjmat(i, j) = 1;
51 |             }
52 |           }
53 |         }
54 |       }
55 |     }
56 |   // This must be called after GetRNGstate before returning to R.
57 |   PutRNGstate();
58 |   return between_adjmat;
59 | }
60 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-get_sparse_feature_adjmat_cpp.R:
--------------------------------------------------------------------------------
 1 | test_that("creating a feature adjacency matrix works", {
 2 |   library(magrittr)
 3 | 
 4 |   get_sparse_feature_adjacency_matrix <- function(feature) {
 5 |     S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(length(feature), length(feature)))
 6 |     S <- as(S, "dgCMatrix")
 7 |     for (i in 1:length(feature)) {
 8 |       for (j in 1:length(feature)) {
 9 |         if (i != j) {
10 |           if (feature[i] == feature[j]) {
11 |             S[i, j] <- 1
12 |           }
13 |         }
14 |       }
15 |     }
16 |     return(S)
17 |   }
18 | 
19 |   # Number of nodes
20 |   N <- 100
21 | 
22 |   # Features
23 |   x <- sample(x = c(1:100), size = N, replace = TRUE)
24 |   y <- sample(x = c(1:30), size = N, replace = TRUE)
25 |   z <- sample(x = c(1:50), size = N, replace = TRUE)
26 |   w <- sample(x = c(LETTERS, letters), size = N, replace = TRUE)
27 | 
28 |   # Create an edgelist
29 |   edgelist <-
30 |     tibble::tibble(tail = 1:N, head = 1:N) %>%
31 |     tidyr::expand(tail, head) %>%
32 |     dplyr::filter(tail < head) %>%
33 |     dplyr::mutate(connect = unlist(as.integer(purrr::rbernoulli(n = nrow(.), p = 0.005)))) %>%
34 |     dplyr::filter(connect == 1) %>%
35 |     dplyr::select(tail, head)
36 | 
37 |   # Create a network object
38 |   g <- network::network.initialize(n = N, directed = FALSE)
39 |   network::add.edges(x = g, tail = edgelist$tail, head = edgelist$head)
40 |   network::set.vertex.attribute(x = g, attrname = "x", value = x)
41 |   network::set.vertex.attribute(x = g, attrname = "y", value = y)
42 |   network::set.vertex.attribute(x = g, attrname = "z", value = z)
43 |   network::set.vertex.attribute(x = g, attrname = "w", value = w)
44 | 
45 |   # Create a formula
46 |   form <- g ~ edges + triangles + nodematch("x") + nodematch("y") + nodematch("z") + nodematch("w")
47 | 
48 |   # True list of feature adjacency metrices
49 |   list_adjmat_true <- list(
50 |     get_sparse_feature_adjacency_matrix(x),
51 |     get_sparse_feature_adjacency_matrix(y),
52 |     get_sparse_feature_adjacency_matrix(z),
53 |     get_sparse_feature_adjacency_matrix(w)
54 |   )
55 | 
56 |   # Create a list
57 |   list_adjmat <- get_list_sparse_feature_adjmat(network = g, formula = form)
58 | 
59 |   # Check if it works
60 |   for (i in 1:4) {
61 |     expect_equal(list_adjmat[[i]], list_adjmat_true[[i]], check.attributes = FALSE, tolerance = 1e-10)
62 |   }
63 | })
64 | 


--------------------------------------------------------------------------------
/lighthergm/R/estimate_between_param.R:
--------------------------------------------------------------------------------
 1 | #' Estimate between-block parameters by logit
 2 | #' @importFrom magrittr %>%
 3 | #' @importFrom ergm ergm
 4 | #' @param formula formula for estimating between-block parameters
 5 | #' @param network network object
 6 | #' @param block a vector that represents which node belongs to which node
 7 | #' @export
 8 | estimate_between_param <- function(formula, network, block) {
 9 |   # Create a data frame that stores node-block correspondences.
10 |   df_block <-
11 |     tibble::tibble(
12 |       intergraph_id = 1:length(block),
13 |       block = block
14 |     )
15 | 
16 |   # Create an edgelist for within-block connections
17 |   within_block_link <-
18 |     intergraph::asDF(network)$edges %>%
19 |     dplyr::rename(
20 |       tail = V1,
21 |       head = V2
22 |     ) %>%
23 |     dplyr::left_join(., df_block, by = c("tail" = "intergraph_id")) %>%
24 |     dplyr::select(tail, head, block) %>%
25 |     dplyr::rename(block_tail = block) %>%
26 |     dplyr::left_join(., df_block, by = c("head" = "intergraph_id")) %>%
27 |     dplyr::select(tail, head, block_tail, block) %>%
28 |     dplyr::rename(block_head = block) %>%
29 |     dplyr::filter(block_tail == block_head)
30 | 
31 |   # Get edge id for within-block links
32 |   within_block_link_eid <-
33 |     unlist(network::get.dyads.eids(
34 |       x = network,
35 |       tails = within_block_link$tail,
36 |       heads = within_block_link$head
37 |     ))
38 | 
39 |   # Create a network for logit estimation
40 |   g_logit <- network
41 | 
42 |   # Delete within-block links
43 |   network::delete.edges(x = g_logit, eid = within_block_link_eid)
44 | 
45 |   # Create a formula that contains only dyad-independent terms. i.e. exclude externality terms like triangle.
46 |   terms <- ergm::ergm_model(formula)$terms
47 |   varnames <-
48 |     statnet.common::list_rhs.formula(formula) %>%
49 |     as.character()
50 |   dep_terms <-
51 |     terms %>% purrr::map(function(t) {
52 |       dep <- t$dependence
53 |       is_dep <- is.null(dep) || dep
54 |     }) %>% unlist()
55 |   between_rhs <- varnames[!dep_terms]
56 |   between_formula <- as.formula(glue::glue("g_logit ~ {paste(between_rhs, collapse = '+')}"))
57 | 
58 |   # Estimate logit
59 |   between_logit <- ergm(
60 |     formula = between_formula,
61 |     estimate = "MPLE"
62 |   )
63 | 
64 |   # Remove unnecessary network objects
65 |   between_logit$newnetwork <- NULL
66 | 
67 |   # Return the output
68 |   return(between_logit)
69 | }
70 | 


--------------------------------------------------------------------------------
/lighthergm/man/draw_between_block_connection.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/simulate_hergm.R
 3 | \name{draw_between_block_connection}
 4 | \alias{draw_between_block_connection}
 5 | \title{Draw between-block connections. There may be some edges that appear both in within- and between-block links.
 6 | The overlapped edges will be removed after this step.}
 7 | \usage{
 8 | draw_between_block_connection(
 9 |   formula_for_simulation,
10 |   sorted_dataframe,
11 |   coef_between_block,
12 |   seed_edgelist_between = NULL,
13 |   use_fast_between_simulation = FALSE,
14 |   list_feature_matrices = NULL,
15 |   seed = NULL,
16 |   n_sim = 1,
17 |   prevent_duplicate = TRUE,
18 |   verbose = 0,
19 |   ergm_control = ergm::control.simulate.formula(),
20 |   output = "edgelist",
21 |   ...
22 | )
23 | }
24 | \arguments{
25 | \item{formula_for_simulation}{formula for simulating a between-block network}
26 | 
27 | \item{sorted_dataframe}{a data frame generated by \code{sort_block_membership}}
28 | 
29 | \item{coef_between_block}{a vector of between-block parameters. The order of the parameters should match that of the formula.}
30 | 
31 | \item{seed_edgelist_between}{a seed edgelist from which a between-block network is simulated.}
32 | 
33 | \item{use_fast_between_simulation}{If \code{TRUE}, this function uses an effcient way to simulate a between-block network.
34 | If the network is very large, you should consider using this option.
35 | Note that when you use this, the first element of \code{coef_between_block} must be the edges parameter.}
36 | 
37 | \item{list_feature_matrices}{a list of feature adjacency matrices. This is used when \code{use_fast_between_simulation}.}
38 | 
39 | \item{seed}{seed value (integer) for the random number generator.}
40 | 
41 | \item{n_sim}{number of networks generated.}
42 | 
43 | \item{prevent_duplicate}{If \code{TRUE}, the coefficient on nodematch("block") is set to be a very large negative number in drawing between-block links,
44 | so that there will be (almost) no within-block links.}
45 | 
46 | \item{verbose}{If this is TRUE/1, the program will print out additionalinformation about the progress of simulation.}
47 | 
48 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation}
49 | 
50 | \item{output}{Normally character, one of "network" (default), "stats", "edgelist", to determine the output format.}
51 | 
52 | \item{...}{Additional arguments, to be passed to lower-level functions}
53 | }
54 | \description{
55 | Draw between-block connections. There may be some edges that appear both in within- and between-block links.
56 | The overlapped edges will be removed after this step.
57 | }
58 | 


--------------------------------------------------------------------------------
/.github/workflows/check-standard.yml:
--------------------------------------------------------------------------------
 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
 2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches:
 7 |       - main
 8 |       - develop
 9 |   pull_request:
10 |     branches:
11 |       - main
12 |       - develop
13 | 
14 | name: R-CMD-check
15 | 
16 | jobs:
17 |   R-CMD-check:
18 |     runs-on: ${{ matrix.config.os }}
19 | 
20 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
21 | 
22 |     strategy:
23 |       fail-fast: false
24 |       matrix:
25 |         config:
26 |           #- {os: windows-latest, r: 'release'}
27 |           - {os: macOS-latest, r: 'release'}
28 |           - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"}
29 | 
30 |     env:
31 |       R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
32 |       RSPM: ${{ matrix.config.rspm }}
33 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
34 | 
35 |     steps:
36 |       - uses: actions/checkout@v2
37 | 
38 |       - uses: r-lib/actions/setup-r@v1
39 |         with:
40 |           r-version: ${{ matrix.config.r }}
41 | 
42 |       - uses: r-lib/actions/setup-pandoc@v1
43 | 
44 |       - name: Query dependencies
45 |         run: |
46 |           install.packages('remotes')
47 |           saveRDS(remotes::dev_package_deps("lighthergm", dependencies = TRUE), ".github/depends.Rds", version = 2)
48 |           writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
49 |         shell: Rscript {0}
50 | 
51 |       - name: Restore R package cache
52 |         uses: actions/cache@v2
53 |         with:
54 |           path: ${{ env.R_LIBS_USER }}
55 |           key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }}
56 |           restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-
57 | 
58 |       - name: Install system dependencies
59 |         if: runner.os == 'Linux'
60 |         run: |
61 |           while read -r cmd
62 |           do
63 |             eval sudo $cmd
64 |           done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04", "lighthergm"))')
65 | 
66 |       - name: Install dependencies
67 |         run: |
68 |           remotes::install_deps("lighthergm", dependencies = TRUE)
69 |           remotes::install_cran("rcmdcheck")
70 |         shell: Rscript {0}
71 | 
72 |       #- name: Install the binary version of igraph (for Windows)
73 |       #  if: runner.os == 'Windows'
74 |       #  run: |
75 |       #    install.packages("igraph", type = "binary")
76 |       #  shell: Rscript {0}
77 | 
78 |       - name: Check
79 |         env:
80 |           _R_CHECK_CRAN_INCOMING_REMOTE_: false
81 |         run: |
82 |           options(crayon.enabled = TRUE)
83 |           rcmdcheck::rcmdcheck("lighthergm", args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check")
84 |         shell: Rscript {0}
85 | 
86 |       - name: Upload check results
87 |         if: failure()
88 |         uses: actions/upload-artifact@main
89 |         with:
90 |           name: ${{ runner.os }}-r${{ matrix.config.r }}-results
91 |           path: check
92 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-cache.R:
--------------------------------------------------------------------------------
  1 | set.seed(334)
  2 | 
  3 | # Simulate a random network for testing
  4 | simulate_network <- function(){
  5 |   N <- 1000
  6 |   K <- 50
  7 |   memb <- rep(1:K, each = N / K)
  8 |   x <- sample(1:10, size = N, replace = TRUE)
  9 |   y <- sample(1:10, size = N, replace = TRUE)
 10 |   list_within_params <- c(-1, 1, 1, 0.5)
 11 |   list_between_params <- c(-3.5, 0.5, 0.5)
 12 | 
 13 |   formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle
 14 | 
 15 |   vertex_id <- 1:N
 16 | 
 17 |   df <- tibble::tibble(
 18 |     id = vertex_id,
 19 |     memb = memb,
 20 |     x = x,
 21 |     y = y
 22 |   )
 23 | 
 24 |   simulate_hergm(
 25 |       formula_for_simulation = formula,
 26 |       data_for_simulation = df,
 27 |       colname_vertex_id = "id",
 28 |       colname_block_membership = "memb",
 29 |       coef_within_block = list_within_params,
 30 |       coef_between_block = list_between_params,
 31 |       ergm_control = ergm::control.simulate.formula(MCMC.burnin = 1000000, MCMC.interval = 1000),
 32 |       seed = 1,
 33 |       n_sim = 1,
 34 |       directed = FALSE,
 35 |       output = "network"
 36 |     )
 37 | }
 38 | 
 39 | # Function that checks that the number of files in the directory is the expected number
 40 | check_files <- function(dir, expected_number){
 41 |   expect_equal(length(list.files(dir, '*.rds')), expected_number)
 42 | }
 43 | 
 44 | cleanup <- function(){
 45 |   do.call(file.remove, list(list.files(tempdir(), '*.rds', full.names = TRUE)))
 46 | }
 47 | 
 48 | test_that('Estimation with a disk cache stores data in the correct directory', {
 49 |   on.exit(cleanup())
 50 | 
 51 |   # Use this directory for caching in disk
 52 |   dir <- tempdir()
 53 | 
 54 |   # Simulate a network
 55 |   g_1 <- simulate_network()
 56 | 
 57 |   # There should be no cached files in the directory
 58 |   check_files(dir, 0)
 59 | 
 60 |   # Perform estimation
 61 |   lighthergm::hergm(
 62 |     object = g_1 ~ edges + nodematch("x"),
 63 |     n_clusters = 20,
 64 |     n_em_step_max = 3,
 65 |     initialization_method = 1,
 66 |     clustering_with_features = TRUE,
 67 |     verbose=2,
 68 |     cache = cachem::cache_disk(dir)
 69 |   )
 70 | 
 71 |   # The estimation should have stored one RDS object in the cache directory.
 72 |   check_files(dir, 1)
 73 | 
 74 |   lighthergm::hergm(
 75 |     object = g_1 ~ edges + nodematch("x"),
 76 |     n_clusters = 20,
 77 |     n_em_step_max = 3,
 78 |     initialization_method = 1,
 79 |     clustering_with_features = TRUE,
 80 |     verbose=2,
 81 |     cache = cachem::cache_disk(dir)
 82 |   )
 83 | 
 84 |   # Running again the estimation on the same network should reuse the previously stored RDS object
 85 |   # and not store a new one.
 86 |   check_files(dir, 1)
 87 | 
 88 |   # Generate a different network
 89 |   g_2 <- simulate_network()
 90 | 
 91 |   # Perform estimation on the new network.
 92 |   lighthergm::hergm(
 93 |     object = g_2 ~ edges + nodematch("x"),
 94 |     n_clusters = 20,
 95 |     n_em_step_max = 3,
 96 |     initialization_method = 1,
 97 |     clustering_with_features = TRUE,
 98 |     verbose=2,
 99 |     cache = cachem::cache_disk(dir)
100 |   )
101 | 
102 |   # The network has changed, so the previously cached RDS is not reused and a new cache file is generated.
103 |   check_files(dir, 2)
104 | 
105 | })
106 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-start-from-given-cluster.R:
--------------------------------------------------------------------------------
 1 | test_that("starting EM iterations and parameter estimation from a given vector of block memberships works", {
 2 |   set.seed(334)
 3 |   # Simulate a network to work with in this unit test.
 4 |   # Number of nodes
 5 |   N <- 1000
 6 |   # Number of blocks
 7 |   K <- 50
 8 |   # Block memberships (same block size)
 9 |   memb <- rep(1:K, each = N / K)
10 |   # Covariates
11 |   x <- sample(1:10, size = N, replace = TRUE)
12 |   y <- sample(1:10, size = N, replace = TRUE)
13 | 
14 |   # Within-block parameters: edges, nodematch("x"), nodematch("y"), triangle
15 |   list_within_params <- c(-1, 1, 1, 0.5)
16 |   # Between-block parameters: edges, nodematch("x"), nodematch("y")
17 |   list_between_params <- c(-3.5, 0.5, 0.5)
18 | 
19 |   formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle
20 | 
21 |   vertex_id <- 1:N
22 | 
23 |   df <- tibble::tibble(
24 |     id = vertex_id,
25 |     memb = memb,
26 |     x = x,
27 |     y = y
28 |   )
29 | 
30 |   g_sim <-
31 |     simulate_hergm(
32 |       formula_for_simulation = formula,
33 |       data_for_simulation = df,
34 |       colname_vertex_id = "id",
35 |       colname_block_membership = "memb",
36 |       coef_within_block = list_within_params,
37 |       coef_between_block = list_between_params,
38 |       ergm_control = ergm::control.simulate.formula(MCMC.burnin = 1000000, MCMC.interval = 1000),
39 |       seed = 1,
40 |       n_sim = 1,
41 |       directed = FALSE,
42 |       output = "network"
43 |     )
44 | 
45 |   # Conduct clustering
46 |   cluster_with_feature <-
47 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
48 |       n_clusters = K,
49 |       estimate_parameters = FALSE,
50 |       verbose = 0,
51 |       n_em_step_max = 3,
52 |       initialization_method = 3,
53 |       infomap_python = FALSE,
54 |       clustering_with_features = TRUE,
55 |       check_alpha_update = TRUE,
56 |       compute_pi = TRUE,
57 |       check_lower_bound = TRUE,
58 |       check_block_membership = TRUE,
59 |       seeds = 334
60 |     )
61 | 
62 |   # Check if starting from the previously estimated block memberships works.
63 |   expect_error(result <-
64 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
65 |       initialized_cluster_data = cluster_with_feature$partition,
66 |       n_em_step_max = 2,
67 |       estimate_parameters = FALSE
68 |     ), NA)
69 | 
70 |   # Check if starting from block memberships initialized Python's infomap works.
71 |   expect_error(result2 <-
72 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
73 |       initialized_cluster_data = system.file("extdata", "initialized_cluster_data_by_infomap.clu", package = "lighthergm"),
74 |       n_em_step_max = 1,
75 |       estimate_parameters = FALSE,
76 |       verbose = 1
77 |     ), NA)
78 | 
79 |   # Check if starting paramter estimation from a given vector of block memberships works.
80 |   expect_error(result3 <-
81 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
82 |       block_membership = result$partition,
83 |       verbose = 1
84 |     ), NA)
85 | 
86 |   # Check if not specifying n_clusters when initialized_cluster_data and block_membership are null yields an error.
87 |   expect_error(result4 <-
88 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
89 |       verbose = 1
90 |     ))
91 | })
92 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-compute_quadratic_term.R:
--------------------------------------------------------------------------------
  1 | test_that("quadratic term calculation without features works", {
  2 |   # Number of nodes
  3 |   N <- 12
  4 |   # Number of clusters
  5 |   K <- 3
  6 |   # Create an adjacency matrix
  7 |   edgelist <-
  8 |     tibble::tibble(
  9 |       tail = 1:N,
 10 |       head = 1:N
 11 |     ) %>%
 12 |     tidyr::expand(tail, head) %>%
 13 |     dplyr::filter(tail < head) %>%
 14 |     dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>%
 15 |     dplyr::filter(connect == 1)
 16 | 
 17 |   net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
 18 |   adj <- network::as.matrix.network.adjacency(net)
 19 |   adj <- as(adj, "dgCMatrix")
 20 | 
 21 |   # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k.
 22 |   tau <-
 23 |     matrix(c(
 24 |       0.2, 0.5, 0.3,
 25 |       0.4, 0.4, 0.2,
 26 |       0.1, 0.4, 0.5,
 27 |       0.4, 0.4, 0.2,
 28 |       0.1, 0.1, 0.8,
 29 |       0.05, 0.05, 0.9,
 30 |       0.8, 0.1, 0.1,
 31 |       0.3, 0.4, 0.3,
 32 |       0.1, 0.8, 0.1,
 33 |       0.5, 0.4, 0.1,
 34 |       0.3, 0.3, 0.4,
 35 |       0.8, 0.1, 0.1
 36 |     ),
 37 |     nrow = K, ncol = N
 38 |     )
 39 |   tau <- t(tau)
 40 | 
 41 |   # Create a K x K matrix whose (k, l) element represents Pr(D_ij = 1 | Z_i = k, Z_j = l).
 42 |   sumTaus <- compute_sumTaus(N, K, tau)
 43 |   pi <- (t(tau) %*% adj %*% tau) / sumTaus
 44 | 
 45 |   # Compute gamma (parameter of multinomial distribution)
 46 |   alpha <- colSums(tau)
 47 | 
 48 |   # Compute the true quadratic term in a naive way
 49 |   A <- matrix(0, nrow = N, ncol = K)
 50 | 
 51 |   for (i in 1:N) {
 52 |     for (k in 1:K) {
 53 |       for (j in 1:N) {
 54 |         if (i != j) {
 55 |           for (l in 1:K) {
 56 |             pi_ij <- pi
 57 |             # When D_ij = 0, we must use 1 - pi.
 58 |             if (adj[i, j] == 0) {
 59 |               pi_ij <- 1 - pi
 60 |             }
 61 |             a_ij <- tau[j, l] * log(pi_ij[k, l])
 62 |             A[i, k] <- A[i, k] + a_ij
 63 |           }
 64 |         }
 65 |       }
 66 |     }
 67 |   }
 68 | 
 69 |   A <- 1 - A / 2
 70 | 
 71 |   # Divide A by alpha_{ik}
 72 |   A <- A / tau
 73 | 
 74 |   A_cpp <- compute_quadratic_term(N, K, alpha, tau, adj, LB = 0)
 75 | 
 76 |   # Check if computation works as expected
 77 |   expect_equal(A, A_cpp, check.attributes = FALSE, tolerance = 1e-10)
 78 | 
 79 |   # Check if Michael's formula is correct
 80 |   # Compute the first term
 81 |   A_true <- 0
 82 | 
 83 |   for (i in 1:N) {
 84 |     for (j in i:N) {
 85 |       if (i != j) {
 86 |         for (k in 1:K) {
 87 |           for (l in 1:K) {
 88 |             pi_ij <- pi
 89 |             # When D_ij = 0, we must use 1 - pi.
 90 |             if (adj[i, j] == 0) {
 91 |               pi_ij <- 1 - pi
 92 |             }
 93 |             A_true <- A_true + tau[i, k]^2 * tau[j, l] * log(pi_ij[k, l]) / (2 * tau[i, k]) + tau[j, l]^2 * tau[i, k] * log(pi_ij[k, l]) / (2 * tau[j, l])
 94 |           }
 95 |         }
 96 |       }
 97 |     }
 98 |   }
 99 | 
100 |   A <- 0
101 | 
102 |   for (i in 1:N) {
103 |     for (k in 1:K) {
104 |       for (j in 1:N) {
105 |         if (i != j) {
106 |           for (l in 1:K) {
107 |             pi_ij <- pi
108 |             # When D_ij = 0, we must use 1 - pi.
109 |             if (adj[i, j] == 0) {
110 |               pi_ij <- 1 - pi
111 |             }
112 |             A <- A + tau[i, k]^2 * tau[j, l] * log(pi_ij[k, l]) / (2 * tau[i, k])
113 |           }
114 |         }
115 |       }
116 |     }
117 |   }
118 | 
119 |   # Check if they are the same
120 |   expect_equal(A, A_true, check.attributes = FALSE, tolerance = 1e-10)
121 | })
122 | 


--------------------------------------------------------------------------------
/lighthergm/src/helper_function.cpp:
--------------------------------------------------------------------------------
  1 | #include <RcppArmadillo.h>
  2 | // [[Rcpp::depends(RcppArmadillo)]]
  3 | 
  4 | // [[Rcpp::export]]
  5 | arma::vec decimal_to_binary_vector(int decimal,
  6 |                                    int vec_length) {
  7 |   int n = decimal;
  8 |   arma::vec output(vec_length);
  9 | 
 10 |   for (int i = 0; i < vec_length; i++) {
 11 |     output[i] = n % 2;
 12 |     n = n/2;
 13 |   }
 14 |   return output;
 15 | }
 16 | 
 17 | // Summing up a matrix by row
 18 | arma::rowvec sumDoubleMatrixByRow(const arma::mat& matrix) {
 19 |   arma::rowvec vector = arma::sum(matrix, 0);
 20 |   return vector;
 21 | }
 22 | 
 23 | 
 24 | // Normalizing tau
 25 | void normalizeTau(arma::mat& tau,
 26 |                   double minValue) {
 27 |   int numOfVertices = tau.n_rows;
 28 |   int numOfClasses = tau.n_cols;
 29 |   // normalize
 30 |   for (int i = 0; i < numOfVertices; i++) {
 31 |     double denominator = 0;
 32 |     for (int k = 0; k < numOfClasses; k++) {
 33 |       denominator += tau(i, k);
 34 |     }
 35 |     bool again = false;
 36 |     for (int k = 0; k < numOfClasses; k++) {
 37 |       tau(i, k) /= denominator;
 38 |       if (tau(i, k) < minValue) {
 39 |         tau(i, k) = minValue;
 40 |         again = true;
 41 |       }
 42 |     }
 43 |     if (again) {
 44 |       denominator = 0;
 45 |       for (int k = 0; k < numOfClasses; k++) {
 46 |         denominator += tau(i, k);
 47 |       }
 48 |       for (int k = 0; k < numOfClasses; k++)
 49 |         tau(i, k) /= denominator;
 50 |     }
 51 |   }
 52 | }
 53 | 
 54 | 
 55 | // [[Rcpp::export]]
 56 | arma::mat compute_sumTaus(int numOfVertices,
 57 |                           int numOfClasses,
 58 |                           const arma::mat& tau,
 59 |                           int verbose = 0) {
 60 | 
 61 |   if (verbose >= 5) {
 62 |     Rcpp::Rcout  << "find_sumTaus: sum by row";
 63 |   }
 64 |   arma::rowvec tauL = sumDoubleMatrixByRow(tau);
 65 | 
 66 |   if (verbose >= 5) {
 67 |     Rcpp::Rcout  << "find_sumTaus: calculating sumTaus";
 68 |   }
 69 |   arma::mat sumTaus = tau.t() * -(tau.each_row() - tauL); // Check computation speed here
 70 | 
 71 |   if (verbose >= 5) {
 72 |     Rcpp::Rcout  << "find_sumTaus: returning";
 73 |   }
 74 | 
 75 |   return sumTaus;
 76 | }
 77 | 
 78 | // A naive implementation of quadratic coefficient computation
 79 | // [[Rcpp::export]]
 80 | arma::mat compute_quadratic_term_naive(int numOfVertices,
 81 |                                        int numOfClasses,
 82 |                                        const arma::mat& pi,
 83 |                                        const arma::mat& tau,
 84 |                                        const arma::sp_mat& network) {
 85 |   arma::mat pi1 = pi;
 86 |   arma::mat pi0 = 1 - pi;
 87 |   arma::mat logPi0 = arma::log(pi0);
 88 |   arma::mat logPi1 = arma::log(pi1);
 89 | 
 90 |   arma::mat A(numOfVertices, numOfClasses);
 91 |   A.zeros();
 92 |   for (int i = 0; i < numOfVertices; i++) {
 93 |     for (int k = 0; k < numOfClasses; k++) {
 94 |       for (int j = 0; j < numOfVertices; j++) {
 95 |         if (i != j) {
 96 |           for (int l = 0; l < numOfClasses; l++) {
 97 |             if (network(i, j) == 0 ) {
 98 |               A(i, k) += tau(j, l) * logPi0(k, l);
 99 |             } else {
100 |               A(i, k) += tau(j, l) * logPi1(k, l);
101 |             }
102 |           }
103 |         }
104 |       }
105 |     }
106 |   }
107 | 
108 |   // Finalize by subtracting half of from 1 dividing tau_{ik}
109 |   for (int i = 0; i < numOfVertices; i++) {
110 |     for (int k = 0; k < numOfClasses; k++) {
111 |       // In theory, A(i, k) must be negative or 0.
112 |       if (A(i, k) > 0) { // In reality, A(i, k) can be greater than 0 because of numerical precision.
113 |         A(i, k) = 0;     // Therefore, we cut it off to 0 in this case
114 |       }
115 |       A(i, k) = 1 - A(i, k) / 2;
116 |       A(i, k) /= tau(i, k);
117 |     }
118 |   }
119 |   return A;
120 | }
121 | 
122 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-compute_pi_with_features.R:
--------------------------------------------------------------------------------
  1 | test_that("computing pi with features works", {
  2 |   # Number of nodes
  3 |   N <- 12
  4 |   # Number of clusters
  5 |   K <- 3
  6 | 
  7 |   # Create an adjacency matrix
  8 |   edgelist <-
  9 |     tibble::tibble(
 10 |       tail = 1:N,
 11 |       head = 1:N
 12 |     ) %>%
 13 |     tidyr::expand(tail, head) %>%
 14 |     dplyr::filter(tail < head) %>%
 15 |     dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>%
 16 |     dplyr::filter(connect == 1)
 17 | 
 18 |   net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
 19 |   adj <- network::as.matrix.network.adjacency(net)
 20 |   adj <- as(adj, "dgCMatrix")
 21 | 
 22 |   # Create feature matrices
 23 |   x <- as.integer(unlist(purrr::rbernoulli(n = N)))
 24 |   S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 25 |   S <- as(S, "dgCMatrix")
 26 |   for (i in 1:N) {
 27 |     for (j in 1:N) {
 28 |       if (i != j) {
 29 |         s_ij <- ifelse(x[i] == x[j], 1, 0)
 30 |         S[i, j] <- s_ij
 31 |       }
 32 |     }
 33 |   }
 34 | 
 35 |   y <- as.integer(unlist(purrr::rbernoulli(n = N)))
 36 |   V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 37 |   V <- as(V, "dgCMatrix")
 38 |   for (i in 1:N) {
 39 |     for (j in 1:N) {
 40 |       if (i != j) {
 41 |         v_ij <- ifelse(y[i] == y[j], 1, 0)
 42 |         V[i, j] <- v_ij
 43 |       }
 44 |     }
 45 |   }
 46 | 
 47 |   z <- as.integer(unlist(purrr::rbernoulli(n = N)))
 48 |   W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 49 |   W <- as(W, "dgCMatrix")
 50 |   for (i in 1:N) {
 51 |     for (j in 1:N) {
 52 |       if (i != j) {
 53 |         w_ij <- ifelse(z[i] == z[j], 1, 0)
 54 |         W[i, j] <- w_ij
 55 |       }
 56 |     }
 57 |   }
 58 | 
 59 |   # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k.
 60 |   tau <-
 61 |     matrix(c(
 62 |       0.2, 0.5, 0.3,
 63 |       0.4, 0.4, 0.2,
 64 |       0.1, 0.4, 0.5,
 65 |       0.4, 0.4, 0.2,
 66 |       0.1, 0.1, 0.8,
 67 |       0.05, 0.05, 0.9,
 68 |       0.8, 0.1, 0.1,
 69 |       0.3, 0.4, 0.3,
 70 |       0.1, 0.8, 0.1,
 71 |       0.5, 0.4, 0.1,
 72 |       0.3, 0.3, 0.4,
 73 |       0.8, 0.1, 0.1
 74 |     ),
 75 |     nrow = K, ncol = N
 76 |     )
 77 |   tau <- t(tau)
 78 | 
 79 |   ###########################################################
 80 |   # Compute the true quadratic term in a naive way
 81 |   ###########################################################
 82 |   # Compute pi for D_ij = 1
 83 |   minPi <- 1e-4
 84 |   list_pi <- list()
 85 |   for (w in 0:1) {
 86 |     for (v in 0:1) {
 87 |       for (s in 0:1) {
 88 |         print(glue::glue("Compute pi for pi_s{s}v{v}w{w}"))
 89 |         denom <- matrix(0, nrow = K, ncol = K)
 90 |         num <- matrix(0, nrow = K, ncol = K)
 91 |         index <- s + 2 * v + 4 * w + 1
 92 |         print(index)
 93 |         for (k in 1:K) {
 94 |           for (l in 1:K) {
 95 |             for (i in 1:N) {
 96 |               for (j in 1:N) {
 97 |                 if (i != j & S[i, j] == s & V[i, j] == v & W[i, j] == w) {
 98 |                   denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l]
 99 |                 }
100 |                 if (i != j & adj[i, j] == 1 & S[i, j] == s & V[i, j] == v & W[i, j] == w) {
101 |                   num[k, l] <- num[k, l] + tau[i, k] * tau[j, l]
102 |                 }
103 |               }
104 |             }
105 |           }
106 |         }
107 |         pi <- num / denom
108 |         # Remove extremely small elements in pi
109 |         for (k in 1:K) {
110 |           for (l in 1:K) {
111 |             if (pi[k, l] < minPi) {
112 |               pi[k, l] <- minPi
113 |             }
114 |           }
115 |         }
116 |         list_pi[[index]] <- pi
117 |       }
118 |     }
119 |   }
120 | 
121 |   list_feature_adjmat <- list(S, V, W)
122 |   list_multiplied_feature_adjmat <- get_elementwise_multiplied_matrices(adj, list_feature_adjmat)
123 |   expect_equal(compute_pi_with_features(N, K, list_multiplied_feature_adjmat, tau), list_pi, tolerance = 1e-10)
124 | })
125 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-estimate-params.R:
--------------------------------------------------------------------------------
  1 | set.seed(334)
  2 | # Prepare data
  3 | edgelist <-
  4 |   tibble::tribble(
  5 |     ~head, ~tail,
  6 |     1, 9,
  7 |     2, 6,
  8 |     2, 7,
  9 |     2, 9,
 10 |     3, 5,
 11 |     3, 9,
 12 |     4, 7,
 13 |     4, 11,
 14 |     4, 15,
 15 |     5, 11,
 16 |     5, 15,
 17 |     7, 8,
 18 |     7, 16,
 19 |     9, 13,
 20 |     9, 14,
 21 |     9, 16,
 22 |     10, 14,
 23 |     11, 15,
 24 |     13, 15,
 25 |     13, 16
 26 |   )
 27 | edgelist <-
 28 |   as.matrix(edgelist)
 29 | attr(edgelist, "n") <- 16
 30 | attr(edgelist, "vnames") <-
 31 |   c(
 32 |     "Acciaiuoli", "Albizzi", "Barbadori", "Bischeri", "Castellani", "Ginori",
 33 |     "Guadagni", "Lamberteschi", "Medici", "Pazzi", "Peruzzi", "Pucci", "Ridolfi",
 34 |     "Salviati", "Strozzi", "Tornabuoni"
 35 |   )
 36 | attr(edgelist, "directed") <- FALSE
 37 | attr(edgelist, "bipartite") <- FALSE
 38 | attr(edgelist, "loops") <- FALSE
 39 | attr(edgelist, "class") <- c("edgelist", "matrix")
 40 | 
 41 | g <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
 42 | 
 43 | x1 <- as.integer(unlist(purrr::rbernoulli(n = g$gal$n)))
 44 | network::set.vertex.attribute(x = g, attrname = "x1", value = x1)
 45 | 
 46 | # Cluster
 47 | z_memb <- rep(1:4, each = 4)
 48 | network::set.vertex.attribute(x = g, attrname = "block", value = z_memb)
 49 | 
 50 | # Create dataset for test
 51 | g_link <- intergraph::asDF(g)$edges
 52 | g_attr <- intergraph::asDF(g)$vertexes
 53 | 
 54 | df_g <-
 55 |   tibble::tibble(
 56 |     head = 1:g$gal$n,
 57 |     tail = 1:g$gal$n
 58 |   ) %>%
 59 |   tidyr::expand(tail, head) %>%
 60 |   dplyr::filter(tail < head) %>%
 61 |   dplyr::left_join(., g_attr, by = c("tail" = "intergraph_id")) %>%
 62 |   dplyr::left_join(., g_attr, by = c("head" = "intergraph_id")) %>%
 63 |   dplyr::mutate(
 64 |     nodematch.x1 = ifelse(x1.x == x1.y, 1, 0),
 65 |     same_block = ifelse(block.x == block.y, 1, 0)
 66 |   ) %>%
 67 |   dplyr::select(tail, head, nodematch.x1:same_block) %>%
 68 |   dplyr::left_join(., g_link, by = c("tail" = "V1", "head" = "V2")) %>%
 69 |   dplyr::mutate(connected = ifelse(is.na(na), 0, 1)) %>%
 70 |   dplyr::select(-na)
 71 | 
 72 | # Estimate the model
 73 | formula <- g ~ edges + nodematch("x1") + triangle + kstar(2)
 74 | est_between <- estimate_between_param(
 75 |   formula = formula,
 76 |   network = g,
 77 |   block = z_memb
 78 | )
 79 | 
 80 | test_that("estimating between-block parameters by logit works", {
 81 |   # Check if between-block connections are all zero.
 82 |   g_logit <- est_between$network
 83 |   edgelist <- intergraph::asDF(g_logit)$edges
 84 | 
 85 |   true_edgelist <-
 86 |     df_g %>%
 87 |     dplyr::filter(same_block == 0 & connected == 1) %>%
 88 |     dplyr::select(tail, head) %>%
 89 |     dplyr::arrange(tail, head)
 90 | 
 91 |   # Does it work!!!?
 92 |   expect_equal(edgelist$V1, true_edgelist$tail)
 93 |   expect_equal(edgelist$V2, true_edgelist$head)
 94 | 
 95 |   # Check if estimates for between-block parameters are the same.
 96 |   param_est <- stats::coef(est_between)
 97 |   logit_true <- glm(
 98 |     formula = connected ~ nodematch.x1,
 99 |     data = df_g %>% dplyr::mutate(connected = ifelse(same_block == 1, 0, connected)),
100 |     family = "binomial"
101 |   )
102 | 
103 |   param_est_true <- stats::coef(logit_true)
104 | 
105 |   # Does it work?
106 |   expect_equal(param_est, param_est_true, check.attributes = FALSE, tolerance = 1e-7)
107 | 
108 |   # Check if within-block parameter estiamtion works
109 |   expect_error(estimate_within_params(
110 |     formula = formula,
111 |     network = g,
112 |     z_memb = z_memb,
113 |     parallel = FALSE,
114 |     verbose = 0,
115 |     initial_estimate = NULL,
116 |     seeds = NULL,
117 |     method_second_step = "MPLE"
118 |   ), NA)
119 | })
120 | 
121 | test_that("estimating between-block parameters using a formula without externality terms works", {
122 |   # Check if within-block parameter estiamtion works
123 |   expect_error(
124 |     estimate_between_param(
125 |       formula = g ~ edges + nodematch("x1"),
126 |       network = g,
127 |       block = z_memb
128 |     ),
129 |     NA)
130 | })
131 | 


--------------------------------------------------------------------------------
/lighthergm/R/RcppExports.R:
--------------------------------------------------------------------------------
 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 3 | 
 4 | eigenvectors_sparse <- function(X, n_vec) {
 5 |     .Call('_lighthergm_eigenvectors_sparse', PACKAGE = 'lighthergm', X, n_vec)
 6 | }
 7 | 
 8 | #' Compute Yule's Φ-coefficient
 9 | #' @param z_star a true block membership
10 | #' @param z an estimated block membership
11 | #' @export
12 | compute_yule_coef <- function(z_star, z) {
13 |     .Call('_lighthergm_compute_yule_coef', PACKAGE = 'lighthergm', z_star, z)
14 | }
15 | 
16 | get_sparse_feature_adjmat <- function(x) {
17 |     .Call('_lighthergm_get_sparse_feature_adjmat', PACKAGE = 'lighthergm', x)
18 | }
19 | 
20 | get_sparse_feature_adjmat_from_string <- function(x) {
21 |     .Call('_lighthergm_get_sparse_feature_adjmat_from_string', PACKAGE = 'lighthergm', x)
22 | }
23 | 
24 | get_matrix_for_denominator <- function(numOfVertices, list_feature_adjmat) {
25 |     .Call('_lighthergm_get_matrix_for_denominator', PACKAGE = 'lighthergm', numOfVertices, list_feature_adjmat)
26 | }
27 | 
28 | get_elementwise_multiplied_matrices <- function(adjmat, list_feature_adjmat) {
29 |     .Call('_lighthergm_get_elementwise_multiplied_matrices', PACKAGE = 'lighthergm', adjmat, list_feature_adjmat)
30 | }
31 | 
32 | decimal_to_binary_vector <- function(decimal, vec_length) {
33 |     .Call('_lighthergm_decimal_to_binary_vector', PACKAGE = 'lighthergm', decimal, vec_length)
34 | }
35 | 
36 | compute_sumTaus <- function(numOfVertices, numOfClasses, tau, verbose = 0L) {
37 |     .Call('_lighthergm_compute_sumTaus', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, tau, verbose)
38 | }
39 | 
40 | compute_quadratic_term_naive <- function(numOfVertices, numOfClasses, pi, tau, network) {
41 |     .Call('_lighthergm_compute_quadratic_term_naive', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, pi, tau, network)
42 | }
43 | 
44 | compute_linear_term <- function(numOfVertices, numOfClasses, alpha, tau, LB) {
45 |     .Call('_lighthergm_compute_linear_term', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, tau, LB)
46 | }
47 | 
48 | compute_pi <- function(numOfVertices, numOfClasses, stat, tau) {
49 |     .Call('_lighthergm_compute_pi', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, stat, tau)
50 | }
51 | 
52 | compute_quadratic_term <- function(numOfVertices, numOfClasses, alpha, tau, network, LB, verbose = 0L) {
53 |     .Call('_lighthergm_compute_quadratic_term', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, tau, network, LB, verbose)
54 | }
55 | 
56 | run_EM_without_features <- function(numOfVertices, numOfClasses, alpha, tau, network, verbose = 0L) {
57 |     .Call('_lighthergm_run_EM_without_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, tau, network, verbose)
58 | }
59 | 
60 | compute_denominator_for_pi_d1x0 <- function(numOfVertices, numOfClasses, matrix_for_denominator, tau, verbose) {
61 |     .Call('_lighthergm_compute_denominator_for_pi_d1x0', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, matrix_for_denominator, tau, verbose)
62 | }
63 | 
64 | compute_pi_d1x0 <- function(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, verbose) {
65 |     .Call('_lighthergm_compute_pi_d1x0', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, verbose)
66 | }
67 | 
68 | compute_quadratic_term_with_features <- function(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, LB, verbose = 0L) {
69 |     .Call('_lighthergm_compute_quadratic_term_with_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, LB, verbose)
70 | }
71 | 
72 | compute_pi_with_features <- function(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau) {
73 |     .Call('_lighthergm_compute_pi_with_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau)
74 | }
75 | 
76 | run_EM_with_features <- function(numOfVertices, numOfClasses, alpha, list_multiplied_feature_adjmat, tau, verbose = 0L) {
77 |     .Call('_lighthergm_run_EM_with_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, list_multiplied_feature_adjmat, tau, verbose)
78 | }
79 | 
80 | simulate_between_network <- function(numOfVertices, list_feature_adjmat, coef_between, block_membership, directed) {
81 |     .Call('_lighthergm_simulate_between_network', PACKAGE = 'lighthergm', numOfVertices, list_feature_adjmat, coef_between, block_membership, directed)
82 | }
83 | 
84 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-compute_pi_d1x0.R:
--------------------------------------------------------------------------------
  1 | test_that("computing pi_d1x0 works", {
  2 |   rm(list = ls())
  3 |   # Number of nodes
  4 |   N <- 12
  5 |   # Number of clusters
  6 |   K <- 3
  7 | 
  8 |   # Create an adjacency matrix
  9 |   edgelist <-
 10 |     tibble::tibble(
 11 |       tail = 1:N,
 12 |       head = 1:N
 13 |     ) %>%
 14 |     tidyr::expand(tail, head) %>%
 15 |     dplyr::filter(tail < head) %>%
 16 |     dplyr::mutate(connect = rep(0:1, nrow(.) / 2)) %>%
 17 |     dplyr::filter(connect == 1)
 18 | 
 19 |   net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
 20 |   adj <- network::as.matrix.network.adjacency(net)
 21 |   adj <- as(adj, "dgCMatrix")
 22 | 
 23 |   # Create feature matrices
 24 |   x <- as.integer(unlist(purrr::rbernoulli(n = N)))
 25 |   S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 26 |   S <- as(S, "dgCMatrix")
 27 |   for (i in 1:N) {
 28 |     for (j in 1:N) {
 29 |       if (i != j) {
 30 |         s_ij <- ifelse(x[i] == x[j], 1, 0)
 31 |         S[i, j] <- s_ij
 32 |       }
 33 |     }
 34 |   }
 35 | 
 36 |   y <- as.integer(unlist(purrr::rbernoulli(n = N)))
 37 |   V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 38 |   V <- as(V, "dgCMatrix")
 39 |   for (i in 1:N) {
 40 |     for (j in 1:N) {
 41 |       if (i != j) {
 42 |         v_ij <- ifelse(y[i] == y[j], 1, 0)
 43 |         V[i, j] <- v_ij
 44 |       }
 45 |     }
 46 |   }
 47 | 
 48 |   z <- as.integer(unlist(purrr::rbernoulli(n = N)))
 49 |   W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 50 |   W <- as(W, "dgCMatrix")
 51 |   for (i in 1:N) {
 52 |     for (j in 1:N) {
 53 |       if (i != j) {
 54 |         w_ij <- ifelse(z[i] == z[j], 1, 0)
 55 |         W[i, j] <- w_ij
 56 |       }
 57 |     }
 58 |   }
 59 | 
 60 | 
 61 | 
 62 |   # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k.
 63 |   tau <-
 64 |     matrix(c(
 65 |       0.2, 0.5, 0.3,
 66 |       0.4, 0.4, 0.2,
 67 |       0.1, 0.4, 0.5,
 68 |       0.4, 0.4, 0.2,
 69 |       0.1, 0.1, 0.8,
 70 |       0.05, 0.05, 0.9,
 71 |       0.8, 0.1, 0.1,
 72 |       0.3, 0.4, 0.3,
 73 |       0.1, 0.8, 0.1,
 74 |       0.5, 0.4, 0.1,
 75 |       0.3, 0.3, 0.4,
 76 |       0.8, 0.1, 0.1
 77 |     ),
 78 |     nrow = K, ncol = N
 79 |     )
 80 |   tau <- t(tau)
 81 | 
 82 |   # Compute the true denominator
 83 |   one <- matrix(1, nrow = N, ncol = N)
 84 |   mat <- (one - S) * (one - V) * (one - W)
 85 |   diag(mat) <- 0
 86 |   denom_for_pi0_true <- t(tau) %*% mat %*% tau
 87 |   denom_for_pi0_true <- as.matrix(denom_for_pi0_true)
 88 | 
 89 |   # Compute the true denominator in a naive way
 90 |   denom_for_pi0_naive <- matrix(0, nrow = K, ncol = K)
 91 |   for (k in 1:K) {
 92 |     for (l in 1:K) {
 93 |       for (i in 1:N) {
 94 |         for (j in 1:N) {
 95 |           if (i != j & S[i, j] == 0 & V[i, j] == 0 & W[i, j] == 0) {
 96 |             denom_for_pi0_naive[k, l] <- denom_for_pi0_naive[k, l] + tau[i, k] * tau[j, l]
 97 |           }
 98 |         }
 99 |       }
100 |     }
101 |   }
102 | 
103 |   # Check if they are the same. This verifies that the formula is correct.
104 |   expect_equal(denom_for_pi0_true, denom_for_pi0_naive, check.attributes = FALSE, tolerance = 1e-10)
105 | 
106 |   # Compute the denominator using the c++ function
107 |   denom <- get_matrix_for_denominator(N, list(S, V, W))
108 |   denom_for_pi0 <- compute_denominator_for_pi_d1x0(N, K, denom, tau, verbose = 0)
109 | 
110 |   # Check if the computed matrix is correct.
111 |   expect_equal(denom_for_pi0, denom_for_pi0_naive, check.attributes = FALSE, tolerance = 1e-10)
112 | 
113 | 
114 |   # Compute true pi1 in a naive way
115 |   pi1 <- matrix(0, nrow = K, ncol = K)
116 |   for (k in 1:K) {
117 |     for (l in 1:K) {
118 |       for (i in 1:N) {
119 |         for (j in 1:N) {
120 |           if (i != j & adj[i, j] == 1 & S[i, j] == 0 & V[i, j] == 0 & W[i, j] == 0) {
121 |             pi1[k, l] <- pi1[k, l] + tau[i, k] * tau[j, l]
122 |           }
123 |         }
124 |       }
125 |     }
126 |   }
127 |   pi1_true <- pi1 / denom_for_pi0_naive
128 | 
129 |   # Remove extremely small values in pi1
130 |   minPi <- 1e-4
131 |   for (k in 1:K) {
132 |     for (l in 1:K) {
133 |       if (pi1_true[k, l] < minPi) {
134 |         pi1_true[k, l] <- minPi
135 |       }
136 |     }
137 |   }
138 | 
139 |   # Compute pi0 using the Rcpp function
140 |   list_multiplied_adjmat <- get_elementwise_multiplied_matrices(adj, list(S, V, W))
141 |   list_multiplied_adjmat[[1]] <- denom
142 |   pi1 <- compute_pi_d1x0(N, K, list_multiplied_adjmat, tau, verbose = 0)
143 | 
144 |   # Check if the computed conditional probability is correct.
145 |   expect_equal(pi1, pi1_true, check.attributes = FALSE, tolerance = 1e-10)
146 | })
147 | 


--------------------------------------------------------------------------------
/lighthergm/src/create_feature_matrix.cpp:
--------------------------------------------------------------------------------
  1 | // Files to look to get MM version: ReciprocityModel for basic functions,
  2 | // BinaryReciprocityModel.cpp for version written already,
  3 | // MMBinaryReciprocityModel.cpp for more eleborate version
  4 | // #define ARMA_64BIT_WORD 1;
  5 | #ifdef _OPENMP
  6 |     #include <omp.h>
  7 | #else
  8 |     #define omp_get_max_threads() 0
  9 | #endif
 10 | #include <RcppArmadillo.h>
 11 | #include "helper.h"
 12 | // [[Rcpp::depends(RcppArmadillo)]]
 13 | // [[Rcpp::plugins(openmp)]]
 14 | 
 15 | 
 16 | // [[Rcpp::export]]
 17 | arma::sp_mat get_sparse_feature_adjmat(const arma::vec& x) {
 18 |   int numOfVertices = x.size();
 19 |   arma::sp_mat S(numOfVertices, numOfVertices);
 20 |   // When x[i] == x[j] and i != j, S[i,j] = 1.
 21 |   #pragma omp parallel for
 22 |   for (int j = 0; j < numOfVertices; j++) {
 23 |     for (int i = 0; i < numOfVertices; i++) {
 24 |       if (i != j) {
 25 |         if (x[i] == x[j]) {
 26 |           S(i, j) = 1;
 27 |         }
 28 |       }
 29 |     }
 30 |   }
 31 |   return S;
 32 | }
 33 | 
 34 | // [[Rcpp::export]]
 35 | arma::sp_mat get_sparse_feature_adjmat_from_string(const Rcpp::StringVector& x) {
 36 |   int numOfVertices = x.size();
 37 |   arma::sp_mat S(numOfVertices, numOfVertices);
 38 |   // When x[i] == x[j] and i != j, S[i,j] = 1.
 39 |   #pragma omp parallel for
 40 |   for (int j = 0; j < numOfVertices; j++) {
 41 |     for (int i = 0; i < numOfVertices; i++) {
 42 |       if (i != j) {
 43 |         if (x[i] == x[j]) {
 44 |           S(i, j) = 1;
 45 |         }
 46 |       }
 47 |     }
 48 |   }
 49 |   return S;
 50 | }
 51 | 
 52 | // Compute something like X := - (S + T + U) + (S % T + T % U + U % S) - S % T % U.
 53 | // [[Rcpp::export]]
 54 | arma::sp_mat get_matrix_for_denominator(int numOfVertices, const Rcpp::List& list_feature_adjmat)
 55 | {
 56 |   int n_feature = list_feature_adjmat.length();
 57 |   int n_item = pow(2, n_feature);
 58 |   arma::sp_mat output(numOfVertices, numOfVertices);
 59 | 
 60 |   // It is difficult to explain this part...
 61 |   for (int s = 1; s < n_item; s++) {
 62 |     arma::vec index = decimal_to_binary_vector(s, n_feature);
 63 |     int k = sum(index);
 64 |     arma::sp_mat X(numOfVertices, numOfVertices);
 65 |     // Set a counter
 66 |     int counter = 0;
 67 |     for (int t = 0; t < n_feature; t++) {
 68 |       if (index[t] == 1) {
 69 |         arma::sp_mat S = list_feature_adjmat[t];
 70 |         counter += 1;
 71 |         if (counter == 1) {
 72 |           X = S;
 73 |         } else {
 74 |           X = X % S;
 75 |         }
 76 |       }
 77 |     }
 78 |     // X = arma::trimatu(X);
 79 |     output += pow(-1, k) * X;
 80 |   }
 81 |   return output;
 82 | }
 83 | 
 84 | 
 85 | // [[Rcpp::export]]
 86 | Rcpp::List get_elementwise_multiplied_matrices(const arma::sp_mat& adjmat,
 87 |                                                const Rcpp::List& list_feature_adjmat) {
 88 |   // Number of nodes
 89 |   int n_node = adjmat.n_rows;
 90 |   // Number of feature matrices
 91 |   int n_feature = list_feature_adjmat.length();
 92 | 
 93 |   // Append all the matrices in a single list
 94 |   Rcpp::List list_mat(n_feature+1);
 95 |   list_mat[0] = adjmat;
 96 |   for (int i = 0; i < n_feature; i++) {
 97 |     list_mat[i+1] = list_feature_adjmat[i];
 98 |   }
 99 | 
100 |   // Create a list to store multiplied matrices
101 |   int n_matrix = list_mat.length();
102 |   int length_output = pow(2, n_matrix);
103 |   Rcpp::List output(length_output);
104 | 
105 |   // The first element of the output list should contain the matrix for the denominator of pi_d0x0.
106 |   arma::sp_mat denom = get_matrix_for_denominator(n_node, list_feature_adjmat);
107 |   output[0] = denom;
108 | 
109 |   // Element-wise matrix multiplication without breaking sparsity
110 |   for (int s = 1; s < length_output; s++) {
111 |     // Convert an integer to a binary numeric vector
112 |     arma::vec index = decimal_to_binary_vector(s, n_matrix);
113 |     // Initialize a sparse matrix
114 |     arma::sp_mat X(n_node, n_node);
115 |     // Set a counter
116 |     int counter = 0;
117 | 
118 |     // Start element-wise matrix multiplication
119 |     for (int t = 0; t < n_matrix; t++) {
120 |       // Prepare a matrix to be multiplied
121 |       arma::sp_mat S = list_mat[t];
122 |       // First, multiply matrices that don't need subtraction like (one - X).
123 |       if (index[t] == 1) {
124 |         counter += 1;
125 |         if (counter == 1) {
126 |           X = S;
127 |         } else {
128 |           X = X % S;
129 |         }
130 |       }
131 |     }
132 | 
133 |     // Then multiply the rest of the matrices
134 |     for (int t = 0; t < n_matrix; t++) {
135 |       // Prepare a matrix to be multiplied
136 |       arma::sp_mat S = list_mat[t];
137 |       if (index[t] == 0) {
138 |         X = X - S % X;
139 |       }
140 |     }
141 |     // Lastly, store the multiplied matrix in the output list
142 |     output[s] = X;
143 |   }
144 |   // Return the output
145 |   return output;
146 | }
147 | 
148 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-compute_quadratic_term_with_features.R:
--------------------------------------------------------------------------------
  1 | test_that("computing a quadratic term with multiple features works", {
  2 |   set.seed(334)
  3 |   # Number of nodes
  4 |   N <- 12
  5 |   # Number of clusters
  6 |   K <- 3
  7 | 
  8 |   # Create an adjacency matrix
  9 |   edgelist <-
 10 |     tibble::tibble(
 11 |       tail = 1:N,
 12 |       head = 1:N
 13 |     ) %>%
 14 |     tidyr::expand(tail, head) %>%
 15 |     dplyr::filter(tail < head) %>%
 16 |     dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>%
 17 |     dplyr::filter(connect == 1)
 18 | 
 19 |   net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
 20 |   adj <- network::as.matrix.network.adjacency(net)
 21 |   adj <- as(adj, "dgCMatrix")
 22 | 
 23 |   # Create feature matrices
 24 |   x <- as.integer(unlist(purrr::rbernoulli(n = N)))
 25 |   S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 26 |   S <- as(S, "dgCMatrix")
 27 |   for (i in 1:N) {
 28 |     for (j in 1:N) {
 29 |       if (i != j) {
 30 |         s_ij <- ifelse(x[i] == x[j], 1, 0)
 31 |         S[i, j] <- s_ij
 32 |       }
 33 |     }
 34 |   }
 35 | 
 36 |   y <- as.integer(unlist(purrr::rbernoulli(n = N)))
 37 |   V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 38 |   V <- as(V, "dgCMatrix")
 39 |   for (i in 1:N) {
 40 |     for (j in 1:N) {
 41 |       if (i != j) {
 42 |         v_ij <- ifelse(y[i] == y[j], 1, 0)
 43 |         V[i, j] <- v_ij
 44 |       }
 45 |     }
 46 |   }
 47 | 
 48 |   z <- as.integer(unlist(purrr::rbernoulli(n = N)))
 49 |   W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 50 |   W <- as(W, "dgCMatrix")
 51 |   for (i in 1:N) {
 52 |     for (j in 1:N) {
 53 |       if (i != j) {
 54 |         w_ij <- ifelse(z[i] == z[j], 1, 0)
 55 |         W[i, j] <- w_ij
 56 |       }
 57 |     }
 58 |   }
 59 | 
 60 |   # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k.
 61 |   tau <-
 62 |     matrix(c(
 63 |       0.2, 0.5, 0.3,
 64 |       0.4, 0.4, 0.2,
 65 |       0.1, 0.4, 0.5,
 66 |       0.4, 0.4, 0.2,
 67 |       0.1, 0.1, 0.8,
 68 |       0.05, 0.05, 0.9,
 69 |       0.8, 0.1, 0.1,
 70 |       0.3, 0.4, 0.3,
 71 |       0.1, 0.8, 0.1,
 72 |       0.5, 0.4, 0.1,
 73 |       0.3, 0.3, 0.4,
 74 |       0.8, 0.1, 0.1
 75 |     ),
 76 |     nrow = K, ncol = N
 77 |     )
 78 |   tau <- t(tau)
 79 | 
 80 |   ###########################################################
 81 |   # Compute the true quadratic term in a naive way
 82 |   ###########################################################
 83 |   # Compute pi for D_ij = 1
 84 |   minPi <- 1e-4
 85 |   list_pi <- list()
 86 |   for (w in 0:1) {
 87 |     for (v in 0:1) {
 88 |       for (s in 0:1) {
 89 |         print(glue::glue("Compute pi for pi_s{s}v{v}w{w}"))
 90 |         denom <- matrix(0, nrow = K, ncol = K)
 91 |         num <- matrix(0, nrow = K, ncol = K)
 92 |         index <- s + 2 * v + 4 * w + 1
 93 |         print(index)
 94 |         for (k in 1:K) {
 95 |           for (l in 1:K) {
 96 |             for (i in 1:N) {
 97 |               for (j in 1:N) {
 98 |                 if (i != j & S[i, j] == s & V[i, j] == v & W[i, j] == w) {
 99 |                   denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l]
100 |                 }
101 |                 if (i != j & adj[i, j] == 1 & S[i, j] == s & V[i, j] == v & W[i, j] == w) {
102 |                   num[k, l] <- num[k, l] + tau[i, k] * tau[j, l]
103 |                 }
104 |               }
105 |             }
106 |           }
107 |         }
108 |         pi <- num / denom
109 |         # Remove extremely small elements in pi
110 |         for (k in 1:K) {
111 |           for (l in 1:K) {
112 |             if (pi[k, l] < minPi) {
113 |               pi[k, l] <- minPi
114 |             }
115 |           }
116 |         }
117 |         list_pi[[index]] <- pi
118 |       }
119 |     }
120 |   }
121 |   # Compute the quadratic term
122 |   A_true <- matrix(0, nrow = N, ncol = K)
123 |   for (i in 1:N) {
124 |     for (j in 1:N) {
125 |       if (i != j) {
126 |         # For each ij, determine which pi must be used.
127 |         index_ij <- S[i, j] + 2 * V[i, j] + 4 * W[i, j] + 1
128 |         pi_ij <- list_pi[[index_ij]]
129 |         # if D_ij = 0, replace pi with 1 - pi.
130 |         if (adj[i, j] == 0) {
131 |           pi_ij <- 1 - pi_ij
132 |         }
133 |         for (k in 1:K) {
134 |           for (l in 1:K) {
135 |             a_ij <- tau[j, l] * log(pi_ij[k, l])
136 |             A_true[i, k] <- A_true[i, k] + a_ij
137 |           }
138 |         }
139 |       }
140 |     }
141 |   }
142 | 
143 |   A_true <- 1 - A_true / 2
144 | 
145 |   # Divide A by alpha_{ik}
146 |   A_true <- A_true / tau
147 | 
148 |   ###########################################################
149 |   # Compute the quadratic term using the cpp function
150 |   ###########################################################
151 |   list_feature_adjmat <- list(S, V, W)
152 |   list_multiplied_feature_adjmat <- get_elementwise_multiplied_matrices(adj, list_feature_adjmat)
153 |   A <- compute_quadratic_term_with_features(N, K, list_multiplied_feature_adjmat, tau, 0, 0)
154 | 
155 |   expect_equal(A, A_true, check.attributes = FALSE, tolerance = 1e-10)
156 | })
157 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-within-network-stats.R:
--------------------------------------------------------------------------------
  1 | test_that("generating multiple within-block networks works", {
  2 |   set.seed(1)
  3 |   # Prepare ingredients for simulating a network
  4 |   N <- 1000
  5 |   K <- 10
  6 | 
  7 |   list_within_params <- c(-3, 1, 1, 0.76, 0.08)
  8 |   list_between_params <- c(-5, 2, 2)
  9 |   formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle + kstar(2)
 10 | 
 11 |   memb <- sample(1:K, size = N, replace = TRUE)
 12 |   vertex_id <- as.character(11:(11 + N - 1))
 13 | 
 14 |   x <- sample(1:20, size = N, replace = TRUE)
 15 |   y <- sample(LETTERS, size = N, replace = TRUE)
 16 | 
 17 | 
 18 |   df <- tibble::tibble(
 19 |     id = vertex_id,
 20 |     memb = memb,
 21 |     x = x,
 22 |     y = y
 23 |   )
 24 | 
 25 |   # Obtain the stats
 26 |   within_sim_stats <- lighthergm::simulate_hergm_within(
 27 |     formula_for_simulation = formula,
 28 |     data_for_simulation = df,
 29 |     colname_vertex_id = 'id',
 30 |     colname_block_membership = 'memb',
 31 |     coef_within_block = list_within_params,
 32 |     ergm_control = ergm::control.simulate.formula(),
 33 |     seed = 1,
 34 |     n_sim = 3
 35 |   )
 36 | 
 37 |   expected_terms <- statnet.common::list_rhs.formula(formula)
 38 | 
 39 |   expect_equal(nrow(within_sim_stats), 3)
 40 |   expect_equal(length(expected_terms), length(names(within_sim_stats)))
 41 | 
 42 | })
 43 | 
 44 | test_that("simulating a network from a given edgelist works", {
 45 |   set.seed(1)
 46 |   # Prepare ingredients for simulating a network
 47 |   N <- 1000
 48 |   K <- 10
 49 | 
 50 |   list_within_params <- c(-3, 1, 1, 0.76, 0.08)
 51 |   list_between_params <- c(-5, 2, 2)
 52 |   formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle + kstar(2)
 53 | 
 54 |   memb <- sample(1:K, size = N, replace = TRUE)
 55 |   vertex_id <- as.character(11:(11 + N - 1))
 56 | 
 57 |   x <- sample(1:20, size = N, replace = TRUE)
 58 |   y <- sample(LETTERS, size = N, replace = TRUE)
 59 | 
 60 |   df <- tibble::tibble(
 61 |     id = vertex_id,
 62 |     memb = memb,
 63 |     x = x,
 64 |     y = y
 65 |   )
 66 | 
 67 |   # Simulate a network
 68 |   g_sim <-
 69 |     simulate_hergm_within(
 70 |       formula_for_simulation = formula,
 71 |       data_for_simulation = df,
 72 |       colname_vertex_id = "id",
 73 |       colname_block_membership = "memb",
 74 |       coef_within_block = list_within_params,
 75 |       coef_between_block = list_between_params,
 76 |       ergm_control = ergm::control.simulate.formula(
 77 |         MCMC.burnin = 10000,
 78 |         MCMC.interval = 100
 79 |       ),
 80 |       seed_for_within = 1,
 81 |       seed_for_between = 1,
 82 |       n_sim = 1,
 83 |       directed = FALSE,
 84 |       output = 'network'
 85 |     )
 86 | 
 87 |   g_sim <- network::as.edgelist(g_sim)
 88 | 
 89 |   # Simulate a within-block network from a given edgelist
 90 |   g2 <-
 91 |     simulate_hergm_within(
 92 |       formula_for_simulation = formula,
 93 |       data_for_simulation = df,
 94 |       colname_vertex_id = "id",
 95 |       colname_block_membership = "memb",
 96 |       coef_within_block = list_within_params,
 97 |       coef_between_block = list_between_params,
 98 |       # These settings should result on the exact same network being returned
 99 |       # after one simulation. Check that.
100 |       ergm_control = ergm::control.simulate.formula(
101 |         MCMC.burnin = 0,
102 |         MCMC.interval = 1
103 |       ),
104 |       seed_for_within = 1,
105 |       seed_for_between = 1,
106 |       seed_edgelist = g_sim,
107 |       n_sim = 1,
108 |       directed = FALSE,
109 |       output = 'network'
110 |     )
111 | 
112 |   expect_match(class(g2), "network")
113 | 
114 |   g2 <- network::as.edgelist(g2)
115 | 
116 |   # Check if the network is correctly generated
117 |   expect_equal(nrow(g_sim), nrow(g2))
118 |   expect_true(all(g_sim == g2))
119 | })
120 | 
121 | test_that("The within-simulation begins from an empty network by default", {
122 |   set.seed(1)
123 |   # Prepare ingredients for simulating a network
124 |   N <- 1000
125 |   K <- 10
126 | 
127 |   list_within_params <- c(-3, 1, 1, 0.76, 0.08)
128 |   list_between_params <- c(-5, 2, 2)
129 |   formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle + kstar(2)
130 | 
131 |   memb <- sample(1:K, size = N, replace = TRUE)
132 |   vertex_id <- as.character(11:(11 + N - 1))
133 | 
134 |   x <- sample(1:20, size = N, replace = TRUE)
135 |   y <- sample(LETTERS, size = N, replace = TRUE)
136 | 
137 |   df <- tibble::tibble(
138 |     id = vertex_id,
139 |     memb = memb,
140 |     x = x,
141 |     y = y
142 |   )
143 | 
144 |   # Simulate a network
145 |   g_sim <-
146 |     simulate_hergm_within(
147 |       formula_for_simulation = formula,
148 |       data_for_simulation = df,
149 |       colname_vertex_id = "id",
150 |       colname_block_membership = "memb",
151 |       coef_within_block = list_within_params,
152 |       coef_between_block = list_between_params,
153 |       ergm_control = ergm::control.simulate.formula(
154 |         MCMC.burnin = 0,
155 |         MCMC.interval = 1
156 |       ),
157 |       seed_for_within = 1,
158 |       seed_for_between = 1,
159 |       n_sim = 1,
160 |       directed = FALSE,
161 |       output = 'network'
162 |     )
163 | 
164 |   expect_match(class(g_sim), "network")
165 |   g_sim <- network::as.edgelist(g_sim)
166 | 
167 |   # Check if the network is correctly generated
168 |   expect_equal(nrow(g_sim), 0)
169 | })
170 | 


--------------------------------------------------------------------------------
/lighthergm/man/simulate_hergm.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/simulate_hergm.R
  3 | \name{simulate_hergm}
  4 | \alias{simulate_hergm}
  5 | \title{Simulate a network}
  6 | \usage{
  7 | simulate_hergm(
  8 |   formula_for_simulation,
  9 |   data_for_simulation,
 10 |   colname_vertex_id,
 11 |   colname_block_membership,
 12 |   seed_edgelist = NULL,
 13 |   coef_within_block,
 14 |   coef_between_block,
 15 |   ergm_control = ergm::control.simulate.formula(),
 16 |   seed = NULL,
 17 |   directed = FALSE,
 18 |   n_sim = 1,
 19 |   output = "network",
 20 |   prevent_duplicate = TRUE,
 21 |   use_fast_between_simulation = FALSE,
 22 |   list_feature_matrices = NULL,
 23 |   verbose = 0,
 24 |   ...
 25 | )
 26 | }
 27 | \arguments{
 28 | \item{formula_for_simulation}{formula for simulating a network}
 29 | 
 30 | \item{data_for_simulation}{a data frame that contains vertex id, block membership, and vertex features.}
 31 | 
 32 | \item{colname_vertex_id}{a column name in the data frame for the vertex id}
 33 | 
 34 | \item{colname_block_membership}{a column name in the data frame for the block membership}
 35 | 
 36 | \item{seed_edgelist}{an edgelist used for creating a seed network. It should have the "edgelist" class}
 37 | 
 38 | \item{coef_within_block}{a vector of within-block parameters. The order of the parameters should match that of the formula.}
 39 | 
 40 | \item{coef_between_block}{a vector of between-block parameters. The order of the parameters should match that of the formula without externality terms.}
 41 | 
 42 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation}
 43 | 
 44 | \item{seed}{seed value (integer) for network simulation.}
 45 | 
 46 | \item{directed}{whether the simulated network is directed}
 47 | 
 48 | \item{n_sim}{number of networks generated}
 49 | 
 50 | \item{output}{Normally character, one of "network" (default), "stats", "edgelist", to determine the output format.}
 51 | 
 52 | \item{prevent_duplicate}{If \code{TRUE}, the coefficient on nodematch("block") is set to be a very large negative number in drawing between-block links, so that there will be (almost) no within-block links.}
 53 | 
 54 | \item{use_fast_between_simulation}{If \code{TRUE}, this function uses an effcient way to simulate a between-block network. If the network is very large, you should consider using this option.
 55 | Note that when you use this, the first element of \code{coef_between_block} must be the edges parameter.}
 56 | 
 57 | \item{list_feature_matrices}{a list of feature adjacency matrices. If \code{use_fast_between_simulation}, this must be given.}
 58 | 
 59 | \item{verbose}{If this is TRUE/1, the program will print out additional information about the progress of simulation.}
 60 | 
 61 | \item{...}{Additional arguments, to be passed to lower-level functions}
 62 | }
 63 | \description{
 64 | Simulate a network
 65 | }
 66 | \examples{
 67 | # Load an embedded network object.
 68 | data(toyNet)
 69 | 
 70 | # Specify the model that you would like to estimate.
 71 | model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle
 72 | 
 73 | # Estimate the model
 74 | hergm_res <-
 75 |   lighthergm::hergm(
 76 |     object = model_formula, # The model you would like to estiamte
 77 |     n_clusters = 4, # The number of blocks
 78 |     n_em_step_max = 100, # The maximum number of EM algorithm steps
 79 |     estimate_parameters = TRUE, # Perform parameter estimation after the block recovery step
 80 |     clustering_with_features = TRUE, # Indicate that clustering must take into account nodematch on characteristics
 81 |     check_block_membership = TRUE # Keep track of block memberships at each EM iteration
 82 |   )
 83 | 
 84 | # Prepare a data frame that contains nodal id and covariates.
 85 | nodes_data <-
 86 |   data.frame(
 87 |     node_id = network::network.vertex.names(toyNet),
 88 |     block = hergm_res$partition,
 89 |     x = network::get.vertex.attribute(toyNet, "x"),
 90 |     y = network::get.vertex.attribute(toyNet, "y")
 91 |   )
 92 | # The feature adjacency matrices
 93 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(toyNet, model_formula)
 94 | 
 95 | # Estimated coefficients for the between-community connections
 96 | coef_between_block <- coef(hergm_res$est_between)
 97 | 
 98 | # Estimated coefficients for the within-community connections
 99 | coef_within_block <- coef(hergm_res$est_within)
100 | 
101 | # The MCMC settings
102 | sim_ergm_control <- ergm::control.simulate.formula(
103 |   MCMC.burnin = 1000000,
104 |   MCMC.interval = 100000
105 | )
106 | 
107 | # Simulate network stats
108 | sim_stats <- lighthergm::simulate_hergm(
109 |   formula_for_simulation = model_formula, # Formula for between-blocks
110 |   data_for_simulation = nodes_data, # Same as for gof, a dataframe containing nodes attributes
111 |   colname_vertex_id = "node_id", # Name of the column containing node IDs
112 |   colname_block_membership = "block", # Name of the column containing block IDs
113 |   coef_between_block = coef_between_block, # The coefficients for the between connections
114 |   coef_within_block = coef_within_block, # The coefficients for the within connections
115 |   ergm_control = sim_ergm_control,
116 |   n_sim = 100, # Number of simulations to return
117 |   output = "stats", # If `stats` a list with network statistics for the between and within connections is returned
118 |   use_fast_between_simulation = TRUE, # Simulates between connections by drawing from a logistic distribution. If FALSE, draws between connections by MCMC.
119 |   list_feature_matrices = list_feature_matrices
120 | )
121 | }
122 | 


--------------------------------------------------------------------------------
/lighthergm/man/hergm.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/hergm.R
  3 | \name{hergm}
  4 | \alias{hergm}
  5 | \title{Hierarchical exponential-family random graph models (HERGMs) with local dependence}
  6 | \usage{
  7 | hergm(
  8 |   object,
  9 |   n_clusters,
 10 |   n_cores = 1,
 11 |   block_membership = NULL,
 12 |   estimate_parameters = TRUE,
 13 |   verbose = 0,
 14 |   n_em_step_max = 100,
 15 |   initialization_method = 1,
 16 |   use_infomap_python = FALSE,
 17 |   seed_infomap = NULL,
 18 |   weight_for_initialization = 1000,
 19 |   seeds = NULL,
 20 |   initialized_cluster_data = NULL,
 21 |   method_second_step = "MPLE",
 22 |   clustering_with_features = TRUE,
 23 |   list_multiplied_feature_matrices = NULL,
 24 |   fix_covariate_parameter = FALSE,
 25 |   compute_pi = FALSE,
 26 |   check_alpha_update = FALSE,
 27 |   check_block_membership = FALSE,
 28 |   cache = NULL,
 29 |   ...
 30 | )
 31 | }
 32 | \arguments{
 33 | \item{object}{A formula or \code{lighthergm} class object. A \code{lighthergm} is returned by \code{hergm()}.
 34 | When you pass a \code{lighthergm} class object to \code{hergm()}, you can restart the EM step.}
 35 | 
 36 | \item{n_clusters}{The number of blocks. This must be specified by the user.
 37 | When you pass a "lighthergm" class object to \code{hergm()}, you don't have to specify this argument.}
 38 | 
 39 | \item{n_cores}{The number of CPU cores to use.}
 40 | 
 41 | \item{block_membership}{The pre-specified block memberships for each node.
 42 | If \code{NULL}, the latent community structure is estimated, assuming that the number of communities is \code{n_clusters}.}
 43 | 
 44 | \item{estimate_parameters}{If \code{TRUE}, both clustering and parameter estimation are implemented.
 45 | If \code{FALSE}, only clustering is executed.}
 46 | 
 47 | \item{verbose}{A logical or an integer: if this is TRUE/1,
 48 | the program will print out additional information about the progress of estimation and simulation.
 49 | A higher value yields lower level information.}
 50 | 
 51 | \item{n_em_step_max}{The maximum number of EM iterations.
 52 | Currently, no early stopping criteria is introduced. Thus \code{n_em_step_max} EM iterations are exactly implemented.}
 53 | 
 54 | \item{initialization_method}{Cluster initialization method.
 55 | If \code{1} (the default), \code{igraph}'s infomap is implemented.
 56 | If \code{2}, the initial clusters are randomly uniformally selected.
 57 | If \code{3}, spectral clustering is conducted.}
 58 | 
 59 | \item{use_infomap_python}{If \code{TRUE}, the cluster initialization is implemented using Pythons' infomap.
 60 | When using this, make sure that Python's infomap is installed and callable from `system().}
 61 | 
 62 | \item{seed_infomap}{seed value (integer) for Python's infomap.}
 63 | 
 64 | \item{weight_for_initialization}{weight value used for cluster initialization. The higher this value, the more weight is put on the initialized alpha.}
 65 | 
 66 | \item{seeds}{seed value (integer) for the random number generator}
 67 | 
 68 | \item{initialized_cluster_data}{initialized cluster data from which the EM iterations begin.
 69 | This can be either a vector of block affiliations of each node or initialized cluster data by Python's infomap (given by .clu format).}
 70 | 
 71 | \item{method_second_step}{If "MPLE" (the default), then the maximum pseudolikelihood estimator is implemented when estimating the within-block network model.
 72 | If "MLE", then an approximate maximum likelihood estimator is conducted.}
 73 | 
 74 | \item{clustering_with_features}{If \code{TRUE}, clustering is implemented using the discrete covariates specified in the formula.}
 75 | 
 76 | \item{list_multiplied_feature_matrices}{a list of multiplied feature adjacency matarices necessary for EM step.
 77 | If \code{NULL}, \code{hergm()} automatically calculates. Or you can calculate by \code{compute_multiplied_feature_matrices()}.}
 78 | 
 79 | \item{fix_covariate_parameter}{If \code{TRUE}, when estimating the within-block network model,
 80 | parameters for covariates are fixed at the estimated of the between-block network model.}
 81 | 
 82 | \item{compute_pi}{If \code{TRUE}, this function keeps track of pi matrices at each EM iteration.
 83 | If the network is large, we strongly recommend to set to be \code{FALSE}.}
 84 | 
 85 | \item{check_alpha_update}{If \code{TRUE}, this function keeps track of alpha matrices at each EM iteration.
 86 | If the network is large, we strongly recommend to set to be \code{FALSE}.}
 87 | 
 88 | \item{check_block_membership}{If TRUE, this function keeps track of estimated block memberships at each EM iteration.}
 89 | 
 90 | \item{cache}{a \code{cachem} cache object used to store intermediate calculations such as eigenvector decomposition results.}
 91 | 
 92 | \item{...}{Additional arguments, to be passed to lower-level functions}
 93 | }
 94 | \description{
 95 | The function hergm estimates and simulates three classes of hierarchical exponential-family random graph models.
 96 | }
 97 | \examples{
 98 | # Load an embedded network object.
 99 | data(toyNet)
100 | 
101 | # Specify the model that you would like to estimate.
102 | model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle
103 | 
104 | # Estimate the model
105 | hergm_res <-
106 |   lighthergm::hergm(
107 |     object = model_formula, # The model you would like to estiamte
108 |     n_clusters = 4, # The number of blocks
109 |     n_em_step_max = 100, # The maximum number of EM algorithm steps
110 |     estimate_parameters = TRUE, # Perform parameter estimation after the block recovery step
111 |     clustering_with_features = TRUE, # Indicate that clustering must take into account nodematch on characteristics
112 |     check_block_membership = TRUE # Keep track of block memberships at each EM iteration
113 |   )
114 | }
115 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-estimate_within_param.R:
--------------------------------------------------------------------------------
  1 | test_that("estimating within-block parameters works", {
  2 |   #################################################################
  3 |   ### Network preparation #########################################
  4 |   #################################################################
  5 |   # Create an adjacency matrix
  6 |   adj <- c(
  7 |     c(0, 1, 0, 1, 1, 0),
  8 |     c(1, 0, 1, 0, 0, 1),
  9 |     c(0, 1, 0, 1, 1, 0),
 10 |     c(1, 0, 1, 0, 1, 1),
 11 |     c(1, 0, 1, 1, 0, 1),
 12 |     c(0, 1, 0, 1, 1, 0)
 13 |   )
 14 |   adj <- matrix(data = adj, nrow = 6, ncol = 6)
 15 |   rownames(adj) <- as.character(1001:1006)
 16 |   colnames(adj) <- as.character(1001:1006)
 17 | 
 18 |   # Vertex attribute
 19 |   x <- c(1, 0, 0, 1, 1, 0)
 20 | 
 21 |   # Block
 22 |   block <- c(1, 2, 3, 1, 3, 2)
 23 | 
 24 |   g <- network::network(adj, matrix.type = "adjacency")
 25 |   network::set.vertex.attribute(g, attrname = "x", value = x)
 26 | 
 27 |   #################################################################
 28 |   ### Estimate the within-block parameters ########################
 29 |   #################################################################
 30 |   suppressWarnings(est <- estimate_within_params(
 31 |     formula = g ~ edges + nodematch("x"),
 32 |     network = g,
 33 |     z_memb = block,
 34 |     parallel = FALSE,
 35 |     verbose = 0,
 36 |     initial_estimate = NULL,
 37 |     seeds = NULL,
 38 |     method_second_step = "MPLE"
 39 |   ))
 40 | 
 41 | 
 42 |   # Get the network used for estimation
 43 |   g_est <- est$network
 44 | 
 45 |   # Get the adjacency matrix for the network
 46 |   adj_est <- network::as.matrix.network.adjacency(g_est)
 47 | 
 48 |   #################################################################
 49 |   ### Test if the created and true networks are the same  #########
 50 |   #################################################################
 51 | 
 52 |   # Prepare the answer
 53 |   adj_ans <- c(
 54 |     c(0, 1, 0, 0, 0, 0),
 55 |     c(1, 0, 0, 0, 0, 0),
 56 |     c(0, 0, 0, 1, 0, 0),
 57 |     c(0, 0, 1, 0, 0, 0),
 58 |     c(0, 0, 0, 0, 0, 1),
 59 |     c(0, 0, 0, 0, 1, 0)
 60 |   )
 61 | 
 62 |   # Order of vertex id after diagonization: 1001, 1004, 1002, 1006, 1003, 1005
 63 |   # Order of original x: c(1, 0, 0, 1, 1, 0)
 64 |   vertex_id_ans <- as.character(c(1001, 1004, 1002, 1006, 1003, 1005))
 65 |   x_ans <- c(1, 1, 0, 0, 0, 1)
 66 | 
 67 |   # Test
 68 |   expect_equal(adj_est, adj_ans, check.attributes = FALSE)
 69 |   expect_equal(network::get.vertex.attribute(g_est, "x"), x_ans)
 70 |   expect_equal(network::network.vertex.names(g_est), vertex_id_ans)
 71 | })
 72 | 
 73 | test_that("estimating within-block parameters works with non-consecutive block names", {
 74 |   adj <- c(
 75 |     c(0, 1, 0, 1, 1, 0),
 76 |     c(1, 0, 1, 0, 0, 1),
 77 |     c(0, 1, 0, 1, 1, 0),
 78 |     c(1, 0, 1, 0, 1, 1),
 79 |     c(1, 0, 1, 1, 0, 1),
 80 |     c(0, 1, 0, 1, 1, 0)
 81 |   )
 82 |   adj <- matrix(data = adj, nrow = 6, ncol = 6)
 83 |   rownames(adj) <- as.character(1001:1006)
 84 |   colnames(adj) <- as.character(1001:1006)
 85 | 
 86 |   x <- c(1, 0, 0, 1, 1, 0)
 87 | 
 88 |   # Use non-consecutive block names
 89 |   block <- c(50, 70, 95, 50, 95, 70)
 90 | 
 91 |   g <- network::network(adj, matrix.type = "adjacency")
 92 |   network::set.vertex.attribute(g, attrname = "x", value = x)
 93 | 
 94 |   suppressWarnings(est <- estimate_within_params(
 95 |     formula = g ~ edges + nodematch("x"),
 96 |     network = g,
 97 |     z_memb = block,
 98 |     parallel = FALSE,
 99 |     verbose = 0,
100 |     initial_estimate = NULL,
101 |     seeds = NULL,
102 |     method_second_step = "MPLE"
103 |   ))
104 | 
105 | 
106 |   # Get the network used for estimation
107 |   g_est <- est$network
108 | 
109 |   # Get the adjacency matrix for the network
110 |   adj_est <- network::as.matrix.network.adjacency(g_est)
111 | 
112 |   adj_ans <- c(
113 |     c(0, 1, 0, 0, 0, 0),
114 |     c(1, 0, 0, 0, 0, 0),
115 |     c(0, 0, 0, 1, 0, 0),
116 |     c(0, 0, 1, 0, 0, 0),
117 |     c(0, 0, 0, 0, 0, 1),
118 |     c(0, 0, 0, 0, 1, 0)
119 |   )
120 | 
121 |   vertex_id_ans <- as.character(c(1001, 1004, 1002, 1006, 1003, 1005))
122 |   x_ans <- c(1, 1, 0, 0, 0, 1)
123 | 
124 |   # Check that the network is the same
125 |   expect_equal(adj_est, adj_ans, check.attributes = FALSE)
126 |   expect_equal(network::get.vertex.attribute(g_est, "x"), x_ans)
127 |   expect_equal(network::network.vertex.names(g_est), vertex_id_ans)
128 | 
129 |   # Check that the blocks are assigned to the right nodes
130 |   g_nodes_data <- data.frame(
131 |     id = network::network.vertex.names(g),
132 |     block = block
133 |   ) %>% dplyr::arrange(id)
134 | 
135 |   est_g_nodes_data <- data.frame(
136 |     id = network::network.vertex.names(g_est),
137 |     block = as.double(network::get.vertex.attribute(g_est, 'block'))
138 |   ) %>% dplyr::arrange(id)
139 | 
140 |   expect_equal(g_nodes_data$id, est_g_nodes_data$id)
141 |   expect_equal(g_nodes_data$block, est_g_nodes_data$block)
142 | })
143 | 
144 | test_that("control.ergm settings can be passed to the within estimation from hergm function", {
145 |   # Define some settings for testing purposes
146 |   test_burnin <- 9797
147 |   test_interval <- 3434
148 |   test_method <- 'Stepping'
149 | 
150 |   hergm_formula <- g ~ edges + triangle + nodematch("x")
151 | 
152 |   n_nodes <- 100
153 |   n_clusters <- 2
154 | 
155 |   nodes_data <- tibble::tibble(
156 |     node_id = 1:n_nodes,
157 |     x = sample(1:2, size = n_nodes, replace = T),
158 |     block = sample(1:n_clusters, size = n_nodes, replace = T)
159 |   )
160 | 
161 |   g <- network::network.initialize(n = n_nodes)
162 |   network::set.vertex.attribute(g, "x", nodes_data$x)
163 |   list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(g, hergm_formula)
164 | 
165 |   coef_between_block <- c(-3, 1)
166 |   coef_within_block <- c(-2, 0.1, 0.5)
167 | 
168 |   sim_ergm_control <- ergm::control.simulate.formula(
169 |     MCMC.burnin = 4000000,
170 |     MCMC.interval = 200000
171 |   )
172 | 
173 |   g <- lighthergm::simulate_hergm(
174 |     formula_for_simulation = hergm_formula,
175 |     data_for_simulation = nodes_data,
176 |     colname_vertex_id = "node_id",
177 |     colname_block_membership = "block",
178 |     coef_between_block = coef_between_block,
179 |     coef_within_block = coef_within_block,
180 |     ergm_control = sim_ergm_control,
181 |     fast_between_simulation = TRUE,
182 |     list_feature_matrices = list_feature_matrices
183 |   )
184 | 
185 |   hergm_res <- lighthergm::hergm(
186 |     g ~ edges + nodematch("x") + triangle,
187 |     n_clusters = n_clusters,
188 |     n_em_step_max = 10,
189 |     estimate_parameters = T,
190 |     clustering_with_features = T,
191 |     method_second_step = 'MLE',
192 |     control = ergm::control.ergm(
193 |       MCMC.burnin = test_burnin,
194 |       MCMC.interval = test_interval,
195 |       main.method = test_method
196 |     )
197 |   )
198 | 
199 |   used_control <- hergm_res$est_within$control
200 | 
201 |   expect_equal(used_control$MCMC.burnin, test_burnin)
202 |   expect_equal(used_control$MCMC.interval, test_interval)
203 |   expect_equal(used_control$main.method, test_method)
204 | })
205 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![lighthergm-logo](lighthergm.jpg "Title")
  2 | 
  3 | # `lighthergm`: Fit, Simulate, and Diagnose Hierarchical Exponential-Family Models for Networks in A Scalable Way
  4 | 
  5 | `lighthergm` is an R library for estimating Hierarchical Exponential Random Graph Models (HERGMs) efficiently on large networks. It also contains tools for simulating networks with local dependence and for assessing the goodness-of-fit of the estimates. It is built upon the existing R package `hergm` (Schweinberger and Luna, 2018).
  6 | 
  7 | See [Martínez Dahbura et al. (2021)](https://arxiv.org/abs/2105.12704) for more details on the algorithm and the model.
  8 | 
  9 | ## Table of Contents
 10 | 
 11 | - [Installation](#installation)
 12 | - [Usage](#usage)
 13 | - [Docker](#docker)
 14 | - [Contributing](#contributing)
 15 | - [License](#license)
 16 | - [References](#references)
 17 | 
 18 | ## Installation
 19 | 
 20 | You can install `lighthergm` in several ways.
 21 | 
 22 | 
 23 | ### From GitHub
 24 | 
 25 | Use devtools to install directly from the repository:
 26 | ```r
 27 | devtools::install_github(repo = 'sansan-inc/lighthergm', subdir = '/lighthergm', ref = 'main')
 28 | ```
 29 | This will install the latest stable version of the code. If you need to install a different version, use the name of the target branch as the value for the `ref` argument.
 30 | 
 31 | ### From Source
 32 | Clone this repository and run the following R code from the root directory:
 33 | 
 34 | ```r
 35 | devtools::install("lighthergm")
 36 | ```
 37 | 
 38 | ### Installing on Mac OS
 39 | Some Mac OS users may encounter the following error when installing `lighthergm`:
 40 | 
 41 | ```
 42 | clang: error: unsupported option '-fopenmp'
 43 | ```
 44 | This is because **clang** for Mac OS may not support the **-fopenmp** flag by default.
 45 | 
 46 | To fix this, install [llvm](https://formulae.brew.sh/formula/llvm) from `brew`:
 47 | ```
 48 | brew install llvm
 49 | ```
 50 | 
 51 | Then, edit the **~/.R/Makevars** file in your machine (if it does not exist, create it) including the following line:
 52 | ```
 53 | CXX11 = /usr/local/opt/llvm/bin/clang
 54 | ```
 55 | This indicates R that it should use the right `clang` for compilation. After doing this you should be able to install the library without problems using any of the methods above.
 56 | 
 57 | If you still have issues installing `lighthergm` let us know.
 58 | 
 59 | ## Usage
 60 | 
 61 | See [the vignette](doc/intro-lighthergm.pdf) for a detailed introduction to `lighthergm`.
 62 | 
 63 | You can build the vignette by passing `build_vignettes = TRUE` to your devtools command of choice when installing.
 64 | 
 65 | ## Docker
 66 | 
 67 | You can test-drive `lighthergm` on a virtual environment with R Studio using Docker, if it is installed on your machine. This can be useful if you want to check out the library without installing it on your machine.
 68 | 
 69 | To do it clone the repository and run the following command from its root directory:
 70 | 
 71 | ```
 72 | docker build --tag lighthergm .
 73 | ```
 74 | 
 75 | This will create a Docker image called `lighthergm`. To run a container from that image, set the **PASSWORD** environment variable to any password you want to use for securing your R Studio session.
 76 | 
 77 | ```
 78 | docker run -p 8787:8787 -e PASSWORD=$PASSWORD --name lighthergm --rm lighthergm
 79 | ```
 80 | 
 81 | Now you can open R Studio by directing your browser to `localhost:8787`. Use the username **rstudio** and the value of the **PASSWORD** environment variable as the password. Now you should be able to work on an R Studio session with `lighthergm` installed.
 82 | 
 83 | You can also use **Docker Compose** for running a virtual environment with R Studio and `lighthergm`. Just set the necessary environment variables and run it with `docker-compose up`.
 84 | 
 85 | This will build the image and run a container for you, which you can stop or take down using `docker-compose stop` or `docker-compose down` respectively.
 86 | 
 87 | If you need to store the files you create within the container, uncomment the commented out lines in the `docker-compose.yaml` file and set the following environment variables on your environment:
 88 | 
 89 | - `PASSWORD`: The password you want to set for your R Studio session.
 90 | - `USERID`: Your user id (depends on your OS).
 91 | - `GROUPID`: Your group id. Check your user and group ids by typing `id` on your command line tool.
 92 | - `WORKSPACE_PATH`: A path to a directory where files created inside the container will be stored. What you save in `home/rstudio/workspace` within the container will be stored there.
 93 | 
 94 | You can also set multiple environment variables by creating a file called **.env** in the root directory of the repository with the values of the environment variables. For example:
 95 | 
 96 | ```
 97 | PASSWORD=somepassword
 98 | USERID=1234
 99 | GROUPID=4321
100 | WORKSPACE_PATH=/path/to/your/workspace/
101 | ```
102 | 
103 | If you do so, remember to include the file's path in the `.gitignore` file.
104 | 
105 | The build works on the following Docker setup:
106 | 
107 | ```
108 | Docker Engine version:  19.03.12
109 | docker-compose version: 1.17.1
110 | ```
111 | 
112 | ## Contributing
113 | 
114 | We welcome and encorage contributions in the form of issue reports and pull requests. Current topics where collaboration is especially welcome include, but are not limited to:
115 | 
116 | - Scalability improvements, for example, by avoiding the usage of memory-expensive feature adjacency matrices.
117 | 
118 | - Support for a wider range of models, including continuous covariates, directed networks, etc.
119 | 
120 | - Quality-of-life improvements to facilitate network analysis on a wider range of use cases.
121 | 
122 | ### Creating Issues
123 | 
124 | Feel free to fill an issue with your bug reports, feature suggestions and recommendations for improving the performance of `lighthergm`.
125 | 
126 | Please include all the necessary details. In the case of bug reports, include code and the necessary steps to reproduce it. For feature suggestions, describe in detail the problem they address and include if possible some code to make it easy to discuss.
127 | 
128 | ### Pull Requests
129 | 
130 | We employ gitflow whenever possible. The **develop** branch contains the latest changes, and the **main** branch is for releases.
131 | 
132 | To create a pull request, fork the repository and create a feature branch such as **feature/added-shiny-new-feature** from **develop**.
133 | 
134 | Before making a pull request, please use `devtools::check()` to check that the package can be installed and that the unit tests pass after your changes. We encorage test-oriented development. Adding tests to cover your changes makes it much easier to review you proposal.
135 | 
136 | When done, make a pull request from your fork, including any necessary details to help us understand the nature of your changes.
137 | 
138 | ## License
139 | This package is licensed under GPL-3. See the [license file](lighthergm/LICENSE.md) for details.
140 | 
141 | ## References
142 | - Martínez Dahbura, Juan Nelson, Shota Komatsu, Takanori Nishida, and Angelo Mele (2021), "A Structural Model of Business Card Exchange Networks", Working Paper, Available at https://arxiv.org/abs/2105.12704.
143 | - Schweinberger, Michael and Pamela Luna (2018), "HERGM: Hierarchical Exponential-Family Random Graph Models", Journal of Statistical Software 85(1), 1-39.
144 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-EM-checkpoint.R:
--------------------------------------------------------------------------------
  1 | test_that("Setting a checkpoint for EM iterations works", {
  2 |   set.seed(334)
  3 |   # Simulate a network to work with in this unit test.
  4 |   # Number of nodes
  5 |   N <- 1000
  6 |   # Number of blocks
  7 |   K <- 50
  8 |   # Block memberships (same block size)
  9 |   memb <- rep(1:K, each = N / K)
 10 |   # Covariates
 11 |   x <- sample(1:10, size = N, replace = TRUE)
 12 |   y <- sample(1:10, size = N, replace = TRUE)
 13 | 
 14 |   # Within-block parameters: edges, nodematch("x"), nodematch("y"), triangle
 15 |   list_within_params <- c(-1, 1, 1, 0.5)
 16 |   # Between-block parameters: edges, nodematch("x"), nodematch("y")
 17 |   list_between_params <- c(-3.5, 0.5, 0.5)
 18 | 
 19 |   formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle
 20 | 
 21 |   vertex_id <- 1:N
 22 | 
 23 |   df <- tibble::tibble(
 24 |     id = vertex_id,
 25 |     memb = memb,
 26 |     x = x,
 27 |     y = y
 28 |   )
 29 | 
 30 |   g_sim <-
 31 |     simulate_hergm(
 32 |       formula_for_simulation = formula,
 33 |       data_for_simulation = df,
 34 |       colname_vertex_id = "id",
 35 |       colname_block_membership = "memb",
 36 |       coef_within_block = list_within_params,
 37 |       coef_between_block = list_between_params,
 38 |       ergm_control = ergm::control.simulate.formula(MCMC.burnin = 1000000, MCMC.interval = 1000),
 39 |       seed = 1,
 40 |       n_sim = 1,
 41 |       directed = FALSE,
 42 |       output = "network"
 43 |     )
 44 | 
 45 |   ############# 1. Clustering with features ##############################
 46 |   # Conduct clustering at once
 47 |   initial_weight <- 1000
 48 | 
 49 |   cluster_with_feature <-
 50 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
 51 |       n_clusters = K,
 52 |       estimate_parameters = TRUE,
 53 |       verbose = 0,
 54 |       n_em_step_max = 10,
 55 |       initialization_method = 3,
 56 |       infomap_python = FALSE,
 57 |       clustering_with_features = TRUE,
 58 |       check_alpha_update = TRUE,
 59 |       compute_pi = TRUE,
 60 |       check_lower_bound = TRUE,
 61 |       check_block_membership = TRUE,
 62 |       weight_for_initialization = initial_weight,
 63 |       seeds = 334
 64 |     )
 65 | 
 66 |   # Conduct clustering in two steps
 67 |   first_step <-
 68 |     lighthergm::hergm(
 69 |       object = g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
 70 |       n_clusters = K,
 71 |       estimate_parameters = TRUE,
 72 |       verbose = 0,
 73 |       n_em_step_max = 7,
 74 |       initialization_method = 3,
 75 |       infomap_python = FALSE,
 76 |       clustering_with_features = TRUE,
 77 |       check_block_membership = TRUE,
 78 |       weight_for_initialization = initial_weight,
 79 |       check_alpha_update = TRUE,
 80 |       seeds = 334
 81 |     )
 82 | 
 83 |   second_step <-
 84 |     lighthergm::hergm(
 85 |       object = first_step,
 86 |       n_em_step_max = 3
 87 |     )
 88 | 
 89 |   # Check if the calculated lower bounds are identical (both length and values)
 90 |   expect_equal(cluster_with_feature$EM_lower_bound, second_step$EM_lower_bound)
 91 | 
 92 |   # Check if block memberships are identical over iterations (both length and Yule's coefficient)
 93 |   expect_true(all(unlist(purrr::map2(cluster_with_feature$EM_list_z, second_step$EM_list_z, compute_yule_coef) == rep(1, length(second_step$EM_list_z)))))
 94 | 
 95 |   # The partition should be the same at the end of the second estimation as the one obtained after running 10 iterations of the EM algorithm
 96 |   expect_equal(compute_yule_coef(cluster_with_feature$partition, second_step$partition), 1)
 97 | 
 98 |   # Check if alphas are identical over iterations
 99 |   for (i in 1:length(cluster_with_feature$EM_list_alpha)) {
100 |     expect_equal(cluster_with_feature$EM_list_alpha[[i]], second_step$EM_list_alpha[[i]], check.attribute = FALSE, tolerance = 1e-2)
101 |   }
102 |   # Check if the alpha after the second checkpoint is the same as the alpha when performing estimation with 10 EM iterations without a checkpoint.
103 |   expect_equal(cluster_with_feature$alpha, second_step$alpha)
104 | 
105 |   # Check if estimated coefficients with and without a checkpoint are identical.
106 |   expect_equal(coef(cluster_with_feature$est_between), coef(second_step$est_between), tolerance = 1e-10)
107 |   expect_equal(coef(cluster_with_feature$est_within), coef(second_step$est_within), tolerance = 1e-10)
108 | 
109 | 
110 |   ############# 2. Clustering without features ##############################
111 |   # Conduct clustering at once
112 |   initial_weight <- 1000
113 | 
114 |   cluster_without_feature <-
115 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
116 |       n_clusters = K,
117 |       estimate_parameters = TRUE,
118 |       verbose = 0,
119 |       n_em_step_max = 10,
120 |       initialization_method = 3,
121 |       infomap_python = FALSE,
122 |       clustering_with_features = FALSE,
123 |       check_alpha_update = TRUE,
124 |       compute_pi = TRUE,
125 |       check_lower_bound = TRUE,
126 |       check_block_membership = TRUE,
127 |       weight_for_initialization = initial_weight,
128 |       seeds = 334
129 |     )
130 | 
131 |   # Conduct clustering in two steps
132 |   first_step_without_feature <-
133 |     lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles,
134 |       n_clusters = K,
135 |       estimate_parameters = TRUE,
136 |       verbose = 0,
137 |       n_em_step_max = 3,
138 |       initialization_method = 3,
139 |       infomap_python = FALSE,
140 |       clustering_with_features = FALSE,
141 |       check_alpha_update = TRUE,
142 |       compute_pi = TRUE,
143 |       check_lower_bound = TRUE,
144 |       check_block_membership = TRUE,
145 |       weight_for_initialization = initial_weight,
146 |       seeds = 334
147 |     )
148 | 
149 |   second_step_without_feature <-
150 |     lighthergm::hergm(
151 |       object = first_step_without_feature,
152 |       n_em_step_max = 7
153 |     )
154 | 
155 |   # Check if the calculated lower bounds are identical (both length and values)
156 |   expect_equal(cluster_without_feature$EM_lower_bound, second_step_without_feature$EM_lower_bound)
157 | 
158 |   # Check if block memberships are identical over iterations (both length and Yule's coefficient)
159 |   expect_true(all(unlist(purrr::map2(cluster_without_feature$EM_list_z, second_step_without_feature$EM_list_z, compute_yule_coef) ==
160 |     rep(1, length(second_step_without_feature$EM_list_z)))))
161 | 
162 |   # The partition should be the same at the end of the second estimation as the one obtained after running 10 iterations of the EM algorithm
163 |   expect_equal(compute_yule_coef(cluster_without_feature$partition, second_step_without_feature$partition), 1)
164 | 
165 |   # Check if alphas are identical over iterations
166 |   for (i in 1:length(cluster_without_feature$EM_list_alpha)) {
167 |     expect_equal(cluster_without_feature$EM_list_alpha[[i]], second_step_without_feature$EM_list_alpha[[i]], check.attribute = FALSE, tolerance = 1e-2)
168 |   }
169 |   # Check if the alpha after the second checkpoint is the same as the alpha when performing estimation with 10 EM iterations without a checkpoint.
170 |   expect_equal(cluster_without_feature$alpha, second_step_without_feature$alpha)
171 | 
172 |   # Check if estimated coefficients with and without a checkpoint are identical.
173 |   expect_equal(coef(cluster_without_feature$est_between), coef(second_step_without_feature$est_between), tolerance = 1e-10)
174 |   expect_equal(coef(cluster_without_feature$est_within), coef(second_step_without_feature$est_within), tolerance = 1e-10)
175 | })
176 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-compute_lower_bound.R:
--------------------------------------------------------------------------------
  1 | test_that("computing the lower bound works", {
  2 |   set.seed(12345)
  3 |   ####################
  4 |   # Setup
  5 |   ####################
  6 |   # Number of nodes
  7 |   N <- 12
  8 |   # Number of clusters
  9 |   K <- 3
 10 | 
 11 |   # Create an adjacency matrix
 12 |   edgelist <-
 13 |     tibble::tibble(
 14 |       tail = 1:N,
 15 |       head = 1:N
 16 |     ) %>%
 17 |     tidyr::expand(tail, head) %>%
 18 |     dplyr::filter(tail < head) %>%
 19 |     dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>%
 20 |     dplyr::filter(connect == 1)
 21 | 
 22 |   net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
 23 |   adj <- network::as.matrix.network.adjacency(net)
 24 |   adj <- as(adj, "dgCMatrix")
 25 | 
 26 |   # Create feature matrices
 27 |   x <- as.integer(unlist(purrr::rbernoulli(n = N)))
 28 |   S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 29 |   S <- as(S, "dgCMatrix")
 30 |   for (i in 1:N) {
 31 |     for (j in 1:N) {
 32 |       if (i != j) {
 33 |         s_ij <- ifelse(x[i] == x[j], 1, 0)
 34 |         S[i, j] <- s_ij
 35 |       }
 36 |     }
 37 |   }
 38 | 
 39 |   y <- as.integer(unlist(purrr::rbernoulli(n = N)))
 40 |   V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 41 |   V <- as(V, "dgCMatrix")
 42 |   for (i in 1:N) {
 43 |     for (j in 1:N) {
 44 |       if (i != j) {
 45 |         v_ij <- ifelse(y[i] == y[j], 1, 0)
 46 |         V[i, j] <- v_ij
 47 |       }
 48 |     }
 49 |   }
 50 | 
 51 |   z <- as.integer(unlist(purrr::rbernoulli(n = N)))
 52 |   W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N))
 53 |   W <- as(W, "dgCMatrix")
 54 |   for (i in 1:N) {
 55 |     for (j in 1:N) {
 56 |       if (i != j) {
 57 |         w_ij <- ifelse(z[i] == z[j], 1, 0)
 58 |         W[i, j] <- w_ij
 59 |       }
 60 |     }
 61 |   }
 62 | 
 63 |   # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k.
 64 |   tau <-
 65 |     matrix(c(
 66 |       0.2, 0.5, 0.3,
 67 |       0.4, 0.4, 0.2,
 68 |       0.1, 0.4, 0.5,
 69 |       0.4, 0.4, 0.2,
 70 |       0.1, 0.1, 0.8,
 71 |       0.05, 0.05, 0.9,
 72 |       0.8, 0.1, 0.1,
 73 |       0.3, 0.4, 0.3,
 74 |       0.1, 0.8, 0.1,
 75 |       0.5, 0.4, 0.1,
 76 |       0.3, 0.3, 0.4,
 77 |       0.8, 0.1, 0.1
 78 |     ),
 79 |     nrow = K, ncol = N
 80 |     )
 81 |   tau <- t(tau)
 82 | 
 83 |   # Compute gamma (parameter of multinomial distribution)
 84 |   alpha <- colMeans(tau)
 85 | 
 86 |   ###########################################################
 87 |   # Compute the lower bound in a naive way
 88 |   ###########################################################
 89 |   # Compute pi for D_ij = 1
 90 |   minPi <- 1e-4
 91 |   list_pi <- list()
 92 |   for (w in 0:1) {
 93 |     for (v in 0:1) {
 94 |       for (s in 0:1) {
 95 |         print(glue::glue("Compute pi for pi_s{s}v{v}w{w}"))
 96 |         denom <- matrix(0, nrow = K, ncol = K)
 97 |         num <- matrix(0, nrow = K, ncol = K)
 98 |         index <- s + 2 * v + 4 * w + 1
 99 |         print(index)
100 |         for (k in 1:K) {
101 |           for (l in 1:K) {
102 |             for (i in 1:N) {
103 |               for (j in 1:N) {
104 |                 if (i != j & S[i, j] == s & V[i, j] == v & W[i, j] == w) {
105 |                   denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l]
106 |                 }
107 |                 if (i != j & adj[i, j] == 1 & S[i, j] == s & V[i, j] == v & W[i, j] == w) {
108 |                   num[k, l] <- num[k, l] + tau[i, k] * tau[j, l]
109 |                 }
110 |               }
111 |             }
112 |           }
113 |         }
114 |         pi <- num / denom
115 |         # Remove extremely small elements in pi
116 |         for (k in 1:K) {
117 |           for (l in 1:K) {
118 |             if (pi[k, l] < minPi) {
119 |               pi[k, l] <- minPi
120 |             }
121 |           }
122 |         }
123 |         list_pi[[index]] <- pi
124 |       }
125 |     }
126 |   }
127 | 
128 |   # Compute the true lower bound
129 |   LB_true <- 0
130 |   # First term
131 |   for (i in 1:N) {
132 |     for (j in 1:N) {
133 |       if (i != j) {
134 |         # For each ij, determine which pi must be used.
135 |         index_ij <- S[i, j] + 2 * V[i, j] + 4 * W[i, j] + 1
136 |         pi_ij <- list_pi[[index_ij]]
137 |         # if D_ij = 0, replace pi with 1 - pi.
138 |         if (adj[i, j] == 0) {
139 |           pi_ij <- 1 - pi_ij
140 |         }
141 |         for (k in 1:K) {
142 |           for (l in 1:K) {
143 |             LB_true <- LB_true + tau[i, k] * tau[j, l] * log(pi_ij[k, l])
144 |           }
145 |         }
146 |       }
147 |     }
148 |   }
149 | 
150 |   # Second term
151 |   for (i in 1:N) {
152 |     for (k in 1:K) {
153 |       LB_true <- LB_true + tau[i, k] * (log(alpha[k]) - log(tau[i, k]))
154 |     }
155 |   }
156 | 
157 |   ###########################################################
158 |   # Compute the lower bound using the cpp function
159 |   ###########################################################
160 |   list_feature_adjmat <- list(S, V, W)
161 |   list_multiplied_feature_adjmat <- get_elementwise_multiplied_matrices(adj, list_feature_adjmat)
162 |   denom <- get_matrix_for_denominator(N, list_feature_adjmat)
163 |   list_multiplied_feature_adjmat[[1]] <- denom
164 | 
165 |   alpha_LB <- run_EM_with_features(N, K, alpha, list_multiplied_feature_adjmat, tau, verbose = 2)
166 | 
167 |   # Check if it works
168 |   expect_equal(alpha_LB[[2]], LB_true, tolerance = 1e-10)
169 | })
170 | 
171 | 
172 | test_that("computing the lower bound without features works", {
173 | 
174 |   ####################
175 |   # Setup
176 |   ####################
177 |   # Number of nodes
178 |   N <- 12
179 |   # Number of clusters
180 |   K <- 3
181 | 
182 |   # Create an adjacency matrix
183 |   edgelist <-
184 |     tibble::tibble(
185 |       tail = 1:N,
186 |       head = 1:N
187 |     ) %>%
188 |     tidyr::expand(tail, head) %>%
189 |     dplyr::filter(tail < head) %>%
190 |     dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>%
191 |     dplyr::filter(connect == 1)
192 | 
193 |   net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
194 |   adj <- network::as.matrix.network.adjacency(net)
195 |   adj <- as(adj, "dgCMatrix")
196 | 
197 |   # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k.
198 |   tau <-
199 |     matrix(c(
200 |       0.2, 0.5, 0.3,
201 |       0.4, 0.4, 0.2,
202 |       0.1, 0.4, 0.5,
203 |       0.4, 0.4, 0.2,
204 |       0.1, 0.1, 0.8,
205 |       0.05, 0.05, 0.9,
206 |       0.8, 0.1, 0.1,
207 |       0.3, 0.4, 0.3,
208 |       0.1, 0.8, 0.1,
209 |       0.5, 0.4, 0.1,
210 |       0.3, 0.3, 0.4,
211 |       0.8, 0.1, 0.1
212 |     ),
213 |     nrow = K, ncol = N
214 |     )
215 |   tau <- t(tau)
216 | 
217 |   # Compute gamma (parameter of multinomial distribution)
218 |   alpha <- colMeans(tau)
219 | 
220 |   ###########################################################
221 |   # Compute the lower bound in a naive way
222 |   ###########################################################
223 |   # Compute pi for D_ij = 1
224 |   minPi <- 1e-4
225 |   denom <- matrix(0, nrow = K, ncol = K)
226 |   num <- matrix(0, nrow = K, ncol = K)
227 |   for (k in 1:K) {
228 |     for (l in 1:K) {
229 |       for (i in 1:N) {
230 |         for (j in 1:N) {
231 |           if (i != j) {
232 |             denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l]
233 |             if (i != j & adj[i, j] == 1) {
234 |               num[k, l] <- num[k, l] + tau[i, k] * tau[j, l]
235 |             }
236 |           }
237 |         }
238 |       }
239 |     }
240 |   }
241 |   pi <- num / denom
242 | 
243 |   # Remove extremely small elements in pi
244 |   for (k in 1:K) {
245 |     for (l in 1:K) {
246 |       if (pi[k, l] < minPi) {
247 |         pi[k, l] <- minPi
248 |       }
249 |     }
250 |   }
251 | 
252 | 
253 |   # Compute the true lower bound
254 |   LB_true <- 0
255 |   # First term
256 |   for (i in 1:N) {
257 |     for (j in 1:N) {
258 |       if (i != j) {
259 |         # if D_ij = 0, replace pi with 1 - pi.
260 |         if (adj[i, j] == 0) {
261 |           pi_ij <- 1 - pi
262 |         } else {
263 |           pi_ij <- pi
264 |         }
265 |         for (k in 1:K) {
266 |           for (l in 1:K) {
267 |             LB_true <- LB_true + tau[i, k] * tau[j, l] * log(pi_ij[k, l])
268 |           }
269 |         }
270 |       }
271 |     }
272 |   }
273 | 
274 |   # Second term
275 |   for (i in 1:N) {
276 |     for (k in 1:K) {
277 |       LB_true <- LB_true + tau[i, k] * (log(alpha[k]) - log(tau[i, k]))
278 |     }
279 |   }
280 | 
281 |   ###########################################################
282 |   # Compute the lower bound using the cpp function
283 |   ###########################################################
284 |   alpha_LB <- run_EM_without_features(N, K, alpha, tau, adj)
285 |   # Check if it works
286 |   expect_equal(alpha_LB[[2]], LB_true, tolerance = 1e-10)
287 | })
288 | 


--------------------------------------------------------------------------------
/lighthergm/R/estimate_within_params.R:
--------------------------------------------------------------------------------
  1 | #' Estimate a within-block network model.
  2 | #' @importFrom parallel mclapply
  3 | #' @importFrom magrittr %>%
  4 | #' @importFrom ergm ergm
  5 | #' @importFrom foreach foreach %do% %dopar%
  6 | #' @param formula a within network formula
  7 | #' @param network a network object
  8 | #' @param z_memb block memberships for each node
  9 | #' @param number_cores The number of CPU cores to use.
 10 | #' @param verbose A logical or an integer: if this is TRUE/1,
 11 | #' the program will print out additional information about the progress of estimation and simulation.
 12 | #' @param seeds seed value (integer) for the random number generator
 13 | #' @param method_second_step If "MPLE" (the default), then the maximum pseudolikelihood estimator is returned.
 14 | #' If "MLE", then an approximate maximum likelihood estimator is returned.
 15 | #' @param offset_coef a vector of model parameters to be fixed when estimation.(i.e., not estimated).
 16 | #' @param ... Additional arguments, to be passed to lower-level functions
 17 | #' @importFrom rlang %||%
 18 | #' @export
 19 | estimate_within_params <-
 20 |   function(formula,
 21 |            network,
 22 |            z_memb,
 23 |            number_cores = 1,
 24 |            verbose = 1,
 25 |            seeds = NULL,
 26 |            method_second_step = c("MPLE", "MLE"),
 27 |            offset_coef = NULL,
 28 |            ...) {
 29 | 
 30 |     varargs <- list(...)
 31 |     # Store block structure in a tibble
 32 |     block_structure <-
 33 |       tibble::tibble(
 34 |         node_id = network::network.vertex.names(network),
 35 |         block = z_memb
 36 |       )
 37 | 
 38 |     # Get number of clusters
 39 |     all_clusters <- names(table(factor(block_structure$block)))
 40 | 
 41 |     # Get an edgelist and vertex attributes from the network object using intergraph::asDF
 42 |     list_edgelist <- intergraph::asDF(network)
 43 | 
 44 |     # Get vertex attributes.
 45 |     vertex_attr <- list_edgelist$vertexes %>%
 46 |       dplyr::select(-intergraph_id)
 47 | 
 48 |     # Extract vertex names from the network
 49 |     vertex_name <- list_edgelist$vertexes %>%
 50 |       dplyr::select(intergraph_id, vertex.names)
 51 | 
 52 |     # Construct an edgelist whose source_id and target_ids correspond to the vertex names of the network.
 53 |     # If you use network::as.edgelist instead, the original vertex names are not kept. That causes a problem when you convert an edgelist into a network object.
 54 |     edgelist <-
 55 |       list_edgelist$edges %>%
 56 |       dplyr::select(V1, V2) %>%
 57 |       dplyr::left_join(., vertex_name, by = c("V1" = "intergraph_id")) %>%
 58 |       dplyr::rename(source_id = vertex.names) %>%
 59 |       dplyr::select(V2, source_id) %>%
 60 |       dplyr::left_join(., vertex_name, by = c("V2" = "intergraph_id")) %>%
 61 |       dplyr::rename(target_id = vertex.names) %>%
 62 |       dplyr::select(source_id, target_id) %>%
 63 |       tibble::tibble()
 64 | 
 65 |     # Get a sparse adjacency matrix for each block. Store them in a list.
 66 |     # This computation might be unstable when the network is large. Check this point later.
 67 |     # For Windows users
 68 |     if (Sys.info()[["sysname"]] == "Windows") {
 69 |       # Preparation for parallel computing using foreach
 70 |       cluster <- parallel::makeCluster(number_cores, type = "PSOCK")
 71 |       doParallel::registerDoParallel(cluster)
 72 |       # Start computation
 73 |       block_net <- foreach(k = all_clusters) %dopar% {
 74 |         # Get a subgraph whose vertices belong to block k.
 75 |         subnet <- get_induced_subgraph(block_structure, edgelist, k)
 76 |         # Keep vertex ids
 77 |         vertex_id <- network::network.vertex.names(subnet)
 78 |         # Convert the subgraph into a sparse adjacency matrix.
 79 |         sub_net <- as_sparse_adj(subnet)
 80 |         # Make a block attribute.
 81 |         # The length of this list must be the same with the number of vertices of the subgraph.
 82 |         block_attr <- rep(k, length(which(z_memb == k)))
 83 |         # Return the objects as a list.
 84 |         return(list(net = sub_net, id = vertex_id, block = block_attr))
 85 |       }
 86 |       parallel::stopCluster(cluster)
 87 |     }
 88 |     # For non-Windows users
 89 |     else {
 90 |       block_net <- mclapply(all_clusters, function(k) {
 91 |         # Get a subgraph whose vertices belong to block k.
 92 |         subnet <- get_induced_subgraph(block_structure, edgelist, k)
 93 |         # Keep vertex ids
 94 |         vertex_id <- network::network.vertex.names(subnet)
 95 |         # Convert the subgraph into a sparse adjacency matrix.
 96 |         sub_net <- as_sparse_adj(subnet)
 97 |         # Make a block attribute.
 98 |         # The length of this list must be the same with the number of vertices of the subgraph.
 99 |         block_attr <- rep(k, length(which(z_memb == k)))
100 |         # Return the objects as a list.
101 |         list(net = sub_net, id = vertex_id, block = block_attr)
102 |       },
103 |       mc.cores = number_cores
104 |       )
105 |     }
106 | 
107 |     # Extract info on vertex id.
108 |     vertex_id <- unlist(c(block_net %>%
109 |       purrr::map(function(x) {
110 |         x$id
111 |       })))
112 | 
113 |     # Extract info on which node belongs to which block.
114 |     block_attr <- unlist(c(block_net %>%
115 |       purrr::map(function(x) {
116 |         x$block
117 |       })))
118 | 
119 |     # Create a sparse adjacency matrix that only considers within-block connections.
120 |     block_net <- Matrix::bdiag(block_net %>%
121 |       purrr::map(function(x) {
122 |         x$net
123 |       }))
124 | 
125 |     # Convert the sparse adjacency matrix into a network object, called "block_net".
126 |     edgelist <-
127 |       Matrix::summary(block_net) %>%
128 |       dplyr::select(i, j) %>%
129 |       as.matrix()
130 | 
131 |     attr(edgelist, "n") <- length(vertex_id)
132 |     attr(edgelist, "vnames") <- vertex_id
133 |     attr(edgelist, "directed") <- FALSE
134 |     attr(edgelist, "bipartite") <- FALSE
135 |     attr(edgelist, "loops") <- FALSE
136 |     attr(edgelist, "class") <- c("matrix", "edgelist")
137 | 
138 |     block_net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE)
139 | 
140 |     # Attach vertex id
141 |     network::network.vertex.names(block_net) <- vertex_id
142 | 
143 |     # Attach vertex attributes
144 |     df_vertex_attr <-
145 |       tibble::tibble(vertex.names = vertex_id) %>%
146 |       dplyr::left_join(., vertex_attr, by = "vertex.names")
147 | 
148 |     # Remove non-vertex-attribute columns
149 |     df_vertex_attr <-
150 |       df_vertex_attr %>%
151 |       dplyr::select(-vertex.names)
152 | 
153 |     ## This part could be written in a much better way without foreach?
154 |     df_vertex_attr_colnames <- colnames(df_vertex_attr)
155 |     foreach(i = 1:ncol(df_vertex_attr)) %do% {
156 |       network::set.vertex.attribute(x = block_net, attrname = df_vertex_attr_colnames[i], value = df_vertex_attr[[i]])
157 |     }
158 | 
159 |     # Attach block attributes
160 |     network::set.vertex.attribute(block_net, "block", block_attr)
161 | 
162 |     # Re-arrange the formula in such a way that the LHS is block_net.
163 |     formula_terms <- as.character(formula)[3]
164 | 
165 |     # If within-block parameters are fixed, need to include `offset` in the formula
166 |     if (!is.null(offset_coef)) {
167 |       second_step_rhs <- as.character(formula)[3]
168 |       second_step_rhs <- unlist(stringr::str_split(string = second_step_rhs, pattern = " \\+ "))
169 |       # Extract offset terms and wrap them by `offset()`
170 |       offset_terms <- second_step_rhs[stringr::str_detect(string = second_step_rhs, pattern = "\"")]
171 |       offset_terms <- glue::glue("offset({offset_terms})")
172 |       # Extract within-block parameters to be estimated
173 |       within_terms <- second_step_rhs[!stringr::str_detect(string = second_step_rhs, pattern = "\"")]
174 |       # Combine all
175 |       formula_terms <- stringr::str_c(c(within_terms, offset_terms), collapse = " + ")
176 |     }
177 | 
178 |     formula <- as.formula(glue::glue("block_net ~ {formula_terms}"))
179 | 
180 |     # Estimate the within-block parameters
181 |     # The default estimation method is "MPLE", but you can select "MLE" if you like.
182 | 
183 |     # %||% extracts the value on the left with a default value if null
184 |     control <- varargs$control %||% ergm::control.ergm()
185 | 
186 |     model_est <- ergm(
187 |       formula = formula,
188 |       constraints = ~ blockdiag("block"),
189 |       estimate = method_second_step,
190 |       offset.coef = offset_coef,
191 |       control = control
192 |     )
193 | 
194 |     # Remove unnecessary network objects
195 |     model_est$newnetwork <- NULL
196 | 
197 |     return(model_est)
198 |   }
199 | 
200 | # ------------------------------------------------------------------------
201 | # -------------- Auxiliary functions -------------------------------------
202 | # ------------------------------------------------------------------------
203 | 
204 | #' Get a sparse adjacency matrix from a network object
205 | #' @param net a network object
206 | as_sparse_adj <- function(net) {
207 |   n_nodes <- as.integer(net$gal$n)
208 |   net <- network::as.edgelist(net)
209 |   net <- Matrix::sparseMatrix(
210 |     i = net[, 1],
211 |     j = net[, 2],
212 |     x = 1,
213 |     dims = c(n_nodes, n_nodes),
214 |     symmetric = TRUE
215 |   )
216 |   return(net)
217 | }
218 | 
219 | 
220 | get_induced_subgraph <- function(block_structure, edgelist, searched_block) {
221 |   # Get the relevant nodes for the searched block
222 |   block_nodes <-
223 |     block_structure %>%
224 |     dplyr::filter(block == searched_block) %>%
225 |     .$node_id
226 | 
227 |   # Get the number of nodes in the block
228 |   n_nodes_in_block <- length(block_nodes)
229 | 
230 |   # Get only the edges where nodes on both sides share the searched block membership
231 |   subgraph <-
232 |     edgelist %>%
233 |     dplyr::filter(source_id %in% block_nodes & target_id %in% block_nodes)
234 | 
235 |   # Keep only the ID attributes
236 |   subgraph <-
237 |     subgraph %>%
238 |     dplyr::select(source_id, target_id) %>%
239 |     as.matrix()
240 | 
241 |   attr(subgraph, "n") <- n_nodes_in_block
242 |   attr(subgraph, "vnames") <- block_nodes
243 |   attr(subgraph, "directed") <- FALSE
244 |   attr(subgraph, "bipartite") <- FALSE
245 |   attr(subgraph, "loops") <- FALSE
246 |   attr(subgraph, "class") <- c("matrix", "edgelist")
247 | 
248 |   if (nrow(subgraph) == 0) {
249 |     subgraph <- network::network.initialize(n = n_nodes_in_block, directed = FALSE)
250 |     network::network.vertex.names(subgraph) <- block_nodes
251 |     return(subgraph)
252 |   } else {
253 |     # Return the subnet
254 |     subgraph <-
255 |       network::network(subgraph, directed = FALSE, matrix.type = "edgelist")
256 | 
257 |     return(subgraph)
258 |   }
259 | }
260 | 


--------------------------------------------------------------------------------
/lighthergm/tests/testthat/test-gof.R:
--------------------------------------------------------------------------------
  1 | get_dummy_net <- function(n_nodes, n_clusters, em_iters = 10) {
  2 |   hergm_formula <- g ~ edges + triangle + nodematch("x")
  3 | 
  4 |   nodes_data <- tibble::tibble(
  5 |     node_id = 1:n_nodes,
  6 |     x = sample(1:2, size = n_nodes, replace = T),
  7 |     block = sample(1:n_clusters, size = n_nodes, replace = T)
  8 |   )
  9 | 
 10 |   g <- network::network.initialize(n = n_nodes)
 11 |   network::set.vertex.attribute(g, "x", nodes_data$x)
 12 |   list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(g, hergm_formula)
 13 | 
 14 |   coef_between_block <- c(-3, 1)
 15 |   coef_within_block <- c(-2, 0.1, 0.5)
 16 | 
 17 |   sim_ergm_control <- ergm::control.simulate.formula(
 18 |     MCMC.burnin = 4000000,
 19 |     MCMC.interval = 200000
 20 |   )
 21 | 
 22 |   g <- lighthergm::simulate_hergm(
 23 |     formula_for_simulation = hergm_formula,
 24 |     data_for_simulation = nodes_data,
 25 |     colname_vertex_id = "node_id",
 26 |     colname_block_membership = "block",
 27 |     coef_between_block = coef_between_block,
 28 |     coef_within_block = coef_within_block,
 29 |     ergm_control = sim_ergm_control,
 30 |     fast_between_simulation = TRUE,
 31 |     list_feature_matrices = list_feature_matrices
 32 |   )
 33 | 
 34 |   hergm_res <- lighthergm::hergm(
 35 |     g ~ edges + nodematch("x") + triangle,
 36 |     n_clusters = n_clusters,
 37 |     n_em_step_max = em_iters,
 38 |     estimate_parameters = T,
 39 |     clustering_with_features = T
 40 |   )
 41 | 
 42 |   list(
 43 |     hergm_res = hergm_res,
 44 |     g = g,
 45 |     nodes_data = nodes_data,
 46 |     K = n_clusters,
 47 |     list_feature_matrices = list_feature_matrices,
 48 |     vertex_id_var = "node_id",
 49 |     block_id_var = "block",
 50 |     ergm_control = sim_ergm_control
 51 |   )
 52 | }
 53 | 
 54 | 
 55 | test_that("Returned GOF dataframe has the correct fields", {
 56 |   sim <- get_dummy_net(50, 2)
 57 |   g <- sim$g
 58 | 
 59 |   test_gof_res <- lighthergm::gof_lighthergm(
 60 |     g,
 61 |     list_feature_matrices = sim$list_feature_matrices,
 62 |     data_for_simulation = sim$nodes_data,
 63 |     colname_vertex_id = sim$vertex_id_var,
 64 |     colname_block_membership = sim$block_id_var,
 65 |     lighthergm_results = sim$hergm_res,
 66 |     ergm_control = sim$ergm_control,
 67 |     n_sim = 3
 68 |   )
 69 | 
 70 |   for (stat_type in c("original", "simulated")) {
 71 |     stats <- test_gof_res[[stat_type]]
 72 |     expect_false(is.null(stats))
 73 |     for (stat in c("network_stats", "degree_dist", "esp_dist")) {
 74 |       expect_false(is.null(stats[[stat]]))
 75 |     }
 76 |     expect_true(is.null(stats[["geodesic_dist"]]))
 77 |   }
 78 | })
 79 | 
 80 | test_that("GOF network stats have the right fields and terms", {
 81 |   sim <- get_dummy_net(50, 2)
 82 |   g <- sim$g
 83 | 
 84 |   test_gof_res <- lighthergm::gof_lighthergm(
 85 |     g,
 86 |     list_feature_matrices = sim$list_feature_matrices,
 87 |     data_for_simulation = sim$nodes_data,
 88 |     colname_vertex_id = sim$vertex_id_var,
 89 |     colname_block_membership = sim$block_id_var,
 90 |     lighthergm_results = sim$hergm_res,
 91 |     ergm_control = sim$ergm_control,
 92 |     n_sim = 3
 93 |   )
 94 | 
 95 |   expected_terms <- ergm::ergm_model(sim$hergm_res$est_within$formula)$terms %>%
 96 |     purrr::map(function(t) {
 97 |       `$`(t, name)
 98 |     })
 99 |   unlist
100 | 
101 |   for (stat_type in c("original", "simulated")) {
102 |     stat_type_df <- test_gof_res[[stat_type]]
103 |     actual_terms <- colnames(stat_type_df$network_stats)
104 | 
105 |     actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>%
106 |       setdiff(c("value", "stat")) %>%
107 |       length() %>%
108 |       expect_equal(0)
109 | 
110 |     stat_type_df$network_stats$stat %>%
111 |       unique() %>%
112 |       stringr::str_replace("[.].*", "") %>%
113 |       setdiff(expected_terms) %>%
114 |       length() %>%
115 |       expect_equal(0)
116 |   }
117 | })
118 | 
119 | test_that("GOF degree stats have the right fields and terms", {
120 |   sim <- get_dummy_net(50, 2)
121 |   g <- sim$g
122 | 
123 |   test_gof_res <- lighthergm::gof_lighthergm(
124 |     g,
125 |     list_feature_matrices = sim$list_feature_matrices,
126 |     data_for_simulation = sim$nodes_data,
127 |     colname_vertex_id = sim$vertex_id_var,
128 |     colname_block_membership = sim$block_id_var,
129 |     lighthergm_results = sim$hergm_res,
130 |     ergm_control = sim$ergm_control,
131 |     n_sim = 3
132 |   )
133 | 
134 |   for (stat_type in c("original", "simulated")) {
135 |     stat_type_df <- test_gof_res[[stat_type]]
136 |     actual_terms <- colnames(stat_type_df$degree_dist)
137 | 
138 |     actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>%
139 |       setdiff(c("degree", "share")) %>%
140 |       length() %>%
141 |       expect_equal(0)
142 | 
143 |     expect_lte(max(stat_type_df$degree_dist$degree), g$gal$n)
144 |     expect(
145 |       min(stat_type_df$degree_dist$share) >= 0 && max(stat_type_df$degree_dist$share) <= 1,
146 |       failure_message = "Some degree shares are out of bounds"
147 |     )
148 |   }
149 | })
150 | 
151 | test_that("GOF esp stats have the right fields and terms", {
152 |   sim <- get_dummy_net(50, 2)
153 |   g <- sim$g
154 | 
155 |   test_gof_res <- lighthergm::gof_lighthergm(
156 |     g,
157 |     list_feature_matrices = sim$list_feature_matrices,
158 |     data_for_simulation = sim$nodes_data,
159 |     colname_vertex_id = sim$vertex_id_var,
160 |     colname_block_membership = sim$block_id_var,
161 |     lighthergm_results = sim$hergm_res,
162 |     ergm_control = sim$ergm_control,
163 |     n_sim = 3
164 |   )
165 | 
166 |   for (stat_type in c("original", "simulated")) {
167 |     stat_type_df <- test_gof_res[[stat_type]]
168 |     actual_terms <- colnames(stat_type_df$esp_dist)
169 |     actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>%
170 |       setdiff(c("label", "esp")) %>%
171 |       length() %>%
172 |       expect_equal(0)
173 | 
174 |     expect_lte(max(stat_type_df$esp_dist$label), min(g$gal$n, 10))
175 |     expect(
176 |       min(stat_type_df$esp_dist$esp) >= 0 && max(stat_type_df$esp_dist$esp) <= (g$gal$n^2),
177 |       failure_message = "Some esp counts are out of bounds."
178 |     )
179 |   }
180 | })
181 | 
182 | test_that("GOF geodesic distance is returned when requested", {
183 |   sim <- get_dummy_net(50, 2)
184 |   g <- sim$g
185 | 
186 |   test_gof_res <- lighthergm::gof_lighthergm(
187 |     g,
188 |     list_feature_matrices = sim$list_feature_matrices,
189 |     data_for_simulation = sim$nodes_data,
190 |     colname_vertex_id = sim$vertex_id_var,
191 |     colname_block_membership = sim$block_id_var,
192 |     lighthergm_results = sim$hergm_res,
193 |     ergm_control = sim$ergm_control,
194 |     n_sim = 3,
195 |     compute_geodesic_distance = TRUE
196 |   )
197 | 
198 |   for (stat_type in c("original", "simulated")) {
199 |     stat_type_df <- test_gof_res[[stat_type]]
200 |     actual_terms <- colnames(stat_type_df$geodesic_dist)
201 | 
202 |     actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>%
203 |       setdiff(c("dist", "pairs")) %>%
204 |       length() %>%
205 |       expect_equal(0)
206 | 
207 |     # Some of the distances will be Inf, and that's ok (that's how ergm returns them).
208 |     non_inf <- stat_type_df$geodesic_dist$dist[!is.infinite(stat_type_df$geodesic_dist$dist)]
209 |     expect_lte(max(non_inf), g$gal$n)
210 |     expect(
211 |       (min(stat_type_df$geodesic_dist$pairs) >= 0) && (max(stat_type_df$geodesic_dist$pairs) <= (g$gal$n^2)),
212 |       failure_message = "Some geodesic distance pairs are out of bounds."
213 |     )
214 |   }
215 | })
216 | 
217 | test_that("Return GOF statistics including only within-block connections", {
218 |   sim <- get_dummy_net(50, 2)
219 |   g <- sim$g
220 | 
221 |   test_gof_res <- lighthergm::gof_lighthergm(
222 |     g,
223 |     list_feature_matrices = sim$list_feature_matrices,
224 |     data_for_simulation = sim$nodes_data,
225 |     colname_vertex_id = sim$vertex_id_var,
226 |     colname_block_membership = sim$block_id_var,
227 |     lighthergm_results = sim$hergm_res,
228 |     type = 'within',
229 |     ergm_control = sim$ergm_control,
230 |     n_sim = 3
231 |   )
232 | 
233 |   # check that the network stats belong to the within-block sub network only
234 |   edgelist <- network::as.edgelist(g) %>% as.data.frame
235 |   colnames(edgelist) <- c('src', 'dst')
236 |   nodes_with_blocks <- data.frame(id = 1:length(network::network.vertex.names(g)), block=network::get.vertex.attribute(g, 'block'))
237 |   actual_within_conns <- edgelist %>%
238 |     dplyr::left_join(nodes_with_blocks, by = c('src' = 'id')) %>%
239 |     dplyr::left_join(nodes_with_blocks, by = c('dst' = 'id'), suffix=c('.src', '.dst')) %>%
240 |     dplyr::filter(block.src == block.dst) %>%
241 |     nrow
242 | 
243 |   within_conns_from_gof <- (test_gof_res$original$network_stats %>% dplyr::filter(stat == 'edges'))[, 2]
244 | 
245 |   expect_equal(within_conns_from_gof, actual_within_conns)
246 | 
247 |   for (stat_type in c("original", "simulated")) {
248 |     stats <- test_gof_res[[stat_type]]
249 |     expect_false(is.null(stats))
250 |     for (stat in c("network_stats", "degree_dist", "esp_dist")) {
251 |       expect_false(is.null(stats[[stat]]))
252 |     }
253 |     expect_true(is.null(stats[["geodesic_dist"]]))
254 |   }
255 | })
256 | 
257 | test_that("Within-connections GOF can be started from the observed network", {
258 |   sim <- get_dummy_net(100, 4)
259 |   g <- sim$g
260 | 
261 |   ergm_control <- ergm::control.simulate.formula(
262 |     MCMC.burnin = 0,
263 |     MCMC.interval = 1
264 |   )
265 | 
266 |   test_gof_res <- lighthergm::gof_lighthergm(
267 |     g,
268 |     list_feature_matrices = sim$list_feature_matrices,
269 |     data_for_simulation = sim$nodes_data,
270 |     colname_vertex_id = sim$vertex_id_var,
271 |     colname_block_membership = sim$block_id_var,
272 |     lighthergm_results = sim$hergm_res,
273 |     type = 'within',
274 |     ergm_control = ergm_control,
275 |     n_sim = 2,
276 |     start_from_observed = TRUE
277 |   )
278 | 
279 |   first_simulation_stats <-test_gof_res$simulated$network_stats %>%
280 |     dplyr::filter(n_sim == 1) %>%
281 |     dplyr::select(-n_sim)
282 | 
283 |   original_network_stats <- test_gof_res$original$network_stats
284 |   expect_equal(original_network_stats, first_simulation_stats)
285 | })
286 | 
287 | test_that("Full GOF can be started from the observed network", {
288 |   sim <- get_dummy_net(100, 4)
289 |   g <- sim$g
290 | 
291 |   ergm_control <- ergm::control.simulate.formula(
292 |     MCMC.burnin = 0,
293 |     MCMC.interval = 1
294 |   )
295 | 
296 |   test_gof_res <- lighthergm::gof_lighthergm(
297 |     g,
298 |     list_feature_matrices = sim$list_feature_matrices,
299 |     data_for_simulation = sim$nodes_data,
300 |     colname_vertex_id = sim$vertex_id_var,
301 |     colname_block_membership = sim$block_id_var,
302 |     lighthergm_results = sim$hergm_res,
303 |     type = 'full',
304 |     ergm_control = ergm_control,
305 |     n_sim = 2,
306 |     start_from_observed = TRUE
307 |   )
308 | 
309 |   first_simulation_stats <-test_gof_res$simulated$network_stats %>%
310 |     dplyr::filter(n_sim == 1)
311 | 
312 |   # If it starts from the observed network, the stats should not be zero
313 |   expect_true(all(first_simulation_stats['value'] > 0))
314 | })
315 | 


--------------------------------------------------------------------------------
/lighthergm/vignettes/intro-lighthergm.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "A Light Introduction to `lighthergm`"
  3 | output: rmarkdown::html_vignette
  4 | vignette: >
  5 |   %\VignetteIndexEntry{intro-lighthergm}
  6 |   %\VignetteEngine{knitr::rmarkdown}
  7 |   %\VignetteEncoding{UTF-8}
  8 | 
  9 | references:
 10 | - id: schweinberger2015
 11 |   title: "Local Dependence in Random Graph Models: Characterization, Properties and Statistical Inference"
 12 |   author:
 13 |   - family: Schweinberger
 14 |     given: Michael
 15 |   - family: Handcock
 16 |     given: Mark S
 17 |   container-title: Journal of the Royal statistical Society B
 18 |   volume: 77
 19 |   issue: 3
 20 |   page: 647-676
 21 |   type: article-journal
 22 |   issued:
 23 |     year: 2015
 24 | 
 25 | - id: schweinberger2018
 26 |   title: "hergm: Hierarchical Exponential-Family Random Graph Models"
 27 |   author:
 28 |   - family: Schweinberger
 29 |     given: Michael
 30 |   - family: Luna
 31 |     given: Pamela
 32 |   container-title: Journal of Statistical Software
 33 |   volume: 85
 34 |   issue: 1
 35 |   page: 1-39
 36 |   type: article-journal
 37 |   issued:
 38 |     year: 2018
 39 | 
 40 | - id: martinezdahbura2021
 41 |   title: "A Structural Model of Business Card Exchange Networks"
 42 |   author:
 43 |   - family: Martínez Dahbura
 44 |     given: Juan Nelson
 45 |   - family: Komatsu
 46 |     given: Shota
 47 |   - family: Nishida
 48 |     given: Takanori
 49 |   - family: Mele
 50 |     given: Angelo
 51 |   type: article-journal
 52 |   page: Working Paper. Available at https://arxiv.org/abs/2105.12704
 53 |   issued:
 54 |     year: 2021
 55 | ---
 56 | 
 57 | ```{r, include = FALSE}
 58 | options(rmarkdown.html_vignette.check_title = FALSE)
 59 | knitr::opts_chunk$set(
 60 |   collapse = TRUE,
 61 |   comment = "#>"
 62 | )
 63 | ```
 64 | 
 65 | 
 66 | This vignette provides a brief introduction on how to use the R package `lighthergm`, which estimates Hierarchical Exponential-Family Random Graph Models [HERGMs, @schweinberger2015].
 67 | `lighthergm` is built upon the R package `hergm` [@schweinberger2018] and applies scalable algorithms and computational techniques.
 68 | See @martinezdahbura2021  for further information.
 69 | 
 70 | # A simple example
 71 | 
 72 | ```{r setup}
 73 | library(lighthergm)
 74 | ```
 75 | 
 76 | `lighthergm` has a toy network to test-drive the package with
 77 | Let's load the network data and plot it.
 78 | 
 79 | ```{r, message=FALSE}
 80 | # Load an embedded network object.
 81 | data(toyNet)
 82 | # Draw the network.
 83 | library(ggplot2)
 84 | library(magrittr)
 85 | library(GGally)
 86 | g <- ggnet2(toyNet,
 87 |             size = 1,
 88 |             color = rep(c("tomato", "steelblue", "darkgreen", "black"),
 89 |                         each = toyNet$gal$n/4),
 90 |             mode = "kamadakawai")
 91 | plot(g)
 92 | ```
 93 | 
 94 | As you can see, this network has a clear cluster or community structure.
 95 | Although this is a fake network, we often observe such community structures in real social networks.
 96 | Exploiting this stylized fact, we model the way agents in a network get connected differently for connections across and within communities:
 97 | 
 98 | - Connections across communities happen by luck, influenced by homophily
 99 | - Connections within communities also consider interdependencies among links.
100 | For example, the probability that agent $i$ and $j$ gets connected may be influenced by a friend in common $k$.
101 | 
102 | To estimate the latent community structure of a network and agents' preferences for connection, `lighthergm` implements the following two-step procedure.
103 | 
104 | 1. Recover the community structure by applying a scalable minorization-maximization algorithm.
105 | 1. Given the estimated community structure in Step 1, estimate agents' between- and within-community payoffs by maximum pseudo-likelihood estimation.
106 | 
107 | Seeing is believing. Let's perform an estimation using the toy network. (If you would like to see the progress, set `verbose = 1`.)
108 | 
109 | ```{r, message=FALSE}
110 | model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle
111 | 
112 | hergm_res <-
113 |   lighthergm::hergm(
114 |     # The model you would like to estiamte
115 |     object = model_formula,
116 |     # The number of blocks
117 |     n_clusters = 4,
118 |     # The maximum number of EM algorithm steps
119 |     n_em_step_max = 100,
120 |     # Perform parameter estimation after the block recovery step
121 |     estimate_parameters = TRUE,
122 |     # Indicate that clustering must take into account nodematch on characteristics
123 |     clustering_with_features = TRUE,
124 |     # Keep track of block memberships at each EM iteration
125 |     check_block_membership = TRUE
126 | )
127 | ```
128 | 
129 | To see whether the first step (recovering the latent community structure) has converged, we can plot the estimated lower bound of the objective function over iterations.
130 | 
131 | ```{r}
132 | g_LB <-
133 |   ggplot(data = data.frame(iterations = 1:length(hergm_res$EM_lower_bound),
134 |                            lower_bound = hergm_res$EM_lower_bound),
135 |          aes(x = iterations, y = lower_bound)) +
136 |   geom_line()
137 | 
138 | plot(g_LB)
139 | ```
140 | 
141 | This indicates that the clustering step converged at the early stage.
142 | Note that the number of iterations that you need to perform (`n_em_step_max`) varies depending on the size of a network, whether it has a clear community structure, etc..
143 | You need trial and error on how many iterations are at least necessary in your case.
144 | Plotting the lower bound may help check the convergence of the clustering step.
145 | 
146 | You can check the clustering result
147 | 
148 | ```{r}
149 | # Number of nodes per recovered block:
150 | table(hergm_res$partition)
151 | ```
152 | 
153 | and estimated parameters.
154 | 
155 | ```{r}
156 | # For the between networks
157 | summary(hergm_res$est_between)
158 | ```
159 | 
160 | ```{r}
161 | # For the within networks
162 | summary(hergm_res$est_within)
163 | ```
164 | 
165 | Currently, the only supported way to include covariates in the model is via `nodematch()`.
166 | 
167 | You can also employ caching to avoid repeating heavy operations that yield the same results for your network. To use it, pass the `cache` parameter to `lighthergm::hergm`, setting its value to a [cachem](https://github.com/r-lib/cachem "cachem repository") object. A disk cache lets you speed up estimations on the same network data even across R Sessions.
168 | 
169 | # Goodness-of-fit
170 | 
171 | You can evaluate the goodness-of-fit of the model with the `lighthergm::gof_lighthergm()` function:
172 | 
173 | ```{r, message=FALSE}
174 | # Prepare a data frame that contains nodal id and covariates.
175 | nodes_data <-
176 |   tibble::tibble(
177 |     node_id = network::network.vertex.names(toyNet),
178 |     block = hergm_res$partition,
179 |     x = network::get.vertex.attribute(toyNet, "x"),
180 |     y = network::get.vertex.attribute(toyNet, "y")
181 |     )
182 | 
183 | # The feature adjacency matrices
184 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(toyNet, model_formula)
185 | 
186 | # The MCMC settings
187 | sim_ergm_control <- ergm::control.simulate.formula(
188 |   MCMC.burnin = 1000000,
189 |   MCMC.interval = 100000
190 | )
191 | 
192 | # The feature adjacency matrices
193 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(toyNet, model_formula)
194 | 
195 | gof_res <- lighthergm::gof_lighthergm(
196 |   toyNet,
197 |   # The feature adjacency matrices
198 |   list_feature_matrices = list_feature_matrices,
199 |   # A dataframe containing the nodes data.
200 |   data_for_simulation = nodes_data,
201 |   # The name of the nodes_data column containing the node IDs
202 |   # which are used within the network g
203 |   colname_vertex_id = 'node_id',
204 |   # The name of the nodes_data column containing the block ID.
205 |   colname_block_membership = 'block',
206 |   # The object returned by lighthergm::hergm()
207 |   lighthergm_results = hergm_res,
208 |   # The MCMC settings
209 |   ergm_control = sim_ergm_control,
210 |   # The number of simulations to use
211 |   n_sim = 100
212 | )
213 | ```
214 | 
215 | Currently, gof is evaluated on the following metrics:
216 | 
217 | 1. the network statistics (the counts you obtain when you use summary on an ergm formula, such as the number of edges, triangles, nodematches, etc.),
218 | 
219 | 1. degree distribution
220 | 
221 | 1. geodesic distance, and
222 | 
223 | 1. edgewise shared partners.
224 | 
225 | `lighthergm::gof_lighthergm()` returns a list of data frames for these matrices instead of creating plots as `ergm::gof()` does.
226 | This allows you to flexibly create gof plots that match your needs.
227 | 
228 | Below is a example gof plot on degree distribution.
229 | 
230 | ```{r, message=FALSE, warning=FALSE}
231 | degree_gof <- 
232 |   gof_res$simulated$degree_dist %>%
233 |   dplyr::group_by(degree) %>%
234 |   dplyr::summarise(log_mean_share = mean(log(share)),
235 |                    log_sd_share = sd(log(share))) %>%
236 |   dplyr::ungroup()
237 | 
238 | plot_degree <- 
239 |   ggplot(data = degree_gof %>% 
240 |            dplyr::filter(degree < 20 & degree >= 6), 
241 |          aes(x = degree, y = log_mean_share)) +
242 |   geom_line(aes(y = log_mean_share + 1.96 * log_sd_share), 
243 |             colour = "grey50",
244 |             linetype = "dotted") +
245 |   geom_line(aes(y = log_mean_share - 1.96 * log_sd_share),
246 |             colour = "grey50",
247 |             linetype = "dotted") +
248 |   geom_line(colour = "grey50",
249 |             linetype = "dashed") +
250 |   geom_line(data = gof_res$original$degree_dist %>% 
251 |               dplyr::filter(share > 0 & degree < 22), 
252 |             aes(y = log(share))) +
253 |   ylab("log prop. of nodes")
254 | 
255 | plot(plot_degree)
256 | ```
257 | 
258 | # Simulation
259 | 
260 | You can simulate networks with local dependence using the `lighthergm::simulate_hergm()`.
261 | 
262 | ```{r, message=FALSE, echo=TRUE}
263 | # Estimated coefficients for the between-community connections
264 | coef_between_block <- coef(hergm_res$est_between)
265 | 
266 | # Estimated coefficients for the within-community connections
267 | coef_within_block <- coef(hergm_res$est_within)
268 | 
269 | sim_net <- lighthergm::simulate_hergm(
270 |   # Formula for between-blocks
271 |   formula_for_simulation = model_formula,
272 |   # Same as for gof, a dataframe containing nodes attributes
273 |   data_for_simulation = nodes_data,
274 |   # Name of the column containing node IDs
275 |   colname_vertex_id = "node_id",
276 |   # Name of the column containing block IDs
277 |   colname_block_membership = "block",
278 |   # The coefficients for the between connections
279 |   coef_between_block = coef_between_block,
280 |    # The coefficients for the within connections
281 |   coef_within_block = coef_within_block,
282 |   # The MCMC settings
283 |   ergm_control = sim_ergm_control,
284 |   # Number of simulations to return
285 |   n_sim = 1,
286 |   # If `stats` a list with network statistics 
287 |   # for the between and within connections is returned
288 |   output = "network",
289 |   # Simulates between connections by drawing from a logistic distribution. 
290 |   # If FALSE, draws between connections by MCMC.
291 |   use_fast_between_simulation = TRUE,
292 |   # The feature adjacency matrices
293 |   list_feature_matrices = list_feature_matrices
294 | )
295 | ```
296 | 
297 | 
298 | ```{r}
299 | ggnet2(sim_net, size = 1, mode = "kamadakawai")
300 | ```
301 | 
302 | # When you work with large networks
303 | 
304 | If you would like to estimate an HERGM with a large network (say, when the number of nodes $\geq$ 50,000):
305 | 
306 | - Select features sparse enough to fit into memory. Covariates such as gender or race will be too dense to construct feature matrices. This is a non-negligible limitation of our algorithm and will be solved in the future. 
307 | - Prepare a list of multiplied feature adjacency matrices by `lighthergm::compute_multiplied_feature_matrices()`, and pass it to `lighthergm::hergm()` by `list_multiplied_feature_matrices`. Once calculated and stored, it can be used in models with the same network and the same features.
308 | - Use Python's infomap to initialize clusters. This is because it is much faster to implement cluster initialization than R functions such as `igraph::cluster_infomap()`. To install it, run `system("pip3 install infomap")` and check if it is effective by `system("infomap --version")`. If `system("infomap --version")` yields an error, consider using `{reticulate}`.
309 | - If successfully installed Python's infomap, set `use_infomap_python = TRUE` in `lighthergm::hergm()`.
310 | - When the EM estimation does not seem to have converged by inspecting the lower bound plot, you can further continue iterating by passing the `lighthergm` class object to `lighthergm::hergm()` as follows (all parameters such as the number of EM iterations will be inherited from the previous estimation unless specified).
311 | 
312 | ```{r, message=FALSE}
313 | hergm_res_second <-
314 |   lighthergm::hergm(object = hergm_res)
315 | ```
316 | 
317 | 
318 | # References
319 | 


--------------------------------------------------------------------------------
/lighthergm/R/gof_lighthergm.R:
--------------------------------------------------------------------------------
  1 | #' Extracts the degree distribution from a network and returns it as a data frame.
  2 | #' @param net a statnet network object
  3 | #' @return a data frame
  4 | to_degree_dist_df <- function(net) {
  5 |   degree_dist <- igraph::degree.distribution(intergraph::asIgraph(net))
  6 |   data.frame(
  7 |     degree = 0:(length(degree_dist) - 1),
  8 |     share = degree_dist
  9 |   )
 10 | }
 11 | 
 12 | #' Extracts the geodesic distance distribution from a network and returns it as a dataframe.
 13 | #' @param net a statnet network object
 14 | #' @return a data frame
 15 | to_geodesic_dist_df <- function(net) {
 16 |   dist <- ergm::ergm.geodistdist(net)
 17 |   labels <- as.numeric(names(dist))
 18 |   names(dist) <- NULL
 19 |   geodesic_distances_df <- data.frame(
 20 |     dist = labels,
 21 |     pairs = dist[[1]]
 22 |   )
 23 |   geodesic_distances_df
 24 | }
 25 | 
 26 | #' Extracts the edgewise shared partners distribution (undirected).
 27 | #' @param net a statnet network object
 28 | #' @return a data frame
 29 | to_edgewise_shared_partners_df <- function(net) {
 30 |   esp_dist <- summary(net ~ esp(1:min(net$gal$n - 2, 10)))
 31 |   labels <- names(esp_dist) %>%
 32 |     purrr::map(function(lab) {
 33 |       stringr::str_replace(lab, "esp", "")
 34 |     }) %>%
 35 |     as.numeric()
 36 | 
 37 |   names(esp_dist) <- NULL
 38 | 
 39 |   data.frame(
 40 |     label = labels,
 41 |     esp = esp_dist
 42 |   )
 43 | }
 44 | 
 45 | #' Swaps the network on the lhs of a formula for a new one with the given environment
 46 | #' @param new_net A network object to be inserted into the lhs of the formula
 47 | #' @param net_formula The target formula
 48 | #' @param env The environment to assign to the formula
 49 | #' @return A new formula with the lhs swapped
 50 | swap_formula_network <- function(new_net, net_formula, env) {
 51 |   rhs <- as.character(net_formula)[3]
 52 |   as.formula(paste(deparse(substitute(new_net)), "~", rhs), env = env)
 53 | }
 54 | 
 55 | 
 56 | 
 57 | #' Separates a formula into its between and within components. The between component excludes
 58 | #' terms which introduce dyadic dependence.
 59 | #' @param target_formula a target formula
 60 | #' @return a list containing the between and within formulas
 61 | separate_formulas <- function(target_formula) {
 62 |   str_net <- as.character(target_formula)[2]
 63 |   net <- get(str_net, envir = environment(target_formula))
 64 |   terms <- ergm::ergm_model(target_formula)$terms
 65 |   varnames <- statnet.common::list_rhs.formula(target_formula) %>% as.character()
 66 |   dep_terms <-
 67 |     terms %>% purrr::map(function(t) {
 68 |       dep <- t$dependence
 69 |       is_dep <- is.null(dep) || dep
 70 |     }) %>% unlist()
 71 |   between_rhs <- varnames[!dep_terms]
 72 |   within_rhs <- varnames
 73 | 
 74 |   between_formula <- paste(str_net, "~", paste(between_rhs, collapse = " + "))
 75 |   within_formula <- paste(str_net, "~", paste(within_rhs, collapse = " + "))
 76 | 
 77 |   list(
 78 |     between = formula(between_formula, env = environment(target_formula)),
 79 |     within = formula(within_formula, env = environment(target_formula))
 80 |   )
 81 | }
 82 | 
 83 | #' Gets the GOF stats for a formula
 84 | #' If a network is passed, that one is used to obtain the network statistics,
 85 | #' otherwise the netwok in the formula is used.
 86 | #' @param sim_formula a formula
 87 | #' @param net a statnet network object
 88 | #' @param sim_number the ID of the current simulation
 89 | #' @param compute_geodesic_distance if TRUE, includes the geodesic distance in the result object
 90 | #' @return a list with the goodness-of-fit statistics
 91 | get_gof_stats <- function(sim_formula, net = NULL, sim_number = NULL, compute_geodesic_distance = FALSE) {
 92 |   stats_formula <- sim_formula
 93 | 
 94 |   if (!is.null(net)) {
 95 |     stats_formula <- swap_formula_network(net, stats_formula, environment())
 96 |   }
 97 | 
 98 |   network_stats <- summary(stats_formula)
 99 |   network_stats <- data.frame(stat = names(network_stats), value = network_stats)
100 |   rownames(network_stats) <- NULL
101 | 
102 |   formula_net <- get(as.character(stats_formula)[2], envir = environment(stats_formula))
103 |   degree_dist <- to_degree_dist_df(formula_net)
104 |   esp_dist <- to_edgewise_shared_partners_df(formula_net)
105 | 
106 |   if (compute_geodesic_distance == TRUE) {
107 |     geodesic_dist <- to_geodesic_dist_df(formula_net)
108 |   } else {
109 |     geodesic_dist <- NULL
110 |   }
111 | 
112 |   stats <- list(
113 |     network_stats = network_stats,
114 |     degree_dist = degree_dist,
115 |     esp_dist = esp_dist,
116 |     geodesic_dist = geodesic_dist
117 |   )
118 | 
119 |   if (!is.null(sim_number)) {
120 |     stats$network_stats$n_sim <- sim_number
121 |     stats$degree_dist$n_sim <- sim_number
122 |     stats$esp_dist$n_sim <- sim_number
123 | 
124 |     if (!is.null(stats$geodesic_dist)) {
125 |       stats$geodesic_dist$n_sim <- sim_number
126 |     }
127 |   }
128 | 
129 |   stats
130 | }
131 | 
132 | 
133 | #' Goodness of fit statistics for HERGM
134 | #' @param net the target network
135 | #' @param data_for_simulation a dataframe with node-level covariates
136 | #' @param list_feature_matrices a list of feature adjacency matrices
137 | #' @param colname_vertex_id the name of the column that contains the node id
138 | #' @param colname_block_membership the name o the column that contains the block affiliation of each node
139 | #' @param lighthergm_results a lighthergm results object
140 | #' @param type the type of evaluation to perform. Can take the values `full` or `within`. `full` performs the evaluation on all edges, and `within` only considers within-block edges.
141 | #' @param ergm_control MCMC parameters as an instance of ergm.control
142 | #' @param seed the seed to be passed to simulate_hergm
143 | #' @param n_sim the number of simulations to employ for calculating goodness of fit
144 | #' @param prevent_duplicate see `simulate_hergm`
145 | #' @param compute_geodesic_distance if `TRUE`, the distribution of geodesic distances is also computed (considerably increases computation time on large networks. `FALSE` by default.)
146 | #' @param start_from_observed if `TRUE`, MCMC uses the observed network as a starting point
147 | #' @param ... Additional arguments, to be passed to lower-level functions
148 | #'
149 | #' @export
150 | gof_lighthergm <- function(net,
151 |                            data_for_simulation,
152 |                            list_feature_matrices,
153 |                            colname_vertex_id,
154 |                            colname_block_membership,
155 |                            lighthergm_results,
156 |                            type = 'full',
157 |                            ergm_control = ergm::control.simulate.formula(),
158 |                            seed = NULL,
159 |                            n_sim = 1,
160 |                            prevent_duplicate = TRUE,
161 |                            compute_geodesic_distance = FALSE,
162 |                            start_from_observed = FALSE,
163 |                            ...) {
164 |   # Setup
165 |   gof_formula <- swap_formula_network(net, lighthergm_results$est_within$formula, environment())
166 |   coef_within_block <- coef(lighthergm_results$est_within)
167 |   coef_between_block <- coef(lighthergm_results$est_between)
168 | 
169 |   # Validate the simulation type
170 |   allowed_type_values <- c('full', 'within')
171 |   if (!type %in% allowed_type_values){
172 |     stop("The `type` argument must be any of 'full' or 'within'")
173 |   }
174 | 
175 |   seed_edgelist = NULL
176 | 
177 |   if (type == 'full'){
178 |     original_stats <- get_gof_stats(gof_formula, compute_geodesic_distance = compute_geodesic_distance)
179 | 
180 |     if(start_from_observed){
181 |       seed_edgelist <- network::as.edgelist(net)
182 |     }
183 | 
184 |   } else {
185 |     sorted_dataframe <- sort_block_membership(data_for_simulation, colname_vertex_id, colname_block_membership)
186 |     seed_edgelist_within <- arrange_edgelist(network::as.edgelist(net), sorted_dataframe)$edgelist_within
187 |     within_network <- generate_seed_network(gof_formula, sorted_dataframe, edgelist = seed_edgelist_within, directed = FALSE)
188 | 
189 |     original_stats <- get_gof_stats(gof_formula, net = within_network, compute_geodesic_distance = compute_geodesic_distance)
190 | 
191 |     if(start_from_observed){
192 |       seed_edgelist <- network::as.edgelist(within_network)
193 |     }
194 |   }
195 | 
196 |   # Simulate the first network by initializing it from zero. The burnin here is the one set by the user.
197 |   if (type == 'full'){
198 |     base_network <- simulate_hergm(
199 |       formula_for_simulation = gof_formula,
200 |       data_for_simulation = data_for_simulation,
201 |       colname_vertex_id = colname_vertex_id,
202 |       colname_block_membership = colname_block_membership,
203 |       seed_edgelist = seed_edgelist,
204 |       coef_within_block = coef_within_block,
205 |       coef_between_block = coef_between_block,
206 |       ergm_control = ergm_control,
207 |       seed_for_within = seed_for_within,
208 |       seed_for_between = seed_for_between,
209 |       directed = FALSE,
210 |       n_sim = 1,
211 |       output = "network",
212 |       prevent_duplicate = prevent_duplicate,
213 |       list_feature_matrices = list_feature_matrices,
214 |       use_fast_between_simulation = TRUE
215 |     )
216 |   } else {
217 |     base_network <- lighthergm::simulate_hergm_within(
218 |       formula_for_simulation = gof_formula,
219 |       data_for_simulation = data_for_simulation,
220 |       colname_vertex_id = colname_vertex_id,
221 |       colname_block_membership = colname_block_membership,
222 |       seed_edgelist = seed_edgelist,
223 |       coef_within_block = coef_within_block,
224 |       output = 'network',
225 |       ergm_control = ergm_control,
226 |       seed = seed,
227 |       n_sim = 1
228 |     )
229 |   }
230 | 
231 |   # Get the statistics for the first network
232 |   sim_stats <- get_gof_stats(gof_formula, base_network, 1, compute_geodesic_distance = compute_geodesic_distance)
233 |   results <- list(
234 |     original = list(
235 |       network_stats = original_stats$network_stats,
236 |       degree_dist = original_stats$degree_dist,
237 |       esp_dist = original_stats$esp_dist,
238 |       geodesic_dist = original_stats$geodesic_dist
239 |     ),
240 |     simulated = list(
241 |       network_stats = sim_stats$network_stats,
242 |       degree_dist = sim_stats$degree_dist,
243 |       esp_dist = sim_stats$esp_dist,
244 |       geodesic_dist = sim_stats$geodesic_dist
245 |     )
246 |   )
247 | 
248 |   effective_nsim <- n_sim - 1
249 | 
250 |   if (effective_nsim > 0) {
251 |     # Now replace the burnin with the interval and simulate networks one by one.
252 |     ergm_control$MCMC.burnin <- ergm_control$MCMC.interval
253 | 
254 |     for (i in 1:effective_nsim) {
255 |       if ((i + 1) %% 50 == 0) {
256 |         message(paste("Simulation:", i + 1))
257 |       }
258 | 
259 |       if(type == 'full'){
260 |         base_network <- simulate_hergm(
261 |           formula_for_simulation = gof_formula,
262 |           list_feature_matrices = list_feature_matrices,
263 |           data_for_simulation = data_for_simulation,
264 |           colname_vertex_id = colname_vertex_id,
265 |           colname_block_membership = colname_block_membership,
266 |           seed_edgelist = network::as.edgelist(base_network),
267 |           coef_within_block = coef_within_block,
268 |           coef_between_block = coef_between_block,
269 |           ergm_control = ergm_control,
270 |           seed = seed,
271 |           directed = FALSE,
272 |           n_sim = 1,
273 |           output = "network",
274 |           prevent_duplicate = prevent_duplicate,
275 |           use_fast_between_simulation = TRUE,
276 |           ...
277 |         )
278 |       } else {
279 |         base_network <- lighthergm::simulate_hergm_within(
280 |           formula_for_simulation = gof_formula,
281 |           data_for_simulation = data_for_simulation,
282 |           colname_vertex_id = colname_vertex_id,
283 |           colname_block_membership = colname_block_membership,
284 |           seed_edgelist = network::as.edgelist(base_network),
285 |           coef_within_block = coef_within_block,
286 |           output = 'network',
287 |           ergm_control = ergm_control,
288 |           seed = seed,
289 |           n_sim = 1
290 |         )
291 |       }
292 | 
293 |       sim_stats <- get_gof_stats(gof_formula, base_network, i + 1, compute_geodesic_distance = compute_geodesic_distance)
294 |       results$simulated$network_stats <- rbind(results$simulated$network_stats, sim_stats$network_stats)
295 |       results$simulated$degree_dist <- rbind(results$simulated$degree_dist, sim_stats$degree_dist)
296 |       results$simulated$esp_dist <- rbind(results$simulated$esp_dist, sim_stats$esp_dist)
297 |       if (
298 |         !(is.null(results$simulated$geodesic_dist)) &
299 |           !(is.null(sim_stats$geodesic_dist))
300 |       ) {
301 |         results$simulated$geodesic_dist <- rbind(results$simulated$geodesic_dist, sim_stats$geodesic_dist)
302 |       }
303 |     }
304 | 
305 |     message("Simulation Finished")
306 |   }
307 | 
308 |   return(results)
309 | }
310 | 


--------------------------------------------------------------------------------
/lighthergm/R/hergm.R:
--------------------------------------------------------------------------------
  1 | #' Hierarchical exponential-family random graph models (HERGMs) with local dependence
  2 | #' @description
  3 | #' The function hergm estimates and simulates three classes of hierarchical exponential-family random graph models.
  4 | #' @useDynLib lighthergm
  5 | #' @importFrom Rcpp sourceCpp
  6 | #' @importFrom ergm ergm.getnetwork
  7 | #' @param object A formula or `lighthergm` class object. A `lighthergm` is returned by `hergm()`.
  8 | #' When you pass a `lighthergm` class object to `hergm()`, you can restart the EM step.
  9 | #' @param n_clusters The number of blocks. This must be specified by the user.
 10 | #' When you pass a "lighthergm" class object to `hergm()`, you don't have to specify this argument.
 11 | #' @param n_cores The number of CPU cores to use.
 12 | #' @param block_membership The pre-specified block memberships for each node.
 13 | #' If `NULL`, the latent community structure is estimated, assuming that the number of communities is `n_clusters`.
 14 | #' @param estimate_parameters If `TRUE`, both clustering and parameter estimation are implemented.
 15 | #' If `FALSE`, only clustering is executed.
 16 | #' @param verbose A logical or an integer: if this is TRUE/1,
 17 | #' the program will print out additional information about the progress of estimation and simulation.
 18 | #' A higher value yields lower level information.
 19 | #' @param n_em_step_max The maximum number of EM iterations.
 20 | #' Currently, no early stopping criteria is introduced. Thus `n_em_step_max` EM iterations are exactly implemented.
 21 | #' @param initialization_method Cluster initialization method.
 22 | #' If `1` (the default), `igraph`'s infomap is implemented.
 23 | #' If `2`, the initial clusters are randomly uniformally selected.
 24 | #' If `3`, spectral clustering is conducted.
 25 | #' @param use_infomap_python If `TRUE`, the cluster initialization is implemented using Pythons' infomap.
 26 | #' When using this, make sure that Python's infomap is installed and callable from `system().
 27 | #' @param seed_infomap seed value (integer) for Python's infomap.
 28 | #' @param weight_for_initialization weight value used for cluster initialization. The higher this value, the more weight is put on the initialized alpha.
 29 | #' @param seeds seed value (integer) for the random number generator
 30 | #' @param initialized_cluster_data initialized cluster data from which the EM iterations begin.
 31 | #' This can be either a vector of block affiliations of each node or initialized cluster data by Python's infomap (given by .clu format).
 32 | #' @param method_second_step If "MPLE" (the default), then the maximum pseudolikelihood estimator is implemented when estimating the within-block network model.
 33 | #' If "MLE", then an approximate maximum likelihood estimator is conducted.
 34 | #' @param clustering_with_features If `TRUE`, clustering is implemented using the discrete covariates specified in the formula.
 35 | #' @param list_multiplied_feature_matrices a list of multiplied feature adjacency matarices necessary for EM step.
 36 | #' If `NULL`, `hergm()` automatically calculates. Or you can calculate by `compute_multiplied_feature_matrices()`.
 37 | #' @param fix_covariate_parameter If `TRUE`, when estimating the within-block network model,
 38 | #' parameters for covariates are fixed at the estimated of the between-block network model.
 39 | #' @param compute_pi If `TRUE`, this function keeps track of pi matrices at each EM iteration.
 40 | #' If the network is large, we strongly recommend to set to be `FALSE`.
 41 | #' @param check_alpha_update If `TRUE`, this function keeps track of alpha matrices at each EM iteration.
 42 | #' If the network is large, we strongly recommend to set to be `FALSE`.
 43 | #' @param check_block_membership If TRUE, this function keeps track of estimated block memberships at each EM iteration.
 44 | #' @param cache a `cachem` cache object used to store intermediate calculations such as eigenvector decomposition results.
 45 | #' @param ... Additional arguments, to be passed to lower-level functions
 46 | #'
 47 | #' @examples
 48 | #' # Load an embedded network object.
 49 | #' data(toyNet)
 50 | #'
 51 | #' # Specify the model that you would like to estimate.
 52 | #' model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle
 53 | #'
 54 | #' # Estimate the model
 55 | #' hergm_res <-
 56 | #'   lighthergm::hergm(
 57 | #'     object = model_formula, # The model you would like to estiamte
 58 | #'     n_clusters = 4, # The number of blocks
 59 | #'     n_em_step_max = 100, # The maximum number of EM algorithm steps
 60 | #'     estimate_parameters = TRUE, # Perform parameter estimation after the block recovery step
 61 | #'     clustering_with_features = TRUE, # Indicate that clustering must take into account nodematch on characteristics
 62 | #'     check_block_membership = TRUE # Keep track of block memberships at each EM iteration
 63 | #'   )
 64 | #' @export
 65 | hergm <- function(object,
 66 |                   n_clusters,
 67 |                   n_cores = 1,
 68 |                   block_membership = NULL,
 69 |                   estimate_parameters = TRUE,
 70 |                   verbose = 0,
 71 |                   n_em_step_max = 100,
 72 |                   initialization_method = 1,
 73 |                   use_infomap_python = FALSE,
 74 |                   seed_infomap = NULL,
 75 |                   weight_for_initialization = 1000,
 76 |                   seeds = NULL,
 77 |                   initialized_cluster_data = NULL,
 78 |                   method_second_step = "MPLE",
 79 |                   clustering_with_features = TRUE,
 80 |                   list_multiplied_feature_matrices = NULL,
 81 |                   fix_covariate_parameter = FALSE,
 82 |                   compute_pi = FALSE,
 83 |                   check_alpha_update = FALSE,
 84 |                   check_block_membership = FALSE,
 85 |                   cache = NULL,
 86 |                   ...) {
 87 |   ###################################################################################
 88 |   ###### Preparations for estimation ################################################
 89 |   ###################################################################################
 90 |   # When the given object is formula:
 91 |   if ("formula" %in% class(object)) {
 92 |     # If n_cluster is missing and block_membership and initialized_cluster_data are NULL, stop the process.
 93 |     if (missing(n_clusters) & is.null(block_membership) & is.null(initialized_cluster_data)) {
 94 |       stop("\nThe argument 'n_clusters' is missing. Please specify the number of clusters.")
 95 |     }
 96 | 
 97 |     # Get the formula
 98 |     formula <- object
 99 | 
100 |     # When use_infomap_python = TRUE, check if it is installed.
101 |     if (use_infomap_python) {
102 |       tryCatch(
103 |         {
104 |           system("infomap --version")
105 |         },
106 |         warning = function(e) {
107 |           stop("\nPython's infomap is not installed or your default Python version is 2.x. Please install infomap by
108 |            \nsystem('pip3 install infomap')
109 |            \nIf already installed, make sure that the default Python version is 3.x. You can check the default version by
110 |            \nsystem('python --version')")
111 |         }
112 |       )
113 |     }
114 |     # If the initialized cluster data is given by .clu format:
115 |     if (!is.null(initialized_cluster_data) && all(stringr::str_detect(initialized_cluster_data, ".clu"))) {
116 |       if (verbose > 0) {
117 |         message(paste("Reading initial clusters data from: ", initialized_cluster_data))
118 |       }
119 |       initialized_cluster_data <- readr::read_delim(initialized_cluster_data, delim = " ", skip = 9, col_names = c("node_id", "block", "flow"), col_types = "iid")
120 |       initialized_cluster_data <- as.numeric(dplyr::arrange(initialized_cluster_data, by_group = node_id)$block)
121 |     }
122 | 
123 |     # Update the number of clusters if `initialized_cluster_data` or `block_membership` is given:
124 |     if (!is.null(initialized_cluster_data)) {
125 |       n_clusters <- length(unique(initialized_cluster_data))
126 |     }
127 |     else if (!is.null(block_membership)) {
128 |       n_clusters <- length(unique(block_membership))
129 |     }
130 | 
131 |     # Get network object from formula
132 |     network <- ergm::ergm.getnetwork(formula)
133 | 
134 |     # The current hergm doesn't support directed networks.
135 |     if (network$gal$directed) {
136 |       stop("\nThe current hergm doesn't support directed networks. This will be modified in the future.")
137 |     }
138 | 
139 |     # If list_multiplied_feature_matrices is not NULL, check if the order of features names is the same with that of formula.
140 |     if (!is.null(list_multiplied_feature_matrices)) {
141 |       # If clustering_with_features = FALSE, stop the process.
142 |       if (!clustering_with_features) {
143 |         stop("\nSet clustering_with_features = TRUE since you are going to use vertex features for cluster estimation.")
144 |       }
145 |     }
146 | 
147 |     # Convert vertex.names into string
148 |     if (!is.character(network::network.vertex.names(network))) {
149 |       network::network.vertex.names(network) <- as.character(network::network.vertex.names(network))
150 |     }
151 | 
152 |     # An N x K matrix which stores the probability that node i belongs to block k (i = 1,..., N, k = 1,..., K)
153 |     # If all_block_memberships_fixed == TRUE, this matrix remains NULL.
154 |     # sbm_pi <- NULL
155 |   }
156 | 
157 |   EM_restart_object <- NULL
158 |   # When the given object has a class of "lighthergm":
159 |   if ("lighthergm" %in% class(object)) {
160 |     # Inherit the following arguments from the previous estimation.
161 |     # These arguments are so important that they won't be replaced by the arguments given by the user.
162 |     network <- object$checkpoint$network
163 |     formula <- object$checkpoint$formula
164 |     n_clusters <- object$checkpoint$n_clusters
165 |     clustering_with_features <- object$checkpoint$clustering_with_features
166 |     list_multiplied_feature_matrices <- object$checkpoint$list_multiplied_feature_matrices
167 | 
168 |     # Inherit the following arguments from the previous estimation if not given by the user.
169 |     # If given by the user, discard the inherited argument and use the given one.
170 |     vec_arguments <-
171 |       c(
172 |         "n_cores",
173 |         "estimate_parameters",
174 |         "verbose",
175 |         "n_em_step_max",
176 |         "seeds",
177 |         "method_second_step",
178 |         "fix_covariate_parameter",
179 |         "compute_pi",
180 |         "check_alpha_update",
181 |         "check_block_membership"
182 |       )
183 | 
184 |     message("Arguments:")
185 |     message(glue::glue("Number of clusters = {n_clusters}"))
186 |     for (i in 1:length(vec_arguments)) {
187 |       if (do.call(missing, list(vec_arguments[[i]])) == TRUE) {
188 |         assign(vec_arguments[[i]], object$checkpoint[[vec_arguments[[i]]]])
189 |       }
190 |       message(glue::glue("{vec_arguments[[i]]} = {get(vec_arguments[[i]])}"))
191 |     }
192 | 
193 |     # Prepare an object to restart the EM with.
194 |     EM_restart_object <-
195 |       list(
196 |         alpha = object$alpha,
197 |         list_alpha = object$EM_list_alpha,
198 |         list_z = object$EM_list_z,
199 |         z_memb_init = object$z_memb_final_before_kmeans,
200 |         change_in_alpha = object$EM_change_in_alpha,
201 |         lower_bound = object$EM_lower_bound,
202 |         counter_e_step = object$counter_e_step,
203 |         adjacency_matrix = object$adjacency_matrix
204 |       )
205 |   }
206 | 
207 |   # If the block_memberships of each node are specified in the variable 'block_membership' as integers between 1 and n_clusters,
208 |   # the specified block memberships are fixed.
209 |   all_block_memberships_fixed <- ifelse(is.null(block_membership), FALSE, TRUE)
210 |   # Make sure that if all_block_memberships_fixed == TRUE, estimate_parameters must also be TRUE.
211 |   estimate_parameters <- ifelse(all_block_memberships_fixed, TRUE, estimate_parameters)
212 | 
213 |   ###################################################################################
214 |   ###### First step: Estimating block memberships ###################################
215 |   ###################################################################################
216 | 
217 |   # Estimate the memberships if they are not specified.
218 |   if (!all_block_memberships_fixed) {
219 |     set.seed(seeds[1])
220 |     # Estimate the block memberships
221 |     answer <- EM_wrapper(
222 |       network = network,
223 |       formula = formula,
224 |       n_clusters = n_clusters,
225 |       n_em_step_max = n_em_step_max,
226 |       min_size = 2,
227 |       initialization_method = initialization_method,
228 |       use_infomap_python = use_infomap_python,
229 |       seed_infomap = seed_infomap,
230 |       initialized_cluster_data = initialized_cluster_data,
231 |       clustering_with_features = clustering_with_features,
232 |       list_multiplied_feature_matrices = list_multiplied_feature_matrices,
233 |       verbose = verbose,
234 |       weight_for_initialization = weight_for_initialization,
235 |       compute_pi = compute_pi,
236 |       check_alpha_update = check_alpha_update,
237 |       check_block_membership = check_block_membership,
238 |       EM_restart_object = EM_restart_object,
239 |       cache = cache
240 |     )
241 | 
242 |     block_membership <- answer$z_memb_final
243 |     initial_block <- answer$z_memb_init
244 |     membership_before_kmeans <- answer$z_memb_final_before_kmeans
245 |     sbm_pi <- answer$Pi
246 |     EM_list_alpha <- answer$list_alpha
247 |     EM_list_z <- answer$list_z
248 |     EM_change_in_alpha <- answer$change_in_alpha
249 |     EM_lower_bound <- answer$lower_bound
250 |     alpha <- answer$alpha
251 |     counter_e_step <- answer$counter_e_step
252 |     adjacency_matrix <- answer$adjacency_matrix
253 |     list_multiplied_feature_matrices <- answer$list_multiplied_feature_matrices
254 |   }
255 |   else {
256 |     if (verbose > 0) {
257 |       message("\nSkipping Steps 1 and 2: z specified")
258 |     }
259 |     param_EM_wrapper <- NULL
260 |     initial_block <- NULL
261 |     membership_before_kmeans <- NULL
262 |     sbm_pi <- NULL
263 |     EM_list_z <- NULL
264 |     EM_list_alpha <- NULL
265 |     EM_change_in_alpha <- NULL
266 |     EM_lower_bound <- NULL
267 |     alpha <- NULL
268 |     counter_e_step <- NULL
269 |     adjacency_matrix <- NULL
270 |     list_multiplied_feature_matrices <- NULL
271 |   }
272 | 
273 |   ####################################################################################################
274 |   ###### Second step: Estimating between-block parameters ############################################
275 |   ####################################################################################################
276 | 
277 |   if (estimate_parameters) {
278 |     if (verbose > 0) {
279 |       message("\nEstimate between-block parameters")
280 |     }
281 |     ## Estimate between-block parameters
282 |     est_between <- estimate_between_param(
283 |       formula = formula,
284 |       network = network,
285 |       block = block_membership
286 |     )
287 |   } else {
288 |     # If you don't estimate any parameters...
289 |     est_between <- NULL
290 |   }
291 | 
292 |   ####################################################################################################
293 |   ###### Third step: Estimating within-block parameters ##############################################
294 |   ####################################################################################################
295 | 
296 |   # When you estimate with-block parameters, then:
297 |   if (estimate_parameters) {
298 |     if (verbose > 0) {
299 |       message("\n\nStep 3: Estimate parameters conditional on z")
300 |     }
301 |     # If fixing within-block feature parameters:
302 |     if (fix_covariate_parameter) {
303 |       offset_coef <- coef(est_between)
304 |       offset_coef <- offset_coef[-1]
305 |     } else {
306 |       offset_coef <- NULL
307 |     }
308 |     # Estimate within-block parameters
309 |     est_within <-
310 |       estimate_within_params(
311 |         formula,
312 |         network,
313 |         z_memb = block_membership,
314 |         n_cores,
315 |         verbose,
316 |         seeds = NULL,
317 |         method_second_step,
318 |         offset_coef,
319 |         ...
320 |       )
321 | 
322 | 
323 |     ####################################################################################################
324 |     ###### Store the results ###########################################################################
325 |     ####################################################################################################
326 | 
327 |     estimation_status <- ifelse(est_within$failure, "Estimation failed", "Estimated")
328 |   } else {
329 |     # estimate_parameters = FALSE:
330 |     message("\n")
331 |     estimation_status <- "Not estimated"
332 |     est_within <- NULL
333 |   }
334 | 
335 |   # Store the given arguments. These will be inherited for the next EM.
336 |   checkpoint <- list(
337 |     network = network,
338 |     formula = formula,
339 |     n_clusters = n_clusters,
340 |     clustering_with_features = clustering_with_features,
341 |     list_multiplied_feature_matrices = list_multiplied_feature_matrices,
342 |     n_cores = n_cores,
343 |     estimate_parameters = estimate_parameters,
344 |     verbose = verbose,
345 |     n_em_step_max = n_em_step_max,
346 |     seeds = seeds,
347 |     method_second_step = method_second_step,
348 |     fix_covariate_parameter = fix_covariate_parameter,
349 |     compute_pi = compute_pi,
350 |     check_alpha_update = check_alpha_update,
351 |     check_block_membership = check_block_membership
352 |   )
353 | 
354 |   # Store the results in a list
355 |   output <- list(
356 |     partition = block_membership,
357 |     initial_block = initial_block,
358 |     sbm_pi = sbm_pi,
359 |     EM_list_z = EM_list_z,
360 |     EM_list_alpha = EM_list_alpha,
361 |     EM_change_in_alpha = EM_change_in_alpha,
362 |     EM_lower_bound = EM_lower_bound,
363 |     alpha = alpha,
364 |     counter_e_step = counter_e_step,
365 |     adjacency_matrix = adjacency_matrix,
366 |     estimation_status = estimation_status,
367 |     est_within = est_within,
368 |     est_between = est_between,
369 |     checkpoint = checkpoint,
370 |     membership_before_kmeans = membership_before_kmeans
371 |   )
372 | 
373 |   # Return the output
374 |   return(structure(output, class = "lighthergm"))
375 | }
376 | 


--------------------------------------------------------------------------------
/lighthergm/src/RcppExports.cpp:
--------------------------------------------------------------------------------
  1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand
  2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
  3 | 
  4 | #include <RcppArmadillo.h>
  5 | #include <Rcpp.h>
  6 | 
  7 | using namespace Rcpp;
  8 | 
  9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM
 10 | Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
 11 | Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 12 | #endif
 13 | 
 14 | // eigenvectors_sparse
 15 | arma::mat eigenvectors_sparse(const arma::sp_mat& X, int n_vec);
 16 | RcppExport SEXP _lighthergm_eigenvectors_sparse(SEXP XSEXP, SEXP n_vecSEXP) {
 17 | BEGIN_RCPP
 18 |     Rcpp::RObject rcpp_result_gen;
 19 |     Rcpp::RNGScope rcpp_rngScope_gen;
 20 |     Rcpp::traits::input_parameter< const arma::sp_mat& >::type X(XSEXP);
 21 |     Rcpp::traits::input_parameter< int >::type n_vec(n_vecSEXP);
 22 |     rcpp_result_gen = Rcpp::wrap(eigenvectors_sparse(X, n_vec));
 23 |     return rcpp_result_gen;
 24 | END_RCPP
 25 | }
 26 | // compute_yule_coef
 27 | double compute_yule_coef(const arma::vec& z_star, const arma::vec& z);
 28 | RcppExport SEXP _lighthergm_compute_yule_coef(SEXP z_starSEXP, SEXP zSEXP) {
 29 | BEGIN_RCPP
 30 |     Rcpp::RObject rcpp_result_gen;
 31 |     Rcpp::RNGScope rcpp_rngScope_gen;
 32 |     Rcpp::traits::input_parameter< const arma::vec& >::type z_star(z_starSEXP);
 33 |     Rcpp::traits::input_parameter< const arma::vec& >::type z(zSEXP);
 34 |     rcpp_result_gen = Rcpp::wrap(compute_yule_coef(z_star, z));
 35 |     return rcpp_result_gen;
 36 | END_RCPP
 37 | }
 38 | // get_sparse_feature_adjmat
 39 | arma::sp_mat get_sparse_feature_adjmat(const arma::vec& x);
 40 | RcppExport SEXP _lighthergm_get_sparse_feature_adjmat(SEXP xSEXP) {
 41 | BEGIN_RCPP
 42 |     Rcpp::RObject rcpp_result_gen;
 43 |     Rcpp::RNGScope rcpp_rngScope_gen;
 44 |     Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP);
 45 |     rcpp_result_gen = Rcpp::wrap(get_sparse_feature_adjmat(x));
 46 |     return rcpp_result_gen;
 47 | END_RCPP
 48 | }
 49 | // get_sparse_feature_adjmat_from_string
 50 | arma::sp_mat get_sparse_feature_adjmat_from_string(const Rcpp::StringVector& x);
 51 | RcppExport SEXP _lighthergm_get_sparse_feature_adjmat_from_string(SEXP xSEXP) {
 52 | BEGIN_RCPP
 53 |     Rcpp::RObject rcpp_result_gen;
 54 |     Rcpp::RNGScope rcpp_rngScope_gen;
 55 |     Rcpp::traits::input_parameter< const Rcpp::StringVector& >::type x(xSEXP);
 56 |     rcpp_result_gen = Rcpp::wrap(get_sparse_feature_adjmat_from_string(x));
 57 |     return rcpp_result_gen;
 58 | END_RCPP
 59 | }
 60 | // get_matrix_for_denominator
 61 | arma::sp_mat get_matrix_for_denominator(int numOfVertices, const Rcpp::List& list_feature_adjmat);
 62 | RcppExport SEXP _lighthergm_get_matrix_for_denominator(SEXP numOfVerticesSEXP, SEXP list_feature_adjmatSEXP) {
 63 | BEGIN_RCPP
 64 |     Rcpp::RObject rcpp_result_gen;
 65 |     Rcpp::RNGScope rcpp_rngScope_gen;
 66 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
 67 |     Rcpp::traits::input_parameter< const Rcpp::List& >::type list_feature_adjmat(list_feature_adjmatSEXP);
 68 |     rcpp_result_gen = Rcpp::wrap(get_matrix_for_denominator(numOfVertices, list_feature_adjmat));
 69 |     return rcpp_result_gen;
 70 | END_RCPP
 71 | }
 72 | // get_elementwise_multiplied_matrices
 73 | Rcpp::List get_elementwise_multiplied_matrices(const arma::sp_mat& adjmat, const Rcpp::List& list_feature_adjmat);
 74 | RcppExport SEXP _lighthergm_get_elementwise_multiplied_matrices(SEXP adjmatSEXP, SEXP list_feature_adjmatSEXP) {
 75 | BEGIN_RCPP
 76 |     Rcpp::RObject rcpp_result_gen;
 77 |     Rcpp::RNGScope rcpp_rngScope_gen;
 78 |     Rcpp::traits::input_parameter< const arma::sp_mat& >::type adjmat(adjmatSEXP);
 79 |     Rcpp::traits::input_parameter< const Rcpp::List& >::type list_feature_adjmat(list_feature_adjmatSEXP);
 80 |     rcpp_result_gen = Rcpp::wrap(get_elementwise_multiplied_matrices(adjmat, list_feature_adjmat));
 81 |     return rcpp_result_gen;
 82 | END_RCPP
 83 | }
 84 | // decimal_to_binary_vector
 85 | arma::vec decimal_to_binary_vector(int decimal, int vec_length);
 86 | RcppExport SEXP _lighthergm_decimal_to_binary_vector(SEXP decimalSEXP, SEXP vec_lengthSEXP) {
 87 | BEGIN_RCPP
 88 |     Rcpp::RObject rcpp_result_gen;
 89 |     Rcpp::RNGScope rcpp_rngScope_gen;
 90 |     Rcpp::traits::input_parameter< int >::type decimal(decimalSEXP);
 91 |     Rcpp::traits::input_parameter< int >::type vec_length(vec_lengthSEXP);
 92 |     rcpp_result_gen = Rcpp::wrap(decimal_to_binary_vector(decimal, vec_length));
 93 |     return rcpp_result_gen;
 94 | END_RCPP
 95 | }
 96 | // compute_sumTaus
 97 | arma::mat compute_sumTaus(int numOfVertices, int numOfClasses, const arma::mat& tau, int verbose);
 98 | RcppExport SEXP _lighthergm_compute_sumTaus(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP tauSEXP, SEXP verboseSEXP) {
 99 | BEGIN_RCPP
100 |     Rcpp::RObject rcpp_result_gen;
101 |     Rcpp::RNGScope rcpp_rngScope_gen;
102 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
103 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
104 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
105 |     Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
106 |     rcpp_result_gen = Rcpp::wrap(compute_sumTaus(numOfVertices, numOfClasses, tau, verbose));
107 |     return rcpp_result_gen;
108 | END_RCPP
109 | }
110 | // compute_quadratic_term_naive
111 | arma::mat compute_quadratic_term_naive(int numOfVertices, int numOfClasses, const arma::mat& pi, const arma::mat& tau, const arma::sp_mat& network);
112 | RcppExport SEXP _lighthergm_compute_quadratic_term_naive(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP piSEXP, SEXP tauSEXP, SEXP networkSEXP) {
113 | BEGIN_RCPP
114 |     Rcpp::RObject rcpp_result_gen;
115 |     Rcpp::RNGScope rcpp_rngScope_gen;
116 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
117 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
118 |     Rcpp::traits::input_parameter< const arma::mat& >::type pi(piSEXP);
119 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
120 |     Rcpp::traits::input_parameter< const arma::sp_mat& >::type network(networkSEXP);
121 |     rcpp_result_gen = Rcpp::wrap(compute_quadratic_term_naive(numOfVertices, numOfClasses, pi, tau, network));
122 |     return rcpp_result_gen;
123 | END_RCPP
124 | }
125 | // compute_linear_term
126 | arma::mat compute_linear_term(int numOfVertices, int numOfClasses, const arma::vec& alpha, const arma::mat& tau, double& LB);
127 | RcppExport SEXP _lighthergm_compute_linear_term(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP tauSEXP, SEXP LBSEXP) {
128 | BEGIN_RCPP
129 |     Rcpp::RObject rcpp_result_gen;
130 |     Rcpp::RNGScope rcpp_rngScope_gen;
131 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
132 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
133 |     Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP);
134 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
135 |     Rcpp::traits::input_parameter< double& >::type LB(LBSEXP);
136 |     rcpp_result_gen = Rcpp::wrap(compute_linear_term(numOfVertices, numOfClasses, alpha, tau, LB));
137 |     return rcpp_result_gen;
138 | END_RCPP
139 | }
140 | // compute_pi
141 | arma::mat compute_pi(int numOfVertices, int numOfClasses, const arma::sp_mat& stat, const arma::mat& tau);
142 | RcppExport SEXP _lighthergm_compute_pi(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP statSEXP, SEXP tauSEXP) {
143 | BEGIN_RCPP
144 |     Rcpp::RObject rcpp_result_gen;
145 |     Rcpp::RNGScope rcpp_rngScope_gen;
146 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
147 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
148 |     Rcpp::traits::input_parameter< const arma::sp_mat& >::type stat(statSEXP);
149 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
150 |     rcpp_result_gen = Rcpp::wrap(compute_pi(numOfVertices, numOfClasses, stat, tau));
151 |     return rcpp_result_gen;
152 | END_RCPP
153 | }
154 | // compute_quadratic_term
155 | arma::mat compute_quadratic_term(int numOfVertices, int numOfClasses, const arma::vec& alpha, const arma::mat& tau, const arma::sp_mat& network, double& LB, int verbose);
156 | RcppExport SEXP _lighthergm_compute_quadratic_term(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP tauSEXP, SEXP networkSEXP, SEXP LBSEXP, SEXP verboseSEXP) {
157 | BEGIN_RCPP
158 |     Rcpp::RObject rcpp_result_gen;
159 |     Rcpp::RNGScope rcpp_rngScope_gen;
160 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
161 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
162 |     Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP);
163 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
164 |     Rcpp::traits::input_parameter< const arma::sp_mat& >::type network(networkSEXP);
165 |     Rcpp::traits::input_parameter< double& >::type LB(LBSEXP);
166 |     Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
167 |     rcpp_result_gen = Rcpp::wrap(compute_quadratic_term(numOfVertices, numOfClasses, alpha, tau, network, LB, verbose));
168 |     return rcpp_result_gen;
169 | END_RCPP
170 | }
171 | // run_EM_without_features
172 | Rcpp::List run_EM_without_features(int numOfVertices, int numOfClasses, const arma::vec& alpha, arma::mat& tau, const arma::sp_mat& network, int verbose);
173 | RcppExport SEXP _lighthergm_run_EM_without_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP tauSEXP, SEXP networkSEXP, SEXP verboseSEXP) {
174 | BEGIN_RCPP
175 |     Rcpp::RObject rcpp_result_gen;
176 |     Rcpp::RNGScope rcpp_rngScope_gen;
177 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
178 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
179 |     Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP);
180 |     Rcpp::traits::input_parameter< arma::mat& >::type tau(tauSEXP);
181 |     Rcpp::traits::input_parameter< const arma::sp_mat& >::type network(networkSEXP);
182 |     Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
183 |     rcpp_result_gen = Rcpp::wrap(run_EM_without_features(numOfVertices, numOfClasses, alpha, tau, network, verbose));
184 |     return rcpp_result_gen;
185 | END_RCPP
186 | }
187 | // compute_denominator_for_pi_d1x0
188 | arma::mat compute_denominator_for_pi_d1x0(int numOfVertices, double numOfClasses, const arma::sp_mat& matrix_for_denominator, const arma::mat& tau, int verbose);
189 | RcppExport SEXP _lighthergm_compute_denominator_for_pi_d1x0(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP matrix_for_denominatorSEXP, SEXP tauSEXP, SEXP verboseSEXP) {
190 | BEGIN_RCPP
191 |     Rcpp::RObject rcpp_result_gen;
192 |     Rcpp::RNGScope rcpp_rngScope_gen;
193 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
194 |     Rcpp::traits::input_parameter< double >::type numOfClasses(numOfClassesSEXP);
195 |     Rcpp::traits::input_parameter< const arma::sp_mat& >::type matrix_for_denominator(matrix_for_denominatorSEXP);
196 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
197 |     Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
198 |     rcpp_result_gen = Rcpp::wrap(compute_denominator_for_pi_d1x0(numOfVertices, numOfClasses, matrix_for_denominator, tau, verbose));
199 |     return rcpp_result_gen;
200 | END_RCPP
201 | }
202 | // compute_pi_d1x0
203 | arma::mat compute_pi_d1x0(int numOfVertices, int numOfClasses, const Rcpp::List& list_multiplied_feature_adjmat, const arma::mat& tau, int verbose);
204 | RcppExport SEXP _lighthergm_compute_pi_d1x0(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP, SEXP verboseSEXP) {
205 | BEGIN_RCPP
206 |     Rcpp::RObject rcpp_result_gen;
207 |     Rcpp::RNGScope rcpp_rngScope_gen;
208 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
209 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
210 |     Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP);
211 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
212 |     Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
213 |     rcpp_result_gen = Rcpp::wrap(compute_pi_d1x0(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, verbose));
214 |     return rcpp_result_gen;
215 | END_RCPP
216 | }
217 | // compute_quadratic_term_with_features
218 | arma::mat compute_quadratic_term_with_features(int numOfVertices, int numOfClasses, const Rcpp::List& list_multiplied_feature_adjmat, const arma::mat& tau, double& LB, int verbose);
219 | RcppExport SEXP _lighthergm_compute_quadratic_term_with_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP, SEXP LBSEXP, SEXP verboseSEXP) {
220 | BEGIN_RCPP
221 |     Rcpp::RObject rcpp_result_gen;
222 |     Rcpp::RNGScope rcpp_rngScope_gen;
223 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
224 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
225 |     Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP);
226 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
227 |     Rcpp::traits::input_parameter< double& >::type LB(LBSEXP);
228 |     Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
229 |     rcpp_result_gen = Rcpp::wrap(compute_quadratic_term_with_features(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, LB, verbose));
230 |     return rcpp_result_gen;
231 | END_RCPP
232 | }
233 | // compute_pi_with_features
234 | Rcpp::List compute_pi_with_features(int numOfVertices, int numOfClasses, const Rcpp::List& list_multiplied_feature_adjmat, const arma::mat& tau);
235 | RcppExport SEXP _lighthergm_compute_pi_with_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP) {
236 | BEGIN_RCPP
237 |     Rcpp::RObject rcpp_result_gen;
238 |     Rcpp::RNGScope rcpp_rngScope_gen;
239 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
240 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
241 |     Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP);
242 |     Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP);
243 |     rcpp_result_gen = Rcpp::wrap(compute_pi_with_features(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau));
244 |     return rcpp_result_gen;
245 | END_RCPP
246 | }
247 | // run_EM_with_features
248 | Rcpp::List run_EM_with_features(int numOfVertices, int numOfClasses, const arma::vec& alpha, const Rcpp::List& list_multiplied_feature_adjmat, arma::mat& tau, int verbose);
249 | RcppExport SEXP _lighthergm_run_EM_with_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP, SEXP verboseSEXP) {
250 | BEGIN_RCPP
251 |     Rcpp::RObject rcpp_result_gen;
252 |     Rcpp::RNGScope rcpp_rngScope_gen;
253 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
254 |     Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP);
255 |     Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP);
256 |     Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP);
257 |     Rcpp::traits::input_parameter< arma::mat& >::type tau(tauSEXP);
258 |     Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP);
259 |     rcpp_result_gen = Rcpp::wrap(run_EM_with_features(numOfVertices, numOfClasses, alpha, list_multiplied_feature_adjmat, tau, verbose));
260 |     return rcpp_result_gen;
261 | END_RCPP
262 | }
263 | // simulate_between_network
264 | arma::sp_mat simulate_between_network(int numOfVertices, const Rcpp::List& list_feature_adjmat, const arma::vec& coef_between, const arma::vec& block_membership, bool directed);
265 | RcppExport SEXP _lighthergm_simulate_between_network(SEXP numOfVerticesSEXP, SEXP list_feature_adjmatSEXP, SEXP coef_betweenSEXP, SEXP block_membershipSEXP, SEXP directedSEXP) {
266 | BEGIN_RCPP
267 |     Rcpp::RObject rcpp_result_gen;
268 |     Rcpp::RNGScope rcpp_rngScope_gen;
269 |     Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP);
270 |     Rcpp::traits::input_parameter< const Rcpp::List& >::type list_feature_adjmat(list_feature_adjmatSEXP);
271 |     Rcpp::traits::input_parameter< const arma::vec& >::type coef_between(coef_betweenSEXP);
272 |     Rcpp::traits::input_parameter< const arma::vec& >::type block_membership(block_membershipSEXP);
273 |     Rcpp::traits::input_parameter< bool >::type directed(directedSEXP);
274 |     rcpp_result_gen = Rcpp::wrap(simulate_between_network(numOfVertices, list_feature_adjmat, coef_between, block_membership, directed));
275 |     return rcpp_result_gen;
276 | END_RCPP
277 | }
278 | 
279 | static const R_CallMethodDef CallEntries[] = {
280 |     {"_lighthergm_eigenvectors_sparse", (DL_FUNC) &_lighthergm_eigenvectors_sparse, 2},
281 |     {"_lighthergm_compute_yule_coef", (DL_FUNC) &_lighthergm_compute_yule_coef, 2},
282 |     {"_lighthergm_get_sparse_feature_adjmat", (DL_FUNC) &_lighthergm_get_sparse_feature_adjmat, 1},
283 |     {"_lighthergm_get_sparse_feature_adjmat_from_string", (DL_FUNC) &_lighthergm_get_sparse_feature_adjmat_from_string, 1},
284 |     {"_lighthergm_get_matrix_for_denominator", (DL_FUNC) &_lighthergm_get_matrix_for_denominator, 2},
285 |     {"_lighthergm_get_elementwise_multiplied_matrices", (DL_FUNC) &_lighthergm_get_elementwise_multiplied_matrices, 2},
286 |     {"_lighthergm_decimal_to_binary_vector", (DL_FUNC) &_lighthergm_decimal_to_binary_vector, 2},
287 |     {"_lighthergm_compute_sumTaus", (DL_FUNC) &_lighthergm_compute_sumTaus, 4},
288 |     {"_lighthergm_compute_quadratic_term_naive", (DL_FUNC) &_lighthergm_compute_quadratic_term_naive, 5},
289 |     {"_lighthergm_compute_linear_term", (DL_FUNC) &_lighthergm_compute_linear_term, 5},
290 |     {"_lighthergm_compute_pi", (DL_FUNC) &_lighthergm_compute_pi, 4},
291 |     {"_lighthergm_compute_quadratic_term", (DL_FUNC) &_lighthergm_compute_quadratic_term, 7},
292 |     {"_lighthergm_run_EM_without_features", (DL_FUNC) &_lighthergm_run_EM_without_features, 6},
293 |     {"_lighthergm_compute_denominator_for_pi_d1x0", (DL_FUNC) &_lighthergm_compute_denominator_for_pi_d1x0, 5},
294 |     {"_lighthergm_compute_pi_d1x0", (DL_FUNC) &_lighthergm_compute_pi_d1x0, 5},
295 |     {"_lighthergm_compute_quadratic_term_with_features", (DL_FUNC) &_lighthergm_compute_quadratic_term_with_features, 6},
296 |     {"_lighthergm_compute_pi_with_features", (DL_FUNC) &_lighthergm_compute_pi_with_features, 4},
297 |     {"_lighthergm_run_EM_with_features", (DL_FUNC) &_lighthergm_run_EM_with_features, 6},
298 |     {"_lighthergm_simulate_between_network", (DL_FUNC) &_lighthergm_simulate_between_network, 5},
299 |     {NULL, NULL, 0}
300 | };
301 | 
302 | RcppExport void R_init_lighthergm(DllInfo *dll) {
303 |     R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
304 |     R_useDynamicSymbols(dll, FALSE);
305 | }
306 | 


--------------------------------------------------------------------------------