├── lighthergm ├── src │ ├── .gitignore │ ├── Makevars │ ├── Makevars.win │ ├── helper.h │ ├── compute_eigenvector.cpp │ ├── compute_yule_coef.cpp │ ├── simulate_between_network.cpp │ ├── helper_function.cpp │ ├── create_feature_matrix.cpp │ └── RcppExports.cpp ├── vignettes │ ├── .gitignore │ └── intro-lighthergm.Rmd ├── .gitignore ├── .Rbuildignore ├── tests │ ├── testthat.R │ └── testthat │ │ ├── test-compute_linear_term.R │ │ ├── test-get_induced_subgraph.R │ │ ├── test-compute_yule_coef.R │ │ ├── test-get_elementwise_multiplied_matrices.R │ │ ├── test-get_sparse_feature_adjmat_cpp.R │ │ ├── test-cache.R │ │ ├── test-start-from-given-cluster.R │ │ ├── test-compute_quadratic_term.R │ │ ├── test-compute_pi_with_features.R │ │ ├── test-estimate-params.R │ │ ├── test-compute_pi_d1x0.R │ │ ├── test-compute_quadratic_term_with_features.R │ │ ├── test-within-network-stats.R │ │ ├── test-estimate_within_param.R │ │ ├── test-EM-checkpoint.R │ │ ├── test-compute_lower_bound.R │ │ └── test-gof.R ├── data │ └── toyNet.rda ├── man │ ├── as_sparse_adj.Rd │ ├── compute_yule_coef.Rd │ ├── to_degree_dist_df.Rd │ ├── to_edgewise_shared_partners_df.Rd │ ├── to_geodesic_dist_df.Rd │ ├── spec_clust_sparse.Rd │ ├── arrange_edgelist.Rd │ ├── estimate_between_param.Rd │ ├── extract_covariate_names.Rd │ ├── get_list_sparse_feature_adjmat.Rd │ ├── get_between_stats.Rd │ ├── edgelist_to_stats.Rd │ ├── separate_formulas.Rd │ ├── compute_multiplied_feature_matrices.Rd │ ├── swap_formula_network.Rd │ ├── toyNet.Rd │ ├── generate_network_for_output.Rd │ ├── sort_block_membership.Rd │ ├── combine_within_between_edges.Rd │ ├── generate_seed_network.Rd │ ├── get_gof_stats.Rd │ ├── estimate_within_params.Rd │ ├── draw_within_block_connection.Rd │ ├── simulate_hergm_within.Rd │ ├── gof_lighthergm.Rd │ ├── draw_between_block_connection.Rd │ ├── simulate_hergm.Rd │ └── hergm.Rd ├── lighthergm.Rproj ├── R │ ├── data.R │ ├── get_list_sparse_feature_adjmat.R │ ├── estimate_between_param.R │ ├── RcppExports.R │ ├── estimate_within_params.R │ ├── gof_lighthergm.R │ └── hergm.R ├── NAMESPACE └── DESCRIPTION ├── lighthergm.jpg ├── doc └── intro-lighthergm.pdf ├── docker-compose.yml ├── .gitignore ├── Dockerfile ├── .github └── workflows │ └── check-standard.yml └── README.md /lighthergm/src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /lighthergm/vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /lighthergm/.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | inst/doc 3 | /doc/ 4 | /Meta/ 5 | -------------------------------------------------------------------------------- /lighthergm.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sansan-inc/lighthergm/HEAD/lighthergm.jpg -------------------------------------------------------------------------------- /doc/intro-lighthergm.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sansan-inc/lighthergm/HEAD/doc/intro-lighthergm.pdf -------------------------------------------------------------------------------- /lighthergm/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^lighthergm\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^doc$ 4 | ^LICENSE\.md$ 5 | ^Meta$ 6 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(lighthergm) 3 | 4 | test_check("lighthergm") 5 | -------------------------------------------------------------------------------- /lighthergm/data/toyNet.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sansan-inc/lighthergm/HEAD/lighthergm/data/toyNet.rda -------------------------------------------------------------------------------- /lighthergm/src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_CXXFLAGS = -DARMA_64BIT_WORD=1 $(SHLIB_OPENMP_CXXFLAGS) 3 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | -------------------------------------------------------------------------------- /lighthergm/src/Makevars.win: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_CXXFLAGS = -DARMA_64BIT_WORD=1 $(SHLIB_OPENMP_CXXFLAGS) 3 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | -------------------------------------------------------------------------------- /lighthergm/man/as_sparse_adj.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimate_within_params.R 3 | \name{as_sparse_adj} 4 | \alias{as_sparse_adj} 5 | \title{Get a sparse adjacency matrix from a network object} 6 | \usage{ 7 | as_sparse_adj(net) 8 | } 9 | \arguments{ 10 | \item{net}{a network object} 11 | } 12 | \description{ 13 | Get a sparse adjacency matrix from a network object 14 | } 15 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | 3 | services: 4 | rstudio: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | container_name: lighthergm 9 | ports: 10 | - 8787:8787 11 | environment: 12 | - PASSWORD=${PASSWORD} 13 | #- USERID=${USERID} 14 | #- GROUPID=${GROUPID} 15 | #volumes: 16 | # - ${WORKSPACE_PATH}:/home/rstudio/workspace 17 | tty: true 18 | stdin_open: true 19 | -------------------------------------------------------------------------------- /lighthergm/man/compute_yule_coef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{compute_yule_coef} 4 | \alias{compute_yule_coef} 5 | \title{Compute Yule's Φ-coefficient} 6 | \usage{ 7 | compute_yule_coef(z_star, z) 8 | } 9 | \arguments{ 10 | \item{z_star}{a true block membership} 11 | 12 | \item{z}{an estimated block membership} 13 | } 14 | \description{ 15 | Compute Yule's Φ-coefficient 16 | } 17 | -------------------------------------------------------------------------------- /lighthergm/src/helper.h: -------------------------------------------------------------------------------- 1 | #ifndef __HELPER__ 2 | #define __HELPER__ 3 | 4 | arma::vec decimal_to_binary_vector( 5 | int decimal, 6 | int vec_length); 7 | 8 | arma::mat compute_sumTaus( 9 | int numOfVertices, 10 | int numOfClasses, 11 | const arma::mat& tau, 12 | int verbose = 0); 13 | 14 | void normalizeTau( 15 | arma::mat& tau, 16 | double minValue); 17 | 18 | arma::rowvec sumDoubleMatrixByRow( 19 | const arma::mat& matrix); 20 | 21 | #endif // __HELPER__ 22 | -------------------------------------------------------------------------------- /lighthergm/lighthergm.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /lighthergm/man/to_degree_dist_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gof_lighthergm.R 3 | \name{to_degree_dist_df} 4 | \alias{to_degree_dist_df} 5 | \title{Extracts the degree distribution from a network and returns it as a data frame.} 6 | \usage{ 7 | to_degree_dist_df(net) 8 | } 9 | \arguments{ 10 | \item{net}{a statnet network object} 11 | } 12 | \value{ 13 | a data frame 14 | } 15 | \description{ 16 | Extracts the degree distribution from a network and returns it as a data frame. 17 | } 18 | -------------------------------------------------------------------------------- /lighthergm/man/to_edgewise_shared_partners_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gof_lighthergm.R 3 | \name{to_edgewise_shared_partners_df} 4 | \alias{to_edgewise_shared_partners_df} 5 | \title{Extracts the edgewise shared partners distribution (undirected).} 6 | \usage{ 7 | to_edgewise_shared_partners_df(net) 8 | } 9 | \arguments{ 10 | \item{net}{a statnet network object} 11 | } 12 | \value{ 13 | a data frame 14 | } 15 | \description{ 16 | Extracts the edgewise shared partners distribution (undirected). 17 | } 18 | -------------------------------------------------------------------------------- /lighthergm/man/to_geodesic_dist_df.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gof_lighthergm.R 3 | \name{to_geodesic_dist_df} 4 | \alias{to_geodesic_dist_df} 5 | \title{Extracts the geodesic distance distribution from a network and returns it as a dataframe.} 6 | \usage{ 7 | to_geodesic_dist_df(net) 8 | } 9 | \arguments{ 10 | \item{net}{a statnet network object} 11 | } 12 | \value{ 13 | a data frame 14 | } 15 | \description{ 16 | Extracts the geodesic distance distribution from a network and returns it as a dataframe. 17 | } 18 | -------------------------------------------------------------------------------- /lighthergm/man/spec_clust_sparse.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EM_wrapper.R 3 | \name{spec_clust_sparse} 4 | \alias{spec_clust_sparse} 5 | \title{function for spectral clustering} 6 | \usage{ 7 | spec_clust_sparse(network, n_clusters, eigenvectors_fn) 8 | } 9 | \arguments{ 10 | \item{network}{a sparse adjacency matrix} 11 | 12 | \item{n_clusters}{number of specified clusters} 13 | 14 | \item{eigenvectors_fn}{a function that performs eigenvector decomposition} 15 | } 16 | \description{ 17 | function for spectral clustering 18 | } 19 | -------------------------------------------------------------------------------- /lighthergm/src/compute_eigenvector.cpp: -------------------------------------------------------------------------------- 1 | // Files to look to get MM version: ReciprocityModel for basic functions, 2 | // BinaryReciprocityModel.cpp for version written already, 3 | // MMBinaryReciprocityModel.cpp for more eleborate version 4 | // #define ARMA_64BIT_WORD 1; 5 | #include 6 | // [[Rcpp::depends(RcppArmadillo)]] 7 | 8 | // [[Rcpp::export]] 9 | arma::mat eigenvectors_sparse( 10 | const arma::sp_mat& X, 11 | int n_vec) { 12 | arma::vec eigval; 13 | arma::mat eigvec; 14 | 15 | arma::eigs_sym(eigval, eigvec, X, n_vec); 16 | return(eigvec); 17 | } 18 | -------------------------------------------------------------------------------- /lighthergm/R/data.R: -------------------------------------------------------------------------------- 1 | #' A toy network to play `lighthergm` with. 2 | #' 3 | #' This network has a clear cluster structure. 4 | #' The number of clusters is four, and which cluster each node belongs to is defined in the variable "block". 5 | #' 6 | #' @format A `statnet`'s network class object. It has three nodal features. 7 | #' \describe{ 8 | #' \item{block}{block membership of each node} 9 | #' \item{x}{a covariate. It has 10 labels.} 10 | #' \item{y}{a covariate. It has 10 labels.} 11 | #' ... 12 | #' } 13 | #' `x` and `y` are not variables with any particular meaning. 14 | #' 15 | "toyNet" 16 | -------------------------------------------------------------------------------- /lighthergm/man/arrange_edgelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{arrange_edgelist} 4 | \alias{arrange_edgelist} 5 | \title{Make the given edgelist consistent with the data frame that contains vertex info.} 6 | \usage{ 7 | arrange_edgelist(edgelist, sorted_dataframe) 8 | } 9 | \arguments{ 10 | \item{edgelist}{an edgelist to be arranged} 11 | 12 | \item{sorted_dataframe}{a data frame sorted by \code{sort_block_membership}} 13 | } 14 | \description{ 15 | Make the given edgelist consistent with the data frame that contains vertex info. 16 | } 17 | -------------------------------------------------------------------------------- /lighthergm/man/estimate_between_param.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimate_between_param.R 3 | \name{estimate_between_param} 4 | \alias{estimate_between_param} 5 | \title{Estimate between-block parameters by logit} 6 | \usage{ 7 | estimate_between_param(formula, network, block) 8 | } 9 | \arguments{ 10 | \item{formula}{formula for estimating between-block parameters} 11 | 12 | \item{network}{network object} 13 | 14 | \item{block}{a vector that represents which node belongs to which node} 15 | } 16 | \description{ 17 | Estimate between-block parameters by logit 18 | } 19 | -------------------------------------------------------------------------------- /lighthergm/man/extract_covariate_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{extract_covariate_names} 4 | \alias{extract_covariate_names} 5 | \title{Extract Covariate Names 6 | Extracts the names of covariates used in the formula} 7 | \usage{ 8 | extract_covariate_names(formula_for_simulation) 9 | } 10 | \arguments{ 11 | \item{formula_for_simulation}{the formula to check} 12 | } 13 | \value{ 14 | A list of covariate names (can be empty) 15 | } 16 | \description{ 17 | Extract Covariate Names 18 | Extracts the names of covariates used in the formula 19 | } 20 | -------------------------------------------------------------------------------- /lighthergm/man/get_list_sparse_feature_adjmat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_list_sparse_feature_adjmat.R 3 | \name{get_list_sparse_feature_adjmat} 4 | \alias{get_list_sparse_feature_adjmat} 5 | \title{Get a list of sparse feature adjacency matrix from a formula} 6 | \usage{ 7 | get_list_sparse_feature_adjmat(network, formula) 8 | } 9 | \arguments{ 10 | \item{network}{a network object from which nodal covariates are extracted.} 11 | 12 | \item{formula}{a network model to be considered} 13 | } 14 | \description{ 15 | Get a list of sparse feature adjacency matrix from a formula 16 | } 17 | -------------------------------------------------------------------------------- /lighthergm/man/get_between_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{get_between_stats} 4 | \alias{get_between_stats} 5 | \title{Converts a list of edgelists into a data frame of network statistics} 6 | \usage{ 7 | get_between_stats(edgelists, between_formula) 8 | } 9 | \arguments{ 10 | \item{edgelists}{the list of edgelists} 11 | 12 | \item{between_formula}{the formula for the between connections} 13 | } 14 | \value{ 15 | a data frame of sufficient network statistics 16 | } 17 | \description{ 18 | Converts a list of edgelists into a data frame of network statistics 19 | } 20 | -------------------------------------------------------------------------------- /lighthergm/man/edgelist_to_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{edgelist_to_stats} 4 | \alias{edgelist_to_stats} 5 | \title{Converts an edgelist into a matrix of sufficient network statistics} 6 | \usage{ 7 | edgelist_to_stats(net, edgelist, between_formula) 8 | } 9 | \arguments{ 10 | \item{net}{the net to extract the covariates from} 11 | 12 | \item{edgelist}{the edgelist} 13 | 14 | \item{between_formula}{the formula for the between connections} 15 | } 16 | \value{ 17 | a matrix of sufficient network statistics 18 | } 19 | \description{ 20 | Converts an edgelist into a matrix of sufficient network statistics 21 | } 22 | -------------------------------------------------------------------------------- /lighthergm/man/separate_formulas.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gof_lighthergm.R 3 | \name{separate_formulas} 4 | \alias{separate_formulas} 5 | \title{Separates a formula into its between and within components. The between component excludes 6 | terms which introduce dyadic dependence.} 7 | \usage{ 8 | separate_formulas(target_formula) 9 | } 10 | \arguments{ 11 | \item{target_formula}{a target formula} 12 | } 13 | \value{ 14 | a list containing the between and within formulas 15 | } 16 | \description{ 17 | Separates a formula into its between and within components. The between component excludes 18 | terms which introduce dyadic dependence. 19 | } 20 | -------------------------------------------------------------------------------- /lighthergm/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(compute_multiplied_feature_matrices) 4 | export(compute_yule_coef) 5 | export(estimate_between_param) 6 | export(estimate_within_params) 7 | export(get_list_sparse_feature_adjmat) 8 | export(gof_lighthergm) 9 | export(hergm) 10 | export(simulate_hergm) 11 | export(simulate_hergm_within) 12 | importFrom(Rcpp,sourceCpp) 13 | importFrom(ergm,ergm) 14 | importFrom(ergm,ergm.getnetwork) 15 | importFrom(foreach,"%do%") 16 | importFrom(foreach,"%dopar%") 17 | importFrom(foreach,foreach) 18 | importFrom(magrittr,"%<>%") 19 | importFrom(magrittr,"%>%") 20 | importFrom(parallel,mclapply) 21 | importFrom(rlang,"%||%") 22 | useDynLib(lighthergm) 23 | -------------------------------------------------------------------------------- /lighthergm/man/compute_multiplied_feature_matrices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_list_sparse_feature_adjmat.R 3 | \name{compute_multiplied_feature_matrices} 4 | \alias{compute_multiplied_feature_matrices} 5 | \title{Get a list of sparse feature adjacency matrix from a formula} 6 | \usage{ 7 | compute_multiplied_feature_matrices(net, list_feature_matrices) 8 | } 9 | \arguments{ 10 | \item{net}{a network object from which nodal covariates are extracted.} 11 | 12 | \item{list_feature_matrices}{a list of feature adjacency matrices generated by \code{get_list_sparse_feature_adjmat()}.} 13 | } 14 | \description{ 15 | Get a list of sparse feature adjacency matrix from a formula 16 | } 17 | -------------------------------------------------------------------------------- /lighthergm/man/swap_formula_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gof_lighthergm.R 3 | \name{swap_formula_network} 4 | \alias{swap_formula_network} 5 | \title{Swaps the network on the lhs of a formula for a new one with the given environment} 6 | \usage{ 7 | swap_formula_network(new_net, net_formula, env) 8 | } 9 | \arguments{ 10 | \item{new_net}{A network object to be inserted into the lhs of the formula} 11 | 12 | \item{net_formula}{The target formula} 13 | 14 | \item{env}{The environment to assign to the formula} 15 | } 16 | \value{ 17 | A new formula with the lhs swapped 18 | } 19 | \description{ 20 | Swaps the network on the lhs of a formula for a new one with the given environment 21 | } 22 | -------------------------------------------------------------------------------- /lighthergm/man/toyNet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{toyNet} 5 | \alias{toyNet} 6 | \title{A toy network to play \code{lighthergm} with.} 7 | \format{ 8 | A \code{statnet}'s network class object. It has three nodal features. 9 | \describe{ 10 | \item{block}{block membership of each node} 11 | \item{x}{a covariate. It has 10 labels.} 12 | \item{y}{a covariate. It has 10 labels.} 13 | ... 14 | } 15 | \code{x} and \code{y} are not variables with any particular meaning. 16 | } 17 | \usage{ 18 | toyNet 19 | } 20 | \description{ 21 | This network has a clear cluster structure. 22 | The number of clusters is four, and which cluster each node belongs to is defined in the variable "block". 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # User-specific files 9 | .Ruserdata 10 | 11 | # Example code in package build process 12 | *-Ex.R 13 | 14 | # Output files from R CMD build 15 | /*.tar.gz 16 | 17 | # Output files from R CMD check 18 | /*.Rcheck/ 19 | 20 | # RStudio files 21 | .Rproj.user/ 22 | 23 | # produced vignettes 24 | vignettes/*.html 25 | vignettes/*.pdf 26 | 27 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 28 | .httr-oauth 29 | 30 | # knitr and R markdown default cache directories 31 | *_cache/ 32 | /cache/ 33 | 34 | # Temporary files created by R markdown 35 | *.utf8.md 36 | *.knit.md 37 | 38 | # R Environment Variables 39 | .Renviron 40 | .Rproj.user 41 | 42 | # Environment variables 43 | .env 44 | -------------------------------------------------------------------------------- /lighthergm/man/generate_network_for_output.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{generate_network_for_output} 4 | \alias{generate_network_for_output} 5 | \title{Create a final network} 6 | \usage{ 7 | generate_network_for_output( 8 | formula_for_simulation_within, 9 | formula_for_simulation_between, 10 | sorted_dataframe, 11 | edgelist 12 | ) 13 | } 14 | \arguments{ 15 | \item{formula_for_simulation_within}{formula for simulating a within network} 16 | 17 | \item{formula_for_simulation_between}{formula for simulating a between network} 18 | 19 | \item{sorted_dataframe}{a data frame generated by \code{sort_block_membership}} 20 | 21 | \item{edgelist}{an edgelist that contain both within- and between-block edges without duplication} 22 | } 23 | \description{ 24 | Create a final network 25 | } 26 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # If you would like to utilize parallelized computation, the R version should be less than 4.0.0. 2 | # There seems some problem with OpenMP for R >= 4.0.0 when you use macOS. 3 | # https://github.com/RcppCore/RcppArmadillo/issues/290 4 | FROM rocker/verse:3.6.3 5 | 6 | # Install Python for installing Python's infomap library. 7 | RUN apt-get update && apt-get install -y python3-dev python3-pip 8 | 9 | # Install infomap by pip3. 10 | RUN pip3 install -U infomap 11 | 12 | # Make it possible to install latest packages 13 | # https://www.rocker-project.org/use/extending/ 14 | RUN echo "options(repos = c(REPO_NAME = 'https://packagemanager.rstudio.com/all/__linux__/centos7/latest'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site 15 | 16 | # Install lighthergm. 17 | COPY ./lighthergm /lighthergm 18 | RUN Rscript -e "devtools::install('lighthergm')" 19 | -------------------------------------------------------------------------------- /lighthergm/man/sort_block_membership.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{sort_block_membership} 4 | \alias{sort_block_membership} 5 | \title{Create a data frame from block memberships, vertex ids, and vertex covariates, sorted by block ids.} 6 | \usage{ 7 | sort_block_membership( 8 | data_for_simulation, 9 | colname_vertex_id, 10 | colname_block_membership 11 | ) 12 | } 13 | \arguments{ 14 | \item{data_for_simulation}{a data frame that contains vertex id, block membership, and vertex features.} 15 | 16 | \item{colname_vertex_id}{a column name in the data frame for the vertex id} 17 | 18 | \item{colname_block_membership}{a column name in the data frame for the block membership.} 19 | } 20 | \description{ 21 | Create a data frame from block memberships, vertex ids, and vertex covariates, sorted by block ids. 22 | } 23 | -------------------------------------------------------------------------------- /lighthergm/man/combine_within_between_edges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{combine_within_between_edges} 4 | \alias{combine_within_between_edges} 5 | \title{Combine within- and between-block edges while removing duplicated links.} 6 | \usage{ 7 | combine_within_between_edges( 8 | edgelist_within, 9 | edgelist_between, 10 | use_fast_between_simulation 11 | ) 12 | } 13 | \arguments{ 14 | \item{edgelist_within}{an within-block edgelist} 15 | 16 | \item{edgelist_between}{a between-block edgelist (Potentially, there are edges that also appear in the within-block edgelist)} 17 | 18 | \item{use_fast_between_simulation}{If \code{TRUE}, this function uses an effcient way to simulate a between-block network. If the network is very large, you should consider using this option.} 19 | } 20 | \description{ 21 | Combine within- and between-block edges while removing duplicated links. 22 | } 23 | -------------------------------------------------------------------------------- /lighthergm/man/generate_seed_network.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{generate_seed_network} 4 | \alias{generate_seed_network} 5 | \title{Create a seed network from which a network will be simulated.} 6 | \usage{ 7 | generate_seed_network( 8 | formula_for_simulation, 9 | sorted_dataframe, 10 | edgelist = NULL, 11 | directed 12 | ) 13 | } 14 | \arguments{ 15 | \item{formula_for_simulation}{formula for simulating a network} 16 | 17 | \item{sorted_dataframe}{a data frame generated by \code{sort_block_membership}} 18 | 19 | \item{edgelist}{an edgelist from which a seed network is generated. The class of the edgelist should be "edgelist", i.e. it should contain as attributes the number of nodes and vertex names.} 20 | 21 | \item{directed}{a boolean of whether the network is directed.} 22 | } 23 | \description{ 24 | Create a seed network from which a network will be simulated. 25 | } 26 | -------------------------------------------------------------------------------- /lighthergm/man/get_gof_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gof_lighthergm.R 3 | \name{get_gof_stats} 4 | \alias{get_gof_stats} 5 | \title{Gets the GOF stats for a formula 6 | If a network is passed, that one is used to obtain the network statistics, 7 | otherwise the netwok in the formula is used.} 8 | \usage{ 9 | get_gof_stats( 10 | sim_formula, 11 | net = NULL, 12 | sim_number = NULL, 13 | compute_geodesic_distance = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{sim_formula}{a formula} 18 | 19 | \item{net}{a statnet network object} 20 | 21 | \item{sim_number}{the ID of the current simulation} 22 | 23 | \item{compute_geodesic_distance}{if TRUE, includes the geodesic distance in the result object} 24 | } 25 | \value{ 26 | a list with the goodness-of-fit statistics 27 | } 28 | \description{ 29 | Gets the GOF stats for a formula 30 | If a network is passed, that one is used to obtain the network statistics, 31 | otherwise the netwok in the formula is used. 32 | } 33 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-compute_linear_term.R: -------------------------------------------------------------------------------- 1 | test_that("linear term computation without features works", { 2 | # Number of nodes 3 | N <- 12 4 | # Number of clusters 5 | K <- 3 6 | 7 | # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k. 8 | tau <- 9 | matrix(c( 10 | 0.2, 0.5, 0.3, 11 | 0.4, 0.4, 0.2, 12 | 0.1, 0.4, 0.5, 13 | 0.4, 0.4, 0.2, 14 | 0.1, 0.1, 0.8, 15 | 0.05, 0.05, 0.9, 16 | 0.8, 0.1, 0.1, 17 | 0.3, 0.4, 0.3, 18 | 0.1, 0.8, 0.1, 19 | 0.5, 0.4, 0.1, 20 | 0.3, 0.3, 0.4, 21 | 0.8, 0.1, 0.1 22 | ), 23 | nrow = K, ncol = N 24 | ) 25 | tau <- t(tau) 26 | 27 | # Compute gamma (parameter of multinomial distribution) 28 | alpha <- colSums(tau) 29 | 30 | # Compute the true linear term in a naive way 31 | s_true <- matrix(0, nrow = N, ncol = K) 32 | 33 | for (i in 1:N) { 34 | for (k in 1:K) { 35 | s_ik <- 1 + log(alpha[k]) - log(tau[i, k]) 36 | s_true[i, k] <- s_true[i, k] + s_ik 37 | } 38 | } 39 | 40 | s <- compute_linear_term(N, K, alpha, tau, 0) 41 | expect_equal(s, s_true, check.attributes = FALSE, tolerance = 1e-10) 42 | }) 43 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-get_induced_subgraph.R: -------------------------------------------------------------------------------- 1 | test_that("getting a subgraph from a network object works", { 2 | # Which node belongs to which block 3 | df <- 4 | tibble::tibble( 5 | block = c(1, 1, 1, 1, 2, 2, 2, 2), 6 | node_id = c("A", "B", "C", "D", "E", "F", "G", "H") 7 | ) 8 | 9 | # Edgelist 10 | edgelist <- 11 | tibble::tibble( 12 | source_id = c("A", "A", "A", "B"), 13 | target_id = c("B", "C", "E", "F") 14 | ) 15 | 16 | # When not all nodes are isolated in a block 17 | subgraph1 <- get_induced_subgraph(block_structure = df, edgelist = edgelist, searched_block = 1) 18 | adj_true <- matrix(c(0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), nrow = 4, ncol = 4) 19 | expect_equal(c("A", "B", "C", "D"), network::network.vertex.names(subgraph1)) 20 | expect_equal(adj_true, network::as.matrix.network.adjacency(subgraph1), check.attributes = FALSE) 21 | 22 | # When all nodes are isolated in a block 23 | subgraph2 <- get_induced_subgraph(block_structure = df, edgelist = edgelist, searched_block = 2) 24 | adj_true <- matrix(0, nrow = 4, ncol = 4) 25 | expect_equal(c("E", "F", "G", "H"), network::network.vertex.names(subgraph2)) 26 | expect_equal(adj_true, network::as.matrix.network.adjacency(subgraph2), check.attributes = FALSE) 27 | }) 28 | -------------------------------------------------------------------------------- /lighthergm/man/estimate_within_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/estimate_within_params.R 3 | \name{estimate_within_params} 4 | \alias{estimate_within_params} 5 | \title{Estimate a within-block network model.} 6 | \usage{ 7 | estimate_within_params( 8 | formula, 9 | network, 10 | z_memb, 11 | number_cores = 1, 12 | verbose = 1, 13 | seeds = NULL, 14 | method_second_step = c("MPLE", "MLE"), 15 | offset_coef = NULL, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{formula}{a within network formula} 21 | 22 | \item{network}{a network object} 23 | 24 | \item{z_memb}{block memberships for each node} 25 | 26 | \item{number_cores}{The number of CPU cores to use.} 27 | 28 | \item{verbose}{A logical or an integer: if this is TRUE/1, 29 | the program will print out additional information about the progress of estimation and simulation.} 30 | 31 | \item{seeds}{seed value (integer) for the random number generator} 32 | 33 | \item{method_second_step}{If "MPLE" (the default), then the maximum pseudolikelihood estimator is returned. 34 | If "MLE", then an approximate maximum likelihood estimator is returned.} 35 | 36 | \item{offset_coef}{a vector of model parameters to be fixed when estimation.(i.e., not estimated).} 37 | 38 | \item{...}{Additional arguments, to be passed to lower-level functions} 39 | } 40 | \description{ 41 | Estimate a within-block network model. 42 | } 43 | -------------------------------------------------------------------------------- /lighthergm/man/draw_within_block_connection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{draw_within_block_connection} 4 | \alias{draw_within_block_connection} 5 | \title{Draw within-block connections} 6 | \usage{ 7 | draw_within_block_connection( 8 | seed_network, 9 | formula_for_simulation, 10 | coef_within_block, 11 | ergm_control, 12 | output, 13 | seed, 14 | n_sim, 15 | verbose, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{seed_network}{a seed network from which a network will be simulated.} 21 | 22 | \item{formula_for_simulation}{formula for simulating a network} 23 | 24 | \item{coef_within_block}{a vector of within-block parameters. The order of the parameters should match that of the formula.} 25 | 26 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation} 27 | 28 | \item{output}{Normally character, one of "network" (default), "stats", "edgelist", to determine the output format.} 29 | 30 | \item{seed}{seed value (integer) for the random number generator.} 31 | 32 | \item{n_sim}{Number of networks to be randomly drawn from the given distribution on the set of all networks.} 33 | 34 | \item{verbose}{If this is TRUE/1, the program will print out additionalinformation about the progress of simulation.} 35 | 36 | \item{...}{Additional arguments, to be passed to lower-level functions} 37 | } 38 | \description{ 39 | Draw within-block connections 40 | } 41 | -------------------------------------------------------------------------------- /lighthergm/src/compute_yule_coef.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | //' Compute Yule's Φ-coefficient 5 | //' @param z_star a true block membership 6 | //' @param z an estimated block membership 7 | //' @export 8 | // [[Rcpp::export]] 9 | double compute_yule_coef( 10 | const arma::vec& z_star, 11 | const arma::vec& z) { 12 | 13 | double n_00 = 0; 14 | double n_01 = 0; 15 | double n_10 = 0; 16 | double n_11 = 0; 17 | 18 | int numOfVertices = z.n_elem; 19 | 20 | // Remove missing values from z_star and z 21 | arma::vec z_star_new = z_star; 22 | arma::vec z_new = z; 23 | for (int i = 0; i < numOfVertices; i++) { 24 | if (std::isnan(z_star_new(i))) { 25 | z_star_new[i] = -100; 26 | } 27 | if (std::isnan(z_new(i))) { 28 | z_new[i] = -1; 29 | } 30 | } 31 | 32 | for (int i = 0; i < numOfVertices; i++) { 33 | for (int j = i+1; j < numOfVertices; j++) { 34 | if (z_star_new(i) == z_star_new(j)) { 35 | if (z_new(i) == z_new(j)) { 36 | n_11 += 1; 37 | } else { 38 | n_10 += 1; 39 | } 40 | } else { 41 | if (z_new(i) == z_new(j)) { 42 | n_01 += 1; 43 | } else { 44 | n_00 += 1; 45 | } 46 | } 47 | } 48 | } 49 | 50 | // Compute Yule's φ-coefficnent 51 | double num = (n_00 * n_11) - (n_01 * n_10); 52 | double denom = sqrt((n_00 + n_01) * (n_10 + n_11) * (n_00 + n_10) * (n_01 + n_11)); 53 | double phi = num / denom; 54 | 55 | // Return the output 56 | return phi; 57 | } 58 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-compute_yule_coef.R: -------------------------------------------------------------------------------- 1 | test_that("computing Yule's phi-coefficient works", { 2 | # Number of nodes 3 | N <- 1000 4 | # Number of clusters 5 | K <- 100 6 | 7 | # An extreme case 8 | z1 <- rep(1:K, each = N / K) 9 | z_star <- z1 10 | 11 | # Check if it equals one. 12 | expect_equal(compute_yule_coef(z_star, z1), 1) 13 | 14 | # Another case 15 | z2 <- sample(1:K, size = N, replace = TRUE) 16 | 17 | # Compute Yule's phi-coefficient naively. 18 | n00 <- 0 19 | n01 <- 0 20 | n10 <- 0 21 | n11 <- 0 22 | 23 | for (i in 1:(N - 1)) { 24 | for (j in (i + 1):N) { 25 | if (z_star[i] == z_star[j] & z2[i] == z2[j]) { 26 | n11 <- n11 + 1 27 | } 28 | if (z_star[i] == z_star[j] & z2[i] != z2[j]) { 29 | n10 <- n10 + 1 30 | } 31 | if (z_star[i] != z_star[j] & z2[i] == z2[j]) { 32 | n01 <- n01 + 1 33 | } 34 | if (z_star[i] != z_star[j] & z2[i] != z2[j]) { 35 | n00 <- n00 + 1 36 | } 37 | } 38 | } 39 | 40 | phi_naive <- (n00 * n11 - n01 * n10) / sqrt((n00 + n01) * (n10 + n11) * (n00 + n10) * (n01 + n11)) 41 | 42 | # Check if it works 43 | expect_equal(compute_yule_coef(z_star, z2), phi_naive, tolerance = 1e-10) 44 | }) 45 | 46 | 47 | test_that("Removing missing values works", { 48 | # Number of nodes 49 | N <- 1000 50 | # Number of clusters 51 | K <- 100 52 | 53 | # An extreme case 54 | z1 <- rep(1:K, each = N / K) 55 | z_star <- z1 56 | z1[1:4] <- NA 57 | 58 | # Check if it equals one. 59 | compute_yule_coef(z_star, z1) 60 | expect_silent(compute_yule_coef(z_star, z1)) 61 | 62 | z_star[50:56] <- NA 63 | expect_silent(compute_yule_coef(z_star, z1)) 64 | }) 65 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-get_elementwise_multiplied_matrices.R: -------------------------------------------------------------------------------- 1 | test_that("element-wise matrix multiplication works", { 2 | # Prepare matrices 3 | N <- 50 4 | G <- matrix(as.integer(unlist(purrr::rbernoulli(n = N * N, p = 0.2))), nrow = N, ncol = N) 5 | diag(G) <- 0 6 | G <- as(G, "sparseMatrix") 7 | G <- Matrix::forceSymmetric(G) 8 | 9 | S <- matrix(as.integer(unlist(purrr::rbernoulli(n = N * N, p = 0.2))), nrow = N, ncol = N) 10 | diag(S) <- 0 11 | S <- as(S, "sparseMatrix") 12 | S <- Matrix::forceSymmetric(S) 13 | 14 | V <- matrix(as.integer(unlist(purrr::rbernoulli(n = N * N, p = 0.2))), nrow = N, ncol = N) 15 | diag(V) <- 0 16 | V <- as(V, "sparseMatrix") 17 | V <- Matrix::forceSymmetric(V) 18 | 19 | # Prepare true results 20 | output_true <- list() 21 | # The first element of the list is filled with - (S + V) + (S % V), which will be used to compute pi_d0x0. 22 | output_true[[1]] <- -(S + V) + (S * V) 23 | # N = 1: (1, 0, 0) 24 | output_true[[2]] <- Matrix::drop0(G * (1 - S) * (1 - V)) 25 | # N = 2: (0, 1, 0) 26 | output_true[[3]] <- Matrix::drop0((1 - G) * S * (1 - V)) 27 | # N = 3: (1, 1, 0) 28 | output_true[[4]] <- Matrix::drop0(G * S * (1 - V)) 29 | # N = 4: (0, 0, 1) 30 | output_true[[5]] <- Matrix::drop0((1 - G) * (1 - S) * V) 31 | # N = 5: (1, 0, 1) 32 | output_true[[6]] <- Matrix::drop0(G * (1 - S) * V) 33 | # N = 6: (0, 1, 1) 34 | output_true[[7]] <- Matrix::drop0((1 - G) * S * V) 35 | # N = 7: (1, 1, 1) 36 | output_true[[8]] <- Matrix::drop0(G * S * V) 37 | 38 | # Compute element-wise mutiplied matrices 39 | output <- get_elementwise_multiplied_matrices(G, list(S, V)) 40 | 41 | for (i in 1:8) { 42 | expect_equal(output[[i]], output_true[[i]], check.attributes = FALSE, ) 43 | } 44 | }) 45 | -------------------------------------------------------------------------------- /lighthergm/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: lighthergm 2 | Title: Fit, Simulate, and Diagnose Hierarchical Exponential-Family Models for Networks in A Scalable Way 3 | Version: 1.1.0 4 | Authors@R: c( 5 | person(given = "Shota", family = "Komatsu", role = c("aut", "cre"), email = "first.last@example.com"), 6 | person(given = "Juan Nelson", family = "Martínez Dahbura", role = c("aut"), email = "first.last@example.com"), 7 | person(given = "Takanori", family = "Nishida", role = c("aut"), email = "first.last@example.com"), 8 | person(given = "Angelo", family = "Mele", role = c("aut"), email = "first.last@example.com")) 9 | Description: `lighthergm` is an R library for estimating hierarchical exponential random graph models (HERGMs) efficiently on large networks. It also contains tools for simulating networks with local dependence and for assessing the goodness-of-fit of the estimates. 10 | License: GPL-3 11 | Encoding: UTF-8 12 | LazyData: true 13 | Roxygen: list(markdown = TRUE) 14 | RoxygenNote: 7.1.1 15 | Depends: 16 | R (>= 3.5.0) 17 | LinkingTo: 18 | Rcpp, 19 | RcppArmadillo (>= 0.10.5) 20 | Imports: 21 | Rcpp, 22 | ergm (>= 3.11.0), 23 | network (>= 1.16.0), 24 | Matrix, 25 | stringr, 26 | intergraph, 27 | igraph, 28 | parallel, 29 | magrittr, 30 | purrr, 31 | dplyr, 32 | tibble, 33 | glue, 34 | readr, 35 | foreach, 36 | rlang, 37 | reticulate, 38 | tidyr, 39 | statnet.common, 40 | doParallel, 41 | memoise, 42 | cachem 43 | Suggests: 44 | rmarkdown, 45 | knitr, 46 | testthat, 47 | ggplot2, 48 | GGally, 49 | sna 50 | Config/reticulate: 51 | list( 52 | packages = list( 53 | list(package = "infomap", version = "1.3.0", pip = TRUE) 54 | ) 55 | ) 56 | VignetteBuilder: knitr 57 | -------------------------------------------------------------------------------- /lighthergm/man/simulate_hergm_within.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{simulate_hergm_within} 4 | \alias{simulate_hergm_within} 5 | \title{Obtains network statistics based on MCMC simulations including only the 6 | within-blocks connections.} 7 | \usage{ 8 | simulate_hergm_within( 9 | formula_for_simulation, 10 | data_for_simulation, 11 | colname_vertex_id, 12 | colname_block_membership, 13 | coef_within_block, 14 | seed_edgelist = NULL, 15 | output = "stats", 16 | ergm_control = ergm::control.simulate.formula(), 17 | seed = NULL, 18 | n_sim = 1, 19 | verbose = 0, 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{formula_for_simulation}{formula for simulating a network} 25 | 26 | \item{data_for_simulation}{a data frame that contains vertex id, block membership, and vertex features.} 27 | 28 | \item{colname_vertex_id}{a column name in the data frame for the vertex ids} 29 | 30 | \item{colname_block_membership}{a column name in the data frame for the block membership} 31 | 32 | \item{coef_within_block}{a vector of within-block parameters. The order of the parameters should match that of the formula.} 33 | 34 | \item{seed_edgelist}{an edgelist used for creating a seed network. It should have the "edgelist" class} 35 | 36 | \item{output}{The desired output of the simulation (any of \code{stats}, \code{network} or \code{edgelist}). Defaults to \code{stats}} 37 | 38 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation} 39 | 40 | \item{seed}{seed value (integer) for network simulation.} 41 | 42 | \item{n_sim}{number of networks generated} 43 | 44 | \item{verbose}{If this is TRUE/1, the program will print out additional information about the progress of simulation.} 45 | 46 | \item{...}{arguments to be passed to low level functions} 47 | } 48 | \description{ 49 | Obtains network statistics based on MCMC simulations including only the 50 | within-blocks connections. 51 | } 52 | -------------------------------------------------------------------------------- /lighthergm/R/get_list_sparse_feature_adjmat.R: -------------------------------------------------------------------------------- 1 | #' Get a list of sparse feature adjacency matrix from a formula 2 | #' @importFrom foreach foreach %do% 3 | #' @param network a network object from which nodal covariates are extracted. 4 | #' @param formula a network model to be considered 5 | #' @export 6 | get_list_sparse_feature_adjmat <- function(network, formula) { 7 | # Get variable names from formula (extract strings sandwiched by double quotes) 8 | list_varname <- as.character(formula)[3] 9 | list_varname <- unlist(stringr::str_extract_all(string = list_varname, pattern = '"[^"]*"')) 10 | list_varname <- stringr::str_remove_all(string = list_varname, pattern = '\"') 11 | 12 | # Extract variable from a network object 13 | list_var <- 14 | foreach(i = 1:length(list_varname)) %do% { 15 | feature <- network::get.vertex.attribute(x = network, list_varname[i]) 16 | return(feature) 17 | } 18 | 19 | # Create a list of sparse feature adjacency matrices 20 | list_sparse_feature_adjmat <- 21 | foreach(i = 1:length(list_var)) %do% { 22 | if (is.numeric(list_var[[i]])) { 23 | output <- get_sparse_feature_adjmat(list_var[[i]]) 24 | } else { 25 | output <- get_sparse_feature_adjmat_from_string(list_var[[i]]) 26 | } 27 | } 28 | 29 | # Attach variable names to each matrix 30 | names(list_sparse_feature_adjmat) <- list_varname 31 | 32 | # Return the output 33 | return(list_sparse_feature_adjmat) 34 | } 35 | 36 | #' Get a list of sparse feature adjacency matrix from a formula 37 | #' @param net a network object from which nodal covariates are extracted. 38 | #' @param list_feature_matrices a list of feature adjacency matrices generated by `get_list_sparse_feature_adjmat()`. 39 | #' @export 40 | compute_multiplied_feature_matrices <- function(net, list_feature_matrices) { 41 | adj <- network::as.edgelist(net) 42 | N <- net$gal$n 43 | adj <- Matrix::sparseMatrix(i = adj[, 1], j = adj[, 2], x = 1, dims = c(N, N), symmetric = TRUE) 44 | get_elementwise_multiplied_matrices(adj, list_feature_matrices) 45 | } 46 | -------------------------------------------------------------------------------- /lighthergm/man/gof_lighthergm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gof_lighthergm.R 3 | \name{gof_lighthergm} 4 | \alias{gof_lighthergm} 5 | \title{Goodness of fit statistics for HERGM} 6 | \usage{ 7 | gof_lighthergm( 8 | net, 9 | data_for_simulation, 10 | list_feature_matrices, 11 | colname_vertex_id, 12 | colname_block_membership, 13 | lighthergm_results, 14 | type = "full", 15 | ergm_control = ergm::control.simulate.formula(), 16 | seed = NULL, 17 | n_sim = 1, 18 | prevent_duplicate = TRUE, 19 | compute_geodesic_distance = FALSE, 20 | start_from_observed = FALSE, 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{net}{the target network} 26 | 27 | \item{data_for_simulation}{a dataframe with node-level covariates} 28 | 29 | \item{list_feature_matrices}{a list of feature adjacency matrices} 30 | 31 | \item{colname_vertex_id}{the name of the column that contains the node id} 32 | 33 | \item{colname_block_membership}{the name o the column that contains the block affiliation of each node} 34 | 35 | \item{lighthergm_results}{a lighthergm results object} 36 | 37 | \item{type}{the type of evaluation to perform. Can take the values \code{full} or \code{within}. \code{full} performs the evaluation on all edges, and \code{within} only considers within-block edges.} 38 | 39 | \item{ergm_control}{MCMC parameters as an instance of ergm.control} 40 | 41 | \item{seed}{the seed to be passed to simulate_hergm} 42 | 43 | \item{n_sim}{the number of simulations to employ for calculating goodness of fit} 44 | 45 | \item{prevent_duplicate}{see \code{simulate_hergm}} 46 | 47 | \item{compute_geodesic_distance}{if \code{TRUE}, the distribution of geodesic distances is also computed (considerably increases computation time on large networks. \code{FALSE} by default.)} 48 | 49 | \item{start_from_observed}{if \code{TRUE}, MCMC uses the observed network as a starting point} 50 | 51 | \item{...}{Additional arguments, to be passed to lower-level functions} 52 | } 53 | \description{ 54 | Goodness of fit statistics for HERGM 55 | } 56 | -------------------------------------------------------------------------------- /lighthergm/src/simulate_between_network.cpp: -------------------------------------------------------------------------------- 1 | #ifdef _OPENMP 2 | #include 3 | #else 4 | #define omp_get_max_threads() 0 5 | #endif 6 | #include 7 | // [[Rcpp::depends(RcppArmadillo)]] 8 | // [[Rcpp::plugins(openmp)]] 9 | 10 | // Function that simulates a between-block network. 11 | // The first element of `coef_between` must be the edges parameter. 12 | // [[Rcpp::export]] 13 | arma::sp_mat simulate_between_network 14 | (int numOfVertices, 15 | const Rcpp::List& list_feature_adjmat, 16 | const arma::vec& coef_between, 17 | const arma::vec& block_membership, 18 | bool directed 19 | ) 20 | { 21 | // Number of covariates 22 | int numOfCovariates = list_feature_adjmat.length(); 23 | // Initialize a sparse adjacency matrix for the between-block network 24 | arma::sp_mat between_adjmat = arma::sp_mat(numOfVertices, numOfVertices); 25 | // Prepare a sparse adjacency cube 26 | arma::field feature_cube(numOfCovariates); 27 | for (int p = 0; p < numOfCovariates; p++) { 28 | feature_cube(p) = Rcpp::as(list_feature_adjmat[p]); 29 | } 30 | // Necessary for R random number generator 31 | GetRNGstate(); 32 | 33 | #pragma omp parallel 34 | { 35 | // Simulate between-block links 36 | #pragma omp for 37 | for (int j = 0; j < numOfVertices; j++) { 38 | for (int i = 0; i < numOfVertices; i++) { 39 | // Skip as many unnecessary calculations as possible in this nested loop, which makes the computation faster. 40 | if (block_membership[i] != block_membership[j] && ((directed && i != j) || (!directed && i < j))) { 41 | double x = unif_rand(); 42 | double u = coef_between[0]; 43 | for (int p = 0; p < numOfCovariates; p++) { 44 | double elem = feature_cube(p)(i, j); 45 | double elem_coef = coef_between[p+1]; 46 | u += elem_coef * elem; 47 | } 48 | //std::printf("x: %f, Thread: %d, Loop: (%d, %d), u: %f\n", x, omp_get_thread_num(), i, j, u); 49 | if (u > log(x/(1-x))) { 50 | between_adjmat(i, j) = 1; 51 | } 52 | } 53 | } 54 | } 55 | } 56 | // This must be called after GetRNGstate before returning to R. 57 | PutRNGstate(); 58 | return between_adjmat; 59 | } 60 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-get_sparse_feature_adjmat_cpp.R: -------------------------------------------------------------------------------- 1 | test_that("creating a feature adjacency matrix works", { 2 | library(magrittr) 3 | 4 | get_sparse_feature_adjacency_matrix <- function(feature) { 5 | S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(length(feature), length(feature))) 6 | S <- as(S, "dgCMatrix") 7 | for (i in 1:length(feature)) { 8 | for (j in 1:length(feature)) { 9 | if (i != j) { 10 | if (feature[i] == feature[j]) { 11 | S[i, j] <- 1 12 | } 13 | } 14 | } 15 | } 16 | return(S) 17 | } 18 | 19 | # Number of nodes 20 | N <- 100 21 | 22 | # Features 23 | x <- sample(x = c(1:100), size = N, replace = TRUE) 24 | y <- sample(x = c(1:30), size = N, replace = TRUE) 25 | z <- sample(x = c(1:50), size = N, replace = TRUE) 26 | w <- sample(x = c(LETTERS, letters), size = N, replace = TRUE) 27 | 28 | # Create an edgelist 29 | edgelist <- 30 | tibble::tibble(tail = 1:N, head = 1:N) %>% 31 | tidyr::expand(tail, head) %>% 32 | dplyr::filter(tail < head) %>% 33 | dplyr::mutate(connect = unlist(as.integer(purrr::rbernoulli(n = nrow(.), p = 0.005)))) %>% 34 | dplyr::filter(connect == 1) %>% 35 | dplyr::select(tail, head) 36 | 37 | # Create a network object 38 | g <- network::network.initialize(n = N, directed = FALSE) 39 | network::add.edges(x = g, tail = edgelist$tail, head = edgelist$head) 40 | network::set.vertex.attribute(x = g, attrname = "x", value = x) 41 | network::set.vertex.attribute(x = g, attrname = "y", value = y) 42 | network::set.vertex.attribute(x = g, attrname = "z", value = z) 43 | network::set.vertex.attribute(x = g, attrname = "w", value = w) 44 | 45 | # Create a formula 46 | form <- g ~ edges + triangles + nodematch("x") + nodematch("y") + nodematch("z") + nodematch("w") 47 | 48 | # True list of feature adjacency metrices 49 | list_adjmat_true <- list( 50 | get_sparse_feature_adjacency_matrix(x), 51 | get_sparse_feature_adjacency_matrix(y), 52 | get_sparse_feature_adjacency_matrix(z), 53 | get_sparse_feature_adjacency_matrix(w) 54 | ) 55 | 56 | # Create a list 57 | list_adjmat <- get_list_sparse_feature_adjmat(network = g, formula = form) 58 | 59 | # Check if it works 60 | for (i in 1:4) { 61 | expect_equal(list_adjmat[[i]], list_adjmat_true[[i]], check.attributes = FALSE, tolerance = 1e-10) 62 | } 63 | }) 64 | -------------------------------------------------------------------------------- /lighthergm/R/estimate_between_param.R: -------------------------------------------------------------------------------- 1 | #' Estimate between-block parameters by logit 2 | #' @importFrom magrittr %>% 3 | #' @importFrom ergm ergm 4 | #' @param formula formula for estimating between-block parameters 5 | #' @param network network object 6 | #' @param block a vector that represents which node belongs to which node 7 | #' @export 8 | estimate_between_param <- function(formula, network, block) { 9 | # Create a data frame that stores node-block correspondences. 10 | df_block <- 11 | tibble::tibble( 12 | intergraph_id = 1:length(block), 13 | block = block 14 | ) 15 | 16 | # Create an edgelist for within-block connections 17 | within_block_link <- 18 | intergraph::asDF(network)$edges %>% 19 | dplyr::rename( 20 | tail = V1, 21 | head = V2 22 | ) %>% 23 | dplyr::left_join(., df_block, by = c("tail" = "intergraph_id")) %>% 24 | dplyr::select(tail, head, block) %>% 25 | dplyr::rename(block_tail = block) %>% 26 | dplyr::left_join(., df_block, by = c("head" = "intergraph_id")) %>% 27 | dplyr::select(tail, head, block_tail, block) %>% 28 | dplyr::rename(block_head = block) %>% 29 | dplyr::filter(block_tail == block_head) 30 | 31 | # Get edge id for within-block links 32 | within_block_link_eid <- 33 | unlist(network::get.dyads.eids( 34 | x = network, 35 | tails = within_block_link$tail, 36 | heads = within_block_link$head 37 | )) 38 | 39 | # Create a network for logit estimation 40 | g_logit <- network 41 | 42 | # Delete within-block links 43 | network::delete.edges(x = g_logit, eid = within_block_link_eid) 44 | 45 | # Create a formula that contains only dyad-independent terms. i.e. exclude externality terms like triangle. 46 | terms <- ergm::ergm_model(formula)$terms 47 | varnames <- 48 | statnet.common::list_rhs.formula(formula) %>% 49 | as.character() 50 | dep_terms <- 51 | terms %>% purrr::map(function(t) { 52 | dep <- t$dependence 53 | is_dep <- is.null(dep) || dep 54 | }) %>% unlist() 55 | between_rhs <- varnames[!dep_terms] 56 | between_formula <- as.formula(glue::glue("g_logit ~ {paste(between_rhs, collapse = '+')}")) 57 | 58 | # Estimate logit 59 | between_logit <- ergm( 60 | formula = between_formula, 61 | estimate = "MPLE" 62 | ) 63 | 64 | # Remove unnecessary network objects 65 | between_logit$newnetwork <- NULL 66 | 67 | # Return the output 68 | return(between_logit) 69 | } 70 | -------------------------------------------------------------------------------- /lighthergm/man/draw_between_block_connection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{draw_between_block_connection} 4 | \alias{draw_between_block_connection} 5 | \title{Draw between-block connections. There may be some edges that appear both in within- and between-block links. 6 | The overlapped edges will be removed after this step.} 7 | \usage{ 8 | draw_between_block_connection( 9 | formula_for_simulation, 10 | sorted_dataframe, 11 | coef_between_block, 12 | seed_edgelist_between = NULL, 13 | use_fast_between_simulation = FALSE, 14 | list_feature_matrices = NULL, 15 | seed = NULL, 16 | n_sim = 1, 17 | prevent_duplicate = TRUE, 18 | verbose = 0, 19 | ergm_control = ergm::control.simulate.formula(), 20 | output = "edgelist", 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{formula_for_simulation}{formula for simulating a between-block network} 26 | 27 | \item{sorted_dataframe}{a data frame generated by \code{sort_block_membership}} 28 | 29 | \item{coef_between_block}{a vector of between-block parameters. The order of the parameters should match that of the formula.} 30 | 31 | \item{seed_edgelist_between}{a seed edgelist from which a between-block network is simulated.} 32 | 33 | \item{use_fast_between_simulation}{If \code{TRUE}, this function uses an effcient way to simulate a between-block network. 34 | If the network is very large, you should consider using this option. 35 | Note that when you use this, the first element of \code{coef_between_block} must be the edges parameter.} 36 | 37 | \item{list_feature_matrices}{a list of feature adjacency matrices. This is used when \code{use_fast_between_simulation}.} 38 | 39 | \item{seed}{seed value (integer) for the random number generator.} 40 | 41 | \item{n_sim}{number of networks generated.} 42 | 43 | \item{prevent_duplicate}{If \code{TRUE}, the coefficient on nodematch("block") is set to be a very large negative number in drawing between-block links, 44 | so that there will be (almost) no within-block links.} 45 | 46 | \item{verbose}{If this is TRUE/1, the program will print out additionalinformation about the progress of simulation.} 47 | 48 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation} 49 | 50 | \item{output}{Normally character, one of "network" (default), "stats", "edgelist", to determine the output format.} 51 | 52 | \item{...}{Additional arguments, to be passed to lower-level functions} 53 | } 54 | \description{ 55 | Draw between-block connections. There may be some edges that appear both in within- and between-block links. 56 | The overlapped edges will be removed after this step. 57 | } 58 | -------------------------------------------------------------------------------- /.github/workflows/check-standard.yml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: 7 | - main 8 | - develop 9 | pull_request: 10 | branches: 11 | - main 12 | - develop 13 | 14 | name: R-CMD-check 15 | 16 | jobs: 17 | R-CMD-check: 18 | runs-on: ${{ matrix.config.os }} 19 | 20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 21 | 22 | strategy: 23 | fail-fast: false 24 | matrix: 25 | config: 26 | #- {os: windows-latest, r: 'release'} 27 | - {os: macOS-latest, r: 'release'} 28 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 29 | 30 | env: 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | RSPM: ${{ matrix.config.rspm }} 33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | steps: 36 | - uses: actions/checkout@v2 37 | 38 | - uses: r-lib/actions/setup-r@v1 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | 42 | - uses: r-lib/actions/setup-pandoc@v1 43 | 44 | - name: Query dependencies 45 | run: | 46 | install.packages('remotes') 47 | saveRDS(remotes::dev_package_deps("lighthergm", dependencies = TRUE), ".github/depends.Rds", version = 2) 48 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 49 | shell: Rscript {0} 50 | 51 | - name: Restore R package cache 52 | uses: actions/cache@v2 53 | with: 54 | path: ${{ env.R_LIBS_USER }} 55 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 56 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 57 | 58 | - name: Install system dependencies 59 | if: runner.os == 'Linux' 60 | run: | 61 | while read -r cmd 62 | do 63 | eval sudo $cmd 64 | done < <(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04", "lighthergm"))') 65 | 66 | - name: Install dependencies 67 | run: | 68 | remotes::install_deps("lighthergm", dependencies = TRUE) 69 | remotes::install_cran("rcmdcheck") 70 | shell: Rscript {0} 71 | 72 | #- name: Install the binary version of igraph (for Windows) 73 | # if: runner.os == 'Windows' 74 | # run: | 75 | # install.packages("igraph", type = "binary") 76 | # shell: Rscript {0} 77 | 78 | - name: Check 79 | env: 80 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 81 | run: | 82 | options(crayon.enabled = TRUE) 83 | rcmdcheck::rcmdcheck("lighthergm", args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 84 | shell: Rscript {0} 85 | 86 | - name: Upload check results 87 | if: failure() 88 | uses: actions/upload-artifact@main 89 | with: 90 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 91 | path: check 92 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-cache.R: -------------------------------------------------------------------------------- 1 | set.seed(334) 2 | 3 | # Simulate a random network for testing 4 | simulate_network <- function(){ 5 | N <- 1000 6 | K <- 50 7 | memb <- rep(1:K, each = N / K) 8 | x <- sample(1:10, size = N, replace = TRUE) 9 | y <- sample(1:10, size = N, replace = TRUE) 10 | list_within_params <- c(-1, 1, 1, 0.5) 11 | list_between_params <- c(-3.5, 0.5, 0.5) 12 | 13 | formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle 14 | 15 | vertex_id <- 1:N 16 | 17 | df <- tibble::tibble( 18 | id = vertex_id, 19 | memb = memb, 20 | x = x, 21 | y = y 22 | ) 23 | 24 | simulate_hergm( 25 | formula_for_simulation = formula, 26 | data_for_simulation = df, 27 | colname_vertex_id = "id", 28 | colname_block_membership = "memb", 29 | coef_within_block = list_within_params, 30 | coef_between_block = list_between_params, 31 | ergm_control = ergm::control.simulate.formula(MCMC.burnin = 1000000, MCMC.interval = 1000), 32 | seed = 1, 33 | n_sim = 1, 34 | directed = FALSE, 35 | output = "network" 36 | ) 37 | } 38 | 39 | # Function that checks that the number of files in the directory is the expected number 40 | check_files <- function(dir, expected_number){ 41 | expect_equal(length(list.files(dir, '*.rds')), expected_number) 42 | } 43 | 44 | cleanup <- function(){ 45 | do.call(file.remove, list(list.files(tempdir(), '*.rds', full.names = TRUE))) 46 | } 47 | 48 | test_that('Estimation with a disk cache stores data in the correct directory', { 49 | on.exit(cleanup()) 50 | 51 | # Use this directory for caching in disk 52 | dir <- tempdir() 53 | 54 | # Simulate a network 55 | g_1 <- simulate_network() 56 | 57 | # There should be no cached files in the directory 58 | check_files(dir, 0) 59 | 60 | # Perform estimation 61 | lighthergm::hergm( 62 | object = g_1 ~ edges + nodematch("x"), 63 | n_clusters = 20, 64 | n_em_step_max = 3, 65 | initialization_method = 1, 66 | clustering_with_features = TRUE, 67 | verbose=2, 68 | cache = cachem::cache_disk(dir) 69 | ) 70 | 71 | # The estimation should have stored one RDS object in the cache directory. 72 | check_files(dir, 1) 73 | 74 | lighthergm::hergm( 75 | object = g_1 ~ edges + nodematch("x"), 76 | n_clusters = 20, 77 | n_em_step_max = 3, 78 | initialization_method = 1, 79 | clustering_with_features = TRUE, 80 | verbose=2, 81 | cache = cachem::cache_disk(dir) 82 | ) 83 | 84 | # Running again the estimation on the same network should reuse the previously stored RDS object 85 | # and not store a new one. 86 | check_files(dir, 1) 87 | 88 | # Generate a different network 89 | g_2 <- simulate_network() 90 | 91 | # Perform estimation on the new network. 92 | lighthergm::hergm( 93 | object = g_2 ~ edges + nodematch("x"), 94 | n_clusters = 20, 95 | n_em_step_max = 3, 96 | initialization_method = 1, 97 | clustering_with_features = TRUE, 98 | verbose=2, 99 | cache = cachem::cache_disk(dir) 100 | ) 101 | 102 | # The network has changed, so the previously cached RDS is not reused and a new cache file is generated. 103 | check_files(dir, 2) 104 | 105 | }) 106 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-start-from-given-cluster.R: -------------------------------------------------------------------------------- 1 | test_that("starting EM iterations and parameter estimation from a given vector of block memberships works", { 2 | set.seed(334) 3 | # Simulate a network to work with in this unit test. 4 | # Number of nodes 5 | N <- 1000 6 | # Number of blocks 7 | K <- 50 8 | # Block memberships (same block size) 9 | memb <- rep(1:K, each = N / K) 10 | # Covariates 11 | x <- sample(1:10, size = N, replace = TRUE) 12 | y <- sample(1:10, size = N, replace = TRUE) 13 | 14 | # Within-block parameters: edges, nodematch("x"), nodematch("y"), triangle 15 | list_within_params <- c(-1, 1, 1, 0.5) 16 | # Between-block parameters: edges, nodematch("x"), nodematch("y") 17 | list_between_params <- c(-3.5, 0.5, 0.5) 18 | 19 | formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle 20 | 21 | vertex_id <- 1:N 22 | 23 | df <- tibble::tibble( 24 | id = vertex_id, 25 | memb = memb, 26 | x = x, 27 | y = y 28 | ) 29 | 30 | g_sim <- 31 | simulate_hergm( 32 | formula_for_simulation = formula, 33 | data_for_simulation = df, 34 | colname_vertex_id = "id", 35 | colname_block_membership = "memb", 36 | coef_within_block = list_within_params, 37 | coef_between_block = list_between_params, 38 | ergm_control = ergm::control.simulate.formula(MCMC.burnin = 1000000, MCMC.interval = 1000), 39 | seed = 1, 40 | n_sim = 1, 41 | directed = FALSE, 42 | output = "network" 43 | ) 44 | 45 | # Conduct clustering 46 | cluster_with_feature <- 47 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 48 | n_clusters = K, 49 | estimate_parameters = FALSE, 50 | verbose = 0, 51 | n_em_step_max = 3, 52 | initialization_method = 3, 53 | infomap_python = FALSE, 54 | clustering_with_features = TRUE, 55 | check_alpha_update = TRUE, 56 | compute_pi = TRUE, 57 | check_lower_bound = TRUE, 58 | check_block_membership = TRUE, 59 | seeds = 334 60 | ) 61 | 62 | # Check if starting from the previously estimated block memberships works. 63 | expect_error(result <- 64 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 65 | initialized_cluster_data = cluster_with_feature$partition, 66 | n_em_step_max = 2, 67 | estimate_parameters = FALSE 68 | ), NA) 69 | 70 | # Check if starting from block memberships initialized Python's infomap works. 71 | expect_error(result2 <- 72 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 73 | initialized_cluster_data = system.file("extdata", "initialized_cluster_data_by_infomap.clu", package = "lighthergm"), 74 | n_em_step_max = 1, 75 | estimate_parameters = FALSE, 76 | verbose = 1 77 | ), NA) 78 | 79 | # Check if starting paramter estimation from a given vector of block memberships works. 80 | expect_error(result3 <- 81 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 82 | block_membership = result$partition, 83 | verbose = 1 84 | ), NA) 85 | 86 | # Check if not specifying n_clusters when initialized_cluster_data and block_membership are null yields an error. 87 | expect_error(result4 <- 88 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 89 | verbose = 1 90 | )) 91 | }) 92 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-compute_quadratic_term.R: -------------------------------------------------------------------------------- 1 | test_that("quadratic term calculation without features works", { 2 | # Number of nodes 3 | N <- 12 4 | # Number of clusters 5 | K <- 3 6 | # Create an adjacency matrix 7 | edgelist <- 8 | tibble::tibble( 9 | tail = 1:N, 10 | head = 1:N 11 | ) %>% 12 | tidyr::expand(tail, head) %>% 13 | dplyr::filter(tail < head) %>% 14 | dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>% 15 | dplyr::filter(connect == 1) 16 | 17 | net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 18 | adj <- network::as.matrix.network.adjacency(net) 19 | adj <- as(adj, "dgCMatrix") 20 | 21 | # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k. 22 | tau <- 23 | matrix(c( 24 | 0.2, 0.5, 0.3, 25 | 0.4, 0.4, 0.2, 26 | 0.1, 0.4, 0.5, 27 | 0.4, 0.4, 0.2, 28 | 0.1, 0.1, 0.8, 29 | 0.05, 0.05, 0.9, 30 | 0.8, 0.1, 0.1, 31 | 0.3, 0.4, 0.3, 32 | 0.1, 0.8, 0.1, 33 | 0.5, 0.4, 0.1, 34 | 0.3, 0.3, 0.4, 35 | 0.8, 0.1, 0.1 36 | ), 37 | nrow = K, ncol = N 38 | ) 39 | tau <- t(tau) 40 | 41 | # Create a K x K matrix whose (k, l) element represents Pr(D_ij = 1 | Z_i = k, Z_j = l). 42 | sumTaus <- compute_sumTaus(N, K, tau) 43 | pi <- (t(tau) %*% adj %*% tau) / sumTaus 44 | 45 | # Compute gamma (parameter of multinomial distribution) 46 | alpha <- colSums(tau) 47 | 48 | # Compute the true quadratic term in a naive way 49 | A <- matrix(0, nrow = N, ncol = K) 50 | 51 | for (i in 1:N) { 52 | for (k in 1:K) { 53 | for (j in 1:N) { 54 | if (i != j) { 55 | for (l in 1:K) { 56 | pi_ij <- pi 57 | # When D_ij = 0, we must use 1 - pi. 58 | if (adj[i, j] == 0) { 59 | pi_ij <- 1 - pi 60 | } 61 | a_ij <- tau[j, l] * log(pi_ij[k, l]) 62 | A[i, k] <- A[i, k] + a_ij 63 | } 64 | } 65 | } 66 | } 67 | } 68 | 69 | A <- 1 - A / 2 70 | 71 | # Divide A by alpha_{ik} 72 | A <- A / tau 73 | 74 | A_cpp <- compute_quadratic_term(N, K, alpha, tau, adj, LB = 0) 75 | 76 | # Check if computation works as expected 77 | expect_equal(A, A_cpp, check.attributes = FALSE, tolerance = 1e-10) 78 | 79 | # Check if Michael's formula is correct 80 | # Compute the first term 81 | A_true <- 0 82 | 83 | for (i in 1:N) { 84 | for (j in i:N) { 85 | if (i != j) { 86 | for (k in 1:K) { 87 | for (l in 1:K) { 88 | pi_ij <- pi 89 | # When D_ij = 0, we must use 1 - pi. 90 | if (adj[i, j] == 0) { 91 | pi_ij <- 1 - pi 92 | } 93 | A_true <- A_true + tau[i, k]^2 * tau[j, l] * log(pi_ij[k, l]) / (2 * tau[i, k]) + tau[j, l]^2 * tau[i, k] * log(pi_ij[k, l]) / (2 * tau[j, l]) 94 | } 95 | } 96 | } 97 | } 98 | } 99 | 100 | A <- 0 101 | 102 | for (i in 1:N) { 103 | for (k in 1:K) { 104 | for (j in 1:N) { 105 | if (i != j) { 106 | for (l in 1:K) { 107 | pi_ij <- pi 108 | # When D_ij = 0, we must use 1 - pi. 109 | if (adj[i, j] == 0) { 110 | pi_ij <- 1 - pi 111 | } 112 | A <- A + tau[i, k]^2 * tau[j, l] * log(pi_ij[k, l]) / (2 * tau[i, k]) 113 | } 114 | } 115 | } 116 | } 117 | } 118 | 119 | # Check if they are the same 120 | expect_equal(A, A_true, check.attributes = FALSE, tolerance = 1e-10) 121 | }) 122 | -------------------------------------------------------------------------------- /lighthergm/src/helper_function.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | // [[Rcpp::export]] 5 | arma::vec decimal_to_binary_vector(int decimal, 6 | int vec_length) { 7 | int n = decimal; 8 | arma::vec output(vec_length); 9 | 10 | for (int i = 0; i < vec_length; i++) { 11 | output[i] = n % 2; 12 | n = n/2; 13 | } 14 | return output; 15 | } 16 | 17 | // Summing up a matrix by row 18 | arma::rowvec sumDoubleMatrixByRow(const arma::mat& matrix) { 19 | arma::rowvec vector = arma::sum(matrix, 0); 20 | return vector; 21 | } 22 | 23 | 24 | // Normalizing tau 25 | void normalizeTau(arma::mat& tau, 26 | double minValue) { 27 | int numOfVertices = tau.n_rows; 28 | int numOfClasses = tau.n_cols; 29 | // normalize 30 | for (int i = 0; i < numOfVertices; i++) { 31 | double denominator = 0; 32 | for (int k = 0; k < numOfClasses; k++) { 33 | denominator += tau(i, k); 34 | } 35 | bool again = false; 36 | for (int k = 0; k < numOfClasses; k++) { 37 | tau(i, k) /= denominator; 38 | if (tau(i, k) < minValue) { 39 | tau(i, k) = minValue; 40 | again = true; 41 | } 42 | } 43 | if (again) { 44 | denominator = 0; 45 | for (int k = 0; k < numOfClasses; k++) { 46 | denominator += tau(i, k); 47 | } 48 | for (int k = 0; k < numOfClasses; k++) 49 | tau(i, k) /= denominator; 50 | } 51 | } 52 | } 53 | 54 | 55 | // [[Rcpp::export]] 56 | arma::mat compute_sumTaus(int numOfVertices, 57 | int numOfClasses, 58 | const arma::mat& tau, 59 | int verbose = 0) { 60 | 61 | if (verbose >= 5) { 62 | Rcpp::Rcout << "find_sumTaus: sum by row"; 63 | } 64 | arma::rowvec tauL = sumDoubleMatrixByRow(tau); 65 | 66 | if (verbose >= 5) { 67 | Rcpp::Rcout << "find_sumTaus: calculating sumTaus"; 68 | } 69 | arma::mat sumTaus = tau.t() * -(tau.each_row() - tauL); // Check computation speed here 70 | 71 | if (verbose >= 5) { 72 | Rcpp::Rcout << "find_sumTaus: returning"; 73 | } 74 | 75 | return sumTaus; 76 | } 77 | 78 | // A naive implementation of quadratic coefficient computation 79 | // [[Rcpp::export]] 80 | arma::mat compute_quadratic_term_naive(int numOfVertices, 81 | int numOfClasses, 82 | const arma::mat& pi, 83 | const arma::mat& tau, 84 | const arma::sp_mat& network) { 85 | arma::mat pi1 = pi; 86 | arma::mat pi0 = 1 - pi; 87 | arma::mat logPi0 = arma::log(pi0); 88 | arma::mat logPi1 = arma::log(pi1); 89 | 90 | arma::mat A(numOfVertices, numOfClasses); 91 | A.zeros(); 92 | for (int i = 0; i < numOfVertices; i++) { 93 | for (int k = 0; k < numOfClasses; k++) { 94 | for (int j = 0; j < numOfVertices; j++) { 95 | if (i != j) { 96 | for (int l = 0; l < numOfClasses; l++) { 97 | if (network(i, j) == 0 ) { 98 | A(i, k) += tau(j, l) * logPi0(k, l); 99 | } else { 100 | A(i, k) += tau(j, l) * logPi1(k, l); 101 | } 102 | } 103 | } 104 | } 105 | } 106 | } 107 | 108 | // Finalize by subtracting half of from 1 dividing tau_{ik} 109 | for (int i = 0; i < numOfVertices; i++) { 110 | for (int k = 0; k < numOfClasses; k++) { 111 | // In theory, A(i, k) must be negative or 0. 112 | if (A(i, k) > 0) { // In reality, A(i, k) can be greater than 0 because of numerical precision. 113 | A(i, k) = 0; // Therefore, we cut it off to 0 in this case 114 | } 115 | A(i, k) = 1 - A(i, k) / 2; 116 | A(i, k) /= tau(i, k); 117 | } 118 | } 119 | return A; 120 | } 121 | 122 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-compute_pi_with_features.R: -------------------------------------------------------------------------------- 1 | test_that("computing pi with features works", { 2 | # Number of nodes 3 | N <- 12 4 | # Number of clusters 5 | K <- 3 6 | 7 | # Create an adjacency matrix 8 | edgelist <- 9 | tibble::tibble( 10 | tail = 1:N, 11 | head = 1:N 12 | ) %>% 13 | tidyr::expand(tail, head) %>% 14 | dplyr::filter(tail < head) %>% 15 | dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>% 16 | dplyr::filter(connect == 1) 17 | 18 | net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 19 | adj <- network::as.matrix.network.adjacency(net) 20 | adj <- as(adj, "dgCMatrix") 21 | 22 | # Create feature matrices 23 | x <- as.integer(unlist(purrr::rbernoulli(n = N))) 24 | S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 25 | S <- as(S, "dgCMatrix") 26 | for (i in 1:N) { 27 | for (j in 1:N) { 28 | if (i != j) { 29 | s_ij <- ifelse(x[i] == x[j], 1, 0) 30 | S[i, j] <- s_ij 31 | } 32 | } 33 | } 34 | 35 | y <- as.integer(unlist(purrr::rbernoulli(n = N))) 36 | V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 37 | V <- as(V, "dgCMatrix") 38 | for (i in 1:N) { 39 | for (j in 1:N) { 40 | if (i != j) { 41 | v_ij <- ifelse(y[i] == y[j], 1, 0) 42 | V[i, j] <- v_ij 43 | } 44 | } 45 | } 46 | 47 | z <- as.integer(unlist(purrr::rbernoulli(n = N))) 48 | W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 49 | W <- as(W, "dgCMatrix") 50 | for (i in 1:N) { 51 | for (j in 1:N) { 52 | if (i != j) { 53 | w_ij <- ifelse(z[i] == z[j], 1, 0) 54 | W[i, j] <- w_ij 55 | } 56 | } 57 | } 58 | 59 | # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k. 60 | tau <- 61 | matrix(c( 62 | 0.2, 0.5, 0.3, 63 | 0.4, 0.4, 0.2, 64 | 0.1, 0.4, 0.5, 65 | 0.4, 0.4, 0.2, 66 | 0.1, 0.1, 0.8, 67 | 0.05, 0.05, 0.9, 68 | 0.8, 0.1, 0.1, 69 | 0.3, 0.4, 0.3, 70 | 0.1, 0.8, 0.1, 71 | 0.5, 0.4, 0.1, 72 | 0.3, 0.3, 0.4, 73 | 0.8, 0.1, 0.1 74 | ), 75 | nrow = K, ncol = N 76 | ) 77 | tau <- t(tau) 78 | 79 | ########################################################### 80 | # Compute the true quadratic term in a naive way 81 | ########################################################### 82 | # Compute pi for D_ij = 1 83 | minPi <- 1e-4 84 | list_pi <- list() 85 | for (w in 0:1) { 86 | for (v in 0:1) { 87 | for (s in 0:1) { 88 | print(glue::glue("Compute pi for pi_s{s}v{v}w{w}")) 89 | denom <- matrix(0, nrow = K, ncol = K) 90 | num <- matrix(0, nrow = K, ncol = K) 91 | index <- s + 2 * v + 4 * w + 1 92 | print(index) 93 | for (k in 1:K) { 94 | for (l in 1:K) { 95 | for (i in 1:N) { 96 | for (j in 1:N) { 97 | if (i != j & S[i, j] == s & V[i, j] == v & W[i, j] == w) { 98 | denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l] 99 | } 100 | if (i != j & adj[i, j] == 1 & S[i, j] == s & V[i, j] == v & W[i, j] == w) { 101 | num[k, l] <- num[k, l] + tau[i, k] * tau[j, l] 102 | } 103 | } 104 | } 105 | } 106 | } 107 | pi <- num / denom 108 | # Remove extremely small elements in pi 109 | for (k in 1:K) { 110 | for (l in 1:K) { 111 | if (pi[k, l] < minPi) { 112 | pi[k, l] <- minPi 113 | } 114 | } 115 | } 116 | list_pi[[index]] <- pi 117 | } 118 | } 119 | } 120 | 121 | list_feature_adjmat <- list(S, V, W) 122 | list_multiplied_feature_adjmat <- get_elementwise_multiplied_matrices(adj, list_feature_adjmat) 123 | expect_equal(compute_pi_with_features(N, K, list_multiplied_feature_adjmat, tau), list_pi, tolerance = 1e-10) 124 | }) 125 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-estimate-params.R: -------------------------------------------------------------------------------- 1 | set.seed(334) 2 | # Prepare data 3 | edgelist <- 4 | tibble::tribble( 5 | ~head, ~tail, 6 | 1, 9, 7 | 2, 6, 8 | 2, 7, 9 | 2, 9, 10 | 3, 5, 11 | 3, 9, 12 | 4, 7, 13 | 4, 11, 14 | 4, 15, 15 | 5, 11, 16 | 5, 15, 17 | 7, 8, 18 | 7, 16, 19 | 9, 13, 20 | 9, 14, 21 | 9, 16, 22 | 10, 14, 23 | 11, 15, 24 | 13, 15, 25 | 13, 16 26 | ) 27 | edgelist <- 28 | as.matrix(edgelist) 29 | attr(edgelist, "n") <- 16 30 | attr(edgelist, "vnames") <- 31 | c( 32 | "Acciaiuoli", "Albizzi", "Barbadori", "Bischeri", "Castellani", "Ginori", 33 | "Guadagni", "Lamberteschi", "Medici", "Pazzi", "Peruzzi", "Pucci", "Ridolfi", 34 | "Salviati", "Strozzi", "Tornabuoni" 35 | ) 36 | attr(edgelist, "directed") <- FALSE 37 | attr(edgelist, "bipartite") <- FALSE 38 | attr(edgelist, "loops") <- FALSE 39 | attr(edgelist, "class") <- c("edgelist", "matrix") 40 | 41 | g <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 42 | 43 | x1 <- as.integer(unlist(purrr::rbernoulli(n = g$gal$n))) 44 | network::set.vertex.attribute(x = g, attrname = "x1", value = x1) 45 | 46 | # Cluster 47 | z_memb <- rep(1:4, each = 4) 48 | network::set.vertex.attribute(x = g, attrname = "block", value = z_memb) 49 | 50 | # Create dataset for test 51 | g_link <- intergraph::asDF(g)$edges 52 | g_attr <- intergraph::asDF(g)$vertexes 53 | 54 | df_g <- 55 | tibble::tibble( 56 | head = 1:g$gal$n, 57 | tail = 1:g$gal$n 58 | ) %>% 59 | tidyr::expand(tail, head) %>% 60 | dplyr::filter(tail < head) %>% 61 | dplyr::left_join(., g_attr, by = c("tail" = "intergraph_id")) %>% 62 | dplyr::left_join(., g_attr, by = c("head" = "intergraph_id")) %>% 63 | dplyr::mutate( 64 | nodematch.x1 = ifelse(x1.x == x1.y, 1, 0), 65 | same_block = ifelse(block.x == block.y, 1, 0) 66 | ) %>% 67 | dplyr::select(tail, head, nodematch.x1:same_block) %>% 68 | dplyr::left_join(., g_link, by = c("tail" = "V1", "head" = "V2")) %>% 69 | dplyr::mutate(connected = ifelse(is.na(na), 0, 1)) %>% 70 | dplyr::select(-na) 71 | 72 | # Estimate the model 73 | formula <- g ~ edges + nodematch("x1") + triangle + kstar(2) 74 | est_between <- estimate_between_param( 75 | formula = formula, 76 | network = g, 77 | block = z_memb 78 | ) 79 | 80 | test_that("estimating between-block parameters by logit works", { 81 | # Check if between-block connections are all zero. 82 | g_logit <- est_between$network 83 | edgelist <- intergraph::asDF(g_logit)$edges 84 | 85 | true_edgelist <- 86 | df_g %>% 87 | dplyr::filter(same_block == 0 & connected == 1) %>% 88 | dplyr::select(tail, head) %>% 89 | dplyr::arrange(tail, head) 90 | 91 | # Does it work!!!? 92 | expect_equal(edgelist$V1, true_edgelist$tail) 93 | expect_equal(edgelist$V2, true_edgelist$head) 94 | 95 | # Check if estimates for between-block parameters are the same. 96 | param_est <- stats::coef(est_between) 97 | logit_true <- glm( 98 | formula = connected ~ nodematch.x1, 99 | data = df_g %>% dplyr::mutate(connected = ifelse(same_block == 1, 0, connected)), 100 | family = "binomial" 101 | ) 102 | 103 | param_est_true <- stats::coef(logit_true) 104 | 105 | # Does it work? 106 | expect_equal(param_est, param_est_true, check.attributes = FALSE, tolerance = 1e-7) 107 | 108 | # Check if within-block parameter estiamtion works 109 | expect_error(estimate_within_params( 110 | formula = formula, 111 | network = g, 112 | z_memb = z_memb, 113 | parallel = FALSE, 114 | verbose = 0, 115 | initial_estimate = NULL, 116 | seeds = NULL, 117 | method_second_step = "MPLE" 118 | ), NA) 119 | }) 120 | 121 | test_that("estimating between-block parameters using a formula without externality terms works", { 122 | # Check if within-block parameter estiamtion works 123 | expect_error( 124 | estimate_between_param( 125 | formula = g ~ edges + nodematch("x1"), 126 | network = g, 127 | block = z_memb 128 | ), 129 | NA) 130 | }) 131 | -------------------------------------------------------------------------------- /lighthergm/R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | eigenvectors_sparse <- function(X, n_vec) { 5 | .Call('_lighthergm_eigenvectors_sparse', PACKAGE = 'lighthergm', X, n_vec) 6 | } 7 | 8 | #' Compute Yule's Φ-coefficient 9 | #' @param z_star a true block membership 10 | #' @param z an estimated block membership 11 | #' @export 12 | compute_yule_coef <- function(z_star, z) { 13 | .Call('_lighthergm_compute_yule_coef', PACKAGE = 'lighthergm', z_star, z) 14 | } 15 | 16 | get_sparse_feature_adjmat <- function(x) { 17 | .Call('_lighthergm_get_sparse_feature_adjmat', PACKAGE = 'lighthergm', x) 18 | } 19 | 20 | get_sparse_feature_adjmat_from_string <- function(x) { 21 | .Call('_lighthergm_get_sparse_feature_adjmat_from_string', PACKAGE = 'lighthergm', x) 22 | } 23 | 24 | get_matrix_for_denominator <- function(numOfVertices, list_feature_adjmat) { 25 | .Call('_lighthergm_get_matrix_for_denominator', PACKAGE = 'lighthergm', numOfVertices, list_feature_adjmat) 26 | } 27 | 28 | get_elementwise_multiplied_matrices <- function(adjmat, list_feature_adjmat) { 29 | .Call('_lighthergm_get_elementwise_multiplied_matrices', PACKAGE = 'lighthergm', adjmat, list_feature_adjmat) 30 | } 31 | 32 | decimal_to_binary_vector <- function(decimal, vec_length) { 33 | .Call('_lighthergm_decimal_to_binary_vector', PACKAGE = 'lighthergm', decimal, vec_length) 34 | } 35 | 36 | compute_sumTaus <- function(numOfVertices, numOfClasses, tau, verbose = 0L) { 37 | .Call('_lighthergm_compute_sumTaus', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, tau, verbose) 38 | } 39 | 40 | compute_quadratic_term_naive <- function(numOfVertices, numOfClasses, pi, tau, network) { 41 | .Call('_lighthergm_compute_quadratic_term_naive', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, pi, tau, network) 42 | } 43 | 44 | compute_linear_term <- function(numOfVertices, numOfClasses, alpha, tau, LB) { 45 | .Call('_lighthergm_compute_linear_term', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, tau, LB) 46 | } 47 | 48 | compute_pi <- function(numOfVertices, numOfClasses, stat, tau) { 49 | .Call('_lighthergm_compute_pi', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, stat, tau) 50 | } 51 | 52 | compute_quadratic_term <- function(numOfVertices, numOfClasses, alpha, tau, network, LB, verbose = 0L) { 53 | .Call('_lighthergm_compute_quadratic_term', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, tau, network, LB, verbose) 54 | } 55 | 56 | run_EM_without_features <- function(numOfVertices, numOfClasses, alpha, tau, network, verbose = 0L) { 57 | .Call('_lighthergm_run_EM_without_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, tau, network, verbose) 58 | } 59 | 60 | compute_denominator_for_pi_d1x0 <- function(numOfVertices, numOfClasses, matrix_for_denominator, tau, verbose) { 61 | .Call('_lighthergm_compute_denominator_for_pi_d1x0', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, matrix_for_denominator, tau, verbose) 62 | } 63 | 64 | compute_pi_d1x0 <- function(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, verbose) { 65 | .Call('_lighthergm_compute_pi_d1x0', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, verbose) 66 | } 67 | 68 | compute_quadratic_term_with_features <- function(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, LB, verbose = 0L) { 69 | .Call('_lighthergm_compute_quadratic_term_with_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, LB, verbose) 70 | } 71 | 72 | compute_pi_with_features <- function(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau) { 73 | .Call('_lighthergm_compute_pi_with_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau) 74 | } 75 | 76 | run_EM_with_features <- function(numOfVertices, numOfClasses, alpha, list_multiplied_feature_adjmat, tau, verbose = 0L) { 77 | .Call('_lighthergm_run_EM_with_features', PACKAGE = 'lighthergm', numOfVertices, numOfClasses, alpha, list_multiplied_feature_adjmat, tau, verbose) 78 | } 79 | 80 | simulate_between_network <- function(numOfVertices, list_feature_adjmat, coef_between, block_membership, directed) { 81 | .Call('_lighthergm_simulate_between_network', PACKAGE = 'lighthergm', numOfVertices, list_feature_adjmat, coef_between, block_membership, directed) 82 | } 83 | 84 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-compute_pi_d1x0.R: -------------------------------------------------------------------------------- 1 | test_that("computing pi_d1x0 works", { 2 | rm(list = ls()) 3 | # Number of nodes 4 | N <- 12 5 | # Number of clusters 6 | K <- 3 7 | 8 | # Create an adjacency matrix 9 | edgelist <- 10 | tibble::tibble( 11 | tail = 1:N, 12 | head = 1:N 13 | ) %>% 14 | tidyr::expand(tail, head) %>% 15 | dplyr::filter(tail < head) %>% 16 | dplyr::mutate(connect = rep(0:1, nrow(.) / 2)) %>% 17 | dplyr::filter(connect == 1) 18 | 19 | net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 20 | adj <- network::as.matrix.network.adjacency(net) 21 | adj <- as(adj, "dgCMatrix") 22 | 23 | # Create feature matrices 24 | x <- as.integer(unlist(purrr::rbernoulli(n = N))) 25 | S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 26 | S <- as(S, "dgCMatrix") 27 | for (i in 1:N) { 28 | for (j in 1:N) { 29 | if (i != j) { 30 | s_ij <- ifelse(x[i] == x[j], 1, 0) 31 | S[i, j] <- s_ij 32 | } 33 | } 34 | } 35 | 36 | y <- as.integer(unlist(purrr::rbernoulli(n = N))) 37 | V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 38 | V <- as(V, "dgCMatrix") 39 | for (i in 1:N) { 40 | for (j in 1:N) { 41 | if (i != j) { 42 | v_ij <- ifelse(y[i] == y[j], 1, 0) 43 | V[i, j] <- v_ij 44 | } 45 | } 46 | } 47 | 48 | z <- as.integer(unlist(purrr::rbernoulli(n = N))) 49 | W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 50 | W <- as(W, "dgCMatrix") 51 | for (i in 1:N) { 52 | for (j in 1:N) { 53 | if (i != j) { 54 | w_ij <- ifelse(z[i] == z[j], 1, 0) 55 | W[i, j] <- w_ij 56 | } 57 | } 58 | } 59 | 60 | 61 | 62 | # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k. 63 | tau <- 64 | matrix(c( 65 | 0.2, 0.5, 0.3, 66 | 0.4, 0.4, 0.2, 67 | 0.1, 0.4, 0.5, 68 | 0.4, 0.4, 0.2, 69 | 0.1, 0.1, 0.8, 70 | 0.05, 0.05, 0.9, 71 | 0.8, 0.1, 0.1, 72 | 0.3, 0.4, 0.3, 73 | 0.1, 0.8, 0.1, 74 | 0.5, 0.4, 0.1, 75 | 0.3, 0.3, 0.4, 76 | 0.8, 0.1, 0.1 77 | ), 78 | nrow = K, ncol = N 79 | ) 80 | tau <- t(tau) 81 | 82 | # Compute the true denominator 83 | one <- matrix(1, nrow = N, ncol = N) 84 | mat <- (one - S) * (one - V) * (one - W) 85 | diag(mat) <- 0 86 | denom_for_pi0_true <- t(tau) %*% mat %*% tau 87 | denom_for_pi0_true <- as.matrix(denom_for_pi0_true) 88 | 89 | # Compute the true denominator in a naive way 90 | denom_for_pi0_naive <- matrix(0, nrow = K, ncol = K) 91 | for (k in 1:K) { 92 | for (l in 1:K) { 93 | for (i in 1:N) { 94 | for (j in 1:N) { 95 | if (i != j & S[i, j] == 0 & V[i, j] == 0 & W[i, j] == 0) { 96 | denom_for_pi0_naive[k, l] <- denom_for_pi0_naive[k, l] + tau[i, k] * tau[j, l] 97 | } 98 | } 99 | } 100 | } 101 | } 102 | 103 | # Check if they are the same. This verifies that the formula is correct. 104 | expect_equal(denom_for_pi0_true, denom_for_pi0_naive, check.attributes = FALSE, tolerance = 1e-10) 105 | 106 | # Compute the denominator using the c++ function 107 | denom <- get_matrix_for_denominator(N, list(S, V, W)) 108 | denom_for_pi0 <- compute_denominator_for_pi_d1x0(N, K, denom, tau, verbose = 0) 109 | 110 | # Check if the computed matrix is correct. 111 | expect_equal(denom_for_pi0, denom_for_pi0_naive, check.attributes = FALSE, tolerance = 1e-10) 112 | 113 | 114 | # Compute true pi1 in a naive way 115 | pi1 <- matrix(0, nrow = K, ncol = K) 116 | for (k in 1:K) { 117 | for (l in 1:K) { 118 | for (i in 1:N) { 119 | for (j in 1:N) { 120 | if (i != j & adj[i, j] == 1 & S[i, j] == 0 & V[i, j] == 0 & W[i, j] == 0) { 121 | pi1[k, l] <- pi1[k, l] + tau[i, k] * tau[j, l] 122 | } 123 | } 124 | } 125 | } 126 | } 127 | pi1_true <- pi1 / denom_for_pi0_naive 128 | 129 | # Remove extremely small values in pi1 130 | minPi <- 1e-4 131 | for (k in 1:K) { 132 | for (l in 1:K) { 133 | if (pi1_true[k, l] < minPi) { 134 | pi1_true[k, l] <- minPi 135 | } 136 | } 137 | } 138 | 139 | # Compute pi0 using the Rcpp function 140 | list_multiplied_adjmat <- get_elementwise_multiplied_matrices(adj, list(S, V, W)) 141 | list_multiplied_adjmat[[1]] <- denom 142 | pi1 <- compute_pi_d1x0(N, K, list_multiplied_adjmat, tau, verbose = 0) 143 | 144 | # Check if the computed conditional probability is correct. 145 | expect_equal(pi1, pi1_true, check.attributes = FALSE, tolerance = 1e-10) 146 | }) 147 | -------------------------------------------------------------------------------- /lighthergm/src/create_feature_matrix.cpp: -------------------------------------------------------------------------------- 1 | // Files to look to get MM version: ReciprocityModel for basic functions, 2 | // BinaryReciprocityModel.cpp for version written already, 3 | // MMBinaryReciprocityModel.cpp for more eleborate version 4 | // #define ARMA_64BIT_WORD 1; 5 | #ifdef _OPENMP 6 | #include 7 | #else 8 | #define omp_get_max_threads() 0 9 | #endif 10 | #include 11 | #include "helper.h" 12 | // [[Rcpp::depends(RcppArmadillo)]] 13 | // [[Rcpp::plugins(openmp)]] 14 | 15 | 16 | // [[Rcpp::export]] 17 | arma::sp_mat get_sparse_feature_adjmat(const arma::vec& x) { 18 | int numOfVertices = x.size(); 19 | arma::sp_mat S(numOfVertices, numOfVertices); 20 | // When x[i] == x[j] and i != j, S[i,j] = 1. 21 | #pragma omp parallel for 22 | for (int j = 0; j < numOfVertices; j++) { 23 | for (int i = 0; i < numOfVertices; i++) { 24 | if (i != j) { 25 | if (x[i] == x[j]) { 26 | S(i, j) = 1; 27 | } 28 | } 29 | } 30 | } 31 | return S; 32 | } 33 | 34 | // [[Rcpp::export]] 35 | arma::sp_mat get_sparse_feature_adjmat_from_string(const Rcpp::StringVector& x) { 36 | int numOfVertices = x.size(); 37 | arma::sp_mat S(numOfVertices, numOfVertices); 38 | // When x[i] == x[j] and i != j, S[i,j] = 1. 39 | #pragma omp parallel for 40 | for (int j = 0; j < numOfVertices; j++) { 41 | for (int i = 0; i < numOfVertices; i++) { 42 | if (i != j) { 43 | if (x[i] == x[j]) { 44 | S(i, j) = 1; 45 | } 46 | } 47 | } 48 | } 49 | return S; 50 | } 51 | 52 | // Compute something like X := - (S + T + U) + (S % T + T % U + U % S) - S % T % U. 53 | // [[Rcpp::export]] 54 | arma::sp_mat get_matrix_for_denominator(int numOfVertices, const Rcpp::List& list_feature_adjmat) 55 | { 56 | int n_feature = list_feature_adjmat.length(); 57 | int n_item = pow(2, n_feature); 58 | arma::sp_mat output(numOfVertices, numOfVertices); 59 | 60 | // It is difficult to explain this part... 61 | for (int s = 1; s < n_item; s++) { 62 | arma::vec index = decimal_to_binary_vector(s, n_feature); 63 | int k = sum(index); 64 | arma::sp_mat X(numOfVertices, numOfVertices); 65 | // Set a counter 66 | int counter = 0; 67 | for (int t = 0; t < n_feature; t++) { 68 | if (index[t] == 1) { 69 | arma::sp_mat S = list_feature_adjmat[t]; 70 | counter += 1; 71 | if (counter == 1) { 72 | X = S; 73 | } else { 74 | X = X % S; 75 | } 76 | } 77 | } 78 | // X = arma::trimatu(X); 79 | output += pow(-1, k) * X; 80 | } 81 | return output; 82 | } 83 | 84 | 85 | // [[Rcpp::export]] 86 | Rcpp::List get_elementwise_multiplied_matrices(const arma::sp_mat& adjmat, 87 | const Rcpp::List& list_feature_adjmat) { 88 | // Number of nodes 89 | int n_node = adjmat.n_rows; 90 | // Number of feature matrices 91 | int n_feature = list_feature_adjmat.length(); 92 | 93 | // Append all the matrices in a single list 94 | Rcpp::List list_mat(n_feature+1); 95 | list_mat[0] = adjmat; 96 | for (int i = 0; i < n_feature; i++) { 97 | list_mat[i+1] = list_feature_adjmat[i]; 98 | } 99 | 100 | // Create a list to store multiplied matrices 101 | int n_matrix = list_mat.length(); 102 | int length_output = pow(2, n_matrix); 103 | Rcpp::List output(length_output); 104 | 105 | // The first element of the output list should contain the matrix for the denominator of pi_d0x0. 106 | arma::sp_mat denom = get_matrix_for_denominator(n_node, list_feature_adjmat); 107 | output[0] = denom; 108 | 109 | // Element-wise matrix multiplication without breaking sparsity 110 | for (int s = 1; s < length_output; s++) { 111 | // Convert an integer to a binary numeric vector 112 | arma::vec index = decimal_to_binary_vector(s, n_matrix); 113 | // Initialize a sparse matrix 114 | arma::sp_mat X(n_node, n_node); 115 | // Set a counter 116 | int counter = 0; 117 | 118 | // Start element-wise matrix multiplication 119 | for (int t = 0; t < n_matrix; t++) { 120 | // Prepare a matrix to be multiplied 121 | arma::sp_mat S = list_mat[t]; 122 | // First, multiply matrices that don't need subtraction like (one - X). 123 | if (index[t] == 1) { 124 | counter += 1; 125 | if (counter == 1) { 126 | X = S; 127 | } else { 128 | X = X % S; 129 | } 130 | } 131 | } 132 | 133 | // Then multiply the rest of the matrices 134 | for (int t = 0; t < n_matrix; t++) { 135 | // Prepare a matrix to be multiplied 136 | arma::sp_mat S = list_mat[t]; 137 | if (index[t] == 0) { 138 | X = X - S % X; 139 | } 140 | } 141 | // Lastly, store the multiplied matrix in the output list 142 | output[s] = X; 143 | } 144 | // Return the output 145 | return output; 146 | } 147 | 148 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-compute_quadratic_term_with_features.R: -------------------------------------------------------------------------------- 1 | test_that("computing a quadratic term with multiple features works", { 2 | set.seed(334) 3 | # Number of nodes 4 | N <- 12 5 | # Number of clusters 6 | K <- 3 7 | 8 | # Create an adjacency matrix 9 | edgelist <- 10 | tibble::tibble( 11 | tail = 1:N, 12 | head = 1:N 13 | ) %>% 14 | tidyr::expand(tail, head) %>% 15 | dplyr::filter(tail < head) %>% 16 | dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>% 17 | dplyr::filter(connect == 1) 18 | 19 | net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 20 | adj <- network::as.matrix.network.adjacency(net) 21 | adj <- as(adj, "dgCMatrix") 22 | 23 | # Create feature matrices 24 | x <- as.integer(unlist(purrr::rbernoulli(n = N))) 25 | S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 26 | S <- as(S, "dgCMatrix") 27 | for (i in 1:N) { 28 | for (j in 1:N) { 29 | if (i != j) { 30 | s_ij <- ifelse(x[i] == x[j], 1, 0) 31 | S[i, j] <- s_ij 32 | } 33 | } 34 | } 35 | 36 | y <- as.integer(unlist(purrr::rbernoulli(n = N))) 37 | V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 38 | V <- as(V, "dgCMatrix") 39 | for (i in 1:N) { 40 | for (j in 1:N) { 41 | if (i != j) { 42 | v_ij <- ifelse(y[i] == y[j], 1, 0) 43 | V[i, j] <- v_ij 44 | } 45 | } 46 | } 47 | 48 | z <- as.integer(unlist(purrr::rbernoulli(n = N))) 49 | W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 50 | W <- as(W, "dgCMatrix") 51 | for (i in 1:N) { 52 | for (j in 1:N) { 53 | if (i != j) { 54 | w_ij <- ifelse(z[i] == z[j], 1, 0) 55 | W[i, j] <- w_ij 56 | } 57 | } 58 | } 59 | 60 | # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k. 61 | tau <- 62 | matrix(c( 63 | 0.2, 0.5, 0.3, 64 | 0.4, 0.4, 0.2, 65 | 0.1, 0.4, 0.5, 66 | 0.4, 0.4, 0.2, 67 | 0.1, 0.1, 0.8, 68 | 0.05, 0.05, 0.9, 69 | 0.8, 0.1, 0.1, 70 | 0.3, 0.4, 0.3, 71 | 0.1, 0.8, 0.1, 72 | 0.5, 0.4, 0.1, 73 | 0.3, 0.3, 0.4, 74 | 0.8, 0.1, 0.1 75 | ), 76 | nrow = K, ncol = N 77 | ) 78 | tau <- t(tau) 79 | 80 | ########################################################### 81 | # Compute the true quadratic term in a naive way 82 | ########################################################### 83 | # Compute pi for D_ij = 1 84 | minPi <- 1e-4 85 | list_pi <- list() 86 | for (w in 0:1) { 87 | for (v in 0:1) { 88 | for (s in 0:1) { 89 | print(glue::glue("Compute pi for pi_s{s}v{v}w{w}")) 90 | denom <- matrix(0, nrow = K, ncol = K) 91 | num <- matrix(0, nrow = K, ncol = K) 92 | index <- s + 2 * v + 4 * w + 1 93 | print(index) 94 | for (k in 1:K) { 95 | for (l in 1:K) { 96 | for (i in 1:N) { 97 | for (j in 1:N) { 98 | if (i != j & S[i, j] == s & V[i, j] == v & W[i, j] == w) { 99 | denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l] 100 | } 101 | if (i != j & adj[i, j] == 1 & S[i, j] == s & V[i, j] == v & W[i, j] == w) { 102 | num[k, l] <- num[k, l] + tau[i, k] * tau[j, l] 103 | } 104 | } 105 | } 106 | } 107 | } 108 | pi <- num / denom 109 | # Remove extremely small elements in pi 110 | for (k in 1:K) { 111 | for (l in 1:K) { 112 | if (pi[k, l] < minPi) { 113 | pi[k, l] <- minPi 114 | } 115 | } 116 | } 117 | list_pi[[index]] <- pi 118 | } 119 | } 120 | } 121 | # Compute the quadratic term 122 | A_true <- matrix(0, nrow = N, ncol = K) 123 | for (i in 1:N) { 124 | for (j in 1:N) { 125 | if (i != j) { 126 | # For each ij, determine which pi must be used. 127 | index_ij <- S[i, j] + 2 * V[i, j] + 4 * W[i, j] + 1 128 | pi_ij <- list_pi[[index_ij]] 129 | # if D_ij = 0, replace pi with 1 - pi. 130 | if (adj[i, j] == 0) { 131 | pi_ij <- 1 - pi_ij 132 | } 133 | for (k in 1:K) { 134 | for (l in 1:K) { 135 | a_ij <- tau[j, l] * log(pi_ij[k, l]) 136 | A_true[i, k] <- A_true[i, k] + a_ij 137 | } 138 | } 139 | } 140 | } 141 | } 142 | 143 | A_true <- 1 - A_true / 2 144 | 145 | # Divide A by alpha_{ik} 146 | A_true <- A_true / tau 147 | 148 | ########################################################### 149 | # Compute the quadratic term using the cpp function 150 | ########################################################### 151 | list_feature_adjmat <- list(S, V, W) 152 | list_multiplied_feature_adjmat <- get_elementwise_multiplied_matrices(adj, list_feature_adjmat) 153 | A <- compute_quadratic_term_with_features(N, K, list_multiplied_feature_adjmat, tau, 0, 0) 154 | 155 | expect_equal(A, A_true, check.attributes = FALSE, tolerance = 1e-10) 156 | }) 157 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-within-network-stats.R: -------------------------------------------------------------------------------- 1 | test_that("generating multiple within-block networks works", { 2 | set.seed(1) 3 | # Prepare ingredients for simulating a network 4 | N <- 1000 5 | K <- 10 6 | 7 | list_within_params <- c(-3, 1, 1, 0.76, 0.08) 8 | list_between_params <- c(-5, 2, 2) 9 | formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle + kstar(2) 10 | 11 | memb <- sample(1:K, size = N, replace = TRUE) 12 | vertex_id <- as.character(11:(11 + N - 1)) 13 | 14 | x <- sample(1:20, size = N, replace = TRUE) 15 | y <- sample(LETTERS, size = N, replace = TRUE) 16 | 17 | 18 | df <- tibble::tibble( 19 | id = vertex_id, 20 | memb = memb, 21 | x = x, 22 | y = y 23 | ) 24 | 25 | # Obtain the stats 26 | within_sim_stats <- lighthergm::simulate_hergm_within( 27 | formula_for_simulation = formula, 28 | data_for_simulation = df, 29 | colname_vertex_id = 'id', 30 | colname_block_membership = 'memb', 31 | coef_within_block = list_within_params, 32 | ergm_control = ergm::control.simulate.formula(), 33 | seed = 1, 34 | n_sim = 3 35 | ) 36 | 37 | expected_terms <- statnet.common::list_rhs.formula(formula) 38 | 39 | expect_equal(nrow(within_sim_stats), 3) 40 | expect_equal(length(expected_terms), length(names(within_sim_stats))) 41 | 42 | }) 43 | 44 | test_that("simulating a network from a given edgelist works", { 45 | set.seed(1) 46 | # Prepare ingredients for simulating a network 47 | N <- 1000 48 | K <- 10 49 | 50 | list_within_params <- c(-3, 1, 1, 0.76, 0.08) 51 | list_between_params <- c(-5, 2, 2) 52 | formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle + kstar(2) 53 | 54 | memb <- sample(1:K, size = N, replace = TRUE) 55 | vertex_id <- as.character(11:(11 + N - 1)) 56 | 57 | x <- sample(1:20, size = N, replace = TRUE) 58 | y <- sample(LETTERS, size = N, replace = TRUE) 59 | 60 | df <- tibble::tibble( 61 | id = vertex_id, 62 | memb = memb, 63 | x = x, 64 | y = y 65 | ) 66 | 67 | # Simulate a network 68 | g_sim <- 69 | simulate_hergm_within( 70 | formula_for_simulation = formula, 71 | data_for_simulation = df, 72 | colname_vertex_id = "id", 73 | colname_block_membership = "memb", 74 | coef_within_block = list_within_params, 75 | coef_between_block = list_between_params, 76 | ergm_control = ergm::control.simulate.formula( 77 | MCMC.burnin = 10000, 78 | MCMC.interval = 100 79 | ), 80 | seed_for_within = 1, 81 | seed_for_between = 1, 82 | n_sim = 1, 83 | directed = FALSE, 84 | output = 'network' 85 | ) 86 | 87 | g_sim <- network::as.edgelist(g_sim) 88 | 89 | # Simulate a within-block network from a given edgelist 90 | g2 <- 91 | simulate_hergm_within( 92 | formula_for_simulation = formula, 93 | data_for_simulation = df, 94 | colname_vertex_id = "id", 95 | colname_block_membership = "memb", 96 | coef_within_block = list_within_params, 97 | coef_between_block = list_between_params, 98 | # These settings should result on the exact same network being returned 99 | # after one simulation. Check that. 100 | ergm_control = ergm::control.simulate.formula( 101 | MCMC.burnin = 0, 102 | MCMC.interval = 1 103 | ), 104 | seed_for_within = 1, 105 | seed_for_between = 1, 106 | seed_edgelist = g_sim, 107 | n_sim = 1, 108 | directed = FALSE, 109 | output = 'network' 110 | ) 111 | 112 | expect_match(class(g2), "network") 113 | 114 | g2 <- network::as.edgelist(g2) 115 | 116 | # Check if the network is correctly generated 117 | expect_equal(nrow(g_sim), nrow(g2)) 118 | expect_true(all(g_sim == g2)) 119 | }) 120 | 121 | test_that("The within-simulation begins from an empty network by default", { 122 | set.seed(1) 123 | # Prepare ingredients for simulating a network 124 | N <- 1000 125 | K <- 10 126 | 127 | list_within_params <- c(-3, 1, 1, 0.76, 0.08) 128 | list_between_params <- c(-5, 2, 2) 129 | formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle + kstar(2) 130 | 131 | memb <- sample(1:K, size = N, replace = TRUE) 132 | vertex_id <- as.character(11:(11 + N - 1)) 133 | 134 | x <- sample(1:20, size = N, replace = TRUE) 135 | y <- sample(LETTERS, size = N, replace = TRUE) 136 | 137 | df <- tibble::tibble( 138 | id = vertex_id, 139 | memb = memb, 140 | x = x, 141 | y = y 142 | ) 143 | 144 | # Simulate a network 145 | g_sim <- 146 | simulate_hergm_within( 147 | formula_for_simulation = formula, 148 | data_for_simulation = df, 149 | colname_vertex_id = "id", 150 | colname_block_membership = "memb", 151 | coef_within_block = list_within_params, 152 | coef_between_block = list_between_params, 153 | ergm_control = ergm::control.simulate.formula( 154 | MCMC.burnin = 0, 155 | MCMC.interval = 1 156 | ), 157 | seed_for_within = 1, 158 | seed_for_between = 1, 159 | n_sim = 1, 160 | directed = FALSE, 161 | output = 'network' 162 | ) 163 | 164 | expect_match(class(g_sim), "network") 165 | g_sim <- network::as.edgelist(g_sim) 166 | 167 | # Check if the network is correctly generated 168 | expect_equal(nrow(g_sim), 0) 169 | }) 170 | -------------------------------------------------------------------------------- /lighthergm/man/simulate_hergm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/simulate_hergm.R 3 | \name{simulate_hergm} 4 | \alias{simulate_hergm} 5 | \title{Simulate a network} 6 | \usage{ 7 | simulate_hergm( 8 | formula_for_simulation, 9 | data_for_simulation, 10 | colname_vertex_id, 11 | colname_block_membership, 12 | seed_edgelist = NULL, 13 | coef_within_block, 14 | coef_between_block, 15 | ergm_control = ergm::control.simulate.formula(), 16 | seed = NULL, 17 | directed = FALSE, 18 | n_sim = 1, 19 | output = "network", 20 | prevent_duplicate = TRUE, 21 | use_fast_between_simulation = FALSE, 22 | list_feature_matrices = NULL, 23 | verbose = 0, 24 | ... 25 | ) 26 | } 27 | \arguments{ 28 | \item{formula_for_simulation}{formula for simulating a network} 29 | 30 | \item{data_for_simulation}{a data frame that contains vertex id, block membership, and vertex features.} 31 | 32 | \item{colname_vertex_id}{a column name in the data frame for the vertex id} 33 | 34 | \item{colname_block_membership}{a column name in the data frame for the block membership} 35 | 36 | \item{seed_edgelist}{an edgelist used for creating a seed network. It should have the "edgelist" class} 37 | 38 | \item{coef_within_block}{a vector of within-block parameters. The order of the parameters should match that of the formula.} 39 | 40 | \item{coef_between_block}{a vector of between-block parameters. The order of the parameters should match that of the formula without externality terms.} 41 | 42 | \item{ergm_control}{auxiliary function as user interface for fine-tuning ERGM simulation} 43 | 44 | \item{seed}{seed value (integer) for network simulation.} 45 | 46 | \item{directed}{whether the simulated network is directed} 47 | 48 | \item{n_sim}{number of networks generated} 49 | 50 | \item{output}{Normally character, one of "network" (default), "stats", "edgelist", to determine the output format.} 51 | 52 | \item{prevent_duplicate}{If \code{TRUE}, the coefficient on nodematch("block") is set to be a very large negative number in drawing between-block links, so that there will be (almost) no within-block links.} 53 | 54 | \item{use_fast_between_simulation}{If \code{TRUE}, this function uses an effcient way to simulate a between-block network. If the network is very large, you should consider using this option. 55 | Note that when you use this, the first element of \code{coef_between_block} must be the edges parameter.} 56 | 57 | \item{list_feature_matrices}{a list of feature adjacency matrices. If \code{use_fast_between_simulation}, this must be given.} 58 | 59 | \item{verbose}{If this is TRUE/1, the program will print out additional information about the progress of simulation.} 60 | 61 | \item{...}{Additional arguments, to be passed to lower-level functions} 62 | } 63 | \description{ 64 | Simulate a network 65 | } 66 | \examples{ 67 | # Load an embedded network object. 68 | data(toyNet) 69 | 70 | # Specify the model that you would like to estimate. 71 | model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle 72 | 73 | # Estimate the model 74 | hergm_res <- 75 | lighthergm::hergm( 76 | object = model_formula, # The model you would like to estiamte 77 | n_clusters = 4, # The number of blocks 78 | n_em_step_max = 100, # The maximum number of EM algorithm steps 79 | estimate_parameters = TRUE, # Perform parameter estimation after the block recovery step 80 | clustering_with_features = TRUE, # Indicate that clustering must take into account nodematch on characteristics 81 | check_block_membership = TRUE # Keep track of block memberships at each EM iteration 82 | ) 83 | 84 | # Prepare a data frame that contains nodal id and covariates. 85 | nodes_data <- 86 | data.frame( 87 | node_id = network::network.vertex.names(toyNet), 88 | block = hergm_res$partition, 89 | x = network::get.vertex.attribute(toyNet, "x"), 90 | y = network::get.vertex.attribute(toyNet, "y") 91 | ) 92 | # The feature adjacency matrices 93 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(toyNet, model_formula) 94 | 95 | # Estimated coefficients for the between-community connections 96 | coef_between_block <- coef(hergm_res$est_between) 97 | 98 | # Estimated coefficients for the within-community connections 99 | coef_within_block <- coef(hergm_res$est_within) 100 | 101 | # The MCMC settings 102 | sim_ergm_control <- ergm::control.simulate.formula( 103 | MCMC.burnin = 1000000, 104 | MCMC.interval = 100000 105 | ) 106 | 107 | # Simulate network stats 108 | sim_stats <- lighthergm::simulate_hergm( 109 | formula_for_simulation = model_formula, # Formula for between-blocks 110 | data_for_simulation = nodes_data, # Same as for gof, a dataframe containing nodes attributes 111 | colname_vertex_id = "node_id", # Name of the column containing node IDs 112 | colname_block_membership = "block", # Name of the column containing block IDs 113 | coef_between_block = coef_between_block, # The coefficients for the between connections 114 | coef_within_block = coef_within_block, # The coefficients for the within connections 115 | ergm_control = sim_ergm_control, 116 | n_sim = 100, # Number of simulations to return 117 | output = "stats", # If `stats` a list with network statistics for the between and within connections is returned 118 | use_fast_between_simulation = TRUE, # Simulates between connections by drawing from a logistic distribution. If FALSE, draws between connections by MCMC. 119 | list_feature_matrices = list_feature_matrices 120 | ) 121 | } 122 | -------------------------------------------------------------------------------- /lighthergm/man/hergm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hergm.R 3 | \name{hergm} 4 | \alias{hergm} 5 | \title{Hierarchical exponential-family random graph models (HERGMs) with local dependence} 6 | \usage{ 7 | hergm( 8 | object, 9 | n_clusters, 10 | n_cores = 1, 11 | block_membership = NULL, 12 | estimate_parameters = TRUE, 13 | verbose = 0, 14 | n_em_step_max = 100, 15 | initialization_method = 1, 16 | use_infomap_python = FALSE, 17 | seed_infomap = NULL, 18 | weight_for_initialization = 1000, 19 | seeds = NULL, 20 | initialized_cluster_data = NULL, 21 | method_second_step = "MPLE", 22 | clustering_with_features = TRUE, 23 | list_multiplied_feature_matrices = NULL, 24 | fix_covariate_parameter = FALSE, 25 | compute_pi = FALSE, 26 | check_alpha_update = FALSE, 27 | check_block_membership = FALSE, 28 | cache = NULL, 29 | ... 30 | ) 31 | } 32 | \arguments{ 33 | \item{object}{A formula or \code{lighthergm} class object. A \code{lighthergm} is returned by \code{hergm()}. 34 | When you pass a \code{lighthergm} class object to \code{hergm()}, you can restart the EM step.} 35 | 36 | \item{n_clusters}{The number of blocks. This must be specified by the user. 37 | When you pass a "lighthergm" class object to \code{hergm()}, you don't have to specify this argument.} 38 | 39 | \item{n_cores}{The number of CPU cores to use.} 40 | 41 | \item{block_membership}{The pre-specified block memberships for each node. 42 | If \code{NULL}, the latent community structure is estimated, assuming that the number of communities is \code{n_clusters}.} 43 | 44 | \item{estimate_parameters}{If \code{TRUE}, both clustering and parameter estimation are implemented. 45 | If \code{FALSE}, only clustering is executed.} 46 | 47 | \item{verbose}{A logical or an integer: if this is TRUE/1, 48 | the program will print out additional information about the progress of estimation and simulation. 49 | A higher value yields lower level information.} 50 | 51 | \item{n_em_step_max}{The maximum number of EM iterations. 52 | Currently, no early stopping criteria is introduced. Thus \code{n_em_step_max} EM iterations are exactly implemented.} 53 | 54 | \item{initialization_method}{Cluster initialization method. 55 | If \code{1} (the default), \code{igraph}'s infomap is implemented. 56 | If \code{2}, the initial clusters are randomly uniformally selected. 57 | If \code{3}, spectral clustering is conducted.} 58 | 59 | \item{use_infomap_python}{If \code{TRUE}, the cluster initialization is implemented using Pythons' infomap. 60 | When using this, make sure that Python's infomap is installed and callable from `system().} 61 | 62 | \item{seed_infomap}{seed value (integer) for Python's infomap.} 63 | 64 | \item{weight_for_initialization}{weight value used for cluster initialization. The higher this value, the more weight is put on the initialized alpha.} 65 | 66 | \item{seeds}{seed value (integer) for the random number generator} 67 | 68 | \item{initialized_cluster_data}{initialized cluster data from which the EM iterations begin. 69 | This can be either a vector of block affiliations of each node or initialized cluster data by Python's infomap (given by .clu format).} 70 | 71 | \item{method_second_step}{If "MPLE" (the default), then the maximum pseudolikelihood estimator is implemented when estimating the within-block network model. 72 | If "MLE", then an approximate maximum likelihood estimator is conducted.} 73 | 74 | \item{clustering_with_features}{If \code{TRUE}, clustering is implemented using the discrete covariates specified in the formula.} 75 | 76 | \item{list_multiplied_feature_matrices}{a list of multiplied feature adjacency matarices necessary for EM step. 77 | If \code{NULL}, \code{hergm()} automatically calculates. Or you can calculate by \code{compute_multiplied_feature_matrices()}.} 78 | 79 | \item{fix_covariate_parameter}{If \code{TRUE}, when estimating the within-block network model, 80 | parameters for covariates are fixed at the estimated of the between-block network model.} 81 | 82 | \item{compute_pi}{If \code{TRUE}, this function keeps track of pi matrices at each EM iteration. 83 | If the network is large, we strongly recommend to set to be \code{FALSE}.} 84 | 85 | \item{check_alpha_update}{If \code{TRUE}, this function keeps track of alpha matrices at each EM iteration. 86 | If the network is large, we strongly recommend to set to be \code{FALSE}.} 87 | 88 | \item{check_block_membership}{If TRUE, this function keeps track of estimated block memberships at each EM iteration.} 89 | 90 | \item{cache}{a \code{cachem} cache object used to store intermediate calculations such as eigenvector decomposition results.} 91 | 92 | \item{...}{Additional arguments, to be passed to lower-level functions} 93 | } 94 | \description{ 95 | The function hergm estimates and simulates three classes of hierarchical exponential-family random graph models. 96 | } 97 | \examples{ 98 | # Load an embedded network object. 99 | data(toyNet) 100 | 101 | # Specify the model that you would like to estimate. 102 | model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle 103 | 104 | # Estimate the model 105 | hergm_res <- 106 | lighthergm::hergm( 107 | object = model_formula, # The model you would like to estiamte 108 | n_clusters = 4, # The number of blocks 109 | n_em_step_max = 100, # The maximum number of EM algorithm steps 110 | estimate_parameters = TRUE, # Perform parameter estimation after the block recovery step 111 | clustering_with_features = TRUE, # Indicate that clustering must take into account nodematch on characteristics 112 | check_block_membership = TRUE # Keep track of block memberships at each EM iteration 113 | ) 114 | } 115 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-estimate_within_param.R: -------------------------------------------------------------------------------- 1 | test_that("estimating within-block parameters works", { 2 | ################################################################# 3 | ### Network preparation ######################################### 4 | ################################################################# 5 | # Create an adjacency matrix 6 | adj <- c( 7 | c(0, 1, 0, 1, 1, 0), 8 | c(1, 0, 1, 0, 0, 1), 9 | c(0, 1, 0, 1, 1, 0), 10 | c(1, 0, 1, 0, 1, 1), 11 | c(1, 0, 1, 1, 0, 1), 12 | c(0, 1, 0, 1, 1, 0) 13 | ) 14 | adj <- matrix(data = adj, nrow = 6, ncol = 6) 15 | rownames(adj) <- as.character(1001:1006) 16 | colnames(adj) <- as.character(1001:1006) 17 | 18 | # Vertex attribute 19 | x <- c(1, 0, 0, 1, 1, 0) 20 | 21 | # Block 22 | block <- c(1, 2, 3, 1, 3, 2) 23 | 24 | g <- network::network(adj, matrix.type = "adjacency") 25 | network::set.vertex.attribute(g, attrname = "x", value = x) 26 | 27 | ################################################################# 28 | ### Estimate the within-block parameters ######################## 29 | ################################################################# 30 | suppressWarnings(est <- estimate_within_params( 31 | formula = g ~ edges + nodematch("x"), 32 | network = g, 33 | z_memb = block, 34 | parallel = FALSE, 35 | verbose = 0, 36 | initial_estimate = NULL, 37 | seeds = NULL, 38 | method_second_step = "MPLE" 39 | )) 40 | 41 | 42 | # Get the network used for estimation 43 | g_est <- est$network 44 | 45 | # Get the adjacency matrix for the network 46 | adj_est <- network::as.matrix.network.adjacency(g_est) 47 | 48 | ################################################################# 49 | ### Test if the created and true networks are the same ######### 50 | ################################################################# 51 | 52 | # Prepare the answer 53 | adj_ans <- c( 54 | c(0, 1, 0, 0, 0, 0), 55 | c(1, 0, 0, 0, 0, 0), 56 | c(0, 0, 0, 1, 0, 0), 57 | c(0, 0, 1, 0, 0, 0), 58 | c(0, 0, 0, 0, 0, 1), 59 | c(0, 0, 0, 0, 1, 0) 60 | ) 61 | 62 | # Order of vertex id after diagonization: 1001, 1004, 1002, 1006, 1003, 1005 63 | # Order of original x: c(1, 0, 0, 1, 1, 0) 64 | vertex_id_ans <- as.character(c(1001, 1004, 1002, 1006, 1003, 1005)) 65 | x_ans <- c(1, 1, 0, 0, 0, 1) 66 | 67 | # Test 68 | expect_equal(adj_est, adj_ans, check.attributes = FALSE) 69 | expect_equal(network::get.vertex.attribute(g_est, "x"), x_ans) 70 | expect_equal(network::network.vertex.names(g_est), vertex_id_ans) 71 | }) 72 | 73 | test_that("estimating within-block parameters works with non-consecutive block names", { 74 | adj <- c( 75 | c(0, 1, 0, 1, 1, 0), 76 | c(1, 0, 1, 0, 0, 1), 77 | c(0, 1, 0, 1, 1, 0), 78 | c(1, 0, 1, 0, 1, 1), 79 | c(1, 0, 1, 1, 0, 1), 80 | c(0, 1, 0, 1, 1, 0) 81 | ) 82 | adj <- matrix(data = adj, nrow = 6, ncol = 6) 83 | rownames(adj) <- as.character(1001:1006) 84 | colnames(adj) <- as.character(1001:1006) 85 | 86 | x <- c(1, 0, 0, 1, 1, 0) 87 | 88 | # Use non-consecutive block names 89 | block <- c(50, 70, 95, 50, 95, 70) 90 | 91 | g <- network::network(adj, matrix.type = "adjacency") 92 | network::set.vertex.attribute(g, attrname = "x", value = x) 93 | 94 | suppressWarnings(est <- estimate_within_params( 95 | formula = g ~ edges + nodematch("x"), 96 | network = g, 97 | z_memb = block, 98 | parallel = FALSE, 99 | verbose = 0, 100 | initial_estimate = NULL, 101 | seeds = NULL, 102 | method_second_step = "MPLE" 103 | )) 104 | 105 | 106 | # Get the network used for estimation 107 | g_est <- est$network 108 | 109 | # Get the adjacency matrix for the network 110 | adj_est <- network::as.matrix.network.adjacency(g_est) 111 | 112 | adj_ans <- c( 113 | c(0, 1, 0, 0, 0, 0), 114 | c(1, 0, 0, 0, 0, 0), 115 | c(0, 0, 0, 1, 0, 0), 116 | c(0, 0, 1, 0, 0, 0), 117 | c(0, 0, 0, 0, 0, 1), 118 | c(0, 0, 0, 0, 1, 0) 119 | ) 120 | 121 | vertex_id_ans <- as.character(c(1001, 1004, 1002, 1006, 1003, 1005)) 122 | x_ans <- c(1, 1, 0, 0, 0, 1) 123 | 124 | # Check that the network is the same 125 | expect_equal(adj_est, adj_ans, check.attributes = FALSE) 126 | expect_equal(network::get.vertex.attribute(g_est, "x"), x_ans) 127 | expect_equal(network::network.vertex.names(g_est), vertex_id_ans) 128 | 129 | # Check that the blocks are assigned to the right nodes 130 | g_nodes_data <- data.frame( 131 | id = network::network.vertex.names(g), 132 | block = block 133 | ) %>% dplyr::arrange(id) 134 | 135 | est_g_nodes_data <- data.frame( 136 | id = network::network.vertex.names(g_est), 137 | block = as.double(network::get.vertex.attribute(g_est, 'block')) 138 | ) %>% dplyr::arrange(id) 139 | 140 | expect_equal(g_nodes_data$id, est_g_nodes_data$id) 141 | expect_equal(g_nodes_data$block, est_g_nodes_data$block) 142 | }) 143 | 144 | test_that("control.ergm settings can be passed to the within estimation from hergm function", { 145 | # Define some settings for testing purposes 146 | test_burnin <- 9797 147 | test_interval <- 3434 148 | test_method <- 'Stepping' 149 | 150 | hergm_formula <- g ~ edges + triangle + nodematch("x") 151 | 152 | n_nodes <- 100 153 | n_clusters <- 2 154 | 155 | nodes_data <- tibble::tibble( 156 | node_id = 1:n_nodes, 157 | x = sample(1:2, size = n_nodes, replace = T), 158 | block = sample(1:n_clusters, size = n_nodes, replace = T) 159 | ) 160 | 161 | g <- network::network.initialize(n = n_nodes) 162 | network::set.vertex.attribute(g, "x", nodes_data$x) 163 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(g, hergm_formula) 164 | 165 | coef_between_block <- c(-3, 1) 166 | coef_within_block <- c(-2, 0.1, 0.5) 167 | 168 | sim_ergm_control <- ergm::control.simulate.formula( 169 | MCMC.burnin = 4000000, 170 | MCMC.interval = 200000 171 | ) 172 | 173 | g <- lighthergm::simulate_hergm( 174 | formula_for_simulation = hergm_formula, 175 | data_for_simulation = nodes_data, 176 | colname_vertex_id = "node_id", 177 | colname_block_membership = "block", 178 | coef_between_block = coef_between_block, 179 | coef_within_block = coef_within_block, 180 | ergm_control = sim_ergm_control, 181 | fast_between_simulation = TRUE, 182 | list_feature_matrices = list_feature_matrices 183 | ) 184 | 185 | hergm_res <- lighthergm::hergm( 186 | g ~ edges + nodematch("x") + triangle, 187 | n_clusters = n_clusters, 188 | n_em_step_max = 10, 189 | estimate_parameters = T, 190 | clustering_with_features = T, 191 | method_second_step = 'MLE', 192 | control = ergm::control.ergm( 193 | MCMC.burnin = test_burnin, 194 | MCMC.interval = test_interval, 195 | main.method = test_method 196 | ) 197 | ) 198 | 199 | used_control <- hergm_res$est_within$control 200 | 201 | expect_equal(used_control$MCMC.burnin, test_burnin) 202 | expect_equal(used_control$MCMC.interval, test_interval) 203 | expect_equal(used_control$main.method, test_method) 204 | }) 205 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![lighthergm-logo](lighthergm.jpg "Title") 2 | 3 | # `lighthergm`: Fit, Simulate, and Diagnose Hierarchical Exponential-Family Models for Networks in A Scalable Way 4 | 5 | `lighthergm` is an R library for estimating Hierarchical Exponential Random Graph Models (HERGMs) efficiently on large networks. It also contains tools for simulating networks with local dependence and for assessing the goodness-of-fit of the estimates. It is built upon the existing R package `hergm` (Schweinberger and Luna, 2018). 6 | 7 | See [Martínez Dahbura et al. (2021)](https://arxiv.org/abs/2105.12704) for more details on the algorithm and the model. 8 | 9 | ## Table of Contents 10 | 11 | - [Installation](#installation) 12 | - [Usage](#usage) 13 | - [Docker](#docker) 14 | - [Contributing](#contributing) 15 | - [License](#license) 16 | - [References](#references) 17 | 18 | ## Installation 19 | 20 | You can install `lighthergm` in several ways. 21 | 22 | 23 | ### From GitHub 24 | 25 | Use devtools to install directly from the repository: 26 | ```r 27 | devtools::install_github(repo = 'sansan-inc/lighthergm', subdir = '/lighthergm', ref = 'main') 28 | ``` 29 | This will install the latest stable version of the code. If you need to install a different version, use the name of the target branch as the value for the `ref` argument. 30 | 31 | ### From Source 32 | Clone this repository and run the following R code from the root directory: 33 | 34 | ```r 35 | devtools::install("lighthergm") 36 | ``` 37 | 38 | ### Installing on Mac OS 39 | Some Mac OS users may encounter the following error when installing `lighthergm`: 40 | 41 | ``` 42 | clang: error: unsupported option '-fopenmp' 43 | ``` 44 | This is because **clang** for Mac OS may not support the **-fopenmp** flag by default. 45 | 46 | To fix this, install [llvm](https://formulae.brew.sh/formula/llvm) from `brew`: 47 | ``` 48 | brew install llvm 49 | ``` 50 | 51 | Then, edit the **~/.R/Makevars** file in your machine (if it does not exist, create it) including the following line: 52 | ``` 53 | CXX11 = /usr/local/opt/llvm/bin/clang 54 | ``` 55 | This indicates R that it should use the right `clang` for compilation. After doing this you should be able to install the library without problems using any of the methods above. 56 | 57 | If you still have issues installing `lighthergm` let us know. 58 | 59 | ## Usage 60 | 61 | See [the vignette](doc/intro-lighthergm.pdf) for a detailed introduction to `lighthergm`. 62 | 63 | You can build the vignette by passing `build_vignettes = TRUE` to your devtools command of choice when installing. 64 | 65 | ## Docker 66 | 67 | You can test-drive `lighthergm` on a virtual environment with R Studio using Docker, if it is installed on your machine. This can be useful if you want to check out the library without installing it on your machine. 68 | 69 | To do it clone the repository and run the following command from its root directory: 70 | 71 | ``` 72 | docker build --tag lighthergm . 73 | ``` 74 | 75 | This will create a Docker image called `lighthergm`. To run a container from that image, set the **PASSWORD** environment variable to any password you want to use for securing your R Studio session. 76 | 77 | ``` 78 | docker run -p 8787:8787 -e PASSWORD=$PASSWORD --name lighthergm --rm lighthergm 79 | ``` 80 | 81 | Now you can open R Studio by directing your browser to `localhost:8787`. Use the username **rstudio** and the value of the **PASSWORD** environment variable as the password. Now you should be able to work on an R Studio session with `lighthergm` installed. 82 | 83 | You can also use **Docker Compose** for running a virtual environment with R Studio and `lighthergm`. Just set the necessary environment variables and run it with `docker-compose up`. 84 | 85 | This will build the image and run a container for you, which you can stop or take down using `docker-compose stop` or `docker-compose down` respectively. 86 | 87 | If you need to store the files you create within the container, uncomment the commented out lines in the `docker-compose.yaml` file and set the following environment variables on your environment: 88 | 89 | - `PASSWORD`: The password you want to set for your R Studio session. 90 | - `USERID`: Your user id (depends on your OS). 91 | - `GROUPID`: Your group id. Check your user and group ids by typing `id` on your command line tool. 92 | - `WORKSPACE_PATH`: A path to a directory where files created inside the container will be stored. What you save in `home/rstudio/workspace` within the container will be stored there. 93 | 94 | You can also set multiple environment variables by creating a file called **.env** in the root directory of the repository with the values of the environment variables. For example: 95 | 96 | ``` 97 | PASSWORD=somepassword 98 | USERID=1234 99 | GROUPID=4321 100 | WORKSPACE_PATH=/path/to/your/workspace/ 101 | ``` 102 | 103 | If you do so, remember to include the file's path in the `.gitignore` file. 104 | 105 | The build works on the following Docker setup: 106 | 107 | ``` 108 | Docker Engine version: 19.03.12 109 | docker-compose version: 1.17.1 110 | ``` 111 | 112 | ## Contributing 113 | 114 | We welcome and encorage contributions in the form of issue reports and pull requests. Current topics where collaboration is especially welcome include, but are not limited to: 115 | 116 | - Scalability improvements, for example, by avoiding the usage of memory-expensive feature adjacency matrices. 117 | 118 | - Support for a wider range of models, including continuous covariates, directed networks, etc. 119 | 120 | - Quality-of-life improvements to facilitate network analysis on a wider range of use cases. 121 | 122 | ### Creating Issues 123 | 124 | Feel free to fill an issue with your bug reports, feature suggestions and recommendations for improving the performance of `lighthergm`. 125 | 126 | Please include all the necessary details. In the case of bug reports, include code and the necessary steps to reproduce it. For feature suggestions, describe in detail the problem they address and include if possible some code to make it easy to discuss. 127 | 128 | ### Pull Requests 129 | 130 | We employ gitflow whenever possible. The **develop** branch contains the latest changes, and the **main** branch is for releases. 131 | 132 | To create a pull request, fork the repository and create a feature branch such as **feature/added-shiny-new-feature** from **develop**. 133 | 134 | Before making a pull request, please use `devtools::check()` to check that the package can be installed and that the unit tests pass after your changes. We encorage test-oriented development. Adding tests to cover your changes makes it much easier to review you proposal. 135 | 136 | When done, make a pull request from your fork, including any necessary details to help us understand the nature of your changes. 137 | 138 | ## License 139 | This package is licensed under GPL-3. See the [license file](lighthergm/LICENSE.md) for details. 140 | 141 | ## References 142 | - Martínez Dahbura, Juan Nelson, Shota Komatsu, Takanori Nishida, and Angelo Mele (2021), "A Structural Model of Business Card Exchange Networks", Working Paper, Available at https://arxiv.org/abs/2105.12704. 143 | - Schweinberger, Michael and Pamela Luna (2018), "HERGM: Hierarchical Exponential-Family Random Graph Models", Journal of Statistical Software 85(1), 1-39. 144 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-EM-checkpoint.R: -------------------------------------------------------------------------------- 1 | test_that("Setting a checkpoint for EM iterations works", { 2 | set.seed(334) 3 | # Simulate a network to work with in this unit test. 4 | # Number of nodes 5 | N <- 1000 6 | # Number of blocks 7 | K <- 50 8 | # Block memberships (same block size) 9 | memb <- rep(1:K, each = N / K) 10 | # Covariates 11 | x <- sample(1:10, size = N, replace = TRUE) 12 | y <- sample(1:10, size = N, replace = TRUE) 13 | 14 | # Within-block parameters: edges, nodematch("x"), nodematch("y"), triangle 15 | list_within_params <- c(-1, 1, 1, 0.5) 16 | # Between-block parameters: edges, nodematch("x"), nodematch("y") 17 | list_between_params <- c(-3.5, 0.5, 0.5) 18 | 19 | formula <- g ~ edges + nodematch("x") + nodematch("y") + triangle 20 | 21 | vertex_id <- 1:N 22 | 23 | df <- tibble::tibble( 24 | id = vertex_id, 25 | memb = memb, 26 | x = x, 27 | y = y 28 | ) 29 | 30 | g_sim <- 31 | simulate_hergm( 32 | formula_for_simulation = formula, 33 | data_for_simulation = df, 34 | colname_vertex_id = "id", 35 | colname_block_membership = "memb", 36 | coef_within_block = list_within_params, 37 | coef_between_block = list_between_params, 38 | ergm_control = ergm::control.simulate.formula(MCMC.burnin = 1000000, MCMC.interval = 1000), 39 | seed = 1, 40 | n_sim = 1, 41 | directed = FALSE, 42 | output = "network" 43 | ) 44 | 45 | ############# 1. Clustering with features ############################## 46 | # Conduct clustering at once 47 | initial_weight <- 1000 48 | 49 | cluster_with_feature <- 50 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 51 | n_clusters = K, 52 | estimate_parameters = TRUE, 53 | verbose = 0, 54 | n_em_step_max = 10, 55 | initialization_method = 3, 56 | infomap_python = FALSE, 57 | clustering_with_features = TRUE, 58 | check_alpha_update = TRUE, 59 | compute_pi = TRUE, 60 | check_lower_bound = TRUE, 61 | check_block_membership = TRUE, 62 | weight_for_initialization = initial_weight, 63 | seeds = 334 64 | ) 65 | 66 | # Conduct clustering in two steps 67 | first_step <- 68 | lighthergm::hergm( 69 | object = g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 70 | n_clusters = K, 71 | estimate_parameters = TRUE, 72 | verbose = 0, 73 | n_em_step_max = 7, 74 | initialization_method = 3, 75 | infomap_python = FALSE, 76 | clustering_with_features = TRUE, 77 | check_block_membership = TRUE, 78 | weight_for_initialization = initial_weight, 79 | check_alpha_update = TRUE, 80 | seeds = 334 81 | ) 82 | 83 | second_step <- 84 | lighthergm::hergm( 85 | object = first_step, 86 | n_em_step_max = 3 87 | ) 88 | 89 | # Check if the calculated lower bounds are identical (both length and values) 90 | expect_equal(cluster_with_feature$EM_lower_bound, second_step$EM_lower_bound) 91 | 92 | # Check if block memberships are identical over iterations (both length and Yule's coefficient) 93 | expect_true(all(unlist(purrr::map2(cluster_with_feature$EM_list_z, second_step$EM_list_z, compute_yule_coef) == rep(1, length(second_step$EM_list_z))))) 94 | 95 | # The partition should be the same at the end of the second estimation as the one obtained after running 10 iterations of the EM algorithm 96 | expect_equal(compute_yule_coef(cluster_with_feature$partition, second_step$partition), 1) 97 | 98 | # Check if alphas are identical over iterations 99 | for (i in 1:length(cluster_with_feature$EM_list_alpha)) { 100 | expect_equal(cluster_with_feature$EM_list_alpha[[i]], second_step$EM_list_alpha[[i]], check.attribute = FALSE, tolerance = 1e-2) 101 | } 102 | # Check if the alpha after the second checkpoint is the same as the alpha when performing estimation with 10 EM iterations without a checkpoint. 103 | expect_equal(cluster_with_feature$alpha, second_step$alpha) 104 | 105 | # Check if estimated coefficients with and without a checkpoint are identical. 106 | expect_equal(coef(cluster_with_feature$est_between), coef(second_step$est_between), tolerance = 1e-10) 107 | expect_equal(coef(cluster_with_feature$est_within), coef(second_step$est_within), tolerance = 1e-10) 108 | 109 | 110 | ############# 2. Clustering without features ############################## 111 | # Conduct clustering at once 112 | initial_weight <- 1000 113 | 114 | cluster_without_feature <- 115 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 116 | n_clusters = K, 117 | estimate_parameters = TRUE, 118 | verbose = 0, 119 | n_em_step_max = 10, 120 | initialization_method = 3, 121 | infomap_python = FALSE, 122 | clustering_with_features = FALSE, 123 | check_alpha_update = TRUE, 124 | compute_pi = TRUE, 125 | check_lower_bound = TRUE, 126 | check_block_membership = TRUE, 127 | weight_for_initialization = initial_weight, 128 | seeds = 334 129 | ) 130 | 131 | # Conduct clustering in two steps 132 | first_step_without_feature <- 133 | lighthergm::hergm(g_sim ~ edges + nodematch("x") + nodematch("y") + triangles, 134 | n_clusters = K, 135 | estimate_parameters = TRUE, 136 | verbose = 0, 137 | n_em_step_max = 3, 138 | initialization_method = 3, 139 | infomap_python = FALSE, 140 | clustering_with_features = FALSE, 141 | check_alpha_update = TRUE, 142 | compute_pi = TRUE, 143 | check_lower_bound = TRUE, 144 | check_block_membership = TRUE, 145 | weight_for_initialization = initial_weight, 146 | seeds = 334 147 | ) 148 | 149 | second_step_without_feature <- 150 | lighthergm::hergm( 151 | object = first_step_without_feature, 152 | n_em_step_max = 7 153 | ) 154 | 155 | # Check if the calculated lower bounds are identical (both length and values) 156 | expect_equal(cluster_without_feature$EM_lower_bound, second_step_without_feature$EM_lower_bound) 157 | 158 | # Check if block memberships are identical over iterations (both length and Yule's coefficient) 159 | expect_true(all(unlist(purrr::map2(cluster_without_feature$EM_list_z, second_step_without_feature$EM_list_z, compute_yule_coef) == 160 | rep(1, length(second_step_without_feature$EM_list_z))))) 161 | 162 | # The partition should be the same at the end of the second estimation as the one obtained after running 10 iterations of the EM algorithm 163 | expect_equal(compute_yule_coef(cluster_without_feature$partition, second_step_without_feature$partition), 1) 164 | 165 | # Check if alphas are identical over iterations 166 | for (i in 1:length(cluster_without_feature$EM_list_alpha)) { 167 | expect_equal(cluster_without_feature$EM_list_alpha[[i]], second_step_without_feature$EM_list_alpha[[i]], check.attribute = FALSE, tolerance = 1e-2) 168 | } 169 | # Check if the alpha after the second checkpoint is the same as the alpha when performing estimation with 10 EM iterations without a checkpoint. 170 | expect_equal(cluster_without_feature$alpha, second_step_without_feature$alpha) 171 | 172 | # Check if estimated coefficients with and without a checkpoint are identical. 173 | expect_equal(coef(cluster_without_feature$est_between), coef(second_step_without_feature$est_between), tolerance = 1e-10) 174 | expect_equal(coef(cluster_without_feature$est_within), coef(second_step_without_feature$est_within), tolerance = 1e-10) 175 | }) 176 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-compute_lower_bound.R: -------------------------------------------------------------------------------- 1 | test_that("computing the lower bound works", { 2 | set.seed(12345) 3 | #################### 4 | # Setup 5 | #################### 6 | # Number of nodes 7 | N <- 12 8 | # Number of clusters 9 | K <- 3 10 | 11 | # Create an adjacency matrix 12 | edgelist <- 13 | tibble::tibble( 14 | tail = 1:N, 15 | head = 1:N 16 | ) %>% 17 | tidyr::expand(tail, head) %>% 18 | dplyr::filter(tail < head) %>% 19 | dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>% 20 | dplyr::filter(connect == 1) 21 | 22 | net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 23 | adj <- network::as.matrix.network.adjacency(net) 24 | adj <- as(adj, "dgCMatrix") 25 | 26 | # Create feature matrices 27 | x <- as.integer(unlist(purrr::rbernoulli(n = N))) 28 | S <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 29 | S <- as(S, "dgCMatrix") 30 | for (i in 1:N) { 31 | for (j in 1:N) { 32 | if (i != j) { 33 | s_ij <- ifelse(x[i] == x[j], 1, 0) 34 | S[i, j] <- s_ij 35 | } 36 | } 37 | } 38 | 39 | y <- as.integer(unlist(purrr::rbernoulli(n = N))) 40 | V <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 41 | V <- as(V, "dgCMatrix") 42 | for (i in 1:N) { 43 | for (j in 1:N) { 44 | if (i != j) { 45 | v_ij <- ifelse(y[i] == y[j], 1, 0) 46 | V[i, j] <- v_ij 47 | } 48 | } 49 | } 50 | 51 | z <- as.integer(unlist(purrr::rbernoulli(n = N))) 52 | W <- Matrix::sparseMatrix(i = {}, j = {}, dims = c(N, N)) 53 | W <- as(W, "dgCMatrix") 54 | for (i in 1:N) { 55 | for (j in 1:N) { 56 | if (i != j) { 57 | w_ij <- ifelse(z[i] == z[j], 1, 0) 58 | W[i, j] <- w_ij 59 | } 60 | } 61 | } 62 | 63 | # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k. 64 | tau <- 65 | matrix(c( 66 | 0.2, 0.5, 0.3, 67 | 0.4, 0.4, 0.2, 68 | 0.1, 0.4, 0.5, 69 | 0.4, 0.4, 0.2, 70 | 0.1, 0.1, 0.8, 71 | 0.05, 0.05, 0.9, 72 | 0.8, 0.1, 0.1, 73 | 0.3, 0.4, 0.3, 74 | 0.1, 0.8, 0.1, 75 | 0.5, 0.4, 0.1, 76 | 0.3, 0.3, 0.4, 77 | 0.8, 0.1, 0.1 78 | ), 79 | nrow = K, ncol = N 80 | ) 81 | tau <- t(tau) 82 | 83 | # Compute gamma (parameter of multinomial distribution) 84 | alpha <- colMeans(tau) 85 | 86 | ########################################################### 87 | # Compute the lower bound in a naive way 88 | ########################################################### 89 | # Compute pi for D_ij = 1 90 | minPi <- 1e-4 91 | list_pi <- list() 92 | for (w in 0:1) { 93 | for (v in 0:1) { 94 | for (s in 0:1) { 95 | print(glue::glue("Compute pi for pi_s{s}v{v}w{w}")) 96 | denom <- matrix(0, nrow = K, ncol = K) 97 | num <- matrix(0, nrow = K, ncol = K) 98 | index <- s + 2 * v + 4 * w + 1 99 | print(index) 100 | for (k in 1:K) { 101 | for (l in 1:K) { 102 | for (i in 1:N) { 103 | for (j in 1:N) { 104 | if (i != j & S[i, j] == s & V[i, j] == v & W[i, j] == w) { 105 | denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l] 106 | } 107 | if (i != j & adj[i, j] == 1 & S[i, j] == s & V[i, j] == v & W[i, j] == w) { 108 | num[k, l] <- num[k, l] + tau[i, k] * tau[j, l] 109 | } 110 | } 111 | } 112 | } 113 | } 114 | pi <- num / denom 115 | # Remove extremely small elements in pi 116 | for (k in 1:K) { 117 | for (l in 1:K) { 118 | if (pi[k, l] < minPi) { 119 | pi[k, l] <- minPi 120 | } 121 | } 122 | } 123 | list_pi[[index]] <- pi 124 | } 125 | } 126 | } 127 | 128 | # Compute the true lower bound 129 | LB_true <- 0 130 | # First term 131 | for (i in 1:N) { 132 | for (j in 1:N) { 133 | if (i != j) { 134 | # For each ij, determine which pi must be used. 135 | index_ij <- S[i, j] + 2 * V[i, j] + 4 * W[i, j] + 1 136 | pi_ij <- list_pi[[index_ij]] 137 | # if D_ij = 0, replace pi with 1 - pi. 138 | if (adj[i, j] == 0) { 139 | pi_ij <- 1 - pi_ij 140 | } 141 | for (k in 1:K) { 142 | for (l in 1:K) { 143 | LB_true <- LB_true + tau[i, k] * tau[j, l] * log(pi_ij[k, l]) 144 | } 145 | } 146 | } 147 | } 148 | } 149 | 150 | # Second term 151 | for (i in 1:N) { 152 | for (k in 1:K) { 153 | LB_true <- LB_true + tau[i, k] * (log(alpha[k]) - log(tau[i, k])) 154 | } 155 | } 156 | 157 | ########################################################### 158 | # Compute the lower bound using the cpp function 159 | ########################################################### 160 | list_feature_adjmat <- list(S, V, W) 161 | list_multiplied_feature_adjmat <- get_elementwise_multiplied_matrices(adj, list_feature_adjmat) 162 | denom <- get_matrix_for_denominator(N, list_feature_adjmat) 163 | list_multiplied_feature_adjmat[[1]] <- denom 164 | 165 | alpha_LB <- run_EM_with_features(N, K, alpha, list_multiplied_feature_adjmat, tau, verbose = 2) 166 | 167 | # Check if it works 168 | expect_equal(alpha_LB[[2]], LB_true, tolerance = 1e-10) 169 | }) 170 | 171 | 172 | test_that("computing the lower bound without features works", { 173 | 174 | #################### 175 | # Setup 176 | #################### 177 | # Number of nodes 178 | N <- 12 179 | # Number of clusters 180 | K <- 3 181 | 182 | # Create an adjacency matrix 183 | edgelist <- 184 | tibble::tibble( 185 | tail = 1:N, 186 | head = 1:N 187 | ) %>% 188 | tidyr::expand(tail, head) %>% 189 | dplyr::filter(tail < head) %>% 190 | dplyr::mutate(connect = as.integer(unlist(purrr::rbernoulli(n = nrow(.), p = 0.5)))) %>% 191 | dplyr::filter(connect == 1) 192 | 193 | net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 194 | adj <- network::as.matrix.network.adjacency(net) 195 | adj <- as(adj, "dgCMatrix") 196 | 197 | # Create a N x K matrix whose (i, k) element represents the probability that node i belongs to block k. 198 | tau <- 199 | matrix(c( 200 | 0.2, 0.5, 0.3, 201 | 0.4, 0.4, 0.2, 202 | 0.1, 0.4, 0.5, 203 | 0.4, 0.4, 0.2, 204 | 0.1, 0.1, 0.8, 205 | 0.05, 0.05, 0.9, 206 | 0.8, 0.1, 0.1, 207 | 0.3, 0.4, 0.3, 208 | 0.1, 0.8, 0.1, 209 | 0.5, 0.4, 0.1, 210 | 0.3, 0.3, 0.4, 211 | 0.8, 0.1, 0.1 212 | ), 213 | nrow = K, ncol = N 214 | ) 215 | tau <- t(tau) 216 | 217 | # Compute gamma (parameter of multinomial distribution) 218 | alpha <- colMeans(tau) 219 | 220 | ########################################################### 221 | # Compute the lower bound in a naive way 222 | ########################################################### 223 | # Compute pi for D_ij = 1 224 | minPi <- 1e-4 225 | denom <- matrix(0, nrow = K, ncol = K) 226 | num <- matrix(0, nrow = K, ncol = K) 227 | for (k in 1:K) { 228 | for (l in 1:K) { 229 | for (i in 1:N) { 230 | for (j in 1:N) { 231 | if (i != j) { 232 | denom[k, l] <- denom[k, l] + tau[i, k] * tau[j, l] 233 | if (i != j & adj[i, j] == 1) { 234 | num[k, l] <- num[k, l] + tau[i, k] * tau[j, l] 235 | } 236 | } 237 | } 238 | } 239 | } 240 | } 241 | pi <- num / denom 242 | 243 | # Remove extremely small elements in pi 244 | for (k in 1:K) { 245 | for (l in 1:K) { 246 | if (pi[k, l] < minPi) { 247 | pi[k, l] <- minPi 248 | } 249 | } 250 | } 251 | 252 | 253 | # Compute the true lower bound 254 | LB_true <- 0 255 | # First term 256 | for (i in 1:N) { 257 | for (j in 1:N) { 258 | if (i != j) { 259 | # if D_ij = 0, replace pi with 1 - pi. 260 | if (adj[i, j] == 0) { 261 | pi_ij <- 1 - pi 262 | } else { 263 | pi_ij <- pi 264 | } 265 | for (k in 1:K) { 266 | for (l in 1:K) { 267 | LB_true <- LB_true + tau[i, k] * tau[j, l] * log(pi_ij[k, l]) 268 | } 269 | } 270 | } 271 | } 272 | } 273 | 274 | # Second term 275 | for (i in 1:N) { 276 | for (k in 1:K) { 277 | LB_true <- LB_true + tau[i, k] * (log(alpha[k]) - log(tau[i, k])) 278 | } 279 | } 280 | 281 | ########################################################### 282 | # Compute the lower bound using the cpp function 283 | ########################################################### 284 | alpha_LB <- run_EM_without_features(N, K, alpha, tau, adj) 285 | # Check if it works 286 | expect_equal(alpha_LB[[2]], LB_true, tolerance = 1e-10) 287 | }) 288 | -------------------------------------------------------------------------------- /lighthergm/R/estimate_within_params.R: -------------------------------------------------------------------------------- 1 | #' Estimate a within-block network model. 2 | #' @importFrom parallel mclapply 3 | #' @importFrom magrittr %>% 4 | #' @importFrom ergm ergm 5 | #' @importFrom foreach foreach %do% %dopar% 6 | #' @param formula a within network formula 7 | #' @param network a network object 8 | #' @param z_memb block memberships for each node 9 | #' @param number_cores The number of CPU cores to use. 10 | #' @param verbose A logical or an integer: if this is TRUE/1, 11 | #' the program will print out additional information about the progress of estimation and simulation. 12 | #' @param seeds seed value (integer) for the random number generator 13 | #' @param method_second_step If "MPLE" (the default), then the maximum pseudolikelihood estimator is returned. 14 | #' If "MLE", then an approximate maximum likelihood estimator is returned. 15 | #' @param offset_coef a vector of model parameters to be fixed when estimation.(i.e., not estimated). 16 | #' @param ... Additional arguments, to be passed to lower-level functions 17 | #' @importFrom rlang %||% 18 | #' @export 19 | estimate_within_params <- 20 | function(formula, 21 | network, 22 | z_memb, 23 | number_cores = 1, 24 | verbose = 1, 25 | seeds = NULL, 26 | method_second_step = c("MPLE", "MLE"), 27 | offset_coef = NULL, 28 | ...) { 29 | 30 | varargs <- list(...) 31 | # Store block structure in a tibble 32 | block_structure <- 33 | tibble::tibble( 34 | node_id = network::network.vertex.names(network), 35 | block = z_memb 36 | ) 37 | 38 | # Get number of clusters 39 | all_clusters <- names(table(factor(block_structure$block))) 40 | 41 | # Get an edgelist and vertex attributes from the network object using intergraph::asDF 42 | list_edgelist <- intergraph::asDF(network) 43 | 44 | # Get vertex attributes. 45 | vertex_attr <- list_edgelist$vertexes %>% 46 | dplyr::select(-intergraph_id) 47 | 48 | # Extract vertex names from the network 49 | vertex_name <- list_edgelist$vertexes %>% 50 | dplyr::select(intergraph_id, vertex.names) 51 | 52 | # Construct an edgelist whose source_id and target_ids correspond to the vertex names of the network. 53 | # If you use network::as.edgelist instead, the original vertex names are not kept. That causes a problem when you convert an edgelist into a network object. 54 | edgelist <- 55 | list_edgelist$edges %>% 56 | dplyr::select(V1, V2) %>% 57 | dplyr::left_join(., vertex_name, by = c("V1" = "intergraph_id")) %>% 58 | dplyr::rename(source_id = vertex.names) %>% 59 | dplyr::select(V2, source_id) %>% 60 | dplyr::left_join(., vertex_name, by = c("V2" = "intergraph_id")) %>% 61 | dplyr::rename(target_id = vertex.names) %>% 62 | dplyr::select(source_id, target_id) %>% 63 | tibble::tibble() 64 | 65 | # Get a sparse adjacency matrix for each block. Store them in a list. 66 | # This computation might be unstable when the network is large. Check this point later. 67 | # For Windows users 68 | if (Sys.info()[["sysname"]] == "Windows") { 69 | # Preparation for parallel computing using foreach 70 | cluster <- parallel::makeCluster(number_cores, type = "PSOCK") 71 | doParallel::registerDoParallel(cluster) 72 | # Start computation 73 | block_net <- foreach(k = all_clusters) %dopar% { 74 | # Get a subgraph whose vertices belong to block k. 75 | subnet <- get_induced_subgraph(block_structure, edgelist, k) 76 | # Keep vertex ids 77 | vertex_id <- network::network.vertex.names(subnet) 78 | # Convert the subgraph into a sparse adjacency matrix. 79 | sub_net <- as_sparse_adj(subnet) 80 | # Make a block attribute. 81 | # The length of this list must be the same with the number of vertices of the subgraph. 82 | block_attr <- rep(k, length(which(z_memb == k))) 83 | # Return the objects as a list. 84 | return(list(net = sub_net, id = vertex_id, block = block_attr)) 85 | } 86 | parallel::stopCluster(cluster) 87 | } 88 | # For non-Windows users 89 | else { 90 | block_net <- mclapply(all_clusters, function(k) { 91 | # Get a subgraph whose vertices belong to block k. 92 | subnet <- get_induced_subgraph(block_structure, edgelist, k) 93 | # Keep vertex ids 94 | vertex_id <- network::network.vertex.names(subnet) 95 | # Convert the subgraph into a sparse adjacency matrix. 96 | sub_net <- as_sparse_adj(subnet) 97 | # Make a block attribute. 98 | # The length of this list must be the same with the number of vertices of the subgraph. 99 | block_attr <- rep(k, length(which(z_memb == k))) 100 | # Return the objects as a list. 101 | list(net = sub_net, id = vertex_id, block = block_attr) 102 | }, 103 | mc.cores = number_cores 104 | ) 105 | } 106 | 107 | # Extract info on vertex id. 108 | vertex_id <- unlist(c(block_net %>% 109 | purrr::map(function(x) { 110 | x$id 111 | }))) 112 | 113 | # Extract info on which node belongs to which block. 114 | block_attr <- unlist(c(block_net %>% 115 | purrr::map(function(x) { 116 | x$block 117 | }))) 118 | 119 | # Create a sparse adjacency matrix that only considers within-block connections. 120 | block_net <- Matrix::bdiag(block_net %>% 121 | purrr::map(function(x) { 122 | x$net 123 | })) 124 | 125 | # Convert the sparse adjacency matrix into a network object, called "block_net". 126 | edgelist <- 127 | Matrix::summary(block_net) %>% 128 | dplyr::select(i, j) %>% 129 | as.matrix() 130 | 131 | attr(edgelist, "n") <- length(vertex_id) 132 | attr(edgelist, "vnames") <- vertex_id 133 | attr(edgelist, "directed") <- FALSE 134 | attr(edgelist, "bipartite") <- FALSE 135 | attr(edgelist, "loops") <- FALSE 136 | attr(edgelist, "class") <- c("matrix", "edgelist") 137 | 138 | block_net <- network::network(edgelist, matrix.type = "edgelist", directed = FALSE) 139 | 140 | # Attach vertex id 141 | network::network.vertex.names(block_net) <- vertex_id 142 | 143 | # Attach vertex attributes 144 | df_vertex_attr <- 145 | tibble::tibble(vertex.names = vertex_id) %>% 146 | dplyr::left_join(., vertex_attr, by = "vertex.names") 147 | 148 | # Remove non-vertex-attribute columns 149 | df_vertex_attr <- 150 | df_vertex_attr %>% 151 | dplyr::select(-vertex.names) 152 | 153 | ## This part could be written in a much better way without foreach? 154 | df_vertex_attr_colnames <- colnames(df_vertex_attr) 155 | foreach(i = 1:ncol(df_vertex_attr)) %do% { 156 | network::set.vertex.attribute(x = block_net, attrname = df_vertex_attr_colnames[i], value = df_vertex_attr[[i]]) 157 | } 158 | 159 | # Attach block attributes 160 | network::set.vertex.attribute(block_net, "block", block_attr) 161 | 162 | # Re-arrange the formula in such a way that the LHS is block_net. 163 | formula_terms <- as.character(formula)[3] 164 | 165 | # If within-block parameters are fixed, need to include `offset` in the formula 166 | if (!is.null(offset_coef)) { 167 | second_step_rhs <- as.character(formula)[3] 168 | second_step_rhs <- unlist(stringr::str_split(string = second_step_rhs, pattern = " \\+ ")) 169 | # Extract offset terms and wrap them by `offset()` 170 | offset_terms <- second_step_rhs[stringr::str_detect(string = second_step_rhs, pattern = "\"")] 171 | offset_terms <- glue::glue("offset({offset_terms})") 172 | # Extract within-block parameters to be estimated 173 | within_terms <- second_step_rhs[!stringr::str_detect(string = second_step_rhs, pattern = "\"")] 174 | # Combine all 175 | formula_terms <- stringr::str_c(c(within_terms, offset_terms), collapse = " + ") 176 | } 177 | 178 | formula <- as.formula(glue::glue("block_net ~ {formula_terms}")) 179 | 180 | # Estimate the within-block parameters 181 | # The default estimation method is "MPLE", but you can select "MLE" if you like. 182 | 183 | # %||% extracts the value on the left with a default value if null 184 | control <- varargs$control %||% ergm::control.ergm() 185 | 186 | model_est <- ergm( 187 | formula = formula, 188 | constraints = ~ blockdiag("block"), 189 | estimate = method_second_step, 190 | offset.coef = offset_coef, 191 | control = control 192 | ) 193 | 194 | # Remove unnecessary network objects 195 | model_est$newnetwork <- NULL 196 | 197 | return(model_est) 198 | } 199 | 200 | # ------------------------------------------------------------------------ 201 | # -------------- Auxiliary functions ------------------------------------- 202 | # ------------------------------------------------------------------------ 203 | 204 | #' Get a sparse adjacency matrix from a network object 205 | #' @param net a network object 206 | as_sparse_adj <- function(net) { 207 | n_nodes <- as.integer(net$gal$n) 208 | net <- network::as.edgelist(net) 209 | net <- Matrix::sparseMatrix( 210 | i = net[, 1], 211 | j = net[, 2], 212 | x = 1, 213 | dims = c(n_nodes, n_nodes), 214 | symmetric = TRUE 215 | ) 216 | return(net) 217 | } 218 | 219 | 220 | get_induced_subgraph <- function(block_structure, edgelist, searched_block) { 221 | # Get the relevant nodes for the searched block 222 | block_nodes <- 223 | block_structure %>% 224 | dplyr::filter(block == searched_block) %>% 225 | .$node_id 226 | 227 | # Get the number of nodes in the block 228 | n_nodes_in_block <- length(block_nodes) 229 | 230 | # Get only the edges where nodes on both sides share the searched block membership 231 | subgraph <- 232 | edgelist %>% 233 | dplyr::filter(source_id %in% block_nodes & target_id %in% block_nodes) 234 | 235 | # Keep only the ID attributes 236 | subgraph <- 237 | subgraph %>% 238 | dplyr::select(source_id, target_id) %>% 239 | as.matrix() 240 | 241 | attr(subgraph, "n") <- n_nodes_in_block 242 | attr(subgraph, "vnames") <- block_nodes 243 | attr(subgraph, "directed") <- FALSE 244 | attr(subgraph, "bipartite") <- FALSE 245 | attr(subgraph, "loops") <- FALSE 246 | attr(subgraph, "class") <- c("matrix", "edgelist") 247 | 248 | if (nrow(subgraph) == 0) { 249 | subgraph <- network::network.initialize(n = n_nodes_in_block, directed = FALSE) 250 | network::network.vertex.names(subgraph) <- block_nodes 251 | return(subgraph) 252 | } else { 253 | # Return the subnet 254 | subgraph <- 255 | network::network(subgraph, directed = FALSE, matrix.type = "edgelist") 256 | 257 | return(subgraph) 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /lighthergm/tests/testthat/test-gof.R: -------------------------------------------------------------------------------- 1 | get_dummy_net <- function(n_nodes, n_clusters, em_iters = 10) { 2 | hergm_formula <- g ~ edges + triangle + nodematch("x") 3 | 4 | nodes_data <- tibble::tibble( 5 | node_id = 1:n_nodes, 6 | x = sample(1:2, size = n_nodes, replace = T), 7 | block = sample(1:n_clusters, size = n_nodes, replace = T) 8 | ) 9 | 10 | g <- network::network.initialize(n = n_nodes) 11 | network::set.vertex.attribute(g, "x", nodes_data$x) 12 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(g, hergm_formula) 13 | 14 | coef_between_block <- c(-3, 1) 15 | coef_within_block <- c(-2, 0.1, 0.5) 16 | 17 | sim_ergm_control <- ergm::control.simulate.formula( 18 | MCMC.burnin = 4000000, 19 | MCMC.interval = 200000 20 | ) 21 | 22 | g <- lighthergm::simulate_hergm( 23 | formula_for_simulation = hergm_formula, 24 | data_for_simulation = nodes_data, 25 | colname_vertex_id = "node_id", 26 | colname_block_membership = "block", 27 | coef_between_block = coef_between_block, 28 | coef_within_block = coef_within_block, 29 | ergm_control = sim_ergm_control, 30 | fast_between_simulation = TRUE, 31 | list_feature_matrices = list_feature_matrices 32 | ) 33 | 34 | hergm_res <- lighthergm::hergm( 35 | g ~ edges + nodematch("x") + triangle, 36 | n_clusters = n_clusters, 37 | n_em_step_max = em_iters, 38 | estimate_parameters = T, 39 | clustering_with_features = T 40 | ) 41 | 42 | list( 43 | hergm_res = hergm_res, 44 | g = g, 45 | nodes_data = nodes_data, 46 | K = n_clusters, 47 | list_feature_matrices = list_feature_matrices, 48 | vertex_id_var = "node_id", 49 | block_id_var = "block", 50 | ergm_control = sim_ergm_control 51 | ) 52 | } 53 | 54 | 55 | test_that("Returned GOF dataframe has the correct fields", { 56 | sim <- get_dummy_net(50, 2) 57 | g <- sim$g 58 | 59 | test_gof_res <- lighthergm::gof_lighthergm( 60 | g, 61 | list_feature_matrices = sim$list_feature_matrices, 62 | data_for_simulation = sim$nodes_data, 63 | colname_vertex_id = sim$vertex_id_var, 64 | colname_block_membership = sim$block_id_var, 65 | lighthergm_results = sim$hergm_res, 66 | ergm_control = sim$ergm_control, 67 | n_sim = 3 68 | ) 69 | 70 | for (stat_type in c("original", "simulated")) { 71 | stats <- test_gof_res[[stat_type]] 72 | expect_false(is.null(stats)) 73 | for (stat in c("network_stats", "degree_dist", "esp_dist")) { 74 | expect_false(is.null(stats[[stat]])) 75 | } 76 | expect_true(is.null(stats[["geodesic_dist"]])) 77 | } 78 | }) 79 | 80 | test_that("GOF network stats have the right fields and terms", { 81 | sim <- get_dummy_net(50, 2) 82 | g <- sim$g 83 | 84 | test_gof_res <- lighthergm::gof_lighthergm( 85 | g, 86 | list_feature_matrices = sim$list_feature_matrices, 87 | data_for_simulation = sim$nodes_data, 88 | colname_vertex_id = sim$vertex_id_var, 89 | colname_block_membership = sim$block_id_var, 90 | lighthergm_results = sim$hergm_res, 91 | ergm_control = sim$ergm_control, 92 | n_sim = 3 93 | ) 94 | 95 | expected_terms <- ergm::ergm_model(sim$hergm_res$est_within$formula)$terms %>% 96 | purrr::map(function(t) { 97 | `$`(t, name) 98 | }) 99 | unlist 100 | 101 | for (stat_type in c("original", "simulated")) { 102 | stat_type_df <- test_gof_res[[stat_type]] 103 | actual_terms <- colnames(stat_type_df$network_stats) 104 | 105 | actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>% 106 | setdiff(c("value", "stat")) %>% 107 | length() %>% 108 | expect_equal(0) 109 | 110 | stat_type_df$network_stats$stat %>% 111 | unique() %>% 112 | stringr::str_replace("[.].*", "") %>% 113 | setdiff(expected_terms) %>% 114 | length() %>% 115 | expect_equal(0) 116 | } 117 | }) 118 | 119 | test_that("GOF degree stats have the right fields and terms", { 120 | sim <- get_dummy_net(50, 2) 121 | g <- sim$g 122 | 123 | test_gof_res <- lighthergm::gof_lighthergm( 124 | g, 125 | list_feature_matrices = sim$list_feature_matrices, 126 | data_for_simulation = sim$nodes_data, 127 | colname_vertex_id = sim$vertex_id_var, 128 | colname_block_membership = sim$block_id_var, 129 | lighthergm_results = sim$hergm_res, 130 | ergm_control = sim$ergm_control, 131 | n_sim = 3 132 | ) 133 | 134 | for (stat_type in c("original", "simulated")) { 135 | stat_type_df <- test_gof_res[[stat_type]] 136 | actual_terms <- colnames(stat_type_df$degree_dist) 137 | 138 | actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>% 139 | setdiff(c("degree", "share")) %>% 140 | length() %>% 141 | expect_equal(0) 142 | 143 | expect_lte(max(stat_type_df$degree_dist$degree), g$gal$n) 144 | expect( 145 | min(stat_type_df$degree_dist$share) >= 0 && max(stat_type_df$degree_dist$share) <= 1, 146 | failure_message = "Some degree shares are out of bounds" 147 | ) 148 | } 149 | }) 150 | 151 | test_that("GOF esp stats have the right fields and terms", { 152 | sim <- get_dummy_net(50, 2) 153 | g <- sim$g 154 | 155 | test_gof_res <- lighthergm::gof_lighthergm( 156 | g, 157 | list_feature_matrices = sim$list_feature_matrices, 158 | data_for_simulation = sim$nodes_data, 159 | colname_vertex_id = sim$vertex_id_var, 160 | colname_block_membership = sim$block_id_var, 161 | lighthergm_results = sim$hergm_res, 162 | ergm_control = sim$ergm_control, 163 | n_sim = 3 164 | ) 165 | 166 | for (stat_type in c("original", "simulated")) { 167 | stat_type_df <- test_gof_res[[stat_type]] 168 | actual_terms <- colnames(stat_type_df$esp_dist) 169 | actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>% 170 | setdiff(c("label", "esp")) %>% 171 | length() %>% 172 | expect_equal(0) 173 | 174 | expect_lte(max(stat_type_df$esp_dist$label), min(g$gal$n, 10)) 175 | expect( 176 | min(stat_type_df$esp_dist$esp) >= 0 && max(stat_type_df$esp_dist$esp) <= (g$gal$n^2), 177 | failure_message = "Some esp counts are out of bounds." 178 | ) 179 | } 180 | }) 181 | 182 | test_that("GOF geodesic distance is returned when requested", { 183 | sim <- get_dummy_net(50, 2) 184 | g <- sim$g 185 | 186 | test_gof_res <- lighthergm::gof_lighthergm( 187 | g, 188 | list_feature_matrices = sim$list_feature_matrices, 189 | data_for_simulation = sim$nodes_data, 190 | colname_vertex_id = sim$vertex_id_var, 191 | colname_block_membership = sim$block_id_var, 192 | lighthergm_results = sim$hergm_res, 193 | ergm_control = sim$ergm_control, 194 | n_sim = 3, 195 | compute_geodesic_distance = TRUE 196 | ) 197 | 198 | for (stat_type in c("original", "simulated")) { 199 | stat_type_df <- test_gof_res[[stat_type]] 200 | actual_terms <- colnames(stat_type_df$geodesic_dist) 201 | 202 | actual_terms[stringr::str_detect(actual_terms, "n_sim", negate = TRUE)] %>% 203 | setdiff(c("dist", "pairs")) %>% 204 | length() %>% 205 | expect_equal(0) 206 | 207 | # Some of the distances will be Inf, and that's ok (that's how ergm returns them). 208 | non_inf <- stat_type_df$geodesic_dist$dist[!is.infinite(stat_type_df$geodesic_dist$dist)] 209 | expect_lte(max(non_inf), g$gal$n) 210 | expect( 211 | (min(stat_type_df$geodesic_dist$pairs) >= 0) && (max(stat_type_df$geodesic_dist$pairs) <= (g$gal$n^2)), 212 | failure_message = "Some geodesic distance pairs are out of bounds." 213 | ) 214 | } 215 | }) 216 | 217 | test_that("Return GOF statistics including only within-block connections", { 218 | sim <- get_dummy_net(50, 2) 219 | g <- sim$g 220 | 221 | test_gof_res <- lighthergm::gof_lighthergm( 222 | g, 223 | list_feature_matrices = sim$list_feature_matrices, 224 | data_for_simulation = sim$nodes_data, 225 | colname_vertex_id = sim$vertex_id_var, 226 | colname_block_membership = sim$block_id_var, 227 | lighthergm_results = sim$hergm_res, 228 | type = 'within', 229 | ergm_control = sim$ergm_control, 230 | n_sim = 3 231 | ) 232 | 233 | # check that the network stats belong to the within-block sub network only 234 | edgelist <- network::as.edgelist(g) %>% as.data.frame 235 | colnames(edgelist) <- c('src', 'dst') 236 | nodes_with_blocks <- data.frame(id = 1:length(network::network.vertex.names(g)), block=network::get.vertex.attribute(g, 'block')) 237 | actual_within_conns <- edgelist %>% 238 | dplyr::left_join(nodes_with_blocks, by = c('src' = 'id')) %>% 239 | dplyr::left_join(nodes_with_blocks, by = c('dst' = 'id'), suffix=c('.src', '.dst')) %>% 240 | dplyr::filter(block.src == block.dst) %>% 241 | nrow 242 | 243 | within_conns_from_gof <- (test_gof_res$original$network_stats %>% dplyr::filter(stat == 'edges'))[, 2] 244 | 245 | expect_equal(within_conns_from_gof, actual_within_conns) 246 | 247 | for (stat_type in c("original", "simulated")) { 248 | stats <- test_gof_res[[stat_type]] 249 | expect_false(is.null(stats)) 250 | for (stat in c("network_stats", "degree_dist", "esp_dist")) { 251 | expect_false(is.null(stats[[stat]])) 252 | } 253 | expect_true(is.null(stats[["geodesic_dist"]])) 254 | } 255 | }) 256 | 257 | test_that("Within-connections GOF can be started from the observed network", { 258 | sim <- get_dummy_net(100, 4) 259 | g <- sim$g 260 | 261 | ergm_control <- ergm::control.simulate.formula( 262 | MCMC.burnin = 0, 263 | MCMC.interval = 1 264 | ) 265 | 266 | test_gof_res <- lighthergm::gof_lighthergm( 267 | g, 268 | list_feature_matrices = sim$list_feature_matrices, 269 | data_for_simulation = sim$nodes_data, 270 | colname_vertex_id = sim$vertex_id_var, 271 | colname_block_membership = sim$block_id_var, 272 | lighthergm_results = sim$hergm_res, 273 | type = 'within', 274 | ergm_control = ergm_control, 275 | n_sim = 2, 276 | start_from_observed = TRUE 277 | ) 278 | 279 | first_simulation_stats <-test_gof_res$simulated$network_stats %>% 280 | dplyr::filter(n_sim == 1) %>% 281 | dplyr::select(-n_sim) 282 | 283 | original_network_stats <- test_gof_res$original$network_stats 284 | expect_equal(original_network_stats, first_simulation_stats) 285 | }) 286 | 287 | test_that("Full GOF can be started from the observed network", { 288 | sim <- get_dummy_net(100, 4) 289 | g <- sim$g 290 | 291 | ergm_control <- ergm::control.simulate.formula( 292 | MCMC.burnin = 0, 293 | MCMC.interval = 1 294 | ) 295 | 296 | test_gof_res <- lighthergm::gof_lighthergm( 297 | g, 298 | list_feature_matrices = sim$list_feature_matrices, 299 | data_for_simulation = sim$nodes_data, 300 | colname_vertex_id = sim$vertex_id_var, 301 | colname_block_membership = sim$block_id_var, 302 | lighthergm_results = sim$hergm_res, 303 | type = 'full', 304 | ergm_control = ergm_control, 305 | n_sim = 2, 306 | start_from_observed = TRUE 307 | ) 308 | 309 | first_simulation_stats <-test_gof_res$simulated$network_stats %>% 310 | dplyr::filter(n_sim == 1) 311 | 312 | # If it starts from the observed network, the stats should not be zero 313 | expect_true(all(first_simulation_stats['value'] > 0)) 314 | }) 315 | -------------------------------------------------------------------------------- /lighthergm/vignettes/intro-lighthergm.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "A Light Introduction to `lighthergm`" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{intro-lighthergm} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | 9 | references: 10 | - id: schweinberger2015 11 | title: "Local Dependence in Random Graph Models: Characterization, Properties and Statistical Inference" 12 | author: 13 | - family: Schweinberger 14 | given: Michael 15 | - family: Handcock 16 | given: Mark S 17 | container-title: Journal of the Royal statistical Society B 18 | volume: 77 19 | issue: 3 20 | page: 647-676 21 | type: article-journal 22 | issued: 23 | year: 2015 24 | 25 | - id: schweinberger2018 26 | title: "hergm: Hierarchical Exponential-Family Random Graph Models" 27 | author: 28 | - family: Schweinberger 29 | given: Michael 30 | - family: Luna 31 | given: Pamela 32 | container-title: Journal of Statistical Software 33 | volume: 85 34 | issue: 1 35 | page: 1-39 36 | type: article-journal 37 | issued: 38 | year: 2018 39 | 40 | - id: martinezdahbura2021 41 | title: "A Structural Model of Business Card Exchange Networks" 42 | author: 43 | - family: Martínez Dahbura 44 | given: Juan Nelson 45 | - family: Komatsu 46 | given: Shota 47 | - family: Nishida 48 | given: Takanori 49 | - family: Mele 50 | given: Angelo 51 | type: article-journal 52 | page: Working Paper. Available at https://arxiv.org/abs/2105.12704 53 | issued: 54 | year: 2021 55 | --- 56 | 57 | ```{r, include = FALSE} 58 | options(rmarkdown.html_vignette.check_title = FALSE) 59 | knitr::opts_chunk$set( 60 | collapse = TRUE, 61 | comment = "#>" 62 | ) 63 | ``` 64 | 65 | 66 | This vignette provides a brief introduction on how to use the R package `lighthergm`, which estimates Hierarchical Exponential-Family Random Graph Models [HERGMs, @schweinberger2015]. 67 | `lighthergm` is built upon the R package `hergm` [@schweinberger2018] and applies scalable algorithms and computational techniques. 68 | See @martinezdahbura2021 for further information. 69 | 70 | # A simple example 71 | 72 | ```{r setup} 73 | library(lighthergm) 74 | ``` 75 | 76 | `lighthergm` has a toy network to test-drive the package with 77 | Let's load the network data and plot it. 78 | 79 | ```{r, message=FALSE} 80 | # Load an embedded network object. 81 | data(toyNet) 82 | # Draw the network. 83 | library(ggplot2) 84 | library(magrittr) 85 | library(GGally) 86 | g <- ggnet2(toyNet, 87 | size = 1, 88 | color = rep(c("tomato", "steelblue", "darkgreen", "black"), 89 | each = toyNet$gal$n/4), 90 | mode = "kamadakawai") 91 | plot(g) 92 | ``` 93 | 94 | As you can see, this network has a clear cluster or community structure. 95 | Although this is a fake network, we often observe such community structures in real social networks. 96 | Exploiting this stylized fact, we model the way agents in a network get connected differently for connections across and within communities: 97 | 98 | - Connections across communities happen by luck, influenced by homophily 99 | - Connections within communities also consider interdependencies among links. 100 | For example, the probability that agent $i$ and $j$ gets connected may be influenced by a friend in common $k$. 101 | 102 | To estimate the latent community structure of a network and agents' preferences for connection, `lighthergm` implements the following two-step procedure. 103 | 104 | 1. Recover the community structure by applying a scalable minorization-maximization algorithm. 105 | 1. Given the estimated community structure in Step 1, estimate agents' between- and within-community payoffs by maximum pseudo-likelihood estimation. 106 | 107 | Seeing is believing. Let's perform an estimation using the toy network. (If you would like to see the progress, set `verbose = 1`.) 108 | 109 | ```{r, message=FALSE} 110 | model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle 111 | 112 | hergm_res <- 113 | lighthergm::hergm( 114 | # The model you would like to estiamte 115 | object = model_formula, 116 | # The number of blocks 117 | n_clusters = 4, 118 | # The maximum number of EM algorithm steps 119 | n_em_step_max = 100, 120 | # Perform parameter estimation after the block recovery step 121 | estimate_parameters = TRUE, 122 | # Indicate that clustering must take into account nodematch on characteristics 123 | clustering_with_features = TRUE, 124 | # Keep track of block memberships at each EM iteration 125 | check_block_membership = TRUE 126 | ) 127 | ``` 128 | 129 | To see whether the first step (recovering the latent community structure) has converged, we can plot the estimated lower bound of the objective function over iterations. 130 | 131 | ```{r} 132 | g_LB <- 133 | ggplot(data = data.frame(iterations = 1:length(hergm_res$EM_lower_bound), 134 | lower_bound = hergm_res$EM_lower_bound), 135 | aes(x = iterations, y = lower_bound)) + 136 | geom_line() 137 | 138 | plot(g_LB) 139 | ``` 140 | 141 | This indicates that the clustering step converged at the early stage. 142 | Note that the number of iterations that you need to perform (`n_em_step_max`) varies depending on the size of a network, whether it has a clear community structure, etc.. 143 | You need trial and error on how many iterations are at least necessary in your case. 144 | Plotting the lower bound may help check the convergence of the clustering step. 145 | 146 | You can check the clustering result 147 | 148 | ```{r} 149 | # Number of nodes per recovered block: 150 | table(hergm_res$partition) 151 | ``` 152 | 153 | and estimated parameters. 154 | 155 | ```{r} 156 | # For the between networks 157 | summary(hergm_res$est_between) 158 | ``` 159 | 160 | ```{r} 161 | # For the within networks 162 | summary(hergm_res$est_within) 163 | ``` 164 | 165 | Currently, the only supported way to include covariates in the model is via `nodematch()`. 166 | 167 | You can also employ caching to avoid repeating heavy operations that yield the same results for your network. To use it, pass the `cache` parameter to `lighthergm::hergm`, setting its value to a [cachem](https://github.com/r-lib/cachem "cachem repository") object. A disk cache lets you speed up estimations on the same network data even across R Sessions. 168 | 169 | # Goodness-of-fit 170 | 171 | You can evaluate the goodness-of-fit of the model with the `lighthergm::gof_lighthergm()` function: 172 | 173 | ```{r, message=FALSE} 174 | # Prepare a data frame that contains nodal id and covariates. 175 | nodes_data <- 176 | tibble::tibble( 177 | node_id = network::network.vertex.names(toyNet), 178 | block = hergm_res$partition, 179 | x = network::get.vertex.attribute(toyNet, "x"), 180 | y = network::get.vertex.attribute(toyNet, "y") 181 | ) 182 | 183 | # The feature adjacency matrices 184 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(toyNet, model_formula) 185 | 186 | # The MCMC settings 187 | sim_ergm_control <- ergm::control.simulate.formula( 188 | MCMC.burnin = 1000000, 189 | MCMC.interval = 100000 190 | ) 191 | 192 | # The feature adjacency matrices 193 | list_feature_matrices <- lighthergm::get_list_sparse_feature_adjmat(toyNet, model_formula) 194 | 195 | gof_res <- lighthergm::gof_lighthergm( 196 | toyNet, 197 | # The feature adjacency matrices 198 | list_feature_matrices = list_feature_matrices, 199 | # A dataframe containing the nodes data. 200 | data_for_simulation = nodes_data, 201 | # The name of the nodes_data column containing the node IDs 202 | # which are used within the network g 203 | colname_vertex_id = 'node_id', 204 | # The name of the nodes_data column containing the block ID. 205 | colname_block_membership = 'block', 206 | # The object returned by lighthergm::hergm() 207 | lighthergm_results = hergm_res, 208 | # The MCMC settings 209 | ergm_control = sim_ergm_control, 210 | # The number of simulations to use 211 | n_sim = 100 212 | ) 213 | ``` 214 | 215 | Currently, gof is evaluated on the following metrics: 216 | 217 | 1. the network statistics (the counts you obtain when you use summary on an ergm formula, such as the number of edges, triangles, nodematches, etc.), 218 | 219 | 1. degree distribution 220 | 221 | 1. geodesic distance, and 222 | 223 | 1. edgewise shared partners. 224 | 225 | `lighthergm::gof_lighthergm()` returns a list of data frames for these matrices instead of creating plots as `ergm::gof()` does. 226 | This allows you to flexibly create gof plots that match your needs. 227 | 228 | Below is a example gof plot on degree distribution. 229 | 230 | ```{r, message=FALSE, warning=FALSE} 231 | degree_gof <- 232 | gof_res$simulated$degree_dist %>% 233 | dplyr::group_by(degree) %>% 234 | dplyr::summarise(log_mean_share = mean(log(share)), 235 | log_sd_share = sd(log(share))) %>% 236 | dplyr::ungroup() 237 | 238 | plot_degree <- 239 | ggplot(data = degree_gof %>% 240 | dplyr::filter(degree < 20 & degree >= 6), 241 | aes(x = degree, y = log_mean_share)) + 242 | geom_line(aes(y = log_mean_share + 1.96 * log_sd_share), 243 | colour = "grey50", 244 | linetype = "dotted") + 245 | geom_line(aes(y = log_mean_share - 1.96 * log_sd_share), 246 | colour = "grey50", 247 | linetype = "dotted") + 248 | geom_line(colour = "grey50", 249 | linetype = "dashed") + 250 | geom_line(data = gof_res$original$degree_dist %>% 251 | dplyr::filter(share > 0 & degree < 22), 252 | aes(y = log(share))) + 253 | ylab("log prop. of nodes") 254 | 255 | plot(plot_degree) 256 | ``` 257 | 258 | # Simulation 259 | 260 | You can simulate networks with local dependence using the `lighthergm::simulate_hergm()`. 261 | 262 | ```{r, message=FALSE, echo=TRUE} 263 | # Estimated coefficients for the between-community connections 264 | coef_between_block <- coef(hergm_res$est_between) 265 | 266 | # Estimated coefficients for the within-community connections 267 | coef_within_block <- coef(hergm_res$est_within) 268 | 269 | sim_net <- lighthergm::simulate_hergm( 270 | # Formula for between-blocks 271 | formula_for_simulation = model_formula, 272 | # Same as for gof, a dataframe containing nodes attributes 273 | data_for_simulation = nodes_data, 274 | # Name of the column containing node IDs 275 | colname_vertex_id = "node_id", 276 | # Name of the column containing block IDs 277 | colname_block_membership = "block", 278 | # The coefficients for the between connections 279 | coef_between_block = coef_between_block, 280 | # The coefficients for the within connections 281 | coef_within_block = coef_within_block, 282 | # The MCMC settings 283 | ergm_control = sim_ergm_control, 284 | # Number of simulations to return 285 | n_sim = 1, 286 | # If `stats` a list with network statistics 287 | # for the between and within connections is returned 288 | output = "network", 289 | # Simulates between connections by drawing from a logistic distribution. 290 | # If FALSE, draws between connections by MCMC. 291 | use_fast_between_simulation = TRUE, 292 | # The feature adjacency matrices 293 | list_feature_matrices = list_feature_matrices 294 | ) 295 | ``` 296 | 297 | 298 | ```{r} 299 | ggnet2(sim_net, size = 1, mode = "kamadakawai") 300 | ``` 301 | 302 | # When you work with large networks 303 | 304 | If you would like to estimate an HERGM with a large network (say, when the number of nodes $\geq$ 50,000): 305 | 306 | - Select features sparse enough to fit into memory. Covariates such as gender or race will be too dense to construct feature matrices. This is a non-negligible limitation of our algorithm and will be solved in the future. 307 | - Prepare a list of multiplied feature adjacency matrices by `lighthergm::compute_multiplied_feature_matrices()`, and pass it to `lighthergm::hergm()` by `list_multiplied_feature_matrices`. Once calculated and stored, it can be used in models with the same network and the same features. 308 | - Use Python's infomap to initialize clusters. This is because it is much faster to implement cluster initialization than R functions such as `igraph::cluster_infomap()`. To install it, run `system("pip3 install infomap")` and check if it is effective by `system("infomap --version")`. If `system("infomap --version")` yields an error, consider using `{reticulate}`. 309 | - If successfully installed Python's infomap, set `use_infomap_python = TRUE` in `lighthergm::hergm()`. 310 | - When the EM estimation does not seem to have converged by inspecting the lower bound plot, you can further continue iterating by passing the `lighthergm` class object to `lighthergm::hergm()` as follows (all parameters such as the number of EM iterations will be inherited from the previous estimation unless specified). 311 | 312 | ```{r, message=FALSE} 313 | hergm_res_second <- 314 | lighthergm::hergm(object = hergm_res) 315 | ``` 316 | 317 | 318 | # References 319 | -------------------------------------------------------------------------------- /lighthergm/R/gof_lighthergm.R: -------------------------------------------------------------------------------- 1 | #' Extracts the degree distribution from a network and returns it as a data frame. 2 | #' @param net a statnet network object 3 | #' @return a data frame 4 | to_degree_dist_df <- function(net) { 5 | degree_dist <- igraph::degree.distribution(intergraph::asIgraph(net)) 6 | data.frame( 7 | degree = 0:(length(degree_dist) - 1), 8 | share = degree_dist 9 | ) 10 | } 11 | 12 | #' Extracts the geodesic distance distribution from a network and returns it as a dataframe. 13 | #' @param net a statnet network object 14 | #' @return a data frame 15 | to_geodesic_dist_df <- function(net) { 16 | dist <- ergm::ergm.geodistdist(net) 17 | labels <- as.numeric(names(dist)) 18 | names(dist) <- NULL 19 | geodesic_distances_df <- data.frame( 20 | dist = labels, 21 | pairs = dist[[1]] 22 | ) 23 | geodesic_distances_df 24 | } 25 | 26 | #' Extracts the edgewise shared partners distribution (undirected). 27 | #' @param net a statnet network object 28 | #' @return a data frame 29 | to_edgewise_shared_partners_df <- function(net) { 30 | esp_dist <- summary(net ~ esp(1:min(net$gal$n - 2, 10))) 31 | labels <- names(esp_dist) %>% 32 | purrr::map(function(lab) { 33 | stringr::str_replace(lab, "esp", "") 34 | }) %>% 35 | as.numeric() 36 | 37 | names(esp_dist) <- NULL 38 | 39 | data.frame( 40 | label = labels, 41 | esp = esp_dist 42 | ) 43 | } 44 | 45 | #' Swaps the network on the lhs of a formula for a new one with the given environment 46 | #' @param new_net A network object to be inserted into the lhs of the formula 47 | #' @param net_formula The target formula 48 | #' @param env The environment to assign to the formula 49 | #' @return A new formula with the lhs swapped 50 | swap_formula_network <- function(new_net, net_formula, env) { 51 | rhs <- as.character(net_formula)[3] 52 | as.formula(paste(deparse(substitute(new_net)), "~", rhs), env = env) 53 | } 54 | 55 | 56 | 57 | #' Separates a formula into its between and within components. The between component excludes 58 | #' terms which introduce dyadic dependence. 59 | #' @param target_formula a target formula 60 | #' @return a list containing the between and within formulas 61 | separate_formulas <- function(target_formula) { 62 | str_net <- as.character(target_formula)[2] 63 | net <- get(str_net, envir = environment(target_formula)) 64 | terms <- ergm::ergm_model(target_formula)$terms 65 | varnames <- statnet.common::list_rhs.formula(target_formula) %>% as.character() 66 | dep_terms <- 67 | terms %>% purrr::map(function(t) { 68 | dep <- t$dependence 69 | is_dep <- is.null(dep) || dep 70 | }) %>% unlist() 71 | between_rhs <- varnames[!dep_terms] 72 | within_rhs <- varnames 73 | 74 | between_formula <- paste(str_net, "~", paste(between_rhs, collapse = " + ")) 75 | within_formula <- paste(str_net, "~", paste(within_rhs, collapse = " + ")) 76 | 77 | list( 78 | between = formula(between_formula, env = environment(target_formula)), 79 | within = formula(within_formula, env = environment(target_formula)) 80 | ) 81 | } 82 | 83 | #' Gets the GOF stats for a formula 84 | #' If a network is passed, that one is used to obtain the network statistics, 85 | #' otherwise the netwok in the formula is used. 86 | #' @param sim_formula a formula 87 | #' @param net a statnet network object 88 | #' @param sim_number the ID of the current simulation 89 | #' @param compute_geodesic_distance if TRUE, includes the geodesic distance in the result object 90 | #' @return a list with the goodness-of-fit statistics 91 | get_gof_stats <- function(sim_formula, net = NULL, sim_number = NULL, compute_geodesic_distance = FALSE) { 92 | stats_formula <- sim_formula 93 | 94 | if (!is.null(net)) { 95 | stats_formula <- swap_formula_network(net, stats_formula, environment()) 96 | } 97 | 98 | network_stats <- summary(stats_formula) 99 | network_stats <- data.frame(stat = names(network_stats), value = network_stats) 100 | rownames(network_stats) <- NULL 101 | 102 | formula_net <- get(as.character(stats_formula)[2], envir = environment(stats_formula)) 103 | degree_dist <- to_degree_dist_df(formula_net) 104 | esp_dist <- to_edgewise_shared_partners_df(formula_net) 105 | 106 | if (compute_geodesic_distance == TRUE) { 107 | geodesic_dist <- to_geodesic_dist_df(formula_net) 108 | } else { 109 | geodesic_dist <- NULL 110 | } 111 | 112 | stats <- list( 113 | network_stats = network_stats, 114 | degree_dist = degree_dist, 115 | esp_dist = esp_dist, 116 | geodesic_dist = geodesic_dist 117 | ) 118 | 119 | if (!is.null(sim_number)) { 120 | stats$network_stats$n_sim <- sim_number 121 | stats$degree_dist$n_sim <- sim_number 122 | stats$esp_dist$n_sim <- sim_number 123 | 124 | if (!is.null(stats$geodesic_dist)) { 125 | stats$geodesic_dist$n_sim <- sim_number 126 | } 127 | } 128 | 129 | stats 130 | } 131 | 132 | 133 | #' Goodness of fit statistics for HERGM 134 | #' @param net the target network 135 | #' @param data_for_simulation a dataframe with node-level covariates 136 | #' @param list_feature_matrices a list of feature adjacency matrices 137 | #' @param colname_vertex_id the name of the column that contains the node id 138 | #' @param colname_block_membership the name o the column that contains the block affiliation of each node 139 | #' @param lighthergm_results a lighthergm results object 140 | #' @param type the type of evaluation to perform. Can take the values `full` or `within`. `full` performs the evaluation on all edges, and `within` only considers within-block edges. 141 | #' @param ergm_control MCMC parameters as an instance of ergm.control 142 | #' @param seed the seed to be passed to simulate_hergm 143 | #' @param n_sim the number of simulations to employ for calculating goodness of fit 144 | #' @param prevent_duplicate see `simulate_hergm` 145 | #' @param compute_geodesic_distance if `TRUE`, the distribution of geodesic distances is also computed (considerably increases computation time on large networks. `FALSE` by default.) 146 | #' @param start_from_observed if `TRUE`, MCMC uses the observed network as a starting point 147 | #' @param ... Additional arguments, to be passed to lower-level functions 148 | #' 149 | #' @export 150 | gof_lighthergm <- function(net, 151 | data_for_simulation, 152 | list_feature_matrices, 153 | colname_vertex_id, 154 | colname_block_membership, 155 | lighthergm_results, 156 | type = 'full', 157 | ergm_control = ergm::control.simulate.formula(), 158 | seed = NULL, 159 | n_sim = 1, 160 | prevent_duplicate = TRUE, 161 | compute_geodesic_distance = FALSE, 162 | start_from_observed = FALSE, 163 | ...) { 164 | # Setup 165 | gof_formula <- swap_formula_network(net, lighthergm_results$est_within$formula, environment()) 166 | coef_within_block <- coef(lighthergm_results$est_within) 167 | coef_between_block <- coef(lighthergm_results$est_between) 168 | 169 | # Validate the simulation type 170 | allowed_type_values <- c('full', 'within') 171 | if (!type %in% allowed_type_values){ 172 | stop("The `type` argument must be any of 'full' or 'within'") 173 | } 174 | 175 | seed_edgelist = NULL 176 | 177 | if (type == 'full'){ 178 | original_stats <- get_gof_stats(gof_formula, compute_geodesic_distance = compute_geodesic_distance) 179 | 180 | if(start_from_observed){ 181 | seed_edgelist <- network::as.edgelist(net) 182 | } 183 | 184 | } else { 185 | sorted_dataframe <- sort_block_membership(data_for_simulation, colname_vertex_id, colname_block_membership) 186 | seed_edgelist_within <- arrange_edgelist(network::as.edgelist(net), sorted_dataframe)$edgelist_within 187 | within_network <- generate_seed_network(gof_formula, sorted_dataframe, edgelist = seed_edgelist_within, directed = FALSE) 188 | 189 | original_stats <- get_gof_stats(gof_formula, net = within_network, compute_geodesic_distance = compute_geodesic_distance) 190 | 191 | if(start_from_observed){ 192 | seed_edgelist <- network::as.edgelist(within_network) 193 | } 194 | } 195 | 196 | # Simulate the first network by initializing it from zero. The burnin here is the one set by the user. 197 | if (type == 'full'){ 198 | base_network <- simulate_hergm( 199 | formula_for_simulation = gof_formula, 200 | data_for_simulation = data_for_simulation, 201 | colname_vertex_id = colname_vertex_id, 202 | colname_block_membership = colname_block_membership, 203 | seed_edgelist = seed_edgelist, 204 | coef_within_block = coef_within_block, 205 | coef_between_block = coef_between_block, 206 | ergm_control = ergm_control, 207 | seed_for_within = seed_for_within, 208 | seed_for_between = seed_for_between, 209 | directed = FALSE, 210 | n_sim = 1, 211 | output = "network", 212 | prevent_duplicate = prevent_duplicate, 213 | list_feature_matrices = list_feature_matrices, 214 | use_fast_between_simulation = TRUE 215 | ) 216 | } else { 217 | base_network <- lighthergm::simulate_hergm_within( 218 | formula_for_simulation = gof_formula, 219 | data_for_simulation = data_for_simulation, 220 | colname_vertex_id = colname_vertex_id, 221 | colname_block_membership = colname_block_membership, 222 | seed_edgelist = seed_edgelist, 223 | coef_within_block = coef_within_block, 224 | output = 'network', 225 | ergm_control = ergm_control, 226 | seed = seed, 227 | n_sim = 1 228 | ) 229 | } 230 | 231 | # Get the statistics for the first network 232 | sim_stats <- get_gof_stats(gof_formula, base_network, 1, compute_geodesic_distance = compute_geodesic_distance) 233 | results <- list( 234 | original = list( 235 | network_stats = original_stats$network_stats, 236 | degree_dist = original_stats$degree_dist, 237 | esp_dist = original_stats$esp_dist, 238 | geodesic_dist = original_stats$geodesic_dist 239 | ), 240 | simulated = list( 241 | network_stats = sim_stats$network_stats, 242 | degree_dist = sim_stats$degree_dist, 243 | esp_dist = sim_stats$esp_dist, 244 | geodesic_dist = sim_stats$geodesic_dist 245 | ) 246 | ) 247 | 248 | effective_nsim <- n_sim - 1 249 | 250 | if (effective_nsim > 0) { 251 | # Now replace the burnin with the interval and simulate networks one by one. 252 | ergm_control$MCMC.burnin <- ergm_control$MCMC.interval 253 | 254 | for (i in 1:effective_nsim) { 255 | if ((i + 1) %% 50 == 0) { 256 | message(paste("Simulation:", i + 1)) 257 | } 258 | 259 | if(type == 'full'){ 260 | base_network <- simulate_hergm( 261 | formula_for_simulation = gof_formula, 262 | list_feature_matrices = list_feature_matrices, 263 | data_for_simulation = data_for_simulation, 264 | colname_vertex_id = colname_vertex_id, 265 | colname_block_membership = colname_block_membership, 266 | seed_edgelist = network::as.edgelist(base_network), 267 | coef_within_block = coef_within_block, 268 | coef_between_block = coef_between_block, 269 | ergm_control = ergm_control, 270 | seed = seed, 271 | directed = FALSE, 272 | n_sim = 1, 273 | output = "network", 274 | prevent_duplicate = prevent_duplicate, 275 | use_fast_between_simulation = TRUE, 276 | ... 277 | ) 278 | } else { 279 | base_network <- lighthergm::simulate_hergm_within( 280 | formula_for_simulation = gof_formula, 281 | data_for_simulation = data_for_simulation, 282 | colname_vertex_id = colname_vertex_id, 283 | colname_block_membership = colname_block_membership, 284 | seed_edgelist = network::as.edgelist(base_network), 285 | coef_within_block = coef_within_block, 286 | output = 'network', 287 | ergm_control = ergm_control, 288 | seed = seed, 289 | n_sim = 1 290 | ) 291 | } 292 | 293 | sim_stats <- get_gof_stats(gof_formula, base_network, i + 1, compute_geodesic_distance = compute_geodesic_distance) 294 | results$simulated$network_stats <- rbind(results$simulated$network_stats, sim_stats$network_stats) 295 | results$simulated$degree_dist <- rbind(results$simulated$degree_dist, sim_stats$degree_dist) 296 | results$simulated$esp_dist <- rbind(results$simulated$esp_dist, sim_stats$esp_dist) 297 | if ( 298 | !(is.null(results$simulated$geodesic_dist)) & 299 | !(is.null(sim_stats$geodesic_dist)) 300 | ) { 301 | results$simulated$geodesic_dist <- rbind(results$simulated$geodesic_dist, sim_stats$geodesic_dist) 302 | } 303 | } 304 | 305 | message("Simulation Finished") 306 | } 307 | 308 | return(results) 309 | } 310 | -------------------------------------------------------------------------------- /lighthergm/R/hergm.R: -------------------------------------------------------------------------------- 1 | #' Hierarchical exponential-family random graph models (HERGMs) with local dependence 2 | #' @description 3 | #' The function hergm estimates and simulates three classes of hierarchical exponential-family random graph models. 4 | #' @useDynLib lighthergm 5 | #' @importFrom Rcpp sourceCpp 6 | #' @importFrom ergm ergm.getnetwork 7 | #' @param object A formula or `lighthergm` class object. A `lighthergm` is returned by `hergm()`. 8 | #' When you pass a `lighthergm` class object to `hergm()`, you can restart the EM step. 9 | #' @param n_clusters The number of blocks. This must be specified by the user. 10 | #' When you pass a "lighthergm" class object to `hergm()`, you don't have to specify this argument. 11 | #' @param n_cores The number of CPU cores to use. 12 | #' @param block_membership The pre-specified block memberships for each node. 13 | #' If `NULL`, the latent community structure is estimated, assuming that the number of communities is `n_clusters`. 14 | #' @param estimate_parameters If `TRUE`, both clustering and parameter estimation are implemented. 15 | #' If `FALSE`, only clustering is executed. 16 | #' @param verbose A logical or an integer: if this is TRUE/1, 17 | #' the program will print out additional information about the progress of estimation and simulation. 18 | #' A higher value yields lower level information. 19 | #' @param n_em_step_max The maximum number of EM iterations. 20 | #' Currently, no early stopping criteria is introduced. Thus `n_em_step_max` EM iterations are exactly implemented. 21 | #' @param initialization_method Cluster initialization method. 22 | #' If `1` (the default), `igraph`'s infomap is implemented. 23 | #' If `2`, the initial clusters are randomly uniformally selected. 24 | #' If `3`, spectral clustering is conducted. 25 | #' @param use_infomap_python If `TRUE`, the cluster initialization is implemented using Pythons' infomap. 26 | #' When using this, make sure that Python's infomap is installed and callable from `system(). 27 | #' @param seed_infomap seed value (integer) for Python's infomap. 28 | #' @param weight_for_initialization weight value used for cluster initialization. The higher this value, the more weight is put on the initialized alpha. 29 | #' @param seeds seed value (integer) for the random number generator 30 | #' @param initialized_cluster_data initialized cluster data from which the EM iterations begin. 31 | #' This can be either a vector of block affiliations of each node or initialized cluster data by Python's infomap (given by .clu format). 32 | #' @param method_second_step If "MPLE" (the default), then the maximum pseudolikelihood estimator is implemented when estimating the within-block network model. 33 | #' If "MLE", then an approximate maximum likelihood estimator is conducted. 34 | #' @param clustering_with_features If `TRUE`, clustering is implemented using the discrete covariates specified in the formula. 35 | #' @param list_multiplied_feature_matrices a list of multiplied feature adjacency matarices necessary for EM step. 36 | #' If `NULL`, `hergm()` automatically calculates. Or you can calculate by `compute_multiplied_feature_matrices()`. 37 | #' @param fix_covariate_parameter If `TRUE`, when estimating the within-block network model, 38 | #' parameters for covariates are fixed at the estimated of the between-block network model. 39 | #' @param compute_pi If `TRUE`, this function keeps track of pi matrices at each EM iteration. 40 | #' If the network is large, we strongly recommend to set to be `FALSE`. 41 | #' @param check_alpha_update If `TRUE`, this function keeps track of alpha matrices at each EM iteration. 42 | #' If the network is large, we strongly recommend to set to be `FALSE`. 43 | #' @param check_block_membership If TRUE, this function keeps track of estimated block memberships at each EM iteration. 44 | #' @param cache a `cachem` cache object used to store intermediate calculations such as eigenvector decomposition results. 45 | #' @param ... Additional arguments, to be passed to lower-level functions 46 | #' 47 | #' @examples 48 | #' # Load an embedded network object. 49 | #' data(toyNet) 50 | #' 51 | #' # Specify the model that you would like to estimate. 52 | #' model_formula <- toyNet ~ edges + nodematch("x") + nodematch("y") + triangle 53 | #' 54 | #' # Estimate the model 55 | #' hergm_res <- 56 | #' lighthergm::hergm( 57 | #' object = model_formula, # The model you would like to estiamte 58 | #' n_clusters = 4, # The number of blocks 59 | #' n_em_step_max = 100, # The maximum number of EM algorithm steps 60 | #' estimate_parameters = TRUE, # Perform parameter estimation after the block recovery step 61 | #' clustering_with_features = TRUE, # Indicate that clustering must take into account nodematch on characteristics 62 | #' check_block_membership = TRUE # Keep track of block memberships at each EM iteration 63 | #' ) 64 | #' @export 65 | hergm <- function(object, 66 | n_clusters, 67 | n_cores = 1, 68 | block_membership = NULL, 69 | estimate_parameters = TRUE, 70 | verbose = 0, 71 | n_em_step_max = 100, 72 | initialization_method = 1, 73 | use_infomap_python = FALSE, 74 | seed_infomap = NULL, 75 | weight_for_initialization = 1000, 76 | seeds = NULL, 77 | initialized_cluster_data = NULL, 78 | method_second_step = "MPLE", 79 | clustering_with_features = TRUE, 80 | list_multiplied_feature_matrices = NULL, 81 | fix_covariate_parameter = FALSE, 82 | compute_pi = FALSE, 83 | check_alpha_update = FALSE, 84 | check_block_membership = FALSE, 85 | cache = NULL, 86 | ...) { 87 | ################################################################################### 88 | ###### Preparations for estimation ################################################ 89 | ################################################################################### 90 | # When the given object is formula: 91 | if ("formula" %in% class(object)) { 92 | # If n_cluster is missing and block_membership and initialized_cluster_data are NULL, stop the process. 93 | if (missing(n_clusters) & is.null(block_membership) & is.null(initialized_cluster_data)) { 94 | stop("\nThe argument 'n_clusters' is missing. Please specify the number of clusters.") 95 | } 96 | 97 | # Get the formula 98 | formula <- object 99 | 100 | # When use_infomap_python = TRUE, check if it is installed. 101 | if (use_infomap_python) { 102 | tryCatch( 103 | { 104 | system("infomap --version") 105 | }, 106 | warning = function(e) { 107 | stop("\nPython's infomap is not installed or your default Python version is 2.x. Please install infomap by 108 | \nsystem('pip3 install infomap') 109 | \nIf already installed, make sure that the default Python version is 3.x. You can check the default version by 110 | \nsystem('python --version')") 111 | } 112 | ) 113 | } 114 | # If the initialized cluster data is given by .clu format: 115 | if (!is.null(initialized_cluster_data) && all(stringr::str_detect(initialized_cluster_data, ".clu"))) { 116 | if (verbose > 0) { 117 | message(paste("Reading initial clusters data from: ", initialized_cluster_data)) 118 | } 119 | initialized_cluster_data <- readr::read_delim(initialized_cluster_data, delim = " ", skip = 9, col_names = c("node_id", "block", "flow"), col_types = "iid") 120 | initialized_cluster_data <- as.numeric(dplyr::arrange(initialized_cluster_data, by_group = node_id)$block) 121 | } 122 | 123 | # Update the number of clusters if `initialized_cluster_data` or `block_membership` is given: 124 | if (!is.null(initialized_cluster_data)) { 125 | n_clusters <- length(unique(initialized_cluster_data)) 126 | } 127 | else if (!is.null(block_membership)) { 128 | n_clusters <- length(unique(block_membership)) 129 | } 130 | 131 | # Get network object from formula 132 | network <- ergm::ergm.getnetwork(formula) 133 | 134 | # The current hergm doesn't support directed networks. 135 | if (network$gal$directed) { 136 | stop("\nThe current hergm doesn't support directed networks. This will be modified in the future.") 137 | } 138 | 139 | # If list_multiplied_feature_matrices is not NULL, check if the order of features names is the same with that of formula. 140 | if (!is.null(list_multiplied_feature_matrices)) { 141 | # If clustering_with_features = FALSE, stop the process. 142 | if (!clustering_with_features) { 143 | stop("\nSet clustering_with_features = TRUE since you are going to use vertex features for cluster estimation.") 144 | } 145 | } 146 | 147 | # Convert vertex.names into string 148 | if (!is.character(network::network.vertex.names(network))) { 149 | network::network.vertex.names(network) <- as.character(network::network.vertex.names(network)) 150 | } 151 | 152 | # An N x K matrix which stores the probability that node i belongs to block k (i = 1,..., N, k = 1,..., K) 153 | # If all_block_memberships_fixed == TRUE, this matrix remains NULL. 154 | # sbm_pi <- NULL 155 | } 156 | 157 | EM_restart_object <- NULL 158 | # When the given object has a class of "lighthergm": 159 | if ("lighthergm" %in% class(object)) { 160 | # Inherit the following arguments from the previous estimation. 161 | # These arguments are so important that they won't be replaced by the arguments given by the user. 162 | network <- object$checkpoint$network 163 | formula <- object$checkpoint$formula 164 | n_clusters <- object$checkpoint$n_clusters 165 | clustering_with_features <- object$checkpoint$clustering_with_features 166 | list_multiplied_feature_matrices <- object$checkpoint$list_multiplied_feature_matrices 167 | 168 | # Inherit the following arguments from the previous estimation if not given by the user. 169 | # If given by the user, discard the inherited argument and use the given one. 170 | vec_arguments <- 171 | c( 172 | "n_cores", 173 | "estimate_parameters", 174 | "verbose", 175 | "n_em_step_max", 176 | "seeds", 177 | "method_second_step", 178 | "fix_covariate_parameter", 179 | "compute_pi", 180 | "check_alpha_update", 181 | "check_block_membership" 182 | ) 183 | 184 | message("Arguments:") 185 | message(glue::glue("Number of clusters = {n_clusters}")) 186 | for (i in 1:length(vec_arguments)) { 187 | if (do.call(missing, list(vec_arguments[[i]])) == TRUE) { 188 | assign(vec_arguments[[i]], object$checkpoint[[vec_arguments[[i]]]]) 189 | } 190 | message(glue::glue("{vec_arguments[[i]]} = {get(vec_arguments[[i]])}")) 191 | } 192 | 193 | # Prepare an object to restart the EM with. 194 | EM_restart_object <- 195 | list( 196 | alpha = object$alpha, 197 | list_alpha = object$EM_list_alpha, 198 | list_z = object$EM_list_z, 199 | z_memb_init = object$z_memb_final_before_kmeans, 200 | change_in_alpha = object$EM_change_in_alpha, 201 | lower_bound = object$EM_lower_bound, 202 | counter_e_step = object$counter_e_step, 203 | adjacency_matrix = object$adjacency_matrix 204 | ) 205 | } 206 | 207 | # If the block_memberships of each node are specified in the variable 'block_membership' as integers between 1 and n_clusters, 208 | # the specified block memberships are fixed. 209 | all_block_memberships_fixed <- ifelse(is.null(block_membership), FALSE, TRUE) 210 | # Make sure that if all_block_memberships_fixed == TRUE, estimate_parameters must also be TRUE. 211 | estimate_parameters <- ifelse(all_block_memberships_fixed, TRUE, estimate_parameters) 212 | 213 | ################################################################################### 214 | ###### First step: Estimating block memberships ################################### 215 | ################################################################################### 216 | 217 | # Estimate the memberships if they are not specified. 218 | if (!all_block_memberships_fixed) { 219 | set.seed(seeds[1]) 220 | # Estimate the block memberships 221 | answer <- EM_wrapper( 222 | network = network, 223 | formula = formula, 224 | n_clusters = n_clusters, 225 | n_em_step_max = n_em_step_max, 226 | min_size = 2, 227 | initialization_method = initialization_method, 228 | use_infomap_python = use_infomap_python, 229 | seed_infomap = seed_infomap, 230 | initialized_cluster_data = initialized_cluster_data, 231 | clustering_with_features = clustering_with_features, 232 | list_multiplied_feature_matrices = list_multiplied_feature_matrices, 233 | verbose = verbose, 234 | weight_for_initialization = weight_for_initialization, 235 | compute_pi = compute_pi, 236 | check_alpha_update = check_alpha_update, 237 | check_block_membership = check_block_membership, 238 | EM_restart_object = EM_restart_object, 239 | cache = cache 240 | ) 241 | 242 | block_membership <- answer$z_memb_final 243 | initial_block <- answer$z_memb_init 244 | membership_before_kmeans <- answer$z_memb_final_before_kmeans 245 | sbm_pi <- answer$Pi 246 | EM_list_alpha <- answer$list_alpha 247 | EM_list_z <- answer$list_z 248 | EM_change_in_alpha <- answer$change_in_alpha 249 | EM_lower_bound <- answer$lower_bound 250 | alpha <- answer$alpha 251 | counter_e_step <- answer$counter_e_step 252 | adjacency_matrix <- answer$adjacency_matrix 253 | list_multiplied_feature_matrices <- answer$list_multiplied_feature_matrices 254 | } 255 | else { 256 | if (verbose > 0) { 257 | message("\nSkipping Steps 1 and 2: z specified") 258 | } 259 | param_EM_wrapper <- NULL 260 | initial_block <- NULL 261 | membership_before_kmeans <- NULL 262 | sbm_pi <- NULL 263 | EM_list_z <- NULL 264 | EM_list_alpha <- NULL 265 | EM_change_in_alpha <- NULL 266 | EM_lower_bound <- NULL 267 | alpha <- NULL 268 | counter_e_step <- NULL 269 | adjacency_matrix <- NULL 270 | list_multiplied_feature_matrices <- NULL 271 | } 272 | 273 | #################################################################################################### 274 | ###### Second step: Estimating between-block parameters ############################################ 275 | #################################################################################################### 276 | 277 | if (estimate_parameters) { 278 | if (verbose > 0) { 279 | message("\nEstimate between-block parameters") 280 | } 281 | ## Estimate between-block parameters 282 | est_between <- estimate_between_param( 283 | formula = formula, 284 | network = network, 285 | block = block_membership 286 | ) 287 | } else { 288 | # If you don't estimate any parameters... 289 | est_between <- NULL 290 | } 291 | 292 | #################################################################################################### 293 | ###### Third step: Estimating within-block parameters ############################################## 294 | #################################################################################################### 295 | 296 | # When you estimate with-block parameters, then: 297 | if (estimate_parameters) { 298 | if (verbose > 0) { 299 | message("\n\nStep 3: Estimate parameters conditional on z") 300 | } 301 | # If fixing within-block feature parameters: 302 | if (fix_covariate_parameter) { 303 | offset_coef <- coef(est_between) 304 | offset_coef <- offset_coef[-1] 305 | } else { 306 | offset_coef <- NULL 307 | } 308 | # Estimate within-block parameters 309 | est_within <- 310 | estimate_within_params( 311 | formula, 312 | network, 313 | z_memb = block_membership, 314 | n_cores, 315 | verbose, 316 | seeds = NULL, 317 | method_second_step, 318 | offset_coef, 319 | ... 320 | ) 321 | 322 | 323 | #################################################################################################### 324 | ###### Store the results ########################################################################### 325 | #################################################################################################### 326 | 327 | estimation_status <- ifelse(est_within$failure, "Estimation failed", "Estimated") 328 | } else { 329 | # estimate_parameters = FALSE: 330 | message("\n") 331 | estimation_status <- "Not estimated" 332 | est_within <- NULL 333 | } 334 | 335 | # Store the given arguments. These will be inherited for the next EM. 336 | checkpoint <- list( 337 | network = network, 338 | formula = formula, 339 | n_clusters = n_clusters, 340 | clustering_with_features = clustering_with_features, 341 | list_multiplied_feature_matrices = list_multiplied_feature_matrices, 342 | n_cores = n_cores, 343 | estimate_parameters = estimate_parameters, 344 | verbose = verbose, 345 | n_em_step_max = n_em_step_max, 346 | seeds = seeds, 347 | method_second_step = method_second_step, 348 | fix_covariate_parameter = fix_covariate_parameter, 349 | compute_pi = compute_pi, 350 | check_alpha_update = check_alpha_update, 351 | check_block_membership = check_block_membership 352 | ) 353 | 354 | # Store the results in a list 355 | output <- list( 356 | partition = block_membership, 357 | initial_block = initial_block, 358 | sbm_pi = sbm_pi, 359 | EM_list_z = EM_list_z, 360 | EM_list_alpha = EM_list_alpha, 361 | EM_change_in_alpha = EM_change_in_alpha, 362 | EM_lower_bound = EM_lower_bound, 363 | alpha = alpha, 364 | counter_e_step = counter_e_step, 365 | adjacency_matrix = adjacency_matrix, 366 | estimation_status = estimation_status, 367 | est_within = est_within, 368 | est_between = est_between, 369 | checkpoint = checkpoint, 370 | membership_before_kmeans = membership_before_kmeans 371 | ) 372 | 373 | # Return the output 374 | return(structure(output, class = "lighthergm")) 375 | } 376 | -------------------------------------------------------------------------------- /lighthergm/src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // eigenvectors_sparse 15 | arma::mat eigenvectors_sparse(const arma::sp_mat& X, int n_vec); 16 | RcppExport SEXP _lighthergm_eigenvectors_sparse(SEXP XSEXP, SEXP n_vecSEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< const arma::sp_mat& >::type X(XSEXP); 21 | Rcpp::traits::input_parameter< int >::type n_vec(n_vecSEXP); 22 | rcpp_result_gen = Rcpp::wrap(eigenvectors_sparse(X, n_vec)); 23 | return rcpp_result_gen; 24 | END_RCPP 25 | } 26 | // compute_yule_coef 27 | double compute_yule_coef(const arma::vec& z_star, const arma::vec& z); 28 | RcppExport SEXP _lighthergm_compute_yule_coef(SEXP z_starSEXP, SEXP zSEXP) { 29 | BEGIN_RCPP 30 | Rcpp::RObject rcpp_result_gen; 31 | Rcpp::RNGScope rcpp_rngScope_gen; 32 | Rcpp::traits::input_parameter< const arma::vec& >::type z_star(z_starSEXP); 33 | Rcpp::traits::input_parameter< const arma::vec& >::type z(zSEXP); 34 | rcpp_result_gen = Rcpp::wrap(compute_yule_coef(z_star, z)); 35 | return rcpp_result_gen; 36 | END_RCPP 37 | } 38 | // get_sparse_feature_adjmat 39 | arma::sp_mat get_sparse_feature_adjmat(const arma::vec& x); 40 | RcppExport SEXP _lighthergm_get_sparse_feature_adjmat(SEXP xSEXP) { 41 | BEGIN_RCPP 42 | Rcpp::RObject rcpp_result_gen; 43 | Rcpp::RNGScope rcpp_rngScope_gen; 44 | Rcpp::traits::input_parameter< const arma::vec& >::type x(xSEXP); 45 | rcpp_result_gen = Rcpp::wrap(get_sparse_feature_adjmat(x)); 46 | return rcpp_result_gen; 47 | END_RCPP 48 | } 49 | // get_sparse_feature_adjmat_from_string 50 | arma::sp_mat get_sparse_feature_adjmat_from_string(const Rcpp::StringVector& x); 51 | RcppExport SEXP _lighthergm_get_sparse_feature_adjmat_from_string(SEXP xSEXP) { 52 | BEGIN_RCPP 53 | Rcpp::RObject rcpp_result_gen; 54 | Rcpp::RNGScope rcpp_rngScope_gen; 55 | Rcpp::traits::input_parameter< const Rcpp::StringVector& >::type x(xSEXP); 56 | rcpp_result_gen = Rcpp::wrap(get_sparse_feature_adjmat_from_string(x)); 57 | return rcpp_result_gen; 58 | END_RCPP 59 | } 60 | // get_matrix_for_denominator 61 | arma::sp_mat get_matrix_for_denominator(int numOfVertices, const Rcpp::List& list_feature_adjmat); 62 | RcppExport SEXP _lighthergm_get_matrix_for_denominator(SEXP numOfVerticesSEXP, SEXP list_feature_adjmatSEXP) { 63 | BEGIN_RCPP 64 | Rcpp::RObject rcpp_result_gen; 65 | Rcpp::RNGScope rcpp_rngScope_gen; 66 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 67 | Rcpp::traits::input_parameter< const Rcpp::List& >::type list_feature_adjmat(list_feature_adjmatSEXP); 68 | rcpp_result_gen = Rcpp::wrap(get_matrix_for_denominator(numOfVertices, list_feature_adjmat)); 69 | return rcpp_result_gen; 70 | END_RCPP 71 | } 72 | // get_elementwise_multiplied_matrices 73 | Rcpp::List get_elementwise_multiplied_matrices(const arma::sp_mat& adjmat, const Rcpp::List& list_feature_adjmat); 74 | RcppExport SEXP _lighthergm_get_elementwise_multiplied_matrices(SEXP adjmatSEXP, SEXP list_feature_adjmatSEXP) { 75 | BEGIN_RCPP 76 | Rcpp::RObject rcpp_result_gen; 77 | Rcpp::RNGScope rcpp_rngScope_gen; 78 | Rcpp::traits::input_parameter< const arma::sp_mat& >::type adjmat(adjmatSEXP); 79 | Rcpp::traits::input_parameter< const Rcpp::List& >::type list_feature_adjmat(list_feature_adjmatSEXP); 80 | rcpp_result_gen = Rcpp::wrap(get_elementwise_multiplied_matrices(adjmat, list_feature_adjmat)); 81 | return rcpp_result_gen; 82 | END_RCPP 83 | } 84 | // decimal_to_binary_vector 85 | arma::vec decimal_to_binary_vector(int decimal, int vec_length); 86 | RcppExport SEXP _lighthergm_decimal_to_binary_vector(SEXP decimalSEXP, SEXP vec_lengthSEXP) { 87 | BEGIN_RCPP 88 | Rcpp::RObject rcpp_result_gen; 89 | Rcpp::RNGScope rcpp_rngScope_gen; 90 | Rcpp::traits::input_parameter< int >::type decimal(decimalSEXP); 91 | Rcpp::traits::input_parameter< int >::type vec_length(vec_lengthSEXP); 92 | rcpp_result_gen = Rcpp::wrap(decimal_to_binary_vector(decimal, vec_length)); 93 | return rcpp_result_gen; 94 | END_RCPP 95 | } 96 | // compute_sumTaus 97 | arma::mat compute_sumTaus(int numOfVertices, int numOfClasses, const arma::mat& tau, int verbose); 98 | RcppExport SEXP _lighthergm_compute_sumTaus(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP tauSEXP, SEXP verboseSEXP) { 99 | BEGIN_RCPP 100 | Rcpp::RObject rcpp_result_gen; 101 | Rcpp::RNGScope rcpp_rngScope_gen; 102 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 103 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 104 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 105 | Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); 106 | rcpp_result_gen = Rcpp::wrap(compute_sumTaus(numOfVertices, numOfClasses, tau, verbose)); 107 | return rcpp_result_gen; 108 | END_RCPP 109 | } 110 | // compute_quadratic_term_naive 111 | arma::mat compute_quadratic_term_naive(int numOfVertices, int numOfClasses, const arma::mat& pi, const arma::mat& tau, const arma::sp_mat& network); 112 | RcppExport SEXP _lighthergm_compute_quadratic_term_naive(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP piSEXP, SEXP tauSEXP, SEXP networkSEXP) { 113 | BEGIN_RCPP 114 | Rcpp::RObject rcpp_result_gen; 115 | Rcpp::RNGScope rcpp_rngScope_gen; 116 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 117 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 118 | Rcpp::traits::input_parameter< const arma::mat& >::type pi(piSEXP); 119 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 120 | Rcpp::traits::input_parameter< const arma::sp_mat& >::type network(networkSEXP); 121 | rcpp_result_gen = Rcpp::wrap(compute_quadratic_term_naive(numOfVertices, numOfClasses, pi, tau, network)); 122 | return rcpp_result_gen; 123 | END_RCPP 124 | } 125 | // compute_linear_term 126 | arma::mat compute_linear_term(int numOfVertices, int numOfClasses, const arma::vec& alpha, const arma::mat& tau, double& LB); 127 | RcppExport SEXP _lighthergm_compute_linear_term(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP tauSEXP, SEXP LBSEXP) { 128 | BEGIN_RCPP 129 | Rcpp::RObject rcpp_result_gen; 130 | Rcpp::RNGScope rcpp_rngScope_gen; 131 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 132 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 133 | Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP); 134 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 135 | Rcpp::traits::input_parameter< double& >::type LB(LBSEXP); 136 | rcpp_result_gen = Rcpp::wrap(compute_linear_term(numOfVertices, numOfClasses, alpha, tau, LB)); 137 | return rcpp_result_gen; 138 | END_RCPP 139 | } 140 | // compute_pi 141 | arma::mat compute_pi(int numOfVertices, int numOfClasses, const arma::sp_mat& stat, const arma::mat& tau); 142 | RcppExport SEXP _lighthergm_compute_pi(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP statSEXP, SEXP tauSEXP) { 143 | BEGIN_RCPP 144 | Rcpp::RObject rcpp_result_gen; 145 | Rcpp::RNGScope rcpp_rngScope_gen; 146 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 147 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 148 | Rcpp::traits::input_parameter< const arma::sp_mat& >::type stat(statSEXP); 149 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 150 | rcpp_result_gen = Rcpp::wrap(compute_pi(numOfVertices, numOfClasses, stat, tau)); 151 | return rcpp_result_gen; 152 | END_RCPP 153 | } 154 | // compute_quadratic_term 155 | arma::mat compute_quadratic_term(int numOfVertices, int numOfClasses, const arma::vec& alpha, const arma::mat& tau, const arma::sp_mat& network, double& LB, int verbose); 156 | RcppExport SEXP _lighthergm_compute_quadratic_term(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP tauSEXP, SEXP networkSEXP, SEXP LBSEXP, SEXP verboseSEXP) { 157 | BEGIN_RCPP 158 | Rcpp::RObject rcpp_result_gen; 159 | Rcpp::RNGScope rcpp_rngScope_gen; 160 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 161 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 162 | Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP); 163 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 164 | Rcpp::traits::input_parameter< const arma::sp_mat& >::type network(networkSEXP); 165 | Rcpp::traits::input_parameter< double& >::type LB(LBSEXP); 166 | Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); 167 | rcpp_result_gen = Rcpp::wrap(compute_quadratic_term(numOfVertices, numOfClasses, alpha, tau, network, LB, verbose)); 168 | return rcpp_result_gen; 169 | END_RCPP 170 | } 171 | // run_EM_without_features 172 | Rcpp::List run_EM_without_features(int numOfVertices, int numOfClasses, const arma::vec& alpha, arma::mat& tau, const arma::sp_mat& network, int verbose); 173 | RcppExport SEXP _lighthergm_run_EM_without_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP tauSEXP, SEXP networkSEXP, SEXP verboseSEXP) { 174 | BEGIN_RCPP 175 | Rcpp::RObject rcpp_result_gen; 176 | Rcpp::RNGScope rcpp_rngScope_gen; 177 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 178 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 179 | Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP); 180 | Rcpp::traits::input_parameter< arma::mat& >::type tau(tauSEXP); 181 | Rcpp::traits::input_parameter< const arma::sp_mat& >::type network(networkSEXP); 182 | Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); 183 | rcpp_result_gen = Rcpp::wrap(run_EM_without_features(numOfVertices, numOfClasses, alpha, tau, network, verbose)); 184 | return rcpp_result_gen; 185 | END_RCPP 186 | } 187 | // compute_denominator_for_pi_d1x0 188 | arma::mat compute_denominator_for_pi_d1x0(int numOfVertices, double numOfClasses, const arma::sp_mat& matrix_for_denominator, const arma::mat& tau, int verbose); 189 | RcppExport SEXP _lighthergm_compute_denominator_for_pi_d1x0(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP matrix_for_denominatorSEXP, SEXP tauSEXP, SEXP verboseSEXP) { 190 | BEGIN_RCPP 191 | Rcpp::RObject rcpp_result_gen; 192 | Rcpp::RNGScope rcpp_rngScope_gen; 193 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 194 | Rcpp::traits::input_parameter< double >::type numOfClasses(numOfClassesSEXP); 195 | Rcpp::traits::input_parameter< const arma::sp_mat& >::type matrix_for_denominator(matrix_for_denominatorSEXP); 196 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 197 | Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); 198 | rcpp_result_gen = Rcpp::wrap(compute_denominator_for_pi_d1x0(numOfVertices, numOfClasses, matrix_for_denominator, tau, verbose)); 199 | return rcpp_result_gen; 200 | END_RCPP 201 | } 202 | // compute_pi_d1x0 203 | arma::mat compute_pi_d1x0(int numOfVertices, int numOfClasses, const Rcpp::List& list_multiplied_feature_adjmat, const arma::mat& tau, int verbose); 204 | RcppExport SEXP _lighthergm_compute_pi_d1x0(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP, SEXP verboseSEXP) { 205 | BEGIN_RCPP 206 | Rcpp::RObject rcpp_result_gen; 207 | Rcpp::RNGScope rcpp_rngScope_gen; 208 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 209 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 210 | Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP); 211 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 212 | Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); 213 | rcpp_result_gen = Rcpp::wrap(compute_pi_d1x0(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, verbose)); 214 | return rcpp_result_gen; 215 | END_RCPP 216 | } 217 | // compute_quadratic_term_with_features 218 | arma::mat compute_quadratic_term_with_features(int numOfVertices, int numOfClasses, const Rcpp::List& list_multiplied_feature_adjmat, const arma::mat& tau, double& LB, int verbose); 219 | RcppExport SEXP _lighthergm_compute_quadratic_term_with_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP, SEXP LBSEXP, SEXP verboseSEXP) { 220 | BEGIN_RCPP 221 | Rcpp::RObject rcpp_result_gen; 222 | Rcpp::RNGScope rcpp_rngScope_gen; 223 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 224 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 225 | Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP); 226 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 227 | Rcpp::traits::input_parameter< double& >::type LB(LBSEXP); 228 | Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); 229 | rcpp_result_gen = Rcpp::wrap(compute_quadratic_term_with_features(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau, LB, verbose)); 230 | return rcpp_result_gen; 231 | END_RCPP 232 | } 233 | // compute_pi_with_features 234 | Rcpp::List compute_pi_with_features(int numOfVertices, int numOfClasses, const Rcpp::List& list_multiplied_feature_adjmat, const arma::mat& tau); 235 | RcppExport SEXP _lighthergm_compute_pi_with_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP) { 236 | BEGIN_RCPP 237 | Rcpp::RObject rcpp_result_gen; 238 | Rcpp::RNGScope rcpp_rngScope_gen; 239 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 240 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 241 | Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP); 242 | Rcpp::traits::input_parameter< const arma::mat& >::type tau(tauSEXP); 243 | rcpp_result_gen = Rcpp::wrap(compute_pi_with_features(numOfVertices, numOfClasses, list_multiplied_feature_adjmat, tau)); 244 | return rcpp_result_gen; 245 | END_RCPP 246 | } 247 | // run_EM_with_features 248 | Rcpp::List run_EM_with_features(int numOfVertices, int numOfClasses, const arma::vec& alpha, const Rcpp::List& list_multiplied_feature_adjmat, arma::mat& tau, int verbose); 249 | RcppExport SEXP _lighthergm_run_EM_with_features(SEXP numOfVerticesSEXP, SEXP numOfClassesSEXP, SEXP alphaSEXP, SEXP list_multiplied_feature_adjmatSEXP, SEXP tauSEXP, SEXP verboseSEXP) { 250 | BEGIN_RCPP 251 | Rcpp::RObject rcpp_result_gen; 252 | Rcpp::RNGScope rcpp_rngScope_gen; 253 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 254 | Rcpp::traits::input_parameter< int >::type numOfClasses(numOfClassesSEXP); 255 | Rcpp::traits::input_parameter< const arma::vec& >::type alpha(alphaSEXP); 256 | Rcpp::traits::input_parameter< const Rcpp::List& >::type list_multiplied_feature_adjmat(list_multiplied_feature_adjmatSEXP); 257 | Rcpp::traits::input_parameter< arma::mat& >::type tau(tauSEXP); 258 | Rcpp::traits::input_parameter< int >::type verbose(verboseSEXP); 259 | rcpp_result_gen = Rcpp::wrap(run_EM_with_features(numOfVertices, numOfClasses, alpha, list_multiplied_feature_adjmat, tau, verbose)); 260 | return rcpp_result_gen; 261 | END_RCPP 262 | } 263 | // simulate_between_network 264 | arma::sp_mat simulate_between_network(int numOfVertices, const Rcpp::List& list_feature_adjmat, const arma::vec& coef_between, const arma::vec& block_membership, bool directed); 265 | RcppExport SEXP _lighthergm_simulate_between_network(SEXP numOfVerticesSEXP, SEXP list_feature_adjmatSEXP, SEXP coef_betweenSEXP, SEXP block_membershipSEXP, SEXP directedSEXP) { 266 | BEGIN_RCPP 267 | Rcpp::RObject rcpp_result_gen; 268 | Rcpp::RNGScope rcpp_rngScope_gen; 269 | Rcpp::traits::input_parameter< int >::type numOfVertices(numOfVerticesSEXP); 270 | Rcpp::traits::input_parameter< const Rcpp::List& >::type list_feature_adjmat(list_feature_adjmatSEXP); 271 | Rcpp::traits::input_parameter< const arma::vec& >::type coef_between(coef_betweenSEXP); 272 | Rcpp::traits::input_parameter< const arma::vec& >::type block_membership(block_membershipSEXP); 273 | Rcpp::traits::input_parameter< bool >::type directed(directedSEXP); 274 | rcpp_result_gen = Rcpp::wrap(simulate_between_network(numOfVertices, list_feature_adjmat, coef_between, block_membership, directed)); 275 | return rcpp_result_gen; 276 | END_RCPP 277 | } 278 | 279 | static const R_CallMethodDef CallEntries[] = { 280 | {"_lighthergm_eigenvectors_sparse", (DL_FUNC) &_lighthergm_eigenvectors_sparse, 2}, 281 | {"_lighthergm_compute_yule_coef", (DL_FUNC) &_lighthergm_compute_yule_coef, 2}, 282 | {"_lighthergm_get_sparse_feature_adjmat", (DL_FUNC) &_lighthergm_get_sparse_feature_adjmat, 1}, 283 | {"_lighthergm_get_sparse_feature_adjmat_from_string", (DL_FUNC) &_lighthergm_get_sparse_feature_adjmat_from_string, 1}, 284 | {"_lighthergm_get_matrix_for_denominator", (DL_FUNC) &_lighthergm_get_matrix_for_denominator, 2}, 285 | {"_lighthergm_get_elementwise_multiplied_matrices", (DL_FUNC) &_lighthergm_get_elementwise_multiplied_matrices, 2}, 286 | {"_lighthergm_decimal_to_binary_vector", (DL_FUNC) &_lighthergm_decimal_to_binary_vector, 2}, 287 | {"_lighthergm_compute_sumTaus", (DL_FUNC) &_lighthergm_compute_sumTaus, 4}, 288 | {"_lighthergm_compute_quadratic_term_naive", (DL_FUNC) &_lighthergm_compute_quadratic_term_naive, 5}, 289 | {"_lighthergm_compute_linear_term", (DL_FUNC) &_lighthergm_compute_linear_term, 5}, 290 | {"_lighthergm_compute_pi", (DL_FUNC) &_lighthergm_compute_pi, 4}, 291 | {"_lighthergm_compute_quadratic_term", (DL_FUNC) &_lighthergm_compute_quadratic_term, 7}, 292 | {"_lighthergm_run_EM_without_features", (DL_FUNC) &_lighthergm_run_EM_without_features, 6}, 293 | {"_lighthergm_compute_denominator_for_pi_d1x0", (DL_FUNC) &_lighthergm_compute_denominator_for_pi_d1x0, 5}, 294 | {"_lighthergm_compute_pi_d1x0", (DL_FUNC) &_lighthergm_compute_pi_d1x0, 5}, 295 | {"_lighthergm_compute_quadratic_term_with_features", (DL_FUNC) &_lighthergm_compute_quadratic_term_with_features, 6}, 296 | {"_lighthergm_compute_pi_with_features", (DL_FUNC) &_lighthergm_compute_pi_with_features, 4}, 297 | {"_lighthergm_run_EM_with_features", (DL_FUNC) &_lighthergm_run_EM_with_features, 6}, 298 | {"_lighthergm_simulate_between_network", (DL_FUNC) &_lighthergm_simulate_between_network, 5}, 299 | {NULL, NULL, 0} 300 | }; 301 | 302 | RcppExport void R_init_lighthergm(DllInfo *dll) { 303 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 304 | R_useDynamicSymbols(dll, FALSE); 305 | } 306 | --------------------------------------------------------------------------------