├── .DS_Store ├── .Rhistory ├── Archived ├── expectation_CM.R ├── initialization.R ├── multilayer_extraction.R └── score.R ├── DESCRIPTION ├── MultilayerExtraction.Rproj ├── NAMESPACE ├── R ├── .DS_Store ├── AU_CS.R ├── adjacency_to_edgelist.R ├── expectation_CM.R ├── initialization.R ├── modularity_matrix.R ├── multilayer_extraction.R ├── plot.R ├── refine.R └── score.R ├── README.html ├── README.md ├── data-raw ├── AU_CS.R ├── AU_CS.rda ├── EU_Airport.R ├── EU_Airport.rda ├── arXiv.R └── arXiv.rda ├── data ├── AU_CS.rda ├── EU_Airport.rda └── arXiv.rda └── man ├── AU_CS.Rd ├── adjacency.to.edgelist.Rd ├── expected.CM.Rd ├── initialization.Rd ├── multilayer.extraction.Rd ├── multilayer.modularity.matrix.Rd ├── plot.MultilayerCommunity.Rd ├── refine.Rd └── score.Rd /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/.DS_Store -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 2 | Mean.Score[i] = temp$Mean.Score 3 | Number.Communities[i] = length(temp$Communities) 4 | } 5 | if(length(Results2) > 0){ 6 | betas = seq(0.01, 1, by = 0.01) 7 | Results3 = list() 8 | Number.Communities = rep(0,length(betas)) 9 | Mean.Score = rep(0,length(betas)) 10 | for(i in 1:length(betas)){ 11 | temp = cleanup(Results2, betas[i]) 12 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 13 | Mean.Score[i] = temp$Mean.Score 14 | Number.Communities[i] = length(temp$Communities) 15 | } 16 | } 17 | if(length(Results2) > 0){ 18 | betas = seq(0.01, 1, by = 0.01) 19 | Results3 = list() 20 | Number.Communities = rep(0,length(betas)) 21 | Mean.Score = rep(0,length(betas)) 22 | for(i in 1:length(betas)){ 23 | temp = cleanup(Results2, betas[i]) 24 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 25 | Mean.Score[i] = temp$Mean.Score 26 | Number.Communities[i] = length(temp$Communities) 27 | } 28 | } 29 | temp 30 | length(Results2) 31 | betas = seq(0.01, 1, by = 0.01) 32 | Results3 = list() 33 | Number.Communities = rep(0,length(betas)) 34 | Number.Communities = rep(0, length(betas)) 35 | Number.Communities 36 | Mean.Score = rep(0, length(betas)) 37 | temp 38 | for(i in 1:length(betas)){ 39 | temp = cleanup(Results2, betas[i]) 40 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 41 | Mean.Score[i] = temp$Score 42 | Number.Communities[i] = length(temp$Communities) 43 | } 44 | i 45 | temp$Score 46 | temp 47 | length(betas) 48 | temp = cleanup(Results2, betas[i]) 49 | temp 50 | cleanup = function(Results, beta){ 51 | k = length(Results) 52 | if(k < 2){ 53 | Results <- Results[[1]] 54 | Results$Mean.Score <- Results$Score 55 | return(Results) 56 | } 57 | indx.rm = numeric() 58 | if(k > 1){ 59 | for(i in 1:(k-1)){ 60 | for(j in (i+1):k){ 61 | match = length(intersect(Results[[i]]$B, Results[[j]]$B))*length(intersect(Results[[i]]$I, Results[[j]]$I))/(min(length(Results[[i]]$B)*length(Results[[i]]$I),length(Results[[j]]$B)*length(Results[[j]]$I))) 62 | pot = c(i,j) 63 | indx = pot[which.min(c(Results[[i]]$Score, Results[[j]]$Score))] 64 | if(match > beta-0.00001){indx.rm = c(indx.rm,indx)} 65 | } 66 | } 67 | } 68 | cleanup = function(Results, beta){ 69 | k = length(Results) 70 | if(k < 2){ 71 | Results <- Results[[1]] 72 | Results$Mean.Score <- Results$Score 73 | return(Results) 74 | } 75 | indx.rm = numeric() 76 | if(k > 1){ 77 | for(i in 1:(k-1)){ 78 | for(j in (i+1):k){ 79 | match = length(intersect(Results[[i]]$B, Results[[j]]$B))*length(intersect(Results[[i]]$I, Results[[j]]$I))/(min(length(Results[[i]]$B)*length(Results[[i]]$I),length(Results[[j]]$B)*length(Results[[j]]$I))) 80 | pot = c(i,j) 81 | indx = pot[which.min(c(Results[[i]]$Score, Results[[j]]$Score))] 82 | if(match > beta-0.00001){indx.rm = c(indx.rm,indx)} 83 | } 84 | } 85 | } 86 | Results[indx.rm] = NULL 87 | temp = 0 88 | for(i in 1:length(Results)){ 89 | temp = temp + Results[[i]]$Score 90 | } 91 | Mean.score = temp/length(Results) 92 | return(list(Communities = Results, Mean.Score = Mean.score)) 93 | } 94 | if(length(Results.temp) < 1){return("No Community Found")} 95 | Scores = rep(0, length(Results.temp)) 96 | for(i in 1:length(Results.temp)){ 97 | if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000} 98 | if(length(Results.temp[[i]]$B) > 0){ 99 | Scores[i] = Results.temp[[i]]$Score 100 | } 101 | } 102 | Scores = round(Scores, 2) 103 | #keep only unique communities with score greater than threshold 104 | indx = which(!duplicated(Scores) == TRUE) 105 | indx.2 = which(Scores > min.score) 106 | Results2 = Results.temp[intersect(indx, indx.2)] 107 | if(length(Results2) == 0){ 108 | Results = NULL 109 | return(Object = NULL) 110 | } 111 | if(length(Results2) > 0){ 112 | betas = seq(0.01, 1, by = 0.01) 113 | Results3 = list() 114 | Number.Communities = rep(0, length(betas)) 115 | Mean.Score = rep(0, length(betas)) 116 | for(i in 1:length(betas)){ 117 | temp = cleanup(Results2, betas[i]) 118 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 119 | Mean.Score[i] = temp$Mean.Score 120 | Number.Communities[i] = length(temp$Communities) 121 | } 122 | } 123 | Z = data.frame(Beta = betas, Mean.Score = Mean.Score, Number.Communities = Number.Communities) 124 | Object = list(Community.List = Results3, Diagnostics = Z) 125 | Z 126 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 127 | plot(community.object, main = "Diagnostic Plot AU_CS") 128 | cleanup = function(Results, beta){ 129 | k = length(Results) 130 | if(k < 2){ 131 | Results <- Results[[1]] 132 | Results$Mean.Score <- Results$Score 133 | Results$Number.Communities <- 1 134 | return(Results) 135 | } 136 | indx.rm = numeric() 137 | if(k > 1){ 138 | for(i in 1:(k-1)){ 139 | for(j in (i+1):k){ 140 | match = length(intersect(Results[[i]]$B, Results[[j]]$B))*length(intersect(Results[[i]]$I, Results[[j]]$I))/(min(length(Results[[i]]$B)*length(Results[[i]]$I),length(Results[[j]]$B)*length(Results[[j]]$I))) 141 | pot = c(i,j) 142 | indx = pot[which.min(c(Results[[i]]$Score, Results[[j]]$Score))] 143 | if(match > beta-0.00001){indx.rm = c(indx.rm,indx)} 144 | } 145 | } 146 | } 147 | Results[indx.rm] = NULL 148 | temp = 0 149 | for(i in 1:length(Results)){ 150 | temp = temp + Results[[i]]$Score 151 | } 152 | Mean.score = temp/length(Results) 153 | return(list(Communities = Results, Mean.Score = Mean.score)) 154 | } 155 | object <- refine(community.object, k = 6, m = m, n = n) 156 | object <- refine(community.object, k = 1, m = m, n = n) 157 | vertex.set <- 1:10 158 | layer.set <- 1:2 159 | n = 61 160 | if(length(layer.set) < 1 || length(vertex.set) < 1){ 161 | return(obs.score = 0) 162 | } 163 | if(length(layer.set) == 1){ 164 | super.mod <- mod.matrix[[layer.set]] #just a single igraph object 165 | } 166 | if(length(layer.set) > 1){ 167 | #merge the modularity graphs 168 | super.mod <- graph.empty(n = n, directed = FALSE) 169 | for(j in layer.set){ 170 | super.mod <- union(super.mod, mod.matrix[[j]]) #take union of all networks in the layer.set 171 | } 172 | } 173 | super.mod 174 | super.mod.subgraph <- induced_subgraph(super.mod, v = vertex.set) 175 | # 176 | edge.weights <- as.data.frame(get.edge.attribute(super.mod.subgraph)) 177 | edge.weights[is.na(edge.weights)] <- 0 178 | edge.weights 179 | modularity.score <- colSums(edge.weights) 180 | modularity.score 181 | modularity.score <- rowSums(edge.weights) 182 | modularity.score 183 | modularity.score[which(modularity.score < 0)] <- 0 184 | modularity.score 185 | tot.mod <- sum(modularity.score) 186 | obs.score <- (tot.mod)^2 / (n^2*choose(length(vertex.set), 2)*(length(layer.set))) 187 | obs.score 188 | choose(length(vertex.set), 2) 189 | source("score_temp.R") 190 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 191 | score = function(mod.matrix, vertex.set, layer.set, n){ 192 | if(length(layer.set) < 1 || length(vertex.set) < 1){ 193 | return(obs.score = 0) 194 | } 195 | if(length(layer.set) == 1){ 196 | super.mod <- mod.matrix[[layer.set]] #just a single igraph object 197 | } 198 | if(length(layer.set) > 1){ 199 | #merge the modularity graphs 200 | super.mod <- graph.empty(n = n, directed = FALSE) 201 | for(j in layer.set){ 202 | super.mod <- union(super.mod, mod.matrix[[j]]) #take union of all networks in the layer.set 203 | } 204 | } 205 | #take sub-graph of the modularity matrix 206 | super.mod.subgraph <- induced_subgraph(super.mod, v = vertex.set) 207 | #get the edge weights all together 208 | edge.weights <- as.data.frame(get.edge.attribute(super.mod.subgraph)) 209 | edge.weights[is.na(edge.weights)] <- 0 210 | modularity.score <- rowSums(edge.weights) #sum across vertices first 211 | modularity.score[which(modularity.score < 0)] <- 0 #only keep positive values 212 | tot.mod <- sum(modularity.score) 213 | obs.score <- (tot.mod)^2 / (n^2*choose(length(vertex.set), 2)*(length(layer.set))) 214 | return(obs.score) 215 | } 216 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 217 | plot(community.object) 218 | dev.new() 219 | plot(community.object) 220 | mod.matrix <- modularity.matrix(adjacency) 221 | mod.matrix 222 | for(i in 1:m){ 223 | if(i == 1){ 224 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 225 | directed = directed) 226 | initial.set <- initialization(graph, prop.sample, m, n) 227 | }else{ 228 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 229 | directed = directed) 230 | initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n)) 231 | } 232 | } 233 | print(paste("Search Stage")) 234 | cat(paste("Searching over", length(initial.set[[1]]), "seed sets \n")) 235 | Results.temp <- list() 236 | K <- length(initial.set[[1]]) 237 | #detectCores detects the number of cores available on your instance 238 | registerDoParallel(detectCores()) 239 | Results.temp <- foreach(i=1:K,.packages="MultilayerExtraction") %dopar% { 240 | starter <- list() 241 | starter$vertex.set <- as.numeric(initial.set$vertex.set[[i]]) 242 | starter$layer.set <- as.numeric(initial.set$layer.set[[i]]) 243 | single.swap(starter, adjacency, mod.matrix, m, n) 244 | } 245 | Results.temp 246 | if(length(Results.temp) < 1){return("No Community Found")} 247 | Scores = rep(0, length(Results.temp)) 248 | Scores 249 | for(i in 1:length(Results.temp)){ 250 | if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000} 251 | if(length(Results.temp[[i]]$B) > 0){ 252 | Scores[i] = Results.temp[[i]]$Score 253 | } 254 | } 255 | Scores 256 | Scores = round(Scores, 2) 257 | Scores 258 | indx = which(!duplicated(Scores) == TRUE) 259 | indx 260 | for(i in 1:length(Results.temp)){ 261 | if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000} 262 | if(length(Results.temp[[i]]$B) > 0){ 263 | Scores[i] = Results.temp[[i]]$Score 264 | } 265 | } 266 | Scores = round(Scores, 5) 267 | Scores 268 | indx = which(!duplicated(Scores) == TRUE) 269 | indx 270 | indx.2 = which(Scores > min.score) 271 | indx.2 272 | Results2 = Results.temp[intersect(indx, indx.2)] 273 | Results2 274 | if(length(Results2) == 0){ 275 | Results = NULL 276 | return(Object = NULL) 277 | } 278 | if(length(Results2) > 0){ 279 | betas = seq(0.01, 1, by = 0.01) 280 | Results3 = list() 281 | Number.Communities = rep(0, length(betas)) 282 | Mean.Score = rep(0, length(betas)) 283 | for(i in 1:length(betas)){ 284 | temp = cleanup(Results2, betas[i]) 285 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 286 | Mean.Score[i] = temp$Mean.Score 287 | Number.Communities[i] = length(temp$Communities) 288 | } 289 | } 290 | Mean.Score 291 | Z = data.frame(Beta = betas, Mean.Score = Mean.Score, Number.Communities = Number.Communities) 292 | Object = list(Community.List = Results3, Diagnostics = Z) 293 | Z 294 | multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.sample = 0.05, directed = c(FALSE, TRUE)){ 295 | #adjacency should be an edgelist with three columns - node1, node2, layer 296 | #layer should be numbered with integers 297 | #each network has the same number of nodes 298 | #nodes are indexed by the integers starting from 1 299 | m <- max(adjacency[, 3]) #max of the layer index 300 | n <- length(unique(c(adjacency[, 1], adjacency[, 2]))) 301 | directed <- directed[1] 302 | #Calculate the modularity matrix 303 | print(paste("Estimation Stage")) 304 | mod.matrix <- modularity.matrix(adjacency) 305 | #Initialize the communities #TODO 306 | print(paste("Initialization Stage")) 307 | for(i in 1:m){ 308 | if(i == 1){ 309 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 310 | directed = directed) 311 | initial.set <- initialization(graph, prop.sample, m, n) 312 | }else{ 313 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 314 | directed = directed) 315 | initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n)) 316 | } 317 | } 318 | #Search Across Initial sets 319 | print(paste("Search Stage")) 320 | cat(paste("Searching over", length(initial.set[[1]]), "seed sets \n")) 321 | Results.temp <- list() 322 | K <- length(initial.set[[1]]) 323 | #detectCores detects the number of cores available on your instance 324 | registerDoParallel(detectCores()) 325 | Results.temp <- foreach(i=1:K,.packages="MultilayerExtraction") %dopar% { 326 | starter <- list() 327 | starter$vertex.set <- as.numeric(initial.set$vertex.set[[i]]) 328 | starter$layer.set <- as.numeric(initial.set$layer.set[[i]]) 329 | single.swap(starter, adjacency, mod.matrix, m, n) 330 | } 331 | #Cleanup the results: Keep the unique communities 332 | print(paste("Cleaning Stage")) 333 | if(length(Results.temp) < 1){return("No Community Found")} 334 | Scores = rep(0, length(Results.temp)) 335 | for(i in 1:length(Results.temp)){ 336 | if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000} 337 | if(length(Results.temp[[i]]$B) > 0){ 338 | Scores[i] = Results.temp[[i]]$Score 339 | } 340 | } 341 | Scores = round(Scores, 5) 342 | #keep only unique communities with score greater than threshold 343 | indx = which(!duplicated(Scores) == TRUE) 344 | indx.2 = which(Scores > min.score) 345 | Results2 = Results.temp[intersect(indx, indx.2)] 346 | if(length(Results2) == 0){ 347 | Results = NULL 348 | return(Object = NULL) 349 | } 350 | if(length(Results2) > 0){ 351 | betas = seq(0.01, 1, by = 0.01) 352 | Results3 = list() 353 | Number.Communities = rep(0, length(betas)) 354 | Mean.Score = rep(0, length(betas)) 355 | for(i in 1:length(betas)){ 356 | temp = cleanup(Results2, betas[i]) 357 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 358 | Mean.Score[i] = temp$Mean.Score 359 | Number.Communities[i] = length(temp$Communities) 360 | } 361 | } 362 | Z = data.frame(Beta = betas, Mean.Score = Mean.Score, Number.Communities = Number.Communities) 363 | Object = list(Community.List = Results3, Diagnostics = Z) 364 | class(Object) = "MultilayerCommunity" 365 | return(Object) 366 | } 367 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 368 | dev.new() 369 | plot(community.object) 370 | object <- refine(community.object, k = 6, m = m, n = n) 371 | num.layers <- colSums(object$Layers > 0) 372 | num.vertices <- colSums(object$Vertices) 373 | print(data.frame(Layers = num.layers, Vertices = num.vertices)) 374 | indx1 <- which(object$Vertices[,1] == 1) 375 | indx2 <- setdiff(which(object$Vertices[,2] == 1), indx1) 376 | indx3 <- setdiff(setdiff(which(object$Vertices[,3] == 1), indx1), indx2) 377 | indx4 <- setdiff(setdiff(setdiff(which(object$Vertices[,4] == 1), indx3), indx2),indx1) 378 | indx5 <- setdiff(setdiff(setdiff(setdiff(which(object$Vertices[,5] == 1), indx4), indx3),indx2), indx1) 379 | indx6 <- setdiff(setdiff(setdiff(setdiff(setdiff(which(object$Vertices[,6] == 1), 380 | indx1), indx2), indx3), indx4), indx5) 381 | none2 <- setdiff(1:61, union(union(union(indx1, indx2), union(indx3, indx4)), union(indx5, indx6))) 382 | image(object$Layers, main = "Layers in Communities") 383 | image(object$Layers, main = "Layers in Communities") 384 | re.order <- c(indx1, indx2, indx3, indx4, indx5, indx6, none2) 385 | image(AU_CS$coauthor[re.order,re.order], main = "Co-Author") 386 | image(AU_CS$leisure[re.order,re.order], main = "Leisure") 387 | image(AU_CS$work[re.order,re.order], main = "Work") 388 | image(AU_CS$lunch[re.order,re.order], main = "Lunch") 389 | image(AU_CS$facebook[re.order,re.order], main = "Facebook") 390 | dev.new() 391 | image(AU_CS$coauthor[re.order,re.order], main = "Co-Author") 392 | image(AU_CS$leisure[re.order,re.order], main = "Leisure") 393 | image(AU_CS$work[re.order,re.order], main = "Work") 394 | image(AU_CS$lunch[re.order,re.order], main = "Lunch") 395 | image(AU_CS$facebook[re.order,re.order], main = "Facebook") 396 | rm(list = ls()) 397 | install.packages("devtools") 398 | library(devtools, quietly = TRUE) 399 | #install and load MultilayerExtraction 400 | devtools::install_github('jdwilson4/multilayer_extraction') 401 | library(MultilayerExtraction, quietly = TRUE) 402 | library(Matrix, quietly = TRUE) 403 | library(igraph, quietly = TRUE) 404 | library(foreach, quietly = TRUE) 405 | library(doParallel, quietly = TRUE) 406 | library(parallel, quietly = TRUE) 407 | data("AU_CS") 408 | #number of nodes and layers 409 | n <- dim(AU_CS[[1]])[1] 410 | m <- length(AU_CS) 411 | #types of relationships among attendees 412 | relationship.names <- names(AU_CS) 413 | print(relationship.names) 414 | image(AU_CS$coauthor, main = "Co-author") 415 | image(AU_CS$facebook, main = "Leisure") 416 | image(AU_CS$leisure, main = "Facebook") 417 | Mean.Degree <- sapply(AU_CS, mean) * n 418 | print(Mean.Degree) 419 | network <- adjacency.to.edgelist(AU_CS) 420 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 421 | #plot the number of communities across overlap parameter beta 422 | plot(community.object, main = "Diagnostic Plot AU_CS") 423 | rm(list = ls()) 424 | devtools::install_github('jdwilson4/multilayer_extraction') 425 | library(MultilayerExtraction, quietly = TRUE) 426 | ?modularity_matrix 427 | ?adjacency_to_edgelist 428 | data("AU_CS") 429 | #number of nodes and layers 430 | n <- dim(AU_CS[[1]])[1] 431 | m <- length(AU_CS) 432 | network <- adjacency.to.edgelist(AU_CS) 433 | setwd("/Users/jdwilson4/Dropbox/Github/MultilayerExtraction") 434 | document() 435 | devtools::install_github('jdwilson4/multilayer_extraction') 436 | library(MultilayerExtraction, quietly = TRUE) 437 | devtools::install_github('jdwilson4/multilayer_extraction') 438 | devtools::install_github('jdwilson4/multilayer_extraction') 439 | document() 440 | document() 441 | devtools::install_github('jdwilson4/multilayer_extraction') 442 | devtools::install_github('jdwilson4/multilayer_extraction') 443 | document 444 | document() 445 | document() 446 | devtools::install_github('jdwilson4/multilayer_extraction') 447 | library(MultilayerExtraction, quietly = TRUE) 448 | network <- adjacency.to.edgelist(AU_CS) 449 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 450 | network 451 | multilayer.modularity.matrix() 452 | ?multilayer.modularity.matrix 453 | rm(list = ls()) 454 | devtools::install_github('jdwilson4/multilayer_extraction') 455 | library(MultilayerExtraction, quietly = TRUE) 456 | library(Matrix, quietly = TRUE) 457 | library(igraph, quietly = TRUE) 458 | library(foreach, quietly = TRUE) 459 | library(doParallel, quietly = TRUE) 460 | library(parallel, quietly = TRUE) 461 | data("AU_CS") 462 | #number of nodes and layers 463 | n <- dim(AU_CS[[1]])[1] 464 | m <- length(AU_CS) 465 | network <- adjacency.to.edgelist(AU_CS) 466 | ?multilayer.extraction 467 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 468 | class(network) 469 | adjacency = network 470 | m 471 | n 472 | for(i in 1:m){ 473 | if(i == 1){ 474 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 475 | directed = directed) 476 | initial.set <- initialization(graph, prop.sample, m, n) 477 | }else{ 478 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 479 | directed = directed) 480 | initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n)) 481 | } 482 | } 483 | directed = FALSE 484 | for(i in 1:m){ 485 | if(i == 1){ 486 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 487 | directed = directed) 488 | initial.set <- initialization(graph, prop.sample, m, n) 489 | }else{ 490 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 491 | directed = directed) 492 | initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n)) 493 | } 494 | } 495 | prop.sample = 0.01 496 | for(i in 1:m){ 497 | if(i == 1){ 498 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 499 | directed = directed) 500 | initial.set <- initialization(graph, prop.sample, m, n) 501 | }else{ 502 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 503 | directed = directed) 504 | initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n)) 505 | } 506 | } 507 | mod.matrix <- multilayer.modularity.matrix(adjacency) 508 | class(mod.matrix) 509 | mod.matrix 510 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 511 | library(MultilayerExtraction) 512 | ?multilayer.extraction 513 | -------------------------------------------------------------------------------- /Archived/expectation_CM.R: -------------------------------------------------------------------------------- 1 | #' expected.CM 2 | #' 3 | #' Function that calculates the expected edge weight between each pair of nodes in each layer of a multilayer network. 4 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 5 | #' 6 | #' @keywords community detection, multilayer networks, configuration model, random graph models 7 | #' @return 8 | #' \itemize{ 9 | #' \item P: a list object whose tth entry is the expected adjacency matrix of the tth layer 10 | #' } 11 | #'@references 12 | #'\itemize{ 13 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 14 | #' extraction in multilayer networks with heterogeneous community structure." 15 | #' } 16 | #' @author James D. Wilson 17 | #' @export 18 | 19 | expected.CM <- function(adjacency){ 20 | #check that adjacency is a list object 21 | if(class(adjacency) != "list"){ 22 | adjacency <- list(adjacency) 23 | } 24 | m <- length(adjacency) #number of layers 25 | P <- list() 26 | for(i in 1:m){ 27 | degrees <- matrix(rowSums(as.matrix(adjacency[[i]])), ncol = 1) 28 | d.tot <- sum(degrees) 29 | expected <- degrees%*%t(degrees) / d.tot #expected under configuration model 30 | P[[i]] <- expected 31 | } 32 | return(P) 33 | } -------------------------------------------------------------------------------- /Archived/initialization.R: -------------------------------------------------------------------------------- 1 | #' initialization 2 | #' 3 | #' Function that generates randomly selected neighborhood vertex-layer sets to begin the 4 | #' multilayer.extraction algorithm. 5 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 6 | #' @param prop.sample: the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05 7 | #' specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer. 8 | #' 9 | #' @keywords community detection, multilayer networks, configuration model, random graph models 10 | #' @return 11 | #' \itemize{ 12 | #' \item neighborhoods: a list object of length prop.sample * n, where each entry contains 13 | #' a vertex set and layer set from which multilayer.extraction can be run. 14 | #' } 15 | #' @details A neighborhood of vertex u is defined as the collection of vertices that 16 | #' have higher than the mean connectivity of vertex u, when aggregated across layers. The 17 | #' chosen layer set is a random sample of size m/2. 18 | #'@references 19 | #'\itemize{ 20 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 21 | #' extraction in multilayer networks with heterogeneous community structure." 22 | #' } 23 | #' @author James D. Wilson 24 | #' @export 25 | 26 | initialization = function(adjacency, prop.sample){ 27 | 28 | if(class(adjacency) != "list"){ 29 | adjacency <- list(adjacency) 30 | } 31 | 32 | m <- length(adjacency) #total number of layers 33 | n <- dim(adjacency[[1]])[1] #total number of vertices 34 | 35 | if(m == 1){ 36 | adj.sum <- adjacency[[1]] 37 | } 38 | 39 | if(m > 1){ 40 | adj.sum = Reduce("+", adjacency[1:m]) 41 | } 42 | 43 | #mean.connection = mean(as.matrix(adj.sum)) 44 | 45 | thresh = function(x, m){ 46 | #random choice of layer set 47 | layer.set = sample(1:m, ceiling(m/2)) 48 | median.connection <- median(x) #mean connection of the given vertex 49 | #vertex set is for those with higher than median connection 50 | vertex.set <- which(x > median.connection) 51 | return(list(vertex.set = vertex.set, layer.set = layer.set)) 52 | } 53 | 54 | neighborhoods <- apply(adj.sum, 1, thresh, m) 55 | keep.sample <- sample(1:n, ceiling(prop.sample*n)) #keep a random sample of the neighborhoods 56 | neighborhoods <- neighborhoods[keep.sample] 57 | return(neighborhoods) 58 | } -------------------------------------------------------------------------------- /Archived/multilayer_extraction.R: -------------------------------------------------------------------------------- 1 | #' multilayer.extraction 2 | #' 3 | #'Function that identifies statistically significant vertex-layer communities in multilayer networks. 4 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 5 | #' @param seed: seed for reproducibility. The initial neighborhoods that act as seeds for the multilayer extraction algorithm 6 | #' are random in this algorithm; hence, a seed will need to be set for reproducible results. Default is 123. 7 | #' @param min.score: the minimum score allowable for an extracted community. Default is 0. 8 | #' @param prop.sample: the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05 9 | #' specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer. 10 | #' Default is 0.05. 11 | #' @keywords community detection, multilayer networks, configuration model, random graph models 12 | #' @return A MultilayerCommunity object, which is a list containing the following objects 13 | #' \itemize{ 14 | #' \item Community.List: a list of vertex-layer communities extracted from the algorithm 15 | #' \item Diagnostics: the diagnostics associated with each extracted community. This is a summary of 16 | #' each community, and includes for each level of overlap parameter Beta the mean score, and the total number 17 | #' of communities. This is used for determining the overall number of communities in a multilayer network. 18 | #' } 19 | #'@references 20 | #'\itemize{ 21 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 22 | #' extraction in multilayer networks with heterogeneous community structure." 23 | #' } 24 | #' @author James D. Wilson 25 | #' @export 26 | #' 27 | #' 28 | multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.sample = 0.05){ 29 | #check to see if adjacency is a list object 30 | if(class(adjacency) != "list"){ 31 | adjacency <- list(adjacency) 32 | } 33 | m <- length(adjacency) 34 | n <- dim(adjacency[[1]])[1] 35 | 36 | #Estimate the multilayer configuration model 37 | print(paste("Estimation Stage")) 38 | 39 | expected <- expected.CM(adjacency) 40 | 41 | #Initialize the communities 42 | print(paste("Initialization Stage")) 43 | 44 | initial.set = initialization(adjacency, prop.sample) 45 | 46 | #Search Across Initial sets 47 | print(paste("Search Stage")) 48 | 49 | cat(paste("Searching over", length(initial.set), "seed sets \n")) 50 | Results.temp <- list() 51 | K <- length(initial.set) 52 | #note: we can parallelize this part of the search! 53 | registerDoParallel(detectCores()) ###### detectCores will automatically place the number of cores your computer has. 54 | 55 | Results.temp <- foreach(i=1:K,.packages="MultilayerExtraction") %dopar% { 56 | single.swap(initial.set[[i]], adjacency, expected)} 57 | 58 | #Cleanup the results: Keep the unique communities 59 | print(paste("Cleaning Stage")) 60 | 61 | if(length(Results.temp) < 1){return("No Community Found")} 62 | 63 | Scores = rep(0, length(Results.temp)) 64 | 65 | for(i in 1:length(Results.temp)){ 66 | if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000} 67 | if(length(Results.temp[[i]]$B) > 0){ 68 | Scores[i] = Results.temp[[i]]$Score 69 | } 70 | } 71 | 72 | Scores = round(Scores, 2) 73 | #keep only unique communities with score greater than threshold 74 | indx = which(!duplicated(Scores) == TRUE) 75 | indx.2 = which(Scores > min.score) 76 | Results2 = Results.temp[intersect(indx, indx.2)] 77 | if(length(Results2) == 0){ 78 | Results = NULL 79 | return(Object = NULL) 80 | } 81 | if(length(Results2) > 0){ 82 | betas = seq(0.01, 1, by = 0.01) 83 | Results3 = list() 84 | Number.Communities = rep(0,length(betas)) 85 | Mean.Score = rep(0,length(betas)) 86 | for(i in 1:length(betas)){ 87 | temp = cleanup(Results2, betas[i]) 88 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 89 | Mean.Score[i] = temp$Mean.Score 90 | Number.Communities[i] = length(temp$Communities) 91 | } 92 | } 93 | 94 | Z = data.frame(Beta = betas, Mean.Score = Mean.Score, Number.Communities = Number.Communities) 95 | Object = list(Community.List = Results3, Diagnostics = Z) 96 | class(Object) = "MultilayerCommunity" 97 | return(Object) 98 | } 99 | 100 | 101 | ####################################################################### 102 | ##Swapping functions## 103 | ####Function for determining which vertex/layer should be swapped 104 | swap.candidate = function(set, changes, add, remove, score.old){ 105 | #If there are only some to be added 106 | if(length(remove) == 0 & length(add) > 0){ 107 | if(is.na(add) == FALSE){ 108 | if(changes[add] > 0){ 109 | set.new <- union(set, add) 110 | score.old <- score.old + changes[add] 111 | } 112 | if(changes[add] < 0){ 113 | set.new <- set 114 | return(list(set.new = set.new, score.old = score.old)) 115 | } 116 | } 117 | } 118 | 119 | #If there are only some to removed 120 | if(length(add) == 0 & length(remove) > 0){ 121 | if(is.na(remove) == FALSE){ 122 | if(changes[remove] > 0){ 123 | set.new <- setdiff(set,remove) 124 | score.old <- score.old + changes[remove] 125 | return(list(set.new = set.new, score.old = score.old)) 126 | } 127 | if(changes[remove] < 0){ 128 | set.new <- set 129 | return(list(set.new = set.new, score.old = score.old)) 130 | } 131 | } 132 | } 133 | 134 | #If there are some to removed and some to be added 135 | if(length(add) > 0 & length(remove) > 0){ 136 | if(changes[remove] < 0 & changes[add] < 0){ 137 | set.new <- set 138 | return(list(set.new = set.new, score.old = score.old)) 139 | } 140 | if(changes[remove] > changes[add] & changes[remove] > 0){ 141 | set.new <- setdiff(set,remove) 142 | score.old <- score.old + changes[remove] 143 | return(list(set.new = set.new, score.old = score.old)) 144 | } 145 | if(changes[remove] < changes[add] & changes[add] > 0){ 146 | set.new <- union(set,add) 147 | score.old <- score.old + changes[add] 148 | return(list(set.new = set.new, score.old = score.old)) 149 | } 150 | } 151 | 152 | #if there are none to be added nor removed 153 | if(length(add) == 0 & length(remove) == 0){ 154 | return(list(set.new = set, score.old = score.old)) 155 | } 156 | } 157 | 158 | 159 | ######Choosing which layer to swap one at a time###### 160 | swap.layer = function(adjacency, expected, layer.set, vertex.set, score.old){ 161 | 162 | if(class(adjacency) != "list"){ 163 | adjacency <- list(adjacency) 164 | } 165 | if(class(expected) != "list"){ 166 | expected <- list(expected) 167 | } 168 | 169 | m <- length(adjacency) 170 | n <- dim(adjacency[[1]])[1] 171 | 172 | 173 | if(length(layer.set) == 0){ 174 | print('No Community Found') 175 | return(NULL) 176 | } 177 | 178 | changes <- layer.change(adjacency, expected, layer.set, vertex.set, score.old) 179 | changes[which(is.null(changes) == TRUE)] <- 0 180 | changes[which(is.na(changes) == TRUE)] <- 0 181 | 182 | outside.candidate <- which.max(changes[setdiff(1:m, layer.set)]) #which layer should we add? 183 | l.add <- setdiff(1:m, layer.set)[outside.candidate] 184 | 185 | inside.candidate <- which.max(changes[layer.set]) #which layer should we remove? 186 | l.sub <- layer.set[inside.candidate] 187 | 188 | #Make the swap 189 | results <- swap.candidate(layer.set, changes, l.add, l.sub, score.old) 190 | layer.set.new <- results$set.new 191 | score.old <- results$score.old 192 | 193 | return(list(layer.set.new = layer.set.new, score.old = score.old)) 194 | } 195 | 196 | ####################################################################### 197 | ######Choosing which vertex to swap one at a time###### 198 | swap.vertex = function(adjacency, expected, layer.set, vertex.set, score.old){ 199 | 200 | if(class(adjacency) != "list"){ 201 | adjacency <- list(adjacency) 202 | } 203 | if(class(expected) != "list"){ 204 | expected <- list(expected) 205 | } 206 | 207 | m <- length(adjacency) 208 | n <- dim(adjacency[[1]])[1] 209 | 210 | if(length(layer.set) == 0){ 211 | print('No Community Found') 212 | return(NULL) 213 | } 214 | 215 | #swap decision 216 | if(length(vertex.set) < 5){ 217 | print('No Community Found') 218 | return(NULL) 219 | } 220 | 221 | if(length(vertex.set) == n){ 222 | print('No Community Found') 223 | return(NULL) 224 | } 225 | changes = vertex.change(adjacency, expected, layer.set, vertex.set, score.old) 226 | 227 | #changes[which(is.null(changes) == TRUE)] <- 0 228 | 229 | #changes[which(is.na(changes) == TRUE)] <- 0 230 | 231 | #Get candidates 232 | outside.candidate <- which.max(changes[setdiff(1:n, vertex.set)])[1] 233 | u.add <- setdiff(1:n, vertex.set)[outside.candidate] 234 | 235 | inside.candidate <- which.max(changes[vertex.set]) 236 | u.sub <- vertex.set[inside.candidate] 237 | 238 | #Make the swap 239 | results <- swap.candidate(vertex.set, changes, u.add, u.sub, score.old) 240 | return(list(B.new = results$set.new, score.old = results$score.old)) 241 | } 242 | ####################################################################### 243 | #Inner function for a single swap inside the function for Multilayer.Extraction 244 | #Note: check the names of initial set 245 | single.swap = function(initial.set, adjacency, expected){ 246 | 247 | m <- length(adjacency) 248 | n <- dim(adjacency[[1]])[1] 249 | 250 | #initialize vertex.set and layer.set 251 | B.new <- initial.set$vertex.set 252 | I.new <- initial.set$layer.set 253 | score.old <- score(adjacency, expected, vertex.set = B.new, layer.set = I.new) 254 | 255 | iterations <- 1 256 | B.fixed <- B.new + 1 257 | I.fixed <- I.new + 1 258 | 259 | #main loop 260 | while(length(intersect(B.fixed, B.new)) < max(length(B.fixed), length(B.new)) | 261 | length(intersect(I.fixed, I.new)) < max(length(I.fixed), length(I.new))){ 262 | 263 | if(length(B.new) < 2 | length(I.new) < 1){ 264 | print('No community found') 265 | return(NULL) 266 | } 267 | 268 | #seems redundant, check... 269 | B.fixed <- B.new 270 | I.fixed <- I.new 271 | 272 | B <- B.new 273 | I <- I.new + 1 274 | 275 | #update layer set 276 | if(m > 1){ 277 | while(length(intersect(I.new, I)) < max(length(I.new), length(I))){ 278 | 279 | I <- I.new 280 | results <- swap.layer(adjacency, expected, I, B, score.old) 281 | I.new <- results$layer.set.new 282 | score.old <- results$score.old 283 | } 284 | } 285 | if(m == 1){ 286 | I.new <- 1 287 | } 288 | 289 | #update vertex set 290 | B <- B - 1 291 | B.new <- B + 1 292 | 293 | while(length(intersect(B.new, B)) < max(length(B.new), length(B))){ 294 | B <- B.new 295 | results <- swap.vertex(adjacency, expected, I.new, B, score.old) 296 | 297 | B.new <- results$B.new 298 | score.old <- results$score.old 299 | } 300 | } 301 | return(list(B = sort(B.new), I = sort(I.new), Score = score.old)) 302 | } 303 | 304 | ######Effect on score when adding or subtracting a layer####### 305 | layer.change = function(adjacency, expected, layer.set, vertex.set, score.old){ 306 | 307 | #first check that adjacency and expected are lists 308 | if(class(adjacency) != "list"){ 309 | adjacency <- list(adjacency) 310 | } 311 | 312 | if(class(expected) != "list"){ 313 | expected <- list(expected) 314 | } 315 | 316 | 317 | m <- length(adjacency) 318 | n <- dim(adjacency[[1]])[1] 319 | indx <- setdiff(1:m, layer.set) #which layers are not in the current set 320 | score.changes <- rep(0, m) 321 | 322 | for(i in 1:m){ 323 | if(i %in% indx){ 324 | score.changes[i] <- score(adjacency, expected, vertex.set = 325 | vertex.set, 326 | layer.set = union(layer.set, i)) - score.old 327 | } 328 | if(i %in% indx == FALSE){ 329 | score.changes[i] <- score(adjacency, expected, vertex.set = 330 | vertex.set, 331 | layer.set = setdiff(layer.set, i)) - score.old 332 | } 333 | } 334 | return(score.changes) 335 | } 336 | 337 | ######Effect on score when adding or subtracting a vertex####### 338 | vertex.change = function(adjacency, expected, layer.set, vertex.set, score.old){ 339 | 340 | #first check that adjacency and expected are lists 341 | if(class(adjacency) != "list"){ 342 | adjacency <- list(adjacency) 343 | } 344 | 345 | if(class(expected) != "list"){ 346 | } 347 | n <- dim(adjacency[[1]])[1] 348 | indx <- setdiff(1:n, vertex.set) 349 | score.changes <- rep(0, n) 350 | #the following can also be parallelized! 351 | 352 | for(i in 1:n){ 353 | if(i %in% indx){ 354 | score.changes[i] <- score(adjacency, expected, vertex.set = 355 | union(vertex.set, i), 356 | layer.set = layer.set) - score.old 357 | } 358 | if(i %in% indx == FALSE){ 359 | score.changes[i] <- score(adjacency, expected, vertex.set = 360 | setdiff(vertex.set, i), 361 | layer.set = layer.set) - score.old 362 | } 363 | } 364 | 365 | return(score.changes) 366 | } -------------------------------------------------------------------------------- /Archived/score.R: -------------------------------------------------------------------------------- 1 | #' score 2 | #' 3 | #'Function that calculates the score of a multilayer vertex - layer community 4 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 5 | #' @param expected: the expected value of the connectivity in the multilayer network adjacency. This can be calculated using the 6 | #' expected.CM function in this package. 7 | #' @param vertex.set: a numeric specifying the nodes within the multilayer community of interest. 8 | #' @param layer.set: a numeric specifying the layers within the multilayer community of interest. 9 | #' @keywords community detection, multilayer networks, configuration model, random graph models 10 | #' @return 11 | #' \itemize{ 12 | #' \item score: the score of the multilayer community 13 | #' } 14 | 15 | #'@references 16 | #'\itemize{ 17 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 18 | #' extraction in multilayer networks with heterogeneous community structure." 19 | #' } 20 | #' @author James D. Wilson 21 | #' @export 22 | #' 23 | #' 24 | 25 | score = function(adjacency, expected, vertex.set, layer.set){ 26 | 27 | #check that input arguments are appropriately defined 28 | if(class(adjacency) != "list"){ 29 | adjacency <- list(adjacency) 30 | } 31 | 32 | if(class(expected) != "list"){ 33 | expected <- list(expected) 34 | } 35 | if(length(layer.set) < 1 || length(vertex.set) < 1){ 36 | return(obs.score = 0) 37 | } 38 | 39 | if(length(layer.set) == 1){ 40 | adj.sum <- adjacency[[layer.set]] 41 | exp.sum <- expected[[layer.set]] 42 | } 43 | 44 | if(length(layer.set) > 1){ 45 | adj.sum <- Reduce('+', adjacency[layer.set]) 46 | exp.sum <- Reduce('+', expected[layer.set]) 47 | } 48 | 49 | D.B <- matrix(adj.sum[vertex.set, vertex.set], ncol = 1) 50 | P.vec <- matrix(exp.sum[vertex.set, vertex.set], ncol = 1) 51 | 52 | 53 | #calculate the modularity score and set all negative values to 0 54 | modularity.score <- D.B - P.vec 55 | modularity.score[which(modularity.score < 0)] <- 0 56 | 57 | #calculate the score of the community 58 | tot.mod <- sum(modularity.score) 59 | obs.score <- 2*(tot.mod)^2 / (choose(length(vertex.set), 2)*(length(layer.set))) 60 | 61 | return(obs.score) 62 | } -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: MultilayerExtraction 2 | Title: What the Package Does (one line, title case) 3 | Version: 0.0.0.9000 4 | Author: James D. Wilson [aut, cre] 5 | Maintainer: James D. Wilson 6 | Description: An R package to identify and describe strongly connected vertex-layer communities in multilayer networks. 7 | Depends: 8 | R (>= 3.2.2) 9 | License: GPL (>= 2) 10 | Encoding: UTF-8 11 | LazyData: TRUE 12 | RoxygenNote: 6.0.1 13 | -------------------------------------------------------------------------------- /MultilayerExtraction.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | Encoding: UTF-8 9 | 10 | AutoAppendNewline: Yes 11 | StripTrailingWhitespace: Yes 12 | 13 | BuildType: Package 14 | PackageUseDevtools: Yes 15 | PackageInstallArgs: --no-multiarch --with-keep.source 16 | PackageRoxygenize: rd,collate,namespace 17 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(plot,MultilayerCommunity) 4 | export(adjacency.to.edgelist) 5 | export(expected.CM) 6 | export(initialization) 7 | export(multilayer.extraction) 8 | export(multilayer.modularity.matrix) 9 | export(refine) 10 | export(score) 11 | -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/R/.DS_Store -------------------------------------------------------------------------------- /R/AU_CS.R: -------------------------------------------------------------------------------- 1 | #' dataset: AU_CS 2 | #' 3 | #' Data set that contains the multilayer network describing the multilayer relationships among attendees to an Austrailian computer science conference. 4 | #' In total, there were 61 attendees (represented by the nodes in each layer), and 5 relationships that are represented by the layers in the network. The 5 | #' data is provided in a list format where each entry represents a different layer, and the type of relationship is represented by the name of the list entry. 6 | #' 7 | #' @docType data 8 | #' 9 | #' @usage data("AU_CS") 10 | #' 11 | #' @format This data set contains a list of adjacency matrices where each entry represents a different social relationship. 12 | #' 13 | #' @keywords datasets 14 | #' 15 | #'@references 16 | #' 17 | #'\itemize{ 18 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 19 | #' extraction in multilayer networks with heterogeneous community structure." 20 | #' } 21 | #' @author James D. Wilson 22 | #' 23 | #' @examples 24 | #' data(AU_CS) 25 | #' image(Matrix(Adjacency$facebook)) #visualize the adjacency matrix that represents the Facebook friendships of the conference attendees. 26 | 27 | "AU_CS" -------------------------------------------------------------------------------- /R/adjacency_to_edgelist.R: -------------------------------------------------------------------------------- 1 | #' adjacency.to.edgelist 2 | #' 3 | #'Function that converts a list of adjacency matrices to an edgelist. 4 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 5 | #' @param mode: directed or undirected 6 | #' @param weighted: currently not functioning. Coming in later version. 7 | #' @keywords community detection, multilayer networks, configuration model, random graph models 8 | #' @return edgelist: a matrix with three columns representing edge connections- node1, node2, layer 9 | 10 | #'@references 11 | #'\itemize{ 12 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based 13 | #' extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research 14 | #' } 15 | #' @author James D. Wilson 16 | #' @export 17 | #' 18 | #' 19 | adjacency.to.edgelist <- function(adjacency, mode = c("undirected", "directed"), weighted = NULL){ 20 | if(class(adjacency) != "list"){ 21 | adjacency <- list(adjacency) 22 | } 23 | mode <- mode[1] 24 | m <- length(adjacency) 25 | edgelist <- c(0, 0, 0) 26 | if(mode == "undirected"){ 27 | directed <- FALSE}else{ 28 | directed <- TRUE 29 | } 30 | 31 | for(i in 1:m){ 32 | #first convert to an igraph object 33 | temp.graph <- graph.adjacency(as.matrix(adjacency[[i]]), mode = mode, weighted = weighted) 34 | 35 | #elminate multiple edges 36 | temp.graph2 <- simplify(temp.graph) 37 | 38 | 39 | #store in an edgelist 40 | edgelist <- rbind(edgelist, cbind(get.edgelist(temp.graph2), i)) 41 | } 42 | edgelist <- edgelist[-1, ] 43 | colnames(edgelist) <- c("node1", "node2", "layer") 44 | return(edgelist) 45 | } -------------------------------------------------------------------------------- /R/expectation_CM.R: -------------------------------------------------------------------------------- 1 | #' expected.CM 2 | #' 3 | #' Function that calculates the expected edge weight between each pair of nodes in each layer of a multilayer network. 4 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 5 | #' 6 | #' @keywords community detection, multilayer networks, configuration model, random graph models 7 | #' @return 8 | #' \itemize{ 9 | #' \item P: a list object whose tth entry is the expected adjacency matrix of the tth layer 10 | #' } 11 | #'@references 12 | #'\itemize{ 13 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 14 | #' extraction in multilayer networks with heterogeneous community structure." 15 | #' } 16 | #' @author James D. Wilson 17 | #' @export 18 | 19 | expected.CM <- function(adjacency, directed = FALSE){ 20 | #check that adjacency is a list object 21 | m <- max(adjacency[, 3]) #max of the layer index 22 | n <- length(unique(c(adjacency[, 1], adjacency[, 2]))) 23 | 24 | P <- list() 25 | for(i in 1:m){ 26 | #may have to make sure that the appropriate number of nodes are specified 27 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 28 | directed = directed) 29 | degrees <- degree(graph) 30 | d.tot <- sum(degrees) 31 | expected <- degrees%*%t(degrees) / d.tot #expected adjacency matrix under configuration model 32 | #save the expected value as a weighted igraph object 33 | P[[i]] <- graph_from_adjacency_matrix(expected, mode = "undirected", weighted = TRUE) 34 | } 35 | return(P) 36 | } -------------------------------------------------------------------------------- /R/initialization.R: -------------------------------------------------------------------------------- 1 | #' initialization 2 | #' 3 | #' Function that generates randomly selected neighborhood vertex-layer sets to begin the 4 | #' multilayer.extraction algorithm. 5 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 6 | #' @param prop.sample: the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05 7 | #' specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer. 8 | #' 9 | #' @keywords community detection, multilayer networks, configuration model, random graph models 10 | #' @return 11 | #' \itemize{ 12 | #' \item neighborhoods: a list object of length prop.sample * n, where each entry contains 13 | #' a vertex set and layer set from which multilayer.extraction can be run. 14 | #' } 15 | #' @details A neighborhood of vertex u is defined as the collection of vertices that 16 | #' have higher than the mean connectivity of vertex u, when aggregated across layers. The 17 | #' chosen layer set is a random sample of size m/2. 18 | #'@references 19 | #'\itemize{ 20 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 21 | #' extraction in multilayer networks with heterogeneous community structure." 22 | #' } 23 | #' @author James D. Wilson 24 | #' @export 25 | 26 | #take neighborhoods from each layer 27 | initialization = function(adjacency, prop.sample, m, n){ 28 | layer.set = replicate(n = ceiling(prop.sample*n), sample(1:m, ceiling(m/2)), simplify = FALSE) 29 | neighborhoods <- neighborhood(adjacency, order = 1) 30 | keep.sample <- sample(1:n, ceiling(prop.sample*n)) #keep a random sample of the neighborhoods 31 | neighborhoods <- neighborhoods[keep.sample] 32 | return(list(vertex.set = neighborhoods, layer.set = layer.set)) 33 | } -------------------------------------------------------------------------------- /R/modularity_matrix.R: -------------------------------------------------------------------------------- 1 | #' multilayer.modularity.matrix 2 | #' 3 | #'Function that calculates the modularity matrix for each layer in a multilayer network 4 | #' @param adjacency: an edgelist. Use adjacency.to.edgelist function to obtain this object 5 | #' @keywords community detection, multilayer networks, configuration model, random graph models 6 | #' @return mod.matrix: an igraph object representing the modularity matrix for each layer 7 | #'@references 8 | #'\itemize{ 9 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based 10 | #' extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research 11 | #' } 12 | #' @author James D. Wilson 13 | #' @export 14 | #' 15 | #' 16 | 17 | multilayer.modularity.matrix <- function(adjacency, directed = FALSE){ 18 | m <- max(adjacency[, 3]) #max of the layer index 19 | n <- length(unique(c(adjacency[, 1], adjacency[, 2]))) 20 | mod.matrix <- list() 21 | for(i in 1:m){ 22 | #may have to make sure that the appropriate number of nodes are specified 23 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 24 | directed = directed) 25 | graph <- add_vertices(graph, n - length(V(graph))) 26 | temp <- modularity_matrix(graph, membership = rep(1, n)) 27 | #store as an igraph object with weights 28 | mod.matrix[[i]] <- graph_from_adjacency_matrix(temp, mode = "undirected", weighted = TRUE) 29 | } 30 | return(mod.matrix) 31 | } -------------------------------------------------------------------------------- /R/multilayer_extraction.R: -------------------------------------------------------------------------------- 1 | #' multilayer.extraction 2 | #' 3 | #'Function that identifies statistically significant vertex-layer communities in multilayer networks. 4 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 5 | #' @param seed: seed for reproducibility. The initial neighborhoods that act as seeds for the multilayer extraction algorithm 6 | #' are random in this algorithm; hence, a seed will need to be set for reproducible results. Default is 123. 7 | #' @param min.score: the minimum score allowable for an extracted community. Default is 0. 8 | #' @param prop.sample: the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05 9 | #' specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer. 10 | #' Default is 0.05. 11 | #' @keywords community detection, multilayer networks, configuration model, random graph models 12 | #' @return A MultilayerCommunity object, which is a list containing the following objects 13 | #' \itemize{ 14 | #' \item Community.List: a list of vertex-layer communities extracted from the algorithm 15 | #' \item Diagnostics: the diagnostics associated with each extracted community. This is a summary of 16 | #' each community, and includes for each level of overlap parameter Beta the mean score, and the total number 17 | #' of communities. This is used for determining the overall number of communities in a multilayer network. 18 | #' } 19 | #'@references 20 | #'\itemize{ 21 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based 22 | #' extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research 23 | #' } 24 | #' @author James D. Wilson 25 | #' @export 26 | #' 27 | #' 28 | multilayer.extraction = function(adjacency, seed = 123, min.score = 0, prop.sample = 0.05, directed = c(FALSE, TRUE)){ 29 | #adjacency should be an edgelist with three columns - node1, node2, layer 30 | #layer should be numbered with integers 31 | #each network has the same number of nodes 32 | #nodes are indexed by the integers starting from 1 33 | 34 | m <- max(adjacency[, 3]) #max of the layer index 35 | n <- length(unique(c(adjacency[, 1], adjacency[, 2]))) 36 | directed <- directed[1] 37 | #Calculate the modularity matrix 38 | print(paste("Estimation Stage")) 39 | 40 | mod.matrix <- multilayer.modularity.matrix(adjacency) 41 | 42 | #Initialize the communities 43 | print(paste("Initialization Stage")) 44 | 45 | for(i in 1:m){ 46 | if(i == 1){ 47 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 48 | directed = directed) 49 | initial.set <- initialization(graph, prop.sample, m, n) 50 | }else{ 51 | graph <- graph_from_edgelist(as.matrix(subset(as.data.frame(adjacency), layer == i))[, 1:2], 52 | directed = directed) 53 | initial.set <- Map(c, initial.set, initialization(graph, prop.sample, m, n)) 54 | } 55 | } 56 | 57 | #Search Across Initial sets 58 | print(paste("Search Stage")) 59 | 60 | cat(paste("Searching over", length(initial.set[[1]]), "seed sets \n")) 61 | Results.temp <- list() 62 | K <- length(initial.set[[1]]) 63 | 64 | #detectCores detects the number of cores available on your instance 65 | 66 | registerDoParallel(detectCores()) 67 | Results.temp <- foreach(i=1:K,.packages="MultilayerExtraction") %dopar% { 68 | starter <- list() 69 | starter$vertex.set <- as.numeric(initial.set$vertex.set[[i]]) 70 | #if the initial neighborhood is of length 1, add a random vertex 71 | if(length(starter$vertex.set) < 2){ 72 | starter$vertex.set <- c(starter$vertex.set, setdiff(1:n, starter$vertex.set)[1]) 73 | } 74 | starter$layer.set <- as.numeric(initial.set$layer.set[[i]]) 75 | single.swap(starter, adjacency, mod.matrix, m, n) 76 | } 77 | 78 | #Cleanup the results: Keep the unique communities 79 | print(paste("Cleaning Stage")) 80 | 81 | if(length(Results.temp) < 1){return("No Community Found")} 82 | 83 | Scores = rep(0, length(Results.temp)) 84 | 85 | for(i in 1:length(Results.temp)){ 86 | if(length(Results.temp[[i]]$B) == 0){Scores[i] = -1000} 87 | if(length(Results.temp[[i]]$B) > 0){ 88 | Scores[i] = Results.temp[[i]]$Score 89 | } 90 | } 91 | 92 | Scores = round(Scores, 5) 93 | #keep only unique communities with score greater than threshold 94 | indx = which(!duplicated(Scores) == TRUE) 95 | indx.2 = which(Scores > min.score) 96 | Results2 = Results.temp[intersect(indx, indx.2)] 97 | if(length(Results2) == 0){ 98 | Results = NULL 99 | return(Object = NULL) 100 | } 101 | if(length(Results2) > 0){ 102 | betas = seq(0.01, 1, by = 0.01) 103 | Results3 = list() 104 | Number.Communities = rep(0, length(betas)) 105 | Mean.Score = rep(0, length(betas)) 106 | for(i in 1:length(betas)){ 107 | temp = cleanup(Results2, betas[i]) 108 | Results3[[i]] = list(Beta = betas[i], Communities = temp$Communities) 109 | Mean.Score[i] = temp$Mean.Score 110 | Number.Communities[i] = length(temp$Communities) 111 | } 112 | } 113 | 114 | Z = data.frame(Beta = betas, Mean.Score = Mean.Score, Number.Communities = Number.Communities) 115 | Object = list(Community.List = Results3, Diagnostics = Z) 116 | class(Object) = "MultilayerCommunity" 117 | return(Object) 118 | } 119 | 120 | 121 | ####################################################################### 122 | ##Swapping functions## 123 | ####Function for determining which vertex/layer should be swapped 124 | swap.candidate = function(set, changes, add, remove, score.old){ 125 | #If there are only some to be added 126 | if(length(remove) == 0 & length(add) > 0){ 127 | if(is.na(add) == FALSE){ 128 | if(changes[add] > 0){ 129 | set.new <- union(set, add) 130 | score.old <- score.old + changes[add] 131 | } 132 | if(changes[add] < 0){ 133 | set.new <- set 134 | return(list(set.new = set.new, score.old = score.old)) 135 | } 136 | } 137 | } 138 | 139 | #If there are only some to removed 140 | if(length(add) == 0 & length(remove) > 0){ 141 | if(is.na(remove) == FALSE){ 142 | if(changes[remove] > 0){ 143 | set.new <- setdiff(set,remove) 144 | score.old <- score.old + changes[remove] 145 | return(list(set.new = set.new, score.old = score.old)) 146 | } 147 | if(changes[remove] < 0){ 148 | set.new <- set 149 | return(list(set.new = set.new, score.old = score.old)) 150 | } 151 | } 152 | } 153 | 154 | #If there are some to removed and some to be added 155 | if(length(add) > 0 & length(remove) > 0){ 156 | if(changes[remove] < 0 & changes[add] < 0){ 157 | set.new <- set 158 | return(list(set.new = set.new, score.old = score.old)) 159 | } 160 | if(changes[remove] > changes[add] & changes[remove] > 0){ 161 | set.new <- setdiff(set, remove) 162 | score.old <- score.old + changes[remove] 163 | return(list(set.new = set.new, score.old = score.old)) 164 | } 165 | if(changes[remove] < changes[add] & changes[add] > 0){ 166 | set.new <- union(set, add) 167 | score.old <- score.old + changes[add] 168 | return(list(set.new = set.new, score.old = score.old)) 169 | } 170 | } 171 | 172 | #if there are none to be added nor removed 173 | if(length(add) == 0 & length(remove) == 0){ 174 | return(list(set.new = set, score.old = score.old)) 175 | } 176 | } 177 | 178 | ######Choosing which layer to swap one at a time###### 179 | swap.layer = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){ 180 | 181 | if(length(layer.set) == 0){ 182 | print('No Community Found') 183 | return(NULL) 184 | } 185 | 186 | changes <- layer.change(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n) 187 | changes[which(is.null(changes) == TRUE)] <- 0 188 | changes[which(is.na(changes) == TRUE)] <- 0 189 | 190 | outside.candidate <- which.max(changes[setdiff(1:m, layer.set)]) #which layer should we add? 191 | l.add <- setdiff(1:m, layer.set)[outside.candidate] 192 | 193 | inside.candidate <- which.max(changes[layer.set]) #which layer should we remove? 194 | l.sub <- layer.set[inside.candidate] 195 | 196 | #Make the swap 197 | results <- swap.candidate(layer.set, changes, l.add, l.sub, score.old) 198 | layer.set.new <- results$set.new 199 | score.old <- results$score.old 200 | 201 | return(list(layer.set.new = layer.set.new, score.old = score.old)) 202 | } 203 | 204 | ####################################################################### 205 | ######Choosing which vertex to swap one at a time###### 206 | swap.vertex = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){ 207 | 208 | if(length(layer.set) == 0){ 209 | print('No Community Found') 210 | return(NULL) 211 | } 212 | 213 | #swap decision 214 | if(length(vertex.set) < 5){ 215 | print('No Community Found') 216 | return(NULL) 217 | } 218 | 219 | if(length(vertex.set) == n){ 220 | print('No Community Found') 221 | return(NULL) 222 | } 223 | changes = vertex.change(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n) 224 | 225 | #Get candidates 226 | outside.candidate <- which.max(changes[setdiff(1:n, vertex.set)])[1] 227 | u.add <- setdiff(1:n, vertex.set)[outside.candidate] 228 | 229 | inside.candidate <- which.max(changes[vertex.set]) 230 | u.sub <- vertex.set[inside.candidate] 231 | 232 | #Make the swap 233 | results <- swap.candidate(vertex.set, changes, u.add, u.sub, score.old) 234 | return(list(B.new = results$set.new, score.old = results$score.old)) 235 | } 236 | ####################################################################### 237 | #Inner function for a single swap inside the function for Multilayer.Extraction 238 | #Note: check the names of initial set 239 | single.swap = function(initial.set, adjacency, mod.matrix, m, n){ 240 | 241 | #initialize vertex.set and layer.set 242 | B.new <- initial.set$vertex.set 243 | I.new <- initial.set$layer.set 244 | score.old <- score(mod.matrix, vertex.set = B.new, layer.set = I.new, n) 245 | 246 | iterations <- 1 247 | B.fixed <- B.new + 1 248 | I.fixed <- I.new + 1 249 | 250 | #main loop 251 | while(length(intersect(B.fixed, B.new)) < max(length(B.fixed), length(B.new)) | 252 | length(intersect(I.fixed, I.new)) < max(length(I.fixed), length(I.new))){ 253 | 254 | if(length(B.new) < 2 | length(I.new) < 1){ 255 | print('No community found') 256 | return(NULL) 257 | } 258 | 259 | #seems redundant, check... 260 | B.fixed <- B.new 261 | I.fixed <- I.new 262 | 263 | B <- B.new 264 | I <- I.new + 1 265 | 266 | #update layer set 267 | if(m > 1){ 268 | while(length(intersect(I.new, I)) < max(length(I.new), length(I))){ 269 | 270 | I <- I.new 271 | results <- swap.layer(adjacency, mod.matrix, I, B, score.old, m, n) 272 | I.new <- results$layer.set.new 273 | score.old <- results$score.old 274 | } 275 | } 276 | if(m == 1){ 277 | I.new <- 1 278 | } 279 | 280 | #update vertex set 281 | B <- B - 1 282 | B.new <- B + 1 283 | 284 | while(length(intersect(B.new, B)) < max(length(B.new), length(B))){ 285 | B <- B.new 286 | results <- swap.vertex(adjacency, mod.matrix, I.new, B, score.old, m, n) 287 | 288 | B.new <- results$B.new 289 | score.old <- results$score.old 290 | } 291 | } 292 | return(list(B = sort(B.new), I = sort(I.new), Score = score.old)) 293 | } 294 | 295 | ######Effect on score when adding or subtracting a layer####### 296 | layer.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){ 297 | 298 | indx <- setdiff(1:m, layer.set) #which layers are not in the current set 299 | score.changes <- rep(0, m) 300 | 301 | for(i in 1:m){ 302 | if(i %in% indx){ 303 | score.changes[i] <- score(mod.matrix, vertex.set = 304 | vertex.set, layer.set = union(layer.set, i), n) - score.old 305 | } 306 | if(i %in% indx == FALSE){ 307 | score.changes[i] <- score(mod.matrix, vertex.set = 308 | vertex.set, layer.set = setdiff(layer.set, i), n) - score.old 309 | } 310 | } 311 | return(score.changes) 312 | } 313 | 314 | ######Effect on score when adding or subtracting a vertex####### 315 | vertex.change = function(adjacency, mod.matrix, layer.set, vertex.set, score.old, m, n){ 316 | 317 | indx <- setdiff(1:n, vertex.set) 318 | score.changes <- rep(0, n) 319 | 320 | #the following can also be parallelized! 321 | for(i in 1:n){ 322 | if(i %in% indx){ 323 | score.changes[i] <- score(mod.matrix, vertex.set = 324 | union(vertex.set, i), layer.set = layer.set, n) - score.old 325 | } 326 | if(i %in% indx == FALSE){ 327 | score.changes[i] <- score(mod.matrix, vertex.set = 328 | setdiff(vertex.set, i), layer.set = layer.set, n) - score.old 329 | } 330 | } 331 | 332 | return(score.changes) 333 | } 334 | -------------------------------------------------------------------------------- /R/plot.R: -------------------------------------------------------------------------------- 1 | #' plot.MultilayerCommunity 2 | #' 3 | #'Function plots the average score and total number of communities extracted across a grid of overlap parameters beta. 4 | # 5 | #' @param multilayer.object: a MultilayerCommunity object that contains communities extracted from the multilayer.extraction algorithm. 6 | #' @param main: the title of the plot. Optional. Default is "". 7 | #' @keywords community detection, multilayer networks, configuration model, random graph models 8 | #' @return A plot showing the total number of communities and average score of the communities across a grid of overlap parameters beta. 9 | #' @details This is a useful diagnostic plot to determine how many communities were extracted in the observed multilayer network. At beta = 0, 10 | #' all of the communities are disjoint. As beta increases, the communities are allowed to overlap more. See the below reference for more details about this function. 11 | #'@references 12 | #'\itemize{ 13 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 14 | #' extraction in multilayer networks with heterogeneous community structure." 15 | #' } 16 | #' 17 | #' @author James D. Wilson 18 | #' @export 19 | #' 20 | plot.MultilayerCommunity = function(multilayer.object, main = ""){ 21 | data.frame = multilayer.object$Diagnostics 22 | #Calculate when the biggest drop happens in total score 23 | 24 | diffs = diff(data.frame$Mean.Score) 25 | indx = which.min(diffs) 26 | #Attempt to put both objects on the same plot 27 | par(mar=c(5, 4, 4, 6) + 0.1) 28 | 29 | ## Plot first set of data and draw its axis 30 | plot(data.frame$Beta, data.frame$Number.Communities, type = "l",lwd = 3, axes = FALSE, col = "blue", xlab = "", ylab = "", main = "") 31 | if(max(data.frame$Number.Communities) > 9){ 32 | axis(2, c(seq(0,8,2),seq(10,max(data.frame$Number.Communities),5)),col = "blue", las = 1, col.axis = "blue") 33 | } 34 | 35 | if(max(data.frame$Number.Communities) < 10){ 36 | axis(2, 1:max(data.frame$Number.Communities),col = "blue", las = 1, col.axis = "blue") 37 | } 38 | mtext("Number of Communities", side = 2, line = 2.5, cex = 1.5) 39 | box() 40 | 41 | #Add the second graph 42 | par(new = TRUE) 43 | plot(data.frame$Beta, (data.frame$Mean.Score), xlab = "", type = "l", lty = 2, lwd = 3, col = "darkgreen", ylab = "", axes = FALSE) 44 | mtext("Average Score", side = 4, line = 4, cex = 1.5) 45 | axis(4, col = "darkgreen", col.axis = "darkgreen", las = 1) 46 | 47 | axis(1, seq(0,1,0.1)) 48 | mtext(expression(beta), side=1, col="black", line = 2.5, cex = 1.5) 49 | 50 | } -------------------------------------------------------------------------------- /R/refine.R: -------------------------------------------------------------------------------- 1 | #' refine 2 | #' 3 | #'Function that identifies statistically significant vertex-layer communities in multilayer networks. 4 | #' @param Multilayer.object: a MultilayerCommunity object that contains communities extracted from the multilayer.extraction algorithm. 5 | #' @param k: the number of communities that one would like to keep. 6 | #' @param m: the total number of layers in the original multilayer network. 7 | #' @param n: the total number of vertices in the original multilayer network. 8 | #' 9 | #' @keywords community detection, multilayer networks, configuration model, random graph models 10 | #' @return A list that contains the following objects 11 | #' \itemize{ 12 | #' \item Layers: an m x k matrix whose (i,j)th entry is non-zero if layer i is contained 13 | #' within community j. The value of the (i,j)th entry is the score of that community. 14 | #' \item Vertices: an n x k matrix whose (i,j)th entry is 1 if vertex i is contained within community j. 15 | #' } 16 | #'@references 17 | #'\itemize{ 18 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 19 | #' extraction in multilayer networks with heterogeneous community structure." 20 | #' } 21 | #' @author James D. Wilson 22 | #' @export 23 | #' 24 | 25 | ####################################################################### 26 | refine = function(Multilayer.object, k, m, n){ 27 | #Multilayer.object = resulting object from running multilayer extraction 28 | indx = which(Multilayer.object$Diagnostics$Number.Communities > (k-1))[1] 29 | beta = Multilayer.object$Diagnostics$Beta[indx] 30 | Results = list() 31 | #score = numeric() 32 | h = length(Multilayer.object$Community.List) 33 | r = 0 34 | for(i in 1:h){ 35 | g = length(Multilayer.object$Community.List[[i]]$Communities) 36 | for(j in 1:g){ 37 | r = r + 1 38 | Results[[r]] = list(B = Multilayer.object$Community.List[[i]]$Communities[[j]]$B, I = Multilayer.object$Community.List[[i]]$Communities[[j]]$I, Score = Multilayer.object$Community.List[[i]]$Communities[[j]]$Score) 39 | } 40 | } 41 | Results = unique(Results) 42 | refined.group = cleanup(Results, beta) 43 | 44 | ##Now create a binary matrices for the results 45 | Score <- Matrix(0, m, k) 46 | Vertices <- Matrix(0, n, k) 47 | for(i in 1:k){ 48 | Score[refined.group$Communities[[i]]$I, i] = refined.group$Communities[[i]]$Score 49 | Vertices[refined.group$Communities[[i]]$B, i] = 1 50 | } 51 | return(list(Layers = Score, Vertices = Vertices)) 52 | } 53 | 54 | ###cleanup function 55 | cleanup = function(Results, beta){ 56 | k = length(Results) 57 | if(k < 2){ 58 | Results <- Results[[1]] 59 | Results$Mean.Score <- Results$Score 60 | Results$Number.Communities <- 1 61 | return(Results) 62 | } 63 | indx.rm = numeric() 64 | if(k > 1){ 65 | for(i in 1:(k-1)){ 66 | for(j in (i+1):k){ 67 | match = length(intersect(Results[[i]]$B, Results[[j]]$B))*length(intersect(Results[[i]]$I, Results[[j]]$I))/(min(length(Results[[i]]$B)*length(Results[[i]]$I),length(Results[[j]]$B)*length(Results[[j]]$I))) 68 | pot = c(i,j) 69 | indx = pot[which.min(c(Results[[i]]$Score, Results[[j]]$Score))] 70 | if(match > beta-0.00001){indx.rm = c(indx.rm,indx)} 71 | } 72 | } 73 | } 74 | Results[indx.rm] = NULL 75 | 76 | temp = 0 77 | for(i in 1:length(Results)){ 78 | temp = temp + Results[[i]]$Score 79 | } 80 | Mean.score = temp/length(Results) 81 | return(list(Communities = Results, Mean.Score = Mean.score)) 82 | } 83 | 84 | 85 | -------------------------------------------------------------------------------- /R/score.R: -------------------------------------------------------------------------------- 1 | #' score 2 | #' 3 | #'Function that calculates the score of a multilayer vertex - layer community 4 | #' @param adjacency: a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network. 5 | #' @param expected: the expected value of the connectivity in the multilayer network adjacency. This can be calculated using the 6 | #' expected.CM function in this package. 7 | #' @param vertex.set: a numeric specifying the nodes within the multilayer community of interest. 8 | #' @param layer.set: a numeric specifying the layers within the multilayer community of interest. 9 | #' @keywords community detection, multilayer networks, configuration model, random graph models 10 | #' @return 11 | #' \itemize{ 12 | #' \item score: the score of the multilayer community 13 | #' } 14 | 15 | #'@references 16 | #'\itemize{ 17 | #' \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 18 | #' extraction in multilayer networks with heterogeneous community structure." 19 | #' } 20 | #' @author James D. Wilson 21 | #' @export 22 | #' 23 | #' 24 | 25 | score = function(mod.matrix, vertex.set, layer.set, n){ 26 | 27 | if(length(layer.set) < 1 || length(vertex.set) < 1){ 28 | return(obs.score = 0) 29 | } 30 | 31 | if(length(layer.set) == 1){ 32 | super.mod <- mod.matrix[[layer.set]] #just a single igraph object 33 | } 34 | 35 | if(length(layer.set) > 1){ 36 | #merge the modularity graphs 37 | super.mod <- graph.empty(n = n, directed = FALSE) 38 | for(j in layer.set){ 39 | super.mod <- igraph::union(super.mod, mod.matrix[[j]]) #take union of all networks in the layer.set 40 | } 41 | } 42 | #take sub-graph of the modularity matrix 43 | super.mod.subgraph <- induced_subgraph(super.mod, v = vertex.set) 44 | 45 | #get the edge weights all together 46 | edge.weights <- as.data.frame(get.edge.attribute(super.mod.subgraph)) 47 | edge.weights[is.na(edge.weights)] <- 0 48 | modularity.score <- rowSums(edge.weights) #sum across vertices first 49 | modularity.score[which(modularity.score < 0)] <- 0 #only keep positive values 50 | 51 | tot.mod <- sum(modularity.score) 52 | obs.score <- (tot.mod)^2 / (n^2*choose(length(vertex.set), 2)*(length(layer.set))) 53 | 54 | return(obs.score) 55 | } 56 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## MultilayerExtraction 2 | An R package that implements the multilayer extraction algorithm to identify 3 | densely connected vertex-layer sets in multilayer networks. 4 | 5 | The key reference for this monitoring method is 6 | 7 | Wilson, J.D., Palowitch, J., Bhamidi, S., and Nobel, A.B. (2017) **Significance based extraction in multilayer networks with heterogeneous community structure**, *Journal of Machine Learning Research* (18) 1-49 8 | 9 | ## Installation 10 | 11 | To install MultilayerExtraction, use the following commands. Be sure to include the required package **devtools** from R version 3.2.2 or higher. 12 | 13 | ``` 14 | #install the latest version of devtools 15 | install.packages("devtools") 16 | library(devtools, quietly = TRUE) 17 | 18 | #install and load MultilayerExtraction 19 | devtools::install_github('jdwilson4/multilayer_extraction') 20 | 21 | library(MultilayerExtraction, quietly = TRUE) 22 | ``` 23 | 24 | ## Install all other dependencies 25 | ``` 26 | install.packages("Matrix") 27 | install.packages("foreach") 28 | install.packages("doParallel") 29 | install.packages("parallel") 30 | install.packages("igraph") 31 | 32 | library(Matrix, quietly = TRUE) 33 | library(igraph, quietly = TRUE) 34 | library(foreach, quietly = TRUE) 35 | library(doParallel, quietly = TRUE) 36 | library(parallel, quietly = TRUE) 37 | ``` 38 | 39 | ## Description 40 | This package contains four primary functions, which are briefly described below. For a function named ```function``` below, type ```?function``` in R to get full documentation. 41 | 42 | - ```multilayer.extraction()```: identify densely connected vertex-layer communities within multilayer networks. This is a significance based approach that seeks to optimize a score derived from the strength of connection within multilayer communities. See the above reference for more details. 43 | - ```expectation.CM()```: estimate the expected edge weight between each pair of nodes across each layer of the multilayer network. 44 | - ```score()```: calculate the score of a specified vertex-layer community in an observed multilayer network. 45 | - ```plot.MultilayerCommunity()```: diagnostic plot that shows the number of communities and average score of communities from extracted communities against the overall overlap between each identified community. 46 | - ```refine()```: refine a ```MultilayerCommunity``` object identified by the ```multilayer.extraction()``` function so as to obtain a desired amount of overlap. 47 | 48 | ## Application: Austrailian computer science network 49 | Here we show how to use multilayer.extraction in practice by demonstrating an example on the Austrailian Computer Science network. This data set is a multilayer network describing the relationships among attendees at an Austrailian computer science conference. 50 | 51 | There were 61 attendees (represented by the nodes in each layer), and 5 relationships that are represented by the layers in the network. The 52 | data is provided in the file ```AU_CS.rda``` in a list format where each entry represents a different layer, and the type of relationship is represented by the name of the list entry. 53 | 54 | Below, we show how to load the data and run the Multilayer Extraction algorithm on the network. 55 | 56 | ``` 57 | #load the data 58 | data("AU_CS") 59 | 60 | #number of nodes and layers 61 | n <- dim(AU_CS[[1]])[1] 62 | m <- length(AU_CS) 63 | 64 | #types of relationships among attendees 65 | relationship.names <- names(AU_CS) 66 | print(relationship.names) 67 | ``` 68 | 69 | Next, we get a cursory view of some of the layers in the network. We follow this up by looking at the mean degree of the nodes in each of the layers. 70 | 71 | ``` 72 | image(AU_CS$coauthor, main = "Co-author") 73 | image(AU_CS$facebook, main = "Leisure") 74 | image(AU_CS$leisure, main = "Facebook") 75 | 76 | 77 | Mean.Degree <- sapply(AU_CS, mean) * n 78 | print(Mean.Degree) 79 | ``` 80 | 81 | Notably, this network has heterogeneous connectivity as witnessed above. The Multilayer Extraction algorithm accounts for such heterogeneity and identifies communities using the joint information from the layers. 82 | 83 | We now run the extraction algorithm. 84 | ``` 85 | ##run Multilayer Extraction algorithm on this network 86 | 87 | ##convert the list of adjacency matrices to an edgelist 88 | network <- adjacency.to.edgelist(AU_CS) 89 | set.seed(123) 90 | start_time <- Sys.time() 91 | community.object <- multilayer.extraction(adjacency = network, seed = 123, min.score = 0, prop.sample = .10) 92 | end_time <- Sys.time() 93 | 94 | end_time - start_time 95 | #for me, this took 1.055319 mins 96 | 97 | #plot the number of communities across overlap parameter beta 98 | plot(community.object, main = "Diagnostic Plot AU_CS") 99 | object <- refine(community.object, k = 6, m = m, n = n) 100 | 101 | ##there are 7 small communities. Let's look at the size of each 102 | num.layers <- colSums(object$Layers > 0) 103 | num.vertices <- colSums(object$Vertices) 104 | 105 | print(data.frame(Layers = num.layers, Vertices = num.vertices)) 106 | ``` 107 | 108 | We now re-order the adjacency matrices according to a disjoint re-ordering of the vertices from the identified communities. 109 | 110 | ``` 111 | indx1 <- which(object$Vertices[,1] == 1) 112 | indx2 <- setdiff(which(object$Vertices[,2] == 1), indx1) 113 | indx3 <- setdiff(setdiff(which(object$Vertices[,3] == 1), indx1), indx2) 114 | indx4 <- setdiff(setdiff(setdiff(which(object$Vertices[,4] == 1), indx3), indx2),indx1) 115 | indx5 <- setdiff(setdiff(setdiff(setdiff(which(object$Vertices[,5] == 1), indx4), indx3),indx2), indx1) 116 | 117 | indx6 <- setdiff(setdiff(setdiff(setdiff(setdiff(which(object$Vertices[,6] == 1), 118 | indx1), indx2), indx3), indx4), indx5) 119 | none2 <- setdiff(1:61, union(union(union(indx1, indx2), union(indx3, indx4)), union(indx5, indx6))) 120 | 121 | 122 | image(object$Layers, main = "Layers in Communities") 123 | 124 | #Visualizing the disjoint communities 125 | re.order <- c(indx1, indx2, indx3, indx4, indx5, indx6, none2) 126 | image(AU_CS$coauthor[re.order,re.order], main = "Co-Author") 127 | image(AU_CS$leisure[re.order,re.order], main = "Leisure") 128 | image(AU_CS$work[re.order,re.order], main = "Work") 129 | image(AU_CS$lunch[re.order,re.order], main = "Lunch") 130 | image(AU_CS$facebook[re.order,re.order], main = "Facebook") 131 | ``` 132 | As we can see above, the communities are strongly connected relative to the remaining network, as desired. 133 | 134 | ## Contributors 135 | - **James D. Wilson**, Assistant Professor of Statistics, University of San Francisco. Developer, contributor, and maintainer. 136 | - **Jean Carlos Paredes**, University of San Francisco. Contributor. 137 | 138 | Please send any comments, bugs, or questions to the developer James D. Wilson at jdwilson4@usfca.edu. -------------------------------------------------------------------------------- /data-raw/AU_CS.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(lubridate) 3 | library(tidyr) 4 | 5 | load("data-raw/AU_CS.rda") 6 | AU_CS <- Adjacency 7 | 8 | devtools::use_data(AU_CS, overwrite = TRUE) 9 | -------------------------------------------------------------------------------- /data-raw/AU_CS.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/data-raw/AU_CS.rda -------------------------------------------------------------------------------- /data-raw/EU_Airport.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(lubridate) 3 | library(tidyr) 4 | 5 | load("data-raw/EU_Airport.rda") 6 | 7 | EU_Airport <- Adjacency 8 | names(EU_Airport) <- layer.labels 9 | 10 | devtools::use_data(EU_Airport, overwrite = TRUE) 11 | -------------------------------------------------------------------------------- /data-raw/EU_Airport.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/data-raw/EU_Airport.rda -------------------------------------------------------------------------------- /data-raw/arXiv.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(lubridate) 3 | library(tidyr) 4 | 5 | load("data-raw/arXiv.rda") 6 | 7 | arXiv <- Adjacency 8 | names(arXiv) <- layer.labels 9 | 10 | devtools::use_data(arXiv, overwrite = TRUE) 11 | -------------------------------------------------------------------------------- /data-raw/arXiv.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/data-raw/arXiv.rda -------------------------------------------------------------------------------- /data/AU_CS.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/data/AU_CS.rda -------------------------------------------------------------------------------- /data/EU_Airport.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/data/EU_Airport.rda -------------------------------------------------------------------------------- /data/arXiv.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jdwilson4/MultilayerExtraction/efa69a8ed60ce8006f7f587bde1241e53a25cc0c/data/arXiv.rda -------------------------------------------------------------------------------- /man/AU_CS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AU_CS.R 3 | \docType{data} 4 | \name{AU_CS} 5 | \alias{AU_CS} 6 | \title{dataset: AU_CS} 7 | \format{This data set contains a list of adjacency matrices where each entry represents a different social relationship.} 8 | \usage{ 9 | data("AU_CS") 10 | } 11 | \description{ 12 | Data set that contains the multilayer network describing the multilayer relationships among attendees to an Austrailian computer science conference. 13 | In total, there were 61 attendees (represented by the nodes in each layer), and 5 relationships that are represented by the layers in the network. The 14 | data is provided in a list format where each entry represents a different layer, and the type of relationship is represented by the name of the list entry. 15 | } 16 | \examples{ 17 | data(AU_CS) 18 | image(Matrix(Adjacency$facebook)) #visualize the adjacency matrix that represents the Facebook friendships of the conference attendees. 19 | } 20 | \references{ 21 | \itemize{ 22 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 23 | extraction in multilayer networks with heterogeneous community structure." 24 | } 25 | } 26 | \author{ 27 | James D. Wilson 28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/adjacency.to.edgelist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adjacency_to_edgelist.R 3 | \name{adjacency.to.edgelist} 4 | \alias{adjacency.to.edgelist} 5 | \title{adjacency.to.edgelist} 6 | \usage{ 7 | adjacency.to.edgelist(adjacency, mode = c("undirected", "directed"), 8 | weighted = NULL) 9 | } 10 | \arguments{ 11 | \item{adjacency:}{a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network.} 12 | 13 | \item{mode:}{directed or undirected} 14 | 15 | \item{weighted:}{currently not functioning. Coming in later version.} 16 | } 17 | \value{ 18 | edgelist: a matrix with three columns representing edge connections- node1, node2, layer 19 | } 20 | \description{ 21 | Function that converts a list of adjacency matrices to an edgelist. 22 | } 23 | \references{ 24 | \itemize{ 25 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based 26 | extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research 27 | } 28 | } 29 | \author{ 30 | James D. Wilson 31 | } 32 | \keyword{community} 33 | \keyword{configuration} 34 | \keyword{detection,} 35 | \keyword{graph} 36 | \keyword{model,} 37 | \keyword{models} 38 | \keyword{multilayer} 39 | \keyword{networks,} 40 | \keyword{random} 41 | -------------------------------------------------------------------------------- /man/expected.CM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expectation_CM.R 3 | \name{expected.CM} 4 | \alias{expected.CM} 5 | \title{expected.CM} 6 | \usage{ 7 | expected.CM(adjacency, directed = FALSE) 8 | } 9 | \arguments{ 10 | \item{adjacency:}{a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network.} 11 | } 12 | \value{ 13 | \itemize{ 14 | \item P: a list object whose tth entry is the expected adjacency matrix of the tth layer 15 | } 16 | } 17 | \description{ 18 | Function that calculates the expected edge weight between each pair of nodes in each layer of a multilayer network. 19 | } 20 | \references{ 21 | \itemize{ 22 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 23 | extraction in multilayer networks with heterogeneous community structure." 24 | } 25 | } 26 | \author{ 27 | James D. Wilson 28 | } 29 | \keyword{community} 30 | \keyword{configuration} 31 | \keyword{detection,} 32 | \keyword{graph} 33 | \keyword{model,} 34 | \keyword{models} 35 | \keyword{multilayer} 36 | \keyword{networks,} 37 | \keyword{random} 38 | -------------------------------------------------------------------------------- /man/initialization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initialization.R 3 | \name{initialization} 4 | \alias{initialization} 5 | \title{initialization} 6 | \usage{ 7 | initialization(adjacency, prop.sample, m, n) 8 | } 9 | \arguments{ 10 | \item{adjacency:}{a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network.} 11 | 12 | \item{prop.sample:}{the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05 13 | specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer.} 14 | } 15 | \value{ 16 | \itemize{ 17 | \item neighborhoods: a list object of length prop.sample * n, where each entry contains 18 | a vertex set and layer set from which multilayer.extraction can be run. 19 | } 20 | } 21 | \description{ 22 | Function that generates randomly selected neighborhood vertex-layer sets to begin the 23 | multilayer.extraction algorithm. 24 | } 25 | \details{ 26 | A neighborhood of vertex u is defined as the collection of vertices that 27 | have higher than the mean connectivity of vertex u, when aggregated across layers. The 28 | chosen layer set is a random sample of size m/2. 29 | } 30 | \references{ 31 | \itemize{ 32 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 33 | extraction in multilayer networks with heterogeneous community structure." 34 | } 35 | } 36 | \author{ 37 | James D. Wilson 38 | } 39 | \keyword{community} 40 | \keyword{configuration} 41 | \keyword{detection,} 42 | \keyword{graph} 43 | \keyword{model,} 44 | \keyword{models} 45 | \keyword{multilayer} 46 | \keyword{networks,} 47 | \keyword{random} 48 | -------------------------------------------------------------------------------- /man/multilayer.extraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/multilayer_extraction.R 3 | \name{multilayer.extraction} 4 | \alias{multilayer.extraction} 5 | \title{multilayer.extraction} 6 | \usage{ 7 | multilayer.extraction(adjacency, seed = 123, min.score = 0, 8 | prop.sample = 0.05, directed = c(FALSE, TRUE)) 9 | } 10 | \arguments{ 11 | \item{adjacency:}{a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network.} 12 | 13 | \item{seed:}{seed for reproducibility. The initial neighborhoods that act as seeds for the multilayer extraction algorithm 14 | are random in this algorithm; hence, a seed will need to be set for reproducible results. Default is 123.} 15 | 16 | \item{min.score:}{the minimum score allowable for an extracted community. Default is 0.} 17 | 18 | \item{prop.sample:}{the proportion of vertices one would like to search over for initialization. Example: prop.sample = 0.05 19 | specifies that one will obtain 0.05 * n randomly selected vertex neighborhoods for initialization, where n = number of nodes in each layer. 20 | Default is 0.05.} 21 | } 22 | \value{ 23 | A MultilayerCommunity object, which is a list containing the following objects 24 | \itemize{ 25 | \item Community.List: a list of vertex-layer communities extracted from the algorithm 26 | \item Diagnostics: the diagnostics associated with each extracted community. This is a summary of 27 | each community, and includes for each level of overlap parameter Beta the mean score, and the total number 28 | of communities. This is used for determining the overall number of communities in a multilayer network. 29 | } 30 | } 31 | \description{ 32 | Function that identifies statistically significant vertex-layer communities in multilayer networks. 33 | } 34 | \references{ 35 | \itemize{ 36 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based 37 | extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research 38 | } 39 | } 40 | \author{ 41 | James D. Wilson 42 | } 43 | \keyword{community} 44 | \keyword{configuration} 45 | \keyword{detection,} 46 | \keyword{graph} 47 | \keyword{model,} 48 | \keyword{models} 49 | \keyword{multilayer} 50 | \keyword{networks,} 51 | \keyword{random} 52 | -------------------------------------------------------------------------------- /man/multilayer.modularity.matrix.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/modularity_matrix.R 3 | \name{multilayer.modularity.matrix} 4 | \alias{multilayer.modularity.matrix} 5 | \title{multilayer.modularity.matrix} 6 | \usage{ 7 | multilayer.modularity.matrix(adjacency, directed = FALSE) 8 | } 9 | \arguments{ 10 | \item{adjacency:}{an edgelist. Use adjacency.to.edgelist function to obtain this object} 11 | } 12 | \value{ 13 | mod.matrix: an igraph object representing the modularity matrix for each layer 14 | } 15 | \description{ 16 | Function that calculates the modularity matrix for each layer in a multilayer network 17 | } 18 | \references{ 19 | \itemize{ 20 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2017) "Significance based 21 | extraction in multilayer networks with heterogeneous community structure." Journal of Machine Learning Research 22 | } 23 | } 24 | \author{ 25 | James D. Wilson 26 | } 27 | \keyword{community} 28 | \keyword{configuration} 29 | \keyword{detection,} 30 | \keyword{graph} 31 | \keyword{model,} 32 | \keyword{models} 33 | \keyword{multilayer} 34 | \keyword{networks,} 35 | \keyword{random} 36 | -------------------------------------------------------------------------------- /man/plot.MultilayerCommunity.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.R 3 | \name{plot.MultilayerCommunity} 4 | \alias{plot.MultilayerCommunity} 5 | \title{plot.MultilayerCommunity} 6 | \usage{ 7 | \method{plot}{MultilayerCommunity}(multilayer.object, main = "") 8 | } 9 | \arguments{ 10 | \item{multilayer.object:}{a MultilayerCommunity object that contains communities extracted from the multilayer.extraction algorithm.} 11 | 12 | \item{main:}{the title of the plot. Optional. Default is "".} 13 | } 14 | \value{ 15 | A plot showing the total number of communities and average score of the communities across a grid of overlap parameters beta. 16 | } 17 | \description{ 18 | Function plots the average score and total number of communities extracted across a grid of overlap parameters beta. 19 | } 20 | \details{ 21 | This is a useful diagnostic plot to determine how many communities were extracted in the observed multilayer network. At beta = 0, 22 | all of the communities are disjoint. As beta increases, the communities are allowed to overlap more. See the below reference for more details about this function. 23 | } 24 | \references{ 25 | \itemize{ 26 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 27 | extraction in multilayer networks with heterogeneous community structure." 28 | } 29 | } 30 | \author{ 31 | James D. Wilson 32 | } 33 | \keyword{community} 34 | \keyword{configuration} 35 | \keyword{detection,} 36 | \keyword{graph} 37 | \keyword{model,} 38 | \keyword{models} 39 | \keyword{multilayer} 40 | \keyword{networks,} 41 | \keyword{random} 42 | -------------------------------------------------------------------------------- /man/refine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/refine.R 3 | \name{refine} 4 | \alias{refine} 5 | \title{refine} 6 | \usage{ 7 | refine(Multilayer.object, k, m, n) 8 | } 9 | \arguments{ 10 | \item{Multilayer.object:}{a MultilayerCommunity object that contains communities extracted from the multilayer.extraction algorithm.} 11 | 12 | \item{k:}{the number of communities that one would like to keep.} 13 | 14 | \item{m:}{the total number of layers in the original multilayer network.} 15 | 16 | \item{n:}{the total number of vertices in the original multilayer network.} 17 | } 18 | \value{ 19 | A list that contains the following objects 20 | \itemize{ 21 | \item Layers: an m x k matrix whose (i,j)th entry is non-zero if layer i is contained 22 | within community j. The value of the (i,j)th entry is the score of that community. 23 | \item Vertices: an n x k matrix whose (i,j)th entry is 1 if vertex i is contained within community j. 24 | } 25 | } 26 | \description{ 27 | Function that identifies statistically significant vertex-layer communities in multilayer networks. 28 | } 29 | \references{ 30 | \itemize{ 31 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 32 | extraction in multilayer networks with heterogeneous community structure." 33 | } 34 | } 35 | \author{ 36 | James D. Wilson 37 | } 38 | \keyword{community} 39 | \keyword{configuration} 40 | \keyword{detection,} 41 | \keyword{graph} 42 | \keyword{model,} 43 | \keyword{models} 44 | \keyword{multilayer} 45 | \keyword{networks,} 46 | \keyword{random} 47 | -------------------------------------------------------------------------------- /man/score.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/score.R 3 | \name{score} 4 | \alias{score} 5 | \title{score} 6 | \usage{ 7 | score(mod.matrix, vertex.set, layer.set, n) 8 | } 9 | \arguments{ 10 | \item{adjacency:}{a list object whose tth entry is an adjacency matrix representing the tth layer of a multilayer network.} 11 | 12 | \item{expected:}{the expected value of the connectivity in the multilayer network adjacency. This can be calculated using the 13 | expected.CM function in this package.} 14 | 15 | \item{vertex.set:}{a numeric specifying the nodes within the multilayer community of interest.} 16 | 17 | \item{layer.set:}{a numeric specifying the layers within the multilayer community of interest.} 18 | } 19 | \value{ 20 | \itemize{ 21 | \item score: the score of the multilayer community 22 | } 23 | } 24 | \description{ 25 | Function that calculates the score of a multilayer vertex - layer community 26 | } 27 | \references{ 28 | \itemize{ 29 | \item Wilson, James D., Palowitch, John, Bhamidi, Shankar, and Nobel, Andrew B. (2016) "Significance based 30 | extraction in multilayer networks with heterogeneous community structure." 31 | } 32 | } 33 | \author{ 34 | James D. Wilson 35 | } 36 | \keyword{community} 37 | \keyword{configuration} 38 | \keyword{detection,} 39 | \keyword{graph} 40 | \keyword{model,} 41 | \keyword{models} 42 | \keyword{multilayer} 43 | \keyword{networks,} 44 | \keyword{random} 45 | --------------------------------------------------------------------------------