├── Output └── SampleOutput.txt ├── .gitattributes ├── README.md ├── .gitignore ├── Graphs └── sample.graph └── Python └── FindCommunities.py /Output/SampleOutput.txt: -------------------------------------------------------------------------------- 1 | 0.0209999084473 seconds File Read Time 2 | Running Link Aggregate Algorithm... 3 | Running Improved Improved Iterative Scan Algorithm... 4 | 74 76 77 78 79 80 5 | 27 28 29 30 31 32 35 6 | 47 48 49 50 51 68 70 71 72 73 7 | 36 37 38 39 40 41 42 43 44 45 46 8 | 16 17 19 20 21 22 23 9 | 5 8 10 11 12 14 15 10 | 60 61 62 63 64 65 66 67 11 | 52 53 54 55 56 58 59 12 | 24 25 26 27 28 29 30 31 32 33 34 35 13 | 24 25 26 27 28 29 30 31 32 33 34 35 14 | 0 1 2 3 4 15 | 52 53 54 55 59 16 | 5 6 7 8 9 10 11 12 13 14 15 17 | 0.12700009346 seconds in Total 18 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Requirements: 2 | ============ 3 | *Software that needs to be installed (if any) with URL’s to download and instructions to install them.* 4 | Packages are required for R: igraph, compiler, doSNOW, foreach, parallel 5 | 6 | _Environment variable settings (if any) and OS it should/could run on._ 7 | ======================================================================= 8 | None. 9 | 10 | _Instructions on how to run the program._ 11 | ========================================= 12 | We need to change the path where the graph lies (setwd). 13 | For sample code run 14 | 15 | ```````````````````````EXAMPLE`````````````````````````` 16 | 1. unzip the archive 17 | 2. cd archive/ 18 | 3. update virusprop.R with full path where the static graph lies. 19 | 4. execute 20 | ```````````````````````````````````````````````````````` 21 | 22 | _Instructions on how to interpret the results._ 23 | =============================================== 24 | Graphs are generated for displaying the results for each section. 25 | 26 | Tested On 27 | ========= 28 | static.network -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | 19 | # External tool builders 20 | .externalToolBuilders/ 21 | 22 | # Locally stored "Eclipse launch configurations" 23 | *.launch 24 | 25 | # CDT-specific 26 | .cproject 27 | 28 | # PDT-specific 29 | .buildpath 30 | 31 | 32 | ################# 33 | ## Visual Studio 34 | ################# 35 | 36 | ## Ignore Visual Studio temporary files, build results, and 37 | ## files generated by popular Visual Studio add-ons. 38 | 39 | # User-specific files 40 | *.suo 41 | *.user 42 | *.sln.docstates 43 | 44 | # Build results 45 | 46 | [Dd]ebug/ 47 | [Rr]elease/ 48 | x64/ 49 | build/ 50 | [Bb]in/ 51 | [Oo]bj/ 52 | 53 | # MSTest test Results 54 | [Tt]est[Rr]esult*/ 55 | [Bb]uild[Ll]og.* 56 | 57 | *_i.c 58 | *_p.c 59 | *.ilk 60 | *.meta 61 | *.obj 62 | *.pch 63 | *.pdb 64 | *.pgc 65 | *.pgd 66 | *.rsp 67 | *.sbr 68 | *.tlb 69 | *.tli 70 | *.tlh 71 | *.tmp 72 | *.tmp_proj 73 | *.log 74 | *.vspscc 75 | *.vssscc 76 | .builds 77 | *.pidb 78 | *.log 79 | *.scc 80 | 81 | # Visual C++ cache files 82 | ipch/ 83 | *.aps 84 | *.ncb 85 | *.opensdf 86 | *.sdf 87 | *.cachefile 88 | 89 | # Visual Studio profiler 90 | *.psess 91 | *.vsp 92 | *.vspx 93 | 94 | # Guidance Automation Toolkit 95 | *.gpState 96 | 97 | # ReSharper is a .NET coding add-in 98 | _ReSharper*/ 99 | *.[Rr]e[Ss]harper 100 | 101 | # TeamCity is a build add-in 102 | _TeamCity* 103 | 104 | # DotCover is a Code Coverage Tool 105 | *.dotCover 106 | 107 | # NCrunch 108 | *.ncrunch* 109 | .*crunch*.local.xml 110 | 111 | # Installshield output folder 112 | [Ee]xpress/ 113 | 114 | # DocProject is a documentation generator add-in 115 | DocProject/buildhelp/ 116 | DocProject/Help/*.HxT 117 | DocProject/Help/*.HxC 118 | DocProject/Help/*.hhc 119 | DocProject/Help/*.hhk 120 | DocProject/Help/*.hhp 121 | DocProject/Help/Html2 122 | DocProject/Help/html 123 | 124 | # Click-Once directory 125 | publish/ 126 | 127 | # Publish Web Output 128 | *.Publish.xml 129 | *.pubxml 130 | 131 | # NuGet Packages Directory 132 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 133 | #packages/ 134 | 135 | # Windows Azure Build Output 136 | csx 137 | *.build.csdef 138 | 139 | # Windows Store app package directory 140 | AppPackages/ 141 | 142 | # Others 143 | sql/ 144 | *.Cache 145 | ClientBin/ 146 | [Ss]tyle[Cc]op.* 147 | ~$* 148 | *~ 149 | *.dbmdl 150 | *.[Pp]ublish.xml 151 | *.pfx 152 | *.publishsettings 153 | 154 | # RIA/Silverlight projects 155 | Generated_Code/ 156 | 157 | # Backup & report files from converting an old project file to a newer 158 | # Visual Studio version. Backup files are not needed, because we have git ;-) 159 | _UpgradeReport_Files/ 160 | Backup*/ 161 | UpgradeLog*.XML 162 | UpgradeLog*.htm 163 | 164 | # SQL Server files 165 | App_Data/*.mdf 166 | App_Data/*.ldf 167 | 168 | ############# 169 | ## Windows detritus 170 | ############# 171 | 172 | # Windows image file caches 173 | Thumbs.db 174 | ehthumbs.db 175 | 176 | # Folder config file 177 | Desktop.ini 178 | 179 | # Recycle Bin used on file shares 180 | $RECYCLE.BIN/ 181 | 182 | # Mac crap 183 | .DS_Store 184 | 185 | 186 | ############# 187 | ## Python 188 | ############# 189 | 190 | *.py[co] 191 | 192 | # Packages 193 | *.egg 194 | *.egg-info 195 | dist/ 196 | build/ 197 | eggs/ 198 | parts/ 199 | var/ 200 | sdist/ 201 | develop-eggs/ 202 | .installed.cfg 203 | 204 | # Installer logs 205 | pip-log.txt 206 | 207 | # Unit test / coverage reports 208 | .coverage 209 | .tox 210 | 211 | #Translations 212 | *.mo 213 | 214 | #Mr Developer 215 | .mr.developer.cfg 216 | -------------------------------------------------------------------------------- /Graphs/sample.graph: -------------------------------------------------------------------------------- 1 | 81 362 2 | 0 1 3 | 0 2 4 | 0 3 5 | 0 4 6 | 1 2 7 | 1 3 8 | 1 4 9 | 1 6 10 | 1 26 11 | 2 3 12 | 2 4 13 | 2 6 14 | 2 7 15 | 3 4 16 | 3 26 17 | 4 33 18 | 5 6 19 | 5 7 20 | 5 8 21 | 5 9 22 | 5 10 23 | 5 11 24 | 5 12 25 | 5 13 26 | 5 14 27 | 5 15 28 | 5 75 29 | 6 7 30 | 6 8 31 | 6 9 32 | 6 10 33 | 6 11 34 | 6 12 35 | 6 13 36 | 6 14 37 | 6 15 38 | 6 46 39 | 7 8 40 | 7 9 41 | 7 10 42 | 7 11 43 | 7 12 44 | 7 13 45 | 7 14 46 | 7 15 47 | 7 65 48 | 8 9 49 | 8 10 50 | 8 11 51 | 8 12 52 | 8 13 53 | 8 14 54 | 8 15 55 | 9 10 56 | 9 11 57 | 9 12 58 | 9 13 59 | 9 14 60 | 9 15 61 | 9 47 62 | 9 70 63 | 10 11 64 | 10 12 65 | 10 13 66 | 10 14 67 | 10 15 68 | 11 12 69 | 11 13 70 | 11 14 71 | 11 15 72 | 11 75 73 | 12 13 74 | 12 14 75 | 12 15 76 | 13 14 77 | 13 15 78 | 13 70 79 | 14 15 80 | 16 17 81 | 16 18 82 | 16 19 83 | 16 20 84 | 16 21 85 | 16 22 86 | 16 23 87 | 16 24 88 | 16 61 89 | 17 18 90 | 17 19 91 | 17 20 92 | 17 21 93 | 17 22 94 | 17 23 95 | 17 34 96 | 18 19 97 | 18 20 98 | 18 21 99 | 18 22 100 | 18 23 101 | 18 30 102 | 18 53 103 | 18 69 104 | 19 20 105 | 19 21 106 | 19 22 107 | 19 23 108 | 19 78 109 | 19 80 110 | 20 21 111 | 20 22 112 | 20 23 113 | 20 62 114 | 21 22 115 | 21 23 116 | 21 57 117 | 22 23 118 | 23 50 119 | 24 25 120 | 24 26 121 | 24 27 122 | 24 28 123 | 24 29 124 | 24 30 125 | 24 31 126 | 24 32 127 | 24 33 128 | 24 34 129 | 24 35 130 | 25 26 131 | 25 27 132 | 25 28 133 | 25 29 134 | 25 30 135 | 25 31 136 | 25 32 137 | 25 33 138 | 25 34 139 | 25 35 140 | 25 80 141 | 26 27 142 | 26 28 143 | 26 29 144 | 26 30 145 | 26 31 146 | 26 32 147 | 26 33 148 | 26 34 149 | 26 35 150 | 26 57 151 | 26 75 152 | 27 28 153 | 27 29 154 | 27 30 155 | 27 31 156 | 27 32 157 | 27 33 158 | 27 34 159 | 27 35 160 | 28 29 161 | 28 30 162 | 28 31 163 | 28 32 164 | 28 33 165 | 28 34 166 | 28 35 167 | 29 30 168 | 29 31 169 | 29 32 170 | 29 33 171 | 29 34 172 | 29 35 173 | 29 75 174 | 30 31 175 | 30 32 176 | 30 33 177 | 30 34 178 | 30 35 179 | 31 32 180 | 31 33 181 | 31 34 182 | 31 35 183 | 31 37 184 | 32 33 185 | 32 34 186 | 32 35 187 | 32 43 188 | 33 34 189 | 33 35 190 | 33 57 191 | 33 65 192 | 34 35 193 | 34 44 194 | 36 37 195 | 36 38 196 | 36 39 197 | 36 40 198 | 36 41 199 | 36 42 200 | 36 43 201 | 36 44 202 | 36 45 203 | 36 46 204 | 36 69 205 | 37 38 206 | 37 39 207 | 37 40 208 | 37 41 209 | 37 42 210 | 37 43 211 | 37 44 212 | 37 45 213 | 37 46 214 | 38 39 215 | 38 40 216 | 38 41 217 | 38 42 218 | 38 43 219 | 38 44 220 | 38 45 221 | 38 46 222 | 38 72 223 | 39 40 224 | 39 41 225 | 39 42 226 | 39 43 227 | 39 44 228 | 39 45 229 | 39 46 230 | 40 41 231 | 40 42 232 | 40 43 233 | 40 44 234 | 40 45 235 | 40 46 236 | 40 64 237 | 40 69 238 | 41 42 239 | 41 43 240 | 41 44 241 | 41 45 242 | 41 46 243 | 41 54 244 | 42 43 245 | 42 44 246 | 42 45 247 | 42 46 248 | 43 44 249 | 43 45 250 | 43 46 251 | 44 45 252 | 44 46 253 | 44 56 254 | 45 46 255 | 45 72 256 | 47 48 257 | 47 49 258 | 47 50 259 | 47 51 260 | 47 68 261 | 48 49 262 | 48 50 263 | 48 51 264 | 49 50 265 | 49 51 266 | 50 51 267 | 52 53 268 | 52 54 269 | 52 55 270 | 52 56 271 | 52 57 272 | 52 58 273 | 52 59 274 | 53 54 275 | 53 55 276 | 53 56 277 | 53 57 278 | 53 58 279 | 53 59 280 | 54 55 281 | 54 56 282 | 54 57 283 | 54 58 284 | 54 59 285 | 55 56 286 | 55 57 287 | 55 58 288 | 55 59 289 | 55 69 290 | 56 57 291 | 56 58 292 | 56 59 293 | 56 64 294 | 57 58 295 | 57 59 296 | 57 79 297 | 58 59 298 | 58 68 299 | 58 71 300 | 60 61 301 | 60 62 302 | 60 63 303 | 60 64 304 | 60 65 305 | 60 66 306 | 60 67 307 | 61 62 308 | 61 63 309 | 61 64 310 | 61 65 311 | 61 66 312 | 61 67 313 | 62 63 314 | 62 64 315 | 62 65 316 | 62 66 317 | 62 67 318 | 63 64 319 | 63 65 320 | 63 66 321 | 63 67 322 | 64 65 323 | 64 66 324 | 64 67 325 | 65 66 326 | 65 67 327 | 66 67 328 | 68 69 329 | 68 70 330 | 68 71 331 | 68 72 332 | 68 73 333 | 69 70 334 | 69 71 335 | 69 72 336 | 69 73 337 | 70 71 338 | 70 72 339 | 70 73 340 | 71 72 341 | 71 73 342 | 72 73 343 | 74 75 344 | 74 76 345 | 74 77 346 | 74 78 347 | 74 79 348 | 74 80 349 | 75 76 350 | 75 77 351 | 75 78 352 | 75 79 353 | 75 80 354 | 76 77 355 | 76 78 356 | 76 79 357 | 76 80 358 | 77 78 359 | 77 79 360 | 77 80 361 | 78 79 362 | 78 80 363 | 79 80 -------------------------------------------------------------------------------- /Python/FindCommunities.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3.0 3 | 4 | # 5 | import pdb 6 | import sys 7 | import igraph 8 | from igraph import Graph 9 | import random 10 | import numpy as np 11 | import pandas as pd 12 | 13 | 14 | def read_graph(file_name): 15 | # Input edge list file name and output igraph representation 16 | df = pd.read_csv(file_name, sep=" ", names=["Edge1", "Edge2"]) 17 | n_vertex, n_edge = df.irow(0) 18 | df = df.drop(0) 19 | graph = Graph(edges=[(x[1]["Edge1"], x[1]["Edge2"]) 20 | for x in df.iterrows()], directed=False) 21 | assert(graph.vcount() == n_vertex) 22 | assert(graph.ecount() == n_edge) 23 | return preprocess_graph(graph) 24 | 25 | 26 | def density(subgraph): 27 | # Get density of graph (as defined in paper) 28 | if subgraph.vcount() == 0: 29 | return 0 30 | else: 31 | return subgraph.ecount() * 2.0 / subgraph.vcount() 32 | 33 | 34 | def pagerank_order(graph, reverse=False): 35 | # Generator that returns indices of graph vertices in 36 | # page rank order. Default is smallest-to-largest. 37 | for (y, x) in sorted(zip(graph.pagerank(directed=False), range(0, graph.vcount())), reverse=reverse): 38 | yield x 39 | 40 | 41 | def vertices_in_pagerank_order(graph, reverse=False): 42 | # Generator that returns graph vertices in page rank 43 | # order. Default is smallest-to-largest. 44 | for i in pagerank_order(graph, reverse=reverse): 45 | yield graph.vs.select([i])[0] 46 | 47 | 48 | def update_graph(graph, subgraph, vertex, density_metric): 49 | # Add vertex to subgraph. If density is improved, 50 | # return this extended subgraph. Otherwise, 51 | # return original subgraph. 52 | new_subgraph = graph.subgraph( 53 | subgraph.vs["original_index"] + [vertex.index]) 54 | if density_metric(new_subgraph) > density_metric(subgraph): 55 | return new_subgraph, True 56 | else: 57 | return subgraph, False 58 | 59 | 60 | def preprocess_graph(graph): 61 | # Preprocess graph to ensure it doesn't have loops 62 | # or multiedges 63 | graph = graph.simplify() 64 | graph.vs["original_index"] = graph.vs.indices 65 | return graph 66 | 67 | 68 | def LinkAggregateAlgorithm(graph, density_metric): 69 | # Run link Aggregate Algorithm as describedin paper 70 | C = set() 71 | 72 | vertex_count = graph.vcount() 73 | 74 | for i, v in enumerate(vertices_in_pagerank_order(graph, reverse=True)): 75 | if len(C) == 0: 76 | C.add(graph.subgraph([v.index])) 77 | C, added_array = zip( 78 | *map(lambda subgraph: update_graph(graph, subgraph, v, density_metric), C) 79 | ) 80 | C = set(C) 81 | if not np.any(added_array): 82 | C.add(graph.subgraph([v.index])) 83 | 84 | if (i + 1) % 500 == 0: 85 | print >> sys.stderr, "\t...Processed vertex", i + \ 86 | 1, "of", vertex_count 87 | return C 88 | 89 | 90 | def ImprovedIterativeScanAlgorithm(subgraph, graph, density_metric): 91 | # Run Improved Iterative Scan algorithm as described in paper 92 | 93 | C = subgraph 94 | w = density_metric(subgraph) 95 | increased = True 96 | 97 | while increased: 98 | N = C.as_undirected() 99 | for v in C.vs: 100 | v_neighbor_indices = [x["original_index"] for x in v.neighbors()] 101 | N = graph.subgraph(v_neighbor_indices + N.vs["original_index"]) 102 | for v in N.vs: 103 | if v["original_index"] in C.vs["original_index"]: 104 | C_prime = graph.subgraph( 105 | [x for x in C.vs["original_index"] if x != v["original_index"]]) 106 | else: 107 | C_prime = graph.subgraph( 108 | C.vs["original_index"] + [v["original_index"]]) 109 | if density_metric(C_prime) > density_metric(C): 110 | C = C_prime 111 | if density_metric(C) == w: 112 | increased = False 113 | else: 114 | w = density_metric(C) 115 | return C 116 | 117 | 118 | def process(file_name): 119 | 120 | graph = read_graph(file_name) 121 | print >> sys.stderr, "Running Link Aggregate Algorithm..." 122 | out = LinkAggregateAlgorithm(graph, density) 123 | print >> sys.stderr, "Running Improved Improved Iterative Scan Algorithm..." 124 | for i, subgraph in enumerate(out): 125 | for v in ImprovedIterativeScanAlgorithm(subgraph, graph, 126 | density).vs["original_index"]: 127 | print v, 128 | print 129 | 130 | 131 | if __name__ == "__main__": 132 | if len(sys.argv) != 2: 133 | print >> sys.stderr, "ERROR: Program requires single argument: path to graph file." 134 | else: 135 | try: 136 | with open(sys.argv[1]) as f: 137 | process(sys.argv[1]) 138 | except IOError: 139 | print >> sys.stderr, 'ERROR: Input file does not exist.' 140 | --------------------------------------------------------------------------------