├── Output
└── SampleOutput.txt
├── .gitattributes
├── README.md
├── .gitignore
├── Graphs
└── sample.graph
└── Python
└── FindCommunities.py
/Output/SampleOutput.txt:
--------------------------------------------------------------------------------
1 | 0.0209999084473 seconds File Read Time
2 | Running Link Aggregate Algorithm...
3 | Running Improved Improved Iterative Scan Algorithm...
4 | 74 76 77 78 79 80
5 | 27 28 29 30 31 32 35
6 | 47 48 49 50 51 68 70 71 72 73
7 | 36 37 38 39 40 41 42 43 44 45 46
8 | 16 17 19 20 21 22 23
9 | 5 8 10 11 12 14 15
10 | 60 61 62 63 64 65 66 67
11 | 52 53 54 55 56 58 59
12 | 24 25 26 27 28 29 30 31 32 33 34 35
13 | 24 25 26 27 28 29 30 31 32 33 34 35
14 | 0 1 2 3 4
15 | 52 53 54 55 59
16 | 5 6 7 8 9 10 11 12 13 14 15
17 | 0.12700009346 seconds in Total
18 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | *.sln merge=union
7 | *.csproj merge=union
8 | *.vbproj merge=union
9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 |
12 | # Standard to msysgit
13 | *.doc diff=astextplain
14 | *.DOC diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot diff=astextplain
18 | *.DOT diff=astextplain
19 | *.pdf diff=astextplain
20 | *.PDF diff=astextplain
21 | *.rtf diff=astextplain
22 | *.RTF diff=astextplain
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Requirements:
2 | ============
3 | *Software that needs to be installed (if any) with URLs to download and instructions to install them.*
4 | Python packages are required: python-igraph, numpy, pandas
5 |
6 | _Environment variable settings (if any) and OS it should/could run on._
7 | =======================================================================
8 | None.
9 |
10 | _Instructions on how to run the program._
11 | =========================================
12 | Pass the path to the graph file as the single command-line argument of Python/FindCommunities.py.
13 | For sample code run
14 |
15 | ```````````````````````EXAMPLE``````````````````````````
16 | 1. unzip the archive
17 | 2. cd archive/
18 | 3. choose the edge-list graph file to analyse (e.g. Graphs/sample.graph).
19 | 4. execute: python Python/FindCommunities.py Graphs/sample.graph
20 | ````````````````````````````````````````````````````````
21 |
22 | _Instructions on how to interpret the results._
23 | ===============================================
24 | Each line of standard output lists the vertex indices of one detected community (see Output/SampleOutput.txt).
25 |
26 | Tested On
27 | =========
28 | static.network
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #################
2 | ## Eclipse
3 | #################
4 |
5 | *.pydevproject
6 | .project
7 | .metadata
8 | bin/
9 | tmp/
10 | *.tmp
11 | *.bak
12 | *.swp
13 | *~.nib
14 | local.properties
15 | .classpath
16 | .settings/
17 | .loadpath
18 |
19 | # External tool builders
20 | .externalToolBuilders/
21 |
22 | # Locally stored "Eclipse launch configurations"
23 | *.launch
24 |
25 | # CDT-specific
26 | .cproject
27 |
28 | # PDT-specific
29 | .buildpath
30 |
31 |
32 | #################
33 | ## Visual Studio
34 | #################
35 |
36 | ## Ignore Visual Studio temporary files, build results, and
37 | ## files generated by popular Visual Studio add-ons.
38 |
39 | # User-specific files
40 | *.suo
41 | *.user
42 | *.sln.docstates
43 |
44 | # Build results
45 |
46 | [Dd]ebug/
47 | [Rr]elease/
48 | x64/
49 | build/
50 | [Bb]in/
51 | [Oo]bj/
52 |
53 | # MSTest test Results
54 | [Tt]est[Rr]esult*/
55 | [Bb]uild[Ll]og.*
56 |
57 | *_i.c
58 | *_p.c
59 | *.ilk
60 | *.meta
61 | *.obj
62 | *.pch
63 | *.pdb
64 | *.pgc
65 | *.pgd
66 | *.rsp
67 | *.sbr
68 | *.tlb
69 | *.tli
70 | *.tlh
71 | *.tmp
72 | *.tmp_proj
73 | *.log
74 | *.vspscc
75 | *.vssscc
76 | .builds
77 | *.pidb
78 | *.log
79 | *.scc
80 |
81 | # Visual C++ cache files
82 | ipch/
83 | *.aps
84 | *.ncb
85 | *.opensdf
86 | *.sdf
87 | *.cachefile
88 |
89 | # Visual Studio profiler
90 | *.psess
91 | *.vsp
92 | *.vspx
93 |
94 | # Guidance Automation Toolkit
95 | *.gpState
96 |
97 | # ReSharper is a .NET coding add-in
98 | _ReSharper*/
99 | *.[Rr]e[Ss]harper
100 |
101 | # TeamCity is a build add-in
102 | _TeamCity*
103 |
104 | # DotCover is a Code Coverage Tool
105 | *.dotCover
106 |
107 | # NCrunch
108 | *.ncrunch*
109 | .*crunch*.local.xml
110 |
111 | # Installshield output folder
112 | [Ee]xpress/
113 |
114 | # DocProject is a documentation generator add-in
115 | DocProject/buildhelp/
116 | DocProject/Help/*.HxT
117 | DocProject/Help/*.HxC
118 | DocProject/Help/*.hhc
119 | DocProject/Help/*.hhk
120 | DocProject/Help/*.hhp
121 | DocProject/Help/Html2
122 | DocProject/Help/html
123 |
124 | # Click-Once directory
125 | publish/
126 |
127 | # Publish Web Output
128 | *.Publish.xml
129 | *.pubxml
130 |
131 | # NuGet Packages Directory
132 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line
133 | #packages/
134 |
135 | # Windows Azure Build Output
136 | csx
137 | *.build.csdef
138 |
139 | # Windows Store app package directory
140 | AppPackages/
141 |
142 | # Others
143 | sql/
144 | *.Cache
145 | ClientBin/
146 | [Ss]tyle[Cc]op.*
147 | ~$*
148 | *~
149 | *.dbmdl
150 | *.[Pp]ublish.xml
151 | *.pfx
152 | *.publishsettings
153 |
154 | # RIA/Silverlight projects
155 | Generated_Code/
156 |
157 | # Backup & report files from converting an old project file to a newer
158 | # Visual Studio version. Backup files are not needed, because we have git ;-)
159 | _UpgradeReport_Files/
160 | Backup*/
161 | UpgradeLog*.XML
162 | UpgradeLog*.htm
163 |
164 | # SQL Server files
165 | App_Data/*.mdf
166 | App_Data/*.ldf
167 |
168 | #############
169 | ## Windows detritus
170 | #############
171 |
172 | # Windows image file caches
173 | Thumbs.db
174 | ehthumbs.db
175 |
176 | # Folder config file
177 | Desktop.ini
178 |
179 | # Recycle Bin used on file shares
180 | $RECYCLE.BIN/
181 |
182 | # Mac crap
183 | .DS_Store
184 |
185 |
186 | #############
187 | ## Python
188 | #############
189 |
190 | *.py[co]
191 |
192 | # Packages
193 | *.egg
194 | *.egg-info
195 | dist/
196 | build/
197 | eggs/
198 | parts/
199 | var/
200 | sdist/
201 | develop-eggs/
202 | .installed.cfg
203 |
204 | # Installer logs
205 | pip-log.txt
206 |
207 | # Unit test / coverage reports
208 | .coverage
209 | .tox
210 |
211 | #Translations
212 | *.mo
213 |
214 | #Mr Developer
215 | .mr.developer.cfg
216 |
--------------------------------------------------------------------------------
/Graphs/sample.graph:
--------------------------------------------------------------------------------
1 | 81 362
2 | 0 1
3 | 0 2
4 | 0 3
5 | 0 4
6 | 1 2
7 | 1 3
8 | 1 4
9 | 1 6
10 | 1 26
11 | 2 3
12 | 2 4
13 | 2 6
14 | 2 7
15 | 3 4
16 | 3 26
17 | 4 33
18 | 5 6
19 | 5 7
20 | 5 8
21 | 5 9
22 | 5 10
23 | 5 11
24 | 5 12
25 | 5 13
26 | 5 14
27 | 5 15
28 | 5 75
29 | 6 7
30 | 6 8
31 | 6 9
32 | 6 10
33 | 6 11
34 | 6 12
35 | 6 13
36 | 6 14
37 | 6 15
38 | 6 46
39 | 7 8
40 | 7 9
41 | 7 10
42 | 7 11
43 | 7 12
44 | 7 13
45 | 7 14
46 | 7 15
47 | 7 65
48 | 8 9
49 | 8 10
50 | 8 11
51 | 8 12
52 | 8 13
53 | 8 14
54 | 8 15
55 | 9 10
56 | 9 11
57 | 9 12
58 | 9 13
59 | 9 14
60 | 9 15
61 | 9 47
62 | 9 70
63 | 10 11
64 | 10 12
65 | 10 13
66 | 10 14
67 | 10 15
68 | 11 12
69 | 11 13
70 | 11 14
71 | 11 15
72 | 11 75
73 | 12 13
74 | 12 14
75 | 12 15
76 | 13 14
77 | 13 15
78 | 13 70
79 | 14 15
80 | 16 17
81 | 16 18
82 | 16 19
83 | 16 20
84 | 16 21
85 | 16 22
86 | 16 23
87 | 16 24
88 | 16 61
89 | 17 18
90 | 17 19
91 | 17 20
92 | 17 21
93 | 17 22
94 | 17 23
95 | 17 34
96 | 18 19
97 | 18 20
98 | 18 21
99 | 18 22
100 | 18 23
101 | 18 30
102 | 18 53
103 | 18 69
104 | 19 20
105 | 19 21
106 | 19 22
107 | 19 23
108 | 19 78
109 | 19 80
110 | 20 21
111 | 20 22
112 | 20 23
113 | 20 62
114 | 21 22
115 | 21 23
116 | 21 57
117 | 22 23
118 | 23 50
119 | 24 25
120 | 24 26
121 | 24 27
122 | 24 28
123 | 24 29
124 | 24 30
125 | 24 31
126 | 24 32
127 | 24 33
128 | 24 34
129 | 24 35
130 | 25 26
131 | 25 27
132 | 25 28
133 | 25 29
134 | 25 30
135 | 25 31
136 | 25 32
137 | 25 33
138 | 25 34
139 | 25 35
140 | 25 80
141 | 26 27
142 | 26 28
143 | 26 29
144 | 26 30
145 | 26 31
146 | 26 32
147 | 26 33
148 | 26 34
149 | 26 35
150 | 26 57
151 | 26 75
152 | 27 28
153 | 27 29
154 | 27 30
155 | 27 31
156 | 27 32
157 | 27 33
158 | 27 34
159 | 27 35
160 | 28 29
161 | 28 30
162 | 28 31
163 | 28 32
164 | 28 33
165 | 28 34
166 | 28 35
167 | 29 30
168 | 29 31
169 | 29 32
170 | 29 33
171 | 29 34
172 | 29 35
173 | 29 75
174 | 30 31
175 | 30 32
176 | 30 33
177 | 30 34
178 | 30 35
179 | 31 32
180 | 31 33
181 | 31 34
182 | 31 35
183 | 31 37
184 | 32 33
185 | 32 34
186 | 32 35
187 | 32 43
188 | 33 34
189 | 33 35
190 | 33 57
191 | 33 65
192 | 34 35
193 | 34 44
194 | 36 37
195 | 36 38
196 | 36 39
197 | 36 40
198 | 36 41
199 | 36 42
200 | 36 43
201 | 36 44
202 | 36 45
203 | 36 46
204 | 36 69
205 | 37 38
206 | 37 39
207 | 37 40
208 | 37 41
209 | 37 42
210 | 37 43
211 | 37 44
212 | 37 45
213 | 37 46
214 | 38 39
215 | 38 40
216 | 38 41
217 | 38 42
218 | 38 43
219 | 38 44
220 | 38 45
221 | 38 46
222 | 38 72
223 | 39 40
224 | 39 41
225 | 39 42
226 | 39 43
227 | 39 44
228 | 39 45
229 | 39 46
230 | 40 41
231 | 40 42
232 | 40 43
233 | 40 44
234 | 40 45
235 | 40 46
236 | 40 64
237 | 40 69
238 | 41 42
239 | 41 43
240 | 41 44
241 | 41 45
242 | 41 46
243 | 41 54
244 | 42 43
245 | 42 44
246 | 42 45
247 | 42 46
248 | 43 44
249 | 43 45
250 | 43 46
251 | 44 45
252 | 44 46
253 | 44 56
254 | 45 46
255 | 45 72
256 | 47 48
257 | 47 49
258 | 47 50
259 | 47 51
260 | 47 68
261 | 48 49
262 | 48 50
263 | 48 51
264 | 49 50
265 | 49 51
266 | 50 51
267 | 52 53
268 | 52 54
269 | 52 55
270 | 52 56
271 | 52 57
272 | 52 58
273 | 52 59
274 | 53 54
275 | 53 55
276 | 53 56
277 | 53 57
278 | 53 58
279 | 53 59
280 | 54 55
281 | 54 56
282 | 54 57
283 | 54 58
284 | 54 59
285 | 55 56
286 | 55 57
287 | 55 58
288 | 55 59
289 | 55 69
290 | 56 57
291 | 56 58
292 | 56 59
293 | 56 64
294 | 57 58
295 | 57 59
296 | 57 79
297 | 58 59
298 | 58 68
299 | 58 71
300 | 60 61
301 | 60 62
302 | 60 63
303 | 60 64
304 | 60 65
305 | 60 66
306 | 60 67
307 | 61 62
308 | 61 63
309 | 61 64
310 | 61 65
311 | 61 66
312 | 61 67
313 | 62 63
314 | 62 64
315 | 62 65
316 | 62 66
317 | 62 67
318 | 63 64
319 | 63 65
320 | 63 66
321 | 63 67
322 | 64 65
323 | 64 66
324 | 64 67
325 | 65 66
326 | 65 67
327 | 66 67
328 | 68 69
329 | 68 70
330 | 68 71
331 | 68 72
332 | 68 73
333 | 69 70
334 | 69 71
335 | 69 72
336 | 69 73
337 | 70 71
338 | 70 72
339 | 70 73
340 | 71 72
341 | 71 73
342 | 72 73
343 | 74 75
344 | 74 76
345 | 74 77
346 | 74 78
347 | 74 79
348 | 74 80
349 | 75 76
350 | 75 77
351 | 75 78
352 | 75 79
353 | 75 80
354 | 76 77
355 | 76 78
356 | 76 79
357 | 76 80
358 | 77 78
359 | 77 79
360 | 77 80
361 | 78 79
362 | 78 80
363 | 79 80
--------------------------------------------------------------------------------
/Python/FindCommunities.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # 3.0
3 |
4 | #
5 | import pdb
6 | import sys
7 | import igraph
8 | from igraph import Graph
9 | import random
10 | import numpy as np
11 | import pandas as pd
12 |
13 |
def read_graph(file_name):
    """Load an edge-list file and return it as a preprocessed igraph graph.

    The first line of the file holds two space-separated integers: the
    expected vertex count and the expected edge count.  Every following
    line holds one edge as a space-separated pair of vertex indices.

    Args:
        file_name: Path of the edge-list file to read.

    Returns:
        The undirected graph after ``preprocess_graph`` has been applied.

    Raises:
        AssertionError: If the graph built from the edges does not have the
            vertex/edge counts announced on the first line.
    """
    df = pd.read_csv(file_name, sep=" ", names=["Edge1", "Edge2"])

    # ``DataFrame.irow`` no longer exists in pandas; ``iloc`` is the
    # positional accessor that replaced it.
    n_vertex, n_edge = (int(value) for value in df.iloc[0])

    # Everything after the header row is an edge.  Convert to plain ints so
    # igraph receives native Python integers rather than numpy scalars.
    edge_rows = df.iloc[1:]
    edges = [(int(source), int(target))
             for source, target in zip(edge_rows["Edge1"], edge_rows["Edge2"])]

    graph = Graph(edges=edges, directed=False)
    assert graph.vcount() == n_vertex, \
        "vertex count does not match the file header"
    assert graph.ecount() == n_edge, \
        "edge count does not match the file header"
    return preprocess_graph(graph)
24 |
25 |
def density(subgraph):
    """Return the density of ``subgraph``: twice its edge count per vertex.

    A graph without vertices has density ``0``.
    """
    vertex_total = subgraph.vcount()
    if vertex_total == 0:
        # Guard against dividing by zero for the empty graph.
        return 0
    return subgraph.ecount() * 2.0 / vertex_total
32 |
33 |
def pagerank_order(graph, reverse=False):
    """Yield the vertex indices of ``graph`` sorted by PageRank score.

    Scores are computed with ``graph.pagerank(directed=False)``.  Indices
    come out smallest score first unless ``reverse`` is true; equal scores
    are ordered by vertex index (descending when ``reverse`` is true).
    """
    scores = graph.pagerank(directed=False)
    # Pair each score with its vertex index so the sort carries the index.
    scored = list(zip(scores, range(0, graph.vcount())))
    scored.sort(reverse=reverse)
    for _score, vertex_index in scored:
        yield vertex_index
39 |
40 |
def vertices_in_pagerank_order(graph, reverse=False):
    """Yield the vertices of ``graph`` in the order given by ``pagerank_order``.

    The default order is smallest PageRank first; pass ``reverse=True`` for
    largest first.
    """
    for position in pagerank_order(graph, reverse=reverse):
        # Select the single vertex at this index and unwrap it.
        selection = graph.vs.select([position])
        yield selection[0]
46 |
47 |
def update_graph(graph, subgraph, vertex, density_metric):
    """Try to extend ``subgraph`` with ``vertex`` and keep it if density improves.

    Returns a ``(graph, added)`` pair: the extended subgraph and ``True``
    when the extension has a strictly higher ``density_metric`` value,
    otherwise the untouched ``subgraph`` and ``False``.
    """
    # Members are tracked through the "original_index" vertex attribute.
    extended_members = subgraph.vs["original_index"] + [vertex.index]
    candidate = graph.subgraph(extended_members)
    if not density_metric(candidate) > density_metric(subgraph):
        return subgraph, False
    return candidate, True
58 |
59 |
def preprocess_graph(graph):
    """Simplify ``graph`` and tag every vertex with its own index.

    The result of ``graph.simplify()`` (intended to drop loops and
    multi-edges) gets an "original_index" vertex attribute holding each
    vertex's index, and is returned.
    """
    simplified = graph.simplify()
    simplified.vs["original_index"] = simplified.vs.indices
    return simplified
66 |
67 |
def LinkAggregateAlgorithm(graph, density_metric):
    """Run the Link Aggregate algorithm to build seed clusters.

    Vertices are visited from highest to lowest PageRank.  Each vertex is
    offered to every existing cluster via ``update_graph`` and joins those
    whose ``density_metric`` strictly improves.  A vertex that joins no
    cluster starts a new singleton cluster.

    Args:
        graph: The preprocessed graph (vertices carry "original_index").
        density_metric: Callable mapping a (sub)graph to a comparable score.

    Returns:
        A set of subgraphs of ``graph``, one per cluster.
    """
    clusters = []
    vertex_count = graph.vcount()

    for i, v in enumerate(vertices_in_pagerank_order(graph, reverse=True)):
        added = False
        grown = []
        for cluster in clusters:
            cluster, was_added = update_graph(graph, cluster, v, density_metric)
            grown.append(cluster)
            added = added or was_added
        clusters = grown

        # With no clusters yet the loop above is a no-op, so the first vertex
        # lands here exactly once.  Seeding it separately beforehand would
        # create two identical clusters that then grow in lock-step.
        if not added:
            clusters.append(graph.subgraph([v.index]))

        if (i + 1) % 500 == 0:
            # Progress report; write() keeps the text identical and works on
            # both Python 2 and Python 3.
            sys.stderr.write(
                "\t...Processed vertex %s of %s\n" % (i + 1, vertex_count))
    return set(clusters)
88 |
89 |
def ImprovedIterativeScanAlgorithm(subgraph, graph, density_metric):
    """Refine a seed cluster with the Improved Iterative Scan algorithm.

    Each pass considers every vertex of the current cluster together with
    every vertex adjacent to it in ``graph``.  Members are tentatively
    removed and non-members tentatively added; a change is kept whenever it
    strictly increases ``density_metric``.  Passes repeat until one leaves
    the metric unchanged.

    Args:
        subgraph: Seed cluster, a subgraph of ``graph`` whose vertices carry
            the "original_index" attribute.
        graph: The full graph.  Its vertex ids are used interchangeably with
            "original_index" values, as set up by ``preprocess_graph``.
        density_metric: Callable mapping a (sub)graph to a comparable score.

    Returns:
        The refined cluster as a subgraph of ``graph`` (or ``subgraph``
        itself when no change improved it).
    """
    cluster = subgraph
    cluster_density = density_metric(cluster)
    best = cluster_density
    increased = True

    while increased:
        members = set(cluster.vs["original_index"])

        # Candidates are the members plus their neighbours in the FULL graph.
        # Asking the cluster subgraph for neighbours would only ever return
        # vertices already inside it, so nothing could be added.
        candidates = set(members)
        for index in members:
            candidates.update(graph.neighbors(index))

        # Ascending index order matches the vertex order of an igraph
        # subgraph, which is what the scan iterates over.
        for index in sorted(candidates):
            if index in members:
                trial_members = members - {index}
            else:
                trial_members = members | {index}
            trial = graph.subgraph(sorted(trial_members))
            trial_density = density_metric(trial)
            if trial_density > cluster_density:
                cluster = trial
                cluster_density = trial_density
                members = trial_members

        if cluster_density == best:
            increased = False
        else:
            best = cluster_density
    return cluster
116 |
117 |
def process(file_name):
    """Detect communities in the graph stored at ``file_name`` and print them.

    The graph is read with ``read_graph``, seed clusters are built with
    ``LinkAggregateAlgorithm`` and each seed is refined with
    ``ImprovedIterativeScanAlgorithm`` (both using ``density``).  Progress
    messages go to standard error.  Each refined community is written to
    standard output as one line of space-separated "original_index" values.

    Args:
        file_name: Path of the edge-list graph file.
    """
    graph = read_graph(file_name)

    # write() instead of the Python-2-only print statement: same bytes on
    # the stream, and valid syntax on Python 3 as well.
    sys.stderr.write("Running Link Aggregate Algorithm...\n")
    seeds = LinkAggregateAlgorithm(graph, density)

    sys.stderr.write("Running Improved Improved Iterative Scan Algorithm...\n")
    for seed in seeds:
        community = ImprovedIterativeScanAlgorithm(seed, graph, density)
        members = community.vs["original_index"]
        sys.stdout.write(" ".join(str(v) for v in members) + "\n")
129 |
130 |
# Script entry point: expects exactly one argument, the path to the graph file.
if __name__ == "__main__":
    if len(sys.argv) != 2:
        sys.stderr.write(
            "ERROR: Program requires single argument: path to graph file.\n")
    else:
        try:
            # Only probe that the file can be opened; process() reads it by
            # name.  Keeping process() out of the try block stops unrelated
            # IOErrors raised during processing from being reported as a
            # missing input file.
            with open(sys.argv[1]):
                pass
        except IOError:
            sys.stderr.write('ERROR: Input file does not exist.\n')
        else:
            process(sys.argv[1])
140 |
--------------------------------------------------------------------------------