├── README.md
└── dynamic_community_detection.py


/README.md:
--------------------------------------------------------------------------------
 1 | Overview:
 2 | 
 3 | This algorithm performs community detection on an edge list one edge at a time, instead of batch processing an entire
 4 | edge list all at once (like Louvain). The algorithm works by first performing Louvain community detection on a subset
 5 | of the edge list, to determine the initial set of communities. A subsequent edge list is input into the 
 6 | "community_detection" function where each edge is processed one at a time, and nodes are either assigned to existing 
 7 | communities, assigned to new communities, or moved from one community to another.
 8 | 
 9 | 
10 | Source:
11 | 
12 | The source of the dynamic community detection algorithm implemented here is:
13 | 
14 | 	Dynamic Social Community Detection and Its Applications (2014) - Nam P. Nguyen, Thang N. Dinh, Yilin Shen, My T. Thai
15 | 	
16 | 
17 | Requirements:
18 | 
19 | python-louvain==0.10
20 | 
21 | networkx==1.11
22 | 
23 | numpy==1.13.1
24 | 
25 | 
26 | Execution:
27 | 
28 | Sample execution in main (Returns final_partition):
29 | 
30 | 	python dynamic_community_detection.py
31 | 
32 | To use community detection algorithm:
33 | 
 34 | 	final_partition, G_new = community_detection(G_new, edge_list, previous_partition)
35 | 
36 | 
37 | Full documentation can be found in the source file.
38 | 


--------------------------------------------------------------------------------
/dynamic_community_detection.py:
--------------------------------------------------------------------------------
  1 | """
  2 | % Dynamic Community Detection Algorithm
  3 | % Version 1.0 (09/04/18)
  4 | %
  5 | % Original author:
  6 | % Tyrone Naidoo (naidootea@gmail.com)
  7 | %
  8 | % License:
  9 | % CC BY-NC-SA 4.0 (https://creativecommons.org/licenses/by-nc-sa/4.0/)
 10 | %
 11 | """
 12 | 
 13 | import networkx as nx
 14 | import community
 15 | import operator
 16 | import itertools
 17 | import numpy as np
 18 | from copy import deepcopy
 19 | 
import logging
# Configure the root logger when this module is imported: INFO level and
# "HH:MM:SS LEVEL message" formatted records.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s', datefmt='%H:%M:%S')
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
 23 | 
 24 | 
 25 | def _compute_e(node, community, Graph, partition):
 26 |     """
 27 |     This function calculates the "e" value between a specified node and
 28 |     community. The "e" value of a node is the number of edges from
 29 |     that node to a specific community.
 30 | 
 31 |     Args:
 32 |         node (any):       A node in a graph. May be an integer or string
 33 |                           or object, etc.
 34 |         community (int):  The community to check if edges are connected
 35 |                           to from the node.
 36 |         Graph (graph):    A graph consisting of nodes and edges.
 37 |         partition (dict): Keys represent nodes, and values represent the
 38 |                           community that each node belongs to.
 39 | 
 40 |     Returns:
 41 |         e (int): The number of edges between a node and a community.
 42 |     """
 43 | 
 44 |     neighbors = Graph.neighbors(node)
 45 | 
 46 |     e = 0
 47 | 
 48 |     # Check if neighbors are in the specified community
 49 |     for node_obj in neighbors:
 50 |         community_neighbor = partition.get(node_obj)
 51 |         if community_neighbor == community:
 52 |             e += 1
 53 | 
 54 |     return e
 55 | 
 56 | 
 57 | def _compute_degree_of_community(community, Graph, partition):
 58 |     """
 59 |     This function calculates the degree of a community. The degree of a
 60 |     community is the sum of the degrees of each node belonging to that
 61 |     community.
 62 | 
 63 |     Args:
 64 |         community (int):  The community whose degree is to be calculated
 65 |         Graph (graph):    A graph consisting of nodes and edges.
 66 |         partition (dict): Keys represent nodes, and values represent the
 67 |                           community that each node belongs to.
 68 | 
 69 |     Returns:
 70 |         degree_community (int): The degree of the community
 71 |     """
 72 | 
 73 |     # Get all the nodes that are in the community
 74 |     keys = []
 75 | 
 76 |     for node, community_node in partition.items():
 77 |         if community_node == community:
 78 |             keys.append(node)
 79 | 
 80 |     degree_community = 0
 81 | 
 82 |     # Get each nodes degree and sum
 83 |     for node in keys:
 84 |         degree_community += Graph.degree(node)
 85 | 
 86 |     return degree_community
 87 | 
 88 | 
 89 | def _compute_fin(node, Graph, partition):
 90 |     """
 91 |     This function computes the force F_in for a node, which signifies
 92 |     how strongly a node is attracted to the current community that it is
 93 |     in. The force F_in is used to determine if a node should stay in
 94 |     it's current community.
 95 | 
 96 |     Args:
 97 |         node (any):       A node in a graph. May be an integer or string
 98 |                           or object, etc.
 99 |         Graph (graph):    A graph consisting of nodes and edges.
100 |         partition (dict): Keys represent nodes, and values represent the
101 |                           community that each node belongs to.
102 | 
103 |     Returns:
104 |         fin (float): The force that a node's community exerts on that
105 |                      node.
106 |     """
107 | 
108 |     community_node = partition.get(node)
109 | 
110 |     e = _compute_e(node, community_node, Graph, partition)
111 |     degree_node = Graph.degree(node)
112 |     degree_community = _compute_degree_of_community(community_node,
113 |                                                    Graph, partition)
114 |     num_edges = Graph.number_of_edges()
115 | 
116 |     fin = e - ( (degree_node * (degree_community - degree_node)) / (2 * num_edges) )
117 | 
118 |     return fin
119 | 
120 | 
def _compute_fout(node, Graph, partition, community):
    """
    This function computes the force F_out for a node, which signifies
    how strongly a node is attracted to another community. The force
    F_out is used to determine if a node should move to a different
    community.

    The value is

        F_out = e - d_node * d_community / (2 * m)

    where e is the number of edges from the node into "community",
    d_node is the degree of the node, d_community is the degree of
    "community" and m is the number of edges in the graph.

    Args:
        node (any):       A node in a graph. May be an integer or string
                          or object, etc.
        Graph (graph):    A graph consisting of nodes and edges.
        partition (dict): Keys represent nodes, and values represent the
                          community that each node belongs to.
        community (int):  The external community to check if node "node"
                          has edges connecting to it.

    Returns:
        fout (float): The force that an external community exerts on a
                      node.

    Raises:
        ZeroDivisionError: If the graph has no edges.
    """

    e = _compute_e(node, community, Graph, partition)
    degree_node = Graph.degree(node)
    degree_community = _compute_degree_of_community(community, Graph,
                                                    partition)
    num_edges = Graph.number_of_edges()

    # float() forces true division; with two integer operands "/" would
    # truncate under Python 2 and silently distort the force.
    expected_links = (degree_node * degree_community) / float(2 * num_edges)

    fout = e - expected_links

    return fout
151 | 
152 | 
153 | def _find_neighbour_communities(community, Graph, partition):
154 |     """
155 |     This function finds the neighbour communities of a specified
156 |     community. It first finds all the nodes inside the specified
157 |     community then finds the neighbours of each node and determines
158 |     the communities that they belong to.
159 | 
160 |     Args:
161 |         community (int):  The neighbour communities of this "community"
162 |                           will be found.
163 |         Graph (graph):    A graph consisting of nodes and edges.
164 |         partition (dict): Keys represent nodes, and values represent the
165 |                           community that each node belongs to.
166 | 
167 | 
168 |     Returns:
169 |         neighbor_communities (list): A list of communities that are
170 |                                      neighbors to the community provided
171 |                                      as a parameter.
172 |     """
173 | 
174 |     # Find all nodes in "community"
175 |     community_nodes = []
176 |     all_neighbors = []
177 |     for node, community_node in partition.items():
178 |         if community_node == community:
179 |             neighbors = Graph.neighbors(node)
180 |             all_neighbors.append(neighbors)
181 |             community_nodes.append(node)
182 | 
183 |     # all_neighbors is a list of lists, convert to one list
184 |     # and then remove duplicates
185 |     all_neighbors_list = list(itertools.chain(*all_neighbors))
186 |     unique_neighbors = set(all_neighbors_list)
187 | 
188 |     # Remove all the neighbors that are within the same community
189 |     # that was provided as a parameter.
190 |     # Hence, we have only those nodes external to the community
191 |     # ie. nodes belonging to neighbor communities
192 |     external_nodes = [x for x in unique_neighbors if
193 |                       x not in community_nodes]
194 | 
195 |     # Now collect each of the external nodes communities
196 |     all_communities = []
197 |     for node in external_nodes:
198 |         neighbor_community = partition[node]
199 |         all_communities.append(neighbor_community)
200 | 
201 |     # Get the unique list of neighbor communities
202 |     # ie. remove duplicates
203 |     neighbor_communities = set(all_communities)
204 | 
205 |     return neighbor_communities
206 | 
207 | 
def _compute_q_uv(u, v, Graph, partition):
    """
    Compute the delta q values of nodes u and v for a new edge (u, v).

    The two values are used by the caller to decide whether node u
    should move to node v's community (C(v)), whether node v should
    move to node u's community (C(u)), or whether neither should move.

    Args:
        u (any):          A node in a graph. May be an integer or string
                          or object, etc.
        v (any):          A node in a graph. May be an integer or string
                          or object, etc.
        Graph (graph):    A graph consisting of nodes and edges.
        partition (dict): Keys represent nodes, and values represent the
                          community that each node belongs to.

    Returns:
        delta_qu (int): The delta q value pertaining to node u.
        delta_qv (int): The delta q value pertaining to node v.
    """

    edges_plus_one = Graph.number_of_edges() + 1

    comm_u = partition.get(u)
    comm_v = partition.get(v)
    deg_comm_u = _compute_degree_of_community(comm_u, Graph, partition)
    deg_comm_v = _compute_degree_of_community(comm_v, Graph, partition)

    def _delta_q(node, own, other, deg_own, deg_other):
        # e values: edges from "node" into its own community and into
        # the community of the other endpoint
        e_own = _compute_e(node, own, Graph, partition)
        e_other = _compute_e(node, other, Graph, partition)
        degree_plus_one = Graph.degree(node) + 1

        link_term = 4 * edges_plus_one * (e_other + 1 - e_own)
        own_term = e_own * ((2 * deg_other) - (2 * deg_own) - e_own)
        degree_term = (2 * degree_plus_one) * \
            (degree_plus_one + deg_other - deg_own)

        return link_term + own_term - degree_term

    delta_qu = _delta_q(u, comm_u, comm_v, deg_comm_u, deg_comm_v)
    delta_qv = _delta_q(v, comm_v, comm_u, deg_comm_v, deg_comm_u)

    return delta_qu, delta_qv
265 | 
266 | 
267 | def _compute_modularity(Graph, partition):
268 |     """
269 |     This function computes the modularity of the given community
270 |     structure. A higher modularity indicates a better community
271 |     structure. The modularity is used to determine whether a node should
272 |     move from one community to another.
273 | 
274 |     Args:
275 |         Graph (graph):    A graph consisting of nodes and edges.
276 |         partition (dict): Keys represent nodes, and values represent the
277 |                           community that each node belongs to.
278 | 
279 |     Returns:
280 |         modularity (float): The modularity of the given community
281 |                             structure.
282 |     """
283 | 
284 |     num_edges = Graph.number_of_edges()
285 |     all_communities = set(partition.values())
286 |     all_c_degrees = []
287 | 
288 |     # Get the degree of each community
289 |     for community in all_communities:
290 |         degree = _compute_degree_of_community(community,
291 |                                               Graph,
292 |                                               partition)
293 |         all_c_degrees.append(degree)
294 | 
295 |     # Get the number of edges inside each community
296 |     all_num_edges = []
297 |     for community in all_communities:
298 |         all_nodes_in_c = []
299 |         for node, comm in partition.items():
300 |             if comm == community:
301 |                 all_nodes_in_c.append(node)
302 | 
303 |         # At this stage we have all nodes belonging to "community"
304 |         # Now create subgraph from these nodes (Easier to count number
305 |         # of edges). Also, excludes the edges that connects to external
306 |         # nodes (ie. outside of the community)
307 |         G_temp = Graph.subgraph(all_nodes_in_c)
308 |         all_num_edges.append(G_temp.number_of_edges())
309 | 
310 |     # Convert to numpy arrays - makes the modularity computation easier
311 |     degrees_array = np.array(all_c_degrees)
312 |     degrees_array_sqrd = np.square(degrees_array)
313 | 
314 |     com_num_edges_array = np.array(all_num_edges)
315 |     overall_num_edges_sqrd = np.square(num_edges)
316 | 
317 |     A = np.divide(com_num_edges_array, num_edges)
318 |     B = np.divide( degrees_array_sqrd, (4 * overall_num_edges_sqrd) )
319 | 
320 |     modularity = sum(A - B)
321 | 
322 |     return modularity
323 | 
324 | 
def _find_best_community_for_neighbors(neighbors, Graph, partition,
                                       modularity_initial):
    """
    This function finds the best community for the neighbors of node
    w (where w is a node that moved to a new community).

    Each node in "neighbors" is processed in turn. The communities of
    that node's own neighbors which differ from the node's community
    are collected, and the modularity of the community structure is
    computed with the node placed in each of them.

    If the highest of those modularities is greater than
    "modularity_initial", the node is recorded as moving to the
    community that produced it. "partition" itself is not modified;
    every trial is evaluated against the partition as passed in.

    Args:
        neighbors (list): A list of neighbor nodes
        Graph (graph):    A graph consisting of nodes and edges.
        partition (dict): Keys represent nodes, and values represent the
                          community that each node belongs to.
        modularity_initial (float): The initial modularity prior to
                                    moving a neighbor node to other
                                    communities.

    Returns:
        changes (dict): A dictionary where keys are nodes, and the
                        values are the new communities that each node is
                        to be assigned to.
    """

    changes = {}
    for node in neighbors:
        own_community = partition.get(node)

        # Get communities of neighbors of "node" (different communities)
        communities = []
        for node_x in Graph.neighbors(node):
            community_x = partition.get(node_x)
            if community_x != own_community:
                communities.append(community_x)

        if not communities:
            continue

        # A shallow copy is required here: a deep copy would clone the
        # node keys, and node objects without value-based hashing would
        # then no longer match the nodes stored in the graph.
        temp_partition = dict(partition)

        # Place node in each community and compute modularity
        modularities = {}
        for c in set(communities):
            temp_partition[node] = c
            modularities[c] = _compute_modularity(Graph, temp_partition)

        # If modularity increases record the change that needs to be
        # made ie. move node to other community. On a tie the last
        # community in iteration order with the best modularity is kept.
        best_modularity = max(modularities.values())
        if best_modularity > modularity_initial:
            for community, mod in modularities.items():
                if mod == best_modularity:
                    changes[node] = community

    return changes
386 | 
387 | 
388 | def _find_missing_elements(element_list):
389 |     """
390 |     This function finds elements that are missing in a list of
391 |     a sequence of numbers.
392 |     
393 |     Eg: List = [1,2,3,4,6,7,8,9,10]
394 |         Return = 5
395 | 
396 |     Args:
397 |         element_list (list): A list of integers.
398 |         
399 |     Returns:
400 |         missing_elements (list): A list of the integer values that
401 |                                  are missing in 'element_list'.
402 |     """
403 |         
404 |     start, end = element_list[0], element_list[-1]
405 |     
406 |     missing_elements = sorted(set(range(start, end + 1)).difference(element_list))
407 |     
408 |     return missing_elements
409 | 
410 | 
411 | def _reset_community_numbers(partition, community_list, missing_community):
412 |     """
413 |     This function moves the nodes of the last community into the 
414 |     community that is currently empty. It is used to reset the community
415 |     numbers when a community becomes empty due to nodes moving to
416 |     other communities.
417 |     
418 |     Eg: Communities = [1,2,3,4,6,7,8,9,10]
419 |         Returns = [1,2,3,4,5,6,7,8,9]
420 | 
421 |     Args:
422 |         partition (dict): Keys represent nodes, and values represent the
423 |                           community that each node belongs to.
424 |         community_list (list): A list of integers representing community
425 |                                numbers
426 |         missing_community (int): The empty community
427 |         
428 |     Returns:
429 |         partition (dict): Keys represent nodes, and values represent the
430 |                           community that each node belongs to. The
431 |                           partition that results from moving nodes in the
432 |                           last community to the empty community.
433 |     """
434 |     
435 |     last_community = community_list[-1]
436 |     
437 |     for node, community in partition.items():
438 |         if community == last_community:
439 |             partition[node] = missing_community
440 |         
441 |     return partition
442 | 
443 | 
def _process_new_node(Graph, partition_1, node_u):
    """
    This function is used to determine whether a new node should create
    its own community, along with neighbor nodes, or whether the new
    node should be assigned to another community.

    The new node u is first placed in a brand new community C(u). Its
    neighbors are then pulled into C(u) one at a time, strongest first,
    for as long as some remaining neighbor is attracted more strongly
    by C(u) (F_out) than by its current community (F_in). Finally, if a
    neighbor community of C(u) attracts u more strongly than C(u)
    itself does, u joins that community and the pulled neighbors stay
    in their original communities.

    Note that "partition_1" is modified in place in both cases; callers
    must use the returned dictionary.

    Args:
        Graph (graph):      A graph consisting of nodes and edges.
        partition_1 (dict): Keys represent nodes, and values represent
                            the community that each node belongs to.
        node_u (any):       A new node that was added to the graph. May
                            be an integer or string or object, etc.

    Returns:
        partition_1 (dict): A dictionary where keys are nodes, and the
                            values are the communities that each node is
                            assigned to. Partition_1 is the community
                            structure that results from a node and some
                            of its neighbors being assigned to an
                            entirely new community.

        OR

        partition_2 (dict): A dictionary where keys are nodes, and the
                            values are the communities that each node is
                            assigned to. Partition_2 is the community
                            structure that results from a node being
                            assigned to an already existing community.
    """

    # Add node u to its own community
    community_u = max(partition_1.values()) + 1
    partition_1[node_u] = community_u

    # The while loop alters partition_1, but the final step may need to
    # undo those changes, so keep a snapshot. A shallow copy is used on
    # purpose: a deep copy would clone the node keys, and node objects
    # without value-based hashing would then no longer match the graph.
    partition_2 = dict(partition_1)

    # Materialise the neighbors as a list of our own: entries are
    # removed below, and Graph.neighbors() is only a list in
    # networkx 1.x (it is an iterator from networkx 2.0 onwards).
    non_visited_neighbors = list(Graph.neighbors(node_u))

    while non_visited_neighbors:

        # Neighbors v that are attracted more strongly by C(u) than by
        # their current community, mapped to their F_out value
        qualifying_nodes = {}
        for v in non_visited_neighbors:
            f_in_v = _compute_fin(v, Graph, partition_1)
            f_out_v = _compute_fout(v, Graph, partition_1, community_u)

            if f_out_v > f_in_v:
                qualifying_nodes[v] = f_out_v

        # If there are no qualifying nodes, then stop pulling neighbors
        if not qualifying_nodes:
            break

        # Move the neighbor with the highest F_out into C(u) (the first
        # one in iteration order on a tie) and mark it as visited. The
        # forces of the remaining neighbors are recomputed on the next
        # pass because the partition has changed.
        strongest = max(qualifying_nodes, key=qualifying_nodes.get)
        partition_1[strongest] = community_u
        non_visited_neighbors.remove(strongest)

    # Find F_in for u inside community C(u)
    F_in_u = _compute_fin(node_u, Graph, partition_1)

    # Find neighbor communities of C(u)
    neighbor_communities = _find_neighbour_communities(community_u,
                                                       Graph,
                                                       partition_1)

    if not neighbor_communities:
        return partition_1

    # Find F_out for u exerted by each neighbor community
    all_F_out_u = {}
    for neighbor_community in neighbor_communities:
        all_F_out_u[neighbor_community] = _compute_fout(node_u, Graph,
                                                        partition_1,
                                                        neighbor_community)

    max_fout_u = max(all_F_out_u.values())

    # If the force that community C(u) exerts on node u is at least as
    # strong as every external force, C(u) remains as built by the
    # while loop.
    if not max_fout_u > F_in_u:
        return partition_1

    # Otherwise get the community of the maximum F_out (the last one in
    # iteration order on a tie)
    for community, fout in all_F_out_u.items():
        if fout == max_fout_u:
            key = community

    # Assign u to the other community. Partition_2 still has all the v
    # nodes in their original communities.
    partition_2[node_u] = key
    return partition_2
568 | 
569 | 
def _process_new_edge(Graph, partition, node_1, node_2):
    """
    Handle a new edge between two nodes that are already in the graph.

    When the two end points are in different communities, the delta q
    values of both nodes decide whether one of them should move to the
    other node's community. If a node moves, each of its neighbors is
    then checked to see whether moving it to another community would
    increase the modularity, and those moves are applied as well.

    "partition" is updated in place.

    Args:
        Graph (graph):      A graph consisting of nodes and edges.
        partition (dict):   Keys represent nodes, and values represent
                            the community that each node belongs to.
        node_1 (any):       A node that represents one point of the new
                            edge that was added to the graph. May be an
                            integer or string or object, etc.
        node_2 (any):       A node that represents the other point of
                            the new edge that was added to the graph.
                            May be an integer or string or object, etc.

    Returns:
        partition (dict): The same dictionary that was passed in. It is
                          unchanged when both nodes already share a
                          community or when no move is indicated;
                          otherwise it holds the structure that results
                          from one end point moving to the other's
                          community, together with any moves of that
                          node's neighbors.

    """

    # Nodes that already share a community need no processing
    if partition[node_1] == partition[node_2]:
        return partition

    # Compute the two delta(q) values
    delta_qu, delta_qv = _compute_q_uv(node_1, node_2, Graph, partition)

    # Modularity does not change, so leave as is
    no_positive_delta = (delta_qu <= 0) and (delta_qv <= 0)
    if no_positive_delta or (delta_qu == delta_qv):
        return partition

    # The two values differ at this point, so the node with the larger
    # delta q is the one (w) that moves to the other node's community
    if delta_qu > delta_qv:
        w, new_community = node_1, partition.get(node_2)
    else:
        w, new_community = node_2, partition.get(node_1)

    partition[w] = new_community

    # The modularity of the current community structure,
    # prior to moving any of w's neighbors
    modularity_initial = _compute_modularity(Graph, partition)

    # Decide if w's neighbours should also move, and apply those moves
    changes = _find_best_community_for_neighbors(Graph.neighbors(w),
                                                 Graph, partition,
                                                 modularity_initial)
    partition.update(changes)

    return partition
644 | 
645 | 
646 | def community_detection(G_new, edge_list, previous_partition):
647 |     """
648 |     This function performs community assignment on an edge list, one
649 |     row/edge at a time, or on a singular node. The function determines
650 |     whether the edge already exists within the current graph, or if
651 |     either node exists in the current graph. Based on this outcome, the
652 |     function processes the new node or the new edge using the
653 |     appropriate functions. It then returns an updated community
654 |     structure once all the edges in the edge list have been processed.
655 |     It also takes in a singular node with no edges and adds it to its
656 |     own community.
657 | 
658 |     Args:
659 |         G_new (graph):  A graph consisting of nodes and edges. This
660 |                         graph evolves as each node / edge is added to
661 |                         the graph.
662 | 
663 |         edge_list (list): A list of 2-tuples or a list consisting
664 |                           of one new node with no edges (neighbours).
665 |                           New nodes and edges are added to graph G_new.
666 | 
667 |         previous_partition (dict):  A dictionary where keys are nodes,
668 |                                     and the values are the communities
669 |                                     that each node is assigned to. The
670 |                                     previous partition prior to adding
671 |                                     new nodes and edges.
672 | 
673 |     Returns:
674 |         new_partition (dict):   A dictionary where keys are nodes, and
675 |                                 the values are the communities that each
676 |                                 node is assigned to. The new partition
677 |                                 where new nodes and edges are added to
678 |                                 the graph and nodes are assigned to
679 |                                 communities.
680 | 
681 |         G_new (graph):  A graph consisting of nodes and edges. This
682 |                         is the graph that results from adding new nodes
683 |                         and edges.
684 |     """
685 | 
686 |     new_partition = deepcopy(previous_partition)
687 | 
688 |     # If a singular node with no edges is sent to the function
689 |     if not isinstance(edge_list[0], tuple):
690 |         new_node = edge_list[0]
691 |         has_node = G_new.has_node(new_node)
692 | 
693 |         if has_node:
694 |             return new_partition, G_new
695 | 
696 |         G_new.add_node(new_node)
697 |         new_partition[new_node] = max(previous_partition.values()) + 1
698 | 
699 |         return new_partition, G_new
700 | 
701 |     # Process the list of 2-tuples (edges)
702 |     for row in edge_list:
703 | 
704 |         # Three cases occur depending on whether:
705 |         # 1) One node is not in the graph,
706 |         # 2) Both nodes are not in the graph,
707 |         # 3) Both nodes are in the graph but the edge between
708 |         #    them is not
709 | 
710 |         node_1 = row[0]
711 |         node_2 = row[1]
712 | 
713 |         # In the event that some users send tweets to themselves,
714 |         # ignore these
715 |         if node_1 == node_2:
716 |             continue
717 | 
718 |         has_edge = G_new.has_edge(node_1, node_2)
719 |         has_node_1 = G_new.has_node(node_1)
720 |         has_node_2 = G_new.has_node(node_2)
721 | 
722 |         if (not has_edge) and ((not has_node_1) or (not has_node_2)):
723 | 
724 |             if (not has_node_1) and (not has_node_2):
725 |                 # Add BOTH new nodes to one new community. If we add
726 |                 # the first node to its own community, then process the
727 |                 # second node by adding it to its own community, and
728 |                 # then determining if its neighbors should be added to
729 |                 # that community, then both node 1 and node 2 will end
730 |                 # up in the same community (node_2 will only have one
731 |                 # neighbour, ie node_1, which will "pull" node_2 into
732 |                 # its community). F_out will always be > F_in
733 | 
734 |                 G_new.add_edge(node_1, node_2)
735 | 
736 |                 new_community = max(list(new_partition.values())) + 1
737 | 
738 |                 new_partition[node_1] = new_community
739 |                 new_partition[node_2] = new_community
740 | 
741 |             elif (not has_node_1) and (has_node_2):
742 |                 G_new.add_edge(node_1, node_2)
743 | 
744 |                 new_partition = _process_new_node(G_new,
745 |                                                   new_partition,
746 |                                                   node_1)
747 | 
748 |             elif (has_node_1) and (not has_node_2):
749 |                 G_new.add_edge(node_1, node_2)
750 | 
751 |                 new_partition = _process_new_node(G_new,
752 |                                                   new_partition,
753 |                                                   node_2)
754 |                 
755 |         elif (not has_edge) and (has_node_1) and (has_node_2):
756 |             G_new.add_edge(node_1, node_2)
757 |             
758 |             new_partition = _process_new_edge(G_new,
759 |                                               new_partition,
760 |                                               node_1,
761 |                                               node_2)
762 |             
763 |             # Check if an empty community resulted from the 
764 |             # processing of the new edge
765 |             community_list = list(set(new_partition.values()))
766 |             missing_community = _find_missing_elements(community_list)
767 |             
768 |             # Move the nodes in the last community to the empty
769 |             # community
770 |             if missing_community:
771 |                 new_partition = _reset_community_numbers(new_partition,
772 |                                                          community_list,
773 |                                                          missing_community[0])
774 |                        
775 |         # Skip duplicates
776 |         elif (has_edge) and (has_node_1) and (has_node_2):
777 |             continue
778 | 
779 |     return new_partition, G_new
780 | 
781 | 
def main(training_set_size=500, testing_set_size=500):
    """
    This is an example of how to use the "community_detection" function.
    Executing the main function will perform community detection on a
    random graph of (training_set_size + testing_set_size) nodes (1000
    nodes with the default arguments). The Louvain community detection
    algorithm will be applied to the first "training_set_size" nodes
    (and accompanying edges) in order to obtain the initial community
    structure. Thereafter, the dynamic community detection algorithm
    will be applied to the remaining "testing_set_size" nodes (and
    the edges between them).

    Args:
        training_set_size (integer):    Number of nodes in the initial
                                        training set

        testing_set_size (integer):     Number of nodes in the testing
                                        set

    Returns:
        dynamic_partition (dict):   A dictionary where keys are nodes,
                                    and the values are the communities
                                    that each node is assigned to after
                                    the testing edges were processed.
    """

    # Size the random graph from the arguments so that exactly
    # "training_set_size" nodes remain in the training graph once the
    # testing nodes are removed.
    total_nodes = training_set_size + testing_set_size
    test_set_range = list(range(training_set_size, total_nodes))

    G_training = nx.erdos_renyi_graph(total_nodes, 0.01)

    # Take an independent copy of the testing subgraph. On
    # networkx >= 2.0 "subgraph" returns a live view, which would be
    # emptied by the node removal on the next line.
    G_testing = G_training.subgraph(test_set_range).copy()
    G_training.remove_nodes_from(test_set_range)

    # Perform Louvain community detection on the initial data set
    # "initial_partition" is a dictionary. Key = node,
    # Value = community the node is assigned to
    initial_partition = community.best_partition(G_training)

    print("number edges", G_testing.number_of_edges())

    # "community_detection" indexes into the edge list, so hand it a
    # real list (networkx >= 2.0 returns a non-indexable edge view).
    testing_edges = list(G_testing.edges())

    if testing_edges:
        # "community_detection" returns both the updated partition and
        # the updated graph; only the partition is reported here.
        dynamic_partition, G_training = community_detection(
            G_training,
            testing_edges,
            initial_partition)
    else:
        # Nothing to process: the partition is unchanged. This also
        # avoids indexing into an empty edge list.
        dynamic_partition = initial_partition

    logger.info("Final Partition %s %s %s",
                 '\n', dynamic_partition, '\n')

    return dynamic_partition
821 | 
# Run the sample community detection example only when this file is
# executed as a script; importing the module does not trigger it.
if __name__ == "__main__":
    main()
824 | 
825 | 
826 | 
827 | 


--------------------------------------------------------------------------------