├── .gitignore
├── README.md
├── __init__.py
├── information_propagation
    ├── __init__.py
    ├── independent_cascade.py
    ├── linear_threshold.py
    └── tests
    │   ├── test_independent_cascade.py
    │   └── test_linear_threshold.py
└── similarity
    ├── __init__.py
    ├── ascos.py
    ├── cosine.py
    ├── jaccard.py
    ├── katz.py
    ├── lhn.py
    ├── rss2.py
    ├── simrank.py
    └── tests
        ├── test_ascos.py
        ├── test_cos_sim.py
        ├── test_jaccard.py
        ├── test_katz.py
        ├── test_lhn.py
        ├── test_rss2.py
        └── test_simrank.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | 
21 | # Installer logs
22 | pip-log.txt
23 | 
24 | # Unit test / coverage reports
25 | .coverage
26 | .tox
27 | nosetests.xml
28 | 
29 | # Translations
30 | *.mo
31 | 
32 | # Mr Developer
33 | .mr.developer.cfg
34 | .project
35 | .pydevproject
36 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | networkx_addon
 2 | ==============
 3 | 
 4 | ***CAUTION***
 5 | This project was developed in 2013, when Python 2.x and networkx 1.x were still popular.
 6 | 
 7 | If you want to use this library on later Python (e.g., Python 3.x) and later networkx (e.g., networkx 2.x), you'll need to modify the code.
 8 | 
 9 | ### Some add-on modules to networkx library
10 | 
11 | 1. Information propagation models
12 |   (1) independent cascade model
13 |   (2) linear threshold model
14 | 
15 | 2. Vertex similarity measures
16 |   (1) ASCOS (for both weighted and unweighted network)
17 |   (2) Jaccard
18 |   (3) Cosine
19 |   (4) SimRank
20 |   (5) RSS (r=2)
21 |   (6) Katz
22 |   (7) LHN
23 | 
24 | ### Dependent packages
25 | * numpy
26 | * scipy
27 | * networkx
28 | 
29 | ### How to use it
30 | Put the "networkx_addon/" folder inside your source directory
31 | 
32 | ### Sample usage
33 | 
34 | 1. network propagation
35 | 
36 | ```Python
37 | >>> import networkx
38 | >>> import networkx_addon
39 | >>> G = networkx.DiGraph()
40 | >>> G.add_edge(1,2,act_prob=.5)
41 | >>> G.add_edge(2,1,act_prob=.5)
42 | >>> G.add_edge(1,3,act_prob=.2)
43 | >>> G.add_edge(3,1,act_prob=.2)
44 | >>> G.add_edge(2,3,act_prob=.3)
45 | >>> networkx_addon.information_propagation.independent_cascade(G, [1], steps=2)
46 | ```
47 | 
48 | 2. network similarity
49 | 
50 | ```Python
51 | >>> import networkx
52 | >>> import networkx_addon
53 | >>> G = networkx.Graph()
54 | >>> G.add_edges_from([(0,1),(1,2),(0,2)])
55 | ```
56 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | from . import information_propagation
2 | from . import similarity
3 | 


--------------------------------------------------------------------------------
/information_propagation/__init__.py:
--------------------------------------------------------------------------------
1 | from .linear_threshold import *
2 | from .independent_cascade import *
3 | 


--------------------------------------------------------------------------------
/information_propagation/independent_cascade.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Implement independent cascade model
  3 | """
  4 | #!/usr/bin/env python
  5 | #    Copyright (C) 2004-2010 by
  6 | #    Hung-Hsuan Chen <hhchen@psu.edu>
  7 | #    All rights reserved.
  8 | #    BSD license.
  9 | #    NetworkX:http://networkx.lanl.gov/.
 10 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
 11 | 
 12 | import copy
 13 | import networkx as nx
 14 | import random
 15 | 
 16 | __all__ = ['independent_cascade']
 17 | 
 18 | def independent_cascade(G, seeds, steps=0):
 19 |   """Return the active nodes of each diffusion step by the independent cascade
 20 |   model
 21 | 
 22 |   Parameters
 23 |   -----------
 24 |   G : graph
 25 |     A NetworkX graph
 26 |   seeds : list of nodes
 27 |     The seed nodes for diffusion
 28 |   steps: integer
 29 |     The number of steps to diffuse.  If steps <= 0, the diffusion runs until
 30 |     no more nodes can be activated.  If steps > 0, the diffusion runs for at
 31 |     most "steps" rounds
 32 | 
 33 |   Returns
 34 |   -------
 35 |   layer_i_nodes : list of list of activated nodes
 36 |     layer_i_nodes[0]: the seeds
 37 |     layer_i_nodes[k]: the nodes activated at the kth diffusion step
 38 | 
 39 |   Notes
 40 |   -----
 41 |   When node v in G becomes active, it has a *single* chance of activating
 42 |   each currently inactive neighbor w with probability p_{vw}
 43 | 
 44 |   Examples
 45 |   --------
 46 |   >>> DG = nx.DiGraph()
 47 |   >>> DG.add_edges_from([(1,2), (1,3), (1,5), (2,1), (3,2), (4,2), (4,3), \
 48 |   >>>   (4,6), (5,3), (5,4), (5,6), (6,4), (6,5)], act_prob=0.2)
 49 |   >>> layers = networkx_addon.information_propagation.independent_cascade(DG, [6])
 50 | 
 51 |   References
 52 |   ----------
 53 |   [1] David Kempe, Jon Kleinberg, and Eva Tardos.
 54 |       Influential nodes in a diffusion model for social networks.
 55 |       In Automata, Languages and Programming, 2005.
 56 |   """
 57 |   if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
 58 |       raise Exception( \
 59 |           "independent_cascade() is not defined for graphs with multiedges.")
 60 | 
 61 |   # make sure the seeds are in the graph
 62 |   for s in seeds:
 63 |     if s not in G.nodes():
 64 |       raise Exception("seed", s, "is not in graph")
 65 | 
 66 |   # change to directed graph
 67 |   if not G.is_directed():
 68 |     DG = G.to_directed()
 69 |   else:
 70 |     DG = copy.deepcopy(G)
 71 | 
 72 |   # init activation probabilities
 73 |   for e in DG.edges():
 74 |     if 'act_prob' not in DG[e[0]][e[1]]:
 75 |       DG[e[0]][e[1]]['act_prob'] = 0.1
 76 |     elif DG[e[0]][e[1]]['act_prob'] > 1:
 77 |       raise Exception("edge activation probability:", \
 78 |           DG[e[0]][e[1]]['act_prob'], "cannot be larger than 1")
 79 | 
 80 |   # perform diffusion
 81 |   A = copy.deepcopy(seeds)  # prevent side effect
 82 |   if steps <= 0:
 83 |     # perform diffusion until no more nodes can be activated
 84 |     return _diffuse_all(DG, A)
 85 |   # perform diffusion for at most "steps" rounds
 86 |   return _diffuse_k_rounds(DG, A, steps)
 87 | 
 88 | def _diffuse_all(G, A):
 89 |   tried_edges = set()
 90 |   layer_i_nodes = [ ]
 91 |   layer_i_nodes.append([i for i in A])  # prevent side effect
 92 |   while True:
 93 |     len_old = len(A)
 94 |     (A, activated_nodes_of_this_round, cur_tried_edges) = \
 95 |         _diffuse_one_round(G, A, tried_edges)
 96 |     layer_i_nodes.append(activated_nodes_of_this_round)
 97 |     tried_edges = tried_edges.union(cur_tried_edges)
 98 |     if len(A) == len_old:
 99 |       break
100 |   return layer_i_nodes
101 | 
def _diffuse_k_rounds(G, A, steps):
  """Run at most `steps` independent-cascade rounds.

  Stops early when len(A) reaches len(G) or when a round activates nothing.
  Returns the list of layers: the seeds first, then the nodes activated in
  each executed round.
  """
  attempted = set()     # edges that already used their single chance
  layers = [list(A)]    # snapshot of the seeds
  rounds_left = steps
  while rounds_left > 0 and len(A) < len(G):
    size_before = len(A)
    A, newly_active, attempted_now = _diffuse_one_round(G, A, attempted)
    layers.append(newly_active)
    attempted |= attempted_now
    if len(A) == size_before:
      break
    rounds_left -= 1
  return layers
116 | 
def _diffuse_one_round(G, A, tried_edges):
  """Perform one round of the independent cascade.

  Every node in A tries each outgoing edge that leads to a node outside A and
  has not been tried before (neither in `tried_edges` nor earlier in this
  round).

  Parameters
  ----------
  G : directed graph whose edges carry 'act_prob'
  A : list of currently active nodes; extended in place with the new ones
  tried_edges : set of (src, dest) edges tried in previous rounds (read only)

  Returns
  -------
  (A, activated_nodes_of_this_round, cur_tried_edges) where the second item
  is a list and the third is the set of edges tried during this round.
  """
  # A is not modified until the loop is over, so one set snapshot gives O(1)
  # membership tests instead of scanning the list for every edge
  active = set(A)
  activated_nodes_of_this_round = set()
  cur_tried_edges = set()
  for s in A:
    for nb in G.successors(s):
      edge = (s, nb)
      if nb in active or edge in tried_edges or edge in cur_tried_edges:
        continue
      if _prop_success(G, s, nb):
        activated_nodes_of_this_round.add(nb)
      cur_tried_edges.add(edge)
  activated_nodes_of_this_round = list(activated_nodes_of_this_round)
  A.extend(activated_nodes_of_this_round)
  return A, activated_nodes_of_this_round, cur_tried_edges
130 | 
def _prop_success(G, src, dest):
  """Return True when the activation attempt along edge (src, dest) succeeds.

  The attempt succeeds with probability G[src][dest]['act_prob'].
  """
  # random.random() lies in [0.0, 1.0).  The strict comparison makes the
  # success probability exactly act_prob: an edge with act_prob == 0 can never
  # fire, and an edge with act_prob == 1 always fires.
  return random.random() < G[src][dest]['act_prob']
133 | 
134 | 
135 | 


--------------------------------------------------------------------------------
/information_propagation/linear_threshold.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Implement linear threshold models
  3 | """
  4 | #!/usr/bin/env python
  5 | #    Copyright (C) 2004-2010 by
  6 | #    Hung-Hsuan Chen <hhchen@psu.edu>
  7 | #    All rights reserved.
  8 | #    BSD license.
  9 | #    NetworkX:http://networkx.lanl.gov/.
 10 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
 11 | 
 12 | import copy
 13 | import networkx as nx
 14 | 
 15 | __all__ = ['linear_threshold']
 16 | 
 17 | def linear_threshold(G, seeds, steps=0):
 18 |   """Return the active nodes of each diffusion step by linear threshold model
 19 | 
 20 |   Parameters
 21 |   ----------
 22 |   G : networkx graph
 23 |       The number of nodes.
 24 | 
 25 |   seeds: list of nodes
 26 |       The seed nodes of the graph
 27 | 
 28 |   steps: int
 29 |       The number of steps to diffuse
 30 |       When steps <= 0, the model diffuses until no more nodes
 31 |       can be activated
 32 | 
 33 |   Return
 34 |   ------
 35 |   layer_i_nodes : list of list of activated nodes
 36 |     layer_i_nodes[0]: the seeds
 37 |     layer_i_nodes[k]: the nodes activated at the kth diffusion step
 38 | 
 39 |   Notes
 40 |   -----
 41 |   1. Each node is supposed to have an attribute "threshold".  If not, the
 42 |      default value is given (0.5).
 43 |   2. Each edge is supposed to have an attribute "influence".  If not, the
 44 |      default value is given (1/in_degree)
 45 | 
 46 |   References
 47 |   ----------
 48 |   [1] GranovetterMark. Threshold models of collective behavior.
 49 |       The American journal of sociology, 1978.
 50 | 
 51 |   Examples
 52 |   --------
 53 |   >>> DG = nx.DiGraph()
 54 |   >>> DG.add_edges_from([(1,2), (1,3), (1,5), (2,1), (3,2), (4,2), (4,3), \
 55 |   >>>   (4,6), (5,3), (5,4), (5,6), (6,4), (6,5)])
 56 |   >>> layers = networkx_addon.information_propagation.linear_threshold(DG, [1])
 57 | 
 58 |   """
 59 |   if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
 60 |       raise Exception( \
 61 |           "linear_threshold() is not defined for graphs with multiedges.")
 62 | 
 63 |   # make sure the seeds are in the graph
 64 |   for s in seeds:
 65 |     if s not in G.nodes():
 66 |       raise Exception("seed", s, "is not in graph")
 67 | 
 68 |   # change to directed graph
 69 |   if not G.is_directed():
 70 |     DG = G.to_directed()
 71 |   else:
 72 |     DG = copy.deepcopy(G)
 73 | 
 74 |   # init thresholds
 75 |   for n in DG.nodes():
 76 |     if 'threshold' not in DG.node[n]:
 77 |       DG.node[n]['threshold'] = 0.5
 78 |     elif DG.node[n]['threshold'] > 1:
 79 |       raise Exception("node threshold:", DG.node[n]['threshold'], \
 80 |           "cannot be larger than 1")
 81 | 
 82 |   # init influences
 83 |   in_deg = DG.in_degree()
 84 |   for e in DG.edges():
 85 |     if 'influence' not in DG[e[0]][e[1]]:
 86 |       DG[e[0]][e[1]]['influence'] = 1.0 / in_deg[e[1]]
 87 |     elif DG[e[0]][e[1]]['influence'] > 1:
 88 |       raise Exception("edge influence:", DG[e[0]][e[1]]['influence'], \
 89 |           "cannot be larger than 1")
 90 | 
 91 |   # perform diffusion
 92 |   A = copy.deepcopy(seeds)
 93 |   if steps <= 0:
 94 |     # perform diffusion until no more nodes can be activated
 95 |     return _diffuse_all(DG, A)
 96 |   # perform diffusion for at most "steps" rounds only
 97 |   return _diffuse_k_rounds(DG, A, steps)
 98 | 
 99 | def _diffuse_all(G, A):
100 |   layer_i_nodes = [ ]
101 |   layer_i_nodes.append([i for i in A])
102 |   while True:
103 |     len_old = len(A)
104 |     A, activated_nodes_of_this_round = _diffuse_one_round(G, A)
105 |     layer_i_nodes.append(activated_nodes_of_this_round)
106 |     if len(A) == len_old:
107 |       break
108 |   return layer_i_nodes
109 | 
def _diffuse_k_rounds(G, A, steps):
  """Run at most `steps` linear-threshold rounds.

  Stops early when len(A) reaches len(G) or when a round activates nothing.
  Returns the list of layers: the seeds first, then the nodes activated in
  each executed round.
  """
  layers = [list(A)]    # snapshot of the seeds
  rounds_left = steps
  while rounds_left > 0 and len(A) < len(G):
    size_before = len(A)
    A, newly_active = _diffuse_one_round(G, A)
    layers.append(newly_active)
    if len(A) == size_before:
      break
    rounds_left -= 1
  return layers
121 | 
def _diffuse_one_round(G, A):
  """Perform one round of the linear threshold diffusion.

  An inactive successor of an active node becomes active when the summed
  'influence' of its active predecessors reaches its 'threshold'.

  Parameters
  ----------
  G : directed graph with node attribute 'threshold' and edge attribute
      'influence'
  A : list of currently active nodes; extended in place with the new ones

  Returns
  -------
  (A, activated_nodes_of_this_round) where the second item is a list.
  """
  # "G.node" is the attribute mapping of older networkx releases; newer ones
  # only provide the subscriptable "G.nodes" view
  node_attrs = G.node if hasattr(G, 'node') else G.nodes
  # A is not modified until the loop is over, so one set snapshot replaces the
  # per-candidate set(A) rebuild and the list scans of "nb in A"
  active = set(A)
  examined = set()  # a candidate's outcome cannot change within the round
  activated_nodes_of_this_round = set()
  for s in A:
    for nb in G.successors(s):
      if nb in active or nb in examined:
        continue
      examined.add(nb)
      active_nb = list(set(G.predecessors(nb)).intersection(active))
      if _influence_sum(G, active_nb, nb) >= node_attrs[nb]['threshold']:
        activated_nodes_of_this_round.add(nb)
  activated_nodes_of_this_round = list(activated_nodes_of_this_round)
  A.extend(activated_nodes_of_this_round)
  return A, list(activated_nodes_of_this_round)
134 | 
def _influence_sum(G, froms, to):
  """Return the total 'influence' of the edges (f, to) for every f in froms.

  The result is a float; it is 0.0 when `froms` is empty.
  """
  # starting the sum at 0.0 keeps the float result and the left-to-right
  # accumulation order
  return sum((G[f][to]['influence'] for f in froms), 0.0)
140 | 
141 | 
142 | 


--------------------------------------------------------------------------------
/information_propagation/tests/test_independent_cascade.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import networkx
 4 | import os
 5 | import sys
 6 | 
 7 | from nose.tools import assert_almost_equal
 8 | 
 9 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
10 | from information_propagation import independent_cascade
11 | 
class TestIndependentCascade():
  """Monte-Carlo checks of independent_cascade().

  Each check averages, over `run_times` simulations, the total number of
  nodes found in all returned layers (seeds included) and compares it with
  the expected value to one decimal place.
  """
  run_times = 10000

  def _average_spread(self, G, seeds, **kwargs):
    # mean number of nodes over all layers, taken over run_times simulations
    activated = 0.0
    for _ in range(TestIndependentCascade.run_times):
      for layer in independent_cascade(G, seeds, **kwargs):
        activated += len(layer)
    return activated / TestIndependentCascade.run_times

  def test_independent_cascade(self):
    weighted_edges = [
        (1, 2, .5), (2, 1, .5), (1, 3, .2), (3, 1, .2), (2, 3, .3),
        (2, 4, .5), (3, 4, .1), (3, 5, .2), (4, 5, .2), (5, 6, .6),
        (6, 5, .6), (6, 4, .3), (6, 2, .4),
    ]
    G = networkx.DiGraph()
    for src, dst, prob in weighted_edges:
      G.add_edge(src, dst, act_prob=prob)

    # one round from node 1: the seed plus 0.5 (node 2) plus 0.2 (node 3)
    assert_almost_equal(self._average_spread(G, [1], steps=1), 1.7, places=1)
    assert_almost_equal(self._average_spread(G, [1], steps=2), 2.16, places=1)

    # unbounded diffusion on a small diamond-shaped graph
    G = networkx.DiGraph()
    G.add_edges_from([(1,2), (1,3), (2,4), (3,4)], act_prob=0.4)
    assert_almost_equal(self._average_spread(G, [1]), 2.09, places=1)

  def test_independent_cascade_without_attribute(self):
    # edges carry no 'act_prob', so the default probability is used
    G = networkx.DiGraph()
    G.add_edges_from([(1,2), (1,3), (2,4), (3,4)])

    assert_almost_equal(self._average_spread(G, [1], steps=1), 1.2, places=1)
66 | 
67 | 
68 | 
69 | 


--------------------------------------------------------------------------------
/information_propagation/tests/test_linear_threshold.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import networkx
  4 | import os
  5 | import sys
  6 | 
  7 | from nose.tools import assert_equal
  8 | 
  9 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 10 | from information_propagation import linear_threshold
 11 | 
 12 | class TestDiffusionLinearThreshold():
 13 |   def test_linear_threshold(self):
 14 |     G = networkx.DiGraph()
 15 |     G.add_edge(1,2,influence=.5)
 16 |     G.add_edge(2,1,influence=.5)
 17 |     G.add_edge(1,3,influence=.2)
 18 |     G.add_edge(3,1,influence=.2)
 19 |     G.add_edge(2,3,influence=.3)
 20 |     G.add_edge(2,4,influence=.5)
 21 |     G.add_edge(3,4,influence=.1)
 22 |     G.add_edge(3,5,influence=.2)
 23 |     G.add_edge(4,5,influence=.2)
 24 |     G.add_edge(5,6,influence=.6)
 25 |     G.add_edge(6,5,influence=.6)
 26 |     G.add_edge(6,4,influence=.3)
 27 |     G.add_edge(6,2,influence=.4)
 28 |     G.node[2]['threshold'] = .4
 29 |     G.node[3]['threshold'] = .4
 30 |     G.node[4]['threshold'] = .55
 31 |     G.node[5]['threshold'] = .5
 32 |     G.node[6]['threshold'] = .3
 33 | 
 34 |     layers = linear_threshold(G, [1])
 35 | 
 36 |     print(layers)
 37 | 
 38 |     assert_equal(len(layers[0]), 1)
 39 |     assert(1 in layers[0])
 40 |     assert_equal(len(layers[1]), 1)
 41 |     assert(2 in layers[1])
 42 |     assert_equal(len(layers[2]), 1)
 43 |     assert(3 in layers[2])
 44 |     assert_equal(len(layers[3]), 1)
 45 |     assert(4 in layers[3])
 46 |     assert_equal(len(reduce(lambda x,y: x+y, layers)), 4)
 47 | 
 48 |   def test_linear_threshold_graph_without_attribute(self):
 49 |     G = networkx.Graph()
 50 |     G.add_edges_from([(1,2), (1,3), (2,3), (3,4), (3,5), (4,5), (4,6), (5,6)])
 51 | 
 52 |     layers = linear_threshold(G, [1])
 53 |     assert_equal(len(layers[0]), 1)
 54 |     assert(1 in layers[0])
 55 |     assert_equal(len(layers[1]), 1)
 56 |     assert(2 in layers[1])
 57 |     assert_equal(len(layers[2]), 1)
 58 |     assert(3 in layers[2])
 59 | 
 60 |     layers = linear_threshold(G, [1,4])
 61 |     assert_equal(len(reduce(lambda x,y: x+y, layers)), 6)
 62 | 
 63 |     layers = linear_threshold(G, [1,2])
 64 |     assert_equal(len(layers[0]), 2)
 65 |     assert(1 in layers[0])
 66 |     assert(2 in layers[0])
 67 |     assert_equal(len(layers[1]), 1)
 68 |     assert(3 in layers[1])
 69 | 
 70 |   def test_linear_threshold_with_step(self):
 71 |     G = networkx.DiGraph()
 72 |     G.add_edge(1,2,influence=.5)
 73 |     G.add_edge(2,1,influence=.5)
 74 |     G.add_edge(1,3,influence=.2)
 75 |     G.add_edge(3,1,influence=.2)
 76 |     G.add_edge(2,3,influence=.3)
 77 |     G.add_edge(2,4,influence=.5)
 78 |     G.add_edge(3,4,influence=.1)
 79 |     G.add_edge(3,5,influence=.2)
 80 |     G.add_edge(4,5,influence=.2)
 81 |     G.add_edge(5,6,influence=.6)
 82 |     G.add_edge(6,5,influence=.6)
 83 |     G.add_edge(6,4,influence=.3)
 84 |     G.add_edge(6,2,influence=.4)
 85 |     G.node[2]['threshold'] = .4
 86 |     G.node[3]['threshold'] = .4
 87 |     G.node[4]['threshold'] = .55
 88 |     G.node[5]['threshold'] = .5
 89 |     G.node[6]['threshold'] = .3
 90 | 
 91 |     layers = linear_threshold(G, [1], 1)
 92 |     assert_equal(len(layers[0]), 1)
 93 |     assert(1 in layers[0])
 94 |     assert_equal(len(layers[1]), 1)
 95 |     assert(2 in layers[1])
 96 | 
 97 |     layers = linear_threshold(G, [1], 2)
 98 |     assert_equal(len(layers[0]), 1)
 99 |     assert(1 in layers[0])
100 |     assert_equal(len(layers[1]), 1)
101 |     assert(2 in layers[1])
102 |     assert_equal(len(layers[2]), 1)
103 |     assert(3 in layers[2])
104 | 
105 | 
106 | 


--------------------------------------------------------------------------------
/similarity/__init__.py:
--------------------------------------------------------------------------------
1 | from .ascos import *
2 | from .jaccard import *
3 | from .cosine import *
4 | from .simrank import *
5 | from .rss2 import *
6 | from .katz import *
7 | from .lhn import *
8 | 


--------------------------------------------------------------------------------
/similarity/ascos.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ASCOS similarity measure
  3 | """
  4 | #!/usr/bin/env python
  5 | #    Copyright (C) 2004-2010 by
  6 | #    Hung-Hsuan Chen <hhchen@psu.edu>
  7 | #    All rights reserved.
  8 | #    BSD license.
  9 | #    NetworkX:http://networkx.lanl.gov/.
 10 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
 11 | 
 12 | import copy
 13 | import math
 14 | import networkx as nx
 15 | import numpy
 16 | 
 17 | __all__ = ['ascos']
 18 | 
 19 | 
 20 | def ascos(G, c=0.9, max_iter=100, is_weighted=False, remove_neighbors=False, remove_self=False, dump_process=False):
 21 |   """Return the ASCOS similarity between nodes
 22 | 
 23 |   Parameters
 24 |   -----------
 25 |   G: graph
 26 |     A NetworkX graph
 27 |   c: float, 0 < c <= 1
 28 |     The number represents the relative importance between in-direct neighbors
 29 |     and direct neighbors
 30 |   max_iter: integer
 31 |     The number specifies the maximum number of iterations for ASCOS
 32 |     calculation
 33 |   is_weighted: boolean
 34 |     Whether use weighted ASCOS or not
 35 |   remove_neighbors: boolean
 36 |     if true, the similarity value between neighbor nodes is set to zero
 37 |   remove_self: boolean
 38 |     if true, the similarity value between a node and itself is set to zero
 39 |   dump_process: boolean
 40 |     if true, the calculation process is dumped
 41 | 
 42 |   Returns
 43 |   -------
 44 |   node_ids : list of node ids
 45 |   sim : numpy matrix
 46 |     sim[i,j] is the similarity value between node_ids[i] and node_ids[j]
 47 | 
 48 |   Examples
 49 |   --------
 50 |   >>> G = nx.Graph()
 51 |   >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
 52 |   >>> networkx_addon.similarity.ascos(G)
 53 | 
 54 |   Notes
 55 |   -----
 56 | 
 57 |   References
 58 |   ----------
 59 |   [1] ASCOS: an Asymmetric network Structure COntext Similarity measure.
 60 |   Hung-Hsuan Chen and C. Lee Giles.  ASONAM 2013
 61 |   """
 62 | 
 63 |   if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
 64 |     raise Exception("ascos() not defined for graphs with multiedges.")
 65 | 
 66 |   if G.is_directed():
 67 |     raise Exception("ascos() not defined for directed graphs.")
 68 | 
 69 |   node_ids = G.nodes()
 70 |   node_id_lookup_tbl = { }
 71 |   for i, n in enumerate(node_ids):
 72 |     node_id_lookup_tbl[n] = i
 73 | 
 74 |   nb_ids = [G.neighbors(n) for n in node_ids]
 75 |   nbs = [ ]
 76 |   for nb_id in nb_ids:
 77 |     nbs.append([node_id_lookup_tbl[n] for n in nb_id])
 78 |   del(node_id_lookup_tbl)
 79 | 
 80 |   n = G.number_of_nodes()
 81 |   sim = numpy.eye(n)
 82 |   sim_old = numpy.zeros(shape = (n, n))
 83 | 
 84 |   for iter_ctr in range(max_iter):
 85 |     if _is_converge(sim, sim_old, n, n):
 86 |       break
 87 |     sim_old = copy.deepcopy(sim)
 88 |     for i in range(n):
 89 |       if dump_process:
 90 |         print(iter_ctr, ':', i, '/', n)
 91 |       if is_weighted:
 92 |         w_i = G.degree(weight='weight')[node_ids[i]]
 93 |       for j in range(n):
 94 |         if not is_weighted:
 95 |           if i == j:
 96 |             continue
 97 |           s_ij = 0.0
 98 |           for n_i in nbs[i]:
 99 |             s_ij += sim_old[n_i, j]
100 |           sim[i, j] = c * s_ij / len(nbs[i]) if len(nbs[i]) > 0 else 0
101 |         else:
102 |           if i == j:
103 |             continue
104 |           s_ij = 0.0
105 |           for n_i in nbs[i]:
106 |             w_ik = G[node_ids[i]][node_ids[n_i]]['weight'] if 'weight' in G[node_ids[i]][node_ids[n_i]] else 1
107 |             s_ij += float(w_ik) * (1 - math.exp(-w_ik)) * sim_old[n_i, j]
108 | 
109 |           sim[i, j] = c * s_ij / w_i if w_i > 0 else 0
110 | 
111 |   if remove_self:
112 |     for i in range(n):
113 |       sim[i,i] = 0
114 | 
115 |   if remove_neighbors:
116 |     for i in range(n):
117 |       for j in nbs[i]:
118 |         sim[i,j] = 0
119 | 
120 |   return node_ids, sim
121 | 
def _is_converge(sim, sim_old, nrow, ncol, eps=1e-4):
  """Return True when sim and sim_old agree within eps.

  Only the leading nrow x ncol block of the two matrices is compared.  The
  result is False as soon as one entry differs by eps or more.
  """
  # one vectorized comparison instead of a Python loop over every entry
  diff = numpy.abs(numpy.asarray(sim)[:nrow, :ncol] -
                   numpy.asarray(sim_old)[:nrow, :ncol])
  return not bool((diff >= eps).any())
128 | 


--------------------------------------------------------------------------------
/similarity/cosine.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implement cosine similarity
 3 | """
 4 | #    Copyright (C) 2004-2010 by
 5 | #    Hung-Hsuan Chen <hhchen@psu.edu>
 6 | #    All rights reserved.
 7 | #    BSD license.
 8 | #    NetworkX:http://networkx.lanl.gov/.
 9 | import networkx as nx
10 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
11 | __all__ = ['cosine']
12 | 
def cosine(G, remove_neighbors=False, dump_process=False):
  """Return the cosine similarity between nodes

  Parameters
  -----------
  G : graph
    A NetworkX graph (undirected, no multiedges)
  remove_neighbors: boolean
    if true, only return cosine similarity of non-neighbor nodes
  dump_process: boolean
    if true, the calculation process is dumped

  Returns
  -------
  cosine: dictionary of dictionary of double
    if cosine[i][j] = k, this means the cosine similarity
    between node i and node j is k.  Only pairs of distinct nodes that share
    at least one neighbor are present.

  Raises
  ------
  Exception
    If G is a multigraph or a directed graph.

  Examples
  --------
  >>> G=nx.Graph()
  >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
  >>> networkx_addon.similarity.cosine(G)

  Notes
  -----
  The value stored is |N(i) & N(j)| / (|N(i)| + |N(j)|), where N(x) is the
  neighbor set of x.
  NOTE(review): the usual cosine similarity divides by
  sqrt(|N(i)| * |N(j)|); the denominator is kept as it was because existing
  callers/tests may rely on it -- confirm which one is intended.

  References
  ----------
  """
  # isinstance (rather than an exact type comparison) also rejects subclasses
  # of the multigraph classes
  if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
    raise Exception("cosine() not defined for graphs with multiedges.")

  if G.is_directed():
    raise Exception("cosine() not defined for directed graphs.")

  # neighbor lists (iteration order) and neighbor sets (membership tests and
  # intersections) are built once instead of inside the innermost loop
  nodes = list(G.nodes())
  nbr_list = {v: list(G.neighbors(v)) for v in nodes}
  nbr_set = {v: set(nbrs) for v, nbrs in nbr_list.items()}

  cos = { }
  total_iter = G.number_of_nodes()
  for i, a in enumerate(nodes):
    if dump_process:
      print(i+1, '/', total_iter)
    s1 = nbr_set[a]
    for b in nbr_list[a]:
      for c in nbr_list[b]:
        if a == c:
          continue
        if remove_neighbors and c in s1:
          continue
        row = cos.setdefault(a, { })
        # the pair (a, c) is reached once per common neighbor; the value is
        # the same every time, so it is computed only on the first visit
        if c not in row:
          s2 = nbr_set[c]
          row[c] = float(len(s1 & s2)) / (len(s1) + len(s2))

  return cos
67 | 
68 | 
69 | 
70 | 


--------------------------------------------------------------------------------
/similarity/jaccard.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implement jaccard similarity
 3 | """
 4 | #    Copyright (C) 2004-2010 by
 5 | #    Hung-Hsuan Chen <hhchen@psu.edu>
 6 | #    All rights reserved.
 7 | #    BSD license.
 8 | #    NetworkX: http://networkx.lanl.gov/
 9 | 
10 | import networkx as nx
11 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
12 | __all__ = ['jaccard']
13 | 
def jaccard(G, remove_neighbors=False, dump_process=False):
  """Return the jaccard similarity between nodes

  Parameters
  -----------
  G : graph
    A NetworkX graph (undirected, no multiedges)
  remove_neighbors: boolean
    if true, only return jaccard similarity of non-neighbor nodes
  dump_process: boolean
    if true, the calculation process is dumped

  Returns
  -------
  jaccard: dictionary of dictionary of double
    if jaccard[i][j] = k, this means the jaccard similarity
    between node i and node j is k.  Only pairs of distinct nodes that share
    at least one neighbor are present.

  Raises
  ------
  Exception
    If G is a multigraph or a directed graph.

  Examples
  --------
  >>> G=nx.Graph()
  >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
  >>> networkx_addon.similarity.jaccard(G)

  Notes
  -----
  The value stored is |N(i) & N(j)| / |N(i) | N(j)|, where N(x) is the
  neighbor set of x.

  References
  ----------
  """
  # isinstance (rather than an exact type comparison) also rejects subclasses
  # of the multigraph classes
  if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
    raise Exception("jaccard() not defined for graphs with multiedges.")

  if G.is_directed():
    raise Exception("jaccard() not defined for directed graphs.")

  # neighbor lists (iteration order) and neighbor sets (membership tests and
  # set algebra) are built once instead of inside the innermost loop
  nodes = list(G.nodes())
  nbr_list = {v: list(G.neighbors(v)) for v in nodes}
  nbr_set = {v: set(nbrs) for v, nbrs in nbr_list.items()}

  jac = { }
  total_iter = G.number_of_nodes()
  for i, a in enumerate(nodes, 1):
    if dump_process:
      print(i, '/', total_iter)
    s1 = nbr_set[a]
    for b in nbr_list[a]:
      for c in nbr_list[b]:
        if a == c:
          continue
        if remove_neighbors and c in s1:
          continue
        row = jac.setdefault(a, { })
        # the pair (a, c) is reached once per common neighbor; the value is
        # the same every time, so it is computed only on the first visit
        if c not in row:
          s2 = nbr_set[c]
          row[c] = float(len(s1 & s2)) / len(s1 | s2)

  return jac
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/similarity/katz.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implement katz similarity
 3 | """
 4 | #    Copyright (C) 2004-2010 by
 5 | #    Hung-Hsuan Chen <hhchen@psu.edu>
 6 | #    All rights reserved.
 7 | #    BSD license.
 8 | #    NetworkX:http://networkx.lanl.gov/.
 9 | import networkx as nx
10 | import numpy
11 | import scipy.linalg
12 | 
13 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
14 | __all__ = ['katz']
15 | 
def katz(G, c=0.9, remove_neighbors=False, inv_method=0):
  """Return the katz similarity between nodes

  Parameters
  -----------
  G : graph
    A NetworkX graph (undirected, no multiedges)
  c : float
    Attenuation relative to the spectral radius: the matrix that gets
    inverted is I - (c / lambda1) * A, where A is the unweighted adjacency
    matrix and lambda1 its largest absolute eigenvalue
  remove_neighbors: boolean
    if true, only return katz similarity of non-neighbor nodes
    (not implemented yet: the argument is currently ignored)
  inv_method : integer
    1: scipy.linalg.pinv, 2: numpy.linalg.inv, anything else:
    numpy.linalg.pinv

  Returns
  -------
  katz: matrix of similarity
  nodelist: the node ids

  Raises
  ------
  Exception
    If G is a multigraph or a directed graph.

  Examples
  --------
  >>> G=nx.Graph()
  >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
  >>> networkx_addon.similarity.katz(G)

  Notes
  -----

  References
  ----------
  """
  # TODO: remove sim scores b2n neighbors when remove_neighbors==True

  # isinstance (rather than an exact type comparison) also rejects subclasses
  # of the multigraph classes
  if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
    raise Exception("katz() not defined for graphs with multiedges.")

  if G.is_directed():
    raise Exception("katz() not defined for directed graphs.")

  # one node list is used both for the matrix ordering and the return value
  nodelist = list(G.nodes())
  A = nx.adjacency_matrix(G, nodelist=nodelist, weight=None)
  # some networkx releases return a sparse matrix here, which the dense
  # numpy.linalg routines below cannot handle
  if hasattr(A, 'toarray'):
    A = A.toarray()
  w, _ = numpy.linalg.eigh(A)
  lambda1 = max([abs(x) for x in w])
  # a graph without edges has lambda1 == 0; A is all zeros in that case, so
  # the factor is irrelevant and the division by zero is skipped
  factor = c / lambda1 if lambda1 > 0 else 0.0
  M = numpy.eye(A.shape[0]) - factor * A
  if inv_method == 1:
    S = scipy.linalg.pinv(M)
  elif inv_method == 2:
    S = numpy.linalg.inv(M)
  else:
    S = numpy.linalg.pinv(M)
  return S, nodelist
62 | 
63 | 


--------------------------------------------------------------------------------
/similarity/lhn.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implement LHN similarity
 3 | """
 4 | #    Copyright (C) 2004-2010 by
 5 | #    Hung-Hsuan Chen <hhchen@psu.edu>
 6 | #    All rights reserved.
 7 | #    BSD license.
 8 | #    NetworkX:http://networkx.lanl.gov/.
 9 | import networkx as nx
10 | import numpy
11 | import scipy.linalg
12 | 
13 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
14 | __all__ = ['lhn']
15 | 
def lhn(G, c=0.9, remove_neighbors=False, inv_method=0):
  """Return the LHN similarity between nodes

  The matrix inverse(I - c/lambda1 * A) is computed first, where A is the
  unweighted adjacency matrix of G and lambda1 is the largest absolute
  eigenvalue of A.  Entry (i, j) is then divided by deg(i) * deg(j).

  Parameters
  -----------
  G : graph
    A NetworkX graph; must be undirected and without multiedges
  c : float
    Attenuation factor; A is scaled by c/lambda1 before the inversion
  remove_neighbors: boolean
    if true, the similarity value between neighbor nodes is set to zero,
    so only the LHN similarity of non-neighbor nodes is returned
  inv_method : integer
    selects the inversion routine: 1 for scipy.linalg.pinv,
    2 for numpy.linalg.inv, any other value for numpy.linalg.pinv

  Returns
  -------
  S: matrix of similarity
  nodelist: the node ids, in the same order as the rows/columns of S

  Raises
  ------
  Exception
    if G has multiedges or is directed

  Examples
  --------
  >>> G=nx.Graph()
  >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
  >>> networkx_addon.similarity.lhn(G)

  Notes
  -----
  Entries that involve a node of degree zero are divided by zero and
  therefore come out as inf or nan under numpy's floating point rules.
  For a graph without edges lambda1 is zero; the identity matrix is used
  as the un-normalised matrix in that case instead of dividing c by zero.

  References
  ----------
  """
  # isinstance (instead of an exact type comparison) also rejects subclasses
  if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
    raise Exception("lhn() not defined for graphs with multiedges.")

  if G.is_directed():
    raise Exception("lhn() not defined for directed graphs.")

  # Fix the node order once so the matrix and the returned ids agree.
  nodelist = list(G.nodes())
  A = nx.adjacency_matrix(G, nodelist=nodelist, weight=None)
  # Some networkx versions return a scipy sparse matrix here, which the
  # dense numpy/scipy routines below cannot consume directly.
  if hasattr(A, 'todense'):
    A = A.todense()
  w, _ = numpy.linalg.eigh(A)
  lambda1 = max(abs(x) for x in w)
  I = numpy.eye(A.shape[0])
  if lambda1 == 0:
    # No edges at all: avoid dividing c by zero (see Notes).
    S = I
  elif inv_method == 1:
    S = scipy.linalg.pinv(I - c/lambda1 * A)
  elif inv_method == 2:
    S = numpy.linalg.inv(I - c/lambda1 * A)
  else:
    S = numpy.linalg.pinv(I - c/lambda1 * A)
  # Column sums of the unweighted adjacency matrix are the node degrees.
  deg = numpy.asarray(A).sum(axis=0).reshape(-1,)
  # outer(deg, deg)[i, j] == deg[i] * deg[j]; one vectorised division
  # replaces the element-by-element Python loop.
  S = S / numpy.outer(deg, deg)
  if remove_neighbors:
    # A non-zero adjacency entry marks a pair of directly connected nodes.
    S[numpy.asarray(A) != 0] = 0
  return S, nodelist
66 | 
67 | 


--------------------------------------------------------------------------------
/similarity/rss2.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Implement rss2 similarity
 3 | """
 4 | #    Copyright (C) 2004-2010 by
 5 | #    Hung-Hsuan Chen <hhchen@psu.edu>
 6 | #    All rights reserved.
 7 | #    BSD license.
 8 | #    NetworkX:http://networkx.lanl.gov/.
 9 | import networkx as nx
10 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
11 | __all__ = ['rss2']
12 | 
def rss2(G, remove_neighbors=False, dump_process=False, disregard_weight=False):
  """Return the rss2 similarity between nodes

  For every two-step walk a - b - c with a != c, the value
  w(a,b)/deg(a) * w(b,c)/deg(b) is added to the similarity of (a, c),
  where w is the edge weight and deg the weighted degree (both replaced
  by 1 and the neighbor count when disregard_weight is true).

  Parameters
  -----------
  G : graph
    A NetworkX graph; must be undirected and without multiedges
  remove_neighbors: boolean
    if true, only return rss2 similarity of non-neighbor nodes
  dump_process: boolean
    if true, the calculation process is dumped
  disregard_weight: boolean
    if true, the edge weight is ignored

  Returns
  -------
  rss2: dictionary of dictionary of double
    if rss2[i][j] = k, this means the rss2 similarity
    between node i and node j is k.  A node only appears as an outer key
    when at least one pair starting at it received a value.

  Raises
  ------
  Exception
    if G has multiedges or is directed

  Examples
  --------
  >>> G=nx.Graph()
  >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)], weight=1)
  >>> networkx_addon.similarity.rss2(G)

  Notes
  -----
  An edge without a 'weight' attribute is treated as having weight 1,
  matching how G.degree(weight='weight') counts such an edge.

  References
  ----------
  """
  # isinstance (instead of an exact type comparison) also rejects subclasses
  if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
    raise Exception("rss2() not defined for graphs with multiedges.")

  if G.is_directed():
    raise Exception("rss2() not defined for directed graphs.")

  weighted_deg = G.degree(weight='weight')
  sim = { }
  cur_iter = 0
  total_iter = G.number_of_nodes()
  for a in G.nodes():
    if dump_process:
      cur_iter += 1
      print(cur_iter, '/', total_iter)
    # The neighbor set of a is needed for every (b, c) pair; build it once.
    nbrs_a = set(G.neighbors(a))
    for b in G.neighbors(a):
      # Everything that depends only on (a, b) is computed outside the
      # innermost loop.
      if disregard_weight:
        t1 = float(1)
        s1 = len(nbrs_a)
        s2 = len(set(G.neighbors(b)))
      else:
        t1 = float(G[a][b].get('weight', 1))
        s1 = weighted_deg[a]
        s2 = weighted_deg[b]
      for c in G.neighbors(b):
        if a == c:
          continue
        if remove_neighbors and c in nbrs_a:
          continue
        if disregard_weight:
          t2 = float(1)
        else:
          t2 = float(G[b][c].get('weight', 1))
        if a not in sim:
          sim[a] = { }
        if c not in sim[a]:
          sim[a][c] = t1/s1 * t2/s2
        else:
          sim[a][c] += t1/s1 * t2/s2
  return sim
82 | 
83 | 
84 | 


--------------------------------------------------------------------------------
/similarity/simrank.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Implement SimRank similarity
  3 | """
  4 | #    Copyright (C) 2004-2010 by
  5 | #    Hung-Hsuan Chen <hhchen@psu.edu>
  6 | #    All rights reserved.
  7 | #    BSD license.
  8 | #    NetworkX:http://networkx.lanl.gov/.
  9 | import copy
 10 | import sys
 11 | import networkx as nx
 12 | from collections import defaultdict
 13 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
 14 | __all__ = ['simrank']
 15 | 
 16 | def simrank(G, c=0.9, max_iter=100, remove_neighbors=False, remove_self=False, dump_process=False):
 17 |   """Return the SimRank similarity between nodes
 18 | 
 19 |   Parameters
 20 |   -----------
 21 |   G : graph
 22 |     A NetworkX graph
 23 |   c : float, 0 < c <= 1
 24 |     The number represents the relative importance between in-direct neighbors
 25 |     and direct neighbors
 26 |   max_iter : integer
 27 |     The number specifies the maximum number of iterations for simrank
 28 |     calculation
 29 |   remove_neighbors: boolean
 30 |     if true, the similarity value between neighbor nodes is set to zero
 31 |   remove_self : boolean
 32 |     if true, the similarity value between a node and itself is set to zero
 33 |   dump_process: boolean
 34 |     if true, the calculation process is dumped
 35 | 
 36 |   Returns
 37 |   -------
 38 |   simrank: dictionary of dictionary of double
 39 |     if simrank[i][j] = k, this means the SimRank similarity
 40 |     between node i and node j is k
 41 | 
 42 |   Examples
 43 |   --------
 44 |   >>> G=nx.Graph()
 45 |   >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
 46 |   >>> networkx_addon.similarity.simrank(G)
 47 | 
 48 |   Notes
 49 |   -----
 50 | 
 51 |   References
 52 |   ----------
 53 |   [1] G. Jeh and J. Widom.
 54 |   SimRank: a measure of structural-context similarity.
 55 |   In KDD'02 pages 538-543. ACM Press, 2002.
 56 |   """
 57 |   if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
 58 |     raise Exception("simrank() not defined for graphs with multiedges.")
 59 | 
 60 |   if G.is_directed():
 61 |     raise Exception("simrank() not defined for directed graphs.")
 62 | 
 63 |   sim_old = defaultdict(list)
 64 |   sim = defaultdict(list)
 65 |   for n in G.nodes():
 66 |     sim[n] = defaultdict(int)
 67 |     sim[n][n] = 1
 68 |     sim_old[n] = defaultdict(int)
 69 |     sim_old[n][n] = 0
 70 | 
 71 |   # calculate simrank
 72 |   for iter_ctr in range(max_iter):
 73 |     if _is_converge(sim, sim_old):
 74 |       break
 75 |     sim_old = copy.deepcopy(sim)
 76 |     for i, u in enumerate(G.nodes()):
 77 |       if dump_process:
 78 |         sys.stdout.write("\r%d : % d / %d" % (iter_ctr, i, G.number_of_nodes()))
 79 |       for v in G.nodes():
 80 |         if u == v:
 81 |           continue
 82 |         s_uv = 0.0
 83 |         for n_u in G.neighbors(u):
 84 |           for n_v in G.neighbors(v):
 85 |             s_uv += sim_old[n_u][n_v]
 86 |         sim[u][v] = (c * s_uv / (len(list(G.neighbors(u))) * len(list(G.neighbors(v))))) \
 87 |             if len(list(G.neighbors(u))) * len(list(G.neighbors(v))) > 0 else 0
 88 |     if dump_process:
 89 |       print('')
 90 | 
 91 |   if remove_self:
 92 |     for m in G.nodes():
 93 |       G[m][m] = 0
 94 | 
 95 |   if remove_neighbors:
 96 |     for m in G.nodes():
 97 |       for n in G.neighbors(m):
 98 |         sim[m][n] = 0
 99 | 
100 |   return sim
101 | 
102 | def _is_converge(s1, s2, eps=1e-4):
103 |   for i in s1.keys():
104 |     for j in s1[i].keys():
105 |       if abs(s1[i][j] - s2[i][j]) >= eps:
106 |         return False
107 |   return True
108 | 
109 | 
110 | 
111 | 
112 | 


--------------------------------------------------------------------------------
/similarity/tests/test_ascos.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | import math
  4 | import networkx
  5 | import numpy
  6 | import os
  7 | import sys
  8 | 
  9 | import nose.tools as nt
 10 | 
 11 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 12 | from similarity import ascos
 13 | 
 14 | class TestAscos():
 15 |   def test_ascos(self):
 16 |     G = networkx.Graph()
 17 |     G.add_edge(1,2)
 18 |     G.add_edge(1,4)
 19 |     G.add_edge(1,5)
 20 |     G.add_edge(1,6)
 21 |     G.add_edge(2,3)
 22 | 
 23 |     node_ids, sim = ascos(G)
 24 |     nt.assert_equal(len(node_ids), 6)
 25 |     nt.assert_equal(sim.shape, (6, 6))
 26 |     sim_ans = numpy.matrix((
 27 |         '1      0.5732 0.3474 0.5296 0.5296 0.5296;'
 28 |         '0.7563 1      0.6063 0.4005 0.4005 0.4005;'
 29 |         '0.6807 0.9000 1      0.3604 0.3604 0.3604;'
 30 |         '0.9000 0.5159 0.3126 1      0.4766 0.4766;'
 31 |         '0.9000 0.5159 0.3126 0.4766 1      0.4766;'
 32 |         '0.9000 0.5159 0.3126 0.4766 0.4766 1'))
 33 |     for i in range(sim.shape[0]):
 34 |       for j in range(sim.shape[1]):
 35 |         nt.assert_almost_equal(sim[i,j], sim_ans[i,j], 4)
 36 | 
 37 |   def test_weighted_ascos(self):
 38 |     G = networkx.Graph()
 39 |     G.add_edge('a', 'b', weight=1)
 40 |     node_ids, sim = ascos(G, is_weighted=True)
 41 |     for i in range(sim.shape[0]):
 42 |       for j in range(sim.shape[1]):
 43 |         if i == j:
 44 |           nt.assert_equal(sim[i, j], 1)
 45 |         else:
 46 |           nt.assert_almost_equal(sim[i,j], .9 * (1 - math.exp(-1)), 4)
 47 | 
 48 |     G['a']['b']['weight'] = 100
 49 |     node_ids, sim = ascos(G, is_weighted=True)
 50 |     for i in range(sim.shape[0]):
 51 |       for j in range(sim.shape[1]):
 52 |         if i == j:
 53 |           nt.assert_equal(sim[i, j], 1)
 54 |         else:
 55 |           nt.assert_almost_equal(sim[i,j], .9 * (1 - math.exp(-100)), 4)
 56 | 
 57 |     G = networkx.Graph()
 58 |     G.add_edge('a', 'b', weight=1)
 59 |     G.add_edge('b', 'c', weight=1)
 60 |     node_ids, sim = ascos(G, is_weighted=True)
 61 |     sim_ans = numpy.matrix((
 62 |         '1 0.1931 .5689;'
 63 |         '0.1931 1 0.5689;'
 64 |         '0.3394 0.3394 1'))
 65 |     for i in range(sim.shape[0]):
 66 |       for j in range(sim.shape[1]):
 67 |         nt.assert_almost_equal(sim[i, j], sim_ans[i, j], 4)
 68 | 
 69 |     G = networkx.Graph()
 70 |     G.add_edge('a', 'b', weight=1)
 71 |     G.add_edge('b', 'c', weight=10)
 72 |     node_ids, sim = ascos(G, is_weighted=True)
 73 |     sim_ans = numpy.matrix((
 74 |         '1 0.4796 0.5689;'
 75 |         '0.1762 1 0.9000;'
 76 |         '0.1959 0.8429 1'))
 77 |     for i in range(sim.shape[0]):
 78 |       for j in range(sim.shape[1]):
 79 |         nt.assert_almost_equal(sim[i, j], sim_ans[i, j], 4)
 80 | 
 81 |     G = networkx.Graph()
 82 |     G.add_edge(1,2)
 83 |     G.add_edge(1,4, weight=2)
 84 |     G.add_edge(1,5)
 85 |     G.add_edge(1,6)
 86 |     G.add_edge(2,3)
 87 | 
 88 |     node_ids, sim = ascos(G, is_weighted=True)
 89 |     nt.assert_equal(len(node_ids), 6)
 90 |     nt.assert_equal(sim.shape, (6, 6))
 91 |     sim_ans = numpy.matrix((
 92 |         '1      0.1810 0.0543 0.3742 0.1738 0.1738;'
 93 |         '0.3394 1      0.2999 0.1270 0.0590 0.0590;'
 94 |         '0.1931 0.5689 1      0.0722 0.0335 0.0335;'
 95 |         '0.7782 0.1409 0.0422 1      0.1353 0.1353;'
 96 |         '0.5689 0.1030 0.0308 0.2129 1      0.0989;'
 97 |         '0.5689 0.1030 0.0308 0.2129 0.0989 1'))
 98 |     for i in range(sim.shape[0]):
 99 |       for j in range(sim.shape[1]):
100 |         nt.assert_almost_equal(sim[i,j], sim_ans[i,j], 4)
101 | 
102 | 
103 | 


--------------------------------------------------------------------------------
/similarity/tests/test_cos_sim.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | import nose.tools as ns
 5 | import networkx
 6 | 
 7 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 8 | from similarity import cosine
 9 | 
class TestCosSim:
  """Compare cosine() with hand-computed values on a 7-node graph."""

  def setUp(self):
    graph = networkx.Graph()
    graph.add_edges_from([(0,2),(1,2),(2,3),(2,4),(3,5),(4,5),(5,6)])
    # Expected similarities, stored on the graph object itself.
    graph.cos_sim = {
        0: {1:0.5, 3:0.3333, 4:0.3333},
        1: {0:0.5, 3:0.3333, 4:0.3333},
        2: {5:0.2857},
        3: {0:0.3333, 1:0.3333, 4:0.5, 6:0.3333},
        4: {0:0.3333, 1:0.3333, 3:0.5, 6:0.3333},
        5: {2:0.2857},
        6: {3:0.3333, 4:0.3333},
    }
    self.G = graph

  def test_cosine(self):
    expected = self.G.cos_sim
    cos = cosine(self.G)
    # every node must be present as an outer key
    ns.assert_equal(len(cos), 7)
    for node in range(7):
      assert node in cos
    for i, row in expected.items():
      ns.assert_equal(len(row), len(cos[i]))
      for j, value in row.items():
        ns.assert_almost_equal(cos[i][j], value, places=4)
34 | 
35 | 
36 | 


--------------------------------------------------------------------------------
/similarity/tests/test_jaccard.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os
 4 | import sys
 5 | import nose.tools as nt
 6 | import networkx
 7 | 
 8 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 9 | from similarity import jaccard
10 | 
class TestJaccard:
  """Compare jaccard() with hand-computed values on a 7-node graph."""

  def setUp(self):
    graph = networkx.Graph()
    graph.add_edges_from([(0,2),(1,2),(2,3),(2,4),(3,5),(4,5),(5,6)])
    # Expected similarities, stored on the graph object itself.
    graph.jaccard = {
        0: {1:1, 3:0.5, 4:0.5},
        1: {0:1, 3:0.5, 4:0.5},
        2: {5:0.4},
        3: {0:0.5, 1:0.5, 4:1, 6:0.5},
        4: {0:0.5, 1:0.5, 3:1, 6:0.5},
        5: {2:0.4},
        6: {3:0.5, 4:0.5},
    }
    self.G = graph

  def test_jaccard(self):
    expected = self.G.jaccard
    jac = jaccard(self.G)
    # every node must be present as an outer key
    nt.assert_equal(len(jac), 7)
    for node in range(7):
      assert node in jac
    for i, row in expected.items():
      nt.assert_equal(len(row), len(jac[i]))
      for j, value in row.items():
        nt.assert_almost_equal(jac[i][j], value, places=4)
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/similarity/tests/test_katz.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import nose.tools as nt
 3 | import networkx
 4 | import numpy
 5 | import os
 6 | import sys
 7 | 
 8 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 9 | from similarity import katz
10 | 
class TestKatzSim:
  """Compare katz() with a precomputed similarity matrix."""

  def setUp(self):
    G=networkx.Graph()
    G.add_edges_from([(0,1), (0,3), (0,4), (0,5), (1,2)])
    self.G=G
    # Expected katz similarity for the default parameters, stored on the
    # graph object itself.
    self.G.katz_sim = numpy.matrix([
        [4.9178, 2.6286, 1.1405, 2.1337, 2.1337, 2.1337],
        [2.6286, 2.6369, 1.1441, 1.1405, 1.1405, 1.1405],
        [1.1405, 1.1441, 1.4964, 0.4948, 0.4948, 0.4948],
        [2.1337, 1.1405, 0.4948, 1.9258, 0.9258, 0.9258],
        [2.1337, 1.1405, 0.4948, 0.9258, 1.9258, 0.9258],
        [2.1337, 1.1405, 0.4948, 0.9258, 0.9258, 1.9258]
    ])

  def test_katz_sim(self):
    G = self.G
    katz_sim, nodelist = katz(G)
    nt.assert_equal(len(nodelist), 6)
    for i in range(6):
      nt.assert_true(i in nodelist)
    nt.assert_equal(len(katz_sim), 6)
    for i in range(self.G.katz_sim.shape[0]):
      for j in range(self.G.katz_sim.shape[1]):
        # A single pre-formatted string prints the same text under both
        # Python 2 and Python 3 (the print statement is Python 2 only).
        print('%s , %s' % (i, j))
        nt.assert_almost_equal(self.G.katz_sim[i,j], katz_sim[i,j], places=4)
36 | 
37 | 


--------------------------------------------------------------------------------
/similarity/tests/test_lhn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import nose.tools as nt
 3 | import networkx
 4 | import numpy
 5 | import os
 6 | import sys
 7 | 
 8 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 9 | from similarity import lhn
10 | 
class TestKatzSim:
  """Compare lhn() with a precomputed similarity matrix.

  NOTE(review): the class name still says "Katz" although this module
  tests lhn(); it is kept unchanged so test ids stay stable.
  """

  def setUp(self):
    G=networkx.Graph()
    G.add_edges_from([(0,1), (0,3), (0,4), (0,5), (1,2)])
    self.G=G
    # Expected LHN similarity for the default parameters, stored on the
    # graph object itself.
    self.G.lhn_sim = numpy.matrix([
        [0.3074, 0.3286, 0.2851, 0.5334, 0.5334, 0.5334],
        [0.3286, 0.6592, 0.5720, 0.5702, 0.5702, 0.5702],
        [0.2851, 0.5720, 1.4964, 0.4948, 0.4948, 0.4948],
        [0.5334, 0.5702, 0.4948, 1.9258, 0.9258, 0.9258],
        [0.5334, 0.5702, 0.4948, 0.9258, 1.9258, 0.9258],
        [0.5334, 0.5702, 0.4948, 0.9258, 0.9258, 1.9258]
    ])

  def test_lhn_sim(self):
    G = self.G
    lhn_sim, nodelist = lhn(G)
    nt.assert_equal(len(nodelist), 6)
    for i in range(6):
      nt.assert_true(i in nodelist)
    nt.assert_equal(len(lhn_sim), 6)
    for i in range(self.G.lhn_sim.shape[0]):
      for j in range(self.G.lhn_sim.shape[1]):
        # A single pre-formatted string prints the same text under both
        # Python 2 and Python 3 (the print statement is Python 2 only).
        print('%s , %s' % (i, j))
        nt.assert_almost_equal(self.G.lhn_sim[i,j], lhn_sim[i,j], places=4)
36 | 
37 | 


--------------------------------------------------------------------------------
/similarity/tests/test_rss2.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import nose.tools as nt
 4 | import networkx
 5 | import os
 6 | import sys
 7 | 
 8 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 9 | from similarity import rss2
10 | 
class TestRss2:
  """Compare rss2() with hand-computed values, with and without weights."""

  def setUp(self):
    # G: unweighted graph, used with disregard_weight=True
    G = networkx.Graph()
    G.add_edges_from([(0,2),(1,2),(2,3),(2,4),(3,5),(4,5),(5,6),(3,7)])
    self.G = G
    self.G.rss2_sim = { }
    self.G.rss2_sim[0] = {1:0.25, 3:0.25, 4:0.25}
    self.G.rss2_sim[1] = {0:0.25, 3:0.25, 4:0.25}
    self.G.rss2_sim[2] = {5:0.2083, 7:0.0833}
    self.G.rss2_sim[3] = {0:0.0833, 1:0.0833, 4:0.1944, 6:0.1111}
    self.G.rss2_sim[4] = {0:0.125, 1:0.125, 3:0.2917, 6:0.1667}
    self.G.rss2_sim[5] = {2:0.2778, 7:0.1111}
    self.G.rss2_sim[6] = {3:0.3333, 4:0.3333}
    self.G.rss2_sim[7] = {2:0.3333, 5:0.3333}

    # H: graph in which every edge carries an explicit weight
    H = networkx.Graph()
    H.add_edges_from([(0,1,{'weight':2}), (1,2,{'weight':1}),
        (0,3,{'weight':1}), (3,2,{'weight':3}), (2,4,{'weight':4})])
    self.H = H
    self.H.rss2_sim = { }
    self.H.rss2_sim[0] = {2:0.4722}
    self.H.rss2_sim[1] = {3:0.3472, 4:0.1667}
    self.H.rss2_sim[2] = {0:0.1771}
    self.H.rss2_sim[3] = {1:0.2604, 4:0.375}
    self.H.rss2_sim[4] = {1:0.125, 3:0.375}

  def test_rss2_sim_no_weight(self):
    G = self.G
    rss2_sim = rss2(G, disregard_weight=True)
    nt.assert_equal(len(rss2_sim), 8)
    for i in range(8):
      assert(i in rss2_sim)
    for i in self.G.rss2_sim.keys():
      nt.assert_equal(len(self.G.rss2_sim[i]), len(rss2_sim[i]))
      for j in self.G.rss2_sim[i].keys():
        nt.assert_almost_equal(rss2_sim[i][j], self.G.rss2_sim[i][j], places=4)

  def test_rss2_sim_with_weight(self):
    H = self.H
    rss2_sim = rss2(H)
    nt.assert_equal(len(rss2_sim), 5)
    for i in range(5):
      assert(i in rss2_sim)
    for i in self.H.rss2_sim.keys():
      nt.assert_equal(len(self.H.rss2_sim[i]), len(rss2_sim[i]))
      for j in self.H.rss2_sim[i].keys():
        # A single pre-formatted string prints the same text under both
        # Python 2 and Python 3 (the print statement is Python 2 only).
        print('%s , %s' % (i, j))
        nt.assert_almost_equal(rss2_sim[i][j], self.H.rss2_sim[i][j], places=4)
59 | 
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/similarity/tests/test_simrank.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import nose.tools as nt
 3 | import networkx
 4 | import os
 5 | import sys
 6 | 
 7 | sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
 8 | from similarity import simrank
 9 | 
class TestSimRank:
  """Compare simrank() with precomputed values on three small graphs."""

  def setUp(self):
    # G: triangle
    G = networkx.Graph()
    G.add_edges_from([(0,1),(1,2),(0,2)])
    self.G = G
    self.G.simrank = { }
    self.G.simrank[0] = {0:1, 1:0.6921, 2:0.6921}
    self.G.simrank[1] = {0:0.6921, 1:1, 2:0.6921}
    self.G.simrank[2] = {0:0.6921, 1:0.6921, 2:1}

    # H: triangle with a pendant node attached to node 2
    H = networkx.Graph()
    H.add_edges_from([(0,1),(0,2),(1,2),(2,3)])
    self.H = H
    self.H.simrank = { }
    self.H.simrank[0] = {0:1, 1:0.6538, 2:0.6261, 3:0.7317}
    self.H.simrank[1] = {0:0.6538, 1:1, 2:0.6261, 3:0.7317}
    self.H.simrank[2] = {0:0.6261, 1:0.6261, 2:1, 3:0.5365}
    self.H.simrank[3] = {0:0.7317, 1:0.7317, 2:0.5365, 3:1}

    # I: triangle plus an isolated node
    I = networkx.Graph()
    I.add_edges_from([(0,1), (1,2), (2,0)])
    I.add_node(3)
    self.I = I
    self.I.simrank = { }
    self.I.simrank[0] = {0:1, 1:0.6921, 2:0.6921, 3:0}
    self.I.simrank[1] = {0:0.6921, 1:1, 2:0.6921, 3:0}
    self.I.simrank[2] = {0:0.6921, 1:0.6921, 2:1, 3:0}
    self.I.simrank[3] = {0:0, 1:0, 2:0, 3:1}

  def _check(self, graph, num_nodes, remove_neighbors=False):
    # Run simrank() on graph and compare with the table stored in
    # graph.simrank.  With remove_neighbors=True the expected value of
    # every adjacent pair is 0, since simrank() zeroes those entries
    # after the iteration.
    sim = simrank(graph, remove_neighbors=remove_neighbors, remove_self=False)
    nt.assert_equal(len(sim), num_nodes)
    for i in range(num_nodes):
      nt.assert_in(i, sim)
    for i in graph.simrank.keys():
      nt.assert_equal(len(graph.simrank[i]), len(sim[i]))
      for j in graph.simrank[i].keys():
        expected = graph.simrank[i][j]
        if remove_neighbors and graph.has_edge(i, j):
          expected = 0
        nt.assert_almost_equal(sim[i][j], expected, places=4)

  def test_simrank(self):
    # test graph G
    self._check(self.G, 3)
    # test graph H
    self._check(self.H, 4)

  def test_simrank_disregard_nb(self):
    # Must actually pass remove_neighbors=True; otherwise this test would
    # only repeat test_simrank.
    # test graph G
    self._check(self.G, 3, remove_neighbors=True)
    # test graph H
    self._check(self.H, 4, remove_neighbors=True)

  def test_graph_with_orphan(self):
    self._check(self.I, 4)
93 | 
94 | 
95 | 


--------------------------------------------------------------------------------