├── .gitignore ├── README.md ├── __init__.py ├── information_propagation ├── __init__.py ├── independent_cascade.py ├── linear_threshold.py └── tests │ ├── test_independent_cascade.py │ └── test_linear_threshold.py └── similarity ├── __init__.py ├── ascos.py ├── cosine.py ├── jaccard.py ├── katz.py ├── lhn.py ├── rss2.py ├── simrank.py └── tests ├── test_ascos.py ├── test_cos_sim.py ├── test_jaccard.py ├── test_katz.py ├── test_lhn.py ├── test_rss2.py └── test_simrank.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | 21 | # Installer logs 22 | pip-log.txt 23 | 24 | # Unit test / coverage reports 25 | .coverage 26 | .tox 27 | nosetests.xml 28 | 29 | # Translations 30 | *.mo 31 | 32 | # Mr Developer 33 | .mr.developer.cfg 34 | .project 35 | .pydevproject 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | networkx_addon 2 | ============== 3 | 4 | ***CAUTION*** 5 | This project was developed in 2013, when Python 2.x and networkx 1.x were still popular. 6 | 7 | If you want to use this library on later Python (e.g., Python 3.x) and later networkx (e.g., networkx 2.x), you'll need to modify the code. 8 | 9 | ### Some add-on modules to networkx library 10 | 11 | 1. Information propagation models 12 | (1) independent cascade model 13 | (2) linear threshold model 14 | 15 | 2. 
Vertex similarity measures 16 | (1) ASCOS (for both weighted and unweighted networks) 17 | (2) Jaccard 18 | (3) Cosine 19 | (4) SimRank 20 | (5) RSS (r=2) 21 | (6) Katz 22 | (7) LHN 23 | 24 | ### Dependencies 25 | * numpy 26 | * scipy 27 | * networkx 28 | 29 | ### How to use it 30 | Put the "networkx_addon/" folder inside your source directory 31 | 32 | ### Sample usage 33 | 34 | 1. information propagation 35 | 36 | ```Python 37 | >>> import networkx 38 | >>> import networkx_addon 39 | >>> G = networkx.DiGraph() 40 | >>> G.add_edge(1,2,act_prob=.5) 41 | >>> G.add_edge(2,1,act_prob=.5) 42 | >>> G.add_edge(1,3,act_prob=.2) 43 | >>> G.add_edge(3,1,act_prob=.2) 44 | >>> G.add_edge(2,3,act_prob=.3) 45 | >>> networkx_addon.information_propagation.independent_cascade(G, [1], steps=2) 46 | ``` 47 | 48 | 2. vertex similarity 49 | 50 | ```Python 51 | >>> import networkx 52 | >>> import networkx_addon 53 | >>> G = networkx.Graph() 54 | >>> G.add_edges_from([(0,1),(1,2),(0,2)]) 55 | >>> node_ids, sim = networkx_addon.similarity.ascos(G) 56 | ``` 57 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from . import information_propagation 2 | from . import similarity 3 | -------------------------------------------------------------------------------- /information_propagation/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_threshold import * 2 | from .independent_cascade import * 3 | -------------------------------------------------------------------------------- /information_propagation/independent_cascade.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implement independent cascade model 3 | """ 4 | #!/usr/bin/env python 5 | # Copyright (C) 2004-2010 by 6 | # Hung-Hsuan Chen 7 | # All rights reserved. 8 | # BSD license. 9 | # NetworkX:http://networkx.lanl.gov/. 
__author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""

import copy
import random

__all__ = ['independent_cascade']


def independent_cascade(G, seeds, steps=0):
    """Return the active nodes of each diffusion step by the independent cascade
    model

    Parameters
    -----------
    G : graph
      A NetworkX graph. An undirected graph is converted to a directed one;
      a directed graph is deep-copied. The caller's graph is never modified.
    seeds : iterable of nodes
      The seed nodes for diffusion
    steps: integer
      The number of steps to diffuse. If steps <= 0, the diffusion runs until
      no more nodes can be activated. If steps > 0, the diffusion runs for at
      most "steps" rounds

    Returns
    -------
    layer_i_nodes : list of list of activated nodes
      layer_i_nodes[0]: the seeds
      layer_i_nodes[k]: the nodes activated at the kth diffusion step
      The last layer is an empty list when the diffusion stopped because a
      round activated no node.

    Raises
    ------
    Exception
      If G is a multigraph, if a seed is not a node of G, or if an edge has
      an 'act_prob' attribute larger than 1.

    Notes
    -----
    When node v in G becomes active, it has a *single* chance of activating
    each currently inactive neighbor w with probability p_{vw}

    p_{vw} is read from the edge attribute 'act_prob'; edges without that
    attribute use 0.1.

    Examples
    --------
    >>> DG = nx.DiGraph()
    >>> DG.add_edges_from([(1,2), (1,3), (1,5), (2,1), (3,2), (4,2), (4,3),
    ...   (4,6), (5,3), (5,4), (5,6), (6,4), (6,5)], act_prob=0.2)
    >>> layers = networkx_addon.information_propagation.independent_cascade(DG, [6])

    References
    ----------
    [1] David Kempe, Jon Kleinberg, and Eva Tardos.
        Influential nodes in a diffusion model for social networks.
        In Automata, Languages and Programming, 2005.
    """
    # is_multigraph() also rejects subclasses of MultiGraph/MultiDiGraph,
    # which an exact type() comparison lets through.
    if G.is_multigraph():
        raise Exception(
            "independent_cascade() is not defined for graphs with multiedges.")

    # Materialise the seeds once: this accepts any iterable (tuple, set,
    # generator) and keeps the caller's container untouched.
    A = list(seeds)

    # make sure the seeds are in the graph
    for s in A:
        if s not in G:
            raise Exception("seed %r is not in graph" % (s,))

    # work on a directed private copy so default attributes can be added
    if not G.is_directed():
        DG = G.to_directed()
    else:
        DG = copy.deepcopy(G)

    # init activation probabilities
    for u, v in DG.edges():
        attrs = DG[u][v]
        if 'act_prob' not in attrs:
            attrs['act_prob'] = 0.1
        elif attrs['act_prob'] > 1:
            raise Exception(
                "edge activation probability: %s cannot be larger than 1"
                % (attrs['act_prob'],))

    if steps <= 0:
        # perform diffusion until no more nodes can be activated
        return _diffuse_all(DG, A)
    # perform diffusion for at most "steps" rounds
    return _diffuse_k_rounds(DG, A, steps)


def _diffuse_all(G, A):
    """Run rounds until one of them activates nobody; return all layers."""
    tried_edges = set()
    layer_i_nodes = [list(A)]  # copy: A keeps growing in place
    while True:
        len_old = len(A)
        A, activated_nodes_of_this_round, cur_tried_edges = \
            _diffuse_one_round(G, A, tried_edges)
        layer_i_nodes.append(activated_nodes_of_this_round)
        tried_edges |= cur_tried_edges
        if len(A) == len_old:
            break
    return layer_i_nodes


def _diffuse_k_rounds(G, A, steps):
    """Run at most `steps` rounds, stopping early once nothing can change."""
    tried_edges = set()
    layer_i_nodes = [list(A)]  # copy: A keeps growing in place
    while steps > 0 and len(A) < len(G):
        len_old = len(A)
        A, activated_nodes_of_this_round, cur_tried_edges = \
            _diffuse_one_round(G, A, tried_edges)
        layer_i_nodes.append(activated_nodes_of_this_round)
        tried_edges |= cur_tried_edges
        if len(A) == len_old:
            break
        steps -= 1
    return layer_i_nodes


def _diffuse_one_round(G, A, tried_edges):
    """Give every active node one attempt on each untried outgoing edge.

    A is extended in place with the newly activated nodes. Returns the tuple
    (A, newly activated nodes as a list, edges tried during this round).
    """
    active = set(A)  # O(1) membership tests instead of scanning the list
    activated_nodes_of_this_round = set()
    cur_tried_edges = set()
    for s in A:
        for nb in G.successors(s):
            edge = (s, nb)
            if nb in active or edge in tried_edges or edge in cur_tried_edges:
                continue
            if _prop_success(G, s, nb):
                activated_nodes_of_this_round.add(nb)
            # an edge gets a single chance, whether or not it succeeded
            cur_tried_edges.add(edge)
    activated_nodes_of_this_round = list(activated_nodes_of_this_round)
    A.extend(activated_nodes_of_this_round)
    return A, activated_nodes_of_this_round, cur_tried_edges


def _prop_success(G, src, dest):
    """Draw once and report whether src activates dest.

    random.random() lies in [0, 1), so the strict comparison never succeeds
    for act_prob == 0 and always succeeds for act_prob == 1.
    """
    return random.random() < G[src][dest]['act_prob']
import copy

__all__ = ['linear_threshold']


def linear_threshold(G, seeds, steps=0):
    """Return the active nodes of each diffusion step by linear threshold model

    Parameters
    ----------
    G : networkx graph
      The graph to diffuse on. An undirected graph is converted to a directed
      one; a directed graph is deep-copied. The caller's graph is never
      modified.

    seeds: iterable of nodes
      The seed nodes of the graph

    steps: int
      The number of steps to diffuse
      When steps <= 0, the model diffuses until no more nodes
      can be activated

    Return
    ------
    layer_i_nodes : list of list of activated nodes
      layer_i_nodes[0]: the seeds
      layer_i_nodes[k]: the nodes activated at the kth diffusion step
      The last layer is an empty list when the diffusion stopped because a
      round activated no node.

    Raises
    ------
    Exception
      If G is a multigraph, if a seed is not a node of G, or if a node
      threshold or an edge influence is larger than 1.

    Notes
    -----
    1. Each node is supposed to have an attribute "threshold".  If not, the
       default value is given (0.5).
    2. Each edge is supposed to have an attribute "influence".  If not, the
       default value is given (1/in_degree)

    References
    ----------
    [1] Mark Granovetter. Threshold models of collective behavior.
        The American journal of sociology, 1978.

    Examples
    --------
    >>> DG = nx.DiGraph()
    >>> DG.add_edges_from([(1,2), (1,3), (1,5), (2,1), (3,2), (4,2), (4,3),
    ...   (4,6), (5,3), (5,4), (5,6), (6,4), (6,5)])
    >>> layers = networkx_addon.information_propagation.linear_threshold(DG, [1])

    """
    # is_multigraph() also rejects subclasses of MultiGraph/MultiDiGraph,
    # which an exact type() comparison lets through.
    if G.is_multigraph():
        raise Exception(
            "linear_threshold() is not defined for graphs with multiedges.")

    # Materialise the seeds once: this accepts any iterable (tuple, set,
    # generator) and keeps the caller's container untouched.
    A = list(seeds)

    # make sure the seeds are in the graph
    for s in A:
        if s not in G:
            raise Exception("seed %r is not in graph" % (s,))

    # work on a directed private copy so default attributes can be added
    if not G.is_directed():
        DG = G.to_directed()
    else:
        DG = copy.deepcopy(G)

    # init thresholds
    node_attrs = _node_attrs(DG)
    for n in DG.nodes():
        attrs = node_attrs[n]
        if 'threshold' not in attrs:
            attrs['threshold'] = 0.5
        elif attrs['threshold'] > 1:
            raise Exception(
                "node threshold: %s cannot be larger than 1"
                % (attrs['threshold'],))

    # init influences
    in_deg = DG.in_degree()
    for u, v in DG.edges():
        attrs = DG[u][v]
        if 'influence' not in attrs:
            attrs['influence'] = 1.0 / in_deg[v]
        elif attrs['influence'] > 1:
            raise Exception(
                "edge influence: %s cannot be larger than 1"
                % (attrs['influence'],))

    if steps <= 0:
        # perform diffusion until no more nodes can be activated
        return _diffuse_all(DG, A)
    # perform diffusion for at most "steps" rounds only
    return _diffuse_k_rounds(DG, A, steps)


def _node_attrs(G):
    """Return the node -> attribute-dict mapping of G.

    Older networkx exposes it as ``G.node``; that attribute was removed in
    networkx 2.4, where the subscriptable ``G.nodes`` view replaces it.
    """
    try:
        return G.node
    except AttributeError:
        return G.nodes


def _diffuse_all(G, A):
    """Run rounds until one of them activates nobody; return all layers."""
    layer_i_nodes = [list(A)]  # copy: A keeps growing in place
    while True:
        len_old = len(A)
        A, activated_nodes_of_this_round = _diffuse_one_round(G, A)
        layer_i_nodes.append(activated_nodes_of_this_round)
        if len(A) == len_old:
            break
    return layer_i_nodes


def _diffuse_k_rounds(G, A, steps):
    """Run at most `steps` rounds, stopping early once nothing can change."""
    layer_i_nodes = [list(A)]  # copy: A keeps growing in place
    while steps > 0 and len(A) < len(G):
        len_old = len(A)
        A, activated_nodes_of_this_round = _diffuse_one_round(G, A)
        layer_i_nodes.append(activated_nodes_of_this_round)
        if len(A) == len_old:
            break
        steps -= 1
    return layer_i_nodes


def _diffuse_one_round(G, A):
    """Activate every inactive successor whose active in-influence reaches
    its threshold.

    A is extended in place. Returns (A, newly activated nodes as a list).
    """
    active = set(A)  # built once per round instead of once per candidate
    node_attrs = _node_attrs(G)
    checked = set()  # the verdict for a node cannot change within a round
    activated_nodes_of_this_round = set()
    for s in A:
        for nb in G.successors(s):
            if nb in active or nb in checked:
                continue
            checked.add(nb)
            active_nb = [p for p in G.predecessors(nb) if p in active]
            if _influence_sum(G, active_nb, nb) >= node_attrs[nb]['threshold']:
                activated_nodes_of_this_round.add(nb)
    activated_nodes_of_this_round = list(activated_nodes_of_this_round)
    A.extend(activated_nodes_of_this_round)
    return A, activated_nodes_of_this_round


def _influence_sum(G, froms, to):
    """Return the total 'influence' of the edges going from `froms` to `to`."""
    return sum((G[f][to]['influence'] for f in froms), 0.0)
import os
import sys

import networkx
from nose.tools import assert_almost_equal
from nose.tools import assert_equal

sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
from information_propagation import independent_cascade
from information_propagation import linear_threshold

# (source, target, weight) triples shared by the cascade and threshold tests;
# the weight is used as 'act_prob' or as 'influence' depending on the model.
_WEIGHTED_EDGES = [
    (1, 2, .5), (2, 1, .5), (1, 3, .2), (3, 1, .2), (2, 3, .3),
    (2, 4, .5), (3, 4, .1), (3, 5, .2), (4, 5, .2), (5, 6, .6),
    (6, 5, .6), (6, 4, .3), (6, 2, .4),
]


def _build_digraph(attr_name):
    """Return the shared 6-node digraph with weights stored under attr_name."""
    G = networkx.DiGraph()
    for u, v, w in _WEIGHTED_EDGES:
        G.add_edge(u, v, **{attr_name: w})
    return G


def _build_threshold_digraph():
    """Return the shared digraph with 'influence' edges and node thresholds."""
    G = _build_digraph('influence')
    # add_node() on an existing node updates its attributes; unlike G.node[...]
    # this works on both old and new networkx releases.
    for n, threshold in [(2, .4), (3, .4), (4, .55), (5, .5), (6, .3)]:
        G.add_node(n, threshold=threshold)
    return G


def _count_nodes(layers):
    """Return the total number of nodes over all layers."""
    return sum(len(layer) for layer in layers)


class TestIndependentCascade():
    run_times = 10000

    def _mean_activated(self, G, seeds, steps=0):
        """Average number of active nodes (seeds included) over run_times runs."""
        total = 0.0
        for _ in range(TestIndependentCascade.run_times):
            total += _count_nodes(independent_cascade(G, seeds, steps=steps))
        return total / TestIndependentCascade.run_times

    def test_independent_cascade(self):
        G = _build_digraph('act_prob')
        assert_almost_equal(self._mean_activated(G, [1], steps=1), 1.7, places=1)
        assert_almost_equal(self._mean_activated(G, [1], steps=2), 2.16, places=1)

        G = networkx.DiGraph()
        G.add_edges_from([(1,2), (1,3), (2,4), (3,4)], act_prob=0.4)
        assert_almost_equal(self._mean_activated(G, [1]), 2.09, places=1)

    def test_independent_cascade_without_attribute(self):
        G = networkx.DiGraph()
        G.add_edges_from([(1,2), (1,3), (2,4), (3,4)])
        assert_almost_equal(self._mean_activated(G, [1], steps=1), 1.2, places=1)


class TestDiffusionLinearThreshold():
    def test_linear_threshold(self):
        G = _build_threshold_digraph()

        layers = linear_threshold(G, [1])

        print(layers)

        assert_equal(len(layers[0]), 1)
        assert(1 in layers[0])
        assert_equal(len(layers[1]), 1)
        assert(2 in layers[1])
        assert_equal(len(layers[2]), 1)
        assert(3 in layers[2])
        assert_equal(len(layers[3]), 1)
        assert(4 in layers[3])
        # `reduce` is not a builtin on Python 3; count the nodes directly
        assert_equal(_count_nodes(layers), 4)

    def test_linear_threshold_graph_without_attribute(self):
        G = networkx.Graph()
        G.add_edges_from([(1,2), (1,3), (2,3), (3,4), (3,5), (4,5), (4,6), (5,6)])

        layers = linear_threshold(G, [1])
        assert_equal(len(layers[0]), 1)
        assert(1 in layers[0])
        assert_equal(len(layers[1]), 1)
        assert(2 in layers[1])
        assert_equal(len(layers[2]), 1)
        assert(3 in layers[2])

        layers = linear_threshold(G, [1,4])
        assert_equal(_count_nodes(layers), 6)

        layers = linear_threshold(G, [1,2])
        assert_equal(len(layers[0]), 2)
        assert(1 in layers[0])
        assert(2 in layers[0])
        assert_equal(len(layers[1]), 1)
        assert(3 in layers[1])

    def test_linear_threshold_with_step(self):
        G = _build_threshold_digraph()

        layers = linear_threshold(G, [1], 1)
        assert_equal(len(layers[0]), 1)
        assert(1 in layers[0])
        assert_equal(len(layers[1]), 1)
        assert(2 in layers[1])

        layers = linear_threshold(G, [1], 2)
        assert_equal(len(layers[0]), 1)
        assert(1 in layers[0])
        assert_equal(len(layers[1]), 1)
        assert(2 in layers[1])
        assert_equal(len(layers[2]), 1)
        assert(3 in layers[2])
__author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""

import math

import numpy

__all__ = ['ascos']


def ascos(G, c=0.9, max_iter=100, is_weighted=False, remove_neighbors=False,
          remove_self=False, dump_process=False):
    """Return the ASCOS similarity between nodes

    Parameters
    -----------
    G: graph
      A NetworkX graph (undirected, without multiedges)
    c: float, 0 < c <= 1
      The number represents the relative importance between in-direct neighbors
      and direct neighbors
    max_iter: integer
      The number specifies the maximum number of iterations for ASCOS
      calculation
    is_weighted: boolean
      Whether use weighted ASCOS or not. Edges without a 'weight' attribute
      count as weight 1.
    remove_neighbors: boolean
      if true, the similarity value between neighbor nodes is set to zero
    remove_self: boolean
      if true, the similarity value between a node and itself is set to zero
    dump_process: boolean
      if true, the calculation process is dumped

    Returns
    -------
    node_ids : list of node ids
    sim : numpy matrix
      sim[i,j] is the similarity value between node_ids[i] and node_ids[j]

    Raises
    ------
    Exception
      If G is a multigraph or a directed graph.

    Examples
    --------
    >>> G = nx.Graph()
    >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
    >>> networkx_addon.similarity.ascos(G)

    Notes
    -----
    The iteration stops early once no entry changes by 1e-4 or more.

    References
    ----------
    [1] ASCOS: an Asymmetric network Structure COntext Similarity measure.
    Hung-Hsuan Chen and C. Lee Giles.  ASONAM 2013
    """
    # is_multigraph() also rejects subclasses of MultiGraph/MultiDiGraph,
    # which an exact type() comparison lets through.
    if G.is_multigraph():
        raise Exception("ascos() not defined for graphs with multiedges.")

    if G.is_directed():
        raise Exception("ascos() not defined for directed graphs.")

    # A real list: newer networkx returns a view from G.nodes() that cannot be
    # indexed by position.
    node_ids = list(G.nodes())
    index_of = dict((node, i) for i, node in enumerate(node_ids))
    nbs = [[index_of[nb] for nb in G.neighbors(node)] for node in node_ids]

    n = len(node_ids)
    # weighted degrees are loop-invariant: fetch them once, not once per row
    weighted_deg = G.degree(weight='weight') if is_weighted else None

    sim = numpy.eye(n)
    sim_old = numpy.zeros(shape=(n, n))

    for iter_ctr in range(max_iter):
        if _is_converge(sim, sim_old, n, n):
            break
        sim_old = sim.copy()
        for i in range(n):
            if dump_process:
                print(iter_ctr, ':', i, '/', n)
            if is_weighted:
                node_i = node_ids[i]
                norm = weighted_deg[node_i]
            else:
                norm = len(nbs[i])

            if norm > 0:
                # accumulate whole rows of the previous iterate, one neighbor
                # at a time
                acc = numpy.zeros(n)
                for k in nbs[i]:
                    if is_weighted:
                        w_ik = G[node_i][node_ids[k]].get('weight', 1)
                        acc += float(w_ik) * (1 - math.exp(-w_ik)) * sim_old[k]
                    else:
                        acc += sim_old[k]
                row = c * acc / norm
            else:
                row = numpy.zeros(n)
            row[i] = sim[i, i]  # the self-similarity is never updated
            sim[i] = row

    if remove_self:
        numpy.fill_diagonal(sim, 0)

    if remove_neighbors:
        for i in range(n):
            for j in nbs[i]:
                sim[i, j] = 0

    return node_ids, sim


def _is_converge(sim, sim_old, nrow, ncol, eps=1e-4):
    """Return True when no entry of the leading nrow x ncol block differs by
    eps or more between sim and sim_old."""
    diff = numpy.abs(numpy.asarray(sim)[:nrow, :ncol] -
                     numpy.asarray(sim_old)[:nrow, :ncol])
    return not (diff >= eps).any()
"""
Implement cosine similarity
"""
#    Copyright (C) 2004-2010 by
#    Hung-Hsuan Chen
#    All rights reserved.
#    BSD license.
#    NetworkX:http://networkx.lanl.gov/.
import math

__author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
__all__ = ['cosine']


def cosine(G, remove_neighbors=False, dump_process=False):
    """Return the cosine similarity between nodes

    Parameters
    -----------
    G : graph
      A NetworkX graph (undirected, without multiedges)
    remove_neighbors: boolean
      if true, only return cosine similarity of non-neighbor nodes
    dump_process: boolean
      if true, the calculation process is dumped

    Returns
    -------
    cosine: dictionary of dictionary of double
      if cosine[i][j] = k, this means the cosine similarity
      between node i and node j is k.
      Only pairs of distinct nodes that share at least one neighbor are
      present.

    Raises
    ------
    Exception
      If G is a multigraph or a directed graph.

    Examples
    --------
    >>> G=nx.Graph()
    >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
    >>> networkx_addon.similarity.cosine(G)

    Notes
    -----
    The similarity of nodes a and c with neighbor sets N(a) and N(c) is
    |N(a) & N(c)| / sqrt(|N(a)| * |N(c)|), i.e. the cosine of the angle
    between their adjacency vectors.

    References
    ----------
    """
    # is_multigraph() also rejects subclasses of MultiGraph/MultiDiGraph,
    # which an exact type() comparison lets through.
    if G.is_multigraph():
        raise Exception("cosine() not defined for graphs with multiedges.")

    if G.is_directed():
        raise Exception("cosine() not defined for directed graphs.")

    # Read every neighborhood once instead of rebuilding sets in the
    # innermost loop.
    nodes = list(G.nodes())
    adj = dict((n, list(G.neighbors(n))) for n in nodes)
    nbr_sets = dict((n, set(adj[n])) for n in nodes)

    cos = {}
    total_iter = G.number_of_nodes()
    for i, a in enumerate(nodes, 1):
        if dump_process:
            print(i, '/', total_iter)
        s1 = nbr_sets[a]
        for b in adj[a]:
            for c in adj[b]:
                if a == c:
                    continue
                if remove_neighbors and c in s1:
                    continue
                row = cos.setdefault(a, {})
                if c in row:
                    # already scored through another common neighbor
                    continue
                s2 = nbr_sets[c]
                # both sets contain b, so the denominator is never zero
                row[c] = len(s1 & s2) / math.sqrt(len(s1) * len(s2))

    return cos
"""
Implement jaccard similarity
"""
#    Copyright (C) 2004-2010 by
#    Hung-Hsuan Chen
#    All rights reserved.
#    BSD license.
#    NetworkX: http://networkx.lanl.gov/

__author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
__all__ = ['jaccard']


def jaccard(G, remove_neighbors=False, dump_process=False):
    """Return the jaccard similarity between nodes

    Parameters
    -----------
    G : graph
      A NetworkX graph (undirected, without multiedges)
    remove_neighbors: boolean
      if true, only return jaccard similarity of non-neighbor nodes
    dump_process: boolean
      if true, the calculation process is dumped

    Returns
    -------
    jaccard: dictionary of dictionary of double
      if jaccard[i][j] = k, this means the jaccard similarity
      between node i and node j is k.
      Only pairs of distinct nodes that share at least one neighbor are
      present.

    Raises
    ------
    Exception
      If G is a multigraph or a directed graph.

    Examples
    --------
    >>> G=nx.Graph()
    >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
    >>> networkx_addon.similarity.jaccard(G)

    Notes
    -----
    The similarity of nodes a and c with neighbor sets N(a) and N(c) is
    |N(a) & N(c)| / |N(a) | N(c)|.

    References
    ----------
    """
    # is_multigraph() also rejects subclasses of MultiGraph/MultiDiGraph,
    # which an exact type() comparison lets through.
    if G.is_multigraph():
        raise Exception("jaccard() not defined for graphs with multiedges.")

    if G.is_directed():
        raise Exception("jaccard() not defined for directed graphs.")

    # Read every neighborhood once instead of rebuilding sets in the
    # innermost loop.
    nodes = list(G.nodes())
    adj = dict((n, list(G.neighbors(n))) for n in nodes)
    nbr_sets = dict((n, set(adj[n])) for n in nodes)

    jac = {}
    total_iter = G.number_of_nodes()
    for i, a in enumerate(nodes, 1):
        if dump_process:
            print(i, '/', total_iter)
        s1 = nbr_sets[a]
        for b in adj[a]:
            for c in adj[b]:
                if a == c:
                    continue
                if remove_neighbors and c in s1:
                    continue
                row = jac.setdefault(a, {})
                if c in row:
                    # already scored through another common neighbor
                    continue
                s2 = nbr_sets[c]
                # the union contains b, so the denominator is never zero
                row[c] = float(len(s1 & s2)) / len(s1 | s2)

    return jac
"""
Implement katz similarity
"""
#    Copyright (C) 2004-2010 by
#    Hung-Hsuan Chen
#    All rights reserved.
#    BSD license.
#    NetworkX:http://networkx.lanl.gov/.
import networkx as nx
import numpy
import scipy.linalg

__author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
__all__ = ['katz']


def katz(G, c=0.9, remove_neighbors=False, inv_method=0):
    """Return the katz similarity between nodes

    Parameters
    -----------
    G : graph
      A NetworkX graph (undirected, without multiedges)
    c : float
      Damping factor. The adjacency matrix is scaled by c / lambda1, where
      lambda1 is its largest absolute eigenvalue.
    remove_neighbors: boolean
      if true, the similarity value between neighbor nodes is set to zero
    inv_method: integer
      1: scipy.linalg.pinv, 2: numpy.linalg.inv,
      any other value: numpy.linalg.pinv

    Returns
    -------
    katz: matrix of similarity
    nodelist: the node ids; row/column i of the matrix belongs to nodelist[i]

    Raises
    ------
    Exception
      If G is a multigraph or a directed graph.

    Examples
    --------
    >>> G=nx.Graph()
    >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
    >>> networkx_addon.similarity.katz(G)

    Notes
    -----
    The result is the (pseudo-)inverse of I - (c / lambda1) * A, where A is
    the unweighted adjacency matrix.

    References
    ----------
    """
    # is_multigraph() also rejects subclasses of MultiGraph/MultiDiGraph,
    # which an exact type() comparison lets through.
    if G.is_multigraph():
        raise Exception("katz() not defined for graphs with multiedges.")

    if G.is_directed():
        raise Exception("katz() not defined for directed graphs.")

    nodelist = list(G.nodes())
    A = nx.adjacency_matrix(G, nodelist=nodelist, weight=None)
    if hasattr(A, 'toarray'):
        # newer networkx returns a scipy sparse matrix, which the dense
        # numpy/scipy linalg routines below cannot consume
        A = A.toarray()

    w = numpy.linalg.eigvalsh(A)  # only the eigenvalues are needed
    lambda1 = numpy.abs(w).max() if len(w) > 0 else 0.0
    # an edgeless graph has lambda1 == 0; its adjacency matrix is all zeros,
    # so the scale is irrelevant and 0 avoids a division by zero
    scale = c / lambda1 if lambda1 > 0 else 0.0
    M = numpy.eye(A.shape[0]) - scale * A

    if inv_method == 1:
        S = scipy.linalg.pinv(M)
    elif inv_method == 2:
        S = numpy.linalg.inv(M)
    else:
        S = numpy.linalg.pinv(M)

    if remove_neighbors:
        S[numpy.asarray(A) != 0] = 0

    return S, nodelist
"""
Implement LHN similarity
"""
#    Copyright (C) 2004-2010 by
#    Hung-Hsuan Chen
#    All rights reserved.
#    BSD license.
#    NetworkX:http://networkx.lanl.gov/.
import networkx as nx
import numpy
import scipy.linalg

__author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
__all__ = ['lhn']


def lhn(G, c=0.9, remove_neighbors=False, inv_method=0):
    """Return the LHN similarity between nodes

    Parameters
    -----------
    G : graph
      A NetworkX graph (undirected, without multiedges)
    c : float
      Damping factor. The adjacency matrix is scaled by c / lambda1, where
      lambda1 is its largest absolute eigenvalue.
    remove_neighbors: boolean
      if true, the similarity value between neighbor nodes is set to zero
    inv_method: integer
      1: scipy.linalg.pinv, 2: numpy.linalg.inv,
      any other value: numpy.linalg.pinv

    Returns
    -------
    S: matrix of similarity
    nodelist: the node ids; row/column i of the matrix belongs to nodelist[i]

    Raises
    ------
    Exception
      If G is a multigraph or a directed graph.

    Examples
    --------
    >>> G=nx.Graph()
    >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
    >>> networkx_addon.similarity.lhn(G)

    Notes
    -----
    The result is the (pseudo-)inverse of I - (c / lambda1) * A, with entry
    (i, j) divided by deg(i) * deg(j). A is the unweighted adjacency matrix.
    Entries involving an isolated node (degree 0) are divided by zero and
    come out as inf or nan.

    References
    ----------
    """
    # is_multigraph() also rejects subclasses of MultiGraph/MultiDiGraph,
    # which an exact type() comparison lets through.
    if G.is_multigraph():
        raise Exception("lhn() not defined for graphs with multiedges.")

    if G.is_directed():
        raise Exception("lhn() not defined for directed graphs.")

    nodelist = list(G.nodes())
    A = nx.adjacency_matrix(G, nodelist=nodelist, weight=None)
    if hasattr(A, 'toarray'):
        # newer networkx returns a scipy sparse matrix, which the dense
        # numpy/scipy linalg routines below cannot consume
        A = A.toarray()

    w = numpy.linalg.eigvalsh(A)  # only the eigenvalues are needed
    lambda1 = numpy.abs(w).max() if len(w) > 0 else 0.0
    # an edgeless graph has lambda1 == 0; its adjacency matrix is all zeros,
    # so the scale is irrelevant and 0 avoids a division by zero
    scale = c / lambda1 if lambda1 > 0 else 0.0
    M = numpy.eye(A.shape[0]) - scale * A

    if inv_method == 1:
        S = scipy.linalg.pinv(M)
    elif inv_method == 2:
        S = numpy.linalg.inv(M)
    else:
        S = numpy.linalg.pinv(M)

    # normalise entry (i, j) by deg(i) * deg(j) in one vectorised step
    deg = numpy.asarray(A.sum(axis=0), dtype=float).reshape(-1)
    S = S / numpy.outer(deg, deg)

    if remove_neighbors:
        S[numpy.asarray(A) != 0] = 0

    return S, nodelist
S, G.nodes() 66 | 67 | -------------------------------------------------------------------------------- /similarity/rss2.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implement rss2 similarity 3 | """ 4 | # Copyright (C) 2004-2010 by 5 | # Hung-Hsuan Chen 6 | # All rights reserved. 7 | # BSD license. 8 | # NetworkX:http://networkx.lanl.gov/. 9 | import networkx as nx 10 | __author__ = """Hung-Hsuan Chen (hhchen@psu.edu)""" 11 | __all__ = ['rss2'] 12 | 13 | def rss2(G, remove_neighbors=False, dump_process=False, disregard_weight=False): 14 | """Return the rss2 similarity between nodes 15 | 16 | Parameters 17 | ----------- 18 | G : graph 19 | A NetworkX graph 20 | remove_neighbors: boolean 21 | if true, only return rss2 similarity of non-neighbor nodes 22 | dump_process: boolean 23 | if true, the calculation process is dumped 24 | disregard_weight: boolean 25 | if true, the edge weight is ignored 26 | 27 | Returns 28 | ------- 29 | rss2: dictionary of dictionary of double 30 | if rss2[i][j] = k, this means the rss2 similarity 31 | between node i and node j is k 32 | 33 | Examples 34 | -------- 35 | >>> G=nx.Graph() 36 | >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)], weight=1) 37 | >>> networkx_addon.similarity.rss2(G) 38 | 39 | Notes 40 | ----- 41 | 42 | References 43 | ---------- 44 | """ 45 | if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph: 46 | raise Exception("rss2() not defined for graphs with multiedges.") 47 | 48 | if G.is_directed(): 49 | raise Exception("rss2() not defined for directed graphs.") 50 | 51 | weighted_deg = G.degree(weight='weight') 52 | rss2 = { } 53 | cur_iter = 0 54 | total_iter = G.number_of_nodes() 55 | for a in G.nodes(): 56 | if dump_process: 57 | cur_iter += 1 58 | print(cur_iter, '/', total_iter) 59 | for b in G.neighbors(a): 60 | for c in G.neighbors(b): 61 | if a == c: 62 | continue 63 | if remove_neighbors and c in G.neighbors(a): 64 | 
"""
Implement SimRank similarity
"""
# Copyright (C) 2004-2010 by
# Hung-Hsuan Chen
# All rights reserved.
# BSD license.
# NetworkX:http://networkx.lanl.gov/.
import copy
import sys
import networkx as nx
from collections import defaultdict
__author__ = """Hung-Hsuan Chen (hhchen@psu.edu)"""
__all__ = ['simrank']


def simrank(G, c=0.9, max_iter=100, remove_neighbors=False, remove_self=False, dump_process=False):
    """Return the SimRank similarity between nodes

    Starting from sim[n][n] = 1, each iteration sets, for every pair of
    distinct nodes (u, v), sim[u][v] to c times the average of the previous
    iteration's similarity over all pairs (neighbor of u, neighbor of v).
    If either node has no neighbors the value is 0.  Iteration stops when
    no entry changed by 1e-4 or more, or after max_iter rounds.

    Parameters
    -----------
    G : graph
      A NetworkX graph
    c : float, 0 < c <= 1
      The number represents the relative importance between in-direct neighbors
      and direct neighbors
    max_iter : integer
      The number specifies the maximum number of iterations for simrank
      calculation
    remove_neighbors: boolean
      if true, the similarity value between neighbor nodes is set to zero
    remove_self : boolean
      if true, the similarity value between a node and itself is set to zero
    dump_process: boolean
      if true, the calculation process is dumped

    Returns
    -------
    simrank: dictionary of dictionary of double
      if simrank[i][j] = k, this means the SimRank similarity
      between node i and node j is k

    Raises
    ------
    Exception
      if G is a multigraph or a directed graph

    Examples
    --------
    >>> G=nx.Graph()
    >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)])
    >>> networkx_addon.similarity.simrank(G)

    Notes
    -----

    References
    ----------
    [1] G. Jeh and J. Widom.
    SimRank: a measure of structural-context similarity.
    In KDD'02 pages 538-543. ACM Press, 2002.
    """
    # isinstance (rather than an exact type comparison) also rejects
    # subclasses of the multigraph types.
    if isinstance(G, (nx.MultiGraph, nx.MultiDiGraph)):
        raise Exception("simrank() not defined for graphs with multiedges.")

    if G.is_directed():
        raise Exception("simrank() not defined for directed graphs.")

    # The graph is not modified below, so the node list and every neighbor
    # list can be built once instead of once per (u, v) pair.
    nodes = list(G.nodes())
    nbrs = dict((n, list(G.neighbors(n))) for n in nodes)

    sim_old = {}
    sim = {}
    for n in nodes:
        sim[n] = defaultdict(int)
        sim[n][n] = 1
        # A zero diagonal in sim_old guarantees the first convergence check
        # fails, so at least one iteration runs on a non-empty graph.
        sim_old[n] = defaultdict(int)
        sim_old[n][n] = 0

    # calculate simrank
    for iter_ctr in range(max_iter):
        if _is_converge(sim, sim_old):
            break
        sim_old = copy.deepcopy(sim)
        for i, u in enumerate(nodes):
            if dump_process:
                sys.stdout.write("\r%d : % d / %d" % (iter_ctr, i, len(nodes)))
            for v in nodes:
                if u == v:
                    continue
                pair_count = len(nbrs[u]) * len(nbrs[v])
                if pair_count > 0:
                    s_uv = 0.0
                    for n_u in nbrs[u]:
                        for n_v in nbrs[v]:
                            s_uv += sim_old[n_u][n_v]
                    sim[u][v] = c * s_uv / pair_count
                else:
                    sim[u][v] = 0
        if dump_process:
            # Terminate the "\r" progress line of this iteration.
            print('')

    if remove_self:
        for m in nodes:
            # Zero the diagonal of the result; the graph itself is left
            # untouched.
            sim[m][m] = 0

    if remove_neighbors:
        for m in nodes:
            for n in nbrs[m]:
                sim[m][n] = 0

    return sim
G=nx.Graph() 45 | >>> G.add_edges_from([(0,7), (0,1), (0,2), (0,3), (1,4), (2,4), (3,4), (4,5), (4,6)]) 46 | >>> networkx_addon.similarity.simrank(G) 47 | 48 | Notes 49 | ----- 50 | 51 | References 52 | ---------- 53 | [1] G. Jeh and J. Widom. 54 | SimRank: a measure of structural-context similarity. 55 | In KDD'02 pages 538-543. ACM Press, 2002. 56 | """ 57 | if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph: 58 | raise Exception("simrank() not defined for graphs with multiedges.") 59 | 60 | if G.is_directed(): 61 | raise Exception("simrank() not defined for directed graphs.") 62 | 63 | sim_old = defaultdict(list) 64 | sim = defaultdict(list) 65 | for n in G.nodes(): 66 | sim[n] = defaultdict(int) 67 | sim[n][n] = 1 68 | sim_old[n] = defaultdict(int) 69 | sim_old[n][n] = 0 70 | 71 | # calculate simrank 72 | for iter_ctr in range(max_iter): 73 | if _is_converge(sim, sim_old): 74 | break 75 | sim_old = copy.deepcopy(sim) 76 | for i, u in enumerate(G.nodes()): 77 | if dump_process: 78 | sys.stdout.write("\r%d : % d / %d" % (iter_ctr, i, G.number_of_nodes())) 79 | for v in G.nodes(): 80 | if u == v: 81 | continue 82 | s_uv = 0.0 83 | for n_u in G.neighbors(u): 84 | for n_v in G.neighbors(v): 85 | s_uv += sim_old[n_u][n_v] 86 | sim[u][v] = (c * s_uv / (len(list(G.neighbors(u))) * len(list(G.neighbors(v))))) \ 87 | if len(list(G.neighbors(u))) * len(list(G.neighbors(v))) > 0 else 0 88 | if dump_process: 89 | print('') 90 | 91 | if remove_self: 92 | for m in G.nodes(): 93 | G[m][m] = 0 94 | 95 | if remove_neighbors: 96 | for m in G.nodes(): 97 | for n in G.neighbors(m): 98 | sim[m][n] = 0 99 | 100 | return sim 101 | 102 | def _is_converge(s1, s2, eps=1e-4): 103 | for i in s1.keys(): 104 | for j in s1[i].keys(): 105 | if abs(s1[i][j] - s2[i][j]) >= eps: 106 | return False 107 | return True 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /similarity/tests/test_ascos.py: 
#!/usr/bin/env python

import math
import networkx
import numpy
import os
import sys

import nose.tools as nt

sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
from similarity import ascos


class TestAscos():
    """Compare ascos() output with precomputed similarity matrices."""

    @staticmethod
    def _assert_matrix_almost_equal(actual, expected):
        """Check every entry of actual against expected to 4 places."""
        n_rows, n_cols = actual.shape
        for r in range(n_rows):
            for k in range(n_cols):
                nt.assert_almost_equal(actual[r, k], expected[r, k], 4)

    @staticmethod
    def _assert_unit_diagonal_and_constant(actual, off_diagonal):
        """Check for an exact 1 on the diagonal and off_diagonal elsewhere."""
        n_rows, n_cols = actual.shape
        for r in range(n_rows):
            for k in range(n_cols):
                if r == k:
                    nt.assert_equal(actual[r, k], 1)
                else:
                    nt.assert_almost_equal(actual[r, k], off_diagonal, 4)

    def test_ascos(self):
        G = networkx.Graph()
        for u, v in [(1, 2), (1, 4), (1, 5), (1, 6), (2, 3)]:
            G.add_edge(u, v)

        node_ids, sim = ascos(G)
        nt.assert_equal(len(node_ids), 6)
        nt.assert_equal(sim.shape, (6, 6))
        expected = numpy.matrix((
            '1 0.5732 0.3474 0.5296 0.5296 0.5296;'
            '0.7563 1 0.6063 0.4005 0.4005 0.4005;'
            '0.6807 0.9000 1 0.3604 0.3604 0.3604;'
            '0.9000 0.5159 0.3126 1 0.4766 0.4766;'
            '0.9000 0.5159 0.3126 0.4766 1 0.4766;'
            '0.9000 0.5159 0.3126 0.4766 0.4766 1'))
        self._assert_matrix_almost_equal(sim, expected)

    def test_weighted_ascos(self):
        # Single edge of weight 1.
        G = networkx.Graph()
        G.add_edge('a', 'b', weight=1)
        node_ids, sim = ascos(G, is_weighted=True)
        self._assert_unit_diagonal_and_constant(sim, .9 * (1 - math.exp(-1)))

        # Same edge after raising its weight to 100.
        G['a']['b']['weight'] = 100
        node_ids, sim = ascos(G, is_weighted=True)
        self._assert_unit_diagonal_and_constant(sim, .9 * (1 - math.exp(-100)))

        # Path a - b - c with equal weights.
        G = networkx.Graph()
        G.add_edge('a', 'b', weight=1)
        G.add_edge('b', 'c', weight=1)
        node_ids, sim = ascos(G, is_weighted=True)
        expected = numpy.matrix((
            '1 0.1931 .5689;'
            '0.1931 1 0.5689;'
            '0.3394 0.3394 1'))
        self._assert_matrix_almost_equal(sim, expected)

        # Path a - b - c with unequal weights.
        G = networkx.Graph()
        G.add_edge('a', 'b', weight=1)
        G.add_edge('b', 'c', weight=10)
        node_ids, sim = ascos(G, is_weighted=True)
        expected = numpy.matrix((
            '1 0.4796 0.5689;'
            '0.1762 1 0.9000;'
            '0.1959 0.8429 1'))
        self._assert_matrix_almost_equal(sim, expected)

        # Six-node graph in which only edge (1, 4) carries an explicit weight.
        G = networkx.Graph()
        G.add_edge(1, 2)
        G.add_edge(1, 4, weight=2)
        G.add_edge(1, 5)
        G.add_edge(1, 6)
        G.add_edge(2, 3)

        node_ids, sim = ascos(G, is_weighted=True)
        nt.assert_equal(len(node_ids), 6)
        nt.assert_equal(sim.shape, (6, 6))
        expected = numpy.matrix((
            '1 0.1810 0.0543 0.3742 0.1738 0.1738;'
            '0.3394 1 0.2999 0.1270 0.0590 0.0590;'
            '0.1931 0.5689 1 0.0722 0.0335 0.0335;'
            '0.7782 0.1409 0.0422 1 0.1353 0.1353;'
            '0.5689 0.1030 0.0308 0.2129 1 0.0989;'
            '0.5689 0.1030 0.0308 0.2129 0.0989 1'))
        self._assert_matrix_almost_equal(sim, expected)
#!/usr/bin/env python
import os
import sys
import nose.tools as ns
import networkx

sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
from similarity import cosine


class TestCosSim:
    """Compare cosine() output with precomputed values on a 7-node graph."""

    def setUp(self):
        graph = networkx.Graph()
        graph.add_edges_from([(0,2),(1,2),(2,3),(2,4),(3,5),(4,5),(5,6)])
        # The expected similarities are kept as an attribute on the graph.
        graph.cos_sim = {
            0: {1:0.5, 3:0.3333, 4:0.3333},
            1: {0:0.5, 3:0.3333, 4:0.3333},
            2: {5:0.2857},
            3: {0:0.3333, 1:0.3333, 4:0.5, 6:0.3333},
            4: {0:0.3333, 1:0.3333, 3:0.5, 6:0.3333},
            5: {2:0.2857},
            6: {3:0.3333, 4:0.3333},
        }
        self.G = graph

    def test_cosine(self):
        expected = self.G.cos_sim
        cos = cosine(self.G)
        ns.assert_equal(len(cos), 7)
        for node in range(7):
            assert(node in cos)
        for node, row in expected.items():
            # Same number of scored partners, and each score equal to 4 places.
            ns.assert_equal(len(row), len(cos[node]))
            for other, value in row.items():
                ns.assert_almost_equal(cos[node][other], value, places=4)
#!/usr/bin/env python

import os
import sys
import nose.tools as nt
import networkx

sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
from similarity import jaccard


class TestJaccard:
    """Compare jaccard() output with precomputed values on a 7-node graph."""

    def setUp(self):
        graph = networkx.Graph()
        graph.add_edges_from([(0,2),(1,2),(2,3),(2,4),(3,5),(4,5),(5,6)])
        # The expected similarities are kept as an attribute on the graph.
        graph.jaccard = {
            0: {1:1, 3:0.5, 4:0.5},
            1: {0:1, 3:0.5, 4:0.5},
            2: {5:0.4},
            3: {0:0.5, 1:0.5, 4:1, 6:0.5},
            4: {0:0.5, 1:0.5, 3:1, 6:0.5},
            5: {2:0.4},
            6: {3:0.5, 4:0.5},
        }
        self.G = graph

    def test_jaccard(self):
        expected = self.G.jaccard
        jac = jaccard(self.G)
        nt.assert_equal(len(jac), 7)
        for node in range(7):
            assert(node in jac)
        for node, row in expected.items():
            # Same number of scored partners, and each score equal to 4 places.
            nt.assert_equal(len(row), len(jac[node]))
            for other, value in row.items():
                nt.assert_almost_equal(jac[node][other], value, places=4)
#!/usr/bin/env python
# Tests from similarity/tests/test_katz.py and similarity/tests/test_lhn.py.
import nose.tools as nt
import networkx
import numpy
import os
import sys

sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
from similarity import katz
from similarity import lhn


# --- similarity/tests/test_katz.py ---

class TestKatzSim:
    """Compare katz() output with a precomputed 6x6 matrix."""

    def setUp(self):
        G=networkx.Graph()
        G.add_edges_from([(0,1), (0,3), (0,4), (0,5), (1,2)])
        self.G=G
        # The expected similarities are kept as an attribute on the graph.
        self.G.katz_sim = numpy.matrix([
            [4.9178, 2.6286, 1.1405, 2.1337, 2.1337, 2.1337],
            [2.6286, 2.6369, 1.1441, 1.1405, 1.1405, 1.1405],
            [1.1405, 1.1441, 1.4964, 0.4948, 0.4948, 0.4948],
            [2.1337, 1.1405, 0.4948, 1.9258, 0.9258, 0.9258],
            [2.1337, 1.1405, 0.4948, 0.9258, 1.9258, 0.9258],
            [2.1337, 1.1405, 0.4948, 0.9258, 0.9258, 1.9258]
            ])

    def test_katz_sim(self):
        G = self.G
        katz_sim, nodelist = katz(G)
        nt.assert_equal(len(nodelist), 6)
        for i in range(6):
            nt.assert_true(i in nodelist)
        nt.assert_equal(len(katz_sim), 6)
        for i in range(self.G.katz_sim.shape[0]):
            for j in range(self.G.katz_sim.shape[1]):
                # %-formatting prints the same text on Python 2 and Python 3;
                # the output shows which entry was being checked on failure.
                print('%d , %d' % (i, j))
                nt.assert_almost_equal(self.G.katz_sim[i,j], katz_sim[i,j], places=4)


# --- similarity/tests/test_lhn.py ---

class TestLhnSim:
    """Compare lhn() output with a precomputed 6x6 matrix."""

    def setUp(self):
        G=networkx.Graph()
        G.add_edges_from([(0,1), (0,3), (0,4), (0,5), (1,2)])
        self.G=G
        # The expected similarities are kept as an attribute on the graph.
        self.G.lhn_sim = numpy.matrix([
            [0.3074, 0.3286, 0.2851, 0.5334, 0.5334, 0.5334],
            [0.3286, 0.6592, 0.5720, 0.5702, 0.5702, 0.5702],
            [0.2851, 0.5720, 1.4964, 0.4948, 0.4948, 0.4948],
            [0.5334, 0.5702, 0.4948, 1.9258, 0.9258, 0.9258],
            [0.5334, 0.5702, 0.4948, 0.9258, 1.9258, 0.9258],
            [0.5334, 0.5702, 0.4948, 0.9258, 0.9258, 1.9258]
            ])

    def test_lhn_sim(self):
        G = self.G
        lhn_sim, nodelist = lhn(G)
        nt.assert_equal(len(nodelist), 6)
        for i in range(6):
            nt.assert_true(i in nodelist)
        nt.assert_equal(len(lhn_sim), 6)
        for i in range(self.G.lhn_sim.shape[0]):
            for j in range(self.G.lhn_sim.shape[1]):
                # %-formatting prints the same text on Python 2 and Python 3;
                # the output shows which entry was being checked on failure.
                print('%d , %d' % (i, j))
                nt.assert_almost_equal(self.G.lhn_sim[i,j], lhn_sim[i,j], places=4)
#!/usr/bin/env python

import nose.tools as nt
import networkx
import os
import sys

sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
from similarity import rss2


class TestRss2:
    """Compare rss2() output with precomputed values.

    G is an unweighted 8-node graph checked with disregard_weight=True;
    H is a weighted 5-node graph checked with the default arguments.
    """

    def setUp(self):
        G = networkx.Graph()
        G.add_edges_from([(0,2),(1,2),(2,3),(2,4),(3,5),(4,5),(5,6),(3,7)])
        self.G = G
        # The expected similarities are kept as an attribute on each graph.
        self.G.rss2_sim = {
            0: {1:0.25, 3:0.25, 4:0.25},
            1: {0:0.25, 3:0.25, 4:0.25},
            2: {5:0.2083, 7:0.0833},
            3: {0:0.0833, 1:0.0833, 4:0.1944, 6:0.1111},
            4: {0:0.125, 1:0.125, 3:0.2917, 6:0.1667},
            5: {2:0.2778, 7:0.1111},
            6: {3:0.3333, 4:0.3333},
            7: {2:0.3333, 5:0.3333},
        }

        H = networkx.Graph()
        H.add_edges_from([(0,1,{'weight':2}), (1,2,{'weight':1}),
                          (0,3,{'weight':1}), (3,2,{'weight':3}), (2,4,{'weight':4})])
        self.H = H
        self.H.rss2_sim = {
            0: {2:0.4722},
            1: {3:0.3472, 4:0.1667},
            2: {0:0.1771},
            3: {1:0.2604, 4:0.375},
            4: {1:0.125, 3:0.375},
        }

    def test_rss2_sim_no_weight(self):
        G = self.G
        rss2_sim = rss2(G, disregard_weight=True)
        nt.assert_equal(len(rss2_sim), 8)
        for i in range(8):
            assert(i in rss2_sim)
        for i in self.G.rss2_sim:
            nt.assert_equal(len(self.G.rss2_sim[i]), len(rss2_sim[i]))
            for j in self.G.rss2_sim[i]:
                nt.assert_almost_equal(rss2_sim[i][j], self.G.rss2_sim[i][j], places=4)

    def test_rss2_sim_with_weight(self):
        H = self.H
        rss2_sim = rss2(H)
        nt.assert_equal(len(rss2_sim), 5)
        for i in range(5):
            assert(i in rss2_sim)
        for i in self.H.rss2_sim:
            nt.assert_equal(len(self.H.rss2_sim[i]), len(rss2_sim[i]))
            for j in self.H.rss2_sim[i]:
                # %-formatting prints the same text on Python 2 and Python 3;
                # the output shows which entry was being checked on failure.
                print('%d , %d' % (i, j))
                nt.assert_almost_equal(rss2_sim[i][j], self.H.rss2_sim[i][j], places=4)
#!/usr/bin/env python
import nose.tools as nt
import networkx
import os
import sys

sys.path.insert(0, os.path.realpath(os.path.join(os.path.dirname(__file__), '../..')))
from similarity import simrank


class TestSimRank:
    """Compare simrank() output with precomputed values.

    G is a triangle, H a triangle with one pendant node, and I a triangle
    plus an isolated node.
    """

    def setUp(self):
        G = networkx.Graph()
        G.add_edges_from([(0,1),(1,2),(0,2)])
        self.G = G
        # The expected similarities are kept as an attribute on each graph.
        self.G.simrank = {
            0: {0:1, 1:0.6921, 2:0.6921},
            1: {0:0.6921, 1:1, 2:0.6921},
            2: {0:0.6921, 1:0.6921, 2:1},
        }

        H = networkx.Graph()
        H.add_edges_from([(0,1),(0,2),(1,2),(2,3)])
        self.H = H
        self.H.simrank = {
            0: {0:1, 1:0.6538, 2:0.6261, 3:0.7317},
            1: {0:0.6538, 1:1, 2:0.6261, 3:0.7317},
            2: {0:0.6261, 1:0.6261, 2:1, 3:0.5365},
            3: {0:0.7317, 1:0.7317, 2:0.5365, 3:1},
        }

        I = networkx.Graph()
        I.add_edges_from([(0,1), (1,2), (2,0)])
        I.add_node(3)
        self.I = I
        self.I.simrank = {
            0: {0:1, 1:0.6921, 2:0.6921, 3:0},
            1: {0:0.6921, 1:1, 2:0.6921, 3:0},
            2: {0:0.6921, 1:0.6921, 2:1, 3:0},
            3: {0:0, 1:0, 2:0, 3:1},
        }

    @staticmethod
    def _compare(sim, expected):
        """Check that sim has exactly the expected nodes, row sizes and values."""
        nt.assert_equal(len(sim), len(expected))
        for node in expected:
            nt.assert_in(node, sim)
        for i, row in expected.items():
            nt.assert_equal(len(row), len(sim[i]))
            for j, value in row.items():
                nt.assert_almost_equal(sim[i][j], value, places=4)

    @staticmethod
    def _zero_neighbor_pairs(graph, expected):
        """Return a copy of expected with every adjacent pair's value set to 0."""
        return dict(
            (i, dict((j, 0 if graph.has_edge(i, j) else value)
                     for j, value in row.items()))
            for i, row in expected.items())

    def test_simrank(self):
        for graph in (self.G, self.H):
            sim = simrank(graph, remove_neighbors=False, remove_self=False)
            self._compare(sim, graph.simrank)

    def test_simrank_disregard_nb(self):
        # With remove_neighbors=True the similarity of every adjacent pair
        # must be zero while all other entries keep their converged value.
        for graph in (self.G, self.H):
            sim = simrank(graph, remove_neighbors=True, remove_self=False)
            self._compare(sim, self._zero_neighbor_pairs(graph, graph.simrank))

    def test_graph_with_orphan(self):
        I = self.I
        sim = simrank(I, remove_neighbors=False, remove_self=False)
        self._compare(sim, I.simrank)