├── BiMLPA ├── test │ ├── __init__.py │ ├── southernwomen.net │ └── test_bimlpa.py ├── __init__.py ├── generator.py ├── community.py ├── utils.py ├── modularity.py └── bimlpa.py ├── requirements.txt ├── setup.cfg ├── LICENSE ├── READMEja.md ├── README.md └── setup.py /BiMLPA/test/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | networkx<=2.4 2 | numpy 3 | scikit-learn 4 | matplotlib 5 | pytest>=3.6 -------------------------------------------------------------------------------- /BiMLPA/__init__.py: -------------------------------------------------------------------------------- 1 | from .bimlpa import * 2 | from .community import * 3 | from .generator import generate_network, generate_network_with_name 4 | from .modularity import * 5 | from .utils import relabeling, output_community, calc_NMI 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | # This flag says that the code is written to work on both Python 2 and Python 3 | # 3. If at all possible, it is good practice to do this. If you cannot, you 4 | # will need to generate wheels for each Python version that you support. 
5 | -------------------------------------------------------------------------------- /BiMLPA/test/southernwomen.net: -------------------------------------------------------------------------------- 1 | 1 19 2 | 2 19 3 | 4 19 4 | 1 20 5 | 2 20 6 | 3 20 7 | 1 21 8 | 2 21 9 | 4 21 10 | 3 21 11 | 5 21 12 | 6 21 13 | 1 22 14 | 4 22 15 | 3 22 16 | 5 22 17 | 1 23 18 | 2 23 19 | 4 23 20 | 3 23 21 | 5 23 22 | 6 23 23 | 7 23 24 | 9 23 25 | 1 24 26 | 2 24 27 | 4 24 28 | 3 24 29 | 6 24 30 | 7 24 31 | 8 24 32 | 14 24 33 | 1 26 34 | 2 26 35 | 4 26 36 | 3 26 37 | 6 26 38 | 7 26 39 | 9 26 40 | 8 26 41 | 10 26 42 | 13 26 43 | 15 26 44 | 11 26 45 | 12 26 46 | 16 26 47 | 1 27 48 | 3 27 49 | 9 27 50 | 8 27 51 | 14 27 52 | 10 27 53 | 13 27 54 | 11 27 55 | 12 27 56 | 16 27 57 | 17 27 58 | 18 27 59 | 2 25 60 | 4 25 61 | 3 25 62 | 5 25 63 | 7 25 64 | 9 25 65 | 14 25 66 | 10 25 67 | 13 25 68 | 15 25 69 | 14 30 70 | 10 30 71 | 13 30 72 | 15 30 73 | 11 30 74 | 12 30 75 | 14 28 76 | 13 28 77 | 15 28 78 | 11 28 79 | 12 28 80 | 14 31 81 | 13 31 82 | 12 31 83 | 14 32 84 | 13 32 85 | 12 32 86 | 14 29 87 | 15 29 88 | 17 29 89 | 18 29 90 | -------------------------------------------------------------------------------- /BiMLPA/test/test_bimlpa.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from BiMLPA import * 3 | 4 | 5 | class BiMLPATestCase(unittest.TestCase): 6 | 7 | def test_bimlpa_SqrtDeg(self): 8 | 9 | G = generate_network('southernwomen.net') 10 | bimlpa = BiMLPA_SqrtDeg(G, 0.3, 7) 11 | bimlpa.start() 12 | relabeling(G) 13 | top, bottom = output_community(G) 14 | self.assertIsInstance(top, list) 15 | self.assertIsInstance(bottom, list) 16 | 17 | def test_bimlpa(self): 18 | 19 | G = generate_network_withName('southernwomen.net') 20 | bimlpa = BiMLPA(G, 0.3, 7) 21 | bimlpa.start() 22 | relabeling(G) 23 | top, bottom = output_community(G) 24 | self.assertIsInstance(top, list) 25 | self.assertIsInstance(bottom, list) 26 | 27 | def 
test_bimlpa_BiMLPA_EdgeProb(self): 28 | G = generate_network('southernwomen.net') 29 | bimlpa = BiMLPA_EdgeProb(G, 0.3, 7) 30 | bimlpa.start() 31 | relabeling(G) 32 | top, bottom = output_community(G) 33 | self.assertIsInstance(top, list) 34 | self.assertIsInstance(bottom, list) 35 | 36 | 37 | if __name__ == '__main__': 38 | unittest.main() 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2019, Giulio Rossetti 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | -------------------------------------------------------------------------------- /BiMLPA/generator.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | 3 | 4 | def generate_network(path): 5 | with open(path, 'r') as f: 6 | lines = f.readlines() 7 | u_list = [] 8 | v_list = [] 9 | for l in lines: 10 | u, v = list(map(int, l.split())) 11 | u_list.append(u) 12 | v_list.append(v) 13 | u_max = max(u_list) 14 | v_min = min(v_list) 15 | if u_max < v_min: 16 | padding = 0 17 | elif v_min == 0: 18 | padding = u_max + 1 19 | else: 20 | padding = u_max 21 | 22 | G = nx.Graph() 23 | u_set = list(set(u_list)) 24 | v_set = list(map(lambda x: x+padding, set(v_list))) 25 | 26 | G.add_nodes_from(u_set, bipartite=0) 27 | G.add_nodes_from(v_set, bipartite=1) 28 | 29 | for i in range(len(u_list)): 30 | G.add_edge(u_list[i], v_list[i]+padding) 31 | return G 32 | 33 | 34 | def generate_network_with_name(path): 35 | with open(path, 'r') as f: 36 | lines = f.readlines() 37 | u_list = [] 38 | v_list = [] 39 | for l in lines: 40 | l = l[:-1] 41 | u, v = l.split('\t') 42 | u_list.append(u) 43 | v_list.append(v) 44 | 45 | G = nx.Graph() 46 | u_set = list(set(u_list)) 47 | v_set = list(set(v_list)) 48 | 49 | G.add_nodes_from(u_set, bipartite=0) 50 | G.add_nodes_from(v_set, bipartite=1) 51 | 52 | for i in range(len(u_list)): 53 | G.add_edge(u_list[i], v_list[i]) 54 | return G 55 | -------------------------------------------------------------------------------- /READMEja.md: -------------------------------------------------------------------------------- 1 | # BiMLPA 2 | "BiMLPA: Community Detection in Bipartite Networks by Multi-Label Propagation", NetSci-X 2020 https://link.springer.com/chapter/10.1007/978-3-030-38965-9_2 3 | 4 | [マルチラベル伝搬法を用いた二部ネットワークからのコミュニティ抽出(JSAI 2019)](https://confit.atlas.jp/guide/event-img/jsai2019/4B2-J-3-02/public/pdf?type=in) 5 | 6 | 二部ネットワークにおける,多対多対応のコミュニティ抽出を行うプログラムです. 
7 | 8 | ## インストール 9 | 10 | pip経由でインストールする場合: 11 | ```bash 12 | sudo pip install bimlpa 13 | ``` 14 | 15 | レポジトリからlatest versionをインストールしたい場合: 16 | ```bash 17 | sudo pip install git+https://github.com/marblet/BiMLPA 18 | ``` 19 | 20 | ## 使い方 21 | 22 | ```python 23 | from BiMLPA import * 24 | import networkx as nx 25 | 26 | G = generate_network('BiMLPA/test/southernwomen.net') 27 | 28 | # The parameters are set to theta=0.3, lambda=7 29 | bimlpa = BiMLPA_SqrtDeg(G, 0.3, 7) 30 | bimlpa.start() 31 | relabeling(G) 32 | top_coms, bottom_coms = output_community(G) 33 | 34 | # If the community structure is known, the normalized mutual information score can be calculated 35 | # using calc_NMI by assigning the correct community number to the attribute 'community' of the node. 36 | community = {i+1: 0 for i in range(9)} 37 | community.update({i+10: 1 for i in range(9)}) 38 | community.update({i+19: 2 for i in range(6)}) 39 | community.update({i+25: 3 for i in range(3)}) 40 | community.update({i+28: 4 for i in range(5)}) 41 | nx.set_node_attributes(G, name='community', values=community) 42 | 43 | print('NMI : ', calc_NMI(G)) 44 | ``` 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BiMLPA 2 | "BiMLPA: Community Detection in Bipartite Networks by Multi-Label Propagation", NetSci-X 2020 3 | https://link.springer.com/chapter/10.1007/978-3-030-38965-9_2 4 | 5 | This is the implementation of BiMLPA. 6 | BiMLPA detects many-to-many correspondence communities in bipartite networks using a multi-label propagation algorithm. 7 | 日本語版READMEは[こちら](https://github.com/hbkt/BiMLPA/blob/master/READMEja.md) 8 | 9 | ## Installation 10 | 11 | In order to install the package, just download (or clone) the current project and copy the BiMLPA folder into the root of your application. 
12 | 13 | Alternatively, use pip: 14 | ```bash 15 | sudo pip install bimlpa 16 | ``` 17 | 18 | If you would like to install the latest version of the package from the repository, use: 19 | ```bash 20 | sudo pip install git+https://github.com/marblet/BiMLPA 21 | ``` 22 | 23 | 24 | ## Usage 25 | 26 | ```python 27 | from BiMLPA import * 28 | import networkx as nx 29 | 30 | G = generate_network('BiMLPA/test/southernwomen.net') 31 | 32 | # The parameters are set to theta=0.3, lambda=7 33 | bimlpa = BiMLPA_SqrtDeg(G, 0.3, 7) 34 | bimlpa.start() 35 | relabeling(G) 36 | top_coms, bottom_coms = output_community(G) 37 | 38 | # If the community structure is known, the normalized mutual information score can be calculated 39 | # using calc_NMI by assigning the correct community number to the attribute 'community' of the node. 40 | community = {i+1: 0 for i in range(9)} 41 | community.update({i+10: 1 for i in range(9)}) 42 | community.update({i+19: 2 for i in range(6)}) 43 | community.update({i+25: 3 for i in range(3)}) 44 | community.update({i+28: 4 for i in range(5)}) 45 | nx.set_node_attributes(G, name='community', values=community) 46 | 47 | print('NMI : ', calc_NMI(G)) 48 | ``` 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | # from codecs import open 3 | # from os import path 4 | 5 | __author__ = '' 6 | __license__ = "BSD-2-Clause" 7 | __email__ = "" 8 | 9 | # here = path.abspath(path.dirname(__file__)) 10 | 11 | # Get the long description from the README file 12 | # with open(path.join(here, 'README.md'), encoding='utf-8') as f: 13 | # long_description = f.read() 14 | 15 | 16 | setup(name='bimlpa', 17 | version='0.1.2', 18 | license='BSD-Clause-2', 19 | description='Community detection in bipartite networks using multi-label propagation algorithm', 20 | url='https://github.com/hbkt/BiMLPA', 21 | 
author=['Hibiki Taguchi'], 22 | author_email='', 23 | classifiers=[ 24 | # How mature is this project? Common values are 25 | # 3 - Alpha 26 | # 4 - Beta 27 | # 5 - Production/Stable 28 | 'Development Status :: 5 - Production/Stable', 29 | 30 | # Indicate who your project is intended for 31 | 'Intended Audience :: Developers', 32 | 'Topic :: Software Development :: Build Tools', 33 | 34 | # Pick your license as you wish (should match "license" above) 35 | 'License :: OSI Approved :: BSD License', 36 | 37 | "Operating System :: OS Independent", 38 | 39 | # Specify the Python versions you support here. In particular, ensure 40 | # that you indicate whether you support Python 2, Python 3 or both. 41 | 'Programming Language :: Python :: 3' 42 | ], 43 | keywords='complex-networks community-discovery labeled-graph', 44 | install_requires=['scikit-learn', 'matplotlib', 'networkx', ''], 45 | packages=find_packages(exclude=["*.test", "*.test.*", "test.*", "test", "BiMLPA.test", "BiMLPA.test.*"]), 46 | ) 47 | -------------------------------------------------------------------------------- /BiMLPA/community.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | from collections import Counter 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def draw_community(G, pos=None): 7 | if pos is None: 8 | top = {n for n, d in G.nodes(data=True) if d['bipartite'] == 0} 9 | bottom = set(G) - top 10 | bottom = list(bottom) 11 | bottom.sort() 12 | 13 | c = Counter(nx.get_node_attributes(G, 'label').values()) 14 | num_of_label = max(nx.get_node_attributes(G, 'label').values()) + 1 15 | 16 | pos = dict() 17 | pos_label = [0] * num_of_label 18 | for i in range(1, num_of_label): 19 | pos_label[i] = pos_label[i-1] + c[i-1] 20 | for v in top: 21 | label = G.nodes[v]['label'] 22 | pos[v] = (label + pos_label[label], 1) 23 | pos_label[label] += 1 24 | pos_label = [0] * num_of_label 25 | for i in range(1, num_of_label): 26 | pos_label[i] = 
pos_label[i-1] + c[i-1] 27 | for v in bottom: 28 | label = G.nodes[v]['label'] 29 | pos[v] = (label + pos_label[label], 0) 30 | pos_label[label] += 1 31 | 32 | color = [d['label'] for node, d in G.nodes(data=True)] 33 | 34 | nx.draw_networkx(G, pos, node_color=color) 35 | plt.tick_params(labelbottom=False, labelleft=False, labelright=False, labeltop=False) 36 | plt.gca().spines['right'].set_visible(False) 37 | plt.gca().spines['top'].set_visible(False) 38 | plt.gca().spines['left'].set_visible(False) 39 | plt.gca().spines['bottom'].set_visible(False) 40 | plt.tick_params(length=0) 41 | 42 | plt.show() 43 | 44 | 45 | def number_of_communities(G): 46 | c_top = Counter([d['label'] for n, d in G.nodes(data=True) if d['bipartite'] == 0]) 47 | c_bottom = Counter([d['label'] for n, d in G.nodes(data=True) if d['bipartite'] == 1]) 48 | return len(c_top), len(c_bottom) 49 | -------------------------------------------------------------------------------- /BiMLPA/utils.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | from sklearn.metrics import normalized_mutual_info_score as NMI 3 | 4 | 5 | def relabeling(G): 6 | node2label = nx.get_node_attributes(G, 'label') 7 | labels = [list(l.keys()) for l in node2label.values()] 8 | for l in labels: 9 | l.sort() 10 | labels = list(map(str, labels)) 11 | labels_set = set(labels) 12 | labels_dict = dict({s: i for i, s in enumerate(labels_set, start=1)}) 13 | 14 | new_labels = {v: labels_dict[labels[i]] for i, v in enumerate(node2label)} 15 | nx.set_node_attributes(G, new_labels, 'label') 16 | 17 | 18 | def calc_NMI(G): 19 | # top と bottom 両方を計算して返す 20 | top = {n: d for n, d in G.nodes(data=True) if d['bipartite'] == 0} 21 | bottom = {n: d for n, d in G.nodes(data=True) if d['bipartite'] == 1} 22 | pred_top = [] 23 | truth_top = [] 24 | for n, d in top.items(): 25 | pred_top.append(d['label']) 26 | truth_top.append(d['community']) 27 | pred_bottom = [] 28 | truth_bottom = 
[] 29 | for n, d in bottom.items(): 30 | pred_bottom.append(d['label']) 31 | truth_bottom.append(d['community']) 32 | return NMI(truth_top, pred_top, average_method='arithmetic'), \ 33 | NMI(truth_bottom, pred_bottom, average_method='arithmetic') 34 | 35 | 36 | def output_community(G): 37 | top = {n: d['label'] for n, d in G.nodes(data=True) if d['bipartite'] == 0} 38 | bottom = {n: d['label'] for n, d in G.nodes(data=True) if d['bipartite'] == 1} 39 | 40 | top_max = max(top.values()) 41 | bottom_max = max(bottom.values()) 42 | 43 | top_com_list = [[] for _ in range(top_max)] 44 | for k, v in top.items(): 45 | top_com_list[v - 1].append(k) 46 | 47 | bottom_com_list = [[] for _ in range(bottom_max)] 48 | for k, v in bottom.items(): 49 | bottom_com_list[v - 1].append(k) 50 | 51 | return top_com_list, bottom_com_list 52 | -------------------------------------------------------------------------------- /BiMLPA/modularity.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict, Counter 2 | from itertools import combinations 3 | 4 | 5 | # calculate the modularity based on labels assigned each node 6 | def guimera_modularity(G): 7 | top = {v: d for v, d in G.nodes(data=True) if d['bipartite'] == 0} 8 | bottom = {v: d for v, d in G.nodes(data=True) if d['bipartite'] == 1} 9 | top_coms = defaultdict(set) 10 | bottom_coms = defaultdict(set) 11 | for v, d in top.items(): 12 | top_coms[d['label']].add(v) 13 | for v, d in bottom.items(): 14 | bottom_coms[d['label']].add(v) 15 | 16 | node2degree = dict(G.degree()) 17 | E = G.number_of_edges() 18 | Q_top = 0 19 | Q_bottom = 0 20 | Mb = 0 21 | for v in bottom: 22 | Mb += node2degree[v] * (node2degree[v] - 1) 23 | for _, nodes in top_coms.items(): 24 | for pair in combinations(nodes, 2): 25 | i, j = pair 26 | tmpQ = len(set(G.neighbors(i)) & set(G.neighbors(j))) / Mb 27 | tmpQ -= node2degree[i] * node2degree[j] / (E * E) 28 | Q_top += tmpQ 29 | 30 | Mt = 0 31 | 
for v in top: 32 | Mt += node2degree[v] * (node2degree[v] - 1) 33 | for _, nodes in bottom_coms.items(): 34 | for pair in combinations(nodes, 2): 35 | i, j = pair 36 | tmpQ = len(set(G.neighbors(i)) & set(G.neighbors(j))) / Mt 37 | tmpQ -= node2degree[i] * node2degree[j] / (E * E) 38 | Q_bottom += tmpQ 39 | 40 | return Q_top*2, Q_bottom*2 41 | 42 | 43 | def murata_modularity(G): 44 | top = {v: d for v, d in G.nodes(data=True) if d['bipartite'] == 0} 45 | bottom = {v: d for v, d in G.nodes(data=True) if d['bipartite'] == 1} 46 | top_coms = defaultdict(set) 47 | bottom_coms = defaultdict(set) 48 | 49 | for v, d in top.items(): 50 | top_coms[d['label']].add(v) 51 | for v, d in bottom.items(): 52 | bottom_coms[d['label']].add(v) 53 | 54 | topC_to_bottomC = dict() 55 | topC_to_V = dict() 56 | bottomC_to_topC = dict() 57 | bottomC_to_V = dict() 58 | 59 | for c, v in top_coms.items(): 60 | c_count = Counter() 61 | for u in v: 62 | for neig in G.neighbors(u): 63 | c_count.update({G.nodes[neig]['label']: 1}) 64 | topC_to_bottomC[c] = c_count 65 | topC_to_V[c] = sum(c_count.values()) 66 | for c, v in bottom_coms.items(): 67 | c_count = Counter() 68 | for u in v: 69 | for neig in G.neighbors(u): 70 | c_count.update({G.nodes[neig]['label']: 1}) 71 | bottomC_to_topC[c] = c_count 72 | bottomC_to_V[c] = sum(c_count.values()) 73 | 74 | E = G.number_of_edges() 75 | Q_top = 0 76 | Q_bottom = 0 77 | # top -> bottom 78 | for Ck, coms in topC_to_bottomC.items(): 79 | Cl = max(coms, key=coms.get) 80 | Q_top += (coms[Cl] / (2 * E) - topC_to_V[Ck] * bottomC_to_V[Cl] / (4 * E * E)) 81 | # bottom -> top 82 | for Ck, coms in bottomC_to_topC.items(): 83 | Cl = max(coms, key=coms.get) 84 | Q_bottom += (coms[Cl] / (2 * E) - bottomC_to_V[Ck] * topC_to_V[Cl] / (4 * E * E)) 85 | return Q_top + Q_bottom 86 | 87 | 88 | def suzuki_modularity(G): 89 | top = {v: d for v, d in G.nodes(data=True) if d['bipartite'] == 0} 90 | bottom = {v: d for v, d in G.nodes(data=True) if d['bipartite'] == 1} 91 | 
top_coms = defaultdict(set) 92 | bottom_coms = defaultdict(set) 93 | 94 | for v, d in top.items(): 95 | top_coms[d['label']].add(v) 96 | for v, d in bottom.items(): 97 | bottom_coms[d['label']].add(v) 98 | 99 | topC_to_bottomC = dict() 100 | topC_to_V = dict() 101 | bottomC_to_topC = dict() 102 | bottomC_to_V = dict() 103 | 104 | for c, v in top_coms.items(): 105 | c_count = Counter() 106 | for u in v: 107 | for neig in G.neighbors(u): 108 | c_count.update({G.nodes[neig]['label']: 1}) 109 | topC_to_bottomC[c] = c_count 110 | topC_to_V[c] = sum(c_count.values()) 111 | for c, v in bottom_coms.items(): 112 | c_count = Counter() 113 | for u in v: 114 | for neig in G.neighbors(u): 115 | c_count.update({G.nodes[neig]['label']: 1}) 116 | bottomC_to_topC[c] = c_count 117 | bottomC_to_V[c] = sum(c_count.values()) 118 | 119 | E = G.number_of_edges() 120 | Q_top = 0 121 | Q_bottom = 0 122 | # top -> bottom 123 | for Ck, coms in topC_to_bottomC.items(): 124 | Ck_to_V = topC_to_V[Ck] 125 | for Cl, cnt in coms.items(): 126 | tmpQ = cnt/E - Ck_to_V * bottomC_to_V[Cl] / (E*E) 127 | tmpQ *= (cnt / Ck_to_V) 128 | Q_top += tmpQ 129 | # bottom -> top 130 | for Ck, coms in bottomC_to_topC.items(): 131 | Ck_to_V = bottomC_to_V[Ck] 132 | for Cl, cnt in coms.items(): 133 | tmpQ = cnt/E - Ck_to_V * topC_to_V[Cl]/(E*E) 134 | tmpQ *= (cnt / Ck_to_V) 135 | Q_bottom += tmpQ 136 | Q = (Q_top + Q_bottom) / 2 137 | return Q 138 | -------------------------------------------------------------------------------- /BiMLPA/bimlpa.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | 4 | from collections import Counter 5 | from math import sqrt 6 | from random import choice 7 | 8 | 9 | class BiMLPA(object): 10 | def __init__(self, G, threshold, max_prop_label, max_MM_iter=100, max_MS_iter=100): 11 | self.G = G 12 | self.threshold = threshold 13 | self.max_prop_label = max_prop_label 14 | self.max_MM_iter = max_MM_iter 15 
| self.max_MS_iter = max_MS_iter 16 | 17 | def _initialize(self): 18 | G = self.G 19 | top = {n for n, d in G.nodes(data=True) if d['bipartite'] == 0} 20 | bottom = set(G) - top 21 | if len(top) >= len(bottom): 22 | self.red = top 23 | self.blue = bottom 24 | else: 25 | self.red = bottom 26 | self.blue = top 27 | for i, v in enumerate(self.red): 28 | G.nodes[v]['label'] = {i+1: 1} 29 | for v in self.blue: 30 | G.nodes[v]['label'] = {} 31 | 32 | def _label_to_list(self, propagaters): 33 | # propagaterが持つラベル数をmax_prop_label以下に 34 | # ラベルの重みの降順でソート 35 | node2label = dict(nx.get_node_attributes(self.G, 'label')) 36 | for node in propagaters: 37 | label = node2label[node] 38 | l = list(label.keys()) 39 | r = list(label.values()) 40 | if len(label) > self.max_prop_label: 41 | index = np.argsort(r)[::-1][:self.max_prop_label] 42 | new_label = [[l[i] for i in index], [r[j] for j in index]] 43 | else: 44 | new_label = [l, r] 45 | node2label[node] = new_label 46 | return node2label 47 | 48 | def _sum_label_ratio(self, label_freq, u, node2label): 49 | neighbor = self.G.neighbors(u) 50 | for v in neighbor: 51 | label_index, label_ratio = node2label[v] 52 | for i in range(len(label_index)): 53 | label_freq.update({label_index[i]: label_ratio[i]}) 54 | 55 | def _propagate_multi_labels(self, receivers): 56 | G = self.G 57 | convergence = True 58 | propagaters = set(G) - set(receivers) 59 | node2label = self._label_to_list(propagaters) 60 | 61 | # 各ノード、neighborからラベルを取得しthresholdを超えたラベルのみ取得 62 | for u in receivers: 63 | old_label = node2label[u] 64 | label_freq = Counter() 65 | self._sum_label_ratio(label_freq, u, node2label) 66 | 67 | freq_max = max(label_freq.values()) 68 | new_labels = {label: freq for label, freq in label_freq.items() if freq/freq_max >= self.threshold} 69 | freq_sum = sum(new_labels.values()) 70 | new_labels = {label: new_labels[label]/freq_sum for label in new_labels} 71 | G.nodes[u]['label'] = new_labels 72 | if convergence and (old_label.keys() != 
new_labels.keys()): 73 | convergence = False 74 | return convergence 75 | 76 | def _propagate_single_label(self, receivers): 77 | G = self.G 78 | convergence = True 79 | propagaters = set(G) - set(receivers) 80 | node2label = self._label_to_list(propagaters) 81 | 82 | for u in receivers: 83 | old_label = node2label[u] 84 | label_freq = Counter() 85 | self._sum_label_ratio(label_freq, u, node2label) 86 | 87 | freq_max = max(label_freq.values()) 88 | candidate = [label for label, freq in label_freq.items() if freq == freq_max] 89 | new_label = {choice(candidate): 1} 90 | G.nodes[u]['label'] = new_label 91 | if convergence and old_label != new_label: 92 | convergence = False 93 | return convergence 94 | 95 | def _multi_multi_LP(self): 96 | # Multi Multi LP 97 | for _ in range(self.max_MM_iter): 98 | conv_blue = self._propagate_multi_labels(self.blue) 99 | conv_red = self._propagate_multi_labels(self.red) 100 | if conv_blue and conv_red: 101 | break 102 | 103 | def _multi_single_LP(self): 104 | # Multi Single LP 105 | for _ in range(self.max_MS_iter): 106 | conv_blue = self._propagate_multi_labels(self.blue) 107 | conv_red = self._propagate_single_label(self.red) 108 | if conv_blue and conv_red: 109 | break 110 | 111 | def start(self): 112 | self._initialize() 113 | self._multi_multi_LP() 114 | self._multi_single_LP() 115 | 116 | 117 | class BiMLPA_SqrtDeg(BiMLPA): 118 | def __init__(self, G, threshold, max_prop_label, max_MM_iter=100, max_MS_iter=100): 119 | super().__init__(G, threshold, max_prop_label, max_MM_iter, max_MS_iter) 120 | self.node2degree = dict(G.degree()) 121 | 122 | def _label_to_list(self, propagaters): 123 | node2label = dict(nx.get_node_attributes(self.G, 'label')) 124 | for node in propagaters: 125 | d_sqrt = sqrt(self.node2degree[node]) 126 | label = node2label[node] 127 | l = list(label.keys()) 128 | r = list(label.values()) 129 | if len(label) > self.max_prop_label: 130 | index = np.argsort(r)[::-1][:self.max_prop_label] 131 | new_label = 
[[l[i] for i in index], [r[j]/d_sqrt for j in index]] 132 | else: 133 | r = [ratio/d_sqrt for ratio in r] 134 | new_label = [l, r] 135 | node2label[node] = new_label 136 | return node2label 137 | 138 | 139 | class BiMLPA_EdgeProb(BiMLPA): 140 | def __init__(self, G, threshold, max_prop_label, max_MM_iter=100, max_MS_iter=100): 141 | super().__init__(G, threshold, max_prop_label, max_MM_iter, max_MS_iter) 142 | self.node2degree = dict(G.degree()) 143 | self.M = G.number_of_edges() 144 | 145 | def _sum_label_ratio(self, label_freq, u, node2label): 146 | neighbor = self.G.neighbors(u) 147 | d_u = self.node2degree[u] 148 | for v in neighbor: 149 | label_index, label_ratio = node2label[v] 150 | d_v = self.node2degree[v] 151 | for i in range(len(label_index)): 152 | label_freq.update({label_index[i]: label_ratio[i]*(1-d_u*d_v/self.M)}) 153 | --------------------------------------------------------------------------------