├── .idea
│   ├── .gitignore
│   ├── CommunityDetection.iml
│   ├── inspectionProfiles
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── BIGCLAM.py
├── CORPA.py
├── CPM.py
├── FN.py
├── GN.py
├── Infomap.py
├── KL.py
├── LFM.py
├── LPA.py
├── Leiden.py
├── Louvain.py
├── Networkx_Pyg.ipynb
├── OSLOM.py
├── PYG.ipynb
├── README.md
├── SCAN.py
├── SLPA.py
├── SLPA_V2.py
├── SpectralClustering.py
├── Walktrap.py
├── copra.py
├── craw_dblp.py
├── data
│   ├── OpenFlights.txt
│   ├── club.txt
│   ├── dolphin.txt
│   ├── dummy.txt
│   ├── football.txt
│   ├── google.txt
│   └── louvain_example.txt
└── 论文.xls

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/../../../../:\APTX-4869\CommunityDetection\.idea/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

--------------------------------------------------------------------------------
/BIGCLAM.py:
--------------------------------------------------------------------------------
import numpy as np
import networkx as nx


def sigm(x):
    # Sigmoid-like factor exp(-x) / (1 - exp(-x)); needed when computing the gradient.
    # np.divide performs element-wise division.
    return np.divide(np.exp(-1. * x), 1. - np.exp(-1. * x))


def log_likelihood(F, A):
    # Plug F into the BigClam formula to compute the log-likelihood.
    A_soft = F.dot(F.T)

    # Multiplying by the adjacency matrix keeps only the adjacent node pairs.
    FIRST_PART = A * np.log(1. - np.exp(-1. * A_soft))
    sum_edges = np.sum(FIRST_PART)
    # 1 - A selects the non-adjacent pairs.
    SECOND_PART = (1 - A) * A_soft
    sum_nedges = np.sum(SECOND_PART)

    log_likeli = sum_edges - sum_nedges
    return log_likeli

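# In symbols (one way to write what log_likelihood computes, matching the
# code above rather than quoting the original comments), with F_u the
# membership row of node u:
#     l(F) = sum_{(u,v) in E} log(1 - exp(-F_u . F_v)) - sum_{(u,v) not in E} F_u . F_v
# Differentiating with respect to F_u gives the per-node gradient used below:
#     grad_u = sum_{v in N(u)} F_v * exp(-F_u . F_v) / (1 - exp(-F_u . F_v)) - sum_{v not in N(u)} F_v
# where the first factor is exactly sigm() applied to the dot product.
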
def gradient(F, A, i):
    # Plug into the formula to compute the gradient for node i.
    N, C = F.shape

    # Use the adjacency matrix to split neighbours from non-neighbours.
    neighbours = np.where(A[i])
    nneighbours = np.where(1 - A[i])

    # First part of the formula.
    sum_neigh = np.zeros((C,))
    for nb in neighbours[0]:
        dotproduct = F[nb].dot(F[i])
        sum_neigh += F[nb] * sigm(dotproduct)

    # Second part of the formula.
    sum_nneigh = np.zeros((C,))
    # Speed up this computation using eq.4
    for nnb in nneighbours[0]:
        sum_nneigh += F[nnb]

    grad = sum_neigh - sum_nneigh
    return grad


def train(A, C, iterations=100):
    # Initialize F randomly.
    N = A.shape[0]
    F = np.random.rand(N, C)

    # Optimize F by gradient ascent on the log-likelihood.
    for n in range(iterations):
        for person in range(N):
            grad = gradient(F, A, person)

            F[person] += 0.005 * grad

            F[person] = np.maximum(0.001, F[person])  # F must stay positive
        ll = log_likelihood(F, A)
        print('At step %5i/%5i ll is %5.3f' % (n, iterations, ll))
    return F


# Load a graph dataset from a space-separated edge list.
def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


if __name__ == "__main__":
    # adj = np.load('data/adj.npy')
    G = load_graph('data/club.txt')
    # adj = np.array(nx.adjacency_matrix(G).todense())
    adj = nx.to_numpy_array(G)  # adjacency matrix

    F = train(adj, 4)
    F_argmax = np.argmax(F, 1)

    for i, row in enumerate(F):
        print(row)

--------------------------------------------------------------------------------
/CORPA.py:
--------------------------------------------------------------------------------
import collections
import random
import time
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter


class COPRA:
    def __init__(self, G, T, v):
        """
        :param G: the input graph
        :param T: number of iterations
        :param v: maximum number of communities per node; labels with a
                  belonging coefficient below 1/v are discarded
        """
        self._G = G
        self._n = len(G.nodes(False))  # number of nodes
        self._T = T
        self._v = v

    def execute(self):
        # Membership-label records: every node starts in its own community with
        # belonging coefficient 1. A node keeps the community labels whose
        # belonging coefficient stays above the threshold.
        lablelist = {i: {i: 1} for i in self._G.nodes()}
        for t in range(self._T):
            visitlist = list(self._G.nodes())
            # Shuffle the visiting order.
            np.random.shuffle(visitlist)
            # Visit every node.
            for visit in visitlist:
                temp_count = 0
                temp_label = {}
                total = len(self._G[visit])
                # Compute the new label set from the neighbours' labels.
                for i in self._G.neighbors(visit):
                    res = {key: value / total for key, value in lablelist[i].items()}
                    temp_label = dict(Counter(res) + Counter(temp_label))
                temp_count = len(temp_label)
                temp_label2 = temp_label.copy()
                for key, value in list(temp_label.items()):
                    if value < 1 / self._v:
                        del temp_label[key]
                        temp_count -= 1
                # If every label fell below the threshold, pick one at random.
                if temp_count == 0:
                    # temp_label = {}
                    # v = self._v
                    # if self._v > len(temp_label2):
                    #     v = len(temp_label2)
                    # b = random.sample(temp_label2.keys(), v)
                    # tsum = 0.0
                    # for i in b:
                    #     tsum += temp_label2[i]
                    # temp_label = {i: temp_label2[i]/tsum for i in b}
                    b = random.sample(list(temp_label2.keys()), 1)  # list() so this also works on Python 3.9+
                    temp_label = {b[0]: 1}
                # Otherwise at most v labels remain; just normalize them.
                else:
                    tsum = sum(temp_label.values())
                    temp_label = {key: value / tsum for key, value in temp_label.items()}
                lablelist[visit] = temp_label

        communities = collections.defaultdict(lambda: list())
        # Scan lablelist: nodes sharing a label go into the same community.
        for primary, change in lablelist.items():
            for label in change.keys():
                communities[label].append(primary)
        # The return value is the dict's values: one member list per community.
        return communities.values()

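# A tiny worked example of the update rule in execute() above (illustrative
# values, not from the original source): each neighbour's labels are first
# divided by the visited node's degree, summed with Counter, then thresholded.
def _copra_update_demo(v=3):
    nbr1 = {0: 0.5, 1: 0.5}   # hypothetical label sets of two neighbours
    nbr2 = {1: 1.0}
    total = 2                 # degree of the visited node
    agg = dict(Counter({k: w / total for k, w in nbr1.items()}) +
               Counter({k: w / total for k, w in nbr2.items()}))
    # agg == {0: 0.25, 1: 0.75}; the 1/v threshold (1/3) drops label 0, and
    # the survivor would then be renormalized to {1: 1.0} as in execute().
    return {k: w for k, w in agg.items() if w >= 1 / v}
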
def cal_EQ(cover, G):
    m = len(G.edges(None, False))  # number of edges; G.edges(None, False) yields 2-tuples (u, v)
    # Record the set of communities each node belongs to.
    vertex_community = collections.defaultdict(lambda: set())
    # i is the community index, c its member nodes.
    for i, c in enumerate(cover):
        # v is one node of the community.
        for v in c:
            # Record that node v belongs to community i.
            vertex_community[v].add(i)
    total = 0.0
    for c in cover:
        for i in c:
            # o_i: number of communities node i belongs to.
            o_i = len(vertex_community[i])
            # k_i: degree of node i (number of incident edges).
            k_i = len(G[i])
            for j in c:
                t = 0.0
                # o_j: number of communities node j belongs to.
                o_j = len(vertex_community[j])
                # k_j: degree of node j (number of incident edges).
                k_j = len(G[j])
                if G.has_edge(i, j):
                    t += 1.0 / (o_i * o_j)
                t -= k_i * k_j / (2 * m * o_i * o_j)
                total += t
    return round(total / (2 * m), 4)

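# In symbols, the extended (overlapping) modularity computed above is
#     EQ = (1 / (2m)) * sum_c sum_{i,j in c} (1 / (O_i * O_j)) * (A_ij - k_i * k_j / (2m))
# where O_i is the number of communities node i belongs to; with O_i = 1 for
# every node this reduces to the ordinary modularity Q.
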
def cal_Q(partition, G):  # compute modularity Q
    m = len(G.edges(None, False))  # number of edges; G.edges(None, False) yields 2-tuples (u, v)
    # print(G.edges(None, False))
    # print("=======6666666")
    a = []
    e = []
    for community in partition:  # take each community in turn
        t = 0.0
        for node in community:  # visit every node of the community
            t += len([x for x in G.neighbors(node)])  # G.neighbors(node): the node's neighbours
        a.append(t / (2 * m))
        # self.zidian[t/(2*m)]=community
    for community in partition:
        t = 0.0
        for i in range(len(community)):
            for j in range(len(community)):
                if (G.has_edge(community[i], community[j])):
                    t += 1.0
        e.append(t / (2 * m))

    q = 0.0
    for ei, ai in zip(e, a):
        q += (ei - ai ** 2)
    return q


def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


if __name__ == '__main__':
    # G = nx.karate_club_graph()
    G = load_graph('data/dolphin.txt')
    start_time = time.time()
    algorithm = COPRA(G, 20, 3)

    communities = algorithm.execute()
    end_time = time.time()
    for i, community in enumerate(communities):
        print(i, community)

    print(cal_EQ(communities, G))
    print(f'Elapsed time: {end_time - start_time}')

--------------------------------------------------------------------------------
/CPM.py:
--------------------------------------------------------------------------------
import collections
import time
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt


def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


def get_percolated_cliques(G, k):
    # Find all maximal cliques with at least k nodes.
    cliques = list(frozenset(c) for c in nx.find_cliques(G) if len(c) >= k)

    # print(cliques)
    matrix = np.zeros((len(cliques), len(cliques)))  # clique-overlap matrix, initially all zeros
    # print(matrix)
    for i in range(len(cliques)):
        for j in range(len(cliques)):
            if i == j:  # diagonal: 1 if the clique has at least k nodes, else 0
                n = len(cliques[i])
                if n >= k:
                    matrix[i][j] = 1
                else:
                    matrix[i][j] = 0
            else:  # off-diagonal: 1 if the two cliques share at least k-1 nodes, else 0
                n = len(cliques[i].intersection(cliques[j]))
                if n >= k - 1:
                    matrix[i][j] = 1
                else:
                    matrix[i][j] = 0

    # print(matrix)
    # for i in matrix:
    #     print(i)

    # l = [-1]*len(cliques)
    l = list(range(len(cliques)))  # l holds one community id per clique; connected cliques share an id
    for i in range(len(matrix)):
        for j in range(len(matrix)):
            if matrix[i][j] == 1 and i != j:  # a 1 means clique i and clique j are adjacent,
                l[j] = l[i]                   # so the column clique takes the row clique's community id
    # print(l)
    q = []  # the distinct community ids
    for i in l:
        if i not in q:  # take each id once
            q.append(i)
    # print(q)

    p = []  # p collects all communities
    for i in q:
        # The union of the member cliques' nodes gives the community's nodes.
        print(frozenset.union(*[cliques[j] for j in range(len(l)) if l[j] == i]))
        p.append(list(frozenset.union(*[cliques[j] for j in range(len(l)) if l[j] == i])))
    return p

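# For comparison, networkx ships a reference implementation of the same
# clique-percolation idea; a minimal sketch (not part of the original file):
#
#     from networkx.algorithms.community import k_clique_communities
#     communities = [list(c) for c in k_clique_communities(G, 3)]
#
# It likewise treats two k-cliques as adjacent when they share k-1 nodes.
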
def cal_Q(partition, G):  # compute modularity Q
    m = len(G.edges(None, False))  # number of edges; G.edges(None, False) yields 2-tuples (u, v)
    # print(G.edges(None, False))
    # print("=======6666666")
    a = []
    e = []
    for community in partition:  # take each community in turn
        t = 0.0
        for node in community:  # visit every node of the community
            t += len([x for x in G.neighbors(node)])  # G.neighbors(node): the node's neighbours
        a.append(t / (2 * m))
        # self.zidian[t/(2*m)]=community
    for community in partition:
        t = 0.0
        for i in range(len(community)):
            for j in range(len(community)):
                if (G.has_edge(community[i], community[j])):
                    t += 1.0
        e.append(t / (2 * m))

    q = 0.0
    for ei, ai in zip(e, a):
        q += (ei - ai ** 2)
    return q


def cal_EQ(cover, G):
    # Record the set of communities each node belongs to.
    vertex_community = collections.defaultdict(lambda: set())
    # i is the community index, c its member nodes.
    for i, c in enumerate(cover):
        # v is one node of the community.
        for v in c:
            # Record that node v belongs to community i.
            vertex_community[v].add(i)

    m = 0.0
    for v in G.nodes():
        for n in G.neighbors(v):
            if v > n:
                m += 1

    # m = len(G.edges(None, False))

    total = 0.0
    # Iterate over the communities.
    for c in cover:
        # Iterate over node i of the community.
        for i in c:
            # o_i: number of communities node i belongs to.
            o_i = len(vertex_community[i])
            # k_i: degree of node i (number of incident edges).
            k_i = len(G[i])
            # Iterate over node j of the community.
            for j in c:
                # o_j: number of communities node j belongs to.
                o_j = len(vertex_community[j])
                # k_j: degree of node j (number of incident edges).
                k_j = len(G[j])
                # By symmetry, handle each unordered pair once and double it below.
                if i > j:
                    continue
                t = 0.0
                # First part of the formula: adjacent pairs divided by the overlap.
                if j in G[i]:
                    t += 1.0 / (o_i * o_j)
                # Second part of the formula.
                t -= k_i * k_j / (2 * m * o_i * o_j)
                if i == j:
                    total += t
                else:
                    total += 2 * t

    return round(total / (2 * m), 4)


def cal_EQ2(cover, G):
    m = len(G.edges(None, False))  # number of edges; G.edges(None, False) yields 2-tuples (u, v)
    # Record the set of communities each node belongs to.
    vertex_community = collections.defaultdict(lambda: set())
    # i is the community index, c its member nodes.
    for i, c in enumerate(cover):
        # v is one node of the community.
        for v in c:
            # Record that node v belongs to community i.
            vertex_community[v].add(i)
    total = 0.0
    for c in cover:
        for i in c:
            # o_i: number of communities node i belongs to.
            o_i = len(vertex_community[i])
            # k_i: degree of node i (number of incident edges).
            k_i = len(G[i])
            for j in c:
                t = 0.0
                # o_j: number of communities node j belongs to.
                o_j = len(vertex_community[j])
                # k_j: degree of node j (number of incident edges).
                k_j = len(G[j])
                if G.has_edge(i, j):
                    t += 1.0 / (o_i * o_j)
                t -= k_i * k_j / (2 * m * o_i * o_j)
                total += t
    return round(total / (2 * m), 4)


def add_group(p, G):
    num = 0
    nodegroup = {}
    for partition in p:
        for node in partition:
            nodegroup[node] = {'group': num}
        num = num + 1
    nx.set_node_attributes(G, nodegroup)


def setColor(G):
    color_map = []
    color = ['red', 'green', 'yellow', 'pink', 'blue', 'grey', 'white', 'khaki', 'peachpuff', 'brown']
    for i in G.nodes.data():
        if 'group' not in i[1]:
            color_map.append(color[9])
        else:
            color_map.append(color[i[1]['group']])
    return color_map


# G = load_graph('data/club.txt')
G = load_graph('data/dolphin.txt')
start_time = time.time()
p = get_percolated_cliques(G, 3)
end_time = time.time()
# print(cal_Q(p, G))
print(cal_EQ(p, G))
# add_group(p, G)
# nx.draw(G, with_labels=True, node_color=setColor(G))
# plt.show()
print(f'Elapsed time: {end_time - start_time}')

--------------------------------------------------------------------------------
/FN.py:
--------------------------------------------------------------------------------
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import time

# def G_name2id(path):
#     '''
#     Map graph node names to numeric ids (ids start at 0).
#
#     path: path to a (source, target) edge file
#     '''
#     G = nx.read_edgelist(path, delimiter=',')
#     N = dict.fromkeys(G.nodes())  # node_id
#     i = 0
#     for name, id in N.items():
#         N[name] = i
#         i = i + 1
#
#     G_n = nx.Graph()
#     with open(path, 'r') as f:
#         lines = f.readlines()
#         for line in lines:
#             n = line.strip().split(',')
#             G_n.add_edge(N[n[0]], N[n[1]])
#
#     return G_n

# Load a graph dataset from a space-separated edge list.
def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


class FastNewman:
    def __init__(self, path):
        self.G = load_graph(path)
        # G = nx.read_gml('dolphins.gml')
        self.A = nx.to_numpy_array(self.G)  # adjacency matrix
        self.num_node = len(self.A)         # number of nodes
        self.num_edge = sum(sum(self.A))    # twice the edge count (each edge seen from both endpoints)
        self.c = {}                         # community layout recorded for every Q value

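    # In symbols, the merge criterion used in merge_community below is the
    # Fast Newman gain
    #     delta_Q_ij = 2 * (e_ij - a_i * a_j)
    # where e_ij is the fraction of edges joining community i to community j
    # and a_i = sum_j e_ij is the fraction of edge endpoints in community i;
    # each step merges the pair of communities with the largest gain.
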
    # def merge_community(self, iter_num, detaQ, e, b):
    #     # Merging all best pairs at once is bug-prone: an I returned by the query
    #     # may already have been merged away earlier in the loop as some J.
    #     # E.g. with pairs [3, 11] and [11, 54]: 11 is merged in the first pass, and
    #     # merging 54 into the old 11 afterwards loses nodes once rows are deleted;
    #     # 54 would have to be merged into wherever 11 lives now. Merging one pair
    #     # at a time is simpler.
    #     b_num = sum([len(i) for i in b])
    #     det_max = np.amax(detaQ)
    #
    #     (I, J) = np.where(detaQ == det_max)
    #     print((I, J))
    #     # Since I is traversed first and may contain duplicates, J must be merged
    #     # into I; merging I into J would break the deletions below.
    #     for m in range(len(I)):
    #         # Make sure this J has not been merged yet.
    #         if J.tolist().index(J[m]) == m:
    #             # Merge row/column J into I, then zero out J.
    #             e[I[m], :] = e[J[m], :] + e[I[m], :]
    #             e[J[m], :] = 0
    #             e[:, I[m]] = e[:, J[m]] + e[:, I[m]]
    #             e[:, J[m]] = 0
    #             b[I[m]] = b[I[m]] + b[J[m]]
    #
    #     e = np.delete(e, J, axis=0)
    #     e = np.delete(e, J, axis=1)
    #     J = sorted(list(set(J)), reverse=True)
    #     for j in J:
    #         b.remove(b[j])  # delete the J communities (all merged into I)
    #     b_num2 = sum([len(i) for i in b])
    #     if b_num2 != b_num:
    #         print("111")
    #     self.c[iter_num] = b.copy()
    #     return e, b

    def merge_community(self, iter_num, detaQ, e, b):
        # Merge one pair at a time.
        (I, J) = np.where(detaQ == np.amax(detaQ))
        # Since I is traversed first and may contain duplicates, merge J into I;
        # merging I into J would break the deletions below.
        e[I[0], :] = e[J[0], :] + e[I[0], :]
        e[J[0], :] = 0
        e[:, I[0]] = e[:, J[0]] + e[:, I[0]]
        e[:, J[0]] = 0
        b[I[0]] = b[I[0]] + b[J[0]]

        e = np.delete(e, J[0], axis=0)
        e = np.delete(e, J[0], axis=1)
        b.remove(b[J[0]])  # delete community J (merged into I)
        self.c[iter_num] = b.copy()
        return e, b

    def Run_FN(self):
        e = self.A / self.num_edge  # e[i, j]: fraction of all edges joining community i to community j
        a = np.sum(e, axis=0)       # column sums of e: fraction of edge endpoints attached to community i
        b = [[i] for i in range(self.num_node)]  # community layout of the current iteration
        Q = []
        iter_num = 0
        while len(e) > 1:
            num_com = len(e)
            detaQ = -np.power(10, 9) * np.ones((self.num_node, self.num_node))  # deltaQ can be negative, so start at -1e9
            for i in range(num_com - 1):
                for j in range(i + 1, num_com):
                    if e[i, j] != 0:
                        detaQ[i, j] = 2 * (e[i, j] - a[i] * a[j])
            if np.sum(detaQ + np.power(10, 9)) == 0:  # every entry is still the sentinel: nothing left to merge
                break

            e, b = self.merge_community(iter_num, detaQ, e, b)

            a = np.sum(e, axis=0)
            # Compute the Q value.
            Qt = 0.0
            for n in range(len(e)):
                Qt += e[n, n] - a[n] * a[n]
            Q.append(Qt)
            iter_num += 1
        max_Q, community = self.get_community(Q)
        return max_Q, community

    def get_community(self, Q):
        max_k = np.argmax(Q)
        community = self.c[max_k]
        return Q[max_k], community


def showCommunity(G, partition, pos):
    # Nodes in the same community share one marker; edges between communities
    # are drawn thicker.
    cluster = {}
    labels = {}
    for index, item in enumerate(partition):
        for nodeID in item:
            labels[nodeID] = r'$' + str(nodeID) + '$'  # label shown in the plot
            cluster[nodeID] = index  # community index of the node

    # Draw the nodes.
    colors = ['r', 'g', 'b', 'y', 'm']
    shapes = ['v', 'D', 'o', '^', '<']
    for index, item in enumerate(partition):
        nx.draw_networkx_nodes(G, pos, nodelist=item,
                               node_color=colors[index],
                               node_shape=shapes[index],
                               node_size=350,
                               alpha=1)

    # Draw the edges.
    edges = {len(partition): []}
    for link in G.edges():
        # links between clusters
        if cluster[link[0]] != cluster[link[1]]:
            edges[len(partition)].append(link)
        else:
            # links inside a cluster
            if cluster[link[0]] not in edges:
                edges[cluster[link[0]]] = [link]
            else:
                edges[cluster[link[0]]].append(link)

    for index, edgelist in enumerate(edges.values()):
        # inside a cluster
        if index < len(partition):
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=1, alpha=0.8, edge_color=colors[index])
        else:
            # between clusters
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=3, alpha=0.8, edge_color=colors[index])

    # Draw the labels.
    nx.draw_networkx_labels(G, pos, labels, font_size=12)

    plt.axis('off')
    plt.show()

# def get_value(G, community):
#     '''
#     Assign each node a value; nodes in one community share the same value.
#     community: e.g. [[1,2,3],[4,5],[6,7,8]]
#     '''
#     num_node = nx.number_of_nodes(G)
#     value = [[] for i in range(num_node)]
#     for index, com in enumerate(community):
#         for q in com:
#             value[q] = index
#     return value
#
#
# def draw_community(G, com):
#     value = get_value(G, com)
#     pos = nx.spring_layout(G)
#     nx.draw_networkx_nodes(G, pos, cmap=plt.get_cmap('jet'), node_color=value, node_size=20)
#     nx.draw_networkx_edges(G, pos, edge_color='gray', alpha=0.5)
#     plt.show()
#     # plt.savefig('community.jpg')


if __name__ == "__main__":
    start_time = time.time()
    Q, community = FastNewman('data/club.txt').Run_FN()
    print(Q)
    print(community)
    end_time = time.time()
    print(f'Elapsed time: {end_time - start_time}')
    # end_time = time.time()
    G = load_graph('data/club.txt')
    pos = nx.spring_layout(G)
    showCommunity(G, community, pos)

--------------------------------------------------------------------------------
/GN.py:
--------------------------------------------------------------------------------
import time
import networkx as nx
import matplotlib.pyplot as plt


# Load a graph dataset from a space-separated edge list.
def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


# Clone the graph.
def cloned_graph(G):
    cloned_g = nx.Graph()
    for edge in G.edges():
        cloned_g.add_edge(edge[0], edge[1])
    return cloned_g


# Compute modularity.
def cal_Q(partition, G):
    # m is the number of edges in the graph.
    m = len(G.edges(None, False))
    # Use the simplified modularity formula.
    a = []
    e = []

    # a[i]: fraction of all edge endpoints attached to nodes of community i.
    for community in partition:
        t = 0.0
        for node in community:
            t += len(list(G.neighbors(node)))
        a.append(t/(2*m))

    # e[i]: fraction of edges with both endpoints inside community i.
    for community in partition:
        t = 0.0
        for i in range(len(community)):
            for j in range(len(community)):
                if(G.has_edge(community[i], community[j])):
                    t += 1.0
        e.append(t/(2*m))

    q = 0.0
    for ei, ai in zip(e, a):
        q += (ei - ai**2)

    return q

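# In symbols, the simplification computed above is
#     Q = sum_i (e_i - a_i ** 2)
# where e_i is the fraction of edges falling entirely inside community i and
# a_i is the fraction of edge endpoints attached to nodes of community i.
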
class GN:
    def __init__(self, G):
        self._G = G
        self._G_cloned = cloned_graph(G)
        # The initial partition puts all nodes into a single community.
        self._partition = [[n for n in G.nodes()]]
        self._max_Q = 0.0

    def execute(self):
        while len(self._G.edges()) != 0:
            # 1. Compute the betweenness of every edge.
            # nx.edge_betweenness_centrality returns a dict of edge betweenness; items()
            # yields (key, value) pairs, here ((vi, vj), edge_betweenness).
            # Take the item with the largest value, then its [0]: the endpoints of the edge to delete.
            edge = max(nx.edge_betweenness_centrality(self._G).items(),
                       key=lambda item: item[1])[0]
            # 2. Remove the edge with the highest betweenness.
            self._G.remove_edge(edge[0], edge[1])
            # Get the connected components left after the removal.
            components = [list(c)
                          for c in list(nx.connected_components(self._G))]
            if len(components) != len(self._partition):
                # 3. Compute the Q value.
                cur_Q = cal_Q(components, self._G_cloned)
                if cur_Q > self._max_Q:
                    self._max_Q = cur_Q
                    self._partition = components
        print(self._max_Q)
        print(self._partition)
        return self._partition


# Visualize the partition.
def showCommunity(G, partition, pos):
    # Nodes in the same community share one marker; edges between communities
    # are drawn thicker.
    cluster = {}
    labels = {}
    for index, item in enumerate(partition):
        for nodeID in item:
            labels[nodeID] = r'$' + str(nodeID) + '$'  # label shown in the plot
            cluster[nodeID] = index  # community index of the node

    # Draw the nodes.
    colors = ['r', 'g', 'b', 'y', 'm']
    shapes = ['v', 'D', 'o', '^', '<']
    for index, item in enumerate(partition):
        nx.draw_networkx_nodes(G, pos, nodelist=item,
                               node_color=colors[index],
                               node_shape=shapes[index],
                               node_size=350,
                               alpha=1)

    # Draw the edges.
    edges = {len(partition): []}
    for link in G.edges():
        # links between clusters
        if cluster[link[0]] != cluster[link[1]]:
            edges[len(partition)].append(link)
        else:
            # links inside a cluster
            if cluster[link[0]] not in edges:
                edges[cluster[link[0]]] = [link]
            else:
                edges[cluster[link[0]]].append(link)

    for index, edgelist in enumerate(edges.values()):
        # inside a cluster
        if index < len(partition):
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=1, alpha=0.8, edge_color=colors[index])
        else:
            # between clusters
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=3, alpha=0.8, edge_color=colors[index])

    # Draw the labels.
    nx.draw_networkx_labels(G, pos, labels, font_size=12)

    plt.axis('off')
    plt.show()


if __name__ == '__main__':
    # Load the dataset and visualize it.
    G = load_graph('data/club.txt')
    print(G.nodes(False))
    print(G.nodes(True))
    # print(len(G.edges(None, False)))
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, font_weight='bold')
    plt.show()

    # Run the GN algorithm.
    start_time = time.time()
    algorithm = GN(G)
    partition = algorithm.execute()
    end_time = time.time()
    print(f'Elapsed time: {end_time - start_time}')

    # Visualize the result.
    showCommunity(algorithm._G_cloned, partition, pos)

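# A minimal sketch of one GN step in isolation (illustrative, not part of the
# original file): pick the highest-betweenness edge and delete it.
#
#     import networkx as nx
#     G = nx.karate_club_graph()
#     edge = max(nx.edge_betweenness_centrality(G).items(), key=lambda kv: kv[1])[0]
#     G.remove_edge(*edge)
#     print(edge, nx.number_connected_components(G))
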
print("Found %d modules with codelength: %f" % (infomapWrapper.numTopModules(), infomapWrapper.codelength)) 50 | 51 | # 为每个节点打上社区标签作为属性 52 | communities = {} 53 | for node in infomapWrapper.iterLeafNodes(): 54 | communities[node.physicalId] = node.moduleIndex() 55 | 56 | nx.set_node_attributes(G, name='community', values=communities) 57 | 58 | # communities = collections.defaultdict(lambda: list()) 59 | # for node in infomapWrapper.iterLeafNodes(): 60 | # communities[node.moduleIndex()].append(node.physicalId) 61 | # print(communities) 62 | 63 | return infomapWrapper.numTopModules() 64 | 65 | def printCom(self, G): 66 | self.findCommunities(G) 67 | communities = collections.defaultdict(lambda: list()) 68 | for k, v in nx.get_node_attributes(G, 'community').items(): 69 | communities[v].append(k) 70 | communitie_sort = sorted(communities.values(), key=lambda b: -len(b)) 71 | count = 0 72 | for communitie in communitie_sort: 73 | count += 1 74 | print(f'社区{count},成员{communitie}', end='\n') 75 | print(self.cal_Q(communities.values())) 76 | 77 | def drawNetwork(self, G): 78 | # position map 79 | pos = nx.spring_layout(G) 80 | # community ids 81 | communities = [v for k, v in nx.get_node_attributes(G, 'community').items()] 82 | numCommunities = max(communities) + 1 83 | # color map from http://colorbrewer2.org/ 84 | cmapLight = colors.ListedColormap(['#a6cee3', '#b2df8a', '#fb9a99', '#fdbf6f', '#cab2d6'], 'indexed', 85 | numCommunities) 86 | cmapDark = colors.ListedColormap(['#1f78b4', '#33a02c', '#e31a1c', '#ff7f00', '#6a3d9a'], 'indexed', 87 | numCommunities) 88 | 89 | # Draw edges 90 | nx.draw_networkx_edges(G, pos) 91 | 92 | # Draw nodes 93 | nodeCollection = nx.draw_networkx_nodes(G, 94 | pos=pos, 95 | node_color=communities, 96 | cmap=cmapLight 97 | ) 98 | # Set node border color to the darker shade 99 | darkColors = [cmapDark(v) for v in communities] 100 | nodeCollection.set_edgecolor(darkColors) 101 | 102 | # Draw node labels 103 | for n in G.nodes(): 104 | plt.annotate(n, 105 | xy=pos[n], 106 | textcoords='offset points', 107 | horizontalalignment='center', 108 | verticalalignment='center', 109 | xytext=[0, 0], 110 | color=cmapDark(communities[n - 1]) 111 | ) 112 | 113 | plt.axis('off') 114 | plt.savefig("image1.png") 115 | plt.show() 116 | 117 | def cal_Q(self, partition): # 计算Q 118 | m = len(self.graph.edges(None, False)) # 如果为真,则返回3元组(u、v、ddict)中的边缘属性dict。如果为false,则返回2元组(u,v) 119 | # print(G.edges(None,False)) 120 | # print("=======6666666") 121 | a = [] 122 | e = [] 123 | for community in partition: # 把每一个联通子图拿出来 124 | t = 0.0 125 | for node in community: # 找出联通子图的每一个顶点 126 | t += len([x for x in self.graph.neighbors(node)]) # G.neighbors(node)找node节点的邻接节点 127 | a.append(t / (2 * m)) 128 | # self.zidian[t/(2*m)]=community 129 | for community in partition: 130 | t = 0.0 131 | for i in range(len(community)): 132 | for j in range(len(community)): 133 | if (self.graph.has_edge(community[i], community[j])): 134 | t += 1.0 135 | e.append(t / (2 * m)) 136 | 137 | q = 0.0 138 | for ei, ai in zip(e, a): 139 | q += (ei - ai ** 2) 140 | return q 141 | 142 | def plot3D(self, G): 143 | pass 144 | 145 | def visualize(self, G): 146 | self.findCommunities(G) 147 | self.drawNetwork(G) 148 | 149 | def getNumberOfConnectedComponents(self, G): 150 | return nalgos.number_connected_components(G) 151 | 152 | def getNumberOfCliques(self, G): 153 | return nalgos.number_of_cliques(G) 154 | 155 | def getNumberOfStronglyConnectedComponents(self, G): 156 | return nalgos.number_strongly_connected_components(G) 
    def drawNetwork(self, G):
        # position map
        pos = nx.spring_layout(G)
        # community ids
        communities = [v for k, v in nx.get_node_attributes(G, 'community').items()]
        numCommunities = max(communities) + 1
        # color map from http://colorbrewer2.org/
        cmapLight = colors.ListedColormap(['#a6cee3', '#b2df8a', '#fb9a99', '#fdbf6f', '#cab2d6'], 'indexed',
                                          numCommunities)
        cmapDark = colors.ListedColormap(['#1f78b4', '#33a02c', '#e31a1c', '#ff7f00', '#6a3d9a'], 'indexed',
                                         numCommunities)

        # Draw edges
        nx.draw_networkx_edges(G, pos)

        # Draw nodes
        nodeCollection = nx.draw_networkx_nodes(G,
                                                pos=pos,
                                                node_color=communities,
                                                cmap=cmapLight
                                                )
        # Set node border color to the darker shade
        darkColors = [cmapDark(v) for v in communities]
        nodeCollection.set_edgecolor(darkColors)

        # Draw node labels
        for n in G.nodes():
            plt.annotate(n,
                         xy=pos[n],
                         textcoords='offset points',
                         horizontalalignment='center',
                         verticalalignment='center',
                         xytext=[0, 0],
                         color=cmapDark(communities[n - 1])
                         )

        plt.axis('off')
        plt.savefig("image1.png")
        plt.show()

    def cal_Q(self, partition):  # compute modularity Q
        m = len(self.graph.edges(None, False))  # number of edges; edges(None, False) yields 2-tuples (u, v)
        # print(G.edges(None,False))
        # print("=======6666666")
        a = []
        e = []
        for community in partition:  # take each community in turn
            t = 0.0
            for node in community:  # visit every node of the community
                t += len([x for x in self.graph.neighbors(node)])  # the node's neighbours
            a.append(t / (2 * m))
            # self.zidian[t/(2*m)]=community
        for community in partition:
            t = 0.0
            for i in range(len(community)):
                for j in range(len(community)):
                    if (self.graph.has_edge(community[i], community[j])):
                        t += 1.0
            e.append(t / (2 * m))

        q = 0.0
        for ei, ai in zip(e, a):
            q += (ei - ai ** 2)
        return q

    def plot3D(self, G):
        pass

    def visualize(self, G):
        self.findCommunities(G)
        self.drawNetwork(G)

    def getNumberOfConnectedComponents(self, G):
        return nalgos.number_connected_components(G)

    def getNumberOfCliques(self, G):
        return nalgos.number_of_cliques(G)

    def getNumberOfStronglyConnectedComponents(self, G):
        return nalgos.number_strongly_connected_components(G)

    def getNumberOfWeaklyConnectedComponents(self, G):
        return nalgos.number_weakly_connected_components(G)

    def getNumberOfIsolates(self, G):
        return nalgos.number_of_isolates(G)

    def getDegreeCentrality(self, G):
        return nalgos.degree_centrality(G)

    def getBetweenessCentrality(self, G):
        return nalgos.betweenness_centrality(G)

    def getAllPairsShortestPath(self, G):
        return nalgos.all_pairs_shortest_path(G)

    def getAllPairsNodeConnectivity(self, G):
        return nalgos.all_pairs_node_connectivity(G)

    def getClosenessCentrality(self, G):
        return nalgos.closeness_centrality(G)

    def getBridges(self, G):
        return nalgos.bridges(G)

    def getConnectedComponents(self, G):
        return nalgos.connected_components(G)

    def getDiameter(self, G):
        return nalgos.diameter(G)

    def getKatzCentrality(self, G):
        return nalgos.katz_centrality(G)

    def getPageRank(self, G):
        return nalgos.pagerank(G)

    def getTriangles(self, G):
        return nalgos.triangles(G)

    def getNeighbours(self, G, vertex):
        # Collect the neighbours of the given vertex.
        neighbourList = []
        for neighbour in G.neighbors(vertex):
            neighbourList.append(neighbour)
        return neighbourList


# results = open("results3.txt", 'a')
obj = Graph()
# graph = nx.karate_club_graph()
# graph = obj.createGraph("data//google.txt")
graph = obj.createGraph("data//OpenFlights.txt")
# results.write("Network info:")
# results.write("\n")
# results.write("Nodes:{}, Edges:{}, Self loops:{}".format(graph.number_of_nodes(), graph.number_of_edges(), graph.number_of_selfloops()))
# results.write("\n")
# results.write("Graph type: " + "undirected" if graph.is_directed() == False else "directed")
# results.write("\n")
# results.write("Is multigraph? - {}".format(graph.is_multigraph()))
# results.write("\n")
- {}".format(graph.is_multigraph())) 216 | # results.write("\n") 217 | 218 | a = Infomap(graph) 219 | # a.findCommunities(graph) 220 | # a.visualize(graph) 221 | a.printCom(graph) 222 | # 223 | 224 | # results.write("Number of connected components: {}".format(a.getNumberOfConnectedComponents(graph))) 225 | # results.write("\n") 226 | # results.write("Number of weakly connected components: {}".format(a.getNumberOfWeaklyConnectedComponents(graph)) if graph.is_directed() else "Weakly connected components not implemented for undirected case") 227 | # results.write("\n") 228 | # results.write("Number of Isolates: {}".format(a.getNumberOfIsolates(graph))) 229 | # results.write("\n") 230 | # results.write("Degree Centrality: {}".format(a.getDegreeCentrality(graph))) 231 | # results.write("\n") 232 | # results.write("Betweeness Centrality: {}".format(a.getBetweenessCentrality(graph))) 233 | # print(a.getNeighbours(graph,1)) 234 | # for component in a.getConnectedComponents(graph): 235 | # subgraph = Graph() 236 | # for neighbours in component: 237 | # print("Diameter of {} is: {}\n".format(component,"pass")) 238 | # results.write("\n") 239 | # results.write("Closeness centrality: {}".format(a.getClosenessCentrality(graph))) 240 | # results.write("\n") 241 | # results.write("Katz centrality: {}".format(a.getKatzCentrality(graph))) 242 | # results.write("\n") 243 | # results.write("Pagerank: {}".format(a.getPageRank(graph))) 244 | # results.write("\n") 245 | # results.write("Triangles: {}".format(a.getTriangles(graph))) 246 | # results.write("\n") 247 | # results.write("All Pairs Shortest Path: {}".format(a.getAllPairsShortestPath(graph))) 248 | # results.write("\n") 249 | # results.write("All Pairs Shortest Connectivity: {}".format(a.getAllPairsNodeConnectivity(graph))) 250 | # results.write("\n") 251 | # results.write("Network bridges: {}".format(a.getBridges(graph))) 252 | # results.write("\n") 253 | # results.write("All Connected Components: {}".format(a.getConnectedComponents(graph))) 254 | -------------------------------------------------------------------------------- /KL.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import matplotlib.pyplot as plt 3 | from networkx.algorithms.community import kernighan_lin_bisection 4 | 5 | 6 | def draw_spring(G, com): 7 | """ 8 | G:图 9 | com:划分好的社区 10 | node_size表示节点大小 11 | node_color表示节点颜色 12 | node_shape表示节点形状 13 | with_labels=True表示节点是否带标签 14 | """ 15 | pos = nx.spring_layout(G) # 节点的布局为spring型 16 | NodeId = list(G.nodes()) 17 | node_size = [G.degree(i) ** 1.2 * 90 for i in NodeId] # 节点大小 18 | 19 | plt.figure(figsize=(8, 6)) # 图片大小 20 | nx.draw(G, pos, with_labels=True, node_size=node_size, node_color='w', node_shape='.') 21 | 22 | color_list = ['pink', 'orange', 'r', 'g', 'b', 'y', 'm', 'gray', 'black', 'c', 'brown'] 23 | # node_shape = ['s','o','H','D'] 24 | 25 | for i in range(len(com)): 26 | nx.draw_networkx_nodes(G, pos, nodelist=com[i], node_color=color_list[i]) 27 | plt.show() 28 | 29 | 30 | if __name__ == "__main__": 31 | G = nx.karate_club_graph() # 空手道俱乐部 32 | # KL算法 33 | com = list(kernighan_lin_bisection(G)) 34 | print('社区数量', len(com)) 35 | print(com) 36 | draw_spring(G, com) 37 | -------------------------------------------------------------------------------- /LFM.py: -------------------------------------------------------------------------------- 1 | import random 2 | import networkx as nx 3 | import time 4 | import collections 5 | import matplotlib.pyplot as plt 6 | 7 | 
--------------------------------------------------------------------------------
/LFM.py:
--------------------------------------------------------------------------------
import random
import networkx as nx
import time
import collections
import matplotlib.pyplot as plt


class Community:
    """ A community class that makes the bookkeeping convenient. """

    def __init__(self, G, alpha=1.0):
        self._G = G
        # alpha is a hyperparameter controlling the scale of the communities found:
        # large alpha yields very small communities, small alpha yields large ones.
        self._alpha = alpha
        self._nodes = set()
        # k_in and k_out are the community's total internal and external degree.
        self._k_in = 0
        self._k_out = 0

    def add_node(self, node):
        """ Add a node to the community. """
        # Each edge is shared by two endpoints, so the node's internal and external
        # degree can be read off its neighbour set.
        neighbors = set(self._G.neighbors(node))
        # Set operations keep this simple: the node's k_in is the number of its
        # neighbours already inside the community.
        node_k_in = len(neighbors & self._nodes)
        # k_out is then the neighbour count (total edges) minus k_in (internal edges).
        node_k_out = len(neighbors) - node_k_in
        # Update the community's node set, k_in and k_out.
        self._nodes.add(node)
        # Internal degree: the node's own internal degree, plus its in-community
        # neighbours' former external degree turning internal, hence the factor 2.
        self._k_in += 2 * node_k_in
        # External degree: the outside neighbours count once, minus node_k_in,
        # because the edges to in-community neighbours used to be external and
        # are internal now that the node has joined.
        self._k_out = self._k_out + node_k_out - node_k_in

    def remove_node(self, node):
        """ Remove a node from the community. """
        neighbors = set(self._G.neighbors(node))
        # The inverse of the bookkeeping in add_node.
        # community_nodes = self._nodes
        # node_k_in = len(neighbors & community_nodes)
        node_k_in = len(neighbors & self._nodes)
        node_k_out = len(neighbors) - node_k_in
        self._nodes.remove(node)
        self._k_in -= 2 * node_k_in
        self._k_out = self._k_out - node_k_out + node_k_in

    def cal_add_fitness(self, node):
        """ Fitness change if the node were added. """
        neighbors = set(self._G.neighbors(node))
        old_k_in = self._k_in
        old_k_out = self._k_out
        vertex_k_in = len(neighbors & self._nodes)
        vertex_k_out = len(neighbors) - vertex_k_in
        new_k_in = old_k_in + 2 * vertex_k_in
        new_k_out = old_k_out + vertex_k_out - vertex_k_in
        # Apply the fitness formula to both states.
        new_fitness = new_k_in / (new_k_in + new_k_out) ** self._alpha
        old_fitness = old_k_in / (old_k_in + old_k_out) ** self._alpha
        return new_fitness - old_fitness

    def cal_remove_fitness(self, node):
        """ Fitness change if the node were removed. """
        neighbors = set(self._G.neighbors(node))
        new_k_in = self._k_in
        new_k_out = self._k_out
        node_k_in = len(neighbors & self._nodes)
        node_k_out = len(neighbors) - node_k_in
        old_k_in = new_k_in - 2 * node_k_in
        old_k_out = new_k_out - node_k_out + node_k_in
        old_fitness = old_k_in / (old_k_in + old_k_out) ** self._alpha
        new_fitness = new_k_in / (new_k_in + new_k_out) ** self._alpha
        return new_fitness - old_fitness

    def recalculate(self):
        # Look for a member whose fitness in the community has turned negative.
        for vid in self._nodes:
            fitness = self.cal_remove_fitness(vid)
            if fitness < 0.0:
                return vid
        return None

    def get_neighbors(self):
        """ The community's neighbour nodes, for the traversal below. """
        neighbors = set()
        # Collect all members' neighbours that are not themselves members.
        for node in self._nodes:
            neighbors.update(set(self._G.neighbors(node)) - self._nodes)
        return neighbors

    def get_fitness(self):
        return float(self._k_in) / ((self._k_in + self._k_out) ** self._alpha)

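# In symbols, the community fitness used above is
#     f_C = k_in / (k_in + k_out) ** alpha
# where k_in is twice the number of internal edges of C and k_out the number
# of edges leaving C. Both cal_add_fitness and cal_remove_fitness return
# f(with node) - f(without node), i.e. the node's fitness contribution.
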
class LFM:

    def __init__(self, G, alpha):
        self._G = G
        # alpha is a hyperparameter controlling the scale of the communities found:
        # large alpha yields very small communities, small alpha yields large ones.
        self._alpha = alpha

    def execute(self):
        communities = []
        # Track the nodes not yet assigned to any community (initially all of them).
        # node_not_include = self._G.node.keys()[:]
        node_not_include = list(self._G.nodes())
        while len(node_not_include) != 0:
            # Start a new community.
            c = Community(self._G, self._alpha)
            # Pick a random seed node.
            seed = random.choice(node_not_include)
            # print(seed)
            c.add_node(seed)

            # Get the community's neighbours and iterate over them.
            to_be_examined = c.get_neighbors()
            while to_be_examined:
                # Add the node with the largest fitness to the community.
                m = {}
                for node in to_be_examined:
                    fitness = c.cal_add_fitness(node)
                    m[node] = fitness
                to_be_add = sorted(m.items(), key=lambda x: x[1], reverse=True)[0]

                # Stop iterating once every candidate's fitness is negative.
                if to_be_add[1] < 0.0:
                    break
                c.add_node(to_be_add[0])

                # Remove members whose fitness has turned negative, if any.
                to_be_remove = c.recalculate()
                while to_be_remove is not None:
                    c.remove_node(to_be_remove)
                    to_be_remove = c.recalculate()

                to_be_examined = c.get_neighbors()

            # Drop the community's members from the set of unassigned nodes.
            for node in c._nodes:
                if node in node_not_include:
                    node_not_include.remove(node)
            communities.append(c._nodes)
        return communities


def cal_EQ(cover, G):
    m = len(G.edges(None, False))  # number of edges; G.edges(None, False) yields 2-tuples (u, v)
    # Record the set of communities each node belongs to.
    vertex_community = collections.defaultdict(lambda: set())
    # i is the community index, c its member nodes.
    for i, c in enumerate(cover):
        # v is one node of the community.
        for v in c:
            # Record that node v belongs to community i.
            vertex_community[v].add(i)
    total = 0.0
    for c in cover:
        for i in c:
            # o_i: number of communities node i belongs to.
            o_i = len(vertex_community[i])
            # k_i: degree of node i (number of incident edges).
            k_i = len(G[i])
            for j in c:
                t = 0.0
                # o_j: number of communities node j belongs to.
                o_j = len(vertex_community[j])
                # k_j: degree of node j (number of incident edges).
                k_j = len(G[j])
                if G.has_edge(i, j):
                    t += 1.0 / (o_i * o_j)
                t -= k_i * k_j / (2 * m * o_i * o_j)
                total += t
    return round(total / (2 * m), 4)


def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


if __name__ == "__main__":
    seed = 1024
    random.seed(seed)  # fix Python's randomness
    # G = nx.karate_club_graph()
    G = load_graph('data/football.txt')
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, font_weight='bold')
    plt.show()
    start_time = time.time()
    algorithm = LFM(G, 0.9)
    communities = algorithm.execute()
    end_time = time.time()
    for i, c in enumerate(communities):
        print(f'Community {i}, size {len(c)}, members {sorted(c)}')

    print(cal_EQ(communities, G))
    print(f'Elapsed time: {end_time - start_time}')

--------------------------------------------------------------------------------
/LPA.py:
--------------------------------------------------------------------------------
import collections
import random
import time
import networkx as nx
import matplotlib.pyplot as plt


class LPA:
    def __init__(self, G, max_iter=20):
        self._G = G
        self._n = len(G.nodes(False))  # number of nodes
        self._max_iter = max_iter

    # Check for convergence.
    def can_stop(self):
        # Converged when every node's label is among its neighbours' most frequent labels.
        for i in range(self._n):
            node = self._G.nodes[i]
            label = node["label"]
            max_labels = self.get_max_neighbor_label(i)
            if label not in max_labels:
                return False
        return True

    # Get the most frequent labels among a node's neighbours.
    def get_max_neighbor_label(self, node_index):
        m = collections.defaultdict(int)
        for neighbor_index in self._G.neighbors(node_index):
            neighbor_label = self._G.nodes[neighbor_index]["label"]
            m[neighbor_label] += 1
        max_v = max(m.values())
        # Several labels may tie for the maximum; return all of them.
        return [item[0] for item in m.items() if item[1] == max_v]

    # Asynchronous update.
    def populate_label(self):
        # Visit the nodes in random order.
        visitSequence = random.sample(list(self._G.nodes()), len(self._G.nodes()))
        for i in visitSequence:
            node = self._G.nodes[i]
            label = node["label"]
            max_labels = self.get_max_neighbor_label(i)
            # Only update when the label is not already among the most frequent
            # ones; randomly re-picking the same label would be pointless.
            if label not in max_labels:
                newLabel = random.choice(max_labels)
                node["label"] = newLabel

    # Derive the community structure from the labels.
    def get_communities(self):
        communities = collections.defaultdict(lambda: list())
        for node in self._G.nodes(True):
            label = node[1]["label"]
            communities[label].append(node[0])
        return communities.values()

    def execute(self):
        # Initialize the labels.
        for i in range(self._n):
            self._G.nodes[i]["label"] = i
        iter_time = 0
        # Propagate the labels.
        while (not self.can_stop() and iter_time < self._max_iter):
            self.populate_label()
            iter_time += 1
        return self.get_communities()


# Visualize the partition.
def showCommunity(G, partition, pos):
    # Nodes in the same community share one marker; edges between communities
    # are drawn thicker.
    cluster = {}
    labels = {}
    for index, item in enumerate(partition):
        for nodeID in item:
            labels[nodeID] = r'$' + str(nodeID) + '$'  # label shown in the plot
            cluster[nodeID] = index  # community index of the node

    # Draw the nodes.
    colors = ['r', 'g', 'b', 'y', 'm']
    shapes = ['v', 'D', 'o', '^', '<']
    for index, item in enumerate(partition):
        nx.draw_networkx_nodes(G, pos, nodelist=item,
                               node_color=colors[index],
                               node_shape=shapes[index],
                               node_size=350,
                               alpha=1)

    # Draw the edges.
    edges = {len(partition): []}
    for link in G.edges():
        # links between clusters
        if cluster[link[0]] != cluster[link[1]]:
            edges[len(partition)].append(link)
        else:
            # links inside a cluster
            if cluster[link[0]] not in edges:
                edges[cluster[link[0]]] = [link]
            else:
                edges[cluster[link[0]]].append(link)

    for index, edgelist in enumerate(edges.values()):
        # inside a cluster
        if index < len(partition):
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=1, alpha=0.8, edge_color=colors[index])
        else:
            # between clusters
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=3, alpha=0.8, edge_color=colors[index])

    # Draw the labels.
    nx.draw_networkx_labels(G, pos, labels, font_size=12)

    plt.axis('off')
    plt.show()


def cal_Q(partition, G):  # compute modularity Q
    m = len(G.edges(None, False))  # number of edges; G.edges(None, False) yields 2-tuples (u, v)
    # print(G.edges(None,False))
    # print("=======6666666")
    a = []
    e = []
    for community in partition:  # take each community in turn
        t = 0.0
        for node in community:  # visit every node of the community
            t += len([x for x in G.neighbors(node)])  # G.neighbors(node): the node's neighbours
        a.append(t / (2 * m))
        # self.zidian[t/(2*m)]=community
    for community in partition:
        t = 0.0
        for i in range(len(community)):
            for j in range(len(community)):
                if (G.has_edge(community[i], community[j])):
                    t += 1.0
        e.append(t / (2 * m))

    q = 0.0
    for ei, ai in zip(e, a):
        q += (ei - ai ** 2)
    return q

if __name__ == '__main__':
    G = nx.karate_club_graph()
    pos = nx.spring_layout(G)
    start_time = time.time()
    algorithm = LPA(G)
    communities = algorithm.execute()
    end_time = time.time()
    for community in communities:
        print(community)

    print(cal_Q(communities, G))
    print(f'Elapsed time: {end_time - start_time}')
    # Visualize the result.
    showCommunity(G, communities, pos)

--------------------------------------------------------------------------------
/Leiden.py:
--------------------------------------------------------------------------------
import igraph as ig
import leidenalg
import louvain

# Read the file as an edge list and build an undirected graph.
g = ig.Graph.Read_Edgelist("data//OpenFlights.txt", directed=False)


part = leidenalg.find_partition(g, leidenalg.ModularityVertexPartition)
print(part)
print(ig.summary(g))
print(part.modularity)
ig.plot(part)

part2 = leidenalg.find_partition(g, leidenalg.CPMVertexPartition, resolution_parameter=0.01)
print(part2.modularity)
# ig.plot(part2)

--------------------------------------------------------------------------------
/Louvain.py:
--------------------------------------------------------------------------------
import collections
import random
import time
import networkx as nx
import matplotlib.pyplot as plt


def load_graph(path):
    G = collections.defaultdict(dict)
    with open(path) as text:
        for line in text:
            vertices = line.strip().split()
            v_i = int(vertices[0])
            v_j = int(vertices[1])
            w = 1.0  # read the weight from the dataset here if it has one
            G[v_i][v_j] = w
            G[v_j][v_i] = w
    return G


# Vertex class storing community and node bookkeeping.
class Vertex:
    def __init__(self, vid, cid, nodes, k_in=0):
        # node id
        self._vid = vid
        # community id
        self._cid = cid
        self._nodes = nodes
        self._kin = k_in  # total weight of the node's internal edges


class Louvain:
    def __init__(self, G):
        self._G = G
        self._m = 0  # number of edges; the graph shrinks as it is aggregated
        self._cid_vertices = {}  # community bookkeeping: community id -> set of member node ids
        self._vid_vertex = {}  # node bookkeeping: node id -> Vertex instance
        for vid in self._G.keys():
            # Initially every node is its own community,
            self._cid_vertices[vid] = {vid}
            # and the community id equals the node id.
            self._vid_vertex[vid] = Vertex(vid, vid, {vid})
            # Count the edges; each edge is shared by two endpoints.
            self._m += sum([1 for neighbor in self._G[vid].keys()
                            if neighbor > vid])

    # Modularity optimization phase.
    def first_stage(self):
        mod_inc = False  # whether the whole algorithm may terminate
        visit_sequence = list(self._G.keys())
        # Visit the nodes in random order.
        random.shuffle(visit_sequence)
        while True:
            can_stop = True  # whether this first stage may terminate
            # Visit every node.
            for v_vid in visit_sequence:
                # The node's community id.
                v_cid = self._vid_vertex[v_vid]._cid
                # k_v: the node's weight (degree), internal plus external edge weights.
                k_v = sum(self._G[v_vid].values()) + \
                    self._vid_vertex[v_vid]._kin
                # Candidate communities with positive modularity gain.
                cid_Q = {}
                # Visit the node's neighbours.
                for w_vid in self._G[v_vid].keys():
                    # The neighbour's community id.
                    w_cid = self._vid_vertex[w_vid]._cid
                    if w_cid in cid_Q:
                        continue
                    else:
                        # tot: total weight on the links incident to nodes of community C.
                        tot = sum(
                            [sum(self._G[k].values()) + self._vid_vertex[k]._kin for k in self._cid_vertices[w_cid]])
                        if w_cid == v_cid:
                            tot -= k_v
                        # k_v_in: total weight of the links from node i into community C.
                        k_v_in = sum(
                            [v for k, v in self._G[v_vid].items() if k in self._cid_vertices[w_cid]])
                        # Only the sign of delta_Q is needed, so the factor 1/(2*self._m) is dropped.
                        delta_Q = k_v_in - k_v * tot / self._m
                        cid_Q[w_cid] = delta_Q

                # The community id with the largest gain.
                cid, max_delta_Q = sorted(
                    cid_Q.items(), key=lambda item: item[1], reverse=True)[0]
                if max_delta_Q > 0.0 and cid != v_cid:
                    # Move the node into the community with the largest gain:
                    self._vid_vertex[v_vid]._cid = cid
                    # add the node to that community,
                    self._cid_vertices[cid].add(v_vid)
                    # and remove it from its previous community.
                    self._cid_vertices[v_cid].remove(v_vid)
                    # Modularity can still increase; keep iterating.
                    can_stop = False
                    mod_inc = True
            if can_stop:
                break
        return mod_inc

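    # In symbols, the gain computed above is the standard Louvain quantity
    #     delta_Q = (1 / (2m)) * (k_v_in - k_v * tot / m)
    # with the constant positive factor 1/(2m) dropped, since only the sign
    # and relative order of the gains are needed to pick the best move.
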
    # Network aggregation phase.
    def second_stage(self):
        cid_vertices = {}
        vid_vertex = {}
        # Visit each community and its member nodes.
        for cid, vertices in self._cid_vertices.items():
            if len(vertices) == 0:
                continue
            new_vertex = Vertex(cid, cid, set())
            # Collapse all nodes of the community into a single node.
            for vid in vertices:
                new_vertex._nodes.update(self._vid_vertex[vid]._nodes)
                new_vertex._kin += self._vid_vertex[vid]._kin
                # k, v are a neighbour and the weight of the edge to it. Sum the
                # internal weight kin: each internal edge is shared by two endpoints
                # and will be seen again from the other side, hence weight / 2.
                for k, v in self._G[vid].items():
                    if k in vertices:
                        new_vertex._kin += v / 2.0
            # New community and node ids.
            cid_vertices[cid] = {cid}
            vid_vertex[cid] = new_vertex

        G = collections.defaultdict(dict)
        # Visit the non-empty communities and compute the edge weights between them.
        for cid1, vertices1 in self._cid_vertices.items():
            if len(vertices1) == 0:
                continue
            for cid2, vertices2 in self._cid_vertices.items():
                # Find a later community that is also non-empty.
                if cid2 <= cid1 or len(vertices2) == 0:
                    continue
                edge_weight = 0.0
                # Visit the nodes of community 1,
                for vid in vertices1:
                    # and sum the weights of their edges into community 2
                    # (all parallel edges between the communities collapse into one).
                    for k, v in self._G[vid].items():
                        if k in vertices2:
                            edge_weight += v
                if edge_weight != 0:
                    G[cid1][cid2] = edge_weight
                    G[cid2][cid1] = edge_weight
        # Replace communities and nodes: each community becomes a single node.
        self._cid_vertices = cid_vertices
        self._vid_vertex = vid_vertex
        self._G = G

    def get_communities(self):
        communities = []
        for vertices in self._cid_vertices.values():
            if len(vertices) != 0:
                c = set()
                for vid in vertices:
                    c.update(self._vid_vertex[vid]._nodes)
                communities.append(list(c))
        return communities

    def execute(self):
        iter_time = 1
        while True:
            iter_time += 1
            # Iterate until no node move can improve the overall modularity.
            mod_inc = self.first_stage()
            if mod_inc:
                self.second_stage()
            else:
                break
        return self.get_communities()

# Visualize the partition.
def showCommunity(G, partition, pos):
    # Nodes in the same community share one marker; edges between communities
    # are drawn thicker.
    cluster = {}
    labels = {}
    for index, item in enumerate(partition):
        for nodeID in item:
            labels[nodeID] = r'$' + str(nodeID) + '$'  # label shown in the plot
            cluster[nodeID] = index  # community index of the node

    # Draw the nodes.
    colors = ['r', 'g', 'b', 'y', 'm']
    shapes = ['v', 'D', 'o', '^', '<']
    for index, item in enumerate(partition):
        nx.draw_networkx_nodes(G, pos, nodelist=item,
                               node_color=colors[index],
                               node_shape=shapes[index],
                               node_size=350,
                               alpha=1)

    # Draw the edges.
    edges = {len(partition): []}
    for link in G.edges():
        # links between clusters
        if cluster[link[0]] != cluster[link[1]]:
            edges[len(partition)].append(link)
        else:
            # links inside a cluster
            if cluster[link[0]] not in edges:
                edges[cluster[link[0]]] = [link]
            else:
                edges[cluster[link[0]]].append(link)

    for index, edgelist in enumerate(edges.values()):
        # inside a cluster
        if index < len(partition):
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=1, alpha=0.8, edge_color=colors[index])
        else:
            # between clusters
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=3, alpha=0.8, edge_color=colors[index])

    # Draw the labels.
    nx.draw_networkx_labels(G, pos, labels, font_size=12)

    plt.axis('off')
    plt.show()


def cal_Q(partition, G):  # compute modularity Q
    # number of edges; G.edges(None, False) yields 2-tuples (u, v)
    m = len(G.edges(None, False))
    # print(G.edges(None,False))
    # print("=======6666666")
    a = []
    e = []
    for community in partition:  # take each community in turn
        t = 0.0
        for node in community:  # visit every node of the community
            # G.neighbors(node): the node's neighbours
            t += len([x for x in G.neighbors(node)])
        a.append(t / (2 * m))
        # self.zidian[t/(2*m)]=community
    for community in partition:
        t = 0.0
        for i in range(len(community)):
            for j in range(len(community)):
                if (G.has_edge(community[i], community[j])):
                    t += 1.0
        e.append(t / (2 * m))

    q = 0.0
    for ei, ai in zip(e, a):
        q += (ei - ai ** 2)
    return q


class Graph:
    # graph = nx.DiGraph()

    def __init__(self):
        self.graph = nx.Graph()

    def createGraph(self, filename):
        file = open(filename, 'r')

        for line in file.readlines():
            nodes = line.split()
            edge = (int(nodes[0]), int(nodes[1]))
            self.graph.add_edge(*edge)

        return self.graph


if __name__ == '__main__':
    G = load_graph('data/club.txt')
    # G = load_graph('data/dummy.txt')
    obj = Graph()
    G1 = obj.createGraph("data/club.txt")
    # G1 = nx.karate_club_graph()
    pos = nx.spring_layout(G1)
    start_time = time.time()
    algorithm = Louvain(G)
    communities = algorithm.execute()
    end_time = time.time()
    # Print the communities largest first.
    communities = sorted(communities, key=lambda b: -len(b))  # sort by community size
    count = 0
    for communitie in communities:
        count += 1
        print("Community", count, " ", communitie)

    print(cal_Q(communities, G1))
    print(f'Elapsed time: {end_time - start_time}')
    # Visualize the result.
    showCommunity(G1, communities, pos)

--------------------------------------------------------------------------------
/OSLOM.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

import os
import sys
import argparse
import subprocess
import shutil
import uuid


def _parse_arguments(desc, args):
    """
    Parses command line arguments
    :param desc: program description for the help text
    :param args: argument list, usually sys.argv[1:]
    :return: parsed argparse namespace
    """
    help_fm = argparse.ArgumentDefaultsHelpFormatter
    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=help_fm)
    parser.add_argument('input',
                        help='Edge file in tab delimited format')
    parser.add_argument('--directed', dest='directed', action='store_true',
                        help='If set, then treat input as a directed graph')
    parser.set_defaults(directed=False)
    parser.add_argument('--singlet', dest='singlet', action='store_true',
                        help='If set, do NOT merge singlet with existing modules')
    parser.set_defaults(singlet=False)
    parser.add_argument('--seed', default=-1, type=int,
                        help='Seed for random generator')
    parser.add_argument('--p_val', default=0.1, type=float,
                        help='p-value: increase to get more modules')
    parser.add_argument('--cp', default=0.5, type=float,
                        help='coverage parameter: bigger values lead to bigger clusters')
    parser.add_argument('--oslomdirected', default='/oslom/OSLOM2/oslom_dir',
                        help='Full path to oslom_dir binary')
    parser.add_argument('--oslomundirected', default='/oslom/OSLOM2/oslom_undir',
                        help='Full path to oslom_undir binary')
    parser.add_argument('--tempdir', default='/tmp',
                        help='Directory needed to hold files temporarily for processing')

    return parser.parse_args(args)

43 |
44 |
45 | def run_oslom_cmd(cmd):
46 | """
47 | Runs the given command in a subprocess
48 | :param cmd: command to run, as an argument list
49 | :return: tuple of (returncode, stdout, stderr)
50 | """
51 | p = subprocess.Popen(cmd,
52 | stdout=subprocess.PIPE,
53 | stderr=subprocess.PIPE)
54 |
55 | out, err = p.communicate()
56 |
57 | return p.returncode, out, err
58 |
59 |
60 | def create_tmpdir(theargs):
61 | """
62 | :param theargs: parsed arguments; only theargs.tempdir is used
63 | :return: path of a freshly created, uniquely named working directory
64 | """
65 | tmpdir = os.path.join(theargs.tempdir, 'cdoslom_' + str(uuid.uuid4()))
66 | os.makedirs(tmpdir, mode=0o755)
67 | return tmpdir
68 |
69 |
70 | def run_oslom(graph, theargs):
71 | """
72 | Copies the input to a temporary directory, runs the OSLOM binary on it
73 | and writes the trimmed cluster hierarchy to standard out
74 | :param graph: path of the input edge file
75 | :param theargs: parsed arguments (directed, singlet, seed, p_val, cp, ...)
76 | :return: 0 on success, a non-zero error code otherwise
77 | """
78 |
82 | seed = theargs.seed
83 | p_val = theargs.p_val
84 | cp = theargs.cp
85 |
86 | if theargs.directed is True:
87 | cmdargs = [theargs.oslomdirected]
88 | else:
89 | cmdargs = [theargs.oslomundirected]
90 |
91 | if graph is None or not os.path.isfile(graph):
92 | sys.stderr.write(str(graph) + ' is not a file\n')
93 | return 3
94 |
95 | if os.path.getsize(graph) == 0:
96 | sys.stderr.write(str(graph) + ' is an empty file\n')
97 | return 4
98 |
99 | olddir = os.getcwd()
100 | tmpdir = create_tmpdir(theargs)
101 | oldgraph = graph
102 | graph = os.path.join(tmpdir, os.path.basename(oldgraph))
103 | shutil.copyfile(oldgraph, graph)
104 | os.chdir(tmpdir)
105 | cmdargs.extend(['-f', graph, '-fast'])
106 |
107 | try:
108 | with open(graph, 'r') as file:
109 | lines = file.read().splitlines()
110 | while lines[0][0] == '#':
111 | lines.pop(0)
112 | # three or more columns means the edge list carries weights
113 | if len(lines[0].split()) >= 3:
114 | weight = '-w'
115 | else:
116 | weight = '-uw'
117 | cmdargs.append(weight)
118 | if theargs.singlet is True:
119 | cmdargs.append('-singlet')
120 | if isinstance(seed, int) and seed >= 1:
121 | cmdargs.append('-seed')
122 | cmdargs.append(str(seed))
123 | cmdargs.extend(['-t', str(p_val), '-cp', str(cp)])
124 | sys.stderr.write('Running ' + str(cmdargs) + '\n')
125 | sys.stderr.flush()
126 | cmdecode, cmdout, cmderr = run_oslom_cmd(cmdargs)
127 |
128 | if cmdecode != 0:
129 | sys.stderr.write('Command failed with non-zero exit code: ' +
130 | str(cmdecode) + ' : ' + str(cmderr) + '\n')
131 | return 1
132 |
133 | if len(cmdout) > 0:
134 | sys.stderr.write('Output from cmd: ' + str(cmdout) + '\n')
135 |
136 | if len(cmderr) > 0:
137 | sys.stderr.write('Error output from cmd: ' + str(cmderr) + '\n')
138 |
139 | outfolder = graph + '_oslo_files'
140 | clusts_layers = []
141 | clusts_layers.append([])
142 | sys.stderr.write('Attempting to open ' + outfolder + '\n')
143 | # the bottom level of the hierarchy lives in the file named 'tp'
144 | with open(os.path.join(outfolder, 'tp'), 'r') as cfile:
145 | lines = cfile.read().splitlines()
146 | for i in range(len(lines) // 2):
147 | clusts_layers[0].append([])
148 | members = lines[2 * i + 1].split()
149 | for m in members:
150 | clusts_layers[0][i].append(m)
151 | i = 1
152 | sys.stderr.write('Reading the higher hierarchy levels (tp1, tp2, ...)\n')
153 | sys.stderr.flush()
154 | while os.path.isfile(os.path.join(outfolder, 'tp' + str(i))):
155 | with open(os.path.join(outfolder, 'tp' + str(i)), 'r') as cfile:
156 | clusts_layers.append([])
157 | lines = cfile.read().splitlines()
158 | for j in range(len(lines) // 2):
159 | clusts_layers[i].append([])
160 | members = lines[2 * j + 1].split()
161 | for m in members:
162 | clusts_layers[i][j].append(m)
163 | i = i + 1
164 |
165 | lines = []
166 | maxNode = 0
167 | for clust in clusts_layers[0]:
168 | maxNode = max(maxNode, max(list(map(int, clust))))
169 | # give each bottom-level cluster a fresh id above the largest node id and emit (cluster, member) edges
170 | for i in range(len(clusts_layers[0])):
171 | for n in clusts_layers[0][i]:
172 | lines.append(str(maxNode + i + 1) + '\t' + str(n))
173 | maxNode = maxNode + len(clusts_layers[0])
174 | # link every cluster to the cluster one level up that contains it
175 | for i in range(1, len(clusts_layers)):
176 | for j in range(len(clusts_layers[i - 1])):
177 | for k in range(len(clusts_layers[i])):
178 | if all(x in clusts_layers[i][k] for x in clusts_layers[i - 1][j]):
179 | lines.append(str(maxNode + k + 1) + '\t' + str(maxNode - len(clusts_layers[i - 1]) + j + 1))
180 | break
181 | maxNode = maxNode + len(clusts_layers[i])
182 | for i in range(len(clusts_layers[-1])):
183 | lines.append(str(maxNode + 1) + '\t' + str(maxNode - len(clusts_layers[-1]) + i + 1))
184 |
185 | # trim the hierarchy: contract single-branch chains ("contigs") out of the DAG
186 | up_tree = {}
187 | down_tree = {}
188 | for line in lines:
189 | elts = line.split()
190 | down_tree.setdefault(elts[0], [])
191 | down_tree[elts[0]].append(elts[1])
192 | up_tree.setdefault(elts[1], [])
193 | up_tree[elts[1]].append(elts[0])
194 |
195 | # store root and leaves
196 | set1 = set(down_tree.keys())
197 | set2 = set(up_tree.keys())
198 | root_l = list(set1.difference(set2))
199 | leaf_l = list(set2.difference(set1))
200 | node_l = list(set1.union(set2))
201 |
202 | # find all contigs in the DAG
203 | Contigs = []
204 | work_list = root_l
205 | visited = {}
206 | for node in node_l:
207 | visited[node] = 0
208 | work_path = []
209 | new_path = False
210 | while work_list:
211 | key = work_list.pop(0)
212 | if not new_path:
213 | work_path.append(key)
214 | else:
215 | work_path.append(up_tree[key][visited[key]])
216 | work_path.append(key)
217 | if key in leaf_l:
218 | new_path = True
219 | Contigs.append(work_path)
220 | work_path = []
221 | elif len(down_tree[key]) > 1 or visited[key] > 0:
222 | new_path = True
223 | Contigs.append(work_path)
224 | work_path = []
225 | if visited[key] == 0 and key not in leaf_l:
226 | work_list = down_tree[key] + work_list
227 | visited[key] += 1
228 |
229 | # write the trimmed DAG as 'parent,child,kind;' records: c-m for cluster-member, c-c for cluster-cluster
230 | for path in Contigs[1:]:
231 | sys.stdout.write(path[0] + ',' + path[-1] + ',')
232 | if path[-1] in leaf_l:
233 | sys.stdout.write('c-m' + ';')
234 | else:
235 | sys.stdout.write('c-c' + ';')
236 |
237 | sys.stdout.flush()
238 | return 0
239 | finally:
240 | os.chdir(olddir)
241 | shutil.rmtree(tmpdir)
242 |
243 |
244 | def main(args):
245 | """
246 | Main entry point for program
247 | :param args: command line arguments usually :py:const:`sys.argv`
248 | :return: 0 for success otherwise failure
249 | :rtype: int
250 | """
251 | desc = """
252 | Runs oslom on command line, sending output to standard out
253 | """
254 |
255 | theargs = _parse_arguments(desc, args[1:])
256 |
257 | try:
258 | inputfile = os.path.abspath(theargs.input)
259 |
260 | return run_oslom(inputfile, theargs)
261 |
262 | except Exception as e:
263 | sys.stderr.write('Caught exception: ' + str(e) + '\n')
264 | return 2
265 |
266 |
267 | if __name__ == '__main__': # pragma: no cover
268 | sys.exit(main(sys.argv))
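# The parsing code above relies on OSLOM's tp file layout: modules come in
# line pairs, a '#module ...' header line followed by a whitespace-separated
# member list, and tp1, tp2, ... hold successively coarser levels. A minimal
# sketch of that convention (the header text and member ids here are
# illustrative, not real OSLOM output):
def _tp_parse_demo():
    tp_lines = [
        "#module 0 size: 3 bs: 0.01",
        "1 2 3",
        "#module 1 size: 2 bs: 0.05",
        "4 5",
    ]
    # odd lines carry the members, exactly as the loops above assume
    clusters = [tp_lines[2 * i + 1].split() for i in range(len(tp_lines) // 2)]
    return clusters  # [['1', '2', '3'], ['4', '5']]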
-------------------------------------------------------------------------------- /PYG.ipynb: --------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "%matplotlib inline\n",
10 | "import torch\n",
11 | "import networkx as nx\n",
12 | "import matplotlib.pyplot as plt\n",
13 | "\n",
14 | "# Visualization function for NX graph or PyTorch tensor\n",
15 | "def visualize(h, color, epoch=None, loss=None):\n",
16 | " plt.figure(figsize=(7,7))\n",
17 | " plt.xticks([])\n",
18 | " plt.yticks([])\n",
19 | "\n",
20 | " if torch.is_tensor(h): # visualize intermediate embeddings produced by the network\n",
21 | " h = h.detach().cpu().numpy()\n",
22 | " plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap=\"Set2\")\n",
23 | " if epoch is not None and loss is not None:\n",
24 | " plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16)\n",
25 | " else: # visualize the graph itself\n",
26 | " nx.draw_networkx(G, pos=nx.spring_layout(G, seed=42), with_labels=False,\n",
27 | " node_color=color, cmap=\"Set2\")\n",
28 | " plt.show()\n"
29 | ]
30 | },
31 | {
32 | "cell_type": "code",
33 | "execution_count": 2,
34 | "metadata": {},
35 | "outputs": [
36 | {
37 | "name": "stderr",
38 | "output_type": "stream",
39 | "text": [
40 | "Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip\n",
41 | "Extracting \\tmp\\ENZYMES\\ENZYMES\\ENZYMES.zip\n",
42 | "Processing...\n"
43 | ]
44 | },
45 | {
46 | "name": "stdout",
47 | "output_type": "stream",
48 | "text": [
49 | "ENZYMES(600)\n",
50 | "600\n",
51 | "6\n",
52 | "3\n"
53 | ]
54 | },
55 | {
56 | "name": "stderr",
57 | "output_type": "stream",
58 | "text": [
59 | "Done!\n"
60 | ]
61 | }
62 | ],
63 | "source": [
64 | "from torch_geometric.datasets import TUDataset\n",
65 | "\n",
66 | "dataset = TUDataset(root=\"/tmp/ENZYMES\",name='ENZYMES')\n",
67 | "\n",
68 | "print(dataset)\n",
69 | "print(len(dataset))\n",
70 | "print(dataset.num_classes)\n",
71 | "print(dataset.num_node_features)"
72 | ]
73 | },
74 | {
75 | "cell_type": "code",
76 | "execution_count": 3,
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "data = dataset[0]"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 4,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/plain": [
91 | "True"
92 | ]
93 | },
94 | "execution_count": 4,
95 | "metadata": {},
96 | "output_type": "execute_result"
97 | }
98 | ],
99 | "source": [
100 | "data.is_undirected()"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 9,
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "name": "stderr",
110 | "output_type": "stream",
111 | "text": [
112 | "Using existing file ind.cora.x\n",
113 | "Using existing file ind.cora.tx\n",
114 | "Using existing file ind.cora.allx\n",
115 | "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y\n",
116 | "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty\n",
117 | "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally\n",
118 | "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph\n",
119 | "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index\n",
120 | "Processing...\n",
121 | "Done!\n"
122 | ]
123 | }
124 | ],
125 | "source": [
126 | "from torch_geometric.datasets import Planetoid\n",
127 | "\n",
128 | "dataset = Planetoid(root='/tmp/Cora', name='Cora')"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 10,
134 | "metadata":
{},
135 | "outputs": [
136 | {
137 | "name": "stdout",
138 | "output_type": "stream",
139 | "text": [
140 | "1\n",
141 | "7\n",
142 | "1433\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "print(len(dataset))\n",
148 | "\n",
149 | "print(dataset.num_classes)\n",
150 | "\n",
151 | "print(dataset.num_node_features)"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 11,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "name": "stdout",
161 | "output_type": "stream",
162 | "text": [
163 | "True\n",
164 | "140\n",
165 | "500\n",
166 | "1000\n"
167 | ]
168 | }
169 | ],
170 | "source": [
171 | "data = dataset[0]\n",
172 | "\n",
173 | "print(data.is_undirected())\n",
174 | "\n",
175 | "print(data.train_mask.sum().item())\n",
176 | "\n",
177 | "print(data.val_mask.sum().item())\n",
178 | "\n",
179 | "print(data.test_mask.sum().item())"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 12,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "Dataset: KarateClub():\n",
192 | "======================\n",
193 | "Number of graphs: 1\n",
194 | "Number of features: 34\n",
195 | "Number of classes: 4\n"
196 | ]
197 | }
198 | ],
199 | "source": [
200 | "from torch_geometric.datasets import KarateClub\n",
201 | "\n",
202 | "dataset = KarateClub()\n",
203 | "print(f'Dataset: {dataset}:')\n",
204 | "print('======================')\n",
205 | "print(f'Number of graphs: {len(dataset)}')\n",
206 | "print(f'Number of features: {dataset.num_features}')\n",
207 | "print(f'Number of classes: {dataset.num_classes}')"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 13,
213 | "metadata": {},
214 | "outputs": [],
215 | "source": [
216 | "# build a Data object by hand\n",
217 | "\n",
218 | "import torch\n",
219 | "from torch_geometric.data import Data\n",
220 | "\n",
221 | "edge_index = torch.tensor([[0, 1, 1, 2],\n",
222 | " [1, 0, 2, 1]], dtype=torch.long)\n",
223 | "x = torch.tensor([[-1], [0], [1]], dtype=torch.float)\n",
224 | "\n",
225 | "data = Data(x=x, edge_index=edge_index)"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 15,
231 | "metadata": {},
232 | "outputs": [
233 | {
234 | "data": {
235 | "image/png": "<base64-encoded PNG omitted: matplotlib rendering of the KarateClub graph coloured by community>",
236 | "text/plain": [
237 | "
" 238 | ] 239 | }, 240 | "metadata": {}, 241 | "output_type": "display_data" 242 | } 243 | ], 244 | "source": [ 245 | "from torch_geometric.utils import to_networkx\n", 246 | "from torch_geometric.datasets import KarateClub\n", 247 | "\n", 248 | "dataset = KarateClub()\n", 249 | "data = dataset[0]\n", 250 | "G = to_networkx(data, to_undirected=True)\n", 251 | "\n", 252 | "visualize(G, color=data.y)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 16, 258 | "metadata": {}, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "DataBatch(edge_index=[2, 4120], x=[1094, 21], y=[32], batch=[1094], ptr=[33])\n", 265 | "32\n", 266 | "DataBatch(edge_index=[2, 4182], x=[1038, 21], y=[32], batch=[1038], ptr=[33])\n", 267 | "32\n", 268 | "DataBatch(edge_index=[2, 3900], x=[992, 21], y=[32], batch=[992], ptr=[33])\n", 269 | "32\n", 270 | "DataBatch(edge_index=[2, 3552], x=[984, 21], y=[32], batch=[984], ptr=[33])\n", 271 | "32\n", 272 | "DataBatch(edge_index=[2, 3880], x=[980, 21], y=[32], batch=[980], ptr=[33])\n", 273 | "32\n", 274 | "DataBatch(edge_index=[2, 4082], x=[1068, 21], y=[32], batch=[1068], ptr=[33])\n", 275 | "32\n", 276 | "DataBatch(edge_index=[2, 3612], x=[909, 21], y=[32], batch=[909], ptr=[33])\n", 277 | "32\n", 278 | "DataBatch(edge_index=[2, 3446], x=[987, 21], y=[32], batch=[987], ptr=[33])\n", 279 | "32\n", 280 | "DataBatch(edge_index=[2, 3986], x=[1049, 21], y=[32], batch=[1049], ptr=[33])\n", 281 | "32\n", 282 | "DataBatch(edge_index=[2, 4100], x=[1094, 21], y=[32], batch=[1094], ptr=[33])\n", 283 | "32\n", 284 | "DataBatch(edge_index=[2, 4096], x=[1049, 21], y=[32], batch=[1049], ptr=[33])\n", 285 | "32\n", 286 | "DataBatch(edge_index=[2, 4072], x=[1065, 21], y=[32], batch=[1065], ptr=[33])\n", 287 | "32\n", 288 | "DataBatch(edge_index=[2, 3892], x=[1025, 21], y=[32], batch=[1025], ptr=[33])\n", 289 | "32\n", 290 | "DataBatch(edge_index=[2, 4026], x=[1030, 21], y=[32], batch=[1030], ptr=[33])\n", 291 | "32\n", 292 | "DataBatch(edge_index=[2, 4206], x=[1122, 21], y=[32], batch=[1122], ptr=[33])\n", 293 | "32\n", 294 | "DataBatch(edge_index=[2, 3824], x=[1055, 21], y=[32], batch=[1055], ptr=[33])\n", 295 | "32\n", 296 | "DataBatch(edge_index=[2, 3694], x=[943, 21], y=[32], batch=[943], ptr=[33])\n", 297 | "32\n", 298 | "DataBatch(edge_index=[2, 4672], x=[1286, 21], y=[32], batch=[1286], ptr=[33])\n", 299 | "32\n", 300 | "DataBatch(edge_index=[2, 3222], x=[810, 21], y=[24], batch=[810], ptr=[25])\n", 301 | "24\n" 302 | ] 303 | }, 304 | { 305 | "name": "stderr", 306 | "output_type": "stream", 307 | "text": [ 308 | "D:\\anaconda3\\envs\\graph\\lib\\site-packages\\torch_geometric\\deprecation.py:12: UserWarning: 'data.DataLoader' is deprecated, use 'loader.DataLoader' instead\n", 309 | " warnings.warn(out)\n" 310 | ] 311 | } 312 | ], 313 | "source": [ 314 | "# Mini-batch\n", 315 | "from torch_geometric.datasets import TUDataset\n", 316 | "from torch_geometric.data import DataLoader\n", 317 | "\n", 318 | "dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES', use_node_attr=True)\n", 319 | "loader = DataLoader(dataset, batch_size=32, shuffle=True)\n", 320 | "\n", 321 | "for batch in loader:\n", 322 | " print(batch)\n", 323 | " print(batch.num_graphs)\n" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 17, 329 | "metadata": {}, 330 | "outputs": [ 331 | { 332 | "name": "stdout", 333 | "output_type": "stream", 334 | "text": [ 335 | "GCN(\n", 336 | " (conv1): GCNConv(21, 4)\n", 337 
| " (conv2): GCNConv(4, 4)\n", 338 | " (conv3): GCNConv(4, 2)\n", 339 | " (classifier): Linear(in_features=2, out_features=6, bias=True)\n", 340 | ")\n" 341 | ] 342 | } 343 | ], 344 | "source": [ 345 | "# GNN\n", 346 | "import torch\n", 347 | "from torch.nn import Linear\n", 348 | "from torch_geometric.nn import GCNConv\n", 349 | "\n", 350 | "class GCN(torch.nn.Module):\n", 351 | " def __init__(self):\n", 352 | " super(GCN, self).__init__()\n", 353 | " torch.manual_seed(12345)\n", 354 | " self.conv1 = GCNConv(dataset.num_features, 4)\n", 355 | " self.conv2 = GCNConv(4, 4)\n", 356 | " self.conv3 = GCNConv(4, 2)\n", 357 | " self.classifier = Linear(2, dataset.num_classes)\n", 358 | "\n", 359 | " def forward(self, x, edge_index):\n", 360 | " h = self.conv1(x, edge_index)\n", 361 | " h = h.tanh()\n", 362 | " h = self.conv2(h, edge_index)\n", 363 | " h = h.tanh()\n", 364 | " h = self.conv3(h, edge_index)\n", 365 | " h = h.tanh() # Final GNN embedding space.\n", 366 | " \n", 367 | " # Apply a final (linear) classifier.\n", 368 | " out = self.classifier(h)\n", 369 | "\n", 370 | " return out, h\n", 371 | "\n", 372 | "model = GCN()\n", 373 | "print(model)\n" 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 19, 379 | "metadata": {}, 380 | "outputs": [ 381 | { 382 | "ename": "RuntimeError", 383 | "evalue": "mat1 and mat2 shapes cannot be multiplied (34x34 and 21x4)", 384 | "output_type": "error", 385 | "traceback": [ 386 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 387 | "\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", 388 | "\u001b[1;32mD:\\Temp;\\ipykernel_11144\\1363450205.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mepoch\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m401\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mloss\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mh\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[1;31m# Visualize the node embeddings every 10 epochs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[1;31m# if epoch % 10 == 0:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 389 | "\u001b[1;32mD:\\Temp;\\ipykernel_11144\\1363450205.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(data)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Clear gradients.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[0mout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mh\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0medge_index\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Perform a single forward pass.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[0mloss\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mout\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_mask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_mask\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Compute the loss solely based on the training nodes.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# Derive gradients.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 390 | "\u001b[1;32mD:\\anaconda3\\envs\\graph\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m 1108\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[0;32m 1109\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[1;32m-> 1110\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1111\u001b[0m \u001b[1;31m# Do not call functions when jit is used\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1112\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 391 | "\u001b[1;32mD:\\Temp;\\ipykernel_11144\\2568048666.py\u001b[0m in \u001b[0;36mforward\u001b[1;34m(self, x, edge_index)\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0medge_index\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mh\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconv1\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0medge_index\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 17\u001b[0m \u001b[0mh\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mh\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtanh\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 18\u001b[0m \u001b[0mh\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconv2\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mh\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0medge_index\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 392 | "\u001b[1;32mD:\\anaconda3\\envs\\graph\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m 1108\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[0;32m 1109\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[1;32m-> 1110\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1111\u001b[0m \u001b[1;31m# Do not call functions when jit is used\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1112\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 393 | "\u001b[1;32mD:\\anaconda3\\envs\\graph\\lib\\site-packages\\torch_geometric\\nn\\conv\\gcn_conv.py\u001b[0m in \u001b[0;36mforward\u001b[1;34m(self, x, edge_index, edge_weight)\u001b[0m\n\u001b[0;32m 189\u001b[0m \u001b[0medge_index\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcache\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 190\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 191\u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlin\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 192\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 193\u001b[0m \u001b[1;31m# propagate_type: (x: Tensor, edge_weight: OptTensor)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 394 | "\u001b[1;32mD:\\anaconda3\\envs\\graph\\lib\\site-packages\\torch\\nn\\modules\\module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[1;34m(self, *input, **kwargs)\u001b[0m\n\u001b[0;32m 1108\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[0;32m 1109\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[1;32m-> 1110\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0minput\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1111\u001b[0m \u001b[1;31m# Do not call functions when jit is used\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1112\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 395 | "\u001b[1;32mD:\\anaconda3\\envs\\graph\\lib\\site-packages\\torch_geometric\\nn\\dense\\linear.py\u001b[0m in \u001b[0;36mforward\u001b[1;34m(self, 
x)\u001b[0m\n\u001b[0;32m 116\u001b[0m \u001b[0mx\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mTensor\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mfeatures\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 117\u001b[0m \"\"\"\n\u001b[1;32m--> 118\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mF\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mlinear\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mweight\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbias\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 119\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 120\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mno_grad\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 396 | "\u001b[1;31mRuntimeError\u001b[0m: mat1 and mat2 shapes cannot be multiplied (34x34 and 21x4)" 397 | ] 398 | } 399 | ], 400 | "source": [ 401 | "import time\n", 402 | "\n", 403 | "model = GCN()\n", 404 | "criterion = torch.nn.CrossEntropyLoss() # Define loss criterion.\n", 405 | "optimizer = torch.optim.Adam(model.parameters(), lr=0.01) # Define optimizer.\n", 406 | "\n", 407 | "def train(data):\n", 408 | " optimizer.zero_grad() # Clear gradients.\n", 409 | " out, h = model(data.x, data.edge_index) # Perform a single forward pass.\n", 410 | " loss = criterion(out[data.train_mask], data.y[data.train_mask]) # Compute the loss solely based on the training nodes.\n", 411 | " loss.backward() # Derive gradients.\n", 412 | " optimizer.step() # Update parameters based on gradients.\n", 413 | " return loss, h\n", 414 | "\n", 415 | "for epoch in range(401):\n", 416 | " loss, h = train(data)\n", 417 | " # Visualize the node embeddings every 10 epochs\n", 418 | " # if epoch % 10 == 0:\n", 419 | " # visualize(h, color=data.y, epoch=epoch, loss=loss)\n", 420 | " # time.sleep(0.3)\n" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": null, 426 | "metadata": {}, 427 | "outputs": [], 428 | "source": [] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [] 436 | } 437 | ], 438 | "metadata": { 439 | "interpreter": { 440 | "hash": "d118e8625ddf9565a7a601079b239702bff57fbe54bd264ef0a63b2e5e00a7ee" 441 | }, 442 | "kernelspec": { 443 | "display_name": "Python 3.7.11 ('graph')", 444 | "language": "python", 445 | "name": "python3" 446 | }, 447 | "language_info": { 448 | "codemirror_mode": { 449 | "name": "ipython", 450 | "version": 3 451 | }, 452 | "file_extension": ".py", 453 | "mimetype": "text/x-python", 454 | "name": "python", 455 | "nbconvert_exporter": "python", 456 | "pygments_lexer": "ipython3", 457 | "version": "3.7.11" 458 | }, 459 | "orig_nbformat": 4 460 | }, 461 | "nbformat": 4, 462 | "nbformat_minor": 2 463 | } 464 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CommunityDetection 2 | 3 | 一些经典的社区划分算法的python3实现, 包括KL算法、GN, FN, LPA, SLPA, COPAR、Louvain 算法、LFM算法、InfoMap算法等。 4 | 5 | [具体算法可以查看博客](https://blog.csdn.net/qq_16543881/category_11619276.html?spm=1001.2014.3001.5482) -------------------------------------------------------------------------------- /SCAN.py: 
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import random
3 | import math
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | class SCAN:
8 |
9 | def __init__(self, G, epsilon=0.5, mu=3):
10 | self._G = G
11 | self._epsilon = epsilon
12 | self._mu = mu
13 |
14 | # the epsilon-neighbourhood of a node is the set of its neighbours whose structural similarity to it is at least epsilon
15 | def get_epsilon_neighbor(self, node):
16 | return [neighbor for neighbor in self._G.neighbors(node) if
17 | cal_similarity(self._G, node, neighbor) >= self._epsilon]
18 |
19 | # core test
20 | def is_core(self, node):
21 | # a core is a node with at least mu epsilon-neighbours
22 | return len(self.get_epsilon_neighbor(node)) >= self._mu
23 |
24 | # find hubs and outliers among the nodes left outside all communities
25 | def get_hubs_outliers(self, communities):
26 | other_nodes = set(list(self._G.nodes()))
27 | node_community = {}
28 | for i, c in enumerate(communities):
29 | for node in c:
30 | # drop nodes that already belong to a community
31 | other_nodes.discard(node)
32 | # record each node's community label
33 | node_community[node] = i
34 | hubs = []
35 | outliers = []
36 | # inspect the nodes that were never assigned to a community
37 | for node in other_nodes:
38 | neighbors = self._G.neighbors(node)
39 | # count the distinct communities among its neighbours: more than one makes the node a hub, otherwise it is an outlier
40 | neighbor_community = set()
41 | for neighbor in neighbors:
42 | if neighbor in node_community:
43 | neighbor_community.add(node_community[neighbor])
44 | if len(neighbor_community) > 1:
45 | hubs.append(node)
46 | else:
47 | outliers.append(node)
48 | return hubs, outliers
49 |
50 | def execute(self):
51 | # visit the nodes in random order
52 | visit_sequence = list(self._G.nodes())
53 | random.shuffle(visit_sequence)
54 | communities = []
55 | for node_name in visit_sequence:
56 | node = self._G.nodes[node_name]
57 | # skip nodes that are already classified
58 | if node.get("classified"):
59 | continue
60 | # every unclassified core seeds a new community
61 | if self.is_core(node_name): # a new community
62 | community = [node_name]
63 | communities.append(community)
64 | node["type"] = "core"
65 | node["classified"] = True
66 | # seed the queue with the core's epsilon-neighbours
67 | queue = self.get_epsilon_neighbor(node_name)
68 | # put all epsilon-neighbours of the core into a queue; for each queued vertex, compute the vertices directly reachable from it and enqueue those still unclassified; repeat until the queue is empty
69 | while len(queue) != 0:
70 | temp = queue.pop(0)
71 | # an unclassified member is marked classified and joins the community
72 | if not self._G.nodes[temp].get("classified"):
73 | self._G.nodes[temp]["classified"] = True
74 | community.append(temp)
75 | # a non-core member ends the expansion here: structure-reachability only continues through cores
76 | if not self.is_core(temp):
77 | continue
78 | # a core member can reach further: absorb its still-unclassified epsilon-neighbours into the current community
79 | R = self.get_epsilon_neighbor(temp)
80 | for r in R:
81 | node_r = self._G.nodes[r]
82 | is_classified = node_r.get("classified")
83 | if is_classified:
84 | continue
85 | node_r["classified"] = True
86 | community.append(r)
87 | # r may itself be a core able to reach further nodes, and its neighbourhood has not been examined yet, so enqueue it
88 | queue.append(r)
89 | return communities
90 |
91 |
92 | def cal_similarity(G, node_i, node_j):
93 | # structural similarity: the number of common neighbours divided by the
94 | # geometric mean of the two neighbourhood sizes, where each neighbourhood
95 | # includes the node itself
96 | s1 = set(G.neighbors(node_i))
97 | s1.add(node_i)
98 | s2 = set(G.neighbors(node_j))
99 | s2.add(node_j)
100 | return len(s1 & s2) / math.sqrt(len(s1) * len(s2))
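# Worked example of the similarity above (a sketch, not part of the original
# script): in the 4-node graph with edges 1-2, 2-3, 1-3 and 3-4, the closed
# neighbourhoods are N[1] = {1, 2, 3} and N[4] = {3, 4}, so
# sigma(1, 4) = 1 / sqrt(3 * 2), roughly 0.41, which falls below the default
# epsilon = 0.5, while sigma(1, 2) = 3 / sqrt(3 * 3) = 1.0.
def _similarity_demo():
    H = nx.Graph([(1, 2), (2, 3), (1, 3), (3, 4)])
    print(cal_similarity(H, 1, 4))  # ~0.408
    print(cal_similarity(H, 1, 2))  # 1.0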

def cal_similarity(G, node_i, node_j):
    # Structural similarity: the number of common neighbors divided by the
    # geometric mean of the two neighborhood sizes (each neighborhood
    # includes the node itself)
    s1 = set(G.neighbors(node_i))
    s1.add(node_i)
    s2 = set(G.neighbors(node_j))
    s2.add(node_j)
    return len(s1 & s2) / math.sqrt(len(s1) * len(s2))


def draw_spring(G, pos, com):
    """
    G: the graph
    com: the detected communities
    node_size: node size
    node_color: node color
    node_shape: node shape
    with_labels=True: draw node labels
    """
    NodeId = list(G.nodes())
    node_size = [G.degree(i) ** 1.2 * 90 for i in NodeId]  # node sizes

    plt.figure(figsize=(8, 6))  # figure size
    nx.draw(G, pos, with_labels=True, node_size=node_size, node_color='w', node_shape='.')

    color_list = ['pink', 'orange', 'r', 'g', 'b', 'y',
                  'm', 'gray', 'black', 'c', 'brown']
    # node_shape = ['s','o','H','D']

    for i in range(len(com)):
        nx.draw_networkx_nodes(G, pos, nodelist=com[i], node_color=color_list[i])
    plt.show()


# Load a graph dataset from an edge list
def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


if __name__ == '__main__':
    # G = nx.karate_club_graph()
    G = load_graph('data/dolphin.txt')
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=True, font_weight='bold')
    plt.show()

    algorithm = SCAN(G, 0.5, 3)
    communities = algorithm.execute()
    for community in communities:
        print('community: ', sorted(community))
    hubs_outliers = algorithm.get_hubs_outliers(communities)
    print('hubs: ', hubs_outliers[0])
    print('outliers: ', hubs_outliers[1])

    draw_spring(G, pos, communities)
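
Note that execute() stores a "classified" flag on the node attributes, so a parameter sweep over ϵ needs a fresh copy of the graph per run. A possible sketch, assuming the repo layout above so that SCAN and load_graph can be imported from SCAN.py:

from SCAN import SCAN, load_graph

G = load_graph('data/dolphin.txt')
for eps in (0.3, 0.5, 0.7):
    H = G.copy()  # fresh copy: execute() mutates node attributes
    communities = SCAN(H, epsilon=eps, mu=3).execute()
    print(eps, len(communities))

-------------------------------------------------------------------------------- /SLPA.py: --------------------------------------------------------------------------------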
import collections
import time
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt


class SLPA:
    def __init__(self, G, T, r):
        """
        :param G: the graph itself
        :param T: number of iterations T
        :param r: threshold r on how often a label must be retained
        """
        self._G = G
        self._n = len(G.nodes(False))  # number of nodes
        self._T = T
        self._r = r

    def execute(self):
        # Copy the edge weights into a dict for convenient lookup
        weight = {j: {} for j in self._G.nodes()}
        for q in weight.keys():
            for m in self._G[q].keys():
                # weight[q][m] = self._G[q][m]['weight']
                weight[q][m] = 1
        # Label memory per node; initially each node holds its own label once
        memory = {i: {i: 1} for i in self._G.nodes()}
        # Sweep over all nodes T times
        for t in range(self._T):
            listenerslist = list(self._G.nodes())
            # Randomize the visiting order
            np.random.shuffle(listenerslist)
            # Visit every node as a listener
            for listener in listenerslist:
                # The keys of a node's adjacency dict are its neighbors
                # speakerlist = self._G[listener].keys()
                labels = collections.defaultdict(int)
                # Every neighbor of the listener acts as a speaker
                for speaker in self._G.neighbors(listener):
                    total = float(sum(memory[speaker].values()))
                    # The speaker sends one label drawn from its memory with
                    # probability proportional to that label's frequency.
                    # np.random.multinomial(1, pvals) performs a single trial
                    # of a multinomial distribution and returns a one-hot
                    # vector, so argmax() recovers the index of the drawn
                    # label; the received label is weighted by the
                    # listener-speaker edge weight.
                    labels[list(memory[speaker].keys())[
                        np.random.multinomial(1, [freq / total for freq in memory[speaker].values()]).argmax()]] += \
                        weight[listener][speaker]
                # The listener accepts the most popular label and records it
                maxlabel = max(labels, key=labels.get)
                if maxlabel in memory[listener]:
                    memory[listener][maxlabel] += 1
                else:
                    memory[listener][maxlabel] = 1.5  # new labels enter with weight 1.5 here; SLPA_V2.py uses 1
        # Alternatively, keep only the single most frequent label per node:
        # for primary in memory:
        #     p = list(memory[primary].keys())[
        #         np.random.multinomial(1, [freq / total for freq in memory[primary].values()]).argmax()]
        #     memory[primary] = {p: memory[primary][p]}

        # Post-processing: drop labels retained less often than threshold r
        for m in memory.values():
            sum_label = sum(m.values())
            threshold_num = sum_label * self._r
            for k, v in list(m.items()):
                if v < threshold_num:
                    del m[k]

        communities = collections.defaultdict(lambda: list())
        # Nodes that kept the same label end up in the same community
        for primary, change in memory.items():
            for label in change.keys():
                communities[label].append(primary)
        # Returns a dict view; each value is the node list of one community
        return communities.values()
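
# The speaker rule above draws one label from the speaker's memory with
# probability proportional to its frequency. A standalone sketch of that
# draw (the memory contents are illustrative):
#
#   memory_of_speaker = {7: 3, 12: 1}   # label 7 seen 3 times, label 12 once
#   labels = list(memory_of_speaker.keys())
#   pvals = [c / 4 for c in memory_of_speaker.values()]   # [0.75, 0.25]
#   # one trial returns a one-hot vector such as [1, 0]; argmax() recovers
#   # the index of the drawn label
#   drawn = labels[np.random.multinomial(1, pvals).argmax()]
#
# so the listener receives label 7 with probability 0.75 and label 12 with
# probability 0.25.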

def cal_Q(partition, G):  # compute modularity Q
    m = len(G.edges(None, False))  # with data=True edges are 3-tuples (u, v, ddict); with data=False they are 2-tuples (u, v)
    a = []
    e = []
    for community in partition:  # take each community in turn
        t = 0.0
        for node in community:  # every vertex of the community
            t += len([x for x in G.neighbors(node)])  # G.neighbors(node) lists the adjacent nodes
        a.append(t / (2 * m))
    for community in partition:
        t = 0.0
        for i in range(len(community)):
            for j in range(len(community)):
                if (G.has_edge(community[i], community[j])):
                    t += 1.0
        e.append(t / (2 * m))

    q = 0.0
    for ei, ai in zip(e, a):
        q += (ei - ai ** 2)
    return q


# Visualize the partition
def showCommunity(G, partition, pos):
    # Nodes of the same community share a marker; edges between communities
    # are drawn as thick lines
    cluster = {}
    labels = {}
    for index, item in enumerate(partition):
        for nodeID in item:
            labels[nodeID] = r'$' + str(nodeID) + '$'  # label shown in the plot
            cluster[nodeID] = index  # community index of the node

    # Draw the nodes (only 5 colors/shapes are provided, so at most 5
    # communities can be drawn with distinct styles)
    colors = ['r', 'g', 'b', 'y', 'm']
    shapes = ['v', 'D', 'o', '^', '<']
    for index, item in enumerate(partition):
        nx.draw_networkx_nodes(G, pos, nodelist=item,
                               node_color=colors[index],
                               node_shape=shapes[index],
                               node_size=350,
                               alpha=1)

    # Draw the edges
    edges = {len(partition): []}
    for link in G.edges():
        # inter-cluster links
        if cluster[link[0]] != cluster[link[1]]:
            edges[len(partition)].append(link)
        else:
            # intra-cluster links
            if cluster[link[0]] not in edges:
                edges[cluster[link[0]]] = [link]
            else:
                edges[cluster[link[0]]].append(link)

    for index, edgelist in enumerate(edges.values()):
        # intra-cluster
        if index < len(partition):
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=1, alpha=0.8, edge_color=colors[index])
        else:
            # inter-cluster
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=3, alpha=0.8, edge_color=colors[index])

    # Draw the labels
    nx.draw_networkx_labels(G, pos, labels, font_size=12)

    plt.axis('off')
    plt.show()


def cal_EQ(cover, G):
    m = len(G.edges(None, False))  # number of edges
    # For each node, the set of communities it belongs to
    vertex_community = collections.defaultdict(lambda: set())
    # i is the community index, c the nodes of that community
    for i, c in enumerate(cover):
        # v is a node of the community
        for v in c:
            # record that node v belongs to community i
            vertex_community[v].add(i)
    total = 0.0
    for c in cover:
        for i in c:
            # o_i: number of communities node i belongs to
            o_i = len(vertex_community[i])
            # k_i: degree of node i (number of incident edges)
            k_i = len(G[i])
            for j in c:
                t = 0.0
                # o_j: number of communities node j belongs to
                o_j = len(vertex_community[j])
                # k_j: degree of node j (number of incident edges)
                k_j = len(G[j])
                if G.has_edge(i, j):
                    t += 1.0 / (o_i * o_j)
                t -= k_i * k_j / (2 * m * o_i * o_j)
                total += t
    return round(total / (2 * m), 4)


def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


if __name__ == '__main__':
    # G = nx.karate_club_graph()
    # pos = nx.spring_layout(G)
    G = load_graph('data/dolphin.txt')
    start_time = time.time()
    algorithm = SLPA(G, 20, 0.5)
    communities = algorithm.execute()
    end_time = time.time()
    for community in communities:
        print(community)

    print(cal_EQ(communities, G))
    # visualize the result
    # showCommunity(G, communities, pos)
    print(f'Running time: {end_time - start_time}')
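
For reference, cal_EQ above implements the extended modularity for overlapping covers, in which each node's contribution is shared among the $o_i$ communities containing it:

    EQ = \frac{1}{2m} \sum_{c} \sum_{i,j \in c} \frac{1}{o_i o_j} \left( A_{ij} - \frac{k_i k_j}{2m} \right)

where $m$ is the number of edges, $k_i$ the degree of node $i$, and $o_i$ the number of communities node $i$ belongs to. For a crisp partition (all $o_i = 1$) it reduces to the modularity $Q$ computed by cal_Q.

-------------------------------------------------------------------------------- /SLPA_V2.py: --------------------------------------------------------------------------------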
import collections
import time
import random
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt


class SLPA:
    def __init__(self, G, T, r):
        """
        :param G: the graph itself
        :param T: number of iterations T
        :param r: threshold r on how often a label must be retained
        """
        self._G = G
        self._n = len(G.nodes(False))  # number of nodes
        self._T = T
        self._r = r

    def execute(self):
        # Initialize the per-node label memory
        # (assumes the nodes are labeled 0..n-1)
        node_memory = []
        for i in range(self._n):
            node_memory.append({i: 1})

        # Main iteration
        for t in range(self._T):
            # Visit the listeners in a random order
            # np.random.permutation(): randomly permute a sequence
            order = [x for x in np.random.permutation(self._n)]
            for i in order:
                label_list = {}
                # Each speaker sends one label, drawn from its memory in
                # proportion to the label frequencies, to the listener
                for j in self._G.neighbors(i):
                    sum_label = sum(node_memory[j].values())
                    label = list(node_memory[j].keys())[np.random.multinomial(
                        1, [float(c) / sum_label for c in node_memory[j].values()]).argmax()]
                    label_list[label] = label_list.setdefault(label, 0) + 1
                # The listener adds the most popular label to its memory,
                # breaking ties at random
                max_v = max(label_list.values())
                # selected_label = max(label_list, key=label_list.get)
                selected_label = random.choice([item[0] for item in label_list.items() if item[1] == max_v])
                # setdefault inserts the key with the default value if missing
                node_memory[i][selected_label] = node_memory[i].setdefault(selected_label, 0) + 1

        # Post-processing: drop labels retained less often than threshold r
        for memory in node_memory:
            sum_label = sum(memory.values())
            threshold_num = sum_label * self._r
            for k, v in list(memory.items()):
                if v < threshold_num:
                    del memory[k]

        communities = collections.defaultdict(lambda: list())
        # Nodes that kept the same label end up in the same community
        for primary, change in enumerate(node_memory):
            for label in change.keys():
                communities[label].append(primary)
        # Returns a dict view; each value is the node list of one community
        return communities.values()


def cal_Q(partition, G):  # compute modularity Q
    m = len(G.edges(None, False))  # with data=True edges are 3-tuples (u, v, ddict); with data=False they are 2-tuples (u, v)
    a = []
    e = []
    for community in partition:  # take each community in turn
        t = 0.0
        for node in community:  # every vertex of the community
            t += len([x for x in G.neighbors(node)])  # G.neighbors(node) lists the adjacent nodes
        a.append(t / (2 * m))
    for community in partition:
        t = 0.0
        for i in range(len(community)):
            for j in range(len(community)):
                if (G.has_edge(community[i], community[j])):
                    t += 1.0
        e.append(t / (2 * m))

    q = 0.0
    for ei, ai in zip(e, a):
        q += (ei - ai ** 2)
    return q


# Visualize the partition
def showCommunity(G, partition, pos):
    # Nodes of the same community share a marker; edges between communities
    # are drawn as thick lines
    cluster = {}
    labels = {}
    for index, item in enumerate(partition):
        for nodeID in item:
            labels[nodeID] = r'$' + str(nodeID) + '$'  # label shown in the plot
            cluster[nodeID] = index  # community index of the node

    # Draw the nodes
    colors = ['r', 'g', 'b', 'y', 'm']
    shapes = ['v', 'D', 'o', '^', '<']
    for index, item in enumerate(partition):
        nx.draw_networkx_nodes(G, pos, nodelist=item,
                               node_color=colors[index],
                               node_shape=shapes[index],
                               node_size=350,
                               alpha=1)

    # Draw the edges
    edges = {len(partition): []}
    for link in G.edges():
        # inter-cluster links
        if cluster[link[0]] != cluster[link[1]]:
            edges[len(partition)].append(link)
        else:
            # intra-cluster links
            if cluster[link[0]] not in edges:
                edges[cluster[link[0]]] = [link]
            else:
                edges[cluster[link[0]]].append(link)

    for index, edgelist in enumerate(edges.values()):
        # intra-cluster
        if index < len(partition):
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=1, alpha=0.8, edge_color=colors[index])
        else:
            # inter-cluster
            nx.draw_networkx_edges(G, pos,
                                   edgelist=edgelist,
                                   width=3, alpha=0.8, edge_color=colors[index])

    # Draw the labels
    nx.draw_networkx_labels(G, pos, labels, font_size=12)

    plt.axis('off')
    plt.show()


def cal_EQ(cover, G):
    m = len(G.edges(None, False))  # number of edges
    # For each node, the set of communities it belongs to
    vertex_community = collections.defaultdict(lambda: set())
    # i is the community index, c the nodes of that community
    for i, c in enumerate(cover):
        # v is a node of the community
        for v in c:
            # record that node v belongs to community i
            vertex_community[v].add(i)
    total = 0.0
    for c in cover:
        for i in c:
            # o_i: number of communities node i belongs to
            o_i = len(vertex_community[i])
            # k_i: degree of node i (number of incident edges)
            k_i = len(G[i])
            for j in c:
                t = 0.0
                # o_j: number of communities node j belongs to
                o_j = len(vertex_community[j])
                # k_j: degree of node j (number of incident edges)
                k_j = len(G[j])
                if G.has_edge(i, j):
                    t += 1.0 / (o_i * o_j)
                t -= k_i * k_j / (2 * m * o_i * o_j)
                total += t
    return round(total / (2 * m), 4)


def load_graph(path):
    G = nx.Graph()
    with open(path, 'r') as text:
        for line in text:
            vertices = line.strip().split(' ')
            source = int(vertices[0])
            target = int(vertices[1])
            G.add_edge(source, target)
    return G


if __name__ == '__main__':
    # G = nx.karate_club_graph()
    # pos = nx.spring_layout(G)
    G = load_graph('data/dolphin.txt')
    start_time = time.time()
    algorithm = SLPA(G, 20, 0.5)
    communities = algorithm.execute()
    end_time = time.time()
    for i, community in enumerate(communities):
        print(i, community)

    print(cal_EQ(communities, G))
    print(f'Running time: {end_time - start_time}')
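
Both SLPA variants prune a node's memory with the same rule: a label survives only if it was retained in at least a fraction r of the iterations. A minimal sketch of that step with made-up memory contents:

memory = {3: 12, 8: 6, 15: 2}          # label -> retention count (illustrative)
r = 0.2
threshold = sum(memory.values()) * r   # 20 * 0.2 = 4.0
kept = {k: v for k, v in memory.items() if v >= threshold}
print(kept)                            # {3: 12, 8: 6}: the node overlaps two communities

-------------------------------------------------------------------------------- /SpectralClustering.py: --------------------------------------------------------------------------------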
import networkx as nx
import numpy as np
from sklearn.cluster import KMeans
import scipy.linalg as linalg
from matplotlib import pyplot as plt


def partition(G, k):
    # Adjacency matrix
    A = nx.to_numpy_array(G)
    # Degree matrix
    D = degree_matrix(G)
    # Unnormalized Laplacian
    L = D - A

    # Symmetrically normalized Laplacian L_sym = D^(-1/2) L D^(-1/2)
    # (assumes no isolated nodes, otherwise D is singular)
    Dn = np.power(np.linalg.matrix_power(D, -1), 0.5)
    L = np.dot(np.dot(Dn, L), Dn)
    # L = np.dot(Dn, L)

    # L_sym is symmetric, so eigh returns real eigenvalues in ascending
    # order with real eigenvectors (eig can return complex values, which
    # break KMeans and make the eigenvalue lookup ambiguous)
    eigvals, eigvecs = linalg.eigh(L)

    # Eigenvectors of the k smallest eigenvalues
    k_eigvecs = eigvecs[:, 0:k]

    # Row normalization (optional)
    # sum_co = k_eigvecs.sum(axis=0)
    # norm_ans = k_eigvecs/sum_co

    # Cluster the spectral embedding with k-means
    result = KMeans(n_clusters=k).fit_predict(k_eigvecs)
    # result = KMeans(n_clusters=k).fit_predict(norm_ans)
    return result


def degree_matrix(G):
    n = G.number_of_nodes()
    V = [node for node in G.nodes()]
    D = np.zeros((n, n))
    for i in range(n):
        node = V[i]
        d_node = G.degree(node)
        D[i][i] = d_node
    return np.array(D)


if __name__ == '__main__':

    G = nx.read_edgelist("data/football.txt")
    k = 12
    sc_com = partition(G, k)
    print(sc_com)

    # Visualization
    pos = nx.spring_layout(G)
    nx.draw(G, pos, with_labels=False, node_size=70, width=0.5, node_color=sc_com)
    plt.show()

    V = [node for node in G.nodes()]
    com_dict = {node: com for node, com in zip(V, sc_com)}
    com = [[V[i] for i in range(G.number_of_nodes()) if sc_com[i] == j] for j in range(k)]

    # Build the graph used for the community visualization
    G_graph = nx.Graph()
    for each in com:
        G_graph.update(nx.subgraph(G, each))
    color = [com_dict[node] for node in G_graph.nodes()]

    # Visualization
    pos = nx.spring_layout(G_graph, seed=4, k=0.33)
    nx.draw(G, pos, with_labels=False, node_size=1, width=0.1, alpha=0.2)
    nx.draw(G_graph, pos, with_labels=True, node_color=color, node_size=70, width=0.5, font_size=5,
            font_color='#000000')
    plt.show()
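
For reference, the normalization in partition() builds the symmetric normalized Laplacian

    L_{sym} = D^{-1/2} (D - A) D^{-1/2} = I - D^{-1/2} A D^{-1/2}

and the eigenvectors of its k smallest eigenvalues form the n-by-k spectral embedding that is handed to k-means.

-------------------------------------------------------------------------------- /Walktrap.py: --------------------------------------------------------------------------------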
import numpy as np
import networkx as nx
from heapq import heappush, heappop
from matplotlib import pyplot as plt
import copy
import time


def walktrap(G, t, verbose=False):
    class Community:
        def __init__(self, new_C_id, C1=None, C2=None):
            self.id = new_C_id
            # a single node as its own community
            if C1 is None:
                self.size = 1
                self.P_c = P_t[self.id]  # probability vector
                self.adj_coms = {}
                self.vertices = set([self.id])
                self.internal_weight = 0.
                self.total_weight = self.internal_weight + (len([id for id, x in enumerate(A[self.id]) if
                                                                 x == 1. and id != self.id]) / 2.)  # external edges have 0.5 weight; ignore self-loops
            # community created by merging C1 and C2
            else:
                self.size = C1.size + C2.size
                self.P_c = (C1.size * C1.P_c + C2.size * C2.P_c) / self.size
                # Merge the info about adjacent communities, but remove C1, C2
                self.adj_coms = dict(C1.adj_coms.items() | C2.adj_coms.items())
                del self.adj_coms[C1.id]
                del self.adj_coms[C2.id]
                self.vertices = C1.vertices.union(C2.vertices)
                weight_between_C1C2 = 0.
                for v1 in C1.vertices:
                    for id, x in enumerate(A[v1]):
                        if x == 1. and id in C2.vertices:
                            weight_between_C1C2 += 1.
                self.internal_weight = C1.internal_weight + C2.internal_weight + weight_between_C1C2
                self.total_weight = C1.total_weight + C2.total_weight

        def modularity(self):
            # modularity contribution of this community
            return (self.internal_weight - (self.total_weight * self.total_weight / G_total_weight)) / G_total_weight

    # number of nodes
    N = G.number_of_nodes()
    # adjacency matrix (to_numpy_matrix was removed from networkx; use to_numpy_array)
    A = nx.to_numpy_array(G)

    # transition matrix P and diagonal degree matrix D
    Dx = np.zeros((N, N))
    P = np.zeros((N, N))
    # each row of the adjacency matrix holds one node's adjacencies
    for i, A_row in enumerate(A):
        # the row sum of the adjacency matrix is the node's degree
        d_i = np.sum(A_row)
        # transition probability: each neighbor's weight divided by the degree
        P[i] = A_row / d_i
        # later formulas only need D^(-0.5), so store that directly
        Dx[i, i] = d_i ** (-0.5)

    # random walks of length t
    P_t = np.linalg.matrix_power(P, t)

    # total edge weight of the graph
    G_total_weight = G.number_of_edges()

    # current communities
    community_count = N
    communities = {}
    # initially every node is its own community
    for C_id in range(N):
        communities[C_id] = Community(C_id)

    # heap of Δσ values:
    min_sigma_heap = []
    # compute Δσ for every pair of adjacent nodes
    for e in G.edges:
        C1_id = e[0]
        C2_id = e[1]
        if C1_id != C2_id:
            # for singletons the size factor |C1||C2|/(|C1|+|C2|) is 0.5
            ds = (0.5 / N) * np.sum(np.square(np.matmul(Dx, P_t[C1_id]) - np.matmul(Dx, P_t[C2_id])))
            # store it in a min-heap
            heappush(min_sigma_heap, (ds, C1_id, C2_id))
            # record the value on both communities' adjacency maps
            communities[C1_id].adj_coms[C2_id] = ds
            communities[C2_id].adj_coms[C1_id] = ds
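
    # The loop above instantiates, for singleton communities, the general
    # Walktrap distance (a sketch of the formula, in the paper's notation):
    #
    #   Δσ(C1, C2) = (1/N) * |C1||C2| / (|C1| + |C2|)
    #                * || D^(-1/2) P_{C1.}^t - D^(-1/2) P_{C2.}^t ||^2
    #
    # With |C1| = |C2| = 1 the size factor is 1*1/(1+1) = 0.5, which gives
    # the 0.5/N prefactor used in the heap initialization.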

    delta_sigmas = []

    partitions = [set(np.arange(N))]
    # the first partition has every node in its own community;
    # compute the initial modularity Q
    modularities = [np.sum([communities[C_id].modularity() for C_id in partitions[0]])]
    if verbose:
        print("Partition 0: ", partitions[0])
        print("Q(0) = ", modularities[0])

    # main loop
    for k in range(1, N):
        # merge the pair C1, C2 with the smallest Δσ, making sure both are
        # still present in the current partition
        while min_sigma_heap:
            # pop the current minimum
            delta_sigma_C1C2, C1_id, C2_id = heappop(min_sigma_heap)
            if C1_id in partitions[k - 1] and C2_id in partitions[k - 1]:
                break
        # record Δσ at this step
        delta_sigmas.append(delta_sigma_C1C2)

        # merge C1 and C2 into C3 and assign it an id
        C3_id = community_count
        community_count += 1  # increase for the next one
        communities[C3_id] = Community(C3_id, communities[C1_id], communities[C2_id])

        # append the new (k-th) partition
        partitions.append(copy.deepcopy(partitions[k - 1]))
        partitions[k].add(C3_id)  # add C3_id
        partitions[k].remove(C1_id)
        partitions[k].remove(C2_id)

        # update Δσ between C3 and the former neighbors of C1 or C2
        # by iterating over the communities adjacent to C3
        for C_id in communities[C3_id].adj_coms.keys():
            # if C neighbors both C1 and C2, use Eq. 4
            if (C_id in communities[C1_id].adj_coms) and (C_id in communities[C2_id].adj_coms):
                delta_sigma_C1C = communities[C1_id].adj_coms[C_id]
                delta_sigma_C2C = communities[C2_id].adj_coms[C_id]
                # apply Eq. 4 to (C, C3)
                ds = ((communities[C1_id].size + communities[C_id].size) * delta_sigma_C1C + (
                        communities[C2_id].size + communities[C_id].size) * delta_sigma_C2C - communities[
                          C_id].size * delta_sigma_C1C2) / (communities[C3_id].size + communities[C_id].size)

            # otherwise apply Eq. 3 to (C, C3)
            else:
                ds = np.sum(np.square(np.matmul(Dx, communities[C_id].P_c) - np.matmul(Dx, communities[C3_id].P_c))) * \
                     communities[C_id].size * communities[C3_id].size / (
                             (communities[C_id].size + communities[C3_id].size) * N)

            # push the new value and update the Δσ maps of C3 and C
            heappush(min_sigma_heap, (ds, C3_id, C_id))
            communities[C3_id].adj_coms[C_id] = ds
            communities[C_id].adj_coms[C3_id] = ds

        # compute and store the modularity of the current partition
        modularities.append(np.sum([communities[C_id].modularity() for C_id in partitions[k]]))

        if verbose:
            print("Partition ", k, ": ", partitions[k])
            print("\tMerging ", C1_id, " + ", C2_id, " --> ", C3_id)
            print("\tQ(", k, ") = ", modularities[k])
            print("\tdelta_sigma = ", delta_sigmas[k - 1])

    return np.array(partitions), communities, np.array(delta_sigmas), np.array(modularities)


# compute the Rand index between two partitions
def calculate_rand_index(P1, P2):
    N = 0
    sum_intersect = 0.
    sum_C1 = 0.
    sum_C2 = np.sum([len(s) ** 2 for s in P2])
    for s1 in P1:
        N += len(s1)
        sum_C1 += len(s1) ** 2
        for s2 in P2:
            sum_intersect += len(s1.intersection(s2)) ** 2
    return (N * N * sum_intersect - sum_C1 * sum_C2) / (0.5 * N * N * (sum_C1 + sum_C2) - sum_C1 * sum_C2)


def partition_to_plot(coms, partition):
    p_dict = {}
    for i, C_id in enumerate(partition):
        for v in coms[C_id].vertices:
            p_dict[v] = i
    return p_dict


def partition_dict_to_sets(d):
    inverse_dict = {}
    for k, v in d.items():
        if v in inverse_dict:
            inverse_dict[v].add(k)
        else:
            inverse_dict[v] = set([k])

    return inverse_dict.values()


def partition_set_to_sets(comms, partition):
    list_of_sets = []
    for C_id in partition:
        list_of_sets.append(copy.deepcopy(comms[C_id].vertices))
    return list_of_sets


if __name__ == '__main__':
    G = nx.read_edgelist("data/football.txt")
    k = 12
    G = nx.convert_node_labels_to_integers(G)
    pos = nx.spring_layout(G)

    t = 2
    parts, coms, _, Qs = walktrap(G, t)
    Qmax_index = np.argmax(Qs)
    my_best_part = partition_to_plot(coms, parts[Qmax_index])
    sort_p = sorted(my_best_part.items(), key=lambda x: x[0])
    # print([x[1] for x in sort_p])
    nx.draw(G, pos, node_color=[x[1] for x in sort_p])
    plt.show()
    print(my_best_part)

    print(partition_dict_to_sets(my_best_part))
    print(calculate_rand_index(partition_dict_to_sets(my_best_part), partition_dict_to_sets(my_best_part)))
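
calculate_rand_index compares two partitions given as collections of sets and equals 1.0 when they agree exactly. A quick self-check on a toy partition, assuming Walktrap.py is importable from this repo:

from Walktrap import calculate_rand_index

P = [{0, 1, 2}, {3, 4}]
print(calculate_rand_index(P, P))   # 1.0 for identical partitions

-------------------------------------------------------------------------------- /copra.py: --------------------------------------------------------------------------------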
# coding=utf-8
import time
import copy
import networkx as nx
import matplotlib.pyplot as plt
from collections import Counter
import random
import numpy as np


class COPRA:
    def __init__(self, G, T, v):
        """
        :param G: the graph itself
        :param T: number of iterations T
        :param v: maximum number of communities a node may belong to
        """
        self._G = G
        self._n = len(G.nodes(False))  # number of nodes
        self._T = T
        self._v = v

    def Propagate(self, x, old, new, v, asynchronous):
        # Update node x from the label sets of its neighbors
        for eachpoint in self._G.neighbors(x):
            for eachlable in old[eachpoint]:
                b = old[eachpoint][eachlable]
                if eachlable in new[x]:
                    new[x][eachlable] += b
                else:
                    new[x].update({eachlable: b})
        if asynchronous:
            old[x] = copy.deepcopy(new[x])
        self.Normalize(new[x])
        # largest belonging coefficient seen so far
        maxb = 0.0
        # label holding the largest coefficient
        maxc = 0
        # labels scheduled for deletion
        t = []
        # Drop candidates below 1/v; if every coefficient is below the
        # threshold, keep the single largest one instead
        for each in new[x]:
            if new[x][each] < 1 / float(v):
                t.append(each)
                if new[x][each] >= maxb:  # on ties, the last one seen wins
                    maxb = new[x][each]
                    maxc = each
        for i in range(len(t)):
            del new[x][t[i]]
        if len(new[x]) == 0:
            new[x][maxc] = 1
        else:
            self.Normalize(new[x])

    def Normalize(self, x):
        sums = 0.0
        for each in x:
            sums += x[each]
        for each in x:
            if sums != 0:
                x[each] = x[each] / sums

    def id_l(self, l):
        ids = []
        for each in l:
            ids.append(self.id_x(each))
        return ids

    def id_x(self, x):
        ids = []
        for each in x:
            ids.append(each)
        return ids

    def count(self, l):
        counts = {}
        for eachpoint in l:
            for eachlable in eachpoint:
                if eachlable in counts:
                    n = counts[eachlable]
                    counts.update({eachlable: n + 1})
                else:
                    counts.update({eachlable: 1})
        return counts

    def mc(self, cs1, cs2):
        # element-wise minimum of two label-count dicts
        cs = {}
        for each in cs1:
            if each in cs2:
                cs[each] = min(cs1[each], cs2[each])
        return cs

    def execute(self):
        # NOTE: assumes the nodes are labeled 0..n-1 and relies on the
        # module-level names vertices, A, sums and degree_s; the functions
        # Modulartiy and ExtendQ called below are not defined in this file
        label_new = [{} for i in self._G.nodes()]
        label_old = [{i: 1} for i in self._G.nodes()]
        minl = {}
        oldmin = {}
        flag = True  # asynchronous
        itera = 0  # iteration counter
        start = time.perf_counter()  # timing

        visitlist = list(self._G.nodes())
        # Randomize the visiting order
        np.random.shuffle(visitlist)
        # Synchronous/asynchronous iteration
        while True:
            '''
            if flag:
                flag = False
            else:
                flag = True
            '''
            itera += 1
            for each in visitlist:
                self.Propagate(each, label_old, label_new, self._v, flag)
            if self.id_l(label_old) == self.id_l(label_new):
                minl = self.mc(minl, label_new)
            else:
                minl = label_new
            if minl != oldmin:
                label_old = label_new
                oldmin = minl
            else:
                break
        print(itera, label_old)
        coms = {}
        sub = {}
        for each in range(vertices):
            ids = self.id_x(label_old[each])
            for eachc in ids:
                if eachc in coms and eachc in sub:
                    coms[eachc].append(each)
                    sub.update({eachc: set(sub[eachc]) & set(ids)})
                else:
                    coms.update({eachc: [each]})
                    sub.update({eachc: ids})
        print('lastiter', coms)
        # Number of labels each node carries
        o = [0 for i in range(vertices)]
        for eachid in range(vertices):
            for eachl in coms:
                if eachid in coms[eachl]:
                    o[eachid] += 1
        # Deduplicate: remove nodes of contained label sets
        for each in sub:
            if len(sub[each]):
                for eachc in sub[each]:
                    if eachc != each:
                        coms[eachc] = list(set(coms[eachc]) - set(coms[each]))
        # Flatten the labels into a cluster assignment
        clusterment = [0 for i in range(vertices)]
        a = 0
        for eachc in coms:
            if len(coms[eachc]) != 0:
                for e in coms[eachc]:
                    clusterment[e] = a + 1
                a += 1
        degree_s = sorted(degree_s, key=lambda x: x[0], reverse=False)
        elapsed = (time.perf_counter() - start)
        print('t=', elapsed)
        print('result=', coms)
        print('clusterment=', clusterment)
        print('Q =', Modulartiy(A, coms, sums, vertices))
        print('EQ =', ExtendQ(A, coms, sums, degree_s, o))
        # print('NMI=', NMI(coms, coms))
        return coms
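
# The 1/v cut-off above caps how many communities a node can belong to:
# since belonging coefficients are normalized to sum to 1, at most v labels
# can each hold at least 1/v. Worked example with v = 2 (values illustrative):
#
#   new[x] = {5: 0.6, 9: 0.3, 14: 0.1}
#   # 0.3 and 0.1 are below 1/2, so labels 9 and 14 are dropped and the
#   # remainder is re-normalized to {5: 1.0}; had every coefficient been
#   # below 1/v, only the largest one would have been kept.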

if __name__ == '__main__':
    # number of nodes of each dataset
    vertices = [34, 115, 1589, 62]
    # txtlist = ['karate.txt','football.txt','science.txt','dolphins.txt']
    txtlist = ['karate.txt']
    # vertices = [64,128,256,512]
    # txtlist = ['RN1.txt','RN2.txt','RN3.txt','RN4.txt']
    testv = [2, 3, 4, 5]
    for i in range(len(txtlist)):
        print("txt name: <<{}>> vertices num: {}".format(txtlist[i], vertices[i]))
        for ev in testv:
            print('v =', ev)
            # NOTE: LoadAdjacentMatrixData, Degree_Sorting and getcoms are
            # referenced here but not defined in this file
            A = LoadAdjacentMatrixData(txtlist[i], vertices[i])
            degree_s, neighbours, sums = Degree_Sorting(A, vertices[i])
            # print(neighbours)
            getcoms(degree_s, neighbours, sums, A, ev, vertices[i])
-------------------------------------------------------------------------------- /craw_dblp.py: --------------------------------------------------------------------------------
import urllib.request
import re
import os
import xlwt


def get_pdf_arxiv(web_site, path):
    rep = urllib.request.urlopen(urllib.request.Request(web_site))
    page = rep.read().decode('utf-8')
    pdf_download = re.findall('', page, re.S)  # find the matching pdf download link in the page
    if len(pdf_download) != 0:
        print(pdf_download[0])
        try:
            u = urllib.request.urlopen(pdf_download[0])
        except urllib.error.HTTPError:
            print(pdf_download[0], "url file not found")
            return
        block_sz = 8192
        with open(path, 'wb') as f:
            while True:
                buffer = u.read(block_sz)
                if buffer:
                    f.write(buffer)
                else:
                    break
        print("Successfully downloaded " + path)


# Create an Excel workbook
file = xlwt.Workbook()
# Create a worksheet
sheet1 = file.add_sheet(u'表1', cell_overwrite_ok=True)

for j in range(10):
    req = urllib.request.Request(
        'https://dblp.uni-trier.de/search//publ/inc?q=Community%20Detection&s=ydvspc&h=30&b=' + str(
            j))  # only the q= query needs changing; keep everything from &s=ydvspc on
    response = urllib.request.urlopen(req)
    the_page = response.read().decode('utf-8')

    paper_title = re.findall('(.*?)', the_page, re.S)  # extract the paper titles from the page
    paper_pub = re.findall('(.*?)', the_page, re.S)  # extract the publishers
    paper_year = re.findall('', the_page, re.S)  # extract the publication years
    # paper_web = re.findall('view