├── comm ├── __init__.py ├── community_status.py └── community_main.py ├── IM_spread.py ├── README.md ├── main_vary_eps.py ├── main.py ├── main_vary_N.py ├── main_vary_t.py └── utils.py /comm/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | This package implements community detection. 5 | """ 6 | 7 | from .community_main import ( 8 | partition_at_level, 9 | modularity, 10 | best_partition, 11 | generate_dendrogram, 12 | induced_graph, 13 | load_binary, 14 | ) 15 | 16 | -------------------------------------------------------------------------------- /IM_spread.py: -------------------------------------------------------------------------------- 1 | from utils import * 2 | 3 | 4 | 5 | def IM_spread(dataset_name,file_name,seed_size): 6 | 7 | 8 | data_path = './data/%s.txt' %(dataset_name) 9 | 10 | # obtain the set of seed nodes of PrivGraph 11 | S = find_seed(file_name,seed_size=seed_size) 12 | 13 | # calculate the influence spread 14 | influence_spread = cal_spread(data_path,S_all=S,seed_size=seed_size) 15 | 16 | return influence_spread 17 | 18 | 19 | if __name__ == '__main__': 20 | epsilon = 1.5 21 | 22 | seed_size = 20 23 | 24 | # set the dataset 25 | # dataset_name = 'Enron' 26 | # dataset_name = 'CA-HepPh' 27 | # dataset_name = 'Facebook' 28 | dataset_name = 'Chamelon' 29 | 30 | root_path = './result/' 31 | 32 | # import the txt file 33 | file_name = root_path + 'PrivGraph_%s_%.1f.txt' %(dataset_name,epsilon) 34 | 35 | print('dataset:%s,epsilon:%.1f,seed_size:%d'%(dataset_name,epsilon,seed_size)) 36 | 37 | influence_spread = IM_spread(dataset_name,file_name,seed_size) 38 | 39 | print('Influence Spread:',influence_spread) 40 | 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PrivGraph 2 | Implementation of PrivGraph 3 | ## Requirements 4 | 5 | 6 | ``` 7 | numpy >= 1.20.1 8 | pandas >= 1.2.4 9 | networkx >= 2.5 10 | scikit-learn >= 0.24.1 11 | python-louvain >= 0.15 12 | python >= 3.8 13 | ``` 14 | 15 | ## Contents 16 | 17 | The project contains 3 folders and 6 files. 18 | 19 | 1. data (folder): All datasets are in this folder. 20 | 2. comm (folder): This folder is used for community discovery. 21 | 3. result (folder): This folder is used to store the results and contains four examples of synthetic graphs. 22 | 4. main.py (file): The file is used to obtain the results of PrivGraph for End-to-End experiments. 23 | 5. main_vary_N.py (file): The file is used to obtain the results for different number of nodes. 24 | 6. main_vary_eps.py (file): The file is used to obtain the results for different privacy budget allocations. 25 | 7. main_vary_t.py (file): The file is used to obtain the results for different resolution parameters. 26 | 8. IM_spread.py (file): The file is used to obtain the results of influence maximization. 27 | 9. utils.py (file): The file includes some functions that are needed for other files. 28 | 29 | ## Run 30 | 31 | 32 | ``` 33 | ###### Example 1: End to End ###### 34 | python main.py 35 | 36 | ###### Example 2: Impact of the number of nodes ###### 37 | python main_vary_N.py 38 | 39 | ###### Example 3: Impact of the privacy budget allocation ###### 40 | python main_vary_eps.py 41 | 42 | ###### Example 4: Impact of the resolution parameter ###### 43 | python main_vary_t.py 44 | 45 | ###### Example 5: Influence Maximization ###### 46 | python IM_spread.py 47 | ``` 48 | 49 | ## Citation 50 | 51 | ``` 52 | @inproceedings{YZDCCS23, 53 | author = {Quan Yuan and Zhikun Zhang and Linkang Du and Min Chen and Peng Cheng and Mingyang Sun}, 54 | title = {{PrivGraph: Differentially Private Graph Data Publication by Exploiting Community Information}}, 55 | booktitle = {{USENIX Security}}, 56 | publisher = {}, 57 | year = {2023}, 58 | } 59 | ``` -------------------------------------------------------------------------------- /comm/community_status.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | 4 | class Status(object): 5 | 6 | node2com = {} 7 | total_weight = 0 8 | internals = {} 9 | degrees = {} 10 | gdegrees = {} 11 | 12 | def __init__(self): 13 | self.node2com = dict([]) 14 | self.total_weight = 0 15 | self.degrees = dict([]) 16 | self.gdegrees = dict([]) 17 | self.internals = dict([]) 18 | self.loops = dict([]) 19 | self.remain_eps = 0 20 | 21 | def __str__(self): 22 | return ("node2com : " + str(self.node2com) + " degrees : " 23 | + str(self.degrees) + " internals : " + str(self.internals) 24 | + " total_weight : " + str(self.total_weight)) 25 | 26 | def copy(self): 27 | """Perform a deep copy of status""" 28 | new_status = Status() 29 | new_status.node2com = self.node2com.copy() 30 | new_status.internals = self.internals.copy() 31 | new_status.degrees = self.degrees.copy() 32 | new_status.gdegrees = self.gdegrees.copy() 33 | new_status.total_weight = self.total_weight 34 | 35 | def init(self, graph, weight, part=None): 36 | """Initialize the status of a graph with every node in one community""" 37 | # count is used to represent the number of community 38 | count = 0 39 | self.node2com = dict([]) 40 | self.total_weight = 0 41 | # degrees is the degree sum in a community, tot 42 | self.degrees = dict([]) 43 | # gdegrees is the degree of a node 44 | self.gdegrees = dict([]) 45 | # internal is the degree of internal nodes 46 | self.internals = dict([]) 47 | self.total_weight = graph.size(weight=weight) 48 | # remain epsilon 49 | self.remain_eps = 0 50 | if part is None: 51 | for node in graph.nodes(): 52 | self.node2com[node] = count 53 | deg = float(graph.degree(node, weight=weight)) 54 | if deg < 0: 55 | error = "Bad node degree ({})".format(deg) 56 | raise ValueError(error) 57 | self.degrees[count] = deg 58 | self.gdegrees[node] = deg 59 | # edge_data is used to check whether there exists self-loop 60 | edge_data = graph.get_edge_data(node, node, default={weight: 0}) 61 | self.loops[node] = float(edge_data.get(weight, 1)) 62 | self.internals[count] = self.loops[node] 63 | count += 1 64 | else: 65 | for node in graph.nodes(): 66 | com = part[node] 67 | self.node2com[node] = com 68 | deg = float(graph.degree(node, weight=weight)) 69 | self.degrees[com] = self.degrees.get(com, 0) + deg 70 | self.gdegrees[node] = deg 71 | inc = 0. 72 | for neighbor, datas in graph[node].items(): 73 | edge_weight = datas.get(weight, 1) 74 | if edge_weight <= 0: 75 | error = "Bad graph type ({})".format(type(graph)) 76 | raise ValueError(error) 77 | if part[neighbor] == com: 78 | if neighbor == node: 79 | inc += float(edge_weight) 80 | else: 81 | inc += float(edge_weight) / 2. 82 | self.internals[com] = self.internals.get(com, 0) + inc 83 | -------------------------------------------------------------------------------- /main_vary_eps.py: -------------------------------------------------------------------------------- 1 | import community 2 | import networkx as nx 3 | import time 4 | import numpy as np 5 | 6 | from numpy.random import laplace 7 | from sklearn import metrics 8 | 9 | from utils import * 10 | 11 | import os 12 | 13 | 14 | 15 | def main_vary_eps(dataset_name='Chamelon',epsilon=2,e1_r=1/3,e2_r=1/3,N=20,exp_num=10,save_csv=False): 16 | 17 | 18 | t_begin = time.time() 19 | 20 | data_path = './data/' + dataset_name + '.txt' 21 | mat0,mid = get_mat(data_path) 22 | 23 | 24 | cols = ['eps','exper','nmi','evc_overlap','evc_MAE','deg_kl', \ 25 | 'diam_rel','cc_rel','mod_rel'] 26 | 27 | 28 | all_data = pd.DataFrame(None,columns=cols) 29 | 30 | # original graph 31 | mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph) 32 | 33 | mat0_edge = mat0_graph.number_of_edges() 34 | mat0_node = mat0_graph.number_of_nodes() 35 | print('Dataset:%s'%(dataset_name)) 36 | print('Node number:%d'%(mat0_graph.number_of_nodes())) 37 | print('Edge number:%d'%(mat0_graph.number_of_edges())) 38 | 39 | 40 | mat0_par = community.best_partition(mat0_graph) 41 | 42 | mat0_degree = np.sum(mat0,0) 43 | mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution 44 | 45 | mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000) 46 | mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True)) 47 | mat0_evc_ak = list(mat0_evc_a.keys()) 48 | mat0_evc_val = np.array(list(mat0_evc_a.values())) 49 | evc_kn = np.int64(0.01*mat0_node) 50 | 51 | mat0_diam = cal_diam(mat0) 52 | 53 | mat0_cc = nx.transitivity(mat0_graph) 54 | 55 | mat0_mod = community.modularity(mat0_par,mat0_graph) 56 | 57 | 58 | all_deg_kl = [] 59 | all_mod_rel = [] 60 | all_nmi_arr = [] 61 | all_evc_overlap = [] 62 | all_evc_MAE = [] 63 | all_cc_rel = [] 64 | all_diam_rel = [] 65 | 66 | 67 | 68 | ti = time.time() 69 | 70 | e1 = e1_r * epsilon 71 | 72 | e2 = e2_r * epsilon 73 | e3_r = 1 - e1_r - e2_r 74 | 75 | e3 = e3_r * epsilon 76 | 77 | ed = e3 78 | ev = e3 79 | 80 | ev_lambda = 1/ed 81 | dd_lam = 2/ev 82 | 83 | 84 | 85 | 86 | nmi_arr = np.zeros([exp_num]) 87 | deg_kl_arr = np.zeros([exp_num]) 88 | mod_rel_arr = np.zeros([exp_num]) 89 | cc_rel_arr = np.zeros([exp_num]) 90 | diam_rel_arr = np.zeros([exp_num]) 91 | evc_overlap_arr = np.zeros([exp_num]) 92 | evc_MAE_arr = np.zeros([exp_num]) 93 | 94 | for exper in range(exp_num): 95 | print('-----------epsilon=%.1f,e1_r=%.1f,e2_r=%.1f,exper=%d/%d-------------'%(epsilon,e1_r,e2_r,exper+1,exp_num)) 96 | 97 | 98 | t1 = time.time() 99 | 100 | # Community Initialization 101 | mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=N) 102 | 103 | part1 = {} 104 | for i in range(len(mat1_pvarr1)): 105 | part1[i] = mat1_pvarr1[i] 106 | 107 | # Community Adjustment 108 | mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2) 109 | mat1_pvarr = np.array(list(mat1_par1.values())) 110 | 111 | # Information Extraction 112 | mat1_pvs = [] 113 | for i in range(max(mat1_pvarr)+1): 114 | pv1 = np.where(mat1_pvarr==i)[0] 115 | pvs = list(pv1) 116 | mat1_pvs.append(pvs) 117 | 118 | comm_n = max(mat1_pvarr) + 1 119 | 120 | ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64) 121 | 122 | 123 | # edge vector 124 | for i in range(comm_n): 125 | pi = mat1_pvs[i] 126 | ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)]) 127 | for j in range(i+1,comm_n): 128 | pj = mat1_pvs[j] 129 | ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)])) 130 | ev_mat[j,i] = ev_mat[i,j] 131 | 132 | ga = get_uptri_arr(ev_mat,ind=1) 133 | ga_noise = ga + laplace(0,ev_lambda,len(ga)) 134 | 135 | ga_noise_pp = FO_pp(ga_noise) 136 | ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1) 137 | 138 | # degree sequence 139 | dd_s = [] 140 | for i in range(comm_n): 141 | dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])] 142 | dd1 = np.sum(dd1,1) 143 | 144 | dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int) 145 | dd1 = FO_pp(dd1) 146 | dd1[dd1<0] = 0 147 | dd1[dd1>=len(dd1)] = len(dd1)-1 148 | 149 | dd1 = list(dd1) 150 | dd_s.append(dd1) 151 | 152 | # Graph Reconstruction 153 | mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8) 154 | for i in range(comm_n): 155 | # Intra-community 156 | dd_ind = mat1_pvs[i] 157 | dd1 = dd_s[i] 158 | mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1) 159 | 160 | # Inter-community 161 | for j in range(i+1,comm_n): 162 | ev1 = ev_mat[i,j] 163 | pj = mat1_pvs[j] 164 | if ev1 > 0: 165 | c1 = np.random.choice(pi,ev1) 166 | c2 = np.random.choice(pj,ev1) 167 | for ind in range(ev1): 168 | mat2[c1[ind],c2[ind]] = 1 169 | mat2[c2[ind],c1[ind]] = 1 170 | 171 | mat2 = mat2 + np.transpose(mat2) 172 | mat2 = np.triu(mat2,1) 173 | mat2 = mat2 + np.transpose(mat2) 174 | mat2[mat2>0] = 1 175 | 176 | mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph) 177 | 178 | # save the graph 179 | # file_name = './result/' + 'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper) 180 | # write_edge_txt(mat2,mid,file_name) 181 | 182 | #evaluate 183 | mat2_edge = mat2_graph.number_of_edges() 184 | mat2_node = mat2_graph.number_of_nodes() 185 | 186 | mat2_par = community.best_partition(mat2_graph) 187 | mat2_mod = community.modularity(mat2_par,mat2_graph) 188 | 189 | mat2_cc = nx.transitivity(mat2_graph) 190 | 191 | mat2_degree = np.sum(mat2,0) 192 | mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution 193 | 194 | mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000) 195 | mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True)) 196 | mat2_evc_ak = list(mat2_evc_a.keys()) 197 | mat2_evc_val = np.array(list(mat2_evc_a.values())) 198 | 199 | 200 | mat2_diam = cal_diam(mat2) 201 | 202 | # calculate the metrics 203 | # clustering coefficent 204 | cc_rel = cal_rel(mat0_cc,mat2_cc) 205 | 206 | # degree distribution 207 | deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist) 208 | 209 | # modularity 210 | mod_rel = cal_rel(mat0_mod,mat2_mod) 211 | 212 | 213 | # NMI 214 | labels_true = list(mat0_par.values()) 215 | labels_pred = list(mat2_par.values()) 216 | nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred) 217 | 218 | 219 | # Overlap of eigenvalue nodes 220 | evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node)) 221 | 222 | # MAE of EVC 223 | evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn) 224 | 225 | # diameter 226 | diam_rel = cal_rel(mat0_diam,mat2_diam) 227 | 228 | 229 | nmi_arr[exper] = nmi 230 | cc_rel_arr[exper] = cc_rel 231 | deg_kl_arr[exper] = deg_kl 232 | mod_rel_arr[exper] = mod_rel 233 | evc_overlap_arr[exper] = evc_overlap 234 | evc_MAE_arr[exper] = evc_MAE 235 | diam_rel_arr[exper] = diam_rel 236 | 237 | print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \ 238 | %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel)) 239 | 240 | 241 | 242 | data_col = [epsilon,exper,nmi,evc_overlap,evc_MAE,deg_kl, \ 243 | diam_rel,cc_rel,mod_rel] 244 | col_len = len(data_col) 245 | data_col = np.array(data_col).reshape(1,col_len) 246 | data1 = pd.DataFrame(data_col,columns=cols) 247 | all_data = all_data.append(data1) 248 | 249 | 250 | 251 | all_nmi_arr.append(np.mean(nmi_arr)) 252 | all_cc_rel.append(np.mean(cc_rel_arr)) 253 | all_deg_kl.append(np.mean(deg_kl_arr)) 254 | all_mod_rel.append(np.mean(mod_rel_arr)) 255 | all_evc_overlap.append(np.mean(evc_overlap_arr)) 256 | all_evc_MAE.append(np.mean(evc_MAE_arr)) 257 | all_diam_rel.append(np.mean(diam_rel_arr)) 258 | 259 | 260 | # print('Done.%.2fs\n'%(time.time()-ti)) 261 | 262 | res_path = './result' 263 | save_name = res_path + '/' + '%s_%d_%.1f_%.2f_%.2f_%d.csv' %(dataset_name,N,e1_r,e2_r,exp_num) 264 | if not os.path.exists(res_path): 265 | os.mkdir(res_path) 266 | 267 | if save_csv == True: 268 | all_data.to_csv(save_name,index=False,sep=',') 269 | 270 | print('-----------------------------') 271 | 272 | print('dataset:',dataset_name) 273 | 274 | print('epsilon=',epsilon) 275 | print('all_nmi_arr=',all_nmi_arr) 276 | print('all_evc_overlap=',all_evc_overlap) 277 | print('all_evc_MAE=',all_evc_MAE) 278 | print('all_deg_kl=',all_deg_kl) 279 | print('all_diam_rel=',all_diam_rel) 280 | print('all_cc_rel=',all_cc_rel) 281 | print('all_mod_rel=',all_mod_rel) 282 | 283 | print('All time:%.2fs\n'%(time.time()-t_begin)) 284 | 285 | 286 | 287 | if __name__ == '__main__': 288 | # set the dataset 289 | # 'Facebook', 'CA-HepPh', 'Enron' 290 | dataset_name = 'Chamelon' 291 | 292 | # set the privacy budget 293 | epsilon = 2 294 | 295 | # set the number of experiments 296 | exp_num = 10 297 | 298 | # set the number of nodes for community initialization 299 | n1 = 20 300 | 301 | for e1_ind in range(1,9): 302 | e1_r = e1_ind / 10 303 | for e2_ind in range(1,9): 304 | e2_r = e2_ind / 10 305 | e3_r = 1 - e1_r - e2_r 306 | if e3_r > 0: 307 | # run the function 308 | main_vary_eps(dataset_name=dataset_name,epsilon=epsilon,e1_r=e1_r,e2_r=e2_r,N=n1,exp_num=exp_num) 309 | 310 | 311 | 312 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import community 2 | import networkx as nx 3 | import time 4 | import numpy as np 5 | 6 | from numpy.random import laplace 7 | from sklearn import metrics 8 | 9 | from utils import * 10 | 11 | import os 12 | 13 | 14 | 15 | def main_func(dataset_name='Chamelon',eps=[0.5,1,1.5,2,2.5,3,3.5],e1_r=1/3,e2_r=1/3,N=20,t=1.0,exp_num=10,save_csv=False): 16 | 17 | 18 | t_begin = time.time() 19 | 20 | data_path = './data/' + dataset_name + '.txt' 21 | mat0,mid = get_mat(data_path) 22 | 23 | 24 | cols = ['eps','exper','nmi','evc_overlap','evc_MAE','deg_kl', \ 25 | 'diam_rel','cc_rel','mod_rel'] 26 | 27 | 28 | all_data = pd.DataFrame(None,columns=cols) 29 | 30 | # original graph 31 | mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph) 32 | 33 | mat0_edge = mat0_graph.number_of_edges() 34 | mat0_node = mat0_graph.number_of_nodes() 35 | print('Dataset:%s'%(dataset_name)) 36 | print('Node number:%d'%(mat0_graph.number_of_nodes())) 37 | print('Edge number:%d'%(mat0_graph.number_of_edges())) 38 | 39 | 40 | mat0_par = community.best_partition(mat0_graph) 41 | 42 | mat0_degree = np.sum(mat0,0) 43 | mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution 44 | 45 | mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000) 46 | mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True)) 47 | mat0_evc_ak = list(mat0_evc_a.keys()) 48 | mat0_evc_val = np.array(list(mat0_evc_a.values())) 49 | evc_kn = np.int64(0.01*mat0_node) 50 | 51 | mat0_diam = cal_diam(mat0) 52 | 53 | mat0_cc = nx.transitivity(mat0_graph) 54 | 55 | mat0_mod = community.modularity(mat0_par,mat0_graph) 56 | 57 | 58 | all_deg_kl = [] 59 | all_mod_rel = [] 60 | all_nmi_arr = [] 61 | all_evc_overlap = [] 62 | all_evc_MAE = [] 63 | all_cc_rel = [] 64 | all_diam_rel = [] 65 | 66 | 67 | for ei in range(len(eps)): 68 | epsilon = eps[ei] 69 | ti = time.time() 70 | 71 | e1 = e1_r * epsilon 72 | 73 | e2 = e2_r * epsilon 74 | e3_r = 1 - e1_r - e2_r 75 | 76 | e3 = e3_r * epsilon 77 | 78 | ed = e3 79 | ev = e3 80 | 81 | ev_lambda = 1/ed 82 | dd_lam = 2/ev 83 | 84 | 85 | nmi_arr = np.zeros([exp_num]) 86 | deg_kl_arr = np.zeros([exp_num]) 87 | mod_rel_arr = np.zeros([exp_num]) 88 | cc_rel_arr = np.zeros([exp_num]) 89 | diam_rel_arr = np.zeros([exp_num]) 90 | evc_overlap_arr = np.zeros([exp_num]) 91 | evc_MAE_arr = np.zeros([exp_num]) 92 | 93 | 94 | for exper in range(exp_num): 95 | print('-----------epsilon=%.1f,exper=%d/%d-------------'%(epsilon,exper+1,exp_num)) 96 | 97 | 98 | t1 = time.time() 99 | 100 | # Community Initialization 101 | mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=N,t=t) 102 | 103 | part1 = {} 104 | for i in range(len(mat1_pvarr1)): 105 | part1[i] = mat1_pvarr1[i] 106 | 107 | # Community Adjustment 108 | mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2) 109 | mat1_pvarr = np.array(list(mat1_par1.values())) 110 | 111 | # Information Extraction 112 | mat1_pvs = [] 113 | for i in range(max(mat1_pvarr)+1): 114 | pv1 = np.where(mat1_pvarr==i)[0] 115 | pvs = list(pv1) 116 | mat1_pvs.append(pvs) 117 | 118 | comm_n = max(mat1_pvarr) + 1 119 | 120 | ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64) 121 | 122 | 123 | # edge vector 124 | for i in range(comm_n): 125 | pi = mat1_pvs[i] 126 | ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)]) 127 | for j in range(i+1,comm_n): 128 | pj = mat1_pvs[j] 129 | ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)])) 130 | ev_mat[j,i] = ev_mat[i,j] 131 | 132 | ga = get_uptri_arr(ev_mat,ind=1) 133 | ga_noise = ga + laplace(0,ev_lambda,len(ga)) 134 | 135 | ga_noise_pp = FO_pp(ga_noise) 136 | ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1) 137 | 138 | # degree sequence 139 | dd_s = [] 140 | for i in range(comm_n): 141 | dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])] 142 | dd1 = np.sum(dd1,1) 143 | 144 | dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int) 145 | dd1 = FO_pp(dd1) 146 | dd1[dd1<0] = 0 147 | dd1[dd1>=len(dd1)] = len(dd1)-1 148 | 149 | dd1 = list(dd1) 150 | dd_s.append(dd1) 151 | 152 | # Graph Reconstruction 153 | mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8) 154 | for i in range(comm_n): 155 | # Intra-community 156 | dd_ind = mat1_pvs[i] 157 | dd1 = dd_s[i] 158 | mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1) 159 | 160 | # Inter-community 161 | for j in range(i+1,comm_n): 162 | ev1 = ev_mat[i,j] 163 | pj = mat1_pvs[j] 164 | if ev1 > 0: 165 | c1 = np.random.choice(pi,ev1) 166 | c2 = np.random.choice(pj,ev1) 167 | for ind in range(ev1): 168 | mat2[c1[ind],c2[ind]] = 1 169 | mat2[c2[ind],c1[ind]] = 1 170 | 171 | mat2 = mat2 + np.transpose(mat2) 172 | mat2 = np.triu(mat2,1) 173 | mat2 = mat2 + np.transpose(mat2) 174 | mat2[mat2>0] = 1 175 | 176 | mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph) 177 | 178 | # save the graph 179 | # file_name = './result/' + 'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper) 180 | # write_edge_txt(mat2,mid,file_name) 181 | 182 | #evaluate 183 | mat2_edge = mat2_graph.number_of_edges() 184 | mat2_node = mat2_graph.number_of_nodes() 185 | 186 | mat2_par = community.best_partition(mat2_graph) 187 | mat2_mod = community.modularity(mat2_par,mat2_graph) 188 | 189 | mat2_cc = nx.transitivity(mat2_graph) 190 | 191 | mat2_degree = np.sum(mat2,0) 192 | mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution 193 | 194 | mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000) 195 | mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True)) 196 | mat2_evc_ak = list(mat2_evc_a.keys()) 197 | mat2_evc_val = np.array(list(mat2_evc_a.values())) 198 | 199 | 200 | mat2_diam = cal_diam(mat2) 201 | 202 | # calculate the metrics 203 | # clustering coefficent 204 | cc_rel = cal_rel(mat0_cc,mat2_cc) 205 | 206 | # degree distribution 207 | deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist) 208 | 209 | # modularity 210 | mod_rel = cal_rel(mat0_mod,mat2_mod) 211 | 212 | 213 | # NMI 214 | labels_true = list(mat0_par.values()) 215 | labels_pred = list(mat2_par.values()) 216 | nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred) 217 | 218 | 219 | # Overlap of eigenvalue nodes 220 | evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node)) 221 | 222 | # MAE of EVC 223 | evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn) 224 | 225 | # diameter 226 | diam_rel = cal_rel(mat0_diam,mat2_diam) 227 | 228 | 229 | nmi_arr[exper] = nmi 230 | cc_rel_arr[exper] = cc_rel 231 | deg_kl_arr[exper] = deg_kl 232 | mod_rel_arr[exper] = mod_rel 233 | evc_overlap_arr[exper] = evc_overlap 234 | evc_MAE_arr[exper] = evc_MAE 235 | diam_rel_arr[exper] = diam_rel 236 | 237 | print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \ 238 | %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel)) 239 | 240 | 241 | 242 | data_col = [epsilon,exper,nmi,evc_overlap,evc_MAE,deg_kl, \ 243 | diam_rel,cc_rel,mod_rel] 244 | col_len = len(data_col) 245 | data_col = np.array(data_col).reshape(1,col_len) 246 | data1 = pd.DataFrame(data_col,columns=cols) 247 | all_data = all_data.append(data1) 248 | 249 | 250 | 251 | all_nmi_arr.append(np.mean(nmi_arr)) 252 | all_cc_rel.append(np.mean(cc_rel_arr)) 253 | all_deg_kl.append(np.mean(deg_kl_arr)) 254 | all_mod_rel.append(np.mean(mod_rel_arr)) 255 | all_evc_overlap.append(np.mean(evc_overlap_arr)) 256 | all_evc_MAE.append(np.mean(evc_MAE_arr)) 257 | all_diam_rel.append(np.mean(diam_rel_arr)) 258 | 259 | 260 | print('all_index=%d/%d Done.%.2fs\n'%(ei+1,len(eps),time.time()-ti)) 261 | 262 | res_path = './result' 263 | save_name = res_path + '/' + '%s_%d_%.1f_%.2f_%.2f_%d.csv' %(dataset_name,N,t,e1_r,e2_r,exp_num) 264 | if not os.path.exists(res_path): 265 | os.mkdir(res_path) 266 | 267 | if save_csv == True: 268 | all_data.to_csv(save_name,index=False,sep=',') 269 | 270 | print('-----------------------------') 271 | 272 | print('dataset:',dataset_name) 273 | 274 | print('eps=',eps) 275 | print('all_nmi_arr=',all_nmi_arr) 276 | print('all_evc_overlap=',all_evc_overlap) 277 | print('all_evc_MAE=',all_evc_MAE) 278 | print('all_deg_kl=',all_deg_kl) 279 | print('all_diam_rel=',all_diam_rel) 280 | print('all_cc_rel=',all_cc_rel) 281 | print('all_mod_rel=',all_mod_rel) 282 | 283 | print('All time:%.2fs'%(time.time()-t_begin)) 284 | 285 | 286 | 287 | if __name__ == '__main__': 288 | # set the dataset 289 | # 'Facebook', 'CA-HepPh', 'Enron' 290 | dataset_name = 'Chamelon' 291 | 292 | # set the privacy budget, list type 293 | eps = [0.5,1,1.5,2,2.5,3,3.5] 294 | 295 | # set the ratio of the privacy budget 296 | e1_r = 1/3 297 | e2_r = 1/3 298 | 299 | # set the number of experiments 300 | exp_num = 10 301 | 302 | # set the number of nodes for community initialization 303 | n1 = 20 304 | 305 | # set the resolution parameter 306 | t = 1.0 307 | 308 | # run the function 309 | main_func(dataset_name=dataset_name,eps=eps,e1_r=e1_r,e2_r=e2_r,N=n1,t=t,exp_num=exp_num) 310 | 311 | 312 | 313 | -------------------------------------------------------------------------------- /main_vary_N.py: -------------------------------------------------------------------------------- 1 | import community 2 | import networkx as nx 3 | import time 4 | import numpy as np 5 | 6 | from numpy.random import laplace 7 | from sklearn import metrics 8 | 9 | from utils import * 10 | 11 | import os 12 | 13 | 14 | 15 | def main_vary_N(dataset_name='Chamelon',epsilon=2,e1_r=1/3,e2_r=1/3,N_List=[10,20],exp_num=10,save_csv=False): 16 | 17 | 18 | t_begin = time.time() 19 | 20 | data_path = './data/' + dataset_name + '.txt' 21 | mat0,mid = get_mat(data_path) 22 | 23 | 24 | cols = ['eps','exper','N','nmi','evc_overlap','evc_MAE','deg_kl', \ 25 | 'diam_rel','cc_rel','mod_rel'] 26 | 27 | 28 | all_data = pd.DataFrame(None,columns=cols) 29 | 30 | # original graph 31 | mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph) 32 | 33 | mat0_edge = mat0_graph.number_of_edges() 34 | mat0_node = mat0_graph.number_of_nodes() 35 | print('Dataset:%s'%(dataset_name)) 36 | print('Node number:%d'%(mat0_graph.number_of_nodes())) 37 | print('Edge number:%d'%(mat0_graph.number_of_edges())) 38 | 39 | 40 | mat0_par = community.best_partition(mat0_graph) 41 | 42 | mat0_degree = np.sum(mat0,0) 43 | mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution 44 | 45 | mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000) 46 | mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True)) 47 | mat0_evc_ak = list(mat0_evc_a.keys()) 48 | mat0_evc_val = np.array(list(mat0_evc_a.values())) 49 | evc_kn = np.int64(0.01*mat0_node) 50 | 51 | mat0_diam = cal_diam(mat0) 52 | 53 | mat0_cc = nx.transitivity(mat0_graph) 54 | 55 | mat0_mod = community.modularity(mat0_par,mat0_graph) 56 | 57 | 58 | all_deg_kl = [] 59 | all_mod_rel = [] 60 | all_nmi_arr = [] 61 | all_evc_overlap = [] 62 | all_evc_MAE = [] 63 | all_cc_rel = [] 64 | all_diam_rel = [] 65 | 66 | 67 | 68 | for ni in range(len(N_List)): 69 | 70 | ti = time.time() 71 | n1 = N_List[ni] 72 | 73 | e1 = e1_r * epsilon 74 | 75 | e2 = e2_r * epsilon 76 | e3_r = 1 - e1_r - e2_r 77 | 78 | e3 = e3_r * epsilon 79 | 80 | ed = e3 81 | ev = e3 82 | 83 | ev_lambda = 1/ed 84 | dd_lam = 2/ev 85 | 86 | 87 | 88 | 89 | nmi_arr = np.zeros([exp_num]) 90 | deg_kl_arr = np.zeros([exp_num]) 91 | mod_rel_arr = np.zeros([exp_num]) 92 | cc_rel_arr = np.zeros([exp_num]) 93 | diam_rel_arr = np.zeros([exp_num]) 94 | evc_overlap_arr = np.zeros([exp_num]) 95 | evc_MAE_arr = np.zeros([exp_num]) 96 | 97 | 98 | for exper in range(exp_num): 99 | print('-----------N=%d,exper=%d/%d-------------'%(n1,exper+1,exp_num)) 100 | 101 | 102 | t1 = time.time() 103 | 104 | # Community Initialization 105 | mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=n1) 106 | 107 | part1 = {} 108 | for i in range(len(mat1_pvarr1)): 109 | part1[i] = mat1_pvarr1[i] 110 | 111 | # Community Adjustment 112 | mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2) 113 | mat1_pvarr = np.array(list(mat1_par1.values())) 114 | 115 | # Information Extraction 116 | mat1_pvs = [] 117 | for i in range(max(mat1_pvarr)+1): 118 | pv1 = np.where(mat1_pvarr==i)[0] 119 | pvs = list(pv1) 120 | mat1_pvs.append(pvs) 121 | 122 | comm_n = max(mat1_pvarr) + 1 123 | 124 | ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64) 125 | 126 | 127 | # edge vector 128 | for i in range(comm_n): 129 | pi = mat1_pvs[i] 130 | ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)]) 131 | for j in range(i+1,comm_n): 132 | pj = mat1_pvs[j] 133 | ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)])) 134 | ev_mat[j,i] = ev_mat[i,j] 135 | 136 | ga = get_uptri_arr(ev_mat,ind=1) 137 | ga_noise = ga + laplace(0,ev_lambda,len(ga)) 138 | 139 | ga_noise_pp = FO_pp(ga_noise) 140 | ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1) 141 | 142 | # degree sequence 143 | dd_s = [] 144 | for i in range(comm_n): 145 | dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])] 146 | dd1 = np.sum(dd1,1) 147 | 148 | dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int) 149 | dd1 = FO_pp(dd1) 150 | dd1[dd1<0] = 0 151 | dd1[dd1>=len(dd1)] = len(dd1)-1 152 | 153 | dd1 = list(dd1) 154 | dd_s.append(dd1) 155 | 156 | # Graph Reconstruction 157 | mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8) 158 | for i in range(comm_n): 159 | # Intra-community 160 | dd_ind = mat1_pvs[i] 161 | dd1 = dd_s[i] 162 | mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1) 163 | 164 | # Inter-community 165 | for j in range(i+1,comm_n): 166 | ev1 = ev_mat[i,j] 167 | pj = mat1_pvs[j] 168 | if ev1 > 0: 169 | c1 = np.random.choice(pi,ev1) 170 | c2 = np.random.choice(pj,ev1) 171 | for ind in range(ev1): 172 | mat2[c1[ind],c2[ind]] = 1 173 | mat2[c2[ind],c1[ind]] = 1 174 | 175 | mat2 = mat2 + np.transpose(mat2) 176 | mat2 = np.triu(mat2,1) 177 | mat2 = mat2 + np.transpose(mat2) 178 | mat2[mat2>0] = 1 179 | 180 | mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph) 181 | 182 | # save the graph 183 | # file_name = './result/' + 'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper) 184 | # write_edge_txt(mat2,mid,file_name) 185 | 186 | #evaluate 187 | mat2_edge = mat2_graph.number_of_edges() 188 | mat2_node = mat2_graph.number_of_nodes() 189 | 190 | mat2_par = community.best_partition(mat2_graph) 191 | mat2_mod = community.modularity(mat2_par,mat2_graph) 192 | 193 | mat2_cc = nx.transitivity(mat2_graph) 194 | 195 | 196 | mat2_degree = np.sum(mat2,0) 197 | mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution 198 | 199 | mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000) 200 | mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True)) 201 | mat2_evc_ak = list(mat2_evc_a.keys()) 202 | mat2_evc_val = np.array(list(mat2_evc_a.values())) 203 | 204 | 205 | mat2_diam = cal_diam(mat2) 206 | 207 | # calculate the metrics 208 | # clustering coefficent 209 | cc_rel = cal_rel(mat0_cc,mat2_cc) 210 | 211 | # degree distribution 212 | deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist) 213 | 214 | # modularity 215 | mod_rel = cal_rel(mat0_mod,mat2_mod) 216 | 217 | 218 | # NMI 219 | labels_true = list(mat0_par.values()) 220 | labels_pred = list(mat2_par.values()) 221 | nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred) 222 | 223 | 224 | # Overlap of eigenvalue nodes 225 | evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node)) 226 | 227 | # MAE of EVC 228 | evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn) 229 | 230 | # diameter 231 | diam_rel = cal_rel(mat0_diam,mat2_diam) 232 | 233 | 234 | nmi_arr[exper] = nmi 235 | cc_rel_arr[exper] = cc_rel 236 | deg_kl_arr[exper] = deg_kl 237 | mod_rel_arr[exper] = mod_rel 238 | evc_overlap_arr[exper] = evc_overlap 239 | evc_MAE_arr[exper] = evc_MAE 240 | diam_rel_arr[exper] = diam_rel 241 | 242 | print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \ 243 | %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel)) 244 | 245 | 246 | 247 | data_col = [epsilon,exper,n1,nmi,evc_overlap,evc_MAE,deg_kl, \ 248 | diam_rel,cc_rel,mod_rel] 249 | col_len = len(data_col) 250 | data_col = np.array(data_col).reshape(1,col_len) 251 | data1 = pd.DataFrame(data_col,columns=cols) 252 | all_data = all_data.append(data1) 253 | 254 | 255 | all_nmi_arr.append(np.mean(nmi_arr)) 256 | all_cc_rel.append(np.mean(cc_rel_arr)) 257 | all_deg_kl.append(np.mean(deg_kl_arr)) 258 | all_mod_rel.append(np.mean(mod_rel_arr)) 259 | all_evc_overlap.append(np.mean(evc_overlap_arr)) 260 | all_evc_MAE.append(np.mean(evc_MAE_arr)) 261 | all_diam_rel.append(np.mean(diam_rel_arr)) 262 | 263 | 264 | print('all_index=%d/%d Done.%.2fs\n'%(ni+1,len(N_List),time.time()-ti)) 265 | 266 | res_path = './result' 267 | save_name = res_path + '/' + '%s_%.2f_%.2f_%.2f_%d.csv' %(dataset_name,epsilon,e1_r,e2_r,exp_num) 268 | if not os.path.exists(res_path): 269 | os.mkdir(res_path) 270 | 271 | if save_csv == True: 272 | all_data.to_csv(save_name,index=False,sep=',') 273 | 274 | print('-----------------------------') 275 | 276 | print('dataset:',dataset_name) 277 | 278 | print('epsilon=',epsilon) 279 | print('all_N=',N_List) 280 | print('all_nmi_arr=',all_nmi_arr) 281 | print('all_evc_overlap=',all_evc_overlap) 282 | print('all_evc_MAE=',all_evc_MAE) 283 | print('all_deg_kl=',all_deg_kl) 284 | print('all_diam_rel=',all_diam_rel) 285 | print('all_cc_rel=',all_cc_rel) 286 | print('all_mod_rel=',all_mod_rel) 287 | print('All time:%.2fs'%(time.time()-t_begin)) 288 | 289 | 290 | 291 | if __name__ == '__main__': 292 | # set the dataset 293 | # 'Facebook', 'CA-HepPh', 'Enron' 294 | dataset_name = 'Chamelon' 295 | 296 | # set the privacy budget 297 | epsilon = 2 298 | 299 | # set the ratio of the privacy budget 300 | e1_r = 1/3 301 | e2_r = 1/3 302 | 303 | # set the number of experiments 304 | exp_num = 10 305 | 306 | # set the number of nodes for community initialization, list type 307 | N_List = [5,10,15,20,25,30,35] 308 | 309 | # run the function 310 | main_vary_N(dataset_name=dataset_name,epsilon=epsilon,e1_r=e1_r,e2_r=e2_r,N_List=N_List,exp_num=exp_num) 311 | 312 | 313 | 314 | -------------------------------------------------------------------------------- /main_vary_t.py: -------------------------------------------------------------------------------- 1 | import community 2 | import networkx as nx 3 | import time 4 | import numpy as np 5 | 6 | from numpy.random import laplace 7 | from sklearn import metrics 8 | 9 | from utils import * 10 | 11 | import os 12 | 13 | 14 | 15 | def main_vary_t(dataset_name='Chamelon',epsilon=2,e1_r=1/3,e2_r=1/3,N=20,t_List=[0.2,0.5,0.8,1.0,1.2,1.5],exp_num=10,save_csv=False): 16 | 17 | 18 | t_begin = time.time() 19 | 20 | data_path = './data/' + dataset_name + '.txt' 21 | mat0,mid = get_mat(data_path) 22 | 23 | 24 | cols = ['eps','exper','t','nmi','evc_overlap','evc_MAE','deg_kl', \ 25 | 'diam_rel','cc_rel','mod_rel'] 26 | 27 | 28 | all_data = pd.DataFrame(None,columns=cols) 29 | 30 | # original graph 31 | mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph) 32 | 33 | mat0_edge = mat0_graph.number_of_edges() 34 | mat0_node = mat0_graph.number_of_nodes() 35 | print('Dataset:%s'%(dataset_name)) 36 | print('Node number:%d'%(mat0_graph.number_of_nodes())) 37 | print('Edge number:%d'%(mat0_graph.number_of_edges())) 38 | 39 | 40 | mat0_par = community.best_partition(mat0_graph) 41 | 42 | mat0_degree = np.sum(mat0,0) 43 | mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution 44 | 45 | mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000) 46 | mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True)) 47 | mat0_evc_ak = list(mat0_evc_a.keys()) 48 | mat0_evc_val = np.array(list(mat0_evc_a.values())) 49 | evc_kn = np.int64(0.01*mat0_node) 50 | 51 | mat0_diam = cal_diam(mat0) 52 | 53 | mat0_cc = nx.transitivity(mat0_graph) 54 | 55 | mat0_mod = community.modularity(mat0_par,mat0_graph) 56 | 57 | 58 | all_deg_kl = [] 59 | all_mod_rel = [] 60 | all_nmi_arr = [] 61 | all_evc_overlap = [] 62 | all_evc_MAE = [] 63 | all_cc_rel = [] 64 | all_diam_rel = [] 65 | 66 | 67 | 68 | for ti in range(len(t_List)): 69 | 70 | ti = time.time() 71 | t = t_List[ti] 72 | 73 | n1 = N 74 | 75 | e1 = e1_r * epsilon 76 | 77 | e2 = e2_r * epsilon 78 | e3_r = 1 - e1_r - e2_r 79 | 80 | e3 = e3_r * epsilon 81 | 82 | ed = e3 83 | ev = e3 84 | 85 | ev_lambda = 1/ed 86 | dd_lam = 2/ev 87 | 88 | 89 | 90 | 91 | nmi_arr = np.zeros([exp_num]) 92 | deg_kl_arr = np.zeros([exp_num]) 93 | mod_rel_arr = np.zeros([exp_num]) 94 | cc_rel_arr = np.zeros([exp_num]) 95 | diam_rel_arr = np.zeros([exp_num]) 96 | evc_overlap_arr = np.zeros([exp_num]) 97 | evc_MAE_arr = np.zeros([exp_num]) 98 | 99 | 100 | for exper in range(exp_num): 101 | print('-----------t=%d,exper=%d/%d-------------'%(t,exper+1,exp_num)) 102 | 103 | 104 | t1 = time.time() 105 | 106 | # Community Initialization 107 | mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=n1,t=t) 108 | 109 | part1 = {} 110 | for i in range(len(mat1_pvarr1)): 111 | part1[i] = mat1_pvarr1[i] 112 | 113 | # Community Adjustment 114 | mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2) 115 | mat1_pvarr = np.array(list(mat1_par1.values())) 116 | 117 | # Information Extraction 118 | mat1_pvs = [] 119 | for i in range(max(mat1_pvarr)+1): 120 | pv1 = np.where(mat1_pvarr==i)[0] 121 | pvs = list(pv1) 122 | mat1_pvs.append(pvs) 123 | 124 | comm_n = max(mat1_pvarr) + 1 125 | 126 | ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64) 127 | 128 | 129 | # edge vector 130 | for i in range(comm_n): 131 | pi = mat1_pvs[i] 132 | ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)]) 133 | for j in range(i+1,comm_n): 134 | pj = mat1_pvs[j] 135 | ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)])) 136 | ev_mat[j,i] = ev_mat[i,j] 137 | 138 | ga = get_uptri_arr(ev_mat,ind=1) 139 | ga_noise = ga + laplace(0,ev_lambda,len(ga)) 140 | 141 | ga_noise_pp = FO_pp(ga_noise) 142 | ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1) 143 | 144 | # degree sequence 145 | dd_s = [] 146 | for i in range(comm_n): 147 | dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])] 148 | dd1 = np.sum(dd1,1) 149 | 150 | dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int) 151 | dd1 = FO_pp(dd1) 152 | dd1[dd1<0] = 0 153 | dd1[dd1>=len(dd1)] = len(dd1)-1 154 | 155 | dd1 = list(dd1) 156 | dd_s.append(dd1) 157 | 158 | # Graph Reconstruction 159 | mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8) 160 | for i in range(comm_n): 161 | # Intra-community 162 | dd_ind = mat1_pvs[i] 163 | dd1 = dd_s[i] 164 | mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1) 165 | 166 | # Inter-community 167 | for j in range(i+1,comm_n): 168 | ev1 = ev_mat[i,j] 169 | pj = mat1_pvs[j] 170 | if ev1 > 0: 171 | c1 = np.random.choice(pi,ev1) 172 | c2 = np.random.choice(pj,ev1) 173 | for ind in range(ev1): 174 | mat2[c1[ind],c2[ind]] = 1 175 | mat2[c2[ind],c1[ind]] = 1 176 | 177 | mat2 = mat2 + np.transpose(mat2) 178 | mat2 = np.triu(mat2,1) 179 | mat2 = mat2 + np.transpose(mat2) 180 | mat2[mat2>0] = 1 181 | 182 | mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph) 183 | 184 | # save the graph 185 | # file_name = './result/' + 'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper) 186 | # write_edge_txt(mat2,mid,file_name) 187 | 188 | #evaluate 189 | mat2_edge = mat2_graph.number_of_edges() 190 | mat2_node = mat2_graph.number_of_nodes() 191 | 192 | mat2_par = community.best_partition(mat2_graph) 193 | mat2_mod = community.modularity(mat2_par,mat2_graph) 194 | 195 | mat2_cc = nx.transitivity(mat2_graph) 196 | 197 | 198 | mat2_degree = np.sum(mat2,0) 199 | mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution 200 | 201 | mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000) 202 | mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True)) 203 | mat2_evc_ak = list(mat2_evc_a.keys()) 204 | mat2_evc_val = np.array(list(mat2_evc_a.values())) 205 | 206 | 207 | mat2_diam = cal_diam(mat2) 208 | 209 | # calculate the metrics 210 | # clustering coefficent 211 | cc_rel = cal_rel(mat0_cc,mat2_cc) 212 | 213 | # degree distribution 214 | deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist) 215 | 216 | # modularity 217 | mod_rel = cal_rel(mat0_mod,mat2_mod) 218 | 219 | 220 | # NMI 221 | labels_true = list(mat0_par.values()) 222 | labels_pred = list(mat2_par.values()) 223 | nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred) 224 | 225 | 226 | # Overlap of eigenvalue nodes 227 | evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node)) 228 | 229 | # MAE of EVC 230 | evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn) 231 | 232 | # diameter 233 | diam_rel = cal_rel(mat0_diam,mat2_diam) 234 | 235 | 236 | nmi_arr[exper] = nmi 237 | cc_rel_arr[exper] = cc_rel 238 | deg_kl_arr[exper] = deg_kl 239 | mod_rel_arr[exper] = mod_rel 240 | evc_overlap_arr[exper] = evc_overlap 241 | evc_MAE_arr[exper] = evc_MAE 242 | diam_rel_arr[exper] = diam_rel 243 | 244 | print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \ 245 | %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel)) 246 | 247 | 248 | 249 | data_col = [epsilon,exper,t,nmi,evc_overlap,evc_MAE,deg_kl, \ 250 | diam_rel,cc_rel,mod_rel] 251 | col_len = len(data_col) 252 | data_col = np.array(data_col).reshape(1,col_len) 253 | data1 = pd.DataFrame(data_col,columns=cols) 254 | all_data = all_data.append(data1) 255 | 256 | 257 | all_nmi_arr.append(np.mean(nmi_arr)) 258 | all_cc_rel.append(np.mean(cc_rel_arr)) 259 | all_deg_kl.append(np.mean(deg_kl_arr)) 260 | all_mod_rel.append(np.mean(mod_rel_arr)) 261 | all_evc_overlap.append(np.mean(evc_overlap_arr)) 262 | all_evc_MAE.append(np.mean(evc_MAE_arr)) 263 | all_diam_rel.append(np.mean(diam_rel_arr)) 264 | 265 | 266 | print('all_index=%d/%d Done.%.2fs\n'%(ti+1,len(t_List),time.time()-ti)) 267 | 268 | res_path = './result' 269 | save_name = res_path + '/' + '%s_%.2f_%d_%.2f_%.2f_%d.csv' %(dataset_name,epsilon,N,e1_r,e2_r,exp_num) 270 | if not os.path.exists(res_path): 271 | os.mkdir(res_path) 272 | 273 | if save_csv == True: 274 | all_data.to_csv(save_name,index=False,sep=',') 275 | 276 | print('-----------------------------') 277 | 278 | print('dataset:',dataset_name) 279 | 280 | print('epsilon=',epsilon) 281 | print('all_t=',t_List) 282 | print('all_nmi_arr=',all_nmi_arr) 283 | print('all_evc_overlap=',all_evc_overlap) 284 | print('all_evc_MAE=',all_evc_MAE) 285 | print('all_deg_kl=',all_deg_kl) 286 | print('all_diam_rel=',all_diam_rel) 287 | print('all_cc_rel=',all_cc_rel) 288 | print('all_mod_rel=',all_mod_rel) 289 | print('All time:%.2fs'%(time.time()-t_begin)) 290 | 291 | 292 | 293 | if __name__ == '__main__': 294 | # set the dataset 295 | # 'Facebook', 'CA-HepPh', 'Enron' 296 | dataset_name = 'Chamelon' 297 | 298 | # set the privacy budget 299 | epsilon = 2 300 | 301 | # set the ratio of the privacy budget 302 | e1_r = 1/3 303 | e2_r = 1/3 304 | 305 | # set the number of experiments 306 | exp_num = 10 307 | 308 | # set the number of nodes for community initialization 309 | N = 20 310 | 311 | # set the resolution parameter, list type 312 | t_List = [0.2,0.5,0.8,1.0,1.2,1.5] 313 | 314 | # run the function 315 | main_vary_t(dataset_name=dataset_name,epsilon=epsilon,e1_r=e1_r,e2_r=e2_r,N=N,t_List=t_List,exp_num=exp_num) 316 | 317 | 318 | 319 | -------------------------------------------------------------------------------- /comm/community_main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This module implements community detection. 4 | """ 5 | from __future__ import print_function 6 | 7 | import array 8 | import math 9 | 10 | import numbers 11 | from random import random 12 | import warnings 13 | import random 14 | import networkx as nx 15 | import numpy as np 16 | from numpy.random import laplace 17 | import time 18 | 19 | from .community_status import Status 20 | 21 | 22 | 23 | #__PASS_MAX = -1 24 | __PASS_MAX = 10000 25 | __MIN = 0.0000001 26 | 27 | 28 | def check_random_state(seed): 29 | 30 | if seed is None or seed is np.random: 31 | return np.random.mtrand._rand 32 | if isinstance(seed, (numbers.Integral, np.integer)): 33 | return np.random.RandomState(seed) 34 | if isinstance(seed, np.random.RandomState): 35 | return seed 36 | raise ValueError("%r cannot be used to seed a numpy.random.RandomState" 37 | " instance" % seed) 38 | 39 | 40 | def partition_at_level(dendrogram, level): 41 | 42 | partition = dendrogram[0].copy() 43 | for index in range(1, level + 1): 44 | for node, community in partition.items(): 45 | partition[node] = dendrogram[index][community] 46 | return partition 47 | 48 | 49 | def modularity(partition, graph, weight='weight'): 50 | 51 | if graph.is_directed(): 52 | raise TypeError("Bad graph type, use only non directed graph") 53 | 54 | inc = dict([]) 55 | deg = dict([]) 56 | # links stands for the number of edge 57 | links = graph.size(weight=weight) 58 | if links == 0: 59 | raise ValueError("A graph without link has an undefined modularity") 60 | 61 | for node in graph: 62 | # com stands for the node's corresponding community 63 | com = partition[node] 64 | # deg[com] is used to storage the degree of relative community 65 | deg[com] = deg.get(com, 0.) + graph.degree(node, weight=weight) 66 | for neighbor, datas in graph[node].items(): 67 | # if the result of get() is None, return 1 68 | edge_weight = datas.get(weight, 1) 69 | if partition[neighbor] == com: 70 | if neighbor == node: 71 | inc[com] = inc.get(com, 0.) + float(edge_weight) 72 | else: 73 | inc[com] = inc.get(com, 0.) + float(edge_weight) / 2. 74 | 75 | res = 0. 76 | for com in set(partition.values()): 77 | # calculate the modularity based on the formula: Q = deg_in/m - (deg_com/(2m))^2 78 | res += (inc.get(com, 0.) / links) - \ 79 | (deg.get(com, 0.) / (2. * links)) ** 2 80 | return res 81 | 82 | 83 | def best_partition(graph, 84 | partition=None, 85 | weight='weight', 86 | resolution=1., 87 | randomize=None, 88 | random_state=None, 89 | epsilon_EM=None, 90 | divide=1): 91 | 92 | dendo = generate_dendrogram(graph, 93 | partition, 94 | weight, 95 | resolution, 96 | randomize, 97 | random_state, 98 | epsilon_EM, 99 | divide) 100 | return partition_at_level(dendo, len(dendo) - 1) 101 | 102 | def generate_dendrogram(graph, 103 | part_init=None, 104 | weight='weight', 105 | resolution=1., 106 | randomize=None, 107 | random_state=None, 108 | epsilon_EM=None, 109 | divide=1): 110 | 111 | if graph.is_directed(): 112 | raise TypeError("Bad graph type, use only non directed graph") 113 | 114 | # Properly handle random state, eventually remove old `randomize` parameter 115 | # NOTE: when `randomize` is removed, delete code up to random_state = ... 116 | if randomize is not None: 117 | warnings.warn("The `randomize` parameter will be deprecated in future " 118 | "versions. Use `random_state` instead.", DeprecationWarning) 119 | # If shouldn't randomize, we set a fixed seed to get determinisitc results 120 | if randomize is False: 121 | random_state = 0 122 | 123 | # We don't know what to do if both `randomize` and `random_state` are defined 124 | if randomize and random_state is not None: 125 | raise ValueError("`randomize` and `random_state` cannot be used at the " 126 | "same time") 127 | 128 | random_state = check_random_state(random_state) 129 | 130 | # special case, when there is no link 131 | # the best partition is everyone in its community 132 | if graph.number_of_edges() == 0: 133 | part = dict([]) 134 | for i, node in enumerate(graph.nodes()): 135 | part[node] = i 136 | return [part] 137 | 138 | current_graph = graph.copy() 139 | 140 | 141 | 142 | 143 | status = Status() 144 | status.init(current_graph, weight, part_init) 145 | # status.init(current_graph, weight, part1) 146 | status_list = list() 147 | 148 | 149 | v1 = np.sum(list(status.internals.values())) 150 | # print('initial internals:%d'%v1) 151 | 152 | t1 = time.time() 153 | __comm_adjust_em(current_graph, status, weight, resolution, random_state, epsilon_EM , divide) 154 | 155 | v1 = np.sum(list(status.internals.values())) 156 | # print('final internals:%d'%v1) 157 | 158 | # print('adjust time:%.2fs'%(time.time()-t1)) 159 | new_mod = __modularity(status, resolution) 160 | partition = __renumber(status.node2com) 161 | status_list.append(partition) 162 | mod = new_mod 163 | # induced_graph is to create new graph based on the partition 164 | current_graph = induced_graph(partition, current_graph, weight) 165 | status.init(current_graph, weight) 166 | 167 | return status_list[:] 168 | 169 | 170 | def induced_graph(partition, graph, weight="weight"): 171 | 172 | ret = nx.Graph() 173 | ret.add_nodes_from(partition.values()) 174 | 175 | for node1, node2, datas in graph.edges(data=True): 176 | edge_weight = datas.get(weight, 1) 177 | com1 = partition[node1] 178 | com2 = partition[node2] 179 | w_prec = ret.get_edge_data(com1, com2, {weight: 0}).get(weight, 1) 180 | ret.add_edge(com1, com2, **{weight: w_prec + edge_weight}) 181 | 182 | return ret 183 | 184 | 185 | def __renumber(dictionary): 186 | 187 | values = set(dictionary.values()) 188 | target = set(range(len(values))) 189 | 190 | if values == target: 191 | # no renumbering necessary 192 | ret = dictionary.copy() 193 | else: 194 | # add the values that won't be renumbered 195 | renumbering = dict(zip(target.intersection(values), 196 | target.intersection(values))) 197 | # add the values that will be renumbered 198 | renumbering.update(dict(zip(values.difference(target), 199 | target.difference(values)))) 200 | ret = {k: renumbering[v] for k, v in dictionary.items()} 201 | 202 | return ret 203 | 204 | 205 | def load_binary(data): 206 | data = open(data, "rb") 207 | 208 | reader = array.array("I") 209 | reader.fromfile(data, 1) 210 | num_nodes = reader.pop() 211 | reader = array.array("I") 212 | reader.fromfile(data, num_nodes) 213 | cum_deg = reader.tolist() 214 | num_links = reader.pop() 215 | reader = array.array("I") 216 | reader.fromfile(data, num_links) 217 | links = reader.tolist() 218 | graph = nx.Graph() 219 | graph.add_nodes_from(range(num_nodes)) 220 | prec_deg = 0 221 | 222 | for index in range(num_nodes): 223 | last_deg = cum_deg[index] 224 | neighbors = links[prec_deg:last_deg] 225 | graph.add_edges_from([(index, int(neigh)) for neigh in neighbors]) 226 | prec_deg = last_deg 227 | 228 | return graph 229 | 230 | def __comm_adjust_em(graph, status, weight_key, resolution, random_state, epsilon, divide): 231 | 232 | nb_pass_done = 0 233 | cur_mod = __modularity(status, resolution) 234 | new_mod = cur_mod 235 | 236 | pass_max = round(divide) 237 | 238 | deltau = 1 239 | c1 = epsilon / (2 * pass_max * deltau * 2 ) 240 | 241 | 242 | # print('epsilon:',c1) 243 | 244 | while nb_pass_done < pass_max: 245 | cur_mod = new_mod 246 | 247 | nb_pass_done += 1 248 | 249 | # iteration over the nodes 250 | for node in __randomize(graph.nodes(), random_state): 251 | 252 | com_node = status.node2com[node] 253 | 254 | # obtain all communities 255 | candi_communities = __allcom(node, graph, status, weight_key) 256 | 257 | remove_cost = - resolution * candi_communities.get(com_node,0) 258 | 259 | # remove the node from the original community 260 | __remove(node, com_node, 261 | candi_communities.get(com_node, 0.), status) 262 | best_com = com_node 263 | 264 | 265 | coms = [] 266 | incrs = [] 267 | for com, dnc in __randomize(candi_communities.items(), random_state): 268 | incr = remove_cost + resolution * dnc 269 | incrs.append(incr) 270 | coms.append(com) 271 | 272 | incrs = np.array(incrs) 273 | incrs = incrs * c1 274 | incrs_m = max(np.max(incrs),0) 275 | exp_inc = np.exp(incrs-incrs_m) 276 | 277 | # Exponential Mechanism 278 | prob_inc = exp_inc / np.sum(exp_inc) 279 | best_com = np.random.choice(coms,p=prob_inc) 280 | 281 | # put the node into the best_com 282 | __insert(node, best_com, 283 | candi_communities.get(best_com, 0.), status) 284 | 285 | new_mod = __modularity(status, resolution) 286 | 287 | 288 | 289 | def __neighcom(node, graph, status, weight_key): 290 | 291 | weights = {} 292 | for neighbor, datas in graph[node].items(): 293 | if neighbor != node: 294 | edge_weight = datas.get(weight_key, 1) 295 | neighborcom = status.node2com[neighbor] 296 | weights[neighborcom] = weights.get(neighborcom, 0) + edge_weight 297 | 298 | return weights 299 | 300 | 301 | 302 | def __allcom(node, graph, status, weight_key): 303 | all_coms = list(status.node2com.values()) 304 | candi_weights = dict.fromkeys(all_coms,0) 305 | 306 | for neighbor, datas in graph[node].items(): 307 | if neighbor != node: 308 | edge_weight = datas.get(weight_key, 1) 309 | neighborcom = status.node2com[neighbor] 310 | candi_weights[neighborcom] = candi_weights.get(neighborcom, 0) + edge_weight 311 | 312 | return candi_weights 313 | 314 | 315 | def __remove(node, com, weight, status): 316 | status.degrees[com] = (status.degrees.get(com, 0.) 317 | - status.gdegrees.get(node, 0.)) 318 | status.internals[com] = float(status.internals.get(com, 0.) - 319 | weight - status.loops.get(node, 0.)) 320 | status.node2com[node] = -1 321 | 322 | 323 | def __insert(node, com, weight, status): 324 | 325 | status.node2com[node] = com 326 | status.degrees[com] = (status.degrees.get(com, 0.) + 327 | status.gdegrees.get(node, 0.)) 328 | status.internals[com] = float(status.internals.get(com, 0.) + 329 | weight + status.loops.get(node, 0.)) 330 | 331 | 332 | def __modularity(status, resolution): 333 | 334 | links = float(status.total_weight) 335 | result = 0. 336 | for community in set(status.node2com.values()): 337 | in_degree = status.internals.get(community, 0.) 338 | degree = status.degrees.get(community, 0.) 339 | if links > 0: 340 | result += in_degree * resolution / links - ((degree / (2. * links)) ** 2) 341 | return result 342 | 343 | 344 | def __randomize(items, random_state): 345 | randomized_items = list(items) 346 | random_state.shuffle(randomized_items) 347 | return randomized_items 348 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.random import laplace 3 | import pandas as pd 4 | 5 | import networkx as nx 6 | 7 | import community 8 | import comm 9 | import time 10 | import random 11 | 12 | import itertools 13 | from heapq import * 14 | 15 | from heapq import nlargest 16 | 17 | 18 | 19 | def get_mat(data_path): 20 | # data_path = './data/' + dataset_name + '.txt' 21 | data = np.loadtxt(data_path) 22 | 23 | 24 | # initial statistics 25 | dat = (np.append(data[:,0],data[:,1])).astype(int) 26 | dat_c = np.bincount(dat) 27 | 28 | d = {} 29 | node = 0 30 | mid = [] 31 | for i in range(len(dat_c)): 32 | if dat_c[i] > 0: 33 | d[i] = node 34 | mid.append(i) 35 | node = node + 1 36 | mid = np.array(mid,dtype=np.int32) 37 | 38 | # initial statistics 39 | Edge_num = data.shape[0] 40 | c = len(d) 41 | 42 | 43 | # genarated adjancent matrix 44 | mat0 = np.zeros([c,c],dtype=np.uint8) 45 | for i in range(Edge_num): 46 | mat0[d[int(data[i,0])],d[int(data[i,1])]] = 1 47 | 48 | 49 | # transfer direct to undirect 50 | mat0 = mat0 + np.transpose(mat0) 51 | mat0 = np.triu(mat0,1) 52 | mat0 = mat0 + np.transpose(mat0) 53 | mat0[mat0>0] = 1 54 | return mat0,mid 55 | 56 | def community_init(mat0,mat0_graph,epsilon,nr,t=1.0): 57 | 58 | # t1 = time.time() 59 | # Divide the nodes randomly 60 | g1 = list(np.zeros(len(mat0))) 61 | ind = -1 62 | 63 | for i in range(len(mat0)): 64 | if i % nr == 0: 65 | ind = ind + 1 66 | g1[i] = ind 67 | 68 | random.shuffle(g1) 69 | 70 | mat0_par3 = {} 71 | for i in range(len(mat0)): 72 | mat0_par3[i] = g1[i] 73 | 74 | gr1 = max(mat0_par3.values()) + 1 75 | 76 | # mat0_mod3 = community.modularity(mat0_par3,mat0_graph) 77 | # print('mat0_mod2=%.3f,gr1=%d'%(mat0_mod3,gr1)) 78 | 79 | 80 | mat0_par3_pv = np.array(list(mat0_par3.values())) 81 | mat0_par3_pvs = [] 82 | for i in range(gr1): 83 | pv = np.where(mat0_par3_pv==i)[0] 84 | pvs = list(pv) 85 | mat0_par3_pvs.append(pvs) 86 | mat_one_level = np.zeros([gr1,gr1]) 87 | 88 | for i in range(gr1): 89 | pi = mat0_par3_pvs[i] 90 | mat_one_level[i,i] = np.sum(mat0[np.ix_(pi,pi)]) 91 | for j in range(i+1,gr1): 92 | pj = mat0_par3_pvs[j] 93 | mat_one_level[i,j] = np.sum(mat0[np.ix_(pi,pj)]) 94 | # print('generate new matrix time:%.2fs'%(time.time()-t1)) 95 | 96 | lap_noise = laplace(0,1/epsilon,gr1*gr1).astype(np.int32) 97 | lap_noise = lap_noise.reshape(gr1,gr1) 98 | 99 | ga = get_uptri_arr(mat_one_level,ind=1) 100 | ga_noise = ga + laplace(0,1/epsilon,len(ga)) 101 | ga_noise_pp = FO_pp(ga_noise) 102 | mat_one_level_noise = get_upmat(ga_noise_pp,gr1,ind=1) 103 | 104 | 105 | noise_diag = np.int32(mat_one_level.diagonal() + laplace(0,2/epsilon,len(mat_one_level))) 106 | 107 | # keep consistency 108 | noise_diag = FO_pp(noise_diag) 109 | 110 | mat_one_level_noise = np.triu(mat_one_level_noise,1) 111 | mat_one_level_noise = mat_one_level_noise + np.transpose(mat_one_level_noise) 112 | 113 | row,col = np.diag_indices_from(mat_one_level_noise) 114 | mat_one_level_noise[row,col] = noise_diag 115 | mat_one_level_noise[mat_one_level_noise<0] = 0 116 | 117 | mat_one_level_graph = nx.from_numpy_array(mat_one_level_noise,create_using=nx.Graph) 118 | 119 | # Apply the Louvain method 120 | mat_new_par = community.best_partition(mat_one_level_graph,resolution=t) 121 | gr2 = max(mat_new_par.values()) + 1 122 | mat_new_pv = np.array(list(mat_new_par.values())) 123 | mat_final_pvs = [] 124 | for i in range(gr2): 125 | pv = np.where(mat_new_pv==i)[0] 126 | mat_final_pv = [] 127 | for j in range(len(pv)): 128 | pvj = pv[j] 129 | mat_final_pv.extend(mat0_par3_pvs[pvj]) 130 | mat_final_pvs.append(mat_final_pv) 131 | 132 | label1 = np.zeros([len(mat0)],dtype=np.int32) 133 | for i in range(len(mat_final_pvs)): 134 | label1[mat_final_pvs[i]] = i 135 | 136 | return label1 137 | 138 | 139 | 140 | def get_uptri_arr(mat_init,ind=0): 141 | a = len(mat_init) 142 | res = [] 143 | for i in range(a): 144 | dat = mat_init[i][i+ind:] 145 | res.extend(dat) 146 | arr = np.array(res) 147 | return arr 148 | 149 | 150 | def get_upmat(arr,k,ind=0): 151 | mat = np.zeros([k,k],dtype=np.int32) 152 | left = 0 153 | for i in range(k): 154 | delta = k - i - ind 155 | mat[i,i+ind:] = arr[left:left+delta] 156 | left = left + delta 157 | 158 | return mat 159 | 160 | # Post processing 161 | def FO_pp(data_noise,type='norm_sub'): 162 | if type == 'norm_sub': 163 | data = norm_sub_deal(data_noise) 164 | 165 | if type == 'norm_mul': 166 | data = norm_mul_deal(data_noise) 167 | 168 | return data 169 | 170 | def norm_sub_deal(data): 171 | data = np.array(data,dtype=np.int32) 172 | data_min = np.min(data) 173 | data_sum = np.sum(data) 174 | delta_m = 0 - data_min 175 | 176 | if delta_m > 0: 177 | dm = 100000000 178 | data_seq = np.zeros([len(data)],dtype=np.int32) 179 | for i in range(0,delta_m): 180 | data_t = data - i 181 | data_t[data_t<0] = 0 182 | data_t_s = np.sum(data_t) 183 | dt = np.abs(data_t_s - data_sum) 184 | if dt < dm: 185 | dm = dt 186 | data_seq = data_t 187 | if dt == 0: 188 | break 189 | 190 | else: 191 | data_seq = data 192 | return data_seq 193 | 194 | 195 | 196 | 197 | # generate graph(intra edges) based on degree sequence 198 | def generate_intra_edge(dd1,div=1): 199 | dd1 = np.array(dd1,dtype=np.int32) 200 | dd1[dd1<0] = 0 201 | dd1_len = len(dd1) 202 | dd1_p = dd1.reshape(dd1_len,1) * dd1.reshape(1,dd1_len) 203 | s1 = np.sum(dd1) 204 | 205 | dd1_res = np.zeros([dd1_len,dd1_len],dtype=np.int8) 206 | if s1 > 0: 207 | batch_num = int(dd1_len / div) 208 | begin_id = 0 209 | for i in range(div): 210 | if i == div-1: 211 | batch_n = dd1_len - begin_id 212 | dd1_r = np.random.randint(0,high=s1,size=(batch_n,dd1_len)) 213 | res = dd1_p[begin_id:,:] - dd1_r 214 | res[res>0] = 1 215 | res[res<1] = 0 216 | dd1_res[begin_id:,:] = res 217 | else: 218 | dd1_r = np.random.randint(0,high=s1,size=(batch_num,dd1_len)) 219 | res = dd1_p[begin_id:begin_id+batch_num,:] - dd1_r 220 | res[res>0] = 1 221 | res[res<1] = 0 222 | dd1_res[begin_id:begin_id+batch_num,:] = res 223 | begin_id = begin_id + batch_num 224 | 225 | # make sure the final adjacency matrix is symmetric 226 | dd1_out = np.triu(dd1_res,1) 227 | dd1_out = dd1_out + np.transpose(dd1_out) 228 | return dd1_out 229 | 230 | # calculate the diameter 231 | def cal_diam(mat): 232 | mat_graph = nx.from_numpy_array(mat,create_using=nx.Graph) 233 | max_diam = 0 234 | for com in nx.connected_components(mat_graph): 235 | com_list = list(com) 236 | mat_sub = mat[np.ix_(com_list,com_list)] 237 | sub_g = nx.from_numpy_array(mat_sub,create_using=nx.Graph) 238 | diam = nx.diameter(sub_g) 239 | if diam > max_diam: 240 | max_diam = diam 241 | return max_diam 242 | 243 | # calculate the overlap 244 | def cal_overlap(la,lb,k): 245 | la = la[:k] 246 | lb = lb[:k] 247 | la_s = set(la) 248 | lb_s = set(lb) 249 | num = len(la_s & lb_s) 250 | rate = num / k 251 | return rate 252 | 253 | 254 | # calculate the KL divergence 255 | def cal_kl(A,B): 256 | p = A / sum(A) 257 | q = B / sum(B) 258 | if A.shape[0] > B.shape[0]: 259 | q = np.pad(q,(0,p.shape[0]-q.shape[0]),'constant',constant_values=(0,0)) 260 | elif A.shape[0] < B.shape[0]: 261 | p = np.pad(p,(0,q.shape[0]-p.shape[0]),'constant',constant_values=(0,0)) 262 | kl = p * np.log((p+np.finfo(np.float64).eps)/(q+np.finfo(np.float64).eps)) 263 | kl = np.sum(kl) 264 | return kl 265 | 266 | 267 | # calculate the RE 268 | def cal_rel(A,B): 269 | eps = 0.000000000000001 270 | A = np.float64(A) 271 | B = np.float64(B) 272 | #eps = np.float64(eps) 273 | res = abs((A-B)/(A+eps)) 274 | return res 275 | 276 | # calculate the MSE 277 | def cal_MSE(A,B): 278 | res = np.mean((A-B)**2) 279 | return res 280 | 281 | # calculate the MAE 282 | def cal_MAE(A,B,k=None): 283 | if k== None: 284 | res = np.mean(abs(A-B)) 285 | else: 286 | a = np.array(A[:k]) 287 | b = np.array(B[:k]) 288 | res = np.mean(abs(a-b)) 289 | return res 290 | 291 | 292 | def write_edge_txt(mat0,mid,file_name): 293 | a0 = np.where(mat0==1)[0] 294 | a1 = np.where(mat0==1)[1] 295 | with open(file_name,'w+') as f: 296 | for i in range(len(a0)): 297 | f.write('%d\t%d\n'%(mid[a0[i]],mid[a1[i]])) 298 | 299 | 300 | class PriorityQueue(object): 301 | def __init__(self): 302 | self.pq = [] # list of entries arranged in a heap 303 | self.entry_finder = {} # mapping of tasks to entries 304 | self.REMOVED = '' # placeholder for a removed task 305 | self.counter = itertools.count() # unique sequence count 306 | 307 | def add_task(self, task, priority=0): 308 | 'Add a new task or update the priority of an existing task' 309 | if task in self.entry_finder: 310 | self.remove_task(task) 311 | count = next(self.counter) 312 | entry = [priority, count, task] 313 | self.entry_finder[task] = entry 314 | heappush(self.pq, entry) 315 | 316 | def remove_task(self, task): 317 | 'Mark an existing task as REMOVED. Raise KeyError if not found.' 318 | entry = self.entry_finder.pop(task) 319 | entry[-1] = self.REMOVED 320 | 321 | def pop_item(self): 322 | 'Remove and return the lowest priority task. Raise KeyError if empty.' 323 | while self.pq: 324 | priority, count, task = heappop(self.pq) 325 | if task is not self.REMOVED: 326 | del self.entry_finder[task] 327 | return task, priority 328 | raise KeyError('pop from an empty priority queue') 329 | 330 | def __str__(self): 331 | return str([entry for entry in self.pq if entry[2] != self.REMOVED]) 332 | 333 | 334 | def degreeDiscountIC(G, k, p=0.01): 335 | 336 | S = [] 337 | dd = PriorityQueue() # degree discount 338 | t = dict() # number of adjacent vertices that are in S 339 | d = dict() # degree of each vertex 340 | 341 | # initialize degree discount 342 | for u in G.nodes(): 343 | d[u] = sum([G[u][v]['weight'] for v in G[u]]) # each edge adds degree 1 344 | # d[u] = len(G[u]) # each neighbor adds degree 1 345 | dd.add_task(u, -d[u]) # add degree of each node 346 | t[u] = 0 347 | 348 | # add vertices to S greedily 349 | for i in range(k): 350 | u, priority = dd.pop_item() # extract node with maximal degree discount 351 | S.append(u) 352 | for v in G[u]: 353 | if v not in S: 354 | t[v] += G[u][v]['weight'] # increase number of selected neighbors 355 | priority = d[v] - 2*t[v] - (d[v] - t[v])*t[v]*p # discount of degree 356 | dd.add_task(v, -priority) 357 | return S 358 | 359 | def runIC (G, S, p = 0.01): 360 | 361 | from copy import deepcopy 362 | from random import random 363 | T = deepcopy(S) # copy already selected nodes 364 | 365 | i = 0 366 | while i < len(T): 367 | for v in G[T[i]]: # for neighbors of a selected node 368 | if v not in T: # if it wasn't selected yet 369 | w = G[T[i]][v]['weight'] # count the number of edges between two nodes 370 | if random() <= 1 - (1-p)**w: # if at least one of edges propagate influence 371 | # print (T[i], 'influences', v) 372 | T.append(v) 373 | i += 1 374 | return T 375 | 376 | def find_seed(graph_path,seed_size=20): 377 | 378 | # read in graph 379 | G = nx.Graph() 380 | with open(graph_path) as f: 381 | 382 | for line in f: 383 | u, v = map(int, line.split()) 384 | try: 385 | G[u][v]['weight'] += 1 386 | except: 387 | G.add_edge(u,v, weight=1) 388 | 389 | 390 | S = degreeDiscountIC(G, seed_size) 391 | return S 392 | 393 | 394 | 395 | def cal_spread(graph_path,S_all,p=0.01,seed_size=20,iterations=100): 396 | 397 | # read in graph 398 | G = nx.Graph() 399 | with open(graph_path) as f: 400 | 401 | for line in f: 402 | u, v = map(int, line.split()) 403 | # print('u:%s,v:%s'%(u,v)) 404 | try: 405 | G[u][v]['weight'] += 1 406 | except: 407 | G.add_edge(u,v, weight=1) 408 | 409 | 410 | #calculate initial set 411 | 412 | if seed_size <= len(S_all): 413 | S = S_all[:seed_size] 414 | else: 415 | print('seed_size is too large.') 416 | S = S_all 417 | 418 | 419 | avg = 0 420 | for i in range(iterations): 421 | T = runIC(G, S, p) 422 | avg += float(len(T))/iterations 423 | 424 | avg_final = int(round(avg)) 425 | 426 | return avg_final --------------------------------------------------------------------------------