├── comm
    ├── __init__.py
    ├── community_status.py
    └── community_main.py
├── IM_spread.py
├── README.md
├── main_vary_eps.py
├── main.py
├── main_vary_N.py
├── main_vary_t.py
└── utils.py


/comm/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | # -*- coding: utf-8 -*-
 3 | """
 4 | This package implements community detection.
 5 | """
 6 | 
 7 | from .community_main import (
 8 |     partition_at_level,
 9 |     modularity,
10 |     best_partition,
11 |     generate_dendrogram,
12 |     induced_graph,
13 |     load_binary,
14 | )
15 | 
16 | 


--------------------------------------------------------------------------------
/IM_spread.py:
--------------------------------------------------------------------------------
 1 | from utils import *
 2 | 
 3 | 
 4 | 
 5 | def IM_spread(dataset_name,file_name,seed_size):
 6 | 
 7 | 
 8 |     data_path = './data/%s.txt' %(dataset_name)  
 9 | 
10 |     # obtain the set of seed nodes of PrivGraph
11 |     S = find_seed(file_name,seed_size=seed_size)
12 | 
13 |     # calculate the influence spread
14 |     influence_spread = cal_spread(data_path,S_all=S,seed_size=seed_size)
15 | 
16 |     return influence_spread
17 | 
18 | 
19 | if __name__ == '__main__':
20 |     epsilon = 1.5
21 | 
22 |     seed_size = 20
23 | 
24 |     # set the dataset
25 |     # dataset_name = 'Enron'
26 |     # dataset_name = 'CA-HepPh'
27 |     # dataset_name = 'Facebook'
28 |     dataset_name = 'Chamelon'
29 | 
30 |     root_path = './result/'
31 | 
32 |     # import the txt file
33 |     file_name = root_path + 'PrivGraph_%s_%.1f.txt'  %(dataset_name,epsilon)
34 | 
35 |     print('dataset:%s,epsilon:%.1f,seed_size:%d'%(dataset_name,epsilon,seed_size))
36 | 
37 |     influence_spread = IM_spread(dataset_name,file_name,seed_size)
38 | 
39 |     print('Influence Spread:',influence_spread)
40 | 
41 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PrivGraph
 2 | Implementation of PrivGraph
 3 | ## Requirements
 4 | 
 5 | 
 6 | ```
 7 | numpy >= 1.20.1
 8 | pandas >= 1.2.4
 9 | networkx >= 2.5
10 | scikit-learn >= 0.24.1
11 | python-louvain >= 0.15
12 | python >= 3.8
13 | ```
14 | 
15 | ## Contents
16 | 
17 | The project contains 3 folders and 6 files.
18 | 
19 | 1. data (folder): All datasets are in this folder.
20 | 2. comm (folder): This folder is used for community discovery.
21 | 3. result (folder): This folder is used to store the results and contains four examples of synthetic graphs.
22 | 4. main.py (file): The file is used to obtain the results of PrivGraph for End-to-End experiments.
23 | 5. main_vary_N.py (file): The file is used to obtain the results for different number of nodes.
24 | 6. main_vary_eps.py (file): The file is used to obtain the results for different privacy budget allocations.
25 | 7. main_vary_t.py (file): The file is used to obtain the results for different resolution parameters.
26 | 8. IM_spread.py (file): The file is used to obtain the results of influence maximization.
27 | 9. utils.py (file): The file includes some functions that are needed for other files.
28 | 
29 | ## Run
30 | 
31 | 
32 | ```
33 | ###### Example 1: End to End ######
34 | python main.py
35 | 
36 | ###### Example 2: Impact of the number of nodes ######
37 | python main_vary_N.py
38 | 
39 | ###### Example 3: Impact of the privacy budget allocation ######
40 | python main_vary_eps.py
41 | 
42 | ###### Example 4: Impact of the resolution parameter ######
43 | python main_vary_t.py
44 | 
45 | ###### Example 5: Influence Maximization ######
46 | python IM_spread.py
47 | ```
48 | 
49 | ## Citation
50 | 
51 | ```
52 |  @inproceedings{YZDCCS23,
53 |     author = {Quan Yuan and Zhikun Zhang and Linkang Du and Min Chen and Peng Cheng and Mingyang Sun},
54 |     title = {{PrivGraph: Differentially Private Graph Data Publication by Exploiting Community Information}},
55 |     booktitle = {{USENIX Security}},
56 |     publisher = {},
57 |     year = {2023},
58 | }
59 | ```


--------------------------------------------------------------------------------
/comm/community_status.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | 
 3 | 
 4 | class Status(object):
 5 | 
 6 |     node2com = {}
 7 |     total_weight = 0
 8 |     internals = {}
 9 |     degrees = {}
10 |     gdegrees = {}
11 | 
12 |     def __init__(self):
13 |         self.node2com = dict([])
14 |         self.total_weight = 0
15 |         self.degrees = dict([])
16 |         self.gdegrees = dict([])
17 |         self.internals = dict([])
18 |         self.loops = dict([])
19 |         self.remain_eps = 0
20 | 
21 |     def __str__(self):
22 |         return ("node2com : " + str(self.node2com) + " degrees : "
23 |                 + str(self.degrees) + " internals : " + str(self.internals)
24 |                 + " total_weight : " + str(self.total_weight))
25 | 
26 |     def copy(self):
27 |         """Perform a deep copy of status"""
28 |         new_status = Status()
29 |         new_status.node2com = self.node2com.copy()
30 |         new_status.internals = self.internals.copy()
31 |         new_status.degrees = self.degrees.copy()
32 |         new_status.gdegrees = self.gdegrees.copy()
33 |         new_status.total_weight = self.total_weight
34 | 
35 |     def init(self, graph, weight, part=None):
36 |         """Initialize the status of a graph with every node in one community"""
37 |         # count is used to represent the number of community 
38 |         count = 0
39 |         self.node2com = dict([])
40 |         self.total_weight = 0
41 |         # degrees is the degree sum in a community, tot
42 |         self.degrees = dict([])
43 |         # gdegrees is the degree of a node
44 |         self.gdegrees = dict([])
45 |         # internal is the degree of internal nodes
46 |         self.internals = dict([])
47 |         self.total_weight = graph.size(weight=weight)
48 |         # remain epsilon
49 |         self.remain_eps = 0
50 |         if part is None:
51 |             for node in graph.nodes():
52 |                 self.node2com[node] = count
53 |                 deg = float(graph.degree(node, weight=weight))
54 |                 if deg < 0:
55 |                     error = "Bad node degree ({})".format(deg)
56 |                     raise ValueError(error)
57 |                 self.degrees[count] = deg
58 |                 self.gdegrees[node] = deg
59 |                 # edge_data is used to check whether there exists self-loop
60 |                 edge_data = graph.get_edge_data(node, node, default={weight: 0})
61 |                 self.loops[node] = float(edge_data.get(weight, 1))
62 |                 self.internals[count] = self.loops[node]
63 |                 count += 1
64 |         else:
65 |             for node in graph.nodes():
66 |                 com = part[node]
67 |                 self.node2com[node] = com
68 |                 deg = float(graph.degree(node, weight=weight))
69 |                 self.degrees[com] = self.degrees.get(com, 0) + deg
70 |                 self.gdegrees[node] = deg
71 |                 inc = 0.
72 |                 for neighbor, datas in graph[node].items():
73 |                     edge_weight = datas.get(weight, 1)
74 |                     if edge_weight <= 0:
75 |                         error = "Bad graph type ({})".format(type(graph))
76 |                         raise ValueError(error)
77 |                     if part[neighbor] == com:
78 |                         if neighbor == node:
79 |                             inc += float(edge_weight)
80 |                         else:
81 |                             inc += float(edge_weight) / 2.
82 |                 self.internals[com] = self.internals.get(com, 0) + inc
83 | 


--------------------------------------------------------------------------------
/main_vary_eps.py:
--------------------------------------------------------------------------------
  1 | import community
  2 | import networkx as nx
  3 | import time
  4 | import numpy as np
  5 | 
  6 | from numpy.random import laplace
  7 | from sklearn import metrics
  8 | 
  9 | from utils import *
 10 | 
 11 | import os
 12 | 
 13 | 
 14 | 
 15 | def main_vary_eps(dataset_name='Chamelon',epsilon=2,e1_r=1/3,e2_r=1/3,N=20,exp_num=10,save_csv=False):
 16 | 
 17 | 
 18 |     t_begin = time.time()
 19 | 
 20 |     data_path = './data/' + dataset_name + '.txt'
 21 |     mat0,mid = get_mat(data_path)
 22 |     
 23 | 
 24 |     cols = ['eps','exper','nmi','evc_overlap','evc_MAE','deg_kl', \
 25 |     'diam_rel','cc_rel','mod_rel']
 26 |     
 27 | 
 28 |     all_data = pd.DataFrame(None,columns=cols)
 29 | 
 30 |     # original graph
 31 |     mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph)
 32 | 
 33 |     mat0_edge = mat0_graph.number_of_edges()
 34 |     mat0_node = mat0_graph.number_of_nodes()
 35 |     print('Dataset:%s'%(dataset_name))
 36 |     print('Node number:%d'%(mat0_graph.number_of_nodes()))
 37 |     print('Edge number:%d'%(mat0_graph.number_of_edges()))
 38 | 
 39 | 
 40 |     mat0_par = community.best_partition(mat0_graph)
 41 | 
 42 |     mat0_degree = np.sum(mat0,0)
 43 |     mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution
 44 | 
 45 |     mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000)
 46 |     mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True))
 47 |     mat0_evc_ak = list(mat0_evc_a.keys())
 48 |     mat0_evc_val = np.array(list(mat0_evc_a.values()))
 49 |     evc_kn = np.int64(0.01*mat0_node)
 50 | 
 51 |     mat0_diam = cal_diam(mat0)
 52 | 
 53 |     mat0_cc = nx.transitivity(mat0_graph)
 54 | 
 55 |     mat0_mod = community.modularity(mat0_par,mat0_graph)
 56 | 
 57 | 
 58 |     all_deg_kl = []
 59 |     all_mod_rel = []
 60 |     all_nmi_arr = []
 61 |     all_evc_overlap = []
 62 |     all_evc_MAE = []
 63 |     all_cc_rel = []
 64 |     all_diam_rel = []
 65 | 
 66 | 
 67 | 
 68 |     ti = time.time()
 69 |     
 70 |     e1 = e1_r * epsilon
 71 | 
 72 |     e2 = e2_r * epsilon
 73 |     e3_r = 1 - e1_r - e2_r
 74 | 
 75 |     e3 = e3_r * epsilon
 76 | 
 77 |     ed = e3
 78 |     ev = e3
 79 |     
 80 |     ev_lambda = 1/ed
 81 |     dd_lam = 2/ev
 82 | 
 83 |     
 84 | 
 85 | 
 86 |     nmi_arr = np.zeros([exp_num])
 87 |     deg_kl_arr = np.zeros([exp_num])
 88 |     mod_rel_arr = np.zeros([exp_num])
 89 |     cc_rel_arr =  np.zeros([exp_num])
 90 |     diam_rel_arr = np.zeros([exp_num])
 91 |     evc_overlap_arr = np.zeros([exp_num])
 92 |     evc_MAE_arr = np.zeros([exp_num])
 93 | 
 94 |     for exper in range(exp_num):
 95 |         print('-----------epsilon=%.1f,e1_r=%.1f,e2_r=%.1f,exper=%d/%d-------------'%(epsilon,e1_r,e2_r,exper+1,exp_num))
 96 | 
 97 | 
 98 |         t1 = time.time()
 99 | 
100 |         # Community Initialization
101 |         mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=N)
102 | 
103 |         part1 = {}
104 |         for i in range(len(mat1_pvarr1)):
105 |             part1[i] = mat1_pvarr1[i]
106 | 
107 |         # Community Adjustment
108 |         mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2)
109 |         mat1_pvarr = np.array(list(mat1_par1.values()))
110 | 
111 |         # Information Extraction
112 |         mat1_pvs = []
113 |         for i in range(max(mat1_pvarr)+1):
114 |             pv1 = np.where(mat1_pvarr==i)[0]
115 |             pvs = list(pv1)
116 |             mat1_pvs.append(pvs)
117 | 
118 |         comm_n = max(mat1_pvarr) + 1
119 | 
120 |         ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64)
121 | 
122 |     
123 |         # edge vector
124 |         for i in range(comm_n):
125 |             pi = mat1_pvs[i]
126 |             ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)])
127 |             for j in range(i+1,comm_n):
128 |                 pj = mat1_pvs[j]
129 |                 ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)]))
130 |                 ev_mat[j,i] = ev_mat[i,j]
131 | 
132 |         ga = get_uptri_arr(ev_mat,ind=1)
133 |         ga_noise = ga + laplace(0,ev_lambda,len(ga))
134 |     
135 |         ga_noise_pp = FO_pp(ga_noise)
136 |         ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1)
137 | 
138 |         # degree sequence
139 |         dd_s = []
140 |         for i in range(comm_n):
141 |             dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])]
142 |             dd1 = np.sum(dd1,1) 
143 |     
144 |             dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int)
145 |             dd1 = FO_pp(dd1)
146 |             dd1[dd1<0] = 0
147 |             dd1[dd1>=len(dd1)] = len(dd1)-1
148 | 
149 |             dd1 = list(dd1)
150 |             dd_s.append(dd1)
151 | 
152 |         # Graph Reconstruction
153 |         mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8)
154 |         for i in range(comm_n):
155 |             # Intra-community
156 |             dd_ind = mat1_pvs[i]
157 |             dd1 = dd_s[i]
158 |             mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1)
159 |                 
160 |             # Inter-community
161 |             for j in range(i+1,comm_n):
162 |                 ev1 = ev_mat[i,j]
163 |                 pj = mat1_pvs[j]
164 |                 if ev1 > 0:
165 |                     c1 = np.random.choice(pi,ev1)
166 |                     c2 = np.random.choice(pj,ev1)
167 |                     for ind in range(ev1):
168 |                         mat2[c1[ind],c2[ind]] = 1
169 |                         mat2[c2[ind],c1[ind]] = 1
170 |                         
171 |         mat2 = mat2 + np.transpose(mat2)
172 |         mat2 = np.triu(mat2,1)
173 |         mat2 = mat2 + np.transpose(mat2)
174 |         mat2[mat2>0] = 1
175 | 
176 |         mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph)
177 | 
178 |         # save the graph
179 |         # file_name = './result/' +  'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper)
180 |         # write_edge_txt(mat2,mid,file_name)
181 | 
182 |         #evaluate
183 |         mat2_edge = mat2_graph.number_of_edges()
184 |         mat2_node = mat2_graph.number_of_nodes()
185 | 
186 |         mat2_par = community.best_partition(mat2_graph)
187 |         mat2_mod = community.modularity(mat2_par,mat2_graph)
188 | 
189 |         mat2_cc = nx.transitivity(mat2_graph)
190 | 
191 |         mat2_degree = np.sum(mat2,0)
192 |         mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution
193 |         
194 |         mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000)
195 |         mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True))
196 |         mat2_evc_ak = list(mat2_evc_a.keys())
197 |         mat2_evc_val = np.array(list(mat2_evc_a.values()))
198 |     
199 | 
200 |         mat2_diam = cal_diam(mat2)
201 | 
202 |         # calculate the metrics
203 |         # clustering coefficent
204 |         cc_rel = cal_rel(mat0_cc,mat2_cc)
205 | 
206 |         # degree distribution
207 |         deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist)
208 | 
209 |         # modularity
210 |         mod_rel = cal_rel(mat0_mod,mat2_mod)
211 |         
212 |     
213 |         # NMI
214 |         labels_true = list(mat0_par.values())
215 |         labels_pred = list(mat2_par.values())
216 |         nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred)
217 | 
218 | 
219 |         # Overlap of eigenvalue nodes 
220 |         evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node))
221 | 
222 |         # MAE of EVC
223 |         evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn)
224 | 
225 |         # diameter
226 |         diam_rel = cal_rel(mat0_diam,mat2_diam)
227 | 
228 | 
229 |         nmi_arr[exper] = nmi
230 |         cc_rel_arr[exper] = cc_rel
231 |         deg_kl_arr[exper] = deg_kl
232 |         mod_rel_arr[exper] = mod_rel
233 |         evc_overlap_arr[exper] = evc_overlap
234 |         evc_MAE_arr[exper] = evc_MAE
235 |         diam_rel_arr[exper] = diam_rel
236 | 
237 |         print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \
238 |             %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel))
239 | 
240 |     
241 | 
242 |         data_col = [epsilon,exper,nmi,evc_overlap,evc_MAE,deg_kl, \
243 |             diam_rel,cc_rel,mod_rel]
244 |         col_len = len(data_col)
245 |         data_col = np.array(data_col).reshape(1,col_len)
246 |         data1 = pd.DataFrame(data_col,columns=cols)
247 |         all_data = all_data.append(data1)        
248 | 
249 |                 
250 | 
251 |     all_nmi_arr.append(np.mean(nmi_arr))
252 |     all_cc_rel.append(np.mean(cc_rel_arr))
253 |     all_deg_kl.append(np.mean(deg_kl_arr))
254 |     all_mod_rel.append(np.mean(mod_rel_arr))
255 |     all_evc_overlap.append(np.mean(evc_overlap_arr))
256 |     all_evc_MAE.append(np.mean(evc_MAE_arr))
257 |     all_diam_rel.append(np.mean(diam_rel_arr))
258 | 
259 |         
260 |     # print('Done.%.2fs\n'%(time.time()-ti))
261 | 
262 |     res_path = './result'
263 |     save_name = res_path + '/' + '%s_%d_%.1f_%.2f_%.2f_%d.csv' %(dataset_name,N,e1_r,e2_r,exp_num)
264 |     if not os.path.exists(res_path):
265 |         os.mkdir(res_path)
266 |     
267 |     if save_csv == True:
268 |         all_data.to_csv(save_name,index=False,sep=',')
269 | 
270 |     print('-----------------------------')
271 | 
272 |     print('dataset:',dataset_name)
273 |     
274 |     print('epsilon=',epsilon)
275 |     print('all_nmi_arr=',all_nmi_arr)
276 |     print('all_evc_overlap=',all_evc_overlap)
277 |     print('all_evc_MAE=',all_evc_MAE)
278 |     print('all_deg_kl=',all_deg_kl)
279 |     print('all_diam_rel=',all_diam_rel)
280 |     print('all_cc_rel=',all_cc_rel)
281 |     print('all_mod_rel=',all_mod_rel)
282 | 
283 |     print('All time:%.2fs\n'%(time.time()-t_begin))
284 | 
285 | 
286 | 
287 | if __name__ == '__main__':
288 |     # set the dataset
289 |     # 'Facebook', 'CA-HepPh', 'Enron'
290 |     dataset_name = 'Chamelon'
291 | 
292 |     # set the privacy budget
293 |     epsilon = 2
294 | 
295 |     # set the number of experiments
296 |     exp_num = 10
297 | 
298 |     # set the number of nodes for community initialization
299 |     n1 = 20
300 | 
301 |     for e1_ind in range(1,9):
302 |         e1_r = e1_ind / 10
303 |         for e2_ind in range(1,9):
304 |             e2_r = e2_ind / 10
305 |             e3_r = 1 - e1_r - e2_r
306 |             if e3_r > 0:
307 |                 # run the function
308 |                 main_vary_eps(dataset_name=dataset_name,epsilon=epsilon,e1_r=e1_r,e2_r=e2_r,N=n1,exp_num=exp_num)
309 |             
310 | 
311 | 
312 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import community
  2 | import networkx as nx
  3 | import time
  4 | import numpy as np
  5 | 
  6 | from numpy.random import laplace
  7 | from sklearn import metrics
  8 | 
  9 | from utils import *
 10 | 
 11 | import os
 12 | 
 13 | 
 14 | 
 15 | def main_func(dataset_name='Chamelon',eps=[0.5,1,1.5,2,2.5,3,3.5],e1_r=1/3,e2_r=1/3,N=20,t=1.0,exp_num=10,save_csv=False):
 16 | 
 17 | 
 18 |     t_begin = time.time()
 19 | 
 20 |     data_path = './data/' + dataset_name + '.txt'
 21 |     mat0,mid = get_mat(data_path)
 22 |     
 23 | 
 24 |     cols = ['eps','exper','nmi','evc_overlap','evc_MAE','deg_kl', \
 25 |     'diam_rel','cc_rel','mod_rel']
 26 |     
 27 | 
 28 |     all_data = pd.DataFrame(None,columns=cols)
 29 | 
 30 |     # original graph
 31 |     mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph)
 32 | 
 33 |     mat0_edge = mat0_graph.number_of_edges()
 34 |     mat0_node = mat0_graph.number_of_nodes()
 35 |     print('Dataset:%s'%(dataset_name))
 36 |     print('Node number:%d'%(mat0_graph.number_of_nodes()))
 37 |     print('Edge number:%d'%(mat0_graph.number_of_edges()))
 38 | 
 39 | 
 40 |     mat0_par = community.best_partition(mat0_graph)
 41 | 
 42 |     mat0_degree = np.sum(mat0,0)
 43 |     mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution
 44 | 
 45 |     mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000)
 46 |     mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True))
 47 |     mat0_evc_ak = list(mat0_evc_a.keys())
 48 |     mat0_evc_val = np.array(list(mat0_evc_a.values()))
 49 |     evc_kn = np.int64(0.01*mat0_node)
 50 | 
 51 |     mat0_diam = cal_diam(mat0)
 52 | 
 53 |     mat0_cc = nx.transitivity(mat0_graph)
 54 | 
 55 |     mat0_mod = community.modularity(mat0_par,mat0_graph)
 56 | 
 57 | 
 58 |     all_deg_kl = []
 59 |     all_mod_rel = []
 60 |     all_nmi_arr = []
 61 |     all_evc_overlap = []
 62 |     all_evc_MAE = []
 63 |     all_cc_rel = []
 64 |     all_diam_rel = []
 65 | 
 66 | 
 67 |     for ei in range(len(eps)):
 68 |         epsilon = eps[ei]
 69 |         ti = time.time()
 70 |         
 71 |         e1 = e1_r * epsilon
 72 | 
 73 |         e2 = e2_r * epsilon
 74 |         e3_r = 1 - e1_r - e2_r
 75 | 
 76 |         e3 = e3_r * epsilon
 77 | 
 78 |         ed = e3
 79 |         ev = e3
 80 |         
 81 |         ev_lambda = 1/ed
 82 |         dd_lam = 2/ev
 83 | 
 84 |     
 85 |         nmi_arr = np.zeros([exp_num])
 86 |         deg_kl_arr = np.zeros([exp_num])
 87 |         mod_rel_arr = np.zeros([exp_num])
 88 |         cc_rel_arr =  np.zeros([exp_num])
 89 |         diam_rel_arr = np.zeros([exp_num])
 90 |         evc_overlap_arr = np.zeros([exp_num])
 91 |         evc_MAE_arr = np.zeros([exp_num])
 92 | 
 93 | 
 94 |         for exper in range(exp_num):
 95 |             print('-----------epsilon=%.1f,exper=%d/%d-------------'%(epsilon,exper+1,exp_num))
 96 | 
 97 | 
 98 |             t1 = time.time()
 99 | 
100 |             # Community Initialization
101 |             mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=N,t=t)
102 | 
103 |             part1 = {}
104 |             for i in range(len(mat1_pvarr1)):
105 |                 part1[i] = mat1_pvarr1[i]
106 | 
107 |             # Community Adjustment
108 |             mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2)
109 |             mat1_pvarr = np.array(list(mat1_par1.values()))
110 | 
111 |             # Information Extraction
112 |             mat1_pvs = []
113 |             for i in range(max(mat1_pvarr)+1):
114 |                 pv1 = np.where(mat1_pvarr==i)[0]
115 |                 pvs = list(pv1)
116 |                 mat1_pvs.append(pvs)
117 | 
118 |             comm_n = max(mat1_pvarr) + 1
119 | 
120 |             ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64)
121 | 
122 |         
123 |             # edge vector
124 |             for i in range(comm_n):
125 |                 pi = mat1_pvs[i]
126 |                 ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)])
127 |                 for j in range(i+1,comm_n):
128 |                     pj = mat1_pvs[j]
129 |                     ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)]))
130 |                     ev_mat[j,i] = ev_mat[i,j]
131 | 
132 |             ga = get_uptri_arr(ev_mat,ind=1)
133 |             ga_noise = ga + laplace(0,ev_lambda,len(ga))
134 |         
135 |             ga_noise_pp = FO_pp(ga_noise)
136 |             ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1)
137 | 
138 |             # degree sequence
139 |             dd_s = []
140 |             for i in range(comm_n):
141 |                 dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])]
142 |                 dd1 = np.sum(dd1,1) 
143 |         
144 |                 dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int)
145 |                 dd1 = FO_pp(dd1)
146 |                 dd1[dd1<0] = 0
147 |                 dd1[dd1>=len(dd1)] = len(dd1)-1
148 | 
149 |                 dd1 = list(dd1)
150 |                 dd_s.append(dd1)
151 | 
152 |             # Graph Reconstruction
153 |             mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8)
154 |             for i in range(comm_n):
155 |                 # Intra-community
156 |                 dd_ind = mat1_pvs[i]
157 |                 dd1 = dd_s[i]
158 |                 mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1)
159 |                     
160 |                 # Inter-community
161 |                 for j in range(i+1,comm_n):
162 |                     ev1 = ev_mat[i,j]
163 |                     pj = mat1_pvs[j]
164 |                     if ev1 > 0:
165 |                         c1 = np.random.choice(pi,ev1)
166 |                         c2 = np.random.choice(pj,ev1)
167 |                         for ind in range(ev1):
168 |                             mat2[c1[ind],c2[ind]] = 1
169 |                             mat2[c2[ind],c1[ind]] = 1
170 |                             
171 |             mat2 = mat2 + np.transpose(mat2)
172 |             mat2 = np.triu(mat2,1)
173 |             mat2 = mat2 + np.transpose(mat2)
174 |             mat2[mat2>0] = 1
175 | 
176 |             mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph)
177 | 
178 |             # save the graph
179 |             # file_name = './result/' +  'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper)
180 |             # write_edge_txt(mat2,mid,file_name)
181 | 
182 |             #evaluate
183 |             mat2_edge = mat2_graph.number_of_edges()
184 |             mat2_node = mat2_graph.number_of_nodes()
185 | 
186 |             mat2_par = community.best_partition(mat2_graph)
187 |             mat2_mod = community.modularity(mat2_par,mat2_graph)
188 | 
189 |             mat2_cc = nx.transitivity(mat2_graph)
190 | 
191 |             mat2_degree = np.sum(mat2,0)
192 |             mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution
193 |             
194 |             mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000)
195 |             mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True))
196 |             mat2_evc_ak = list(mat2_evc_a.keys())
197 |             mat2_evc_val = np.array(list(mat2_evc_a.values()))
198 |         
199 | 
200 |             mat2_diam = cal_diam(mat2)
201 | 
202 |             # calculate the metrics
203 |             # clustering coefficent
204 |             cc_rel = cal_rel(mat0_cc,mat2_cc)
205 | 
206 |             # degree distribution
207 |             deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist)
208 | 
209 |             # modularity
210 |             mod_rel = cal_rel(mat0_mod,mat2_mod)
211 |             
212 |         
213 |             # NMI
214 |             labels_true = list(mat0_par.values())
215 |             labels_pred = list(mat2_par.values())
216 |             nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred)
217 | 
218 | 
219 |             # Overlap of eigenvalue nodes 
220 |             evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node))
221 | 
222 |             # MAE of EVC
223 |             evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn)
224 | 
225 |             # diameter
226 |             diam_rel = cal_rel(mat0_diam,mat2_diam)
227 | 
228 | 
229 |             nmi_arr[exper] = nmi
230 |             cc_rel_arr[exper] = cc_rel
231 |             deg_kl_arr[exper] = deg_kl
232 |             mod_rel_arr[exper] = mod_rel
233 |             evc_overlap_arr[exper] = evc_overlap
234 |             evc_MAE_arr[exper] = evc_MAE
235 |             diam_rel_arr[exper] = diam_rel
236 | 
237 |             print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \
238 |                 %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel))
239 | 
240 |      
241 | 
242 |             data_col = [epsilon,exper,nmi,evc_overlap,evc_MAE,deg_kl, \
243 |                 diam_rel,cc_rel,mod_rel]
244 |             col_len = len(data_col)
245 |             data_col = np.array(data_col).reshape(1,col_len)
246 |             data1 = pd.DataFrame(data_col,columns=cols)
247 |             all_data = all_data.append(data1)         
248 | 
249 |                 
250 | 
251 |         all_nmi_arr.append(np.mean(nmi_arr))
252 |         all_cc_rel.append(np.mean(cc_rel_arr))
253 |         all_deg_kl.append(np.mean(deg_kl_arr))
254 |         all_mod_rel.append(np.mean(mod_rel_arr))
255 |         all_evc_overlap.append(np.mean(evc_overlap_arr))
256 |         all_evc_MAE.append(np.mean(evc_MAE_arr))
257 |         all_diam_rel.append(np.mean(diam_rel_arr))
258 | 
259 |         
260 |         print('all_index=%d/%d Done.%.2fs\n'%(ei+1,len(eps),time.time()-ti))
261 | 
262 |     res_path = './result'
263 |     save_name = res_path + '/' + '%s_%d_%.1f_%.2f_%.2f_%d.csv' %(dataset_name,N,t,e1_r,e2_r,exp_num)
264 |     if not os.path.exists(res_path):
265 |         os.mkdir(res_path)
266 |     
267 |     if save_csv == True:
268 |         all_data.to_csv(save_name,index=False,sep=',')
269 | 
270 |     print('-----------------------------')
271 | 
272 |     print('dataset:',dataset_name)
273 |     
274 |     print('eps=',eps)
275 |     print('all_nmi_arr=',all_nmi_arr)
276 |     print('all_evc_overlap=',all_evc_overlap)
277 |     print('all_evc_MAE=',all_evc_MAE)
278 |     print('all_deg_kl=',all_deg_kl)
279 |     print('all_diam_rel=',all_diam_rel)
280 |     print('all_cc_rel=',all_cc_rel)
281 |     print('all_mod_rel=',all_mod_rel)
282 | 
283 |     print('All time:%.2fs'%(time.time()-t_begin))
284 | 
285 | 
286 | 
287 | if __name__ == '__main__':
288 |     # set the dataset
289 |     # 'Facebook', 'CA-HepPh', 'Enron'
290 |     dataset_name = 'Chamelon'
291 | 
292 |     # set the privacy budget, list type
293 |     eps = [0.5,1,1.5,2,2.5,3,3.5]
294 | 
295 |     # set the ratio of the privacy budget
296 |     e1_r = 1/3
297 |     e2_r = 1/3
298 | 
299 |     # set the number of experiments
300 |     exp_num = 10
301 | 
302 |     # set the number of nodes for community initialization
303 |     n1 = 20
304 | 
305 |     # set the resolution parameter
306 |     t = 1.0
307 | 
308 |     # run the function
309 |     main_func(dataset_name=dataset_name,eps=eps,e1_r=e1_r,e2_r=e2_r,N=n1,t=t,exp_num=exp_num)
310 |    
311 | 
312 | 
313 | 


--------------------------------------------------------------------------------
/main_vary_N.py:
--------------------------------------------------------------------------------
  1 | import community
  2 | import networkx as nx
  3 | import time
  4 | import numpy as np
  5 | 
  6 | from numpy.random import laplace
  7 | from sklearn import metrics
  8 | 
  9 | from utils import *
 10 | 
 11 | import os
 12 | 
 13 | 
 14 | 
 15 | def main_vary_N(dataset_name='Chamelon',epsilon=2,e1_r=1/3,e2_r=1/3,N_List=[10,20],exp_num=10,save_csv=False):
 16 | 
 17 | 
 18 |     t_begin = time.time()
 19 | 
 20 |     data_path = './data/' + dataset_name + '.txt'
 21 |     mat0,mid = get_mat(data_path)
 22 |     
 23 | 
 24 |     cols = ['eps','exper','N','nmi','evc_overlap','evc_MAE','deg_kl', \
 25 |     'diam_rel','cc_rel','mod_rel']
 26 |     
 27 | 
 28 |     all_data = pd.DataFrame(None,columns=cols)
 29 | 
 30 |     # original graph
 31 |     mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph)
 32 | 
 33 |     mat0_edge = mat0_graph.number_of_edges()
 34 |     mat0_node = mat0_graph.number_of_nodes()
 35 |     print('Dataset:%s'%(dataset_name))
 36 |     print('Node number:%d'%(mat0_graph.number_of_nodes()))
 37 |     print('Edge number:%d'%(mat0_graph.number_of_edges()))
 38 | 
 39 | 
 40 |     mat0_par = community.best_partition(mat0_graph)
 41 | 
 42 |     mat0_degree = np.sum(mat0,0)
 43 |     mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution
 44 | 
 45 |     mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000)
 46 |     mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True))
 47 |     mat0_evc_ak = list(mat0_evc_a.keys())
 48 |     mat0_evc_val = np.array(list(mat0_evc_a.values()))
 49 |     evc_kn = np.int64(0.01*mat0_node)
 50 | 
 51 |     mat0_diam = cal_diam(mat0)
 52 | 
 53 |     mat0_cc = nx.transitivity(mat0_graph)
 54 | 
 55 |     mat0_mod = community.modularity(mat0_par,mat0_graph)
 56 | 
 57 | 
 58 |     all_deg_kl = []
 59 |     all_mod_rel = []
 60 |     all_nmi_arr = []
 61 |     all_evc_overlap = []
 62 |     all_evc_MAE = []
 63 |     all_cc_rel = []
 64 |     all_diam_rel = []
 65 | 
 66 | 
 67 | 
 68 |     for ni in range(len(N_List)):
 69 |         
 70 |         ti = time.time()
 71 |         n1 = N_List[ni]
 72 |         
 73 |         e1 = e1_r * epsilon
 74 | 
 75 |         e2 = e2_r * epsilon
 76 |         e3_r = 1 - e1_r - e2_r
 77 | 
 78 |         e3 = e3_r * epsilon
 79 | 
 80 |         ed = e3
 81 |         ev = e3
 82 |         
 83 |         ev_lambda = 1/ed
 84 |         dd_lam = 2/ev
 85 | 
 86 |         
 87 | 
 88 |     
 89 |         nmi_arr = np.zeros([exp_num])
 90 |         deg_kl_arr = np.zeros([exp_num])
 91 |         mod_rel_arr = np.zeros([exp_num])
 92 |         cc_rel_arr =  np.zeros([exp_num])
 93 |         diam_rel_arr = np.zeros([exp_num])
 94 |         evc_overlap_arr = np.zeros([exp_num])
 95 |         evc_MAE_arr = np.zeros([exp_num])
 96 | 
 97 | 
 98 |         for exper in range(exp_num):
 99 |             print('-----------N=%d,exper=%d/%d-------------'%(n1,exper+1,exp_num))
100 | 
101 | 
102 |             t1 = time.time()
103 | 
104 |             # Community Initialization
105 |             mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=n1)
106 | 
107 |             part1 = {}
108 |             for i in range(len(mat1_pvarr1)):
109 |                 part1[i] = mat1_pvarr1[i]
110 | 
111 |             # Community Adjustment
112 |             mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2)
113 |             mat1_pvarr = np.array(list(mat1_par1.values()))
114 | 
115 |             # Information Extraction
116 |             mat1_pvs = []
117 |             for i in range(max(mat1_pvarr)+1):
118 |                 pv1 = np.where(mat1_pvarr==i)[0]
119 |                 pvs = list(pv1)
120 |                 mat1_pvs.append(pvs)
121 | 
122 |             comm_n = max(mat1_pvarr) + 1
123 | 
124 |             ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64)
125 | 
126 |         
127 |             # edge vector
128 |             for i in range(comm_n):
129 |                 pi = mat1_pvs[i]
130 |                 ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)])
131 |                 for j in range(i+1,comm_n):
132 |                     pj = mat1_pvs[j]
133 |                     ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)]))
134 |                     ev_mat[j,i] = ev_mat[i,j]
135 | 
136 |             ga = get_uptri_arr(ev_mat,ind=1)
137 |             ga_noise = ga + laplace(0,ev_lambda,len(ga))
138 |         
139 |             ga_noise_pp = FO_pp(ga_noise)
140 |             ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1)
141 | 
142 |             # degree sequence
143 |             dd_s = []
144 |             for i in range(comm_n):
145 |                 dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])]
146 |                 dd1 = np.sum(dd1,1) 
147 |         
148 |                 dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int)
149 |                 dd1 = FO_pp(dd1)
150 |                 dd1[dd1<0] = 0
151 |                 dd1[dd1>=len(dd1)] = len(dd1)-1
152 | 
153 |                 dd1 = list(dd1)
154 |                 dd_s.append(dd1)
155 | 
156 |             # Graph Reconstruction
157 |             mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8)
158 |             for i in range(comm_n):
159 |                 # Intra-community
160 |                 dd_ind = mat1_pvs[i]
161 |                 dd1 = dd_s[i]
162 |                 mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1)
163 |                     
164 |                 # Inter-community
165 |                 for j in range(i+1,comm_n):
166 |                     ev1 = ev_mat[i,j]
167 |                     pj = mat1_pvs[j]
168 |                     if ev1 > 0:
169 |                         c1 = np.random.choice(pi,ev1)
170 |                         c2 = np.random.choice(pj,ev1)
171 |                         for ind in range(ev1):
172 |                             mat2[c1[ind],c2[ind]] = 1
173 |                             mat2[c2[ind],c1[ind]] = 1
174 |                             
175 |             mat2 = mat2 + np.transpose(mat2)
176 |             mat2 = np.triu(mat2,1)
177 |             mat2 = mat2 + np.transpose(mat2)
178 |             mat2[mat2>0] = 1
179 | 
180 |             mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph)
181 | 
182 |             # save the graph
183 |             # file_name = './result/' +  'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper)
184 |             # write_edge_txt(mat2,mid,file_name)
185 | 
186 |             #evaluate
187 |             mat2_edge = mat2_graph.number_of_edges()
188 |             mat2_node = mat2_graph.number_of_nodes()
189 | 
190 |             mat2_par = community.best_partition(mat2_graph)
191 |             mat2_mod = community.modularity(mat2_par,mat2_graph)
192 | 
193 |             mat2_cc = nx.transitivity(mat2_graph)
194 | 
195 |         
196 |             mat2_degree = np.sum(mat2,0)
197 |             mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution
198 |             
199 |             mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000)
200 |             mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True))
201 |             mat2_evc_ak = list(mat2_evc_a.keys())
202 |             mat2_evc_val = np.array(list(mat2_evc_a.values()))
203 |         
204 | 
205 |             mat2_diam = cal_diam(mat2)
206 | 
207 |             # calculate the metrics
208 |             # clustering coefficent
209 |             cc_rel = cal_rel(mat0_cc,mat2_cc)
210 | 
211 |             # degree distribution
212 |             deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist)
213 | 
214 |             # modularity
215 |             mod_rel = cal_rel(mat0_mod,mat2_mod)
216 |             
217 |         
218 |             # NMI
219 |             labels_true = list(mat0_par.values())
220 |             labels_pred = list(mat2_par.values())
221 |             nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred)
222 | 
223 | 
224 |             # Overlap of eigenvalue nodes 
225 |             evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node))
226 | 
227 |             # MAE of EVC
228 |             evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn)
229 | 
230 |             # diameter
231 |             diam_rel = cal_rel(mat0_diam,mat2_diam)
232 | 
233 | 
234 |             nmi_arr[exper] = nmi
235 |             cc_rel_arr[exper] = cc_rel
236 |             deg_kl_arr[exper] = deg_kl
237 |             mod_rel_arr[exper] = mod_rel
238 |             evc_overlap_arr[exper] = evc_overlap
239 |             evc_MAE_arr[exper] = evc_MAE
240 |             diam_rel_arr[exper] = diam_rel
241 | 
242 |             print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \
243 |                 %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel))
244 | 
245 |      
246 | 
247 |             data_col = [epsilon,exper,n1,nmi,evc_overlap,evc_MAE,deg_kl, \
248 |                 diam_rel,cc_rel,mod_rel]
249 |             col_len = len(data_col)
250 |             data_col = np.array(data_col).reshape(1,col_len)
251 |             data1 = pd.DataFrame(data_col,columns=cols)
252 |             all_data = all_data.append(data1)   
253 |                 
254 | 
255 |         all_nmi_arr.append(np.mean(nmi_arr))
256 |         all_cc_rel.append(np.mean(cc_rel_arr))
257 |         all_deg_kl.append(np.mean(deg_kl_arr))
258 |         all_mod_rel.append(np.mean(mod_rel_arr))
259 |         all_evc_overlap.append(np.mean(evc_overlap_arr))
260 |         all_evc_MAE.append(np.mean(evc_MAE_arr))
261 |         all_diam_rel.append(np.mean(diam_rel_arr))
262 | 
263 |         
264 |         print('all_index=%d/%d Done.%.2fs\n'%(ni+1,len(N_List),time.time()-ti))
265 | 
266 |     res_path = './result'
267 |     save_name = res_path + '/' + '%s_%.2f_%.2f_%.2f_%d.csv' %(dataset_name,epsilon,e1_r,e2_r,exp_num)
268 |     if not os.path.exists(res_path):
269 |         os.mkdir(res_path)
270 |     
271 |     if save_csv == True:
272 |         all_data.to_csv(save_name,index=False,sep=',')
273 | 
274 |     print('-----------------------------')
275 | 
276 |     print('dataset:',dataset_name)
277 |     
278 |     print('epsilon=',epsilon)
279 |     print('all_N=',N_List)
280 |     print('all_nmi_arr=',all_nmi_arr)
281 |     print('all_evc_overlap=',all_evc_overlap)
282 |     print('all_evc_MAE=',all_evc_MAE)
283 |     print('all_deg_kl=',all_deg_kl)
284 |     print('all_diam_rel=',all_diam_rel)
285 |     print('all_cc_rel=',all_cc_rel)
286 |     print('all_mod_rel=',all_mod_rel)
287 |     print('All time:%.2fs'%(time.time()-t_begin))
288 | 
289 | 
290 | 
291 | if __name__ == '__main__':
292 |     # set the dataset
293 |     # 'Facebook', 'CA-HepPh', 'Enron'
294 |     dataset_name = 'Chamelon'
295 | 
296 |     # set the privacy budget
297 |     epsilon = 2
298 | 
299 |     # set the ratio of the privacy budget
300 |     e1_r = 1/3
301 |     e2_r = 1/3
302 | 
303 |     # set the number of experiments
304 |     exp_num = 10
305 | 
306 |     # set the number of nodes for community initialization, list type
307 |     N_List = [5,10,15,20,25,30,35]
308 | 
309 |     # run the function
310 |     main_vary_N(dataset_name=dataset_name,epsilon=epsilon,e1_r=e1_r,e2_r=e2_r,N_List=N_List,exp_num=exp_num)
311 |    
312 | 
313 | 
314 | 


--------------------------------------------------------------------------------
/main_vary_t.py:
--------------------------------------------------------------------------------
  1 | import community
  2 | import networkx as nx
  3 | import time
  4 | import numpy as np
  5 | 
  6 | from numpy.random import laplace
  7 | from sklearn import metrics
  8 | 
  9 | from utils import *
 10 | 
 11 | import os
 12 | 
 13 | 
 14 | 
 15 | def main_vary_t(dataset_name='Chamelon',epsilon=2,e1_r=1/3,e2_r=1/3,N=20,t_List=[0.2,0.5,0.8,1.0,1.2,1.5],exp_num=10,save_csv=False):
 16 | 
 17 | 
 18 |     t_begin = time.time()
 19 | 
 20 |     data_path = './data/' + dataset_name + '.txt'
 21 |     mat0,mid = get_mat(data_path)
 22 |     
 23 | 
 24 |     cols = ['eps','exper','t','nmi','evc_overlap','evc_MAE','deg_kl', \
 25 |     'diam_rel','cc_rel','mod_rel']
 26 |     
 27 | 
 28 |     all_data = pd.DataFrame(None,columns=cols)
 29 | 
 30 |     # original graph
 31 |     mat0_graph = nx.from_numpy_array(mat0,create_using=nx.Graph)
 32 | 
 33 |     mat0_edge = mat0_graph.number_of_edges()
 34 |     mat0_node = mat0_graph.number_of_nodes()
 35 |     print('Dataset:%s'%(dataset_name))
 36 |     print('Node number:%d'%(mat0_graph.number_of_nodes()))
 37 |     print('Edge number:%d'%(mat0_graph.number_of_edges()))
 38 | 
 39 | 
 40 |     mat0_par = community.best_partition(mat0_graph)
 41 | 
 42 |     mat0_degree = np.sum(mat0,0)
 43 |     mat0_deg_dist = np.bincount(np.int64(mat0_degree)) # degree distribution
 44 | 
 45 |     mat0_evc = nx.eigenvector_centrality(mat0_graph,max_iter=10000)
 46 |     mat0_evc_a = dict(sorted(mat0_evc.items(),key = lambda x:x[1],reverse=True))
 47 |     mat0_evc_ak = list(mat0_evc_a.keys())
 48 |     mat0_evc_val = np.array(list(mat0_evc_a.values()))
 49 |     evc_kn = np.int64(0.01*mat0_node)
 50 | 
 51 |     mat0_diam = cal_diam(mat0)
 52 | 
 53 |     mat0_cc = nx.transitivity(mat0_graph)
 54 | 
 55 |     mat0_mod = community.modularity(mat0_par,mat0_graph)
 56 | 
 57 | 
 58 |     all_deg_kl = []
 59 |     all_mod_rel = []
 60 |     all_nmi_arr = []
 61 |     all_evc_overlap = []
 62 |     all_evc_MAE = []
 63 |     all_cc_rel = []
 64 |     all_diam_rel = []
 65 | 
 66 | 
 67 | 
 68 |     for ti in range(len(t_List)):
 69 |         
 70 |         ti = time.time()
 71 |         t = t_List[ti]
 72 | 
 73 |         n1 = N
 74 |         
 75 |         e1 = e1_r * epsilon
 76 | 
 77 |         e2 = e2_r * epsilon
 78 |         e3_r = 1 - e1_r - e2_r
 79 | 
 80 |         e3 = e3_r * epsilon
 81 | 
 82 |         ed = e3
 83 |         ev = e3
 84 |         
 85 |         ev_lambda = 1/ed
 86 |         dd_lam = 2/ev
 87 | 
 88 |         
 89 | 
 90 |     
 91 |         nmi_arr = np.zeros([exp_num])
 92 |         deg_kl_arr = np.zeros([exp_num])
 93 |         mod_rel_arr = np.zeros([exp_num])
 94 |         cc_rel_arr =  np.zeros([exp_num])
 95 |         diam_rel_arr = np.zeros([exp_num])
 96 |         evc_overlap_arr = np.zeros([exp_num])
 97 |         evc_MAE_arr = np.zeros([exp_num])
 98 | 
 99 | 
100 |         for exper in range(exp_num):
101 |             print('-----------t=%d,exper=%d/%d-------------'%(t,exper+1,exp_num))
102 | 
103 | 
104 |             t1 = time.time()
105 | 
106 |             # Community Initialization
107 |             mat1_pvarr1 = community_init(mat0,mat0_graph,epsilon=e1,nr=n1,t=t)
108 | 
109 |             part1 = {}
110 |             for i in range(len(mat1_pvarr1)):
111 |                 part1[i] = mat1_pvarr1[i]
112 | 
113 |             # Community Adjustment
114 |             mat1_par1 = comm.best_partition(mat0_graph,part1,epsilon_EM=e2)
115 |             mat1_pvarr = np.array(list(mat1_par1.values()))
116 | 
117 |             # Information Extraction
118 |             mat1_pvs = []
119 |             for i in range(max(mat1_pvarr)+1):
120 |                 pv1 = np.where(mat1_pvarr==i)[0]
121 |                 pvs = list(pv1)
122 |                 mat1_pvs.append(pvs)
123 | 
124 |             comm_n = max(mat1_pvarr) + 1
125 | 
126 |             ev_mat = np.zeros([comm_n,comm_n],dtype=np.int64)
127 | 
128 |         
129 |             # edge vector
130 |             for i in range(comm_n):
131 |                 pi = mat1_pvs[i]
132 |                 ev_mat[i,i] = np.sum(mat0[np.ix_(pi,pi)])
133 |                 for j in range(i+1,comm_n):
134 |                     pj = mat1_pvs[j]
135 |                     ev_mat[i,j] = int(np.sum(mat0[np.ix_(pi,pj)]))
136 |                     ev_mat[j,i] = ev_mat[i,j]
137 | 
138 |             ga = get_uptri_arr(ev_mat,ind=1)
139 |             ga_noise = ga + laplace(0,ev_lambda,len(ga))
140 |         
141 |             ga_noise_pp = FO_pp(ga_noise)
142 |             ev_mat = get_upmat(ga_noise_pp,comm_n,ind=1)
143 | 
144 |             # degree sequence
145 |             dd_s = []
146 |             for i in range(comm_n):
147 |                 dd1 = mat0[np.ix_(mat1_pvs[i],mat1_pvs[i])]
148 |                 dd1 = np.sum(dd1,1) 
149 |         
150 |                 dd1 = (dd1 + laplace(0,dd_lam,len(dd1))).astype(int)
151 |                 dd1 = FO_pp(dd1)
152 |                 dd1[dd1<0] = 0
153 |                 dd1[dd1>=len(dd1)] = len(dd1)-1
154 | 
155 |                 dd1 = list(dd1)
156 |                 dd_s.append(dd1)
157 | 
158 |             # Graph Reconstruction
159 |             mat2 = np.zeros([mat0_node,mat0_node],dtype=np.int8)
160 |             for i in range(comm_n):
161 |                 # Intra-community
162 |                 dd_ind = mat1_pvs[i]
163 |                 dd1 = dd_s[i]
164 |                 mat2[np.ix_(dd_ind,dd_ind)] = generate_intra_edge(dd1)
165 |                     
166 |                 # Inter-community
167 |                 for j in range(i+1,comm_n):
168 |                     ev1 = ev_mat[i,j]
169 |                     pj = mat1_pvs[j]
170 |                     if ev1 > 0:
171 |                         c1 = np.random.choice(pi,ev1)
172 |                         c2 = np.random.choice(pj,ev1)
173 |                         for ind in range(ev1):
174 |                             mat2[c1[ind],c2[ind]] = 1
175 |                             mat2[c2[ind],c1[ind]] = 1
176 |                             
177 |             mat2 = mat2 + np.transpose(mat2)
178 |             mat2 = np.triu(mat2,1)
179 |             mat2 = mat2 + np.transpose(mat2)
180 |             mat2[mat2>0] = 1
181 | 
182 |             mat2_graph = nx.from_numpy_array(mat2,create_using=nx.Graph)
183 | 
184 |             # save the graph
185 |             # file_name = './result/' +  'PrivGraph_%s_%.1f_%d.txt' %(dataset_name,epsilon,exper)
186 |             # write_edge_txt(mat2,mid,file_name)
187 | 
188 |             #evaluate
189 |             mat2_edge = mat2_graph.number_of_edges()
190 |             mat2_node = mat2_graph.number_of_nodes()
191 | 
192 |             mat2_par = community.best_partition(mat2_graph)
193 |             mat2_mod = community.modularity(mat2_par,mat2_graph)
194 | 
195 |             mat2_cc = nx.transitivity(mat2_graph)
196 | 
197 |         
198 |             mat2_degree = np.sum(mat2,0)
199 |             mat2_deg_dist = np.bincount(np.int64(mat2_degree)) # degree distribution
200 |             
201 |             mat2_evc = nx.eigenvector_centrality(mat2_graph,max_iter=10000)
202 |             mat2_evc_a = dict(sorted(mat2_evc.items(),key = lambda x:x[1],reverse=True))
203 |             mat2_evc_ak = list(mat2_evc_a.keys())
204 |             mat2_evc_val = np.array(list(mat2_evc_a.values()))
205 |         
206 | 
207 |             mat2_diam = cal_diam(mat2)
208 | 
209 |             # calculate the metrics
210 |             # clustering coefficent
211 |             cc_rel = cal_rel(mat0_cc,mat2_cc)
212 | 
213 |             # degree distribution
214 |             deg_kl = cal_kl(mat0_deg_dist,mat2_deg_dist)
215 | 
216 |             # modularity
217 |             mod_rel = cal_rel(mat0_mod,mat2_mod)
218 |             
219 |         
220 |             # NMI
221 |             labels_true = list(mat0_par.values())
222 |             labels_pred = list(mat2_par.values())
223 |             nmi = metrics.normalized_mutual_info_score(labels_true,labels_pred)
224 | 
225 | 
226 |             # Overlap of eigenvalue nodes 
227 |             evc_overlap = cal_overlap(mat0_evc_ak,mat2_evc_ak,np.int64(0.01*mat0_node))
228 | 
229 |             # MAE of EVC
230 |             evc_MAE = cal_MAE(mat0_evc_val,mat2_evc_val,k=evc_kn)
231 | 
232 |             # diameter
233 |             diam_rel = cal_rel(mat0_diam,mat2_diam)
234 | 
235 | 
236 |             nmi_arr[exper] = nmi
237 |             cc_rel_arr[exper] = cc_rel
238 |             deg_kl_arr[exper] = deg_kl
239 |             mod_rel_arr[exper] = mod_rel
240 |             evc_overlap_arr[exper] = evc_overlap
241 |             evc_MAE_arr[exper] = evc_MAE
242 |             diam_rel_arr[exper] = diam_rel
243 | 
244 |             print('Nodes=%d,Edges=%d,nmi=%.4f,cc_rel=%.4f,deg_kl=%.4f,mod_rel=%.4f,evc_overlap=%.4f,evc_MAE=%.4f,diam_rel=%.4f' \
245 |                 %(mat2_node,mat2_edge,nmi,cc_rel,deg_kl,mod_rel,evc_overlap,evc_MAE,diam_rel))
246 | 
247 |      
248 | 
249 |             data_col = [epsilon,exper,t,nmi,evc_overlap,evc_MAE,deg_kl, \
250 |                 diam_rel,cc_rel,mod_rel]
251 |             col_len = len(data_col)
252 |             data_col = np.array(data_col).reshape(1,col_len)
253 |             data1 = pd.DataFrame(data_col,columns=cols)
254 |             all_data = all_data.append(data1)   
255 |                 
256 | 
257 |         all_nmi_arr.append(np.mean(nmi_arr))
258 |         all_cc_rel.append(np.mean(cc_rel_arr))
259 |         all_deg_kl.append(np.mean(deg_kl_arr))
260 |         all_mod_rel.append(np.mean(mod_rel_arr))
261 |         all_evc_overlap.append(np.mean(evc_overlap_arr))
262 |         all_evc_MAE.append(np.mean(evc_MAE_arr))
263 |         all_diam_rel.append(np.mean(diam_rel_arr))
264 | 
265 |         
266 |         print('all_index=%d/%d Done.%.2fs\n'%(ti+1,len(t_List),time.time()-ti))
267 | 
268 |     res_path = './result'
269 |     save_name = res_path + '/' + '%s_%.2f_%d_%.2f_%.2f_%d.csv' %(dataset_name,epsilon,N,e1_r,e2_r,exp_num)
270 |     if not os.path.exists(res_path):
271 |         os.mkdir(res_path)
272 |     
273 |     if save_csv == True:
274 |         all_data.to_csv(save_name,index=False,sep=',')
275 | 
276 |     print('-----------------------------')
277 | 
278 |     print('dataset:',dataset_name)
279 |     
280 |     print('epsilon=',epsilon)
281 |     print('all_t=',t_List)
282 |     print('all_nmi_arr=',all_nmi_arr)
283 |     print('all_evc_overlap=',all_evc_overlap)
284 |     print('all_evc_MAE=',all_evc_MAE)
285 |     print('all_deg_kl=',all_deg_kl)
286 |     print('all_diam_rel=',all_diam_rel)
287 |     print('all_cc_rel=',all_cc_rel)
288 |     print('all_mod_rel=',all_mod_rel)
289 |     print('All time:%.2fs'%(time.time()-t_begin))
290 | 
291 | 
292 | 
293 | if __name__ == '__main__':
294 |     # set the dataset
295 |     # 'Facebook', 'CA-HepPh', 'Enron'
296 |     dataset_name = 'Chamelon'
297 | 
298 |     # set the privacy budget
299 |     epsilon = 2
300 | 
301 |     # set the ratio of the privacy budget
302 |     e1_r = 1/3
303 |     e2_r = 1/3
304 | 
305 |     # set the number of experiments
306 |     exp_num = 10
307 | 
308 |     # set the number of nodes for community initialization
309 |     N = 20
310 | 
311 |     # set the resolution parameter, list type
312 |     t_List = [0.2,0.5,0.8,1.0,1.2,1.5]
313 | 
314 |     # run the function
315 |     main_vary_t(dataset_name=dataset_name,epsilon=epsilon,e1_r=e1_r,e2_r=e2_r,N=N,t_List=t_List,exp_num=exp_num)
316 |    
317 | 
318 | 
319 | 


--------------------------------------------------------------------------------
/comm/community_main.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | This module implements community detection.
  4 | """
  5 | from __future__ import print_function
  6 | 
  7 | import array
  8 | import math
  9 | 
 10 | import numbers
 11 | from random import random
 12 | import warnings
 13 | import random
 14 | import networkx as nx
 15 | import numpy as np
 16 | from numpy.random import laplace
 17 | import time
 18 | 
 19 | from .community_status import Status
 20 | 
 21 | 
 22 | 
 23 | #__PASS_MAX = -1
 24 | __PASS_MAX = 10000
 25 | __MIN = 0.0000001
 26 | 
 27 | 
 28 | def check_random_state(seed):
 29 |     
 30 |     if seed is None or seed is np.random:
 31 |         return np.random.mtrand._rand
 32 |     if isinstance(seed, (numbers.Integral, np.integer)):
 33 |         return np.random.RandomState(seed)
 34 |     if isinstance(seed, np.random.RandomState):
 35 |         return seed
 36 |     raise ValueError("%r cannot be used to seed a numpy.random.RandomState"
 37 |                      " instance" % seed)
 38 | 
 39 | 
 40 | def partition_at_level(dendrogram, level):
 41 |     
 42 |     partition = dendrogram[0].copy()
 43 |     for index in range(1, level + 1):
 44 |         for node, community in partition.items():
 45 |             partition[node] = dendrogram[index][community]
 46 |     return partition
 47 | 
 48 | 
 49 | def modularity(partition, graph, weight='weight'):
 50 |    
 51 |     if graph.is_directed():
 52 |         raise TypeError("Bad graph type, use only non directed graph")
 53 | 
 54 |     inc = dict([])
 55 |     deg = dict([])
 56 |     # links stands for the number of edge
 57 |     links = graph.size(weight=weight)
 58 |     if links == 0:
 59 |         raise ValueError("A graph without link has an undefined modularity")
 60 | 
 61 |     for node in graph:
 62 |         # com stands for the node's corresponding community
 63 |         com = partition[node]
 64 |         # deg[com] is used to storage the degree of relative community
 65 |         deg[com] = deg.get(com, 0.) + graph.degree(node, weight=weight)
 66 |         for neighbor, datas in graph[node].items():
 67 |             # if the result of get() is None, return 1
 68 |             edge_weight = datas.get(weight, 1)
 69 |             if partition[neighbor] == com:
 70 |                 if neighbor == node:
 71 |                     inc[com] = inc.get(com, 0.) + float(edge_weight)
 72 |                 else:
 73 |                     inc[com] = inc.get(com, 0.) + float(edge_weight) / 2.
 74 | 
 75 |     res = 0.
 76 |     for com in set(partition.values()):
 77 |         # calculate the modularity based on the formula: Q = deg_in/m - (deg_com/(2m))^2
 78 |         res += (inc.get(com, 0.) / links) - \
 79 |                (deg.get(com, 0.) / (2. * links)) ** 2
 80 |     return res
 81 | 
 82 | 
 83 | def best_partition(graph,
 84 |                    partition=None,
 85 |                    weight='weight',
 86 |                    resolution=1.,
 87 |                    randomize=None,
 88 |                    random_state=None,
 89 |                    epsilon_EM=None,
 90 |                    divide=1):
 91 | 
 92 |     dendo = generate_dendrogram(graph,
 93 |                                 partition,
 94 |                                 weight,
 95 |                                 resolution,
 96 |                                 randomize,
 97 |                                 random_state,
 98 |                                 epsilon_EM,
 99 |                                 divide)
100 |     return partition_at_level(dendo, len(dendo) - 1)
101 | 
102 | def generate_dendrogram(graph,
103 |                         part_init=None,
104 |                         weight='weight',
105 |                         resolution=1.,
106 |                         randomize=None,
107 |                         random_state=None,
108 |                         epsilon_EM=None,
109 |                         divide=1):
110 | 
111 |     if graph.is_directed():
112 |         raise TypeError("Bad graph type, use only non directed graph")
113 | 
114 |     # Properly handle random state, eventually remove old `randomize` parameter
115 |     # NOTE: when `randomize` is removed, delete code up to random_state = ...
116 |     if randomize is not None:
117 |         warnings.warn("The `randomize` parameter will be deprecated in future "
118 |                       "versions. Use `random_state` instead.", DeprecationWarning)
119 |         # If shouldn't randomize, we set a fixed seed to get determinisitc results
120 |         if randomize is False:
121 |             random_state = 0
122 | 
123 |     # We don't know what to do if both `randomize` and `random_state` are defined
124 |     if randomize and random_state is not None:
125 |         raise ValueError("`randomize` and `random_state` cannot be used at the "
126 |                          "same time")
127 | 
128 |     random_state = check_random_state(random_state)
129 | 
130 |     # special case, when there is no link
131 |     # the best partition is everyone in its community
132 |     if graph.number_of_edges() == 0:
133 |         part = dict([])
134 |         for i, node in enumerate(graph.nodes()):
135 |             part[node] = i
136 |         return [part]
137 | 
138 |     current_graph = graph.copy()
139 | 
140 | 
141 | 
142 | 
143 |     status = Status()
144 |     status.init(current_graph, weight, part_init)
145 |     # status.init(current_graph, weight, part1)
146 |     status_list = list()
147 | 
148 | 
149 |     v1 = np.sum(list(status.internals.values()))
150 |     # print('initial internals:%d'%v1)
151 | 
152 |     t1 = time.time()
153 |     __comm_adjust_em(current_graph, status, weight, resolution, random_state, epsilon_EM  , divide)
154 | 
155 |     v1 = np.sum(list(status.internals.values()))
156 |     # print('final internals:%d'%v1)
157 | 
158 |     # print('adjust time:%.2fs'%(time.time()-t1))
159 |     new_mod = __modularity(status, resolution)
160 |     partition = __renumber(status.node2com)
161 |     status_list.append(partition)
162 |     mod = new_mod
163 |     # induced_graph is to create new graph based on the partition
164 |     current_graph = induced_graph(partition, current_graph, weight)
165 |     status.init(current_graph, weight)
166 | 
167 |     return status_list[:]
168 | 
169 | 
170 | def induced_graph(partition, graph, weight="weight"):
171 | 
172 |     ret = nx.Graph()
173 |     ret.add_nodes_from(partition.values())
174 | 
175 |     for node1, node2, datas in graph.edges(data=True):
176 |         edge_weight = datas.get(weight, 1)
177 |         com1 = partition[node1]
178 |         com2 = partition[node2]
179 |         w_prec = ret.get_edge_data(com1, com2, {weight: 0}).get(weight, 1)
180 |         ret.add_edge(com1, com2, **{weight: w_prec + edge_weight})
181 | 
182 |     return ret
183 | 
184 | 
185 | def __renumber(dictionary):
186 | 
187 |     values = set(dictionary.values())
188 |     target = set(range(len(values)))
189 | 
190 |     if values == target:
191 |         # no renumbering necessary
192 |         ret = dictionary.copy()
193 |     else:
194 |         # add the values that won't be renumbered
195 |         renumbering = dict(zip(target.intersection(values),
196 |                                target.intersection(values)))
197 |         # add the values that will be renumbered
198 |         renumbering.update(dict(zip(values.difference(target),
199 |                                     target.difference(values))))
200 |         ret = {k: renumbering[v] for k, v in dictionary.items()}
201 | 
202 |     return ret
203 | 
204 | 
205 | def load_binary(data):
206 |     data = open(data, "rb")
207 | 
208 |     reader = array.array("I")
209 |     reader.fromfile(data, 1)
210 |     num_nodes = reader.pop()
211 |     reader = array.array("I")
212 |     reader.fromfile(data, num_nodes)
213 |     cum_deg = reader.tolist()
214 |     num_links = reader.pop()
215 |     reader = array.array("I")
216 |     reader.fromfile(data, num_links)
217 |     links = reader.tolist()
218 |     graph = nx.Graph()
219 |     graph.add_nodes_from(range(num_nodes))
220 |     prec_deg = 0
221 | 
222 |     for index in range(num_nodes):
223 |         last_deg = cum_deg[index]
224 |         neighbors = links[prec_deg:last_deg]
225 |         graph.add_edges_from([(index, int(neigh)) for neigh in neighbors])
226 |         prec_deg = last_deg
227 | 
228 |     return graph
229 | 
230 | def __comm_adjust_em(graph, status, weight_key, resolution, random_state, epsilon, divide):
231 |     
232 |     nb_pass_done = 0
233 |     cur_mod = __modularity(status, resolution)
234 |     new_mod = cur_mod
235 |   
236 |     pass_max = round(divide)
237 | 
238 |     deltau = 1
239 |     c1 = epsilon / (2 * pass_max * deltau * 2 )
240 |     
241 |   
242 |     # print('epsilon:',c1)
243 | 
244 |     while nb_pass_done < pass_max:
245 |         cur_mod = new_mod
246 |         
247 |         nb_pass_done += 1
248 |         
249 |         # iteration over the nodes
250 |         for node in __randomize(graph.nodes(), random_state):
251 |             
252 |             com_node = status.node2com[node]
253 |             
254 |             # obtain all communities
255 |             candi_communities = __allcom(node, graph, status, weight_key)
256 |             
257 |             remove_cost = - resolution * candi_communities.get(com_node,0)
258 | 
259 |             # remove the node from the original community
260 |             __remove(node, com_node,
261 |                     candi_communities.get(com_node, 0.), status)
262 |             best_com = com_node
263 |            
264 | 
265 |             coms = []
266 |             incrs = []
267 |             for com, dnc in __randomize(candi_communities.items(), random_state):
268 |                 incr = remove_cost + resolution * dnc
269 |                 incrs.append(incr)
270 |                 coms.append(com)
271 |     
272 |             incrs = np.array(incrs)
273 |             incrs = incrs * c1
274 |             incrs_m = max(np.max(incrs),0)
275 |             exp_inc = np.exp(incrs-incrs_m)
276 | 
277 |             # Exponential Mechanism
278 |             prob_inc = exp_inc / np.sum(exp_inc)
279 |             best_com = np.random.choice(coms,p=prob_inc)
280 |                 
281 |             # put the node into the best_com
282 |             __insert(node, best_com,
283 |                     candi_communities.get(best_com, 0.), status)
284 |             
285 |         new_mod = __modularity(status, resolution)
286 |         
287 | 
288 | 
289 | def __neighcom(node, graph, status, weight_key):
290 | 
291 |     weights = {}
292 |     for neighbor, datas in graph[node].items():
293 |         if neighbor != node:
294 |             edge_weight = datas.get(weight_key, 1)
295 |             neighborcom = status.node2com[neighbor]
296 |             weights[neighborcom] = weights.get(neighborcom, 0) + edge_weight
297 | 
298 |     return weights
299 | 
300 | 
301 | 
302 | def __allcom(node, graph, status, weight_key):
303 |     all_coms = list(status.node2com.values())
304 |     candi_weights = dict.fromkeys(all_coms,0)
305 | 
306 |     for neighbor, datas in graph[node].items():
307 |         if neighbor != node:
308 |             edge_weight = datas.get(weight_key, 1)
309 |             neighborcom = status.node2com[neighbor]
310 |             candi_weights[neighborcom] = candi_weights.get(neighborcom, 0) + edge_weight
311 | 
312 |     return candi_weights 
313 | 
314 | 
315 | def __remove(node, com, weight, status):
316 |     status.degrees[com] = (status.degrees.get(com, 0.)
317 |                            - status.gdegrees.get(node, 0.))
318 |     status.internals[com] = float(status.internals.get(com, 0.) -
319 |                                   weight - status.loops.get(node, 0.))
320 |     status.node2com[node] = -1
321 | 
322 | 
323 | def __insert(node, com, weight, status):
324 | 
325 |     status.node2com[node] = com
326 |     status.degrees[com] = (status.degrees.get(com, 0.) +
327 |                            status.gdegrees.get(node, 0.))
328 |     status.internals[com] = float(status.internals.get(com, 0.) +
329 |                                   weight + status.loops.get(node, 0.))
330 | 
331 | 
332 | def __modularity(status, resolution):
333 | 
334 |     links = float(status.total_weight)
335 |     result = 0.
336 |     for community in set(status.node2com.values()):
337 |         in_degree = status.internals.get(community, 0.)
338 |         degree = status.degrees.get(community, 0.)
339 |         if links > 0:
340 |             result += in_degree * resolution / links -  ((degree / (2. * links)) ** 2)
341 |     return result
342 | 
343 | 
344 | def __randomize(items, random_state):
345 |     randomized_items = list(items)
346 |     random_state.shuffle(randomized_items)
347 |     return randomized_items
348 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from numpy.random import laplace
  3 | import pandas as pd
  4 | 
  5 | import networkx as nx
  6 | 
  7 | import community
  8 | import comm
  9 | import time
 10 | import random
 11 | 
 12 | import itertools
 13 | from heapq import *
 14 | 
 15 | from heapq import nlargest
 16 | 
 17 | 
 18 | 
 19 | def get_mat(data_path):
 20 |     # data_path = './data/' + dataset_name + '.txt'
 21 |     data = np.loadtxt(data_path)
 22 | 
 23 |     
 24 |     # initial statistics
 25 |     dat = (np.append(data[:,0],data[:,1])).astype(int)
 26 |     dat_c = np.bincount(dat)
 27 | 
 28 |     d = {}
 29 |     node = 0
 30 |     mid = []
 31 |     for i in range(len(dat_c)):
 32 |         if dat_c[i] > 0:
 33 |             d[i] = node
 34 |             mid.append(i)
 35 |             node = node + 1
 36 |     mid = np.array(mid,dtype=np.int32)
 37 | 
 38 |     # initial statistics
 39 |     Edge_num = data.shape[0] 
 40 |     c = len(d) 
 41 | 
 42 | 
 43 |     # genarated adjancent matrix
 44 |     mat0 = np.zeros([c,c],dtype=np.uint8)
 45 |     for i in range(Edge_num):
 46 |         mat0[d[int(data[i,0])],d[int(data[i,1])]] = 1
 47 | 
 48 | 
 49 |     # transfer direct to undirect
 50 |     mat0 = mat0 + np.transpose(mat0)
 51 |     mat0 = np.triu(mat0,1)
 52 |     mat0 = mat0 + np.transpose(mat0)
 53 |     mat0[mat0>0] = 1
 54 |     return mat0,mid
 55 | 
 56 | def community_init(mat0,mat0_graph,epsilon,nr,t=1.0):
 57 | 
 58 |     # t1 = time.time()
 59 |     # Divide the nodes randomly
 60 |     g1 = list(np.zeros(len(mat0)))
 61 |     ind = -1
 62 | 
 63 |     for i in range(len(mat0)):
 64 |         if i % nr == 0:
 65 |             ind = ind + 1
 66 |         g1[i] = ind
 67 | 
 68 |     random.shuffle(g1)
 69 | 
 70 |     mat0_par3 = {}
 71 |     for i in range(len(mat0)):
 72 |         mat0_par3[i] = g1[i]
 73 | 
 74 |     gr1 = max(mat0_par3.values()) + 1
 75 | 
 76 |     # mat0_mod3 = community.modularity(mat0_par3,mat0_graph)
 77 |     # print('mat0_mod2=%.3f,gr1=%d'%(mat0_mod3,gr1)) 
 78 | 
 79 |     
 80 |     mat0_par3_pv = np.array(list(mat0_par3.values()))
 81 |     mat0_par3_pvs = []
 82 |     for i in range(gr1):
 83 |         pv = np.where(mat0_par3_pv==i)[0]
 84 |         pvs = list(pv)
 85 |         mat0_par3_pvs.append(pvs)
 86 |     mat_one_level = np.zeros([gr1,gr1])
 87 | 
 88 |     for i in range(gr1):
 89 |         pi = mat0_par3_pvs[i]
 90 |         mat_one_level[i,i] = np.sum(mat0[np.ix_(pi,pi)])
 91 |         for j in range(i+1,gr1):
 92 |             pj = mat0_par3_pvs[j]
 93 |             mat_one_level[i,j] = np.sum(mat0[np.ix_(pi,pj)])
 94 |     # print('generate new matrix time:%.2fs'%(time.time()-t1))
 95 |     
 96 |     lap_noise = laplace(0,1/epsilon,gr1*gr1).astype(np.int32)
 97 |     lap_noise = lap_noise.reshape(gr1,gr1)
 98 | 
 99 |     ga = get_uptri_arr(mat_one_level,ind=1)
100 |     ga_noise = ga + laplace(0,1/epsilon,len(ga))
101 |     ga_noise_pp = FO_pp(ga_noise)
102 |     mat_one_level_noise = get_upmat(ga_noise_pp,gr1,ind=1)
103 | 
104 | 
105 |     noise_diag = np.int32(mat_one_level.diagonal() + laplace(0,2/epsilon,len(mat_one_level)))
106 | 
107 |     # keep consistency
108 |     noise_diag = FO_pp(noise_diag)
109 |   
110 |     mat_one_level_noise = np.triu(mat_one_level_noise,1)
111 |     mat_one_level_noise = mat_one_level_noise + np.transpose(mat_one_level_noise)
112 | 
113 |     row,col = np.diag_indices_from(mat_one_level_noise) 
114 |     mat_one_level_noise[row,col] = noise_diag
115 |     mat_one_level_noise[mat_one_level_noise<0] = 0
116 | 
117 |     mat_one_level_graph = nx.from_numpy_array(mat_one_level_noise,create_using=nx.Graph)
118 |     
119 |     # Apply the Louvain method
120 |     mat_new_par = community.best_partition(mat_one_level_graph,resolution=t)
121 |     gr2 = max(mat_new_par.values()) + 1 
122 |     mat_new_pv = np.array(list(mat_new_par.values()))
123 |     mat_final_pvs = []
124 |     for i in range(gr2):
125 |         pv = np.where(mat_new_pv==i)[0]
126 |         mat_final_pv = []
127 |         for j in range(len(pv)):
128 |             pvj = pv[j]
129 |             mat_final_pv.extend(mat0_par3_pvs[pvj])
130 |         mat_final_pvs.append(mat_final_pv)
131 | 
132 |     label1 = np.zeros([len(mat0)],dtype=np.int32)
133 |     for i in range(len(mat_final_pvs)):
134 |         label1[mat_final_pvs[i]] = i
135 | 
136 |     return label1
137 | 
138 | 
139 | 
140 | def get_uptri_arr(mat_init,ind=0):
141 |     a = len(mat_init)
142 |     res = []
143 |     for i in range(a):
144 |         dat = mat_init[i][i+ind:]
145 |         res.extend(dat)
146 |     arr = np.array(res)
147 |     return arr
148 | 
149 | 
150 | def get_upmat(arr,k,ind=0):
151 |     mat = np.zeros([k,k],dtype=np.int32)
152 |     left = 0
153 |     for i in range(k):
154 |         delta = k - i - ind
155 |         mat[i,i+ind:] = arr[left:left+delta]
156 |         left = left + delta
157 |         
158 |     return mat
159 | 
160 | # Post processing
161 | def FO_pp(data_noise,type='norm_sub'):
162 |     if type == 'norm_sub':
163 |         data = norm_sub_deal(data_noise)
164 |         
165 |     if type == 'norm_mul':
166 |         data = norm_mul_deal(data_noise)
167 |     
168 |     return data
169 | 
170 | def norm_sub_deal(data):
171 |     data = np.array(data,dtype=np.int32)
172 |     data_min = np.min(data)
173 |     data_sum = np.sum(data)
174 |     delta_m = 0 - data_min
175 |     
176 |     if delta_m > 0:
177 |         dm = 100000000
178 |         data_seq = np.zeros([len(data)],dtype=np.int32)
179 |         for i in range(0,delta_m):
180 |             data_t = data - i
181 |             data_t[data_t<0] = 0
182 |             data_t_s = np.sum(data_t)
183 |             dt = np.abs(data_t_s - data_sum)
184 |             if dt < dm:
185 |                 dm = dt
186 |                 data_seq = data_t
187 |                 if dt == 0:
188 |                     break
189 |                 
190 |     else:
191 |         data_seq = data
192 |     return data_seq
193 |         
194 | 
195 | 
196 | 
197 | # generate graph(intra edges) based on degree sequence
198 | def generate_intra_edge(dd1,div=1):
199 |     dd1 = np.array(dd1,dtype=np.int32)
200 |     dd1[dd1<0] = 0
201 |     dd1_len = len(dd1)
202 |     dd1_p = dd1.reshape(dd1_len,1) * dd1.reshape(1,dd1_len)
203 |     s1 = np.sum(dd1)
204 | 
205 |     dd1_res = np.zeros([dd1_len,dd1_len],dtype=np.int8)
206 |     if s1 > 0:
207 |         batch_num = int(dd1_len / div)
208 |         begin_id = 0
209 |         for i in range(div):
210 |             if i == div-1:
211 |                 batch_n = dd1_len - begin_id
212 |                 dd1_r = np.random.randint(0,high=s1,size=(batch_n,dd1_len))
213 |                 res = dd1_p[begin_id:,:] - dd1_r
214 |                 res[res>0] = 1
215 |                 res[res<1] = 0
216 |                 dd1_res[begin_id:,:] = res
217 |             else:
218 |                 dd1_r = np.random.randint(0,high=s1,size=(batch_num,dd1_len))
219 |                 res = dd1_p[begin_id:begin_id+batch_num,:] - dd1_r
220 |                 res[res>0] = 1
221 |                 res[res<1] = 0
222 |                 dd1_res[begin_id:begin_id+batch_num,:] = res
223 |                 begin_id = begin_id + batch_num
224 |     
225 |     # make sure the final adjacency matrix is symmetric
226 |     dd1_out = np.triu(dd1_res,1)
227 |     dd1_out = dd1_out + np.transpose(dd1_out)
228 |     return dd1_out
229 | 
230 | # calculate the diameter
231 | def cal_diam(mat):
232 |     mat_graph = nx.from_numpy_array(mat,create_using=nx.Graph)
233 |     max_diam = 0
234 |     for com in nx.connected_components(mat_graph):
235 |         com_list = list(com)
236 |         mat_sub = mat[np.ix_(com_list,com_list)]
237 |         sub_g = nx.from_numpy_array(mat_sub,create_using=nx.Graph)
238 |         diam = nx.diameter(sub_g)
239 |         if diam > max_diam:
240 |             max_diam = diam
241 |     return max_diam
242 | 
243 | # calculate the overlap 
244 | def cal_overlap(la,lb,k):
245 |     la = la[:k]
246 |     lb = lb[:k]
247 |     la_s = set(la)
248 |     lb_s = set(lb)
249 |     num = len(la_s & lb_s)
250 |     rate = num / k
251 |     return rate
252 | 
253 | 
254 | # calculate the KL divergence
255 | def cal_kl(A,B): 
256 |     p = A / sum(A)
257 |     q = B / sum(B)
258 |     if A.shape[0] > B.shape[0]:
259 |         q = np.pad(q,(0,p.shape[0]-q.shape[0]),'constant',constant_values=(0,0))
260 |     elif A.shape[0] < B.shape[0]:
261 |         p = np.pad(p,(0,q.shape[0]-p.shape[0]),'constant',constant_values=(0,0))
262 |     kl = p * np.log((p+np.finfo(np.float64).eps)/(q+np.finfo(np.float64).eps))
263 |     kl = np.sum(kl)
264 |     return kl
265 | 
266 | 
267 | # calculate the RE
268 | def cal_rel(A,B): 
269 |     eps = 0.000000000000001
270 |     A = np.float64(A)
271 |     B = np.float64(B)
272 |     #eps = np.float64(eps)
273 |     res = abs((A-B)/(A+eps))
274 |     return res
275 | 
276 | # calculate the MSE
277 | def cal_MSE(A,B): 
278 |     res = np.mean((A-B)**2)
279 |     return res
280 | 
281 | # calculate the MAE
282 | def cal_MAE(A,B,k=None): 
283 |     if k== None:
284 |         res = np.mean(abs(A-B))
285 |     else:
286 |         a = np.array(A[:k])
287 |         b = np.array(B[:k])
288 |         res = np.mean(abs(a-b))
289 |     return res
290 | 
291 | 
292 | def write_edge_txt(mat0,mid,file_name):
293 |     a0 = np.where(mat0==1)[0]
294 |     a1 = np.where(mat0==1)[1]
295 |     with open(file_name,'w+') as f:
296 |         for i in range(len(a0)):
297 |             f.write('%d\t%d\n'%(mid[a0[i]],mid[a1[i]]))
298 | 
299 | 
300 | class PriorityQueue(object):
301 |     def __init__(self):
302 |         self.pq = []                         # list of entries arranged in a heap
303 |         self.entry_finder = {}               # mapping of tasks to entries
304 |         self.REMOVED = '<removed-task>'      # placeholder for a removed task
305 |         self.counter = itertools.count()     # unique sequence count
306 | 
307 |     def add_task(self, task, priority=0):
308 |         'Add a new task or update the priority of an existing task'
309 |         if task in self.entry_finder:
310 |             self.remove_task(task)
311 |         count = next(self.counter)
312 |         entry = [priority, count, task]
313 |         self.entry_finder[task] = entry
314 |         heappush(self.pq, entry)
315 | 
316 |     def remove_task(self, task):
317 |         'Mark an existing task as REMOVED.  Raise KeyError if not found.'
318 |         entry = self.entry_finder.pop(task)
319 |         entry[-1] = self.REMOVED
320 | 
321 |     def pop_item(self):
322 |         'Remove and return the lowest priority task. Raise KeyError if empty.'
323 |         while self.pq:
324 |             priority, count, task = heappop(self.pq)
325 |             if task is not self.REMOVED:
326 |                 del self.entry_finder[task]
327 |                 return task, priority
328 |         raise KeyError('pop from an empty priority queue')
329 | 
330 |     def __str__(self):
331 |         return str([entry for entry in self.pq if entry[2] != self.REMOVED])
332 | 
333 | 
334 | def degreeDiscountIC(G, k, p=0.01):
335 | 
336 |     S = []
337 |     dd = PriorityQueue() # degree discount
338 |     t = dict() # number of adjacent vertices that are in S
339 |     d = dict() # degree of each vertex
340 | 
341 |     # initialize degree discount
342 |     for u in G.nodes():
343 |         d[u] = sum([G[u][v]['weight'] for v in G[u]]) # each edge adds degree 1
344 |         # d[u] = len(G[u]) # each neighbor adds degree 1
345 |         dd.add_task(u, -d[u]) # add degree of each node
346 |         t[u] = 0
347 | 
348 |     # add vertices to S greedily
349 |     for i in range(k):
350 |         u, priority = dd.pop_item() # extract node with maximal degree discount
351 |         S.append(u)
352 |         for v in G[u]:
353 |             if v not in S:
354 |                 t[v] += G[u][v]['weight'] # increase number of selected neighbors
355 |                 priority = d[v] - 2*t[v] - (d[v] - t[v])*t[v]*p # discount of degree
356 |                 dd.add_task(v, -priority)
357 |     return S
358 | 
359 | def runIC (G, S, p = 0.01):
360 | 
361 |     from copy import deepcopy
362 |     from random import random
363 |     T = deepcopy(S) # copy already selected nodes
364 | 
365 |     i = 0
366 |     while i < len(T):
367 |         for v in G[T[i]]: # for neighbors of a selected node
368 |             if v not in T: # if it wasn't selected yet
369 |                 w = G[T[i]][v]['weight'] # count the number of edges between two nodes
370 |                 if random() <= 1 - (1-p)**w: # if at least one of edges propagate influence
371 |                     # print (T[i], 'influences', v)
372 |                     T.append(v)
373 |         i += 1
374 |     return T
375 | 
376 | def find_seed(graph_path,seed_size=20):
377 |     
378 |     # read in graph
379 |     G = nx.Graph()
380 |     with open(graph_path) as f:
381 | 
382 |         for line in f:
383 |             u, v = map(int, line.split())
384 |             try:
385 |                 G[u][v]['weight'] += 1
386 |             except:
387 |                 G.add_edge(u,v, weight=1)
388 |         
389 |     
390 |     S = degreeDiscountIC(G, seed_size)
391 |     return S
392 | 
393 | 
394 | 
395 | def cal_spread(graph_path,S_all,p=0.01,seed_size=20,iterations=100):
396 |     
397 |     # read in graph
398 |     G = nx.Graph()
399 |     with open(graph_path) as f:
400 | 
401 |         for line in f:
402 |             u, v = map(int, line.split())
403 |             # print('u:%s,v:%s'%(u,v))
404 |             try:
405 |                 G[u][v]['weight'] += 1
406 |             except:
407 |                 G.add_edge(u,v, weight=1)
408 |            
409 | 
410 |     #calculate initial set
411 |     
412 |     if seed_size <= len(S_all):
413 |         S = S_all[:seed_size]
414 |     else:
415 |         print('seed_size is too large.')
416 |         S = S_all
417 | 
418 |     
419 |     avg = 0
420 |     for i in range(iterations):
421 |         T = runIC(G, S, p)
422 |         avg += float(len(T))/iterations
423 | 
424 |     avg_final = int(round(avg))
425 | 
426 |     return avg_final


--------------------------------------------------------------------------------