├── Graclus_centers.py ├── LICENSE ├── Propagation_Phase.py ├── README.md ├── com1.txt ├── com2.txt ├── compare.py ├── data_example ├── facebook_combined.txt └── graph.txt ├── f1max-master ├── README.md ├── f1max ├── f1max.c └── test.sh ├── graph_building.py ├── init.py ├── logs.txt └── seed_set_expansion.py /Graclus_centers.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import networkx as nx 4 | import community 5 | 6 | sigma =1 7 | 8 | def minimum_of_float_list(liste): 9 | minimum=0 10 | for i in liste: 11 | print i 12 | if float(i) 2 | #include 3 | #include 4 | 5 | #define NNODES 10000000 //max number of nodes: will increase if needed 6 | #define NCOMS 10000000 //max number of communities: will increase if needed 7 | #define SCOM 10000000 //max size of community: will increase if needed 8 | #define NCPN 10 //max number of community per nodes: will increase if needed 9 | 10 | typedef struct { 11 | unsigned s;//size of the community 12 | unsigned smax;//maximum size of the community 13 | unsigned *nodes;//nodes in the community 14 | } community; 15 | 16 | typedef struct { 17 | unsigned n;//number of nodes 18 | unsigned m;//max number of nodes 19 | 20 | unsigned **c;//c[i]=communities containing node i. 21 | unsigned *s;//s[i]=number of communities containing node i. 22 | unsigned *smax;//smax[i]=max number of communities 23 | 24 | unsigned nc;//number of communities 25 | unsigned mc;//max number of communities 26 | unsigned *size;//size[i]=size of the community i 27 | 28 | unsigned *tmp;//tmp[i] number of nodes shared by the current community and community i 29 | unsigned *list;//list of the community sharing at least one node with the current community 30 | unsigned nlist;//length of list 31 | } compare; 32 | 33 | 34 | compare *alloccompare(){ 35 | compare *comp=malloc(sizeof(compare)); 36 | comp->n=0; 37 | comp->m=NNODES; 38 | comp->c=calloc(comp->m,sizeof(unsigned*)); 39 | comp->s=calloc(comp->m,sizeof(unsigned)); 40 | comp->smax=calloc(comp->m,sizeof(unsigned)); 41 | 42 | comp->nc=0; 43 | comp->mc=NCOMS; 44 | comp->size=malloc(comp->mc*sizeof(unsigned)); 45 | 46 | comp->tmp=calloc(comp->mc,sizeof(unsigned)); 47 | comp->list=calloc(comp->mc,sizeof(unsigned)); 48 | comp->nlist=0; 49 | return comp; 50 | } 51 | 52 | community *alloccom(){ 53 | community *com=malloc(sizeof(community)); 54 | com->smax=SCOM; 55 | com->nodes=malloc(SCOM*sizeof(unsigned)); 56 | return com; 57 | } 58 | 59 | bool readlinecom(FILE* file,community* com){ 60 | char c; 61 | 62 | com->s=0; 63 | while(fscanf(file,"%u%c",com->nodes+com->s,&c)==2){ 64 | if ( ++(com->s) == com->smax) { 65 | com->smax+=SCOM; 66 | com->nodes=realloc(com->nodes,com->smax*sizeof(unsigned)); 67 | } 68 | if (c=='\n') { 69 | return 1; 70 | } 71 | } 72 | return 0; 73 | } 74 | 75 | void com2comp(community* com,compare* comp){ 76 | unsigned i,j,tmp; 77 | 78 | if (comp->nc==comp->mc){ 79 | comp->mc+=NCOMS; 80 | comp->size=realloc(comp->size,comp->mc*sizeof(unsigned)); 81 | comp->tmp=realloc(comp->tmp,comp->mc*sizeof(unsigned)); 82 | comp->list=realloc(comp->size,comp->mc*sizeof(unsigned)); 83 | for (i=comp->mc-NCOMS;imc;i++){ 84 | comp->tmp[i]=0; 85 | } 86 | } 87 | comp->size[comp->nc]=com->s; 88 | for (i=0;is;i++){ 89 | if (com->nodes[i]>comp->m){ 90 | tmp=com->nodes[i]-comp->m+NCOMS; 91 | comp->m+=tmp; 92 | comp->c=realloc(comp->c,comp->m*sizeof(unsigned*)); 93 | comp->s=realloc(comp->s,comp->m*sizeof(unsigned)); 94 | comp->smax=realloc(comp->smax,comp->m*sizeof(unsigned)); 95 | for (j=comp->m-tmp;jm;j++){ 96 | comp->c[j]=NULL; 97 | comp->s[j]=0; 98 | comp->smax[j]=0; 99 | } 100 | } 101 | if (comp->s[com->nodes[i]]==comp->smax[com->nodes[i]]){ 102 | if (comp->s[com->nodes[i]]==0){ 103 | comp->c[com->nodes[i]]=malloc(NCPN*sizeof(unsigned)); 104 | comp->smax[com->nodes[i]]=NCPN; 105 | } 106 | else { 107 | comp->smax[com->nodes[i]]+=NCPN; 108 | comp->c[com->nodes[i]]=realloc(comp->c[com->nodes[i]],comp->smax[com->nodes[i]]*sizeof(unsigned)); 109 | } 110 | } 111 | comp->c[com->nodes[i]][comp->s[com->nodes[i]]++]=comp->nc; 112 | } 113 | comp->nc++; 114 | } 115 | 116 | double comsim(community* com,compare *comp){ 117 | double sim=0,sim2; 118 | unsigned i,j; 119 | for (i=0;is;i++){ 120 | for (j=0;js[com->nodes[i]];j++){ 121 | if (comp->tmp[comp->c[com->nodes[i]][j]]==0){ 122 | comp->list[comp->nlist++]=comp->c[com->nodes[i]][j]; 123 | } 124 | comp->tmp[comp->c[com->nodes[i]][j]]++; 125 | } 126 | } 127 | 128 | for (i=0;inlist;i++){ 129 | sim2=((double)(comp->tmp[comp->list[i]]))/(com->s+comp->size[comp->list[i]]); 130 | if (sim2>sim){ 131 | sim=sim2; 132 | } 133 | comp->tmp[comp->list[i]]=0; 134 | } 135 | comp->nlist=0; 136 | return 2*sim; 137 | } 138 | 139 | int main(int argc,char** argv){ 140 | 141 | unsigned ncom=0; 142 | double score=0; 143 | community *com=alloccom(); 144 | compare *comp=alloccompare(); 145 | FILE* file; 146 | 147 | file=fopen(argv[2],"r"); 148 | while(readlinecom(file,com)){ 149 | com2comp(com,comp); 150 | } 151 | fclose(file); 152 | 153 | file=fopen(argv[1],"r"); 154 | while(readlinecom(file,com)){ 155 | score+=comsim(com,comp); 156 | ncom++; 157 | } 158 | fclose(file); 159 | 160 | score/=(double)ncom; 161 | 162 | printf("%lf\n",score); 163 | 164 | return 0; 165 | } 166 | -------------------------------------------------------------------------------- /f1max-master/test.sh: -------------------------------------------------------------------------------- 1 | s1="$(./f1max $1 $2)" 2 | s2="$(./f1max $2 $1)" 3 | s3=$(bc <<<"scale=6; ($s1+$s2)/2") 4 | s4=$(bc <<<"scale=6; 2*$s1*$s2/($s1+$s2)") 5 | echo $s1, $s2, $s3, $s4 6 | -------------------------------------------------------------------------------- /graph_building.py: -------------------------------------------------------------------------------- 1 | 2 | import matplotlib.pyplot as plt 3 | import networkx as nx 4 | import sys 5 | 6 | def file_graph_show( g,i ): 7 | plt.figure(i) 8 | 9 | sp=nx.spring_layout(g) 10 | 11 | plt.axis('off') 12 | 13 | nx.draw_networkx(g,pos=sp,with_labels=False,node_size=35) 14 | return plt 15 | 16 | def file_graph_building( path ): 17 | 18 | g=nx.read_edgelist(path,create_using=nx.Graph(),nodetype=int) 19 | 20 | print nx.info(g) 21 | return g 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /init.py: -------------------------------------------------------------------------------- 1 | # -*-coding:utf-8 -* 2 | import matplotlib.pyplot as plt 3 | import networkx as nx 4 | import sys 5 | import graph_building as gb 6 | import community 7 | import Graclus_centers as gc 8 | import seed_set_expansion as sse 9 | import Propagation_Phase as pp 10 | import time 11 | 12 | 13 | # filtering phase phase Remove unimportant regions of the graph 14 | #Trivially separable from the rest of the graph 15 | #Do not participate in overlapping clustering 16 | #Our filtering procedure 17 | #Remove all single-edge biconnected components (remain connected after 18 | #removing any vertex and its adjacent edges) 19 | #Compute the largest connected component 20 | 21 | mon_fichier = open("logs.txt", "w") # Argh j'ai tout écrasé ! 22 | 23 | 24 | 25 | 26 | 27 | 28 | def filtering_phase( G ): 29 | 30 | liste=[] 31 | nb_node=G.nodes_iter(data=False) 32 | 33 | for node in nb_node: 34 | if len(G.neighbors(node)) <= 1: 35 | liste.append(node) 36 | 37 | 38 | if(len(liste) !=0): 39 | G.remove_node(liste[0]) 40 | 41 | return filtering_phase(G) 42 | return G 43 | 44 | 45 | G=gb.file_graph_building( sys.argv[1] ) 46 | 47 | 48 | t = time.time() 49 | print "filtering_phase processing...." 50 | 51 | 52 | G= filtering_phase( G ) 53 | print len(G.nodes()) 54 | 55 | 56 | mon_fichier.write("Filtering phase in :"+repr(time.time()-t)+"\n") 57 | 58 | print "filtering_phase done!" 59 | 60 | t = time.time() 61 | 62 | print "seeding phase" 63 | 64 | seeds= gc.Graclus_centers( G ) 65 | 66 | mon_fichier.write("seeding phase in :"+repr(time.time()-t)+"\n") 67 | print "seeding phase done!" 68 | 69 | t = time.time() 70 | 71 | print "seed set expansion phase" 72 | 73 | expansion=sse.seed_set_expansion(G,seeds) 74 | mon_fichier.write("seed set expansion phase in :"+repr(time.time()-t)+"\n") 75 | print "seedingset expansion phase done!" 76 | 77 | #seeds= gc.Graclus_centers( G ) 78 | #print seeds 79 | #print gc.minimum_of_float_list(seeds.values()) 80 | 81 | #degCi=sum(G.degree(G.nodes())) 82 | 83 | 84 | #print G.nodes() 85 | #print G.edges() 86 | #G=nx.to_numpy_matrix(G, nodelist=G.nodes()) 87 | #print G 88 | print "Graph building with coloring community" 89 | 90 | values=sse.color_building_list(G,expansion) 91 | nx.draw_spring(G, cmap = plt.get_cmap('jet'), node_color = values, node_size=30, with_labels=False) 92 | mon_fichier.write("building graph with community colors in :"+repr(time.time()-t)+"\n") 93 | 94 | #plt.figure(0) 95 | 96 | #sp=nx.spring_layout(G) 97 | 98 | #plt.axis('off') 99 | 100 | #nx.draw_networkx(G,pos=sp,with_labels=False,node_size=35) 101 | 102 | print "building graph done!" 103 | mon_fichier.close() 104 | plt.show() 105 | -------------------------------------------------------------------------------- /logs.txt: -------------------------------------------------------------------------------- 1 | Filtering phase in :0.7024049758911133 2 | seeding phase in :44.99510598182678 3 | seed set expansion phase in :0.03710007667541504 4 | building graph with community colors in :0.03859090805053711 5 | -------------------------------------------------------------------------------- /seed_set_expansion.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import random as rdm 4 | 5 | import networkx as nx 6 | def seed_set_expansion(G,seeds): 7 | #star random walk 8 | expansion ={} 9 | compteur= 0 #Start execution counter 10 | execution = 0 11 | for seed in seeds: 12 | 13 | vertexid = seed 14 | VisitedVertex = {} 15 | 16 | #Execute the random walk with size 10000 (10000 steps) 17 | while compteur < G.number_of_nodes()/len(seeds): 18 | #Accumulate the amount of times each vertex is visited 19 | if vertexid in VisitedVertex: 20 | VisitedVertex[vertexid] += 1 21 | else: 22 | VisitedVertex[vertexid] = 1 23 | #Visualize the vertex neighborhood 24 | Vertex_Neighbors = G.neighbors(vertexid) 25 | #Choose a vertex from the vertex neighborhood to start the next random walk 26 | vertexid = rdm.choice(Vertex_Neighbors) 27 | compteur = compteur + 1 28 | mostvisited = sorted(VisitedVertex, key = VisitedVertex.get,reverse = True) 29 | expansion.update({seed: mostvisited}) 30 | compteur = 0 31 | 32 | return expansion 33 | def color_building_list(G,expansion): 34 | partition ={} 35 | 36 | 37 | valeur=0 38 | for liste_value in expansion.values(): 39 | for node in liste_value: 40 | partition.update({node: valeur}) 41 | valeur = valeur+1 42 | 43 | values = [partition.get(node) for node in G.nodes()] 44 | print values 45 | for i, val in enumerate(values): 46 | if val == None: 47 | values[i]=values[i-1] 48 | return values 49 | --------------------------------------------------------------------------------