├── .gitignore
├── DGK
├── README
├── canonical_maps
│ ├── canonical_map_n2.p
│ ├── canonical_map_n3.p
│ ├── canonical_map_n4.p
│ ├── canonical_map_n5.p
│ ├── canonical_map_n6.p
│ ├── canonical_map_n7.p
│ └── canonical_map_n8.p
├── datasets
│ └── README
├── deep_kernel.py
├── go.sh
└── graphlet_counter_maps
│ ├── graphlet_counter_nodebased_n2.p
│ ├── graphlet_counter_nodebased_n3.p
│ ├── graphlet_counter_nodebased_n4.p
│ ├── graphlet_counter_nodebased_n5.p
│ ├── graphlet_counter_nodebased_n6.p
│ ├── graphlet_counter_nodebased_n7.p
│ └── graphlet_counter_nodebased_n8.p
├── MLGkernel
├── LICENSE
├── MLGkernel
│ ├── FLGinstance.cpp
│ ├── FLGinstance.hpp
│ ├── FLGkernel.cpp
│ ├── FLGkernel.hpp
│ ├── Kernel.hpp
│ ├── Linearizer.hpp
│ ├── MLG_base.hpp
│ ├── MLGdataset.cpp
│ ├── MLGdataset.hpp
│ ├── MLGgraph.cpp
│ ├── MLGgraph.hpp
│ ├── Makefile
│ ├── RandomSelection.hpp
│ ├── params.hpp
│ ├── runMLG
│ ├── runMLG.cpp
│ └── swig
│ │ ├── MLGK.cpp
│ │ ├── MLGK.i
│ │ ├── Makefile
│ │ ├── README.txt
│ │ └── test.py
├── Makefile
├── Makefile.base
├── Makefile.options
├── README.md
├── data_utils.py
├── evaluate_embedding.py
├── go.sh
├── include
│ ├── pMMFbase.hpp
│ └── pMMFglobal.inc
├── matrices
│ ├── Activemap.cpp
│ ├── Activemap.hpp
│ ├── AtomicCmatrix.hpp
│ ├── Cmatrix.cpp
│ ├── Cmatrix.hpp
│ ├── Cvector.cpp
│ ├── Cvector.hpp
│ ├── DenseVector.hpp
│ ├── EigenInterface.hpp
│ ├── GramMatrix.cpp
│ ├── GramMatrix.hpp
│ ├── LapackInterface.hpp
│ ├── Makefile
│ ├── Matrix.cpp
│ ├── Matrix.hpp
│ ├── MatrixX.cpp
│ ├── MatrixX.hpp
│ ├── Remap.cpp
│ ├── Remap.hpp
│ ├── SparseVector.hpp
│ ├── Vector.cpp
│ ├── Vector.hpp
│ ├── Vectorh.cpp
│ ├── Vectorh.hpp
│ ├── Vectorl.cpp
│ ├── Vectorl.hpp
│ ├── Vectorv.cpp
│ ├── Vectorv.hpp
│ ├── matrices.cpp
│ └── matrices.o
├── preprocess.py
├── test.py
└── utility
│ ├── Bifstream.cpp
│ ├── Bifstream.hpp
│ ├── Bofstream.cpp
│ ├── Bofstream.hpp
│ ├── Graph.cpp
│ ├── Graph.hpp
│ ├── Log.cpp
│ ├── Log.hpp
│ ├── Makefile
│ ├── Rstream.cpp
│ ├── Rstream.hpp
│ ├── Serializable.cpp
│ ├── Serializable.hpp
│ ├── ThreadBank.cpp
│ ├── ThreadBank.hpp
│ ├── ThreadManager.cpp
│ ├── ThreadManager.hpp
│ ├── TopkList.cpp
│ ├── TopkList.hpp
│ └── filetypes
│ ├── Makefile
│ ├── MatrixIF.hpp
│ ├── MatrixIF_ASCII.cpp
│ ├── MatrixIF_ASCII.hpp
│ ├── MatrixIF_Boeing.cpp
│ ├── MatrixIF_Boeing.hpp
│ ├── MatrixIF_Matlab.cpp
│ ├── MatrixIF_Matlab.hpp
│ ├── MatrixOF.hpp
│ ├── MatrixOF_ASCII.cpp
│ ├── MatrixOF_ASCII.hpp
│ ├── MatrixOF_Boeing.cpp
│ ├── MatrixOF_Boeing.hpp
│ ├── MatrixOF_Matlab.cpp
│ ├── MatrixOF_Matlab.hpp
│ └── filetypes.cpp
├── README.md
├── data
└── PTC_MR
│ ├── PTC_MR_A.txt
│ ├── PTC_MR_edge_labels.txt
│ ├── PTC_MR_graph_indicator.txt
│ ├── PTC_MR_graph_labels.txt
│ └── PTC_MR_node_labels.txt
├── diffpool
├── aggregators.py
├── cross_val.py
├── encoders.py
├── gen
│ ├── data.py
│ └── feat.py
├── go.sh
├── graph_embedding.py
├── graph_sampler.py
├── graphsage.py
├── load_data.py
├── partition.py
├── set2set.py
├── test.py
├── train.py
└── util.py
├── graph2vec_tf
├── README.md
├── __init__.py
├── classify.py
├── corpus_parser.py
├── go.sh
├── main.py
├── make_graph2vec_corpus.py
├── preprocess.py
├── skipgram.py
├── test.py
├── train_utils.py
└── utils.py
├── kcnn
├── README.md
├── go.sh
├── graph_kernels.py
├── graph_kernels_labeled.py
├── main.py
├── model.py
├── nystrom.py
└── utils.py
├── kernel_methods
├── README.md
├── go.sh
├── main.py
└── utils.py
└── sub2vec
├── go.sh
├── preprocess.py
├── src
├── graphUtils_n.py
├── graphUtils_s.py
├── main.py
├── neighborhood.py
└── structural.py
└── test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | log
2 | __pycache__
3 | *.pyc
4 | *.walk
5 | preprocessed_dataset
6 | TEST
7 | checkpoint
8 | tmp
9 | *.nexf
10 | results
11 |
12 |
--------------------------------------------------------------------------------
/DGK/canonical_maps/canonical_map_n2.p:
--------------------------------------------------------------------------------
1 | (dp0
2 | S''
3 | p1
4 | (dp2
5 | S'graph'
6 | p3
7 | (tsS'idx'
8 | p4
9 | I0
10 | sS'n'
11 | p5
12 | I0
13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00'
14 | p6
15 | (dp7
16 | g3
17 | (tsg4
18 | I1
19 | sg5
20 | I1
21 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80'
22 | p8
23 | (dp9
24 | g3
25 | (I1
26 | tp10
27 | sg4
28 | I3
29 | sg5
30 | I2
31 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
32 | p11
33 | (dp12
34 | g3
35 | (I0
36 | tp13
37 | sg4
38 | I2
39 | sg5
40 | I2
41 | ss.
--------------------------------------------------------------------------------
/DGK/canonical_maps/canonical_map_n3.p:
--------------------------------------------------------------------------------
1 | (dp0
2 | S''
3 | p1
4 | (dp2
5 | S'graph'
6 | p3
7 | (tsS'idx'
8 | p4
9 | I0
10 | sS'n'
11 | p5
12 | I0
13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
14 | p6
15 | (dp7
16 | g3
17 | (I0
18 | tp8
19 | sg4
20 | I2
21 | sg5
22 | I2
23 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00'
24 | p9
25 | (dp10
26 | g3
27 | (tsg4
28 | I1
29 | sg5
30 | I1
31 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80'
32 | p11
33 | (dp12
34 | g3
35 | (I1
36 | tp13
37 | sg4
38 | I3
39 | sg5
40 | I2
41 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
42 | p14
43 | (dp15
44 | g3
45 | (I0
46 | I0
47 | I0
48 | tp16
49 | sg4
50 | I4
51 | sg5
52 | I3
53 | ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\xc0'
54 | p17
55 | (dp18
56 | g3
57 | (I1
58 | I1
59 | I1
60 | tp19
61 | sg4
62 | I7
63 | sg5
64 | I3
65 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00@'
66 | p20
67 | (dp21
68 | g3
69 | (I0
70 | I0
71 | I1
72 | tp22
73 | sg4
74 | I5
75 | sg5
76 | I3
77 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\xc0'
78 | p23
79 | (dp24
80 | g3
81 | (I0
82 | I1
83 | I1
84 | tp25
85 | sg4
86 | I6
87 | sg5
88 | I3
89 | ss.
--------------------------------------------------------------------------------
/DGK/canonical_maps/canonical_map_n4.p:
--------------------------------------------------------------------------------
1 | (dp0
2 | S''
3 | p1
4 | (dp2
5 | S'graph'
6 | p3
7 | (tsS'idx'
8 | p4
9 | I0
10 | sS'n'
11 | p5
12 | I0
13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
14 | p6
15 | (dp7
16 | g3
17 | (I0
18 | tp8
19 | sg4
20 | I2
21 | sg5
22 | I2
23 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00 '
24 | p9
25 | (dp10
26 | g3
27 | (I0
28 | I0
29 | I1
30 | I1
31 | I0
32 | I0
33 | tp11
34 | sg4
35 | I13
36 | sg5
37 | I4
38 | ssS'\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\x00\xe0'
39 | p12
40 | (dp13
41 | g3
42 | (I0
43 | I1
44 | I1
45 | I1
46 | I1
47 | I1
48 | tp14
49 | sg4
50 | I17
51 | sg5
52 | I4
53 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
54 | p15
55 | (dp16
56 | g3
57 | (I0
58 | I0
59 | I0
60 | I0
61 | I0
62 | I0
63 | tp17
64 | sg4
65 | I8
66 | sg5
67 | I4
68 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00'
69 | p18
70 | (dp19
71 | g3
72 | (tsg4
73 | I1
74 | sg5
75 | I1
76 | ssS'\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\xe0'
77 | p20
78 | (dp21
79 | g3
80 | (I0
81 | I0
82 | I1
83 | I1
84 | I1
85 | I1
86 | tp22
87 | sg4
88 | I15
89 | sg5
90 | I4
91 | ssS'\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\xe0'
92 | p23
93 | (dp24
94 | g3
95 | (I0
96 | I0
97 | I1
98 | I0
99 | I1
100 | I1
101 | tp25
102 | sg4
103 | I12
104 | sg5
105 | I4
106 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00 '
107 | p26
108 | (dp27
109 | g3
110 | (I0
111 | I0
112 | I0
113 | I0
114 | I0
115 | I1
116 | tp28
117 | sg4
118 | I9
119 | sg5
120 | I4
121 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80'
122 | p29
123 | (dp30
124 | g3
125 | (I1
126 | tp31
127 | sg4
128 | I3
129 | sg5
130 | I2
131 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
132 | p32
133 | (dp33
134 | g3
135 | (I0
136 | I0
137 | I0
138 | tp34
139 | sg4
140 | I4
141 | sg5
142 | I3
143 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00`'
144 | p35
145 | (dp36
146 | g3
147 | (I0
148 | I0
149 | I0
150 | I0
151 | I1
152 | I1
153 | tp37
154 | sg4
155 | I10
156 | sg5
157 | I4
158 | ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\xc0'
159 | p38
160 | (dp39
161 | g3
162 | (I1
163 | I1
164 | I1
165 | tp40
166 | sg4
167 | I7
168 | sg5
169 | I3
170 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00`'
171 | p41
172 | (dp42
173 | g3
174 | (I0
175 | I0
176 | I0
177 | I1
178 | I1
179 | I1
180 | tp43
181 | sg4
182 | I11
183 | sg5
184 | I4
185 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00@'
186 | p44
187 | (dp45
188 | g3
189 | (I0
190 | I0
191 | I1
192 | tp46
193 | sg4
194 | I5
195 | sg5
196 | I3
197 | ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00`'
198 | p47
199 | (dp48
200 | g3
201 | (I0
202 | I1
203 | I1
204 | I1
205 | I1
206 | I0
207 | tp49
208 | sg4
209 | I16
210 | sg5
211 | I4
212 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\xc0'
213 | p50
214 | (dp51
215 | g3
216 | (I0
217 | I1
218 | I1
219 | tp52
220 | sg4
221 | I6
222 | sg5
223 | I3
224 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00`'
225 | p53
226 | (dp54
227 | g3
228 | (I0
229 | I0
230 | I1
231 | I1
232 | I0
233 | I1
234 | tp55
235 | sg4
236 | I14
237 | sg5
238 | I4
239 | ssS'\x00\x00\x00\x00\x00\x00\x00p\x00\x00\x00\x00\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\x00\xe0'
240 | p56
241 | (dp57
242 | g3
243 | (I1
244 | I1
245 | I1
246 | I1
247 | I1
248 | I1
249 | tp58
250 | sg4
251 | I18
252 | sg5
253 | I4
254 | ss.
--------------------------------------------------------------------------------
/DGK/datasets/README:
--------------------------------------------------------------------------------
1 | Please refer to the README under the main folder
2 | for more information on how to obtain the datasets.
3 |
--------------------------------------------------------------------------------
/DGK/go.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | # Fill in the name of the dataset
4 | DS=
5 |
6 | # Run multiple trials
7 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100
8 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100
9 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100
10 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100
11 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100
12 |
--------------------------------------------------------------------------------
/DGK/graphlet_counter_maps/graphlet_counter_nodebased_n2.p:
--------------------------------------------------------------------------------
1 | (dp0
2 | I0
3 | ccollections
4 | Counter
5 | p1
6 | ((dp2
7 | I1
8 | I1
9 | stp3
10 | Rp4
11 | sI1
12 | g1
13 | ((dp5
14 | I2
15 | I2
16 | sI3
17 | I2
18 | stp6
19 | Rp7
20 | sI2
21 | g1
22 | ((dp8
23 | tp9
24 | Rp10
25 | sI3
26 | g1
27 | ((dp11
28 | tp12
29 | Rp13
30 | s.
--------------------------------------------------------------------------------
/DGK/graphlet_counter_maps/graphlet_counter_nodebased_n3.p:
--------------------------------------------------------------------------------
1 | (dp0
2 | I0
3 | ccollections
4 | Counter
5 | p1
6 | ((dp2
7 | I1
8 | I1
9 | stp3
10 | Rp4
11 | sI1
12 | g1
13 | ((dp5
14 | I2
15 | I2
16 | sI3
17 | I2
18 | stp6
19 | Rp7
20 | sI2
21 | g1
22 | ((dp8
23 | I4
24 | I3
25 | sI5
26 | I2
27 | sI6
28 | I1
29 | stp9
30 | Rp10
31 | sI3
32 | g1
33 | ((dp11
34 | I5
35 | I1
36 | sI6
37 | I2
38 | sI7
39 | I3
40 | stp12
41 | Rp13
42 | sI4
43 | g1
44 | ((dp14
45 | tp15
46 | Rp16
47 | sI5
48 | g1
49 | ((dp17
50 | tp18
51 | Rp19
52 | sI6
53 | g1
54 | ((dp20
55 | tp21
56 | Rp22
57 | sI7
58 | g1
59 | ((dp23
60 | tp24
61 | Rp25
62 | s.
--------------------------------------------------------------------------------
/DGK/graphlet_counter_maps/graphlet_counter_nodebased_n4.p:
--------------------------------------------------------------------------------
1 | (dp0
2 | I0
3 | ccollections
4 | Counter
5 | p1
6 | ((dp2
7 | I1
8 | I1
9 | stp3
10 | Rp4
11 | sI1
12 | g1
13 | ((dp5
14 | I2
15 | I2
16 | sI3
17 | I2
18 | stp6
19 | Rp7
20 | sI2
21 | g1
22 | ((dp8
23 | I4
24 | I3
25 | sI5
26 | I2
27 | sI6
28 | I1
29 | stp9
30 | Rp10
31 | sI3
32 | g1
33 | ((dp11
34 | I5
35 | I1
36 | sI6
37 | I2
38 | sI7
39 | I3
40 | stp12
41 | Rp13
42 | sI4
43 | g1
44 | ((dp14
45 | I8
46 | I4
47 | sI9
48 | I2
49 | sI10
50 | I1
51 | sI12
52 | I1
53 | stp15
54 | Rp16
55 | sI5
56 | g1
57 | ((dp17
58 | I9
59 | I2
60 | sI10
61 | I2
62 | sI11
63 | I3
64 | sI13
65 | I4
66 | sI14
67 | I2
68 | sI15
69 | I1
70 | stp18
71 | Rp19
72 | sI6
73 | g1
74 | ((dp20
75 | I16
76 | I4
77 | sI17
78 | I2
79 | sI10
80 | I1
81 | sI12
82 | I3
83 | sI14
84 | I2
85 | sI15
86 | I2
87 | stp21
88 | Rp22
89 | sI7
90 | g1
91 | ((dp23
92 | I17
93 | I2
94 | sI18
95 | I4
96 | sI11
97 | I1
98 | sI15
99 | I1
100 | stp24
101 | Rp25
102 | sI8
103 | g1
104 | ((dp26
105 | tp27
106 | Rp28
107 | sI9
108 | g1
109 | ((dp29
110 | tp30
111 | Rp31
112 | sI10
113 | g1
114 | ((dp32
115 | tp33
116 | Rp34
117 | sI11
118 | g1
119 | ((dp35
120 | tp36
121 | Rp37
122 | sI12
123 | g1
124 | ((dp38
125 | tp39
126 | Rp40
127 | sI13
128 | g1
129 | ((dp41
130 | tp42
131 | Rp43
132 | sI14
133 | g1
134 | ((dp44
135 | tp45
136 | Rp46
137 | sI15
138 | g1
139 | ((dp47
140 | tp48
141 | Rp49
142 | sI16
143 | g1
144 | ((dp50
145 | tp51
146 | Rp52
147 | sI17
148 | g1
149 | ((dp53
150 | tp54
151 | Rp55
152 | sI18
153 | g1
154 | ((dp56
155 | tp57
156 | Rp58
157 | s.
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/FLGinstance.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #include"FLGinstance.hpp"
28 |
29 |
30 | void FLGinstance::precompute(const double gamma){
31 | if(Sinv.nrows>0) return;
32 |
33 | //cout<<"L="< eigenp=M.symmetricEigensolver();
61 | Cmatrix& eigs=*eigenp.first;
62 | Cvector& lambda=*eigenp.second;
63 | int n=M.nrows;
64 | //cout<<"eigs="<.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #ifndef _FLGinstance
28 | #define _FLGinstance
29 |
30 | #include "Cmatrix.hpp"
31 | #include "Cvector.hpp"
32 |
33 |
34 | class FLGinstance{
35 | public:
36 |
37 | //FLGinstance(Cmatrix&& _L, vector&& _labels):
38 | // L(move(_L)), labels(_labels){};
39 |
40 | FLGinstance(){}
41 |
42 | FLGinstance(Cmatrix&& _L, Cmatrix&& _U):
43 | L(move(_L)), U(move(_U)){};
44 |
45 |
46 | public:
47 |
48 | void precompute(const double gamma);
49 |
50 | bool operator==(const FLGinstance& x) const{
51 | if(L!=x.L) return false;
52 | if(U!=x.U) return false;
53 | //if(labels.size()!=x.labels.size()) return false;
54 | //if(labels!=x.labels) return false;
55 | return true;
56 | }
57 |
58 | string str(){
59 | ostringstream oss; oss< labels;
69 | Cmatrix U;
70 |
71 | Cmatrix Sinv; // actually Sinv/2
72 | //double detS;
73 | double log_detS;
74 |
75 | // Cvector linearization;
76 |
77 | };
78 |
79 |
80 | // std::hash specialization for FLGinstance, so that instances can be hashed
81 | // (e.g. as keys of unordered containers). XORs the hashes of the matrices L and U.
82 | namespace std{
83 |   template<>
84 |   class hash<FLGinstance>{
85 |   public:
86 |     size_t operator()(const FLGinstance& x) const{
87 |       size_t h=hash<Cmatrix>()(x.L)^hash<Cmatrix>()(x.U);
88 |       //for(auto& p: G.labels) h=(h<<1)^hash()(p);
89 |       return h;
90 |     }
91 |   };
92 | }
93 |
94 |
95 | #endif
96 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/FLGkernel.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #include"FLGkernel.hpp"
28 |
29 |
30 | double FLGkernel::operator()(const FLGinstance& x1, const FLGinstance& x2) const
31 | {
32 |
33 | if(x1.Sinv.nrows==0) const_cast(x1).precompute(gamma);
34 | if(x2.Sinv.nrows==0) const_cast(x2).precompute(gamma);
35 |
36 | Cvector lambda=(x1.Sinv+x2.Sinv).eigenvalues();
37 | //double detS=1; for(int i=0; i.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #ifndef _FLGkernel
28 | #define _FLGkernel
29 |
30 | #include "FLGinstance.hpp"
31 | #include "MLGgraph.hpp"
32 | #include "Kernel.hpp"
33 |
34 | // Kernel between two FLGinstance objects; also callable on two MLGgraph
35 | // objects, in which case it is evaluated on their flg members.
36 | class FLGkernel: public Kernel<FLGinstance>{
37 | public:
38 |
39 |   FLGkernel(const double _gamma): gamma(_gamma){}
40 |
41 |   double operator()(const FLGinstance& x1, const FLGinstance& x2) const override;
42 |
43 |   double operator()(const MLGgraph& x1, const MLGgraph& x2) const{return (*this)(x1.flg,x2.flg);}
44 |
45 | public:
46 |
47 |   double gamma=0.1; // handed to FLGinstance::precompute() when the kernel is evaluated
48 |
49 | };
50 |
51 | #endif
52 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/Kernel.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #ifndef _Kernel
28 | #define _Kernel
29 |
30 | #include "pMMFbase.hpp"
31 | // Abstract base class for kernels: function objects that map a pair of
32 | // TYPE objects to a double. Concrete kernels implement operator().
33 | template<class TYPE>
34 | class Kernel{
35 | public:
36 |
37 |   virtual double operator()(const TYPE& x1, const TYPE& x2) const =0;
38 |   virtual ~Kernel(){} // polymorphic base: make deletion through a Kernel* well defined
39 | };
40 |
41 |
42 |
43 |
44 | #endif
45 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/MLG_base.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #include "pMMFbase.hpp"
28 |
29 |
30 | typedef Cvector VertexFeatures;
31 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/MLGdataset.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #ifndef _MLGdataset
28 | #define _MLGdataset
29 |
30 | #include "MLGgraph.hpp"
31 | #include
32 | // A set of graphs plus the kernel parameters and the Gram matrix computed from them.
33 | class MLGdataset{
34 | public:
35 |
36 |   MLGdataset(){}
37 |   MLGdataset(const std::string filename, double eta, double gamma, bool grow): gamma(gamma), eta(eta), grow(grow){
38 |     loadGraphs(filename); // initializers above follow declaration order (gamma, eta, grow)
39 |   }
40 |   ~MLGdataset() {for(auto p:graphs) delete p;} // the dataset owns its graphs
41 |   // NOTE(review): copying is not disabled; a copy would delete the same graphs twice.
42 | public:
43 |
44 |   void condense(const int nlevels, const int leaf_radius=2);
45 |   void computeGram(const int levels, const int radius);
46 |
47 | public:
48 |
49 |   void loadGraphs(std::string filename);
50 |   void loadDiscreteFeatures(std::string filename, int numFeatures);
51 |   void loadFeatures(std::string filename);
52 |   void saveGram(std::string filename);
53 |   void fillGram(double *npmatrix, int rows, int cols);
54 |
55 | public:
56 |   // Scalar members are zero-initialized so a default-constructed dataset is determinate.
57 |   vector<MLGgraph*> graphs; // owned pointers, released in the destructor
58 |   double gamma=0; // regularizer constant
59 |   double eta=0; // regularizer constant
60 |   int levels=0;
61 |   int radius=0;
62 |   bool grow=false; // 1 to grow by the leaf radius, 0 to double
63 |   Cmatrix gram;
64 | };
65 |
66 | #endif
67 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/MLGgraph.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #ifndef _MLGgraph
28 | #define _MLGgraph
29 |
30 | #include
31 |
32 | #include "Cmatrix.hpp"
33 | #include "Graph.hpp"
34 | #include "FLGinstance.hpp"
35 | #include "Linearizer.hpp"
36 |
37 |
38 | class MLGgraph{
39 | public:
40 |
41 | MLGgraph(const MLGgraph& x): n(x.n), adj(x.adj.copy()), labels(x.labels.size()){
42 | for(int i=0; i(n); for(auto& p:labels) p=Cvector::Filled(1,0); init();
49 | }
50 | MLGgraph(Graph&& G){
51 | n=G.n; adj=move(G.adj); labels=vector(n); init();
52 | }
53 | MLGgraph& operator=(Graph&& G){
54 | n=G.n; adj=move(G.adj); labels=vector(n); init(); return *this;
55 | }
56 |
57 | public:
58 |
59 | void grow_subgraphs(const int radius);
60 | void double_subgraphs();
61 | void push_to_linearizer(Linearizer& linearizer, double eta);
62 | void pull_features();
63 | void compute_flg();
64 |
65 | void computeDegreeFeatures(const int maxdeg);
66 |
67 | string str() const;
68 |
69 | private:
70 |
71 | void init();
72 | Cmatrix subLaplacian(const vector& vset, double eta) const;
73 | Cmatrix FloydWarshall(const Cmatrix& A) const;
74 |
75 | public:
76 |
77 | int n;
78 | Cmatrix adj;
79 | vector labels;
80 |
81 | vector< vector > neighbors;
82 | vector< unordered_set > subgraphs;
83 | vector*> subinstances;
84 |
85 | FLGinstance flg;
86 |
87 | Cmatrix dist;
88 |
89 |
90 | };
91 |
92 |
93 | namespace std{
94 | template<>
95 | class hash< Hwrapper >{
96 | public:
97 | size_t operator()(const Hwrapper& x) const{
98 | return hash()(*x.ptr);}
99 | };
100 | };
101 |
102 |
103 | #endif
104 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/Makefile:
--------------------------------------------------------------------------------
1 | ROOTDIR= ../
2 | include $(ROOTDIR)/Makefile.base
3 |
4 | CFLAGS= -std=c++11 -I $(INCLUDEDIR) -I $(UTILITYDIR) -I $(FILETYPESDIR) -I $(MATRIXDIR) -fPIC
5 | LIBS= -lstdc++ -lm -lpthread
6 |
7 | ALLOBJECTS= $(UTILITYDIR)/*.o $(MATRIXDIR)/*.o *.o #$(BLOCKEDDIR)/*.o
8 |
9 | FLGinstance.o: FLGinstance.hpp FLGinstance.cpp
10 | $(CC) -c FLGinstance.cpp $(CFLAGS)
11 |
12 | FLGkernel.o: FLGkernel.hpp FLGkernel.cpp FLGinstance.hpp
13 | $(CC) -c FLGkernel.cpp $(CFLAGS)
14 |
15 | MLGgraph.o: MLGgraph.hpp MLGgraph.cpp FLGinstance.hpp FLGkernel.hpp Linearizer.hpp
16 | $(CC) -c MLGgraph.cpp $(CFLAGS)
17 |
18 | MLGdataset.o: MLGdataset.hpp MLGdataset.cpp FLGinstance.hpp FLGkernel.hpp Linearizer.hpp MLGgraph.hpp
19 | $(CC) -c MLGdataset.cpp $(CFLAGS)
20 | # runMLG links every *.o (ALLOBJECTS), so each local object is listed as a prerequisite.
21 | runMLG: runMLG.cpp params.hpp MLGgraph.o MLGdataset.o FLGinstance.o FLGkernel.o
22 | $(CC) -o runMLG runMLG.cpp $(CFLAGS) $(ALLOBJECTS) $(LIBS)
23 | .PHONY: all clean anew
24 | all: FLGinstance.o FLGkernel.o MLGgraph.o MLGdataset.o runMLG
25 |
26 | clean:
27 | @rm -f runMLG *.o
28 |
29 | anew: clean all
30 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/RandomSelection.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #ifndef _RandomSelection
28 | #define _RandomSelection
29 |
30 | #include
31 |
32 |
33 | #include "Activemap.hpp"
34 | #include "pMMFbase.hpp"
35 |
36 | extern default_random_engine randomNumberGenerator;
37 |
38 |
39 | class RandomSelection: public vector{
40 | public:
41 |
42 | RandomSelection(const int k, const int n): vector(k){
43 | assert(k<=n);
44 |
45 | if(k<0.3*n){
46 | uniform_int_distribution distri(0,n-1);
47 | for(int i=0; i distri(0,n-i);
58 | int j=amap(distri(randomNumberGenerator));
59 | amap.remove(j);
60 | (*this)[i]=j;
61 | }
62 |
63 | }
64 |
65 | public:
66 |
67 | unordered_set selected;
68 | //Activemap activemap;
69 |
70 | };
71 |
72 | #endif
73 |
74 |
75 | //do{x=distri(randomNumberGenerator);
76 | //}while(selected.find(x)!=selected.end());
77 |
--------------------------------------------------------------------------------
/MLGkernel/MLGkernel/params.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
8 |
9 |
10 | This program is free software; you can redistribute it and/or
11 | modify it under the terms of the GNU General Public License
12 | as published by the Free Software Foundation; either version 2
13 | of the License, or (at your option) any later version.
14 |
15 | This program is distributed in the hope that it will be useful,
16 | but WITHOUT ANY WARRANTY; without even the implied warranty of
17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 | GNU General Public License for more details.
19 |
20 | You should have received a copy of the GNU General Public License
21 | along with this program; if not, see <https://www.gnu.org/licenses/>.
22 |
23 | ----------------------------------------------------------------------------- */
24 |
25 |
26 |
27 | #include
28 | #include
29 | using namespace std;
30 |
31 | class Params{
32 | /**
33 | A simple object that holds all the parameters necessary for the MLGkernel including
34 | file paths for where the dataset and dataset features are stored and file path to
35 | save the resulting gram matrix.
36 | **/
37 | public:
38 | // Constructor that initializes the model parameters.
39 | // Set the data, feature and save paths separately via set_paths() and set_save_path().
40 | Params(double e, double g, int r, int l, int t, bool b):
41 | eta(e), gamma(g), radius(r), levels(l), num_threads(t), grow_or_double(b) {}
42 |
43 | public:
44 | void set_paths(string data, string features){
45 | data_path = data;
46 | features_path = features;
47 | }
48 |
49 | void set_save_path(string save){
50 | save_path = save;
51 | }
52 |
53 | void show() {
54 | cout << "Current parameter settings:" << endl;
55 | cout << " -eta : " << eta < max_nodes:
82 | continue
83 |
84 | # add features and labels
85 | G.graph['label'] = graph_labels[i-1]
86 | for u in G.nodes():
87 | if len(node_labels) > 0:
88 | node_label_one_hot = [0] * num_unique_node_labels
89 | node_label = node_labels[u-1]
90 | node_label_one_hot[node_label] = 1
91 | G.node[u]['label'] = node_label_one_hot
92 | if len(node_attrs) > 0:
93 | G.node[u]['feat'] = node_attrs[u-1]
94 | if len(node_attrs) > 0:
95 | G.graph['feat_dim'] = node_attrs[0].shape[0]
96 |
97 | # relabeling
98 | mapping={}
99 | it=0
100 | if float(nx.__version__)<2.0:
101 | for n in G.nodes():
102 | mapping[n]=it
103 | it+=1
104 | else:
105 | for n in G.nodes:
106 | mapping[n]=it
107 | it+=1
108 |
109 | # indexed from 0
110 | graphs.append(nx.relabel_nodes(G, mapping))
111 |
112 |
113 | np.random.shuffle(graphs)
114 | #idx = np.random.RandomState(seed=2).permutation(len(graphs))
115 | #graphs = [graphs[i] for i in idx]
116 |
117 | return graphs
118 |
119 |
--------------------------------------------------------------------------------
/MLGkernel/evaluate_embedding.py:
--------------------------------------------------------------------------------
1 | from data_utils import read_graphfile
2 | import numpy as np
3 | import pandas as pd
4 | import os
5 | import sys
6 |
7 | from sklearn.model_selection import cross_val_score
8 | from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold
9 | from sklearn.svm import SVC, LinearSVC
10 | from sklearn.linear_model import LogisticRegression
11 | from sklearn.ensemble import RandomForestClassifier
12 | from sklearn import preprocessing
13 | from sklearn.metrics import accuracy_score
14 | from sklearn.manifold import TSNE
15 |
def _cross_validated_accuracy(x, y, folds, make_classifier):
    """Return the mean test accuracy of freshly built classifiers over ``folds``.

    ``folds`` is an iterable of ``(train_index, test_index)`` pairs and
    ``make_classifier`` is a zero-argument callable returning an unfitted
    estimator; a new estimator is built for every fold.
    """
    scores = []
    for train_index, test_index in folds:
        classifier = make_classifier()
        classifier.fit(x[train_index], y[train_index])
        scores.append(accuracy_score(y[test_index], classifier.predict(x[test_index])))
    return np.mean(scores)


def evaluate_embedding(embeddings, labels, search=True):
    """Score embeddings with 10-fold cross-validated SVC and LinearSVC classifiers.

    Args:
        embeddings: sequence of feature vectors, one per sample (converted
            with ``np.array``).
        labels: one class label per sample; re-encoded with ``LabelEncoder``.
        search: when True (the default, and the behaviour this function always
            had), ``C`` is tuned on every training split with an inner 5-fold
            ``GridSearchCV``; when False a fixed ``C=10`` is used.

    Returns:
        ``(svc_accuracy, linear_svc_accuracy)``: the mean test accuracy of each
        classifier. The same two numbers are also printed, as before.
    """
    labels = preprocessing.LabelEncoder().fit_transform(labels)
    x, y = np.array(embeddings), np.array(labels)
    print(x.shape, y.shape)

    kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)
    # Materialise the folds once: with shuffle=True and random_state=None every
    # call to split() draws a new partition, so splitting separately per
    # classifier would score SVC and LinearSVC on different data.
    folds = list(kf.split(x, y))

    params = {'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}

    def build(estimator_cls):
        if search:
            return GridSearchCV(estimator_cls(), params, cv=5, scoring='accuracy', verbose=0)
        return estimator_cls(C=10)

    svm_accuracies = _cross_validated_accuracy(x, y, folds, lambda: build(SVC))
    linear_accuracies = _cross_validated_accuracy(x, y, folds, lambda: build(LinearSVC))

    print('LinearSvc', linear_accuracies)
    print('svc', svm_accuracies)
    return svm_accuracies, linear_accuracies
54 |
55 | # def get_mutag():
56 | # emb = []
57 | # with open('data/results/output.txt', 'r') as f:
58 | # for line in f:
59 | # emb.append(list(map(float, [x for x in line.strip().split()])))
60 |
61 | # ret = []
62 | # for i in range(188):
63 | # with open('./data/mutag/mutag_{}.graph'.format(i+1), 'r') as f:
64 | # x = f.readlines()
65 | # ret.append(int(x[-1].strip()))
66 | # return emb, ret
67 |
68 |
if __name__ == '__main__':
    # Command-line entry point: argv[1] is the dataset name used in both file names.
    dataset = sys.argv[1]

    # One embedding per line, stored as whitespace-separated floats.
    with open('data/results/{}_output.txt'.format(dataset), 'r') as emb_file:
        emb = [list(map(float, line.strip().split())) for line in emb_file]

    # One integer class label per line.
    with open('../data/{}_label.txt'.format(dataset), 'r') as label_file:
        y = [int(row.strip()) for row in label_file]

    evaluate_embedding(emb, y)
81 |
--------------------------------------------------------------------------------
/MLGkernel/go.sh:
--------------------------------------------------------------------------------
#!/bin/bash -ex
# Grid-search the MLG kernel parameters on one dataset and append the
# classification results of every configuration to <dataset>.log.
#
# Usage: ./go.sh DATASET        (e.g. MUTAG, PTC, PROTEINS, NCI1, NCI109)
#
# Swept parameters:
#   radius (-r) : 1 2 3 4
#   levels (-l) : 1 2 3 4
#   gamma  (-g) : 0.01 0.1 1
#   eta    (-e) : 0.01 0.1 1
# Fixed parameters:
#   num threads (-t) = 32
#   grow = 1  # if you want the subgraphs to double in size at each level, set this equal to 0
#             # (passed below as "-m 1"; presumably -m is the grow switch -- confirm in runMLG.cpp)

# Abort with a usage message when the dataset name is missing instead of
# building paths from an empty string.
dset=${1:?usage: $0 DATASET}

BASE=$(pwd)
data="$BASE/../data/$dset.txt"
feats="$BASE/..//data/${dset}_nodelabels.txt"
# NOTE(review): evaluate_embedding.py opens data/results/<dataset>_output.txt,
# which does not match this file name unless runMLG derives it -- confirm.
save="$BASE//data/results/output.txt"
mkdir -p "$BASE/data/results/"

~/ENV/bin/python3 preprocess.py "$dset"

for r in 1 2 3 4
do
  for l in 1 2 3 4
  do
    for g in 0.01 0.1 1
    do
      for e in 0.01 0.1 1
      do
        # Run the kernel from inside MLGkernel/; the subshell leaves the
        # working directory of this script untouched.
        ( cd MLGkernel && ./runMLG -d "$data" -f "$feats" -s "$save" -r "$r" -l "$l" -e "$e" -g "$g" -t 32 -m 1 )
        ~/ENV/bin/python3 evaluate_embedding.py "$dset" >> "$dset.log"
      done
    done
  done
done
37 |
--------------------------------------------------------------------------------
/MLGkernel/include/pMMFglobal.inc:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #include "pMMFbase.hpp"
19 | #include "Vectorv.hpp"
20 | #include "Vectorl.hpp"
21 | #include "Vectorh.hpp"
22 | #include "ThreadManager.hpp"
23 | // Definitions of the library's global objects.
24 | bool multithreading=true;
25 | ThreadManager threadManager(4); // NOTE(review): 4 is presumably the thread limit -- confirm in ThreadManager.hpp
26 | mutex cout_mutex;
27 | mutex CoutLock::mx; // out-of-class definition of the static member CoutLock::mx
28 | // Default-constructed (no explicit seed is given here); other files access it through an `extern` declaration.
29 | std::default_random_engine randomNumberGenerator;
30 | // Out-of-class definitions of the static dummyZero member of each sparse vector class.
31 | FIELD Vectorv::dummyZero=0;
32 | FIELD Vectorl::dummyZero=0;
33 | FIELD Vectorh::dummyZero=0;
34 | // Global 255-character buffer.
35 | char strbuffer[255];
36 | // Global Log instance.
37 | Log mlog;
38 |
39 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/Activemap.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #include "Activemap.hpp"
19 |
20 | extern default_random_engine randomNumberGenerator;
21 |
22 |
23 | int Activemap::random(){
24 | uniform_int_distribution distri(0,nactive-1);
25 | return forward[distri(randomNumberGenerator)];
26 | }
27 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/Activemap.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #ifndef _Activemap
19 | #define _Activemap
20 |
21 | #include "Remap.hpp"
22 |
23 | class Activemap: public Remap{
24 | public:
25 |
26 | Activemap(const int n=1): Remap(n), nactive(n){}
27 |
28 | public:
29 |
30 | int random();
31 |
32 | void remove(const int i){
33 | if(backward[i]!=nactive-1) swap(backward[i],nactive-1);
34 | nactive--;
35 | }
36 |
37 | bool isactive(const int i) const {return(backward[i] lock(mx);
40 | assert(x.nrows==nrows); assert(x.ncols==ncols);
41 | for(int i=0; i distr;
34 | for(int i=0; i distr;
41 | for(int i=0; i list): DenseVector(list.size()){
46 | array=new FIELD[n]; int i=0; for(FIELD v:list) array[i++]=v;
47 | }
48 |
49 |
50 | Cvector::Cvector(const int _n, const FIELD* _array): DenseVector(_n){
51 | array=new FIELD[n]; for(int i=0; i
87 | Eigen::VectorXd Cvector::convert() const{
88 | Eigen::VectorXd v(n);
89 | for(int i=0; i
22 | #include
23 | //#include
24 |
25 | // The purpose of these adaptors is to avoid having to include Eigen/Dense or Eigen/Core in any of the
26 | // header files of the native vector/matrix classes, which would slow down compilation.
27 |
28 | typedef Eigen::SparseMatrix EigenSparseMatrix;
29 | // Adaptor that is itself an Eigen::VectorXd, copy-constructed from the given vector.
30 | class EigenVectorXdAdaptor: public Eigen::VectorXd{
31 | public:
32 | EigenVectorXdAdaptor(const Eigen::VectorXd& M): Eigen::VectorXd(M){}
33 | };
34 | // Adaptor that is itself an Eigen::MatrixXd, copy-constructed from the given matrix.
35 | class EigenMatrixXdAdaptor: public Eigen::MatrixXd{
36 | public:
37 | EigenMatrixXdAdaptor(const Eigen::MatrixXd& M): Eigen::MatrixXd(M){}
38 | };
39 |
40 |
41 | #endif
42 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/GramMatrix.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #include"GramMatrix.hpp"
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/GramMatrix.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #ifndef _GramMatrix
19 | #define _GramMatrix
20 |
21 | #include
22 | #include "MatrixX.hpp"
23 |
24 | template
25 | class GramMatrix: public MATRIX{
26 | public:
27 |
28 | using class MATRIX::MATRIX;
29 |
30 | public:
31 |
32 | template
33 | GramMatrix(MatrixX& A)
34 |
35 |
36 | public:
37 |
38 |
39 | };
40 |
41 |
42 | template
43 | template
44 | GramMatrix::GramMatrix(MatrixX& A): MATRIX(MATRIX::Zero(A.nrows,A.nrows)){
45 | assert(A.nrows==A.ncols); // assumption: A is symmetric
46 | for(int i=0; inFilled>0.2*nrows){
48 | for(int j=0; j<=i; j++){
49 | (*this)(i,j)=A.column[i]->dot(*A.column[j]);
50 | (*this)(j,i)=(*this)(i,j);
51 | }
52 | }else{
53 | unordered_map neighbors;
54 | A.column[i]->for_each([&A,&neighbors](int j, FIELD dummy){
55 | A.column[j]->for_each([&neighbors](int k, FIELD dummy){neighbors.insert(k);});
56 | });
57 | for(auto j:neighbors){
58 | (*this)(i,j)=A.column[i]->dot(*A.column[j]);
59 | (*this)(j,i)=(*this)(i,j);
60 | }
61 | }
62 | }
63 |
64 | }
65 |
66 |
67 |
68 | #endif
69 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/LapackInterface.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #ifndef _LapackInterface
19 | #define _LapackInterface
20 |
21 | #include
22 | #include
23 |
24 | // The purpose of these adaptors is to avoid having to include lapacke/include in any of the
25 | // header files of the native vector/matrix classes, which would slow down compilation.
26 |
27 | #endif
28 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/Makefile:
--------------------------------------------------------------------------------
1 | ROOTDIR=..
2 | include $(ROOTDIR)/Makefile.base
3 |
4 | CFLAGS+= -fPIC#-std=c++11
5 | INCLUDE= -I$(INCLUDEDIR) -I$(UTILITYDIR) -I$(FILETYPESDIR) #-I$(MATRIXDIR)
6 | OBJFILES= $(UTILITYDIR)/*.o $(FILETYPESDIR)/*.o ./*.o
7 | LIBS= -lstdc++ -lm
8 | LMACROS=
9 |
10 | ifneq (,$(findstring withEigen,$(MACROS)))
11 | INCLUDE+= -I$(EIGENDIR)
12 | LMACROS+=-D_withEigen
13 | else ifneq (,$(findstring withLapack,$(MACROS)))
14 | INCLUDE+= -I$(LAPACKDIR)/LAPACKE/include/ -I$(CBLASDIR)/include
15 | OBJFILES+= $(LAPACKDIR)/liblapacke.a $(LAPACKDIR)/liblapack.a $(BLASDIR)/blas_LINUX.a $(CBLASDIR)/lib/cblas_LINUX.a
16 | LIBS+= -L$(FORTRANDIR)/lib/ -lgfortran
17 | LMACROS+=-D_withLapack
18 | endif
19 |
20 |
21 | matrices.o: *.hpp *.cpp
22 | $(CC) -c matrices.cpp $(CFLAGS) $(INCLUDE) $(LMACROS)
23 |
24 |
25 | objects: matrices.o
26 |
27 | tests:
28 |
29 | all: objects
30 |
31 | clean:
32 | @rm -f *.o
33 |
34 | anew: clean all
35 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/Matrix.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #include "Matrix.hpp"
19 | #include "Cmatrix.hpp"
20 |
21 | // NOTE: placeholder implementation -- ignores x and always returns Cmatrix(0,0).
22 | Cmatrix Matrix::dot(const Matrix& x) const{
23 | return Cmatrix(0,0);
24 | }
25 |
26 |
27 | string Matrix::str(const Dense dummy) const{
28 | ostringstream stream;
29 | stream.precision(3);
30 | stream.setf(ios_base::fixed, ios_base::floatfield);
31 | for(int i=0; iread(i,j)<<" ";}
33 | stream<<" ]\n";}
34 | return stream.str();
35 | }
36 |
37 |
38 | string Matrix::str(const Sparse dummy) const{
39 | ostringstream stream;
40 | for(int i=0; i
23 |
24 | class Matrix{
25 | public:
26 |
27 | virtual ~Matrix(){}
28 |
29 | public: // constructors
30 |
31 | Matrix(const int _nrows, const int _ncols): nrows(_nrows), ncols(_ncols) {}
32 |
33 |
34 | public: // member access
35 |
36 | virtual FIELD& operator()(const int i, const int j)=0;
37 | virtual FIELD operator()(const int i, const int j) const=0;
38 | virtual FIELD read(const int i, const int j) const=0;
39 | virtual bool isFilled(const int i, const int j) const=0;
40 | virtual int nFilled() const=0;
41 | virtual bool isSparse() const=0;
42 |
43 | //virtual void (foreach)(std::function lambda)=0;
44 | //virtual void (foreach)(std::function lambda) const=0;
45 | virtual void foreach_in_column(const int j, std::function lambda)=0;
46 | virtual void foreach_in_column(const int j, std::function lambda) const=0;
47 |
48 | public: // scalar valued operations
49 |
50 | virtual int nnz() const=0;
51 |
52 | public:
53 |
54 | virtual Cmatrix dot(const Matrix& x) const; // {};
55 |
56 | public:
57 |
58 | virtual void saveTo(MatrixOF& file) const=0;
59 |
60 | virtual string str(const Dense dummy) const;
61 | virtual string str(const Sparse dummy) const;
62 | virtual string str() const{return str(Dense());}
63 |
64 |
65 |
66 | public:
67 |
68 | int nrows;
69 | int ncols;
70 |
71 | };
72 |
73 |
74 | ostream& operator<<(ostream& stream, const Matrix& x);
75 |
76 |
77 | // Matrix subclass that inherits Matrix's constructors and reports isSparse()==true.
78 | class SparseMatrix: public Matrix{
79 | public:
80 | using Matrix::Matrix;
81 | bool isSparse() const {return true;}
82 | };
83 |
84 |
85 | // Matrix subclass for dense storage: every entry counts as filled and isSparse() is false.
86 | class DenseMatrix: public Matrix{
87 | public:
88 | using Matrix::Matrix;
89 | bool isFilled(const int i, const int j) const {return true;}
90 | int nFilled() const {return nrows*ncols;} // all nrows*ncols entries
91 | bool isSparse() const {return false;}
92 | };
93 |
94 |
95 |
96 |
97 | // virtual Matrix* newof()=0;
98 |
99 | //virtual Cmatrix Cmatrix() const=0;
100 | // virtual SparseMatrixX MatrixXv() const=0;
101 | // virtual SparseMatrixX MatrixXl() const=0;
102 | // virtual SparseMatrixX MatrixXh() const=0;
103 |
104 |
105 |
106 | #endif
107 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/MatrixX.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #include "MatrixX.hpp"
19 |
20 |
21 | /*
22 | template<>
23 | void MatrixX::serialize(Bofstream& ofs) const{
24 | ofs.tag("MatrixXv",0);
25 | ofs.write(nrows);
26 | ofs.write(ncols);
27 | for(int j=0; jserialize(ofs);
29 | };
30 |
31 | template<>
32 | void MatrixX::serialize(Bofstream& ofs) const{
33 | ofs.tag("MatrixXl",0);
34 | ofs.write(nrows);
35 | ofs.write(ncols);
36 | for(int j=0; jserialize(ofs);
38 | };
39 |
40 | template<>
41 | void MatrixX::serialize(Bofstream& ofs) const{
42 | ofs.tag("MatrixXh",0);
43 | ofs.write(nrows);
44 | ofs.write(ncols);
45 | for(int j=0; jserialize(ofs);
47 | };
48 | */
49 |
50 | /*
51 | template<>
52 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){
53 | ifs.check("MatrixXv",0);
54 | ifs.read(nrows);
55 | ifs.read(ncols);
56 | for(int j=0; j
61 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){
62 | ifs.check("MatrixXl",0);
63 | ifs.read(nrows);
64 | ifs.read(ncols);
65 | for(int j=0; j
70 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){
71 | ifs.check("MatrixXh",0);
72 | ifs.read(nrows);
73 | ifs.read(ncols);
74 | for(int j=0; j
20 |
21 | extern default_random_engine randomNumberGenerator;
22 |
23 |
24 | Remap::Remap(const Remap& x): n(x.n){
25 | forward=new int[n]; for(int i=0; i distr(i+1,n-1);
58 | int j=distr(randomNumberGenerator);
59 | R.swap(i,j);}
60 | return R;
61 | }
62 |
63 |
64 | /* DEPRECATED
65 | Remap::Remap(const int _n, const Random random): Remap(_n){
66 | for(int i=0; i distr(i+1,n-1);
68 | int j=distr(randomNumberGenerator);
69 | swap(i,j);
70 | }
71 | }
72 | */
73 |
74 |
75 | string Remap::str() const{
76 | ostringstream stream;
77 | for(int i=0; i "<
20 |
21 |
22 | string Vector::str(const Dense dummy) const{
23 | ostringstream stream;
24 | for(int i=0; i
23 |
24 | class Vector{ //: public Serializable{
25 | public:
26 |
27 | Vector(const int _n): n(_n){}
28 |
29 | public:
30 |
31 | virtual FIELD& operator()(const int n)=0;
32 | virtual FIELD operator()(const int n) const=0;
33 | virtual FIELD read(const int i) const {return (*this)(i);}
34 |
35 | //virtual void (foreach)(std::function lambda)=0;
36 | //virtual void (foreach)(std::function lambda) const=0;
37 |
38 | virtual bool isFilled (const int i)const =0;
39 | virtual int nFilled() const=0;
40 |
41 | public:
42 |
43 | virtual int nnz() const=0;
44 |
45 | virtual int argmax() const=0;
46 | virtual int argmax_abs() const=0;
47 |
48 | virtual FIELD norm2() const=0;
49 | // FIELD diff2(const VECTOR& x)=0;
50 |
51 | public:
52 |
53 | //virtual void serialize(Bofstream& ofs) const=0;
54 | //virtual void serialize(Rstream& rstream) const=0;
55 |
56 | virtual string str(const Dense dummy) const;
57 | virtual string str(const Sparse dummy) const;
58 | virtual string str() const{return str(Dense());};
59 |
60 | public:
61 |
62 | int n;
63 |
64 | };
65 |
66 |
67 | ostream& operator<<(ostream& stream, const Vector& x);
68 |
69 |
70 |
71 |
72 | #endif
73 |
--------------------------------------------------------------------------------
/MLGkernel/matrices/Vectorh.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 |
18 | #include "Vectorv.hpp"
19 | #include "Vectorl.hpp"
20 | #include "Vectorh.hpp"
21 | #include "Cvector.hpp"
22 | #include
23 |
24 | extern default_random_engine randomNumberGenerator;
25 |
26 |
27 | /*
28 | Vectorh::Vectorh(const int _n, const class Random& dummy): SparseVector(_n){
29 | uniform_real_distribution distr(0,1);
30 | for(int i=0; i distr(0,1);
38 | for(int i=0; i<_n; i++)
39 | if(distr(randomNumberGenerator)<=p) v[i]=distr(randomNumberGenerator);
40 | return v;
41 | }
42 |
43 |
44 |
45 | Vectorh::Vectorh(const Cvector& x): SparseVector(x.n){
46 | for(int i=0; i
23 |
24 | extern default_random_engine randomNumberGenerator;
25 |
26 |
27 | /*
28 | Vectorl::Vectorl(const int _n, const class Random& dummy): SparseVector(_n){
29 | uniform_real_distribution distr(0,1);
30 | for(int i=0; i distr(0,1);
38 | for(int i=0; i<_n; i++)
39 | if(distr(randomNumberGenerator)<=p) v.push_back(SVpair(i,distr(randomNumberGenerator)));
40 | return v;
41 | }
42 |
43 |
44 | Vectorl::Vectorl(const Cvector& x): SparseVector(x.n){
45 | for(int i=0; i(x).sort(); for(auto& p:x) push_back(p);}
51 |
52 | // Conversion from Vectorh: copies every (index,value) pair of x through operator().
53 | Vectorl::Vectorl(const Vectorh& x): SparseVector(x.n){
54 | for(auto& p:x) (*this)(p.first)=p.second;}
55 |
56 | // Copy of x with relabelled indices: entry i is stored at remap.forward[i], or at remap.backward[i] when inverse is true.
57 | Vectorl::Vectorl(const Vectorl& x, const class Remap& remap, const bool inverse): SparseVector(x.n){
58 | if(!inverse) for(auto& p:x) (*this)(remap.forward[p.first])=p.second;
59 | else for(auto& p:x) (*this)(remap.backward[p.first])=p.second;
60 | }
61 |
62 |
63 |
64 | // ---- I/O ------------------------------------------------------------------------------------------------------
65 |
66 |
67 | string Vectorl::classname(){return "Vectorl";}
68 |
69 | // Constructs from a binary stream: calls ifs.check("Vectorl",0) (presumably a type-tag check), then reads n and the entry list.
70 | Vectorl::Vectorl(Bifstream& ifs): SparseVector(0){
71 | ifs.check("Vectorl",0);
72 | ifs.read(n);
73 | ifs.read_list(*this);
74 | }
75 |
76 | // Writes the "Vectorl" tag, then n, then the entry list -- the order in which the Bifstream constructor reads them back.
77 | void Vectorl::serialize(Bofstream& ofs) const{
78 | ofs.tag("Vectorl",0);
79 | ofs.write(n);
80 | ofs.write_list(*this);
81 | }
82 |
83 |
84 | void Vectorl::serialize(Rstream& rstream) const{
85 | rstream<<"Vectorl{"<
23 |
24 | extern default_random_engine randomNumberGenerator;
25 |
26 |
27 | /*
28 | Vectorv::Vectorv(const int _n, const class Random& dummy): SparseVector(_n), sorted(0){
29 | uniform_real_distribution distr(0,1);
30 | for(int i=0; i distr(0,1);
39 | for(int i=0; i<_n; i++)
40 | if(distr(randomNumberGenerator)<=p) v.push_back(SVpair(i,distr(randomNumberGenerator)));
41 | return v;
42 | }
43 |
44 |
45 | // ---- Conversions -----------------------------------------------------------------------------------------------
46 |
47 |
48 | Vectorv::Vectorv(const Cvector& x): SparseVector(x.n), sorted(true){
49 | for(int i=0; i
21 | #include "pMMFbase.hpp"
22 |
23 | class LogStream{
24 | public:
25 |
26 | //virtual void write(const string& s)=0;
27 | virtual LogStream& operator<<(const char* s)=0;
28 | virtual LogStream& operator<<(const string& s)=0;
29 | virtual LogStream& operator<<(const int& x)=0;
30 | virtual LogStream& operator<<(const double& x)=0;
31 |
32 | };
33 |
34 |
35 | class Log{
36 | public:
37 |
38 | Log(){startClock();}
39 |
40 | public:
41 |
42 | Log& operator<<(const string& s);
43 | Log& operator<<(const char* s);
44 |
45 | /*
46 | Log& skipline(const int n=1, const int v=0){
47 | if(verbosity=n) return *this;
49 | for(int i=0; i=n) return *this;
57 | if(stream==nullptr) for(int i=0; i > time;
78 |
79 | chrono::time_point t;
80 |
81 | int verbosity=0;
82 | int skippedlines=0;
83 |
84 | LogStream* stream=nullptr;
85 | };
86 |
87 |
88 |
89 | #endif
90 |
--------------------------------------------------------------------------------
/MLGkernel/utility/Makefile:
--------------------------------------------------------------------------------
1 | ROOTDIR= ..
2 | include $(ROOTDIR)/Makefile.base
3 |
4 | CFLAGS+= -I $(INCLUDEDIR) -I $(UTILITYDIR) -I $(MATRIXDIR) -fPIC # -std=c++11
5 |
6 |
7 | Log.o: Log.hpp Log.cpp
8 | $(CC) -c Log.cpp $(CFLAGS)
9 |
10 | Rstream.o: Rstream.hpp Rstream.cpp
11 | $(CC) -c Rstream.cpp $(CFLAGS)
12 |
13 | Bofstream.o: Bofstream.hpp Bofstream.cpp
14 | $(CC) -c Bofstream.cpp $(CFLAGS)
15 |
16 | Bifstream.o: Bifstream.hpp Bifstream.cpp
17 | $(CC) -c Bifstream.cpp $(CFLAGS)
18 |
19 | Serializable.o: Serializable.hpp Serializable.cpp
20 | $(CC) -c Serializable.cpp $(CFLAGS)
21 |
22 | TopkList.o: TopkList.hpp TopkList.cpp
23 | $(CC) -c TopkList.cpp $(CFLAGS)
24 |
25 | ThreadBank.o: ThreadBank.hpp ThreadBank.cpp
26 | $(CC) -c ThreadBank.cpp $(CFLAGS)
27 |
28 | ThreadManager.o: ThreadManager.hpp ThreadManager.cpp ThreadBank.hpp
29 | $(CC) -c ThreadManager.cpp $(CFLAGS)
30 |
31 | #Graph.o: Graph.cpp Graph.hpp
32 | # $(CC) -c Graph.cpp $(CFLAGS) $(INCLUDE)
33 |
34 | filetypes.o: filetypes/*.hpp filetypes/*.cpp
35 | $(CC) -c filetypes/filetypes.cpp $(CFLAGS) $(INCLUDE) -I filetypes/ $(MACROS)
36 |
37 |
38 | objects: Log.o Rstream.o Bofstream.o Bifstream.o Serializable.o TopkList.o \
39 | ThreadBank.o ThreadManager.o filetypes.o #Graph.o
40 |
41 | tests:
42 |
43 | all: objects tests
44 |
45 | clean:
46 | @rm -f *.o
47 |
48 | anew: clean all
49 |
--------------------------------------------------------------------------------
/MLGkernel/utility/Rstream.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #include "Rstream.hpp"
18 |
19 |
--------------------------------------------------------------------------------
/MLGkernel/utility/Rstream.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #ifndef _Rstream
18 | #define _Rstream
19 |
20 | #include "pMMFbase.hpp"
21 |
22 |
23 | class Rstream{
24 | public:
25 |
26 | Rstream(ostream& _out, const int _depth=16):out(_out),indent(0),depth(_depth),bol(true){}
27 |
28 | ~Rstream(){out<
33 | Rstream& operator<<(const T& x){
34 | //if(typeid(x)==typeid(Rstream::end)) {out<
52 | Rstream& write(const T& x){
53 | if(depth<0){out<
62 | const Rstream& var(const char* name, const T& x){
63 | if(bol) {for(int i=0; i > CoutType;
72 | //typedef CoutType& (*StandardEndLine)(CoutType&);
73 | //Rstream& operator<<(StandardEndLine manip){return *this;}
74 |
75 |
76 | public:
77 |
78 | int indent;
79 | int depth;
80 | mutable bool bol;
81 |
82 | ostream& out;
83 |
84 |
85 | };
86 |
87 | #endif
88 |
--------------------------------------------------------------------------------
/MLGkernel/utility/Serializable.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #include "Serializable.hpp"
18 | #include "Bofstream.hpp"
19 | #include "Bifstream.hpp"
20 |
21 | void Serializable::save(const char* filename) const {Bofstream ofs(filename); serialize(ofs);}
22 |
--------------------------------------------------------------------------------
/MLGkernel/utility/Serializable.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #ifndef _Serializable
18 | #define _Serializable
19 |
20 | #include "Bifstream.hpp"
21 | #include "Bofstream.hpp"
22 |
23 |
24 | class Serializable{
25 | public:
26 |
27 | virtual void serialize(Bofstream& ofs) const=0;
28 |
29 | public:
30 |
31 | void save(const char* filename) const;
32 |
33 | };
34 |
35 |
36 |
37 | #endif
38 |
--------------------------------------------------------------------------------
/MLGkernel/utility/ThreadBank.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #include "ThreadBank.hpp"
18 |
19 |
20 |
21 |
--------------------------------------------------------------------------------
/MLGkernel/utility/ThreadBank.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #ifndef _ThreadBank
18 | #define _ThreadBank
19 |
20 | #include
21 | #include "pMMFbase.hpp"
22 | #include "ThreadManager.hpp"
23 |
24 | using namespace std;
25 |
26 |
27 | //extern mutex cout_mutex;
28 | extern ThreadManager threadManager;
29 |
30 |
31 | class ThreadBank{
32 | public:
33 |
34 | ThreadBank()=delete;
35 |
36 | ThreadBank(const int _maxthreads=1000, const int _maxprivileged=1):
37 | maxthreads(_maxthreads), maxprivileged(_maxprivileged), nthreads(0), nprivileged(0) {gate.lock();};
38 |
39 | ~ThreadBank(){for(auto& th:threads) th.join();}
40 |
41 |
42 | public:
43 |
44 | template
45 | void add(FUNCTION lambda, const OBJ x){
46 | lock_guard lock(mx); // unnecessary if called from a single thread
47 | threadManager.enqueue(this);
48 | gate.lock(); // gate can only be unlocked by threadManager
49 | nthreads++;
50 | threads.push_back(thread([this,lambda](OBJ _x){lambda(_x); nthreads--; threadManager.release(this);},x));
51 | #ifdef _THREADBANKVERBOSE
52 | printinfo();
53 | #endif
54 | }
55 |
56 |
57 | template
58 | void add(FUNCTION lambda, const OBJ1 x1, const OBJ2 x2){
59 | lock_guard lock(mx);
60 | threadManager.enqueue(this);
61 | gate.lock();
62 | nthreads++;
63 | threads.push_back(thread([this,lambda](OBJ1 _x1, OBJ2 _x2){
64 | lambda(_x1,_x2); nthreads--; threadManager.release(this);},x1,x2));
65 | #ifdef _THREADBANKVERBOSE
66 | printinfo();
67 | #endif
68 | }
69 |
70 |
71 | template
72 | void add(FUNCTION lambda, const OBJ1 x1, const OBJ2 x2, const OBJ3 x3){
73 | lock_guard lock(mx);
74 | threadManager.enqueue(this);
75 | gate.lock();
76 | nthreads++;
77 | threads.push_back(thread([this,lambda](OBJ1 _x1, OBJ2 _x2, OBJ3 _x3){
78 | lambda(_x1,_x2,_x3); nthreads--; threadManager.release(this);},x1,x2,x3));
79 | #ifdef _THREADBANKVERBOSE
80 | printinfo();
81 | #endif
82 | }
83 |
84 |
85 | bool is_ready(){return nthreads nthreads;
100 | int nprivileged=0; // only to be touched by threadManager
101 | int maxthreads=4;
102 | int maxprivileged=1;
103 |
104 | vector threads;
105 |
106 | };
107 |
108 |
109 |
110 |
111 |
112 | #endif
113 |
--------------------------------------------------------------------------------
/MLGkernel/utility/ThreadManager.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #include "ThreadManager.hpp"
18 | #include "ThreadBank.hpp"
19 |
20 |
21 | void ThreadManager::enqueue(ThreadBank* bank){
22 | lock_guard lock(mx);
23 | if(is_runnable(bank)) launch(bank);
24 | else queue.push_back(bank);
25 | }
26 |
27 |
28 | void ThreadManager::release(ThreadBank* bank){
29 | lock_guard lock(mx);
30 | if(bank->nprivileged>0) bank->nprivileged--;
31 | else nthreads--;
32 | for(auto it=queue.begin(); it!=queue.end(); it++)
33 | if(is_runnable(*it)){
34 | launch(*it);
35 | it=queue.erase(it);
36 | }
37 | // auto it=find_if(queue.begin(),queue.end(),[this](ThreadBank* bank){return is_runnable(bank);});
38 | // if(it==queue.end()) return;
39 | // ThreadBank* bank=*it;
40 | // queue.erase(it);
41 | // launch(bank);
42 | }
43 |
44 |
45 | bool ThreadManager::is_runnable(ThreadBank* bank){
46 | return bank->is_ready() && (bank->nprivilegedmaxprivileged || nthreadsnprivilegedmaxprivileged) bank->nprivileged++;
52 | else nthreads++;
53 | bank->gate.unlock();
54 | }
55 |
56 |
57 | /*
58 | void addBank(const ThreadBank* bank){
59 | lock_guard lock(mx);
60 | banks.push_front(bank);
61 | }
62 |
63 | void removeBank(const ThreadBank* bank){
64 | lock_guard lock(mx);
65 | banks.remove(bank);
66 | }
67 | */
68 |
--------------------------------------------------------------------------------
/MLGkernel/utility/ThreadManager.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #ifndef _ThreadManager
18 | #define _ThreadManager
19 |
20 | #include
21 | #include
22 | #include
23 |
24 | class ThreadBank;
25 |
26 | using namespace std;
27 |
28 |
29 | class ThreadManager{
30 | public:
31 |
32 | ThreadManager(const int _maxthreads):maxthreads(_maxthreads),nthreads(0){}
33 | ~ThreadManager(){}
34 |
35 | public:
36 |
37 | void enqueue(ThreadBank* bank);
38 | void release(ThreadBank* bank);
39 |
40 | int get_nthreads(){lock_guard lock(mx); return nthreads;}
41 |
42 | private:
43 |
44 | bool is_runnable(ThreadBank* bank);
45 | void launch(ThreadBank* bank);
46 |
47 | public:
48 |
49 | int maxthreads;
50 |
51 | private:
52 |
53 | mutex mx;
54 | int nthreads;
55 | list queue;
56 |
57 | };
58 |
59 |
60 | #endif
61 |
--------------------------------------------------------------------------------
/MLGkernel/utility/TopkList.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #include "TopkList.hpp"
18 |
19 |
--------------------------------------------------------------------------------
/MLGkernel/utility/TopkList.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 |
17 | #ifndef _TopkList
18 | #define _TopkList
19 |
20 | #include
21 | #include "pMMFbase.hpp"
22 | //#include "DenseVector.hpp"
23 |
24 | struct TopkListPair{
25 | TopkListPair(const INDEX& _first, const FIELD& _second):first(_first),second(_second){};
26 | INDEX first;
27 | FIELD second;
28 | };
29 |
30 |
31 | class TopkList: public list{
32 | public:
33 |
34 | TopkList(const int _k): k(_k), lowestv(numeric_limits::lowest()){}
35 |
36 | // TopkList(const DenseVector& v, const int _k): k(_k), lowestv(-10000){
37 | // for(int i=0; ilowestv) insert(i,v(i));}
38 |
39 | public:
40 |
41 | void insert(int index, FIELD value){
42 | auto it=begin();
43 | while(it!=end() && it->second>=value){it++;}
44 | list::insert(it,TopkListPair(index,value));
45 | if(size()>k) pop_back();
46 | if(size()>=k) lowestv=back().second;
47 | }
48 |
49 | void consider(int index, FIELD value){
50 | if(value>lowestv || size()<second>=value){it++;}
53 | list::insert(it,TopkListPair(index,value));
54 | if(size()>k) pop_back();
55 | if(size()>=k) lowestv=back().second;
56 | }
57 | }
58 |
59 | IndexSet indices() const{
60 | IndexSet I(size()); int i=0;
61 | for(auto& p:*this) I[i++]=p.first;
62 | return I;
63 | }
64 |
65 |
66 | public:
67 |
68 | int k;
69 | FIELD lowestv;
70 | int lowestp;
71 |
72 | };
73 |
74 |
75 |
76 | #endif
77 |
78 |
79 | /*
80 | // vector version
81 | void insert(int index, FIELD value){
82 | if(size()
21 |
22 | class MatrixIF{
23 | public:
24 |
25 | ~MatrixIF(){ifs.close();}
26 |
27 | public:
28 |
29 | virtual void rewind(){}
30 |
31 | virtual MatrixIF& operator>>(FIELD& v){
32 | cout<<"Error: operator>>(FIELD& ) not supported in sparse matrix input files."<>(IndexValueTriple& t){
36 | cout<<"Error: operator>>(IndexValueTriple& ) not supported in dense matrix input files."<0);
43 | // cout<<"Line length="<>b; ncols++;}
47 | nrows=0; while(ifs.good()) {for(int i=0; i>b; nrows++;}
48 | // cout<>(IndexValueTriple& dest){
59 | dest.i=i; dest.j=j;
60 | if(++j>=ncols) { j=0; i++; }
61 | if(ifs.good() && i<=nrows) ifs>>dest.value; else {dest.i=-1; return *this;}
62 | return *this;
63 | }
64 |
65 | MatrixIF& operator>>(FIELD& dest){
66 | if(++j>=ncols) { j=0; i++; }
67 | if(ifs.good() && i<=nrows) ifs>>dest;
68 | return *this;
69 | }
70 |
71 |
72 | public:
73 |
74 | int i;
75 | int j;
76 | bool eof;
77 |
78 | };
79 |
80 |
81 |
82 | class MatrixIF_ASCII::Sparse: public MatrixIF_ASCII{
83 | public:
84 |
85 | Sparse(const string filename){
86 | sparse=1;
87 | ifs.open(filename);
88 | char buffer[255];
89 | ifs.get(buffer,255);
90 | ifs.close();
91 |
92 | ifs.open(filename);
93 | int nextracted=0;
94 | while(ifs.good() && ifs.tellg()>b; if(!ifs.fail()) nextracted++;}
95 | if(nextracted==2){ifs.close(); ifs.open(filename); ifs>>nrows>>ncols; return;}
96 | if(nextracted==3){
97 | ifs.close(); ifs.open(filename);
98 | nrows=0; ncols=0;
99 | int a; int b; float f;
100 | while(ifs.good()){
101 | ifs>>a>>b>>f;
102 | if(a>nrows-1) nrows=a+1;
103 | if(b>ncols-1) ncols=b+1;
104 | }
105 | ifs.close(); ifs.open(filename);
106 | return;
107 | }
108 | cout<<"Error: could not parse first line"<>(IndexValueTriple& dest){
115 | if(!ifs.good()){dest.i=-1; return *this;}
116 | ifs>>dest.i>>dest.j>>dest.value; return *this;
117 | }
118 |
119 | public:
120 |
121 | };
122 |
123 |
124 | #endif
125 |
--------------------------------------------------------------------------------
/MLGkernel/utility/filetypes/MatrixIF_Boeing.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 | #include "MatrixIF_Boeing.hpp"
17 |
--------------------------------------------------------------------------------
/MLGkernel/utility/filetypes/MatrixIF_Matlab.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 | #include "MatrixIF_Matlab.hpp"
17 |
18 |
--------------------------------------------------------------------------------
/MLGkernel/utility/filetypes/MatrixIF_Matlab.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 | #ifndef _MatrixIF_Matlab
17 | #define _MatrixIF_Matlab
18 |
19 | #include "MatrixIF.hpp"
20 | #include
21 |
22 |
23 | class MatrixIF_Matlab: public MatrixIF{
24 | public:
25 |
26 | class Dense;
27 | class Sparse;
28 |
29 | };
30 |
31 |
32 |
33 | class MatrixIF_Matlab::Dense: public MatrixIF_Matlab{
34 | public:
35 |
36 | Dense(const string filename){
37 | sparse=0;
38 | matfile=Mat_Open(filename.c_str(),MAT_ACC_RDONLY);
39 | if(matfile==NULL){cout<<"Error: file cannot be opened"<dims[0];
43 | ncols=matvar->dims[1];
44 | next=reinterpret_cast(matvar->data);
45 |
46 | //need to swap to 'transpose' the array since matIO reads it in column major order
47 | /*int n = 5; //sqrt(nrows*ncols);
48 | cout<<"size"<>(FIELD& dest){
70 | dest = *next;
71 | next++;
72 | return *this;
73 | }
74 |
75 | public:
76 |
77 | mat_t* matfile;
78 | matvar_t* matvar;
79 | double *next;
80 |
81 | };
82 |
83 |
84 |
85 | class MatrixIF_Matlab::Sparse: public MatrixIF_Matlab{
86 | public:
87 |
88 | Sparse(const string filename){
89 | sparse=1;
90 | matfile=Mat_Open(filename.c_str(),MAT_ACC_RDONLY);
91 | if(matfile==NULL){cout<<"Error: file "<class_type == MAT_C_SPARSE){
96 | sparse = (mat_sparse_t*)matvar->data;
97 | }
98 | nrows=matvar->dims[0];
99 | ncols=matvar->dims[1];
100 | next=reinterpret_cast(sparse->data);
101 | //Mat_VarPrint(matvar,1);
102 | //cout<<"printed"<jc;
104 | Ir = sparse->ir;
105 | njc= sparse->njc;
106 | ndata=sparse->ndata;
107 | cout<<"read"<>(IndexValueTriple& dest){
114 | int i = indIr; int j = indJc; int c = 0;
115 | for (; i < njc-1; i++ ) {
116 | c= 0;
117 | for (; j0){
124 | break;
125 | }
126 | indIr = i;
127 | }
128 | dest.value = *next++;
129 | if(!dest.value || i>=njc-1){dest.i=-1; return *this;}
130 | return *this;
131 | }
132 |
133 |
134 | public:
135 |
136 | mat_t* matfile;
137 | matvar_t* matvar;
138 | double *next;
139 |
140 | int indIr= 0; //sparse->ir;
141 | int indJc =0;
142 | int* Ir;
143 | int* Jc; //sparse->jc;
144 | int njc;
145 | int ndata;
146 |
147 | };
148 |
149 |
150 | #endif
151 |
--------------------------------------------------------------------------------
/MLGkernel/utility/filetypes/MatrixOF.hpp:
--------------------------------------------------------------------------------
1 | /*
2 | -----------------------------------------------------------------------------
3 |
4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph
5 | Kernel for computing the gram matrix of a collection of graphs.
6 |
7 |
8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan
9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta
10 |
11 |
12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF)
13 | which is licensed under the GNU Public License, version 3.
14 | This code therefore is also licensed under the terms of the GNU Public License, version 3.
15 | ----------------------------------------------------------------------------- */
16 | #ifndef _MatrixOF
17 | #define _MatrixOF
18 |
19 | #include "pMMFbase.hpp"
20 | #include
21 |
22 | class MatrixOF{
23 | public:
24 |
25 | //MatrixOF(const char* filename, const int _nrows, const int _ncols):
26 | // nrows(_nrows), ncols(_ncols), ofs(filename){}
27 |
28 | ~MatrixOF(){ofs.close();}
29 |
30 | public:
31 |
32 | virtual MatrixOF& operator<<(const FIELD& v){
33 | cout<<"Error: operator<<(FIELD& ) not supported in sparse matrix output files."<
21 |
22 | class MatrixOF_Boeing: public MatrixOF{
23 | public:
24 |
25 | MatrixOF_Boeing(const string filename, const int _nrows, const int _ncols){
26 | nrows=_nrows; ncols=_ncols; sparse=1;
27 |
28 | // Write row indices, column pointers, values in separate files and concatenate
29 | rowfs.open("row.temp");
30 | colfs.open("col.temp");
31 | valfs.open("val.temp");
32 |
33 | nnz=0;
34 | nnz_in_col=0;
35 | rowchars=colchars=valchars=0;
36 | colptr_val=1;
37 |
38 | colfs<t.j) return *this; // write only the upper triangular part
80 | if(t.j==current_col+1) { // no more nonzeros in previous column
81 | if (colchars+1+std::to_string(colptr_val+nnz_in_col).length() > 80) {
82 | colfs< 80) {
96 | rowfs< 80) {
103 | valfs<= num_sample else to_neigh for to_neigh in to_neighs]
43 | else:
44 | samp_neighs = to_neighs
45 |
46 | if self.gcn:
47 | samp_neighs = [samp_neigh + set([nodes[i]]) for i, samp_neigh in enumerate(samp_neighs)]
48 | unique_nodes_list = list(set.union(*samp_neighs))
49 | unique_nodes = {n:i for i,n in enumerate(unique_nodes_list)}
50 | mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes)))
51 | column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh]
52 | row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))]
53 | mask[row_indices, column_indices] = 1
54 | if self.cuda:
55 | mask = mask.cuda()
56 | num_neigh = mask.sum(1, keepdim=True)
57 | mask = mask.div(num_neigh)
58 | if self.cuda:
59 | embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda())
60 | else:
61 | embed_matrix = self.features(torch.LongTensor(unique_nodes_list))
62 | to_feats = mask.mm(embed_matrix)
63 | return to_feats
64 |
--------------------------------------------------------------------------------
/diffpool/cross_val.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import torch
4 |
5 | import pickle
6 | import random
7 |
8 | from graph_sampler import GraphSampler
9 |
def prepare_val_data(graphs, args, val_idx, max_nodes=0):
    """Split ``graphs`` into one validation fold and a training set, and wrap both in loaders.

    ``graphs`` is shuffled in place, then cut into folds of ``len(graphs) // 10``
    graphs; fold number ``val_idx`` becomes the validation set and the rest the
    training set. For ``val_idx >= 9`` nothing after the validation fold is
    added to the training set.

    Args:
        graphs: list of graph objects exposing ``number_of_nodes()`` and
            ``number_of_edges()``.
        args: object providing ``feature_type``, ``batch_size`` and ``num_workers``.
        val_idx: index of the fold used for validation.
        max_nodes: forwarded to ``GraphSampler`` as ``max_num_nodes``.

    Returns:
        ``(train_loader, val_loader, max_num_nodes, feat_dim, assign_feat_dim)``;
        the last three values are read from the validation sampler.
    """
    random.shuffle(graphs)

    fold_size = len(graphs) // 10
    fold_start = val_idx * fold_size
    fold_end = fold_start + fold_size

    val_graphs = graphs[fold_start:fold_end]
    train_graphs = graphs[:fold_start]
    if val_idx < 9:
        train_graphs = train_graphs + graphs[fold_end:]

    print('Num training graphs: ', len(train_graphs),
          '; Num validation graphs: ', len(val_graphs))
    print('Number of graphs: ', len(graphs))
    print('Number of edges: ', sum(G.number_of_edges() for G in graphs))

    node_counts = [G.number_of_nodes() for G in graphs]
    print('Max, avg, std of graph size: ',
          max(node_counts),
          ', {0:.2f}'.format(np.mean(node_counts)),
          ', {0:.2f}'.format(np.std(node_counts)))

    def build(subset, shuffle):
        # One sampler plus the minibatch loader that iterates over it.
        sampler = GraphSampler(subset, normalize=False, max_num_nodes=max_nodes,
                               features=args.feature_type)
        loader = torch.utils.data.DataLoader(
            sampler,
            batch_size=args.batch_size,
            shuffle=shuffle,
            num_workers=args.num_workers)
        return sampler, loader

    _, train_dataset_loader = build(train_graphs, True)
    val_sampler, val_dataset_loader = build(val_graphs, False)

    return (train_dataset_loader, val_dataset_loader,
            val_sampler.max_num_nodes, val_sampler.feat_dim,
            val_sampler.assign_feat_dim)
47 |
48 |
--------------------------------------------------------------------------------
/diffpool/gen/feat.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import networkx as nx
3 | import numpy as np
4 | import random
5 |
class FeatureGen(metaclass=abc.ABCMeta):
    """Abstract base class for node-feature generators."""

    @abc.abstractmethod
    def gen_node_features(self, G):
        """Generate features for the nodes of ``G``; concrete subclasses must override this."""
10 |
class ConstFeatureGen(FeatureGen):
    """Feature generator that gives every node the same value."""

    def __init__(self, val):
        # Value stored under the 'feat' key of every node.
        self.val = val

    def gen_node_features(self, G):
        """Set the 'feat' attribute of every node of ``G`` to ``self.val``."""
        per_node = {}
        for node in G.nodes():
            per_node[node] = {'feat': self.val}
        nx.set_node_attributes(G, per_node)
18 |
class GaussianFeatureGen(FeatureGen):
    """Feature generator that draws each node's feature from a multivariate normal."""

    def __init__(self, mu, sigma):
        # Mean vector and covariance matrix passed to
        # np.random.multivariate_normal.
        self.mu = mu
        self.sigma = sigma

    def gen_node_features(self, G):
        """Draw one sample per node of ``G`` and store it as that node's 'feat' attribute."""
        nodes = list(G.nodes())
        # The parameters live on the instance; the bare names `mu` / `sigma`
        # are not defined in this scope.
        feat = np.random.multivariate_normal(self.mu, self.sigma, len(nodes))
        # Key by the graph's actual node ids so graphs whose nodes are not
        # labelled 0..n-1 also receive their features.
        feat_dict = {node: {'feat': feat[row]} for row, node in enumerate(nodes)}
        nx.set_node_attributes(G, feat_dict)
28 |
29 |
--------------------------------------------------------------------------------
/diffpool/go.sh:
--------------------------------------------------------------------------------
#!/bin/bash -ex
# Grid search for ENZYMES: one training run for every --method value
# combined with every --num-gc-layers value.
for method in 'base-set2set' 'base'; do
    for gc in 4 8 16 2; do
        python -m train \
            --datadir=../data \
            --bmname=ENZYMES \
            --cuda=0 \
            --max-nodes 1000 \
            --epochs=100 \
            --num-classes=3 \
            --output-dim 512 \
            --lr 0.001 \
            --num-gc-layers "$gc" \
            --method "$method"
    done
done

# DD (disabled)
#python -m train --datadir=data --bmname=DD --cuda=0 --max-nodes=500 --epochs=1000
13 |
--------------------------------------------------------------------------------
/diffpool/graph_embedding.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/diffpool/graphsage.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 |
5 | import numpy as np
6 |
class SupervisedGraphSage(nn.Module):
    """Linear classification layer on top of a node encoder.

    Args:
        num_classes: number of output classes.
        enc: callable encoder exposing ``embed_dim``; calling it on a batch of
            nodes must return a 2-D tensor whose second dimension is
            ``embed_dim``, so that it can be multiplied by the
            ``(embed_dim, num_classes)`` weight matrix.
    """

    def __init__(self, num_classes, enc):
        super(SupervisedGraphSage, self).__init__()
        self.enc = enc
        self.xent = nn.CrossEntropyLoss()

        self.weight = nn.Parameter(torch.FloatTensor(enc.embed_dim, num_classes))
        # Reached through `nn`, because no bare `init` name is imported here.
        nn.init.xavier_uniform_(self.weight)

    def forward(self, nodes):
        """Return the unnormalised class scores (one row per node) for ``nodes``."""
        embeds = self.enc(nodes)
        scores = embeds.mm(self.weight)
        return scores

    def loss(self, nodes, labels):
        """Return the cross-entropy between the scores of ``nodes`` and ``labels``.

        ``labels`` is flattened to one dimension, so both ``(batch,)`` and
        ``(batch, 1)`` label tensors are accepted.
        """
        scores = self.forward(nodes)
        # CrossEntropyLoss applies log-softmax itself, so it receives the raw
        # scores. reshape(-1) keeps a batch of one as a 1-D tensor, which
        # squeeze() would collapse to a scalar.
        return self.xent(scores, labels.reshape(-1))
27 |
28 |
--------------------------------------------------------------------------------
/diffpool/load_data.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | import scipy as sc
4 | import os
5 | import re
6 |
def read_graphfile(datadir, dataname, max_nodes=None):
    """Read a dataset from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets

    The files ``<dataname>_graph_indicator.txt``, ``<dataname>_graph_labels.txt``
    and ``<dataname>_A.txt`` are read from ``datadir/dataname/``;
    ``<dataname>_node_labels.txt`` and ``<dataname>_node_attributes.txt`` are
    used when they can be opened. Node and graph ids start at 1 in the files.

    Args:
        datadir: directory that contains one sub-directory per dataset.
        dataname: dataset name (both the sub-directory and the file prefix).
        max_nodes: if not None, graphs with more nodes than this are dropped.

    Returns:
        List of networkx graphs with nodes relabelled to 0..n-1. Each graph has
        ``G.graph['label']`` and, when node attributes exist,
        ``G.graph['feat_dim']``; nodes carry a one-hot ``'label'`` and/or a
        ``'feat'`` array when the corresponding files exist. Graphs are built
        from the edge list only, so a node without edges does not appear.
    """
    prefix = os.path.join(datadir, dataname, dataname)

    # graph_indic[node_id] -> id of the graph the node belongs to (both 1-based).
    graph_indic = {}
    with open(prefix + '_graph_indicator.txt') as f:
        for node_id, line in enumerate(f, start=1):
            graph_indic[node_id] = int(line)

    # Optional per-node class labels, shifted by one on read (file value - 1).
    node_labels = []
    try:
        with open(prefix + '_node_labels.txt') as f:
            for line in f:
                node_labels.append(int(line) - 1)
    except IOError:
        print('No node labels')
    # Computed outside the try block so that an empty label file cannot make
    # max() raise.
    num_unique_node_labels = max(node_labels) + 1 if node_labels else 0

    # Optional per-node attribute vectors, comma and/or whitespace separated.
    node_attrs = []
    try:
        with open(prefix + '_node_attributes.txt') as f:
            for line in f:
                fields = re.split(r"[,\s]+", line.strip())
                node_attrs.append(
                    np.array([float(field) for field in fields if field != '']))
    except IOError:
        print('No node attributes')

    # Graph labels are shifted down by one unless the file already uses 0.
    with open(prefix + '_graph_labels.txt') as f:
        raw_graph_labels = [int(line) for line in f]
    graph_labels = np.array(raw_graph_labels)
    if 0 not in raw_graph_labels:
        graph_labels = graph_labels - 1

    # adj_list[graph_id] -> list of (src, dst) edges, keyed by the graph of src.
    adj_list = {i: [] for i in range(1, len(graph_labels) + 1)}
    with open(prefix + '_A.txt') as f:
        for line in f:
            fields = line.split(",")
            e0, e1 = int(fields[0]), int(fields[1])
            adj_list[graph_indic[e0]].append((e0, e1))

    graphs = []
    for graph_id in range(1, 1 + len(adj_list)):
        # indexed from 1 here
        G = nx.from_edgelist(adj_list[graph_id])
        if max_nodes is not None and G.number_of_nodes() > max_nodes:
            continue

        # `G.node` is the attribute mapping in networkx releases that still
        # have it; newer releases only offer `G.nodes[...]`.
        node_data = G.node if hasattr(G, 'node') else G.nodes

        # add features and labels
        G.graph['label'] = graph_labels[graph_id - 1]
        for u in G.nodes():
            if node_labels:
                node_label_one_hot = [0] * num_unique_node_labels
                node_label_one_hot[node_labels[u - 1]] = 1
                node_data[u]['label'] = node_label_one_hot
            if node_attrs:
                node_data[u]['feat'] = node_attrs[u - 1]
        if node_attrs:
            G.graph['feat_dim'] = node_attrs[0].shape[0]

        # Relabel nodes to 0..n-1 in iteration order. Calling G.nodes() works
        # whether `nodes` is a method or a view, so the networkx version
        # string never has to be parsed.
        mapping = {n: new_id for new_id, n in enumerate(G.nodes())}

        # indexed from 0
        graphs.append(nx.relabel_nodes(G, mapping))
    return graphs
110 |
111 |
--------------------------------------------------------------------------------
/diffpool/partition.py:
--------------------------------------------------------------------------------
1 | import networkx
2 | import numpy as np
3 |
def partition(embeddings):
    ''' Compute the pairwise inner products of node embeddings.

    The original body called ``np.dot`` with a single argument, which always
    raises ``TypeError``; the product of the embedding matrix with its own
    transpose is used instead.
    NOTE(review): the docstring of the original promised a partition of the
    embeddings, but no partitioning step was ever implemented -- only this
    similarity matrix. Confirm the intended follow-up with the author.

    Args:
        embeddings: N-by-D matrix, where N is the number of node embeddings, and D
            is the embedding dimension.
    Returns:
        N-by-N array whose (i, j) entry is the inner product of rows i and j.
    '''
    embeddings = np.asarray(embeddings)
    dist = np.dot(embeddings, embeddings.T)
    return dist
11 |
def kruskal(adj):
    ''' Build a minimum spanning forest of a weighted graph with Kruskal's algorithm.

    The original body read weights from an undefined ``G`` and used an
    undefined ``UF`` class, so it could not run; weights are now read from
    ``adj`` and the union-find structure is kept locally.

    Args:
        adj: square matrix exposing ``.shape`` and ``adj[j][k]`` indexing; a
            non-zero entry is the weight of edge (j, k), zero means no edge.
    Returns:
        Set of ``(u, v)`` index tuples forming the spanning forest. For a pair
        that is non-zero in both directions only the first orientation met in
        row-major order is considered.
    '''
    num_nodes = adj.shape[0]

    # union-find forest: parent[x] == x marks the representative of a component
    parent = list(range(num_nodes))

    def find(x):
        root = x
        while parent[root] != root:
            root = parent[root]
        # path compression so later lookups are short
        while parent[x] != root:
            parent[x], x = root, parent[x]
        return root

    # collect every edge once, in deterministic row-major order
    seen = set()
    edges = []
    for j in range(num_nodes):
        for k in range(num_nodes):
            if adj[j][k] != 0 and (k, j) not in seen:
                seen.add((j, k))
                edges.append((j, k))

    # sort all edges by weight from smallest to largest (stable for ties)
    edges.sort(key=lambda e: adj[e[0]][e[1]])

    MST = set()
    for u, v in edges:
        root_u, root_v = find(u), find(v)
        # endpoints already connected (this also covers self-loops): skip the edge
        if root_u == root_v:
            continue
        parent[root_u] = root_v
        MST.add((u, v))
    return MST
34 |
35 |
--------------------------------------------------------------------------------
/diffpool/set2set.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import init
4 | import torch.nn.functional as F
5 |
6 | import numpy as np
7 |
class Set2Set(nn.Module):
    ''' Attention-based readout that folds a set of embeddings into one vector.

    At every step an LSTM produces a query, the query attends over the input
    embeddings, and the query concatenated with the attention-weighted sum
    becomes the next LSTM input.
    '''
    def __init__(self, input_dim, hidden_dim, act_fn=nn.ReLU, num_layers=1):
        '''
        Args:
            input_dim: input dim of Set2Set.
            hidden_dim: the dim of set representation, which is also the INPUT dimension of
                the LSTM in Set2Set.
                This is a concatenation of weighted sum of embedding (dim input_dim), and the LSTM
                hidden/output (dim: self.lstm_output_dim).
                Because the LSTM below is built with hidden size ``input_dim`` and its output is
                concatenated with a vector of size ``input_dim``, ``forward`` only lines up
                dimensionally when ``hidden_dim == 2 * input_dim``.
            act_fn: activation class instantiated and applied after the final linear layer.
            num_layers: number of stacked LSTM layers.
        '''
        super(Set2Set, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        if hidden_dim <= input_dim:
            # kept as a printed warning (not an exception) to match the original behaviour
            print('ERROR: Set2Set output_dim should be larger than input_dim')
        # the hidden is a concatenation of weighted sum of embedding and LSTM output
        self.lstm_output_dim = hidden_dim - input_dim
        self.lstm = nn.LSTM(hidden_dim, input_dim, num_layers=num_layers, batch_first=True)

        # convert back to dim of input_dim
        self.pred = nn.Linear(hidden_dim, input_dim)
        self.act = act_fn()

    def forward(self, embedding):
        '''
        Args:
            embedding: [batch_size x n x d] embedding matrix
        Returns:
            aggregated: [batch_size x d] vector representation of all embeddings
        '''
        batch_size = embedding.size(0)
        n = embedding.size(1)

        # Allocate the recurrent state next to the input (same device and dtype)
        # instead of forcing CUDA, so the module also runs on CPU.
        hidden = (embedding.new_zeros(self.num_layers, batch_size, self.lstm_output_dim),
                  embedding.new_zeros(self.num_layers, batch_size, self.lstm_output_dim))

        q_star = embedding.new_zeros(batch_size, 1, self.hidden_dim)
        for _ in range(n):
            # q: batch_size x 1 x input_dim
            q, hidden = self.lstm(q_star, hidden)
            # e: batch_size x n x 1 (one attention score per set element)
            e = embedding @ torch.transpose(q, 1, 2)
            a = torch.softmax(e, dim=1)
            # r: attention-weighted sum of the embeddings, batch_size x 1 x d
            r = torch.sum(a * embedding, dim=1, keepdim=True)
            q_star = torch.cat((q, r), dim=2)
        q_star = torch.squeeze(q_star, dim=1)
        out = self.act(self.pred(q_star))

        return out
58 |
--------------------------------------------------------------------------------
/diffpool/test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from glob import glob
3 | import numpy as np
4 | import sys
5 | """
6 | files = glob(f'{sys.argv[1]}/*/*')
7 |
8 | for f in files:
9 | print(f)
10 | accs100 = []
11 | accs200 = []
12 | accs500 = []
13 | accs1000 = []
14 | cnt = 0
15 | for event in tf.train.summary_iterator(f):
16 |
17 | for value in event.summary.value:
18 |
19 | if value.tag == 'acc/val_acc':
20 | val_acc = value.simple_value
21 | else:
22 | continue
23 |
24 |
25 | cnt += 1
26 | if (cnt-100) % 1000 == 0:
27 | accs100.append(val_acc)
28 |
29 | print(cnt)
30 |
31 | print(np.mean(accs100), np.std(accs100))
32 |
33 | # print(value.tag)
34 | # if value.HasField('simple_value'):
35 | # print(value.simple_value)
36 | """
37 |
if __name__ == '__main__':
    # Summarise the CSV file 'log_': for every (gc, method) pair print the mean
    # and standard deviation of each '<i>-mean' column, i = 10, 20, ..., 100.
    df = pd.read_csv('log_')
    gcs = df.gc.unique()
    types = df.method.unique()
    steps = range(10, 110, 10)
    for gc in gcs:
        for tpe in types:
            rows = df[(df.gc == gc) & (df.method == tpe)]
            for i in steps:
                print(gc, tpe, i, rows[str(i)+'-mean'].mean(), rows[str(i) + '-mean'].std())
50 |
--------------------------------------------------------------------------------
/graph2vec_tf/README.md:
--------------------------------------------------------------------------------
1 | # graph2vec
2 |
3 | This repository contains the "tensorflow" implementation of our paper "graph2vec: Learning distributed representations of graphs".
4 | The paper could be found at: https://arxiv.org/pdf/1707.05005.pdf
5 |
6 |
7 | #### Dependencies
8 | This code was developed in Python 2.7. It has been run and tested on Ubuntu 16.04.
9 | It uses the following python packages:
10 | 1. tensorflow (version == 1.4.0)
11 | 2. networkx (version <= 2.0)
12 | 3. scikit-learn (+scipy, +numpy)
13 |
14 | ##### The procedure for setting up graph2vec is as follows:
15 | 1. git clone the repository (command: git clone https://github.com/MLDroid/graph2vec_tf.git )
16 | 2. untar the data.tar.gz tarball
17 |
18 | ##### The procedure for obtaining rooted graph vectors using graph2vec and performing graph classification is as follows:
19 | 1. move to the folder "src" (command: cd src) (also make sure that kdd 2015 paper's (Deep Graph Kernels) datasets are available in '../data/kdd_datasets/dir_graphs/')
20 | 2. run main.py --corpus --class_labels_file_name file to:
21 | *Generate the weisfeiler-lehman kernel's rooted subgraphs from all the graphs
22 | *Train skipgram model to learn graph embeddings. The same will be dumped in ../embeddings/ folder
23 | *Perform graph classification using the graph embeddings generated in the above step
24 | 3. example:
25 | *python main.py --corpus ../data/kdd_datasets/mutag --class_labels_file_name ../data/kdd_datasets/mutag.Labels
26 | *python main.py --corpus ../data/kdd_datasets/proteins --class_labels_file_name ../data/kdd_datasets/proteins.Labels --batch_size 16 --embedding_size 128 --num_negsample 5
27 |
28 |
29 | #### Other command line args:
30 | optional arguments:
31 | -h, --help show this help message and exit
32 | -c CORPUS, --corpus CORPUS
33 | Path to directory containing graph files to be used
34 | for graph classification or clustering
35 | -l CLASS_LABELS_FILE_NAME, --class_labels_file_name CLASS_LABELS_FILE_NAME
36 | File name containg the name of the sample and the
37 | class labels
38 | -o OUTPUT_DIR, --output_dir OUTPUT_DIR
39 | Path to directory for storing output embeddings
40 | -b BATCH_SIZE, --batch_size BATCH_SIZE
41 | Number of samples per training batch
42 | -e EPOCHS, --epochs EPOCHS
43 | Number of iterations the whole dataset of graphs is
44 | traversed
45 | -d EMBEDDING_SIZE, --embedding_size EMBEDDING_SIZE
46 | Intended graph embedding size to be learnt
47 | -neg NUM_NEGSAMPLE, --num_negsample NUM_NEGSAMPLE
48 | Number of negative samples to be used for training
49 | -lr LEARNING_RATE, --learning_rate LEARNING_RATE
50 | Learning rate to optimize the loss function
51 |
52 | --wlk_h WLK_H Height of WL kernel (i.e., degree of rooted subgraph
53 | features to be considered for representation learning)
54 | -lf LABEL_FILED_NAME, --label_filed_name LABEL_FILED_NAME
55 | Label field to be used for coloring nodes in graphs
56 | using WL kenrel
57 |
58 | ## Contact ##
59 | In case of queries, please email: annamala002@e.ntu.edu.sg OR XZHANG048@e.ntu.edu.sg
60 |
61 | #### Reference
62 |
63 | Please consider citing the following paper when you use this code.
64 | @article{narayanangraph2vec,
65 | title={graph2vec: Learning distributed representations of graphs},
66 | author={Narayanan, Annamalai and Chandramohan, Mahinthan and Venkatesan, Rajasekar and Chen, Lihui and Liu, Yang}
67 | }
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/graph2vec_tf/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sunfanyunn/graph-classification/0d9b60102b6456cdc0607b43c8852d860b2f53c0/graph2vec_tf/__init__.py
--------------------------------------------------------------------------------
/graph2vec_tf/classify.py:
--------------------------------------------------------------------------------
1 | import time,json
2 | from utils import get_files
3 | from sklearn.feature_extraction.text import CountVectorizer
4 | from sklearn.preprocessing import Normalizer
5 | from sklearn.model_selection import train_test_split
6 | from sklearn.metrics import accuracy_score, classification_report
7 | from sklearn.svm import SVC,LinearSVC
8 | from random import randint
9 | import numpy as np
10 | import logging
11 | from sklearn.model_selection import GridSearchCV
12 |
13 | from utils import get_class_labels
14 |
15 | import os
16 | logger = logging.getLogger()
17 | logger.setLevel("INFO")
18 |
def subgraph2vec_tokenizer (s):
    '''
    Extract the target token of every line of a subgraph2vec sentence file.
    Only the text before the first space of each line is kept; the context
    strings that follow it are ignored.
    :param s: whole content of a graph2vec file, lines separated by newlines.
    :return: list with one target string per line (empty string for an empty line).
    '''
    targets = []
    for sentence in s.split('\n'):
        target, _, _ = sentence.partition(' ')
        targets.append(target)
    return targets
27 |
28 |
def linear_svm_classify (X_train, X_test, Y_train, Y_test):
    '''
    Fit a linear SVM on graph embeddings and score it on held-out data.
    The regularisation strength C is picked by a 5-fold grid search
    (scored by accuracy) on the training split.
    :param X_train: training feature vectors
    :param X_test: testing feature vectors
    :param Y_train: training set labels
    :param Y_test: test set labels
    :return: accuracy of the selected model on the test split
    '''
    c_grid = {'C':[0.001, 0.01,0.1,1,10,100,1000]}
    search = GridSearchCV(LinearSVC(), c_grid, cv=5, scoring='accuracy', verbose=0)
    search.fit(X_train, Y_train)
    return accuracy_score(Y_test, search.predict(X_test))
44 |
45 |
def perform_classification (corpus_dir, extn, embeddings, class_labels_fname):
    '''
    Evaluate graph embeddings with 10-fold stratified cross-validation.
    Each fold trains and scores a linear SVM via linear_svm_classify; the mean
    and standard deviation of the fold accuracies are printed.
    :param corpus_dir: folder containing subgraph2vec sentence files
    :param extn: extension of subgraph2vec sentence files
    :param embeddings: feature matrix, indexed with the index arrays produced by
        the fold splitter; rows are expected to line up with the sorted file
        list returned by get_files -- TODO confirm against the caller
    :param class_labels_fname: files containing labels of each graph
    :return: mean accuracy over the folds
    '''
    wlk_files = get_files(corpus_dir, extn)
    Y = np.array(get_class_labels(wlk_files, class_labels_fname))

    # Value is unused; the draw is kept so the global `random` state advances as before.
    seed = randint(0, 1000)

    # Base names are computed but not consumed further down (the lookup that
    # used them is disabled).
    wlk_files = [os.path.basename(path) for path in wlk_files]
    X = embeddings

    from sklearn.model_selection import StratifiedKFold
    splitter = StratifiedKFold(10, shuffle=True, random_state=None)
    accs = [
        linear_svm_classify(X[train_index], X[test_index], Y[train_index], Y[test_index])
        for train_index, test_index in splitter.split(X, Y)
    ]
    mean_acc = np.mean(accs)
    print(mean_acc, np.std(accs))
    return mean_acc
85 |
86 |
87 | if __name__ == '__main__':
88 | pass
89 |
--------------------------------------------------------------------------------
/graph2vec_tf/go.sh:
--------------------------------------------------------------------------------
#!/bin/bash -ex

# Runs the graph2vec pipeline five times over every dataset, sweeping the
# learning rate. main.py has no shebang line, so it must be started through
# the interpreter rather than as a bare command.

# run preprocess
#python preprocess.py IMDB-BINARY
#python preprocess.py IMDB-MULTI
#python preprocess.py COLLAB
#python preprocess.py DD
#python preprocess.py REDDIT-BINARY
#python preprocess.py REDDIT-MULTI-5K

for i in 1 2 3 4 5
do
    for DS in 'MUTAG' 'PTC_MR' 'PROTEINS_full' 'IMDB-BINARY' 'IMDB-MULTI' 'REDDIT-BINARY' 'REDDIT-MULTI-5K'
    do
        python3 preprocess.py "$DS"
        for LR in 0.001 0.01 0.1 0.5
        do
            python3 main.py -c "../data/$DS" -l "../data/$DS.Labels" -d 512 --wlk_h 3 -e 1000 -lr "$LR"
        done
    done
done
22 |
--------------------------------------------------------------------------------
/graph2vec_tf/main.py:
--------------------------------------------------------------------------------
1 | import argparse,os,logging,psutil,time
2 | from joblib import Parallel,delayed
3 |
4 | from utils import get_files
5 | from train_utils import train_skipgram
6 | from classify import perform_classification
7 | from make_graph2vec_corpus import *
8 | from time import time
9 |
10 | logger = logging.getLogger()
11 | logger.setLevel("INFO")
12 |
13 |
def main(args):
    '''
    Run the graph2vec pipeline on one corpus directory:
    1. collect the '.gexf' graph files of the corpus
    2. dump their WL-relabelled sentence files (via wlk_relabel_and_dump_memory_version)
    3. train the skipgram model on those sentence files (via train_skipgram)
    :param args: parsed command line arguments (see parse_args)
    :return: None
    '''
    corpus_dir, output_dir = args.corpus, args.output_dir
    batch_size, epochs = args.batch_size, args.epochs
    embedding_size, num_negsample = args.embedding_size, args.num_negsample
    learning_rate, wlk_h = args.learning_rate, args.wlk_h
    label_filed_name = args.label_filed_name
    class_labels_fname = args.class_labels_file_name

    # extension of the sentence files, derived from the WL height
    wl_extn = 'g2v'+str(wlk_h)
    assert os.path.exists(corpus_dir), "File {} does not exist".format(corpus_dir)

    graph_files = get_files(dirname=corpus_dir, extn='.gexf', max_files=0)
    logging.info('Loaded {} graph file names form {}'.format(len(graph_files),corpus_dir))

    started = time()
    wlk_relabel_and_dump_memory_version(graph_files, max_h=wlk_h, node_label_attr_name=label_filed_name)
    logging.info('dumped sg2vec sentences in {} sec.'.format(time() - started))

    train_skipgram(corpus_dir, wl_extn, learning_rate, embedding_size, num_negsample,
                   epochs, batch_size, output_dir, class_labels_fname)
52 |
53 |
54 |
55 |
def parse_args():
    '''
    Build the graph2vec command line parser and parse sys.argv with it.
    :return: namespace holding every command line option
    '''
    parser = argparse.ArgumentParser("graph2vec")

    # (flags, keyword arguments) in the order they are shown by --help
    option_table = [
        (("-c", "--corpus"),
         dict(help="Path to directory containing graph files to be used for graph classification or clustering")),
        (('-l', '--class_labels_file_name'),
         dict(help='File name containg the name of the sample and the class labels')),
        (('-o', "--output_dir"),
         dict(default="../embeddings", help="Path to directory for storing output embeddings")),
        (('-b', "--batch_size"),
         dict(default=128, type=int, help="Number of samples per training batch")),
        (('-e', "--epochs"),
         dict(default=1000, type=int,
              help="Number of iterations the whole dataset of graphs is traversed")),
        (('-d', "--embedding_size"),
         dict(default=1024, type=int, help="Intended graph embedding size to be learnt")),
        (('-neg', "--num_negsample"),
         dict(default=10, type=int, help="Number of negative samples to be used for training")),
        (('-lr', "--learning_rate"),
         dict(default=0.3, type=float, help="Learning rate to optimize the loss function")),
        (("--wlk_h",),
         dict(default=3, type=int,
              help="Height of WL kernel (i.e., degree of rooted subgraph "
                   "features to be considered for representation learning)")),
        (('-lf', '--label_filed_name'),
         dict(default='Label',
              help='Label field to be used '
                   'for coloring nodes in graphs using WL kenrel')),
    ]
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    return parser.parse_args()
93 |
94 |
95 |
96 | if __name__=="__main__":
97 | args = parse_args()
98 | main(args)
99 |
100 |
--------------------------------------------------------------------------------
/graph2vec_tf/preprocess.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import networkx as nx
3 | from glob import glob
4 | from tqdm import tqdm
5 | import os
6 | import subprocess
7 | from sklearn.model_selection import cross_val_score
8 | from sklearn.model_selection import GridSearchCV, StratifiedKFold
9 | from sklearn.svm import SVC, LinearSVC
10 | from sklearn.ensemble import RandomForestClassifier
11 | from sklearn import preprocessing
12 | from sklearn.metrics import accuracy_score
13 |
def load_data(ds_name, use_node_labels):
    """Load a dataset stored as text files under ``../data/<ds_name>/``.

    Reads ``<ds_name>_graph_indicator.txt`` (graph id of node i on line i),
    ``<ds_name>_A.txt`` (one ``source, target`` edge per line),
    ``<ds_name>_graph_labels.txt`` (one integer label per graph) and, when
    requested, ``<ds_name>_node_labels.txt`` (one integer label per node).

    The indexing below (``Gs[graph_id - 1]`` and starting a new graph whenever
    the id differs from the number of graphs seen so far) only lines up when
    graph ids are 1-based, contiguous and nondecreasing in the indicator file.

    :param ds_name: dataset name, used for both the folder and the file prefix
    :param use_node_labels: when true, store each node label in the node
        attribute ``'label'``
    :return: tuple ``(Gs, labels)`` -- list of networkx graphs whose nodes keep
        their global 1-based ids, and a float numpy array of graph labels
    """
    node2graph = {}
    Gs = []

    with open("../data/%s/%s_graph_indicator.txt"%(ds_name,ds_name), "r") as f:
        for c, line in enumerate(f, start=1):
            # int() ignores surrounding whitespace, so a missing final newline
            # no longer costs the last digit (the old line[:-1] did)
            node2graph[c] = int(line)
            if not node2graph[c] == len(Gs):
                Gs.append(nx.Graph())
            Gs[-1].add_node(c)

    with open("../data/%s/%s_A.txt"%(ds_name,ds_name), "r") as f:
        for line in f:
            edge = line.split(",")
            source, target = int(edge[0]), int(edge[1])
            # an edge is filed under the graph that owns its source node
            Gs[node2graph[source]-1].add_edge(source, target)

    if use_node_labels:
        with open("../data/%s/%s_node_labels.txt"%(ds_name,ds_name), "r") as f:
            for c, line in enumerate(f, start=1):
                node_label = int(line)
                # add_node on an existing node only updates its attributes; this
                # avoids the `.node` accessor that newer networkx releases removed
                Gs[node2graph[c]-1].add_node(c, label=node_label)

    labels = []
    with open("../data/%s/%s_graph_labels.txt"%(ds_name,ds_name), "r") as f:
        for line in f:
            labels.append(int(line))

    # builtin float is what the removed alias np.float pointed to
    labels = np.array(labels, dtype = float)
    return Gs, labels
48 |
def preprocess(DS):
    """Convert dataset ``DS`` into per-graph GEXF files plus a labels file.

    Every graph returned by ``load_data`` (node labels not loaded) is written
    to ``../data/<DS>/<index>.gexf`` and a line ``<index>.gexf <label>`` is
    appended to ``../data/<DS>.Labels``.

    :param DS: dataset name
    :return: None
    """
    Gs, labels = load_data(DS, False)
    print('number of graphs', len(Gs))

    datadir = '../data/{}'.format(DS)
    try:
        os.mkdir(datadir)
    except OSError as e:
        # best effort, as before: the folder normally exists already because the
        # raw dataset files were just read from it
        print(e)

    assert len(Gs) == len(labels)
    # the context manager closes the labels file even if writing a graph fails
    with open('../data/{}.Labels'.format(DS), 'w') as f:
        for graphidx, G in tqdm(enumerate(Gs), total=len(Gs)):
            nx.write_gexf(G, '{}/{}.gexf'.format(datadir, graphidx))
            f.write('{}.gexf {}\n'.format(graphidx, int(labels[graphidx])))
65 |
66 |
67 |
68 | if __name__ == '__main__':
69 | import sys
70 | preprocess(sys.argv[1])
71 | # preprocess('ENZYMES')
72 | # preprocess('DD')
73 | # preprocess('REDDIT-BINARY')
74 | # preprocess('COLLAB')
75 | # preprocess('REDDIT-MULTI-5K')
76 | # preprocess('IMDB-BINARY')
77 | # preprocess('IMDB-MULTI')
78 |
--------------------------------------------------------------------------------
/graph2vec_tf/test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
# Summarise the CSV file 'log': for every dataset name in column DS, print the
# mean and standard deviation of the columns named '500', '900' and '1000'.
df = pd.read_csv('log')
DSs = df.DS.unique()
for DS in DSs:
    tmpdf = df[df.DS == DS]
    for i in [500, 900, 1000]:
        column = tmpdf[str(i)]
        print(DS, i, column.mean(), column.std())
8 |
9 |
--------------------------------------------------------------------------------
/graph2vec_tf/train_utils.py:
--------------------------------------------------------------------------------
1 | import os,logging
2 | import numpy as np
3 | from corpus_parser import Corpus
4 | from utils import save_graph_embeddings
5 | from skipgram import skipgram
6 |
7 |
def train_skipgram (corpus_dir, extn, learning_rate, embedding_size, num_negsample, epochs, batch_size, output_dir, class_labels_fname):
    '''
    Load the WL-relabelled corpus and train the skipgram model on it.

    :param corpus_dir: folder containing WL kernel relabeled files
    :param extn: extension of the WL relabeled files
    :param learning_rate: learning rate handed to the skipgram model
    :param embedding_size: number of dimensions of the learnt representations
    :param num_negsample: number of negative samples used by the skipgram model
    :param epochs: handed to the model as its number of steps
    :param batch_size: size of each batch for the skipgram model
    :param output_dir: folder used to build the embedding file name
    :param class_labels_fname: class label file, forwarded to the skipgram model
    :return: None. The embedding file name is computed but nothing is written
        or returned, because the saving step is disabled.
    '''
    name_parts = [os.path.basename(corpus_dir), 'dims', str(embedding_size), 'epochs',
                  str(epochs), 'lr', str(learning_rate), 'embeddings.txt']
    op_fname = os.path.join(output_dir, '_'.join(name_parts))

    logging.info("Initializing SKIPGRAM...")
    # max_files=0 is passed through to Corpus unchanged
    corpus = Corpus(corpus_dir, extn = extn, max_files=0)
    corpus.scan_and_load_corpus()

    model_options = dict(
        corpus_dir=corpus_dir,
        extn=extn,
        class_labels_fname=class_labels_fname,
        num_graphs=corpus.num_graphs,
        num_subgraphs=corpus.num_subgraphs,
        learning_rate=learning_rate,
        embedding_size=embedding_size,
        num_negsample=num_negsample,
        num_steps=epochs,
        corpus=corpus,
    )
    model_skipgram = skipgram(**model_options)
    model_skipgram.train(corpus=corpus,batch_size=batch_size)
57 |
58 |
59 |
60 | if __name__ == '__main__':
61 | pass
62 |
--------------------------------------------------------------------------------
/graph2vec_tf/utils.py:
--------------------------------------------------------------------------------
1 | import os,json
2 |
3 |
def get_files(dirname, extn, max_files=0):
    """Return the sorted, de-duplicated paths under ``dirname`` ending in ``extn``.

    Direct entries of ``dirname`` are matched by name, and every file found
    while walking the tree below ``dirname`` is matched as well.

    :param dirname: root directory to search
    :param extn: required name suffix
    :param max_files: keep only the first ``max_files`` paths; 0 keeps all
    :return: sorted list of matching paths
    """
    matches = {
        os.path.join(dirname, entry)
        for entry in os.listdir(dirname)
        if entry.endswith(extn)
    }
    for root, _subdirs, names in os.walk(dirname):
        matches.update(os.path.join(root, name) for name in names if name.endswith(extn))

    ordered = sorted(matches)
    return ordered[:max_files] if max_files else ordered
17 |
18 |
def save_graph_embeddings(corpus, final_embeddings, opfname):
    """Write the embeddings to ``opfname`` as a JSON object keyed by graph name.

    :param corpus: object whose ``_id_to_graph_name_map`` maps row index to graph name
    :param final_embeddings: 2-D array; row i is the embedding of graph i
    :param opfname: path of the JSON file to (over)write
    :return: None
    """
    dict_to_save = {
        corpus._id_to_graph_name_map[row]: final_embeddings[row, :].tolist()
        for row in range(len(final_embeddings))
    }

    with open(opfname, 'w') as fh:
        json.dump(dict_to_save,fh,indent=4)
28 |
29 |
def get_class_labels(graph_files, class_labels_fname):
    """Look up the integer class label of every graph file.

    Each non-blank line of ``class_labels_fname`` holds a file name followed by
    an integer label. Files are matched on the part of their base name before
    the first dot, so ``12.gexf`` and ``12.gexf.g2v3`` share a label.

    :param graph_files: iterable of graph file paths
    :param class_labels_fname: path of the labels file
    :return: list of labels, in the order of ``graph_files``
    :raises KeyError: if a graph file has no entry in the labels file
    """
    graph_to_class_label_map = {}
    # the context manager closes the handle the old one-liner left open
    with open(class_labels_fname) as fh:
        for line in fh:
            fields = line.split()
            if not fields:
                # tolerate blank lines, e.g. a trailing newline at end of file
                continue
            graph_to_class_label_map[fields[0].split('.')[0]] = int(fields[1])

    labels = [graph_to_class_label_map[os.path.basename(g).split('.')[0]] for g in graph_files]

    return labels
35 |
36 | if __name__ == '__main__':
37 | print('nothing to do')
38 |
--------------------------------------------------------------------------------
/kcnn/README.md:
--------------------------------------------------------------------------------
1 | ## Kernel Graph Convolutional Neural Networks
2 | Code for the paper [Kernel Graph Convolutional Neural Networks](https://arxiv.org/pdf/1710.10689.pdf).
3 |
4 | ### Requirements
5 | Code is written in Python 3.6 and requires:
6 | * PyTorch 0.3
7 | * NetworkX 1.11
8 | * igraph 0.7
9 | * scikit-learn 0.18
10 |
11 | ### Datasets
12 | Use the following link to download datasets:
13 | ```
14 | https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets
15 | ```
16 | Extract the datasets into the `datasets` folder.
17 |
18 | ### Run the model
19 | First, specify the dataset and the hyperparameters in the `main.py` file. Then, use the following command:
20 |
21 | ```
22 | $ python main.py
23 | ```
24 |
25 | ### Cite
26 | Please cite our paper if you use this code:
27 | ```
28 | @article{nikolentzos2017kernel,
29 | title={Kernel Graph Convolutional Neural Networks},
30 | author={Nikolentzos, Giannis and Meladianos, Polykarpos and Tixier, Antoine Jean-Pierre and Skianis, Konstantinos and Vazirgiannis, Michalis},
31 | journal={arXiv preprint arXiv:1710.10689},
32 | year={2017}
33 | }
34 | ```
35 |
36 | -----------
37 |
38 | Provided for academic use only
39 |
--------------------------------------------------------------------------------
/kcnn/go.sh:
--------------------------------------------------------------------------------
#!/bin/bash -ex

# TEST is the virtualenv
# "$@" forwards the caller's arguments unchanged (no word splitting or globbing)
./TEST/bin/python3 main.py --d 512 --dataset "$@"
5 |
--------------------------------------------------------------------------------
/kcnn/graph_kernels_labeled.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import numpy as np
3 | from collections import defaultdict
4 | import copy
5 |
6 |
def sp_kernel(g1, g2=None):
    """Shortest-path kernel between labelled graphs.

    Every ordered pair of mutually reachable nodes (a node paired with itself
    included) contributes one feature: the sorted pair of node labels followed
    by the shortest-path length. The kernel value of two graphs is the dot
    product of their feature-count vectors.

    :param g1: sequence of networkx graphs whose nodes carry a ``'label'`` attribute
    :param g2: optional second sequence; when given the kernel is computed
        between ``g1`` (rows) and ``g2`` (columns), otherwise within ``g1``
    :return: numpy array of shape ``(len(g1), len(g2))`` or ``(len(g1), len(g1))``
    """
    # `is not None` instead of `!= None`: the latter compares element-wise
    # when g2 is an array-like container
    if g2 is not None:
        graphs = list(g1) + list(g2)
    else:
        graphs = g1

    N = len(graphs)
    all_paths = {}
    sp_counts = {}
    for i in range(N):
        graph = graphs[i]
        # dict() makes the result indexable whether networkx returns a dict
        # (1.x) or an iterator of (node, lengths) pairs (2.x and later)
        sp_lengths = dict(nx.shortest_path_length(graph))
        # works on every networkx release, unlike the removed `graph.node`
        node_labels = nx.get_node_attributes(graph, 'label')
        counts = {}
        nodes = list(graph.nodes())
        for v1 in nodes:
            lengths_from_v1 = sp_lengths[v1]
            for v2 in nodes:
                if v2 in lengths_from_v1:
                    label = tuple(sorted([node_labels[v1], node_labels[v2]]) + [lengths_from_v1[v2]])
                    counts[label] = counts.get(label, 0) + 1
                    # give every distinct feature a column index on first sight
                    if label not in all_paths:
                        all_paths[label] = len(all_paths)
        sp_counts[i] = counts

    phi = np.zeros((N,len(all_paths)))

    for i in range(N):
        for label, count in sp_counts[i].items():
            phi[i,all_paths[label]] = count

    if g2 is not None:
        K = np.dot(phi[:len(g1),:],phi[len(g1):,:].T)
    else:
        K = np.dot(phi,phi.T)

    return K
48 |
49 |
def wl_kernel(g1, g2=None, h=6):
    """Weisfeiler-Lehman subtree kernel between labelled graphs.

    Starting from the ``'label'`` node attribute, ``h`` refinement rounds
    replace each node label by a signature built from its own label and the
    sorted labels of its neighbours. For the initial labelling and for every
    round, each graph is described by the counts of its labels or signatures;
    the kernel value of two graphs is the sum over all rounds of the dot
    products of those count maps.

    :param g1: sequence of networkx graphs whose nodes carry a ``'label'`` attribute
    :param g2: optional second sequence; when given the kernel is computed
        between ``g1`` (rows) and ``g2`` (columns), otherwise within ``g1``
    :param h: number of refinement rounds
    :return: numpy array of shape ``(len(g1), len(g2))`` or ``(len(g1), len(g1))``
    """
    if g2 is not None:
        graphs = list(g1) + list(g2)
    else:
        graphs = g1

    N = len(graphs)

    # orig_graph_map[it][i] counts the labels of graph i at round it (-1 = initial)
    orig_graph_map = {it: {i: defaultdict(lambda: 0) for i in range(N)} for it in range(-1, h)}

    labels = {}
    label_lookup = {}
    # per graph: for every node position, the positions of its neighbours
    neighbor_positions = {}

    # initial labeling
    for ind, G in enumerate(graphs):
        nodes = list(G.nodes())
        node2index = {node: pos for pos, node in enumerate(nodes)}
        # works on every networkx release, unlike the removed `G.node`
        node_attr = nx.get_node_attributes(G, 'label')
        # list() because G.neighbors returns an iterator (no len) on networkx 2.x+
        neighbor_positions[ind] = [[node2index[neigh] for neigh in list(G.neighbors(node))]
                                   for node in nodes]

        labels[ind] = np.zeros(len(nodes), dtype = np.int32)
        for pos, node in enumerate(nodes):
            label = node_attr[node]
            if label not in label_lookup:
                label_lookup[label] = len(label_lookup)

            labels[ind][pos] = label_lookup[label]
            orig_graph_map[-1][ind][label] = orig_graph_map[-1][ind].get(label, 0) + 1

    compressed_labels = copy.deepcopy(labels)

    # WL iterations
    for it in range(h):
        # signatures are re-numbered from scratch in every round, but shared
        # across all graphs so equal signatures get equal compressed labels
        label_lookup = {}
        for ind in range(N):
            current = labels[ind]
            for pos, neigh_pos in enumerate(neighbor_positions[ind]):
                node_label = tuple([current[pos]])
                if len(neigh_pos) > 0:
                    neighbors_label = tuple([current[j] for j in neigh_pos])
                    node_label = str(node_label) + "-" + str(sorted(neighbors_label))
                if node_label not in label_lookup:
                    label_lookup[node_label] = len(label_lookup)

                compressed_labels[ind][pos] = label_lookup[node_label]
                orig_graph_map[it][ind][node_label] = orig_graph_map[it][ind].get(node_label, 0) + 1

        labels = copy.deepcopy(compressed_labels)

    if g2 is not None:
        K = np.zeros((len(g1), len(g2)))
        for it in range(-1, h):
            for i in range(len(g1)):
                for j in range(len(g2)):
                    common_keys = set(orig_graph_map[it][i].keys()) & set(orig_graph_map[it][len(g1)+j].keys())
                    K[i][j] += sum([orig_graph_map[it][i].get(k,0)*orig_graph_map[it][len(g1)+j].get(k,0) for k in common_keys])
    else:
        K = np.zeros((N, N))
        for it in range(-1, h):
            for i in range(N):
                for j in range(N):
                    common_keys = set(orig_graph_map[it][i].keys()) & set(orig_graph_map[it][j].keys())
                    K[i][j] += sum([orig_graph_map[it][i].get(k,0)*orig_graph_map[it][j].get(k,0) for k in common_keys])

    return K
--------------------------------------------------------------------------------
/kcnn/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 | from torch.nn.init import xavier_normal,xavier_uniform
4 |
5 | # CNN Model
class CNN(nn.Module):
    """3-D convolution over a stack of embeddings, max-pooled along the
    second spatial axis and classified by a two-layer MLP.
    """

    def __init__(self, input_size, hidden_size, n_classes, d, n_kernels, max_n_communities):
        """
        :param input_size: number of convolution filters (output channels)
        :param hidden_size: width of the hidden fully connected layer
        :param n_classes: number of output classes
        :param d: extent of the convolution kernel along the last axis
        :param n_kernels: multiplied with input_size to size the first linear layer
        :param max_n_communities: extent of the max-pooling window along the
            second spatial axis
        """
        super().__init__()
        self.max_n_communities = max_n_communities
        self.conv = nn.Conv3d(1, input_size, (1, 1, d), padding=0)
        self.fc1 = nn.Linear(input_size*n_kernels, hidden_size)
        self.fc2 = nn.Linear(hidden_size, n_classes)
        self.init_weights()

    def init_weights(self):
        """Xavier-initialise the weights: uniform for the convolution, normal
        for both linear layers."""
        xavier_uniform(self.conv.weight.data)
        for linear in (self.fc1, self.fc2):
            xavier_normal(linear.weight.data)

    def forward(self, x_in):
        """Return per-class log-probabilities for the batch ``x_in``."""
        pooled = F.max_pool3d(self.conv(x_in), (1, self.max_n_communities,1))
        activated = F.relu(pooled)
        # flatten everything but the batch axis
        flat = activated.view(activated.size(0), -1)
        hidden = F.dropout(F.relu(self.fc1(flat)), training=self.training)
        return F.log_softmax(self.fc2(hidden), dim=1)
27 |
--------------------------------------------------------------------------------
/kcnn/nystrom.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy.sparse as sp
3 | from scipy.linalg import svd
4 | from sklearn.utils import check_random_state
5 |
class Nystrom():
    """Low-rank feature map for a graph kernel built from a random subset of
    the training graphs (the basis)."""

    def __init__(self, kernel, kernel_params=None, n_components=100, random_state=None):
        """
        :param kernel: callable ``kernel(graphs_a, graphs_b, **params)`` returning a kernel matrix
        :param kernel_params: optional dict of extra keyword arguments for ``kernel``
        :param n_components: maximum number of basis graphs
        :param random_state: seed or random state handed to ``check_random_state``
        """
        self.kernel = kernel
        self.kernel_params = kernel_params
        self.n_components = n_components
        self.random_state = random_state

    def fit(self, graphs, y=None):
        """Pick the basis graphs and compute the normalisation matrix.

        :param graphs: indexable collection of graphs
        :param y: ignored
        :return: self
        """
        rng = check_random_state(self.random_state)
        n_samples = len(graphs)

        # never ask for more basis graphs than there are samples
        n_components = min(n_samples, self.n_components)
        inds = rng.permutation(n_samples)
        basis = [graphs[ind] for ind in inds[:n_components]]

        basis_kernel = self.kernel(basis, basis, **self._get_kernel_params())

        # sqrt of kernel matrix on basis vectors; singular values are floored
        # at 1e-12 before the division
        U, S, V = svd(basis_kernel)
        S = np.maximum(S, 1e-12)
        self.normalization_ = np.dot(U * 1. / np.sqrt(S), V)
        self.components_ = basis
        # note: this stores the whole permutation, not only the basis part
        self.component_indices_ = inds
        return self

    def transform(self, graphs):
        """Map ``graphs`` to features: kernel against the basis, times the
        transposed normalisation matrix."""
        against_basis = self.kernel(graphs, self.components_, **self._get_kernel_params())
        return np.dot(against_basis, self.normalization_.T)

    def _get_kernel_params(self):
        """Return the extra kernel keyword arguments, ``{}`` when none were given."""
        return {} if self.kernel_params is None else self.kernel_params
50 |
51 |
--------------------------------------------------------------------------------
/kernel_methods/README.md:
--------------------------------------------------------------------------------
1 | # Graph Kernels
2 |
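3 | Please install [GraKeL](https://github.com/ysig/GraKeL) (e.g. `pip install grakel`) before running the scripts in this directory.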
4 |
--------------------------------------------------------------------------------
/kernel_methods/go.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash -ex
2 |
3 | python3 main.py REDDIT-BINARY walk
4 | python3 main.py REDDIT-BINARY shortest
5 | python3 main.py REDDIT-MULTI-5K wl
6 | python3 main.py REDDIT-MULTI-5K shortest
7 | python3 main.py REDDIT-MULTI-5K walk
8 | python3 main.py IMDB-BINARY wl
9 | python3 main.py IMDB-MULTI wl
10 | python3 main.py IMDB-BINARY shortest
11 | python3 main.py IMDB-MULTI shortest
12 | python3 main.py IMDB-BINARY walk
13 | python3 main.py IMDB-MULTI walk
14 | python3 main.py REDDIT-MULTI-5K shortest
15 |
16 |
17 |
--------------------------------------------------------------------------------
/kernel_methods/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import networkx as nx
4 | import pickle
5 | import json
6 | from glob import glob
7 | import graph_tool
8 | from graph_tool import load_graph
9 | from tqdm import tqdm
10 | import operator
11 |
12 |
def read_graphfile(datadir, dataname, max_nodes=None):
    """Load a graph dataset stored in the TU Dortmund text format.

    Files are read from ``<datadir>/<dataname>/<dataname>_*.txt``
    (format: https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets).
    Node and graph indices start at 1 in the files.

    Args:
        datadir: Directory holding one sub-directory per dataset.
        dataname: Dataset name (used as sub-directory and file prefix).
        max_nodes: Accepted for interface compatibility; not used.

    Returns:
        Tuple ``(graphs, graph_labels)``. ``graphs`` is a list of networkx
        graphs whose nodes are relabelled to ``0..n-1``; each graph stores
        its label in ``G.graph['label']`` and nodes carry ``'label'`` /
        ``'feat'`` attributes when the optional node-label / node-attribute
        files exist. ``graph_labels`` is a numpy array with one entry per
        graph.
    """
    # Local import: the module header does not import ``re``.
    import re

    prefix = os.path.join(datadir, dataname, dataname)

    # Node id (1-based line number) -> id of the graph that owns the node.
    with open(prefix + '_graph_indicator.txt') as f:
        graph_indic = {node_id: int(line) for node_id, line in enumerate(f, start=1)}

    node_labels = []
    try:
        with open(prefix + '_node_labels.txt') as f:
            for line in f:
                node_labels.append(int(line))
    except IOError:
        print('No node labels')

    node_attrs = []
    try:
        with open(prefix + '_node_attributes.txt') as f:
            separator = re.compile(r"[,\s]+")
            for line in f:
                # Values may be separated by commas and/or whitespace.
                fields = separator.split(line.strip())
                node_attrs.append(np.array([float(x) for x in fields if x]))
    except IOError:
        print('No node attributes')

    with open(prefix + '_graph_labels.txt') as f:
        raw_labels = [int(line) for line in f]
    graph_labels = np.array(raw_labels)
    # Labels are shifted down by one unless the file already contains a 0.
    if 0 not in raw_labels:
        graph_labels = graph_labels - 1

    # Graph id (1-based) -> list of edges; an edge is filed under the graph
    # that owns its first endpoint.
    adj_list = {i: [] for i in range(1, len(graph_labels) + 1)}
    with open(prefix + '_A.txt') as f:
        for line in f:
            fields = line.split(",")
            e0, e1 = int(fields[0]), int(fields[1])
            adj_list[graph_indic[e0]].append((e0, e1))

    graphs = [nx.from_edgelist(adj_list[i]) for i in range(1, len(adj_list) + 1)]

    # Register every labelled node so that nodes without edges are kept.
    # NOTE: this only happens when a node-label file exists; otherwise nodes
    # that appear in no edge are absent from the resulting graphs.
    for nodeid in range(1, len(node_labels) + 1):
        graphs[graph_indic[nodeid] - 1].add_node(nodeid)

    for idx, G in enumerate(graphs):
        G.graph['label'] = graph_labels[idx]
        for u in list(G.nodes()):
            attrs = {}
            if node_labels:
                attrs['label'] = node_labels[u - 1]
            if node_attrs:
                attrs['feat'] = node_attrs[u - 1]
            if attrs:
                # add_node on an existing node updates its attributes; this
                # works on every networkx release, unlike ``G.node[u]``.
                G.add_node(u, **attrs)

    # Relabel nodes to consecutive 0-based ids, in iteration order.
    for idx, G in enumerate(graphs):
        mapping = {old: new for new, old in enumerate(G.nodes())}
        graphs[idx] = nx.relabel_nodes(G, mapping)

    return graphs, graph_labels
124 |
--------------------------------------------------------------------------------
/sub2vec/go.sh:
--------------------------------------------------------------------------------
#!/bin/bash -ex
# Runs src/main.py five times for each dataset below, once with
# "--property n" and once with "--property s", using --d 512.
# -e stops at the first failing command, -x echoes each command before it runs.
for i in 1 2 3 4 5
do
    for DS in 'IMDB-BINARY' 'IMDB-MULTI'
    do
        # Raw data is read from ../data/$DS; edge lists are written to preprocessed_dataset/$DS.
        python3 src/main.py --input ../data/$DS --preprocessed-input preprocessed_dataset/$DS --d 512 --property n
        python3 src/main.py --input ../data/$DS --preprocessed-input preprocessed_dataset/$DS --d 512 --property s
    done
done
10 |
--------------------------------------------------------------------------------
/sub2vec/preprocess.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import networkx as nx
3 | from glob import glob
4 | import os
5 | import subprocess
6 | from sklearn.model_selection import cross_val_score
7 | from sklearn.model_selection import GridSearchCV, StratifiedKFold
8 | from sklearn.svm import SVC, LinearSVC
9 | from sklearn.linear_model import LogisticRegression
10 | from sklearn.ensemble import RandomForestClassifier
11 | from sklearn import preprocessing
12 | from sklearn.metrics import accuracy_score
13 |
def load_data(dir_name, use_node_labels):
    """Read a dataset directory into networkx graphs and graph labels.

    The dataset name is the last path component of ``dir_name``; files are
    expected at ``<dir_name>/<name>_graph_indicator.txt``, ``_A.txt``,
    ``_graph_labels.txt`` and (optionally) ``_node_labels.txt``.

    Args:
        dir_name: Path of the dataset directory.
        use_node_labels: If true, read the node-label file and store each
            value in the node's ``'label'`` attribute.

    Returns:
        ``(Gs, labels)``: a list of ``nx.Graph`` objects (nodes keep their
        1-based ids from the files) and a float numpy array of graph labels.
    """
    node2graph = {}
    Gs = []

    # normpath drops a trailing slash, which would otherwise give an empty name.
    ds_name = os.path.basename(os.path.normpath(dir_name))
    with open("%s/%s_graph_indicator.txt" % (dir_name, ds_name), "r") as f:
        for c, line in enumerate(f, start=1):
            node2graph[c] = int(line)
            # A new graph starts whenever the graph id differs from the number
            # of graphs created so far (ids are 1-based and grouped together).
            if node2graph[c] != len(Gs):
                Gs.append(nx.Graph())
            Gs[-1].add_node(c)

    with open("%s/%s_A.txt" % (dir_name, ds_name), "r") as f:
        for line in f:
            fields = line.split(",")
            # int() tolerates surrounding blanks and a missing final newline.
            src, dst = int(fields[0]), int(fields[1])
            Gs[node2graph[src] - 1].add_edge(src, dst)

    if use_node_labels:
        with open("%s/%s_node_labels.txt" % (dir_name, ds_name), "r") as f:
            for c, line in enumerate(f, start=1):
                # add_node on an existing node updates its attributes; this
                # works on every networkx release, unlike ``G.node[c]``.
                Gs[node2graph[c] - 1].add_node(c, label=int(line))

    labels = []
    with open("%s/%s_graph_labels.txt" % (dir_name, ds_name), "r") as f:
        for line in f:
            labels.append(int(line))

    # ``np.float`` was an alias of the builtin and no longer exists in NumPy.
    labels = np.array(labels, dtype=float)
    return Gs, labels
53 |
def evaluate(DS, embeddings):
    """Score graph embeddings with an SVC under 10-fold stratified CV.

    Graph labels are loaded from dataset directory ``DS``. For each outer
    fold, ``C`` is selected by an inner 10-fold grid search on the training
    part and accuracy is measured on the held-out part.

    Returns:
        Mean test accuracy over the outer folds (also printed).
    """
    _, labels = load_data(DS, False)
    features = np.array(embeddings)
    targets = np.array(labels)

    param_grid = {'C':[0.001, 0.01,0.1,1,10,100,1000]}
    outer_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=None)

    fold_scores = []
    for train_idx, test_idx in outer_cv.split(features, targets):
        search = GridSearchCV(SVC(), param_grid, cv=10, scoring='accuracy', verbose=0)
        search.fit(features[train_idx], targets[train_idx])
        predicted = search.predict(features[test_idx])
        fold_scores.append(accuracy_score(targets[test_idx], predicted))

    mean_accuracy = np.mean(fold_scores)
    print('SVC', mean_accuracy)
    return mean_accuracy
73 |
def preprocess(DS, preprocessed_input):
    """Write one edge-list file per graph of dataset ``DS``.

    Graph ``i`` is written to ``<preprocessed_input>/<i>`` with one
    ``"<u> <v>"`` line per edge. The output directory is created if needed.

    Args:
        DS: Dataset directory passed to ``load_data``.
        preprocessed_input: Directory receiving the edge-list files.
    """
    Gs, labels = load_data(DS, False)
    print('number of graphs', len(Gs))

    # An already existing directory is fine; any other failure (e.g. missing
    # permissions) is raised here instead of being printed and ignored.
    os.makedirs(preprocessed_input, exist_ok=True)

    for i, graph in enumerate(Gs):
        with open(os.path.join(preprocessed_input, str(i)), 'w') as f:
            f.writelines('{} {}\n'.format(e[0], e[1]) for e in graph.edges())
    print('done preprocessing')
87 |
if __name__ == '__main__':
    # Script entry point.
    # NOTE(review): `classification` is neither defined nor imported in the
    # part of this module visible here, so running the file directly is
    # expected to raise NameError - confirm where it should come from.
    ds_name='MUTAG'
    print('classification')
    classification(ds_name, ds_name+'.vec')
    classification('ENZYMES', 'output')
93 |
--------------------------------------------------------------------------------
/sub2vec/src/graphUtils_n.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import random
3 | import matplotlib.pyplot as plt
4 |
5 |
def getGraph(filename):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Each data line holds two integer node ids. Lines starting with ``#`` and
    blank lines are skipped.

    Returns:
        ``nx.Graph`` containing one edge per data line.
    """
    G = nx.Graph()

    with open(filename, 'r') as f:
        for line in f:
            fields = line.split()
            # Blank lines used to raise IndexError; skip them like comments.
            if not fields or line.startswith('#'):
                continue
            G.add_edge(int(fields[0]), int(fields[1]))
    return G
21 |
22 |
def randomWalk(G, walkSize):
    """Return a uniform random walk over ``G`` as a list of node ids.

    The walk starts at a uniformly chosen node and moves to a uniformly
    chosen neighbour at every step.

    Args:
        G: Graph exposing ``nodes()`` and ``neighbors(node)``.
        walkSize: Number of nodes to record.

    Returns:
        List of visited nodes. It has ``walkSize`` entries unless the graph
        is empty (empty list) or the walk reaches a node without neighbours
        (walk stops early).
    """
    # Materialise as lists: on networkx >= 2 these calls return a view /
    # an iterator, which ``random.choice`` cannot sample from by position.
    nodes = list(G.nodes())
    if not nodes:
        return []

    walkList = []
    curNode = random.choice(nodes)
    while len(walkList) < walkSize:
        walkList.append(curNode)
        neighbours = list(G.neighbors(curNode))
        if not neighbours:
            break
        curNode = random.choice(neighbours)
    return walkList
31 |
def getStats(G):
    """Return basic statistics of ``G``.

    Returns:
        Dict with keys ``'num_nodes'``, ``'num_edges'`` and
        ``'is_Connected'``.
    """
    stats = {}
    stats['num_nodes'] = nx.number_of_nodes(G)
    stats['num_edges'] = nx.number_of_edges(G)
    stats['is_Connected'] = nx.is_connected(G)
    # The dict used to be built and then dropped; hand it back to the caller.
    return stats
37 |
38 |
def drawGraph(G):
    """Draw ``G`` with a spring layout, save it to ``graph.pdf`` and show it."""
    layout = nx.spring_layout(G)
    nx.draw_networkx(G, pos=layout)
    plt.savefig("graph.pdf")
    plt.show()
44 |
--------------------------------------------------------------------------------
/sub2vec/src/graphUtils_s.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import random
3 | import matplotlib.pyplot as plt
4 |
5 |
def writeGraph(filename, G):
    """Write the edges of ``G`` as tab-separated pairs of node labels.

    One line per edge: ``<label of first node>\\t<label of second node>``,
    where the label is each node's ``'label'`` attribute.

    Raises:
        KeyError: If an edge endpoint has no ``'label'`` attribute.
    """
    # ``G.nodes(data=True)`` yields (node, attribute dict) pairs on every
    # networkx release, unlike the ``G.node`` mapping.
    node_data = dict(G.nodes(data=True))

    with open(filename, 'w') as out:
        for edge in G.edges():
            node1 = str(node_data[edge[0]]['label'])
            node2 = str(node_data[edge[1]]['label'])
            out.write(node1 + '\t' + node2 + '\n')
14 |
15 |
def getGraph(filename):
    """Build an undirected graph from a whitespace-separated edge-list file.

    Each non-blank line holds two integer node ids. No node attributes are
    set here.

    Returns:
        ``nx.Graph`` containing one edge per data line.
    """
    G = nx.Graph()
    with open(filename, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank lines used to raise IndexError.
                continue
            G.add_edge(int(fields[0]), int(fields[1]))
    # The former ``set_node_attributes(G, 'label', {})`` call was dropped: with
    # an empty mapping it set nothing, and with the argument order used by
    # networkx >= 2 it raises TypeError on any non-empty graph.
    return G
30 |
31 |
def randomWalk(G, walkSize):
    """Return a uniform random walk over ``G`` as a list of node labels.

    The walk starts at a uniformly chosen node and moves to a uniformly
    chosen neighbour at every step; the ``'label'`` attribute of each
    visited node is recorded.

    Args:
        G: Graph exposing ``nodes(data=True)`` and ``neighbors(node)``.
        walkSize: Number of labels to record.

    Returns:
        List of labels. It has ``walkSize`` entries unless the graph is
        empty (empty list) or the walk reaches a node without neighbours
        (walk stops early).

    Raises:
        KeyError: If a visited node has no ``'label'`` attribute.
    """
    # (node, attribute dict) pairs are available on every networkx release,
    # unlike ``G.node``; the dict also gives a list-able node order.
    node_data = dict(G.nodes(data=True))
    nodes = list(node_data)
    if not nodes:
        return []

    walkList = []
    curNode = random.choice(nodes)
    while len(walkList) < walkSize:
        walkList.append(node_data[curNode]['label'])
        # neighbors() is an iterator on networkx >= 2; random.choice needs a list.
        neighbours = list(G.neighbors(curNode))
        if not neighbours:
            break
        curNode = random.choice(neighbours)
    return walkList
40 |
def getStats(G):
    """Return basic statistics of ``G``.

    Returns:
        Dict with keys ``'num_nodes'``, ``'num_edges'`` and
        ``'is_Connected'``.
    """
    stats = {}
    stats['num_nodes'] = nx.number_of_nodes(G)
    stats['num_edges'] = nx.number_of_edges(G)
    stats['is_Connected'] = nx.is_connected(G)
    # The dict used to be built and then dropped; hand it back to the caller.
    return stats
46 |
47 |
def drawGraph(G):
    """Open a new figure, draw ``G`` with a spring layout, save and show it.

    The drawing is saved to ``graph.pdf`` in the current directory.
    """
    plt.figure()
    layout = nx.spring_layout(G)
    nx.draw_networkx(G, pos=layout)
    plt.savefig("graph.pdf")
    plt.show()
54 |
--------------------------------------------------------------------------------
/sub2vec/src/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 |
4 | from structural import structural_embedding
5 | from neighborhood import neighborhood_embedding
6 |
def main():
    """Parse the command line, preprocess the dataset and run one embedding.

    ``--property s`` selects the structural embedding; any other accepted
    value selects the neighborhood embedding.
    """
    cli = argparse.ArgumentParser(description="sub2vec.")
    cli.add_argument(
        '--input',
        nargs='?',
        required=True,
        help='Input directory',
    )
    cli.add_argument('--preprocessed-input', required=True)
    cli.add_argument(
        '--property',
        default='n',
        choices=['n', 's'],
        required=True,
        help='Type of subgraph property to presernve. For neighborhood property add " --property n" and for the structural property " --property s" ',
    )
    cli.add_argument(
        '--walkLength',
        default=100000,
        type=int,
        help='length of random walk on each subgraph',
    )
    cli.add_argument(
        '--d',
        default=300,
        type=int,
        help='dimension of learned feautures for each subgraph.',
    )
    cli.add_argument(
        '--iter',
        default=20,
        type=int,
        help='training iterations',
    )
    cli.add_argument(
        '--windowSize',
        default=2,
        type=int,
        help='Window size of the model.',
    )
    cli.add_argument(
        '--p',
        default=0.5,
        type=float,
        help='meta parameter.',
    )
    cli.add_argument(
        '--model',
        default='dm',
        choices=['dbon', 'dm'],
        help='models for learninig vectors SV-DM (dm) or SV-DBON (dbon).',
    )

    args = cli.parse_args()

    # Imported only after argument parsing succeeded, as before.
    from preprocess import preprocess
    print('start preprocessing ..')
    preprocess(args.input, args.preprocessed_input)

    embed = structural_embedding if args.property == 's' else neighborhood_embedding
    embed(args)


if __name__=='__main__':
    main()
45 |
--------------------------------------------------------------------------------
/sub2vec/src/neighborhood.py:
--------------------------------------------------------------------------------
1 | import gensim.models.doc2vec as doc
2 | import os
3 | import graphUtils_n
4 | from tqdm import tqdm
5 |
6 |
def arr2str(arr):
    """Return the items of ``arr`` as one string, each preceded by a space.

    Example: ``arr2str([1, 2])`` gives ``" 1 2"``; an empty input gives ``""``.
    """
    # join builds the result in one pass instead of repeated concatenation,
    # which matters for walks with many thousands of entries.
    return "".join(" " + str(i) for i in arr)
12 |
13 |
def generateWalkFile(dirName, walkLength):
    """Write one random walk per graph file under ``dirName``.

    Every file found by ``os.walk(dirName)`` is parsed as an edge list and a
    walk of ``walkLength`` nodes is appended as one line to
    ``<dirName>.walk``.

    Returns:
        Dict mapping the 0-based line index in the walk file to the name of
        the graph file that produced that line.
    """
    indexToName = {}

    # The context manager closes the walk file even if a graph fails to load.
    with open(dirName + '.walk', 'w') as walkFile:
        for root, dirs, files in os.walk(dirName):
            for name in tqdm(files):
                subgraph = graphUtils_n.getGraph(os.path.join(root, name))
                walk = graphUtils_n.randomWalk(subgraph, walkLength)
                walkFile.write(arr2str(walk) + "\n")
                # The index keeps counting across directories so that it
                # always equals the line number written above.
                indexToName[len(indexToName)] = name

    return indexToName
30 |
def saveVectors(vectors, outputfile, IdToName):
    """Write vectors to ``outputfile`` as tab-separated text.

    The first line holds the number of vectors; each following line holds
    ``IdToName[i]`` and then the components of ``vectors[i]``, separated by
    tabs.

    Args:
        vectors: Indexable collection supporting ``len`` and ``vectors[i]``.
        outputfile: Destination path (overwritten).
        IdToName: Mapping from vector index to the name written on its line.
    """
    print(len(vectors), outputfile, IdToName)

    # The context manager closes the file even if writing fails part-way.
    with open(outputfile, 'w') as output:
        output.write(str(len(vectors)) + "\n")
        for i in range(len(vectors)):
            output.write(str(IdToName[i]))
            for j in vectors[i]:
                output.write('\t' + str(j))
            output.write('\n')
42 |
def neighborhood_embedding(args):
    """Train Doc2Vec on node-id walks, evaluate it and append the score to ``log``.

    Walks are generated from the graph files in ``args.preprocessed_input``.
    Each learned document vector is placed at the position given by the
    integer file name of its graph and the list is scored with
    ``preprocess.evaluate`` on ``args.input``.
    """
    walk_dir = args.preprocessed_input
    # Read like the other options, but not forwarded to Doc2Vec below.
    n_iterations = args.iter
    vector_size = args.d
    window_size = args.windowSize
    use_dm = int(args.model == 'dm')

    index_to_name = generateWalkFile(walk_dir, args.walkLength)
    corpus = doc.TaggedLineDocument(walk_dir+'.walk')

    with open('log', 'a+') as log_file:
        model = doc.Doc2Vec(corpus, vector_size=vector_size, dm=use_dm, window=window_size)
        doc_vectors = model.docvecs
        n_docs = len(doc_vectors)

        # Slot k receives the vector of the graph whose file is named "k".
        embeddings = [[] for _ in range(n_docs)]
        for position in range(n_docs):
            slot = int(index_to_name[position])
            embeddings[slot] = doc_vectors[position]

        from preprocess import evaluate
        score = evaluate(args.input, embeddings)
        results = [str(score)]
        print(score)

        log_file.write(walk_dir + ',n,' + ','.join(results) + '\n')
70 |
--------------------------------------------------------------------------------
/sub2vec/src/structural.py:
--------------------------------------------------------------------------------
1 | import gensim.models.doc2vec as doc
2 | import os
3 | import graphUtils_s
4 | import random
5 | import networkx as nx
6 | from tqdm import tqdm
7 |
8 |
def arr2str(arr):
    """Return the items of ``arr`` as one string, each preceded by a space.

    Example: ``arr2str(['a', 'b'])`` gives ``" a b"``; an empty input gives
    ``""``.
    """
    # join builds the result in one pass instead of repeated concatenation,
    # which matters for walks with many thousands of entries.
    return "".join(" " + str(i) for i in arr)
14 |
15 |
def generateDegreeWalk(Graph, walkSize):
    """Return a label walk of ``walkSize`` steps over ``Graph``.

    Thin wrapper around ``randomWalkDegreeLabels``.
    """
    return randomWalkDegreeLabels(Graph, walkSize)
21 |
def randomWalkDegreeLabels(G, walkSize):
    """Return a uniform random walk over ``G`` as a list of node labels.

    The walk starts at a uniformly chosen node and moves to a uniformly
    chosen neighbour at every step; the ``'label'`` attribute of each
    visited node is recorded.

    Args:
        G: Graph exposing ``nodes(data=True)`` and ``neighbors(node)``.
        walkSize: Number of labels to record.

    Returns:
        List of labels. It has ``walkSize`` entries unless the graph is
        empty (empty list) or the walk reaches a node without neighbours
        (walk stops early).

    Raises:
        KeyError: If a visited node has no ``'label'`` attribute.
    """
    # (node, attribute dict) pairs are available on every networkx release,
    # unlike ``G.node``; the dict also gives a list-able node order.
    node_data = dict(G.nodes(data=True))
    nodes = list(node_data)
    if not nodes:
        return []

    curNode = random.choice(nodes)
    walkList = []
    while len(walkList) < walkSize:
        walkList.append(node_data[curNode]['label'])
        # neighbors() is an iterator on networkx >= 2; random.choice needs a list.
        neighbours = list(G.neighbors(curNode))
        if not neighbours:
            break
        curNode = random.choice(neighbours)
    return walkList
30 |
def getDegreeLabelledGraph(G, rangetoLabels):
    """Label every node of ``G`` by its degree relative to the graph size.

    For each node, ``degree / number_of_nodes`` is looked up in
    ``rangetoLabels`` via ``inRange`` and the result is stored in the node's
    ``'label'`` attribute. ``G`` is modified in place.

    Args:
        G: networkx graph to label.
        rangetoLabels: Mapping passed to ``inRange``.

    Returns:
        The same graph object ``G``.
    """
    n_nodes = float(nx.number_of_nodes(G))

    # dict(...) accepts both the plain dict returned by networkx 1.x and the
    # (node, degree) view returned by networkx >= 2.
    degreeDict = dict(G.degree())
    for node, degree in degreeDict.items():
        # add_node on an existing node updates its attributes; this avoids
        # set_node_attributes, whose argument order changed between releases.
        G.add_node(node, label=inRange(rangetoLabels, degree / n_nodes))

    return G
43 |
def inRange(rangeDict, val):
    """Return the value of the first range in ``rangeDict`` containing ``val``.

    Keys are ``(low, high)`` pairs; a key matches when ``low < val <= high``.
    Returns ``None`` when no key matches.
    """
    for bounds, label in rangeDict.items():
        low, high = bounds[0], bounds[1]
        if low < val <= high:
            return label
    return None
48 |
def generateWalkFile(dirName, walkLength, alpha):
    """Write one combined walk per graph file under ``dirName``.

    Every file found by ``os.walk(dirName)`` is parsed as an edge list and
    its nodes are labelled by relative degree. Two label walks are then
    drawn and written, concatenated, as one line of ``<dirName>.walk``: one
    of ``int(alpha * walkLength)`` steps followed by one of
    ``int(walkLength * (1 - alpha))`` steps.

    Returns:
        Dict mapping the 0-based line index in the walk file to the name of
        the graph file that produced that line.
    """
    indexToName = {}
    # Relative-degree interval (low, high] -> label symbol.
    rangetoLabels = {(0, 0.05):'z',(0.05, 0.1):'a', (0.1, 0.15):'b', (0.15, 0.2):'c', (0.2, 0.25):'d', (0.25, 0.5):'e', (0.5, 0.75):'f',(0.75, 1.0):'g'}

    # The context manager closes the walk file even if a graph fails to load.
    with open(dirName + '.walk', 'w') as walkFile:
        for root, dirs, files in os.walk(dirName):
            for name in tqdm(files):
                subgraph = graphUtils_s.getGraph(os.path.join(root, name))
                degreeGraph = getDegreeLabelledGraph(subgraph, rangetoLabels)
                degreeWalk = generateDegreeWalk(degreeGraph, int(walkLength * (1 - alpha)))
                walk = graphUtils_s.randomWalk(subgraph, int(alpha * walkLength))
                walkFile.write(arr2str(walk) + arr2str(degreeWalk) + "\n")
                # The index keeps counting across directories so that it
                # always equals the line number written above.
                indexToName[len(indexToName)] = name

    return indexToName
66 |
def saveVectors(vectors, outputfile, IdToName):
    """Write vectors to ``outputfile`` as tab-separated text.

    The first line holds the number of vectors; each following line holds
    ``IdToName[i]`` and then the components of ``vectors[i]``, separated by
    tabs.

    Args:
        vectors: Indexable collection supporting ``len`` and ``vectors[i]``.
        outputfile: Destination path (overwritten).
        IdToName: Mapping from vector index to the name written on its line.
    """
    # The context manager closes the file even if writing fails part-way.
    with open(outputfile, 'w') as output:
        output.write(str(len(vectors)) + "\n")
        for i in range(len(vectors)):
            output.write(str(IdToName[i]))
            for j in vectors[i]:
                output.write('\t' + str(j))
            output.write('\n')
77 |
78 |
def structural_embedding(args):
    """Train Doc2Vec on degree-label walks, evaluate it and append the score to ``log``.

    Walks are generated from the graph files in ``args.preprocessed_input``
    with mixing parameter ``args.p``. Each learned document vector is placed
    at the position given by the integer file name of its graph and the list
    is scored with ``preprocess.evaluate`` on ``args.input``.
    """
    walk_dir = args.preprocessed_input
    # Read like the other options, but not forwarded to Doc2Vec below.
    n_iterations = args.iter
    vector_size = args.d
    window_size = args.windowSize
    use_dm = int(args.model == 'dm')

    index_to_name = generateWalkFile(walk_dir, args.walkLength, args.p)
    corpus = doc.TaggedLineDocument(walk_dir+'.walk')

    with open('log', 'a+') as log_file:
        model = doc.Doc2Vec(corpus, vector_size=vector_size, dm=use_dm, window=window_size, workers=8)
        doc_vectors = model.docvecs
        n_docs = len(doc_vectors)

        # Slot k receives the vector of the graph whose file is named "k".
        embeddings = [[] for _ in range(n_docs)]
        for position in range(n_docs):
            slot = int(index_to_name[position])
            embeddings[slot] = doc_vectors[position]

        from preprocess import evaluate
        score = evaluate(args.input, embeddings)
        print(score)
        results = [str(score)]

        log_file.write(walk_dir + ',s,' + ','.join(results) + '\n')
106 |
--------------------------------------------------------------------------------
/sub2vec/test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import sys
3 |
if __name__ == '__main__':
    # Summarise a results CSV (path in argv[1]) with columns DS, type and
    # result: print mean and standard deviation per dataset and type.
    results = pd.read_csv(sys.argv[1])
    for DS in results.DS.unique():
        per_dataset = results[results.DS == DS]
        for tpe in ['n', 's']:
            scores = per_dataset[(per_dataset.type == tpe)]['result']
            print(DS, tpe, scores.mean(), scores.std())
12 |
--------------------------------------------------------------------------------