├── .gitignore
├── DGK
├── README
├── canonical_maps
│ ├── canonical_map_n2.p
│ ├── canonical_map_n3.p
│ ├── canonical_map_n4.p
│ ├── canonical_map_n5.p
│ ├── canonical_map_n6.p
│ ├── canonical_map_n7.p
│ └── canonical_map_n8.p
├── datasets
│ └── README
├── deep_kernel.py
├── go.sh
└── graphlet_counter_maps
│ ├── graphlet_counter_nodebased_n2.p
│ ├── graphlet_counter_nodebased_n3.p
│ ├── graphlet_counter_nodebased_n4.p
│ ├── graphlet_counter_nodebased_n5.p
│ ├── graphlet_counter_nodebased_n6.p
│ ├── graphlet_counter_nodebased_n7.p
│ └── graphlet_counter_nodebased_n8.p
├── MLGkernel
├── LICENSE
├── MLGkernel
│ ├── FLGinstance.cpp
│ ├── FLGinstance.hpp
│ ├── FLGkernel.cpp
│ ├── FLGkernel.hpp
│ ├── Kernel.hpp
│ ├── Linearizer.hpp
│ ├── MLG_base.hpp
│ ├── MLGdataset.cpp
│ ├── MLGdataset.hpp
│ ├── MLGgraph.cpp
│ ├── MLGgraph.hpp
│ ├── Makefile
│ ├── RandomSelection.hpp
│ ├── params.hpp
│ ├── runMLG
│ ├── runMLG.cpp
│ └── swig
│ │ ├── MLGK.cpp
│ │ ├── MLGK.i
│ │ ├── Makefile
│ │ ├── README.txt
│ │ └── test.py
├── Makefile
├── Makefile.base
├── Makefile.options
├── README.md
├── data_utils.py
├── evaluate_embedding.py
├── go.sh
├── include
│ ├── pMMFbase.hpp
│ └── pMMFglobal.inc
├── matrices
│ ├── Activemap.cpp
│ ├── Activemap.hpp
│ ├── AtomicCmatrix.hpp
│ ├── Cmatrix.cpp
│ ├── Cmatrix.hpp
│ ├── Cvector.cpp
│ ├── Cvector.hpp
│ ├── DenseVector.hpp
│ ├── EigenInterface.hpp
│ ├── GramMatrix.cpp
│ ├── GramMatrix.hpp
│ ├── LapackInterface.hpp
│ ├── Makefile
│ ├── Matrix.cpp
│ ├── Matrix.hpp
│ ├── MatrixX.cpp
│ ├── MatrixX.hpp
│ ├── Remap.cpp
│ ├── Remap.hpp
│ ├── SparseVector.hpp
│ ├── Vector.cpp
│ ├── Vector.hpp
│ ├── Vectorh.cpp
│ ├── Vectorh.hpp
│ ├── Vectorl.cpp
│ ├── Vectorl.hpp
│ ├── Vectorv.cpp
│ ├── Vectorv.hpp
│ ├── matrices.cpp
│ └── matrices.o
├── preprocess.py
├── test.py
└── utility
│ ├── Bifstream.cpp
│ ├── Bifstream.hpp
│ ├── Bofstream.cpp
│ ├── Bofstream.hpp
│ ├── Graph.cpp
│ ├── Graph.hpp
│ ├── Log.cpp
│ ├── Log.hpp
│ ├── Makefile
│ ├── Rstream.cpp
│ ├── Rstream.hpp
│ ├── Serializable.cpp
│ ├── Serializable.hpp
│ ├── ThreadBank.cpp
│ ├── ThreadBank.hpp
│ ├── ThreadManager.cpp
│ ├── ThreadManager.hpp
│ ├── TopkList.cpp
│ ├── TopkList.hpp
│ └── filetypes
│ ├── Makefile
│ ├── MatrixIF.hpp
│ ├── MatrixIF_ASCII.cpp
│ ├── MatrixIF_ASCII.hpp
│ ├── MatrixIF_Boeing.cpp
│ ├── MatrixIF_Boeing.hpp
│ ├── MatrixIF_Matlab.cpp
│ ├── MatrixIF_Matlab.hpp
│ ├── MatrixOF.hpp
│ ├── MatrixOF_ASCII.cpp
│ ├── MatrixOF_ASCII.hpp
│ ├── MatrixOF_Boeing.cpp
│ ├── MatrixOF_Boeing.hpp
│ ├── MatrixOF_Matlab.cpp
│ ├── MatrixOF_Matlab.hpp
│ └── filetypes.cpp
├── README.md
├── data
└── PTC_MR
│ ├── PTC_MR_A.txt
│ ├── PTC_MR_edge_labels.txt
│ ├── PTC_MR_graph_indicator.txt
│ ├── PTC_MR_graph_labels.txt
│ └── PTC_MR_node_labels.txt
├── diffpool
├── aggregators.py
├── cross_val.py
├── encoders.py
├── gen
│ ├── data.py
│ └── feat.py
├── go.sh
├── graph_embedding.py
├── graph_sampler.py
├── graphsage.py
├── load_data.py
├── partition.py
├── set2set.py
├── test.py
├── train.py
└── util.py
├── graph2vec_tf
├── README.md
├── __init__.py
├── classify.py
├── corpus_parser.py
├── go.sh
├── main.py
├── make_graph2vec_corpus.py
├── preprocess.py
├── skipgram.py
├── test.py
├── train_utils.py
└── utils.py
├── kcnn
├── README.md
├── go.sh
├── graph_kernels.py
├── graph_kernels_labeled.py
├── main.py
├── model.py
├── nystrom.py
└── utils.py
├── kernel_methods
├── README.md
├── go.sh
├── main.py
└── utils.py
└── sub2vec
├── go.sh
├── preprocess.py
├── src
├── graphUtils_n.py
├── graphUtils_s.py
├── main.py
├── neighborhood.py
└──
structural.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | log 2 | __pycache__ 3 | *.pyc 4 | *.walk 5 | preprocessed_dataset 6 | TEST 7 | checkpoint 8 | tmp 9 | *.nexf 10 | results 11 | 12 | -------------------------------------------------------------------------------- /DGK/canonical_maps/canonical_map_n2.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'' 3 | p1 4 | (dp2 5 | S'graph' 6 | p3 7 | (tsS'idx' 8 | p4 9 | I0 10 | sS'n' 11 | p5 12 | I0 13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00' 14 | p6 15 | (dp7 16 | g3 17 | (tsg4 18 | I1 19 | sg5 20 | I1 21 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80' 22 | p8 23 | (dp9 24 | g3 25 | (I1 26 | tp10 27 | sg4 28 | I3 29 | sg5 30 | I2 31 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 32 | p11 33 | (dp12 34 | g3 35 | (I0 36 | tp13 37 | sg4 38 | I2 39 | sg5 40 | I2 41 | ss. -------------------------------------------------------------------------------- /DGK/canonical_maps/canonical_map_n3.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'' 3 | p1 4 | (dp2 5 | S'graph' 6 | p3 7 | (tsS'idx' 8 | p4 9 | I0 10 | sS'n' 11 | p5 12 | I0 13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 14 | p6 15 | (dp7 16 | g3 17 | (I0 18 | tp8 19 | sg4 20 | I2 21 | sg5 22 | I2 23 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00' 24 | p9 25 | (dp10 26 | g3 27 | (tsg4 28 | I1 29 | sg5 30 | I1 31 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80' 32 | p11 33 | (dp12 34 | g3 35 | (I1 36 | tp13 37 | sg4 38 | I3 39 | sg5 40 | I2 41 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 42 | p14 43 | (dp15 44 | g3 45 | (I0 46 | I0 47 | I0 48 | tp16 49 | sg4 50 | I4 51 | sg5 52 | I3 53 | ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\xc0' 54 | p17 55 | (dp18 56 | g3 57 | (I1 58 | I1 59 | I1 60 | tp19 61 | sg4 62 | I7 63 | sg5 64 | I3 65 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00@' 66 | p20 67 | (dp21 68 | g3 69 | (I0 70 | I0 71 | I1 72 | tp22 73 | sg4 74 | I5 75 | sg5 76 | I3 77 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\xc0' 78 | p23 79 | (dp24 80 | g3 81 | (I0 82 | I1 83 | I1 84 | tp25 85 | sg4 86 | I6 87 | sg5 88 | I3 89 | ss. 
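The canonical_map_n*.p files in this directory are Python 2 pickles (protocol 0). Reading the dumps, each one maps a packed encoding of a canonically labelled graphlet adjacency matrix to a small record: 'graph' (the upper-triangular adjacency entries as a tuple of 0/1), 'idx' (a running graphlet index) and 'n' (the number of nodes in the graphlet). A minimal inspection sketch, assuming Python 3 and the DGK/ paths from the tree above; the encoding argument is only needed because the files were written by Python 2, and under Python 2 (which the DGK go.sh further down uses) a plain pickle.load(f) suffices:

import pickle

with open('DGK/canonical_maps/canonical_map_n3.p', 'rb') as f:
    # keys: packed canonical adjacency encodings; values: dicts with 'graph', 'idx', 'n'
    canonical_map = pickle.load(f, encoding='latin1')

for key, record in canonical_map.items():
    print(record['n'], record['idx'], record['graph'])

How deep_kernel.py consumes these lookup tables is not shown in this section.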
-------------------------------------------------------------------------------- /DGK/canonical_maps/canonical_map_n4.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'' 3 | p1 4 | (dp2 5 | S'graph' 6 | p3 7 | (tsS'idx' 8 | p4 9 | I0 10 | sS'n' 11 | p5 12 | I0 13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 14 | p6 15 | (dp7 16 | g3 17 | (I0 18 | tp8 19 | sg4 20 | I2 21 | sg5 22 | I2 23 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00 ' 24 | p9 25 | (dp10 26 | g3 27 | (I0 28 | I0 29 | I1 30 | I1 31 | I0 32 | I0 33 | tp11 34 | sg4 35 | I13 36 | sg5 37 | I4 38 | ssS'\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\x00\xe0' 39 | p12 40 | (dp13 41 | g3 42 | (I0 43 | I1 44 | I1 45 | I1 46 | I1 47 | I1 48 | tp14 49 | sg4 50 | I17 51 | sg5 52 | I4 53 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 54 | p15 55 | (dp16 56 | g3 57 | (I0 58 | I0 59 | I0 60 | I0 61 | I0 62 | I0 63 | tp17 64 | sg4 65 | I8 66 | sg5 67 | I4 68 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00' 69 | p18 70 | (dp19 71 | g3 72 | (tsg4 73 | I1 74 | sg5 75 | I1 76 | ssS'\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\xe0' 77 | p20 78 | (dp21 79 | g3 80 | (I0 81 | I0 82 | I1 83 | I1 84 | I1 85 | I1 86 | tp22 87 | sg4 88 | I15 89 | sg5 90 | I4 91 | ssS'\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\xe0' 92 | p23 93 | (dp24 94 | g3 95 | (I0 96 | I0 97 | I1 98 | I0 99 | I1 100 | I1 101 | tp25 102 | sg4 103 | I12 104 | sg5 105 | I4 106 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00 ' 107 | p26 108 | (dp27 109 | g3 110 | (I0 111 | I0 112 | I0 113 | I0 114 | I0 115 | I1 116 | tp28 117 | sg4 118 | I9 119 | sg5 120 | I4 121 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80' 122 | p29 123 | (dp30 124 | g3 125 | (I1 126 | tp31 127 | sg4 128 | I3 129 | sg5 130 | I2 131 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 132 | p32 133 | (dp33 134 | g3 135 | (I0 136 | I0 137 | I0 138 | tp34 139 | sg4 140 | I4 141 | sg5 142 | I3 143 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00`' 144 | p35 145 | (dp36 146 | g3 147 | (I0 148 | I0 149 | I0 150 | I0 151 | I1 152 | I1 153 | tp37 154 | sg4 155 | I10 156 | sg5 157 | I4 158 | ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\xc0' 159 | p38 160 | (dp39 161 | g3 162 | (I1 163 | I1 164 | I1 165 | tp40 166 | sg4 167 | I7 168 | sg5 169 | I3 170 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00`' 171 | p41 172 | (dp42 173 | g3 174 | (I0 175 | I0 176 | I0 177 | I1 178 | I1 179 | I1 180 | tp43 181 | sg4 182 | I11 183 | sg5 184 | I4 185 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00@' 186 | p44 187 | (dp45 188 | g3 189 | (I0 190 | I0 191 | I1 192 | tp46 193 | sg4 194 | I5 195 | sg5 196 | I3 197 | 
ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00`' 198 | p47 199 | (dp48 200 | g3 201 | (I0 202 | I1 203 | I1 204 | I1 205 | I1 206 | I0 207 | tp49 208 | sg4 209 | I16 210 | sg5 211 | I4 212 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\xc0' 213 | p50 214 | (dp51 215 | g3 216 | (I0 217 | I1 218 | I1 219 | tp52 220 | sg4 221 | I6 222 | sg5 223 | I3 224 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00`' 225 | p53 226 | (dp54 227 | g3 228 | (I0 229 | I0 230 | I1 231 | I1 232 | I0 233 | I1 234 | tp55 235 | sg4 236 | I14 237 | sg5 238 | I4 239 | ssS'\x00\x00\x00\x00\x00\x00\x00p\x00\x00\x00\x00\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\x00\xe0' 240 | p56 241 | (dp57 242 | g3 243 | (I1 244 | I1 245 | I1 246 | I1 247 | I1 248 | I1 249 | tp58 250 | sg4 251 | I18 252 | sg5 253 | I4 254 | ss. -------------------------------------------------------------------------------- /DGK/datasets/README: -------------------------------------------------------------------------------- 1 | Please refer to the README under the main folder 2 | for more information on how to obtain the datasets. 3 | -------------------------------------------------------------------------------- /DGK/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # Fill in the name of the dataset 4 | DS= 5 | 6 | # Run multiple trials 7 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 8 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 9 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 10 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 11 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 12 | -------------------------------------------------------------------------------- /DGK/graphlet_counter_maps/graphlet_counter_nodebased_n2.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | I0 3 | ccollections 4 | Counter 5 | p1 6 | ((dp2 7 | I1 8 | I1 9 | stp3 10 | Rp4 11 | sI1 12 | g1 13 | ((dp5 14 | I2 15 | I2 16 | sI3 17 | I2 18 | stp6 19 | Rp7 20 | sI2 21 | g1 22 | ((dp8 23 | tp9 24 | Rp10 25 | sI3 26 | g1 27 | ((dp11 28 | tp12 29 | Rp13 30 | s. -------------------------------------------------------------------------------- /DGK/graphlet_counter_maps/graphlet_counter_nodebased_n3.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | I0 3 | ccollections 4 | Counter 5 | p1 6 | ((dp2 7 | I1 8 | I1 9 | stp3 10 | Rp4 11 | sI1 12 | g1 13 | ((dp5 14 | I2 15 | I2 16 | sI3 17 | I2 18 | stp6 19 | Rp7 20 | sI2 21 | g1 22 | ((dp8 23 | I4 24 | I3 25 | sI5 26 | I2 27 | sI6 28 | I1 29 | stp9 30 | Rp10 31 | sI3 32 | g1 33 | ((dp11 34 | I5 35 | I1 36 | sI6 37 | I2 38 | sI7 39 | I3 40 | stp12 41 | Rp13 42 | sI4 43 | g1 44 | ((dp14 45 | tp15 46 | Rp16 47 | sI5 48 | g1 49 | ((dp17 50 | tp18 51 | Rp19 52 | sI6 53 | g1 54 | ((dp20 55 | tp21 56 | Rp22 57 | sI7 58 | g1 59 | ((dp23 60 | tp24 61 | Rp25 62 | s. 
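The graphlet_counter_nodebased_n*.p pickles in this directory decode to an ordinary dict that maps a graphlet index to a collections.Counter over related graphlet indices (the Counters for the largest graphlets in each map are empty); how deep_kernel.py uses these counts is not shown in this section. A minimal sketch along the same lines as the one above, again assuming Python 3:

import pickle
from collections import Counter

with open('DGK/graphlet_counter_maps/graphlet_counter_nodebased_n3.p', 'rb') as f:
    counter_map = pickle.load(f, encoding='latin1')  # written by Python 2, protocol 0

assert isinstance(counter_map[1], Counter)  # e.g. Counter({2: 2, 3: 2}) in the n3 map above
for idx, counts in sorted(counter_map.items()):
    print(idx, dict(counts))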
-------------------------------------------------------------------------------- /DGK/graphlet_counter_maps/graphlet_counter_nodebased_n4.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | I0 3 | ccollections 4 | Counter 5 | p1 6 | ((dp2 7 | I1 8 | I1 9 | stp3 10 | Rp4 11 | sI1 12 | g1 13 | ((dp5 14 | I2 15 | I2 16 | sI3 17 | I2 18 | stp6 19 | Rp7 20 | sI2 21 | g1 22 | ((dp8 23 | I4 24 | I3 25 | sI5 26 | I2 27 | sI6 28 | I1 29 | stp9 30 | Rp10 31 | sI3 32 | g1 33 | ((dp11 34 | I5 35 | I1 36 | sI6 37 | I2 38 | sI7 39 | I3 40 | stp12 41 | Rp13 42 | sI4 43 | g1 44 | ((dp14 45 | I8 46 | I4 47 | sI9 48 | I2 49 | sI10 50 | I1 51 | sI12 52 | I1 53 | stp15 54 | Rp16 55 | sI5 56 | g1 57 | ((dp17 58 | I9 59 | I2 60 | sI10 61 | I2 62 | sI11 63 | I3 64 | sI13 65 | I4 66 | sI14 67 | I2 68 | sI15 69 | I1 70 | stp18 71 | Rp19 72 | sI6 73 | g1 74 | ((dp20 75 | I16 76 | I4 77 | sI17 78 | I2 79 | sI10 80 | I1 81 | sI12 82 | I3 83 | sI14 84 | I2 85 | sI15 86 | I2 87 | stp21 88 | Rp22 89 | sI7 90 | g1 91 | ((dp23 92 | I17 93 | I2 94 | sI18 95 | I4 96 | sI11 97 | I1 98 | sI15 99 | I1 100 | stp24 101 | Rp25 102 | sI8 103 | g1 104 | ((dp26 105 | tp27 106 | Rp28 107 | sI9 108 | g1 109 | ((dp29 110 | tp30 111 | Rp31 112 | sI10 113 | g1 114 | ((dp32 115 | tp33 116 | Rp34 117 | sI11 118 | g1 119 | ((dp35 120 | tp36 121 | Rp37 122 | sI12 123 | g1 124 | ((dp38 125 | tp39 126 | Rp40 127 | sI13 128 | g1 129 | ((dp41 130 | tp42 131 | Rp43 132 | sI14 133 | g1 134 | ((dp44 135 | tp45 136 | Rp46 137 | sI15 138 | g1 139 | ((dp47 140 | tp48 141 | Rp49 142 | sI16 143 | g1 144 | ((dp50 145 | tp51 146 | Rp52 147 | sI17 148 | g1 149 | ((dp53 150 | tp54 151 | Rp55 152 | sI18 153 | g1 154 | ((dp56 155 | tp57 156 | Rp58 157 | s. -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/FLGinstance.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 
22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include"FLGinstance.hpp" 28 | 29 | 30 | void FLGinstance::precompute(const double gamma){ 31 | if(Sinv.nrows>0) return; 32 | 33 | //cout<<"L="<&& _labels): 38 | // L(move(_L)), labels(_labels){}; 39 | 40 | FLGinstance(){} 41 | 42 | FLGinstance(Cmatrix&& _L, Cmatrix&& _U): 43 | L(move(_L)), U(move(_U)){}; 44 | 45 | 46 | public: 47 | 48 | void precompute(const double gamma); 49 | 50 | bool operator==(const FLGinstance& x) const{ 51 | if(L!=x.L) return false; 52 | if(U!=x.U) return false; 53 | //if(labels.size()!=x.labels.size()) return false; 54 | //if(labels!=x.labels) return false; 55 | return true; 56 | } 57 | 58 | string str(){ 59 | ostringstream oss; oss< labels; 69 | Cmatrix U; 70 | 71 | Cmatrix Sinv; // actually Sinv/2 72 | //double detS; 73 | double log_detS; 74 | 75 | // Cvector linearization; 76 | 77 | }; 78 | 79 | 80 | 81 | 82 | namespace std{ 83 | template<> 84 | class hash{ 85 | public: 86 | size_t operator()(const FLGinstance& x) const{ 87 | size_t h=hash()(x.L)^hash()(x.U); 88 | //for(auto& p: G.labels) h=(h<<1)^hash()(p); 89 | return h; 90 | } 91 | }; 92 | }; 93 | 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/FLGkernel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 
22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include"FLGkernel.hpp" 28 | 29 | 30 | double FLGkernel::operator()(const FLGinstance& x1, const FLGinstance& x2) const 31 | { 32 | 33 | if(x1.Sinv.nrows==0) const_cast(x1).precompute(gamma); 34 | if(x2.Sinv.nrows==0) const_cast(x2).precompute(gamma); 35 | 36 | Cvector lambda=(x1.Sinv+x2.Sinv).eigenvalues(); 37 | //double detS=1; for(int i=0; i{ 37 | public: 38 | 39 | FLGkernel(const double _gamma): gamma(_gamma){} 40 | 41 | double operator()(const FLGinstance& x1, const FLGinstance& x2) const; 42 | 43 | double operator()(const MLGgraph& x1, const MLGgraph& x2) const{return (*this)(x1.flg,x2.flg);}; 44 | 45 | public: 46 | 47 | double gamma=0.1; 48 | 49 | }; 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/Kernel.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _Kernel 28 | #define _Kernel 29 | 30 | #include "pMMFbase.hpp" 31 | 32 | 33 | template 34 | class Kernel{ 35 | public: 36 | 37 | virtual double operator()(const TYPE& x1, const TYPE& x2) const =0; 38 | 39 | }; 40 | 41 | 42 | 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/MLG_base.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 
22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include "pMMFbase.hpp" 28 | 29 | 30 | typedef Cvector VertexFeatures; 31 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/MLGdataset.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _MLGdataset 28 | #define _MLGdataset 29 | 30 | #include "MLGgraph.hpp" 31 | #include 32 | 33 | class MLGdataset{ 34 | public: 35 | 36 | MLGdataset(){} 37 | MLGdataset(const std::string filename, double eta, double gamma, bool grow): gamma(gamma), grow(grow), eta(eta){ 38 | loadGraphs(filename); 39 | } 40 | ~MLGdataset() {for(auto p:graphs) delete p;} 41 | 42 | public: 43 | 44 | void condense(const int nlevels, const int leaf_radius=2); 45 | void computeGram(const int levels, const int radius); 46 | 47 | public: 48 | 49 | void loadGraphs(std::string filename); 50 | void loadDiscreteFeatures(std::string filename, int numFeatures); 51 | void loadFeatures(std::string filename); 52 | void saveGram(std::string filename); 53 | void fillGram(double *npmatrix, int rows, int cols); 54 | 55 | public: 56 | 57 | vector graphs; 58 | double gamma; // regularizer constant 59 | double eta; // regularizer constant 60 | int levels; 61 | int radius; 62 | bool grow; // 1 to grow by the leaf radius, 0 to double 63 | Cmatrix gram; 64 | }; 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/MLGgraph.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 
19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _MLGgraph 28 | #define _MLGgraph 29 | 30 | #include 31 | 32 | #include "Cmatrix.hpp" 33 | #include "Graph.hpp" 34 | #include "FLGinstance.hpp" 35 | #include "Linearizer.hpp" 36 | 37 | 38 | class MLGgraph{ 39 | public: 40 | 41 | MLGgraph(const MLGgraph& x): n(x.n), adj(x.adj.copy()), labels(x.labels.size()){ 42 | for(int i=0; i(n); for(auto& p:labels) p=Cvector::Filled(1,0); init(); 49 | } 50 | MLGgraph(Graph&& G){ 51 | n=G.n; adj=move(G.adj); labels=vector(n); init(); 52 | } 53 | MLGgraph& operator=(Graph&& G){ 54 | n=G.n; adj=move(G.adj); labels=vector(n); init(); return *this; 55 | } 56 | 57 | public: 58 | 59 | void grow_subgraphs(const int radius); 60 | void double_subgraphs(); 61 | void push_to_linearizer(Linearizer& linearizer, double eta); 62 | void pull_features(); 63 | void compute_flg(); 64 | 65 | void computeDegreeFeatures(const int maxdeg); 66 | 67 | string str() const; 68 | 69 | private: 70 | 71 | void init(); 72 | Cmatrix subLaplacian(const vector& vset, double eta) const; 73 | Cmatrix FloydWarshall(const Cmatrix& A) const; 74 | 75 | public: 76 | 77 | int n; 78 | Cmatrix adj; 79 | vector labels; 80 | 81 | vector< vector > neighbors; 82 | vector< unordered_set > subgraphs; 83 | vector*> subinstances; 84 | 85 | FLGinstance flg; 86 | 87 | Cmatrix dist; 88 | 89 | 90 | }; 91 | 92 | 93 | namespace std{ 94 | template<> 95 | class hash< Hwrapper >{ 96 | public: 97 | size_t operator()(const Hwrapper& x) const{ 98 | return hash()(*x.ptr);} 99 | }; 100 | }; 101 | 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR= ../ 2 | include $(ROOTDIR)/Makefile.base 3 | 4 | CFLAGS= -std=c++11 -I $(INCLUDEDIR) -I $(UTILITYDIR) -I $(FILETYPESDIR) -I $(MATRIXDIR) -fPIC 5 | LIBS= -lstdc++ -lm -lpthread 6 | 7 | ALLOBJECTS= $(UTILITYDIR)/*.o $(MATRIXDIR)/*.o *.o #$(BLOCKEDDIR)/*.o 8 | 9 | FLGinstance.o: FLGinstance.hpp FLGinstance.cpp 10 | $(CC) -c FLGinstance.cpp $(CFLAGS) 11 | 12 | FLGkernel.o: FLGkernel.hpp FLGkernel.cpp FLGinstance.hpp 13 | $(CC) -c FLGkernel.cpp $(CFLAGS) 14 | 15 | MLGgraph.o: MLGgraph.hpp MLGgraph.cpp FLGinstance.hpp FLGkernel.hpp Linearizer.hpp 16 | $(CC) -c MLGgraph.cpp $(CFLAGS) 17 | 18 | MLGdataset.o: MLGdataset.hpp MLGdataset.cpp FLGinstance.hpp FLGkernel.hpp Linearizer.hpp MLGgraph.hpp 19 | $(CC) -c MLGdataset.cpp $(CFLAGS) 20 | 21 | runMLG: runMLG.cpp params.hpp MLGgraph.o MLGdataset.o FLGinstance.o 22 | $(CC) -o runMLG runMLG.cpp $(CFLAGS) $(ALLOBJECTS) $(LIBS) 23 | 24 | all: FLGinstance.o FLGkernel.o MLGgraph.o MLGdataset.o runMLG 25 | 26 | clean: 27 | @rm -f runMLG *.o 28 | 29 | anew: clean all 30 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/RandomSelection.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 
6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _RandomSelection 28 | #define _RandomSelection 29 | 30 | #include 31 | 32 | 33 | #include "Activemap.hpp" 34 | #include "pMMFbase.hpp" 35 | 36 | extern default_random_engine randomNumberGenerator; 37 | 38 | 39 | class RandomSelection: public vector{ 40 | public: 41 | 42 | RandomSelection(const int k, const int n): vector(k){ 43 | assert(k<=n); 44 | 45 | if(k<0.3*n){ 46 | uniform_int_distribution distri(0,n-1); 47 | for(int i=0; i distri(0,n-i); 58 | int j=amap(distri(randomNumberGenerator)); 59 | amap.remove(j); 60 | (*this)[i]=j; 61 | } 62 | 63 | } 64 | 65 | public: 66 | 67 | unordered_set selected; 68 | //Activemap activemap; 69 | 70 | }; 71 | 72 | #endif 73 | 74 | 75 | //do{x=distri(randomNumberGenerator); 76 | //}while(selected.find(x)!=selected.end()); 77 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/params.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include 28 | #include 29 | using namespace std; 30 | 31 | class Params{ 32 | /** 33 | A simple object that holds all the parameters necessary for the MLGkernel including 34 | file paths for where the dataset and dataset features are stored and file path to 35 | save the resulting gram matrix. 36 | **/ 37 | public: 38 | // Constructer that inits the model variables. 39 | // set the data, feature and save paths separately. 
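// The single-letter constructor arguments below map onto the members they initialise:
// e -> eta and g -> gamma (the two regularizer constants), r -> radius, l -> levels,
// t -> num_threads, b -> grow_or_double (1 = grow the subgraphs by the leaf radius at
// each level, 0 = double them), matching the options documented in go.sh further down.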
40 | Params(double e, double g, int r, int l, int t, bool b): 41 | eta(e), gamma(g), radius(r), levels(l), num_threads(t), grow_or_double(b) {} 42 | 43 | public: 44 | void set_paths(string data, string features){ 45 | data_path = data; 46 | features_path = features; 47 | } 48 | 49 | void set_save_path(string save){ 50 | save_path = save; 51 | } 52 | 53 | void show() { 54 | cout << "Current parameter settings:" << endl; 55 | cout << " -eta : " << eta < max_nodes: 82 | continue 83 | 84 | # add features and labels 85 | G.graph['label'] = graph_labels[i-1] 86 | for u in G.nodes(): 87 | if len(node_labels) > 0: 88 | node_label_one_hot = [0] * num_unique_node_labels 89 | node_label = node_labels[u-1] 90 | node_label_one_hot[node_label] = 1 91 | G.node[u]['label'] = node_label_one_hot 92 | if len(node_attrs) > 0: 93 | G.node[u]['feat'] = node_attrs[u-1] 94 | if len(node_attrs) > 0: 95 | G.graph['feat_dim'] = node_attrs[0].shape[0] 96 | 97 | # relabeling 98 | mapping={} 99 | it=0 100 | if float(nx.__version__)<2.0: 101 | for n in G.nodes(): 102 | mapping[n]=it 103 | it+=1 104 | else: 105 | for n in G.nodes: 106 | mapping[n]=it 107 | it+=1 108 | 109 | # indexed from 0 110 | graphs.append(nx.relabel_nodes(G, mapping)) 111 | 112 | 113 | np.random.shuffle(graphs) 114 | #idx = np.random.RandomState(seed=2).permutation(len(graphs)) 115 | #graphs = [graphs[i] for i in idx] 116 | 117 | return graphs 118 | 119 | -------------------------------------------------------------------------------- /MLGkernel/evaluate_embedding.py: -------------------------------------------------------------------------------- 1 | from data_utils import read_graphfile 2 | import numpy as np 3 | import pandas as pd 4 | import os 5 | import sys 6 | 7 | from sklearn.model_selection import cross_val_score 8 | from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold 9 | from sklearn.svm import SVC, LinearSVC 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.ensemble import RandomForestClassifier 12 | from sklearn import preprocessing 13 | from sklearn.metrics import accuracy_score 14 | from sklearn.manifold import TSNE 15 | 16 | def evaluate_embedding(embeddings, labels): 17 | 18 | labels = preprocessing.LabelEncoder().fit_transform(labels) 19 | x, y = np.array(embeddings), np.array(labels) 20 | print(x.shape, y.shape) 21 | 22 | kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None) 23 | accuracies = [] 24 | for train_index, test_index in kf.split(x, y): 25 | 26 | x_train, x_test = x[train_index], x[test_index] 27 | y_train, y_test = y[train_index], y[test_index] 28 | search=True 29 | if search: 30 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 31 | classifier = GridSearchCV(SVC(), params, cv=5, scoring='accuracy', verbose=0) 32 | else: 33 | classifier = SVC(C=10) 34 | classifier.fit(x_train, y_train) 35 | accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) 36 | 37 | svm_accuracies = np.mean(accuracies) 38 | 39 | accuracies = [] 40 | for train_index, test_index in kf.split(x, y): 41 | 42 | x_train, x_test = x[train_index], x[test_index] 43 | y_train, y_test = y[train_index], y[test_index] 44 | search=True 45 | if search: 46 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 47 | classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy', verbose=0) 48 | else: 49 | classifier = LinearSVC(C=10) 50 | classifier.fit(x_train, y_train) 51 | accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) 52 | print('LinearSvc', np.mean(accuracies)) 53 
| print('svc', svm_accuracies) 54 | 55 | # def get_mutag(): 56 | # emb = [] 57 | # with open('data/results/output.txt', 'r') as f: 58 | # for line in f: 59 | # emb.append(list(map(float, [x for x in line.strip().split()]))) 60 | 61 | # ret = [] 62 | # for i in range(188): 63 | # with open('./data/mutag/mutag_{}.graph'.format(i+1), 'r') as f: 64 | # x = f.readlines() 65 | # ret.append(int(x[-1].strip())) 66 | # return emb, ret 67 | 68 | 69 | if __name__ == '__main__': 70 | # x, y = get_mutag() 71 | emb = [] 72 | with open('data/results/{}_output.txt'.format(sys.argv[1]), 'r') as f: 73 | for line in f: 74 | emb.append(list(map(float, [x for x in line.strip().split()]))) 75 | 76 | with open('../data/{}_label.txt'.format(sys.argv[1]), 'r') as f: 77 | y = f.readlines() 78 | y = [int(x.strip()) for x in y] 79 | 80 | evaluate_embedding(emb, y) 81 | -------------------------------------------------------------------------------- /MLGkernel/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | # Run the MLG kernel on the MUTAG dataset with parameters: 3 | # radius = 1 4 | # levels = 2 5 | # eta = 0.1 6 | # gamma = 0.01 7 | # num threads = 32 8 | # grow = 1 # if you want the subgraphs to double in size at each level, set this equal to 0 9 | 10 | # Replace MUTAG with the dataset name of your choice(PTC/PROTEINS/NCI1/NCI109). 11 | BASE=`pwd` 12 | dset=$1 13 | data=$BASE/../data/$dset.txt 14 | feats=$BASE/..//data/$dset\_nodelabels.txt 15 | save=$BASE//data/results/output.txt 16 | mkdir -p $BASE/data/results/ 17 | 18 | ~/ENV/bin/python3 preprocess.py $dset 19 | 20 | for r in 1 2 3 4 21 | do 22 | for l in 1 2 3 4 23 | do 24 | for g in 0.01 0.1 1 25 | do 26 | for e in 0.01 0.1 1 27 | do 28 | 29 | cd MLGkernel 30 | ./runMLG -d $data -f $feats -s $save -r $r -l $l -e $e -g $g -t 32 -m 1 31 | cd ../ 32 | ~/ENV/bin/python3 evaluate_embedding.py $dset >> $dset.log 33 | done 34 | done 35 | done 36 | done 37 | -------------------------------------------------------------------------------- /MLGkernel/include/pMMFglobal.inc: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "pMMFbase.hpp" 19 | #include "Vectorv.hpp" 20 | #include "Vectorl.hpp" 21 | #include "Vectorh.hpp" 22 | #include "ThreadManager.hpp" 23 | 24 | bool multithreading=true; 25 | ThreadManager threadManager(4); 26 | mutex cout_mutex; 27 | mutex CoutLock::mx; 28 | 29 | std::default_random_engine randomNumberGenerator; 30 | 31 | FIELD Vectorv::dummyZero=0; 32 | FIELD Vectorl::dummyZero=0; 33 | FIELD Vectorh::dummyZero=0; 34 | 35 | char strbuffer[255]; 36 | 37 | Log mlog; 38 | 39 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Activemap.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "Activemap.hpp" 19 | 20 | extern default_random_engine randomNumberGenerator; 21 | 22 | 23 | int Activemap::random(){ 24 | uniform_int_distribution distri(0,nactive-1); 25 | return forward[distri(randomNumberGenerator)]; 26 | } 27 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Activemap.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #ifndef _Activemap 19 | #define _Activemap 20 | 21 | #include "Remap.hpp" 22 | 23 | class Activemap: public Remap{ 24 | public: 25 | 26 | Activemap(const int n=1): Remap(n), nactive(n){} 27 | 28 | public: 29 | 30 | int random(); 31 | 32 | void remove(const int i){ 33 | if(backward[i]!=nactive-1) swap(backward[i],nactive-1); 34 | nactive--; 35 | } 36 | 37 | bool isactive(const int i) const {return(backward[i] lock(mx); 40 | assert(x.nrows==nrows); assert(x.ncols==ncols); 41 | for(int i=0; i distr; 34 | for(int i=0; i distr; 41 | for(int i=0; i list): DenseVector(list.size()){ 46 | array=new FIELD[n]; int i=0; for(FIELD v:list) array[i++]=v; 47 | } 48 | 49 | 50 | Cvector::Cvector(const int _n, const FIELD* _array): DenseVector(_n){ 51 | array=new FIELD[n]; for(int i=0; i 87 | Eigen::VectorXd Cvector::convert() const{ 88 | Eigen::VectorXd v(n); 89 | for(int i=0; i 22 | #include 23 | //#include 24 | 25 | // The purpose of these adaptors is to avoid having to include Eigen/Dense or Eigen/Core in any of the 26 | // header files of the native vector/matrix classes, which would slow down compilation. 27 | 28 | typedef Eigen::SparseMatrix EigenSparseMatrix; 29 | 30 | class EigenVectorXdAdaptor: public Eigen::VectorXd{ 31 | public: 32 | EigenVectorXdAdaptor(const Eigen::VectorXd& M): Eigen::VectorXd(M){} 33 | }; 34 | 35 | class EigenMatrixXdAdaptor: public Eigen::MatrixXd{ 36 | public: 37 | EigenMatrixXdAdaptor(const Eigen::MatrixXd& M): Eigen::MatrixXd(M){} 38 | }; 39 | 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /MLGkernel/matrices/GramMatrix.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include"GramMatrix.hpp" 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /MLGkernel/matrices/GramMatrix.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #ifndef _GramMatrix 19 | #define _GramMatrix 20 | 21 | #include 22 | #include "MatrixX.hpp" 23 | 24 | template 25 | class GramMatrix: public MATRIX{ 26 | public: 27 | 28 | using class MATRIX::MATRIX; 29 | 30 | public: 31 | 32 | template 33 | GramMatrix(MatrixX& A) 34 | 35 | 36 | public: 37 | 38 | 39 | }; 40 | 41 | 42 | template 43 | template 44 | GramMatrix::GramMatrix(MatrixX& A): MATRIX(MATRIX::Zero(A.nrows,A.nrows)){ 45 | assert(A.nrows==A.ncols); // assumption: A is symmetric 46 | for(int i=0; inFilled>0.2*nrows){ 48 | for(int j=0; j<=i; j++){ 49 | (*this)(i,j)=A.column[i]->dot(*A.column[j]); 50 | (*this)(j,i)=(*this)(i,j); 51 | } 52 | }else{ 53 | unordered_map neighbors; 54 | A.column[i]->for_each([&A,&neighbors](int j, FIELD dummy){ 55 | A.column[j]->for_each([&neighbors](int k, FIELD dummy){neighbors.insert(k);}); 56 | }); 57 | for(auto j:neighbors){ 58 | (*this)(i,j)=A.column[i]->dot(*A.column[j]); 59 | (*this)(j,i)=(*this)(i,j); 60 | } 61 | } 62 | } 63 | 64 | } 65 | 66 | 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /MLGkernel/matrices/LapackInterface.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #ifndef _LapackInterface 19 | #define _LapackInterface 20 | 21 | #include 22 | #include 23 | 24 | // The purpose of these adaptors is to avoid having to include lapacke/include in any of the 25 | // header files of the native vector/matrix classes, which would slow down compilation. 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=.. 
2 | include $(ROOTDIR)/Makefile.base 3 | 4 | CFLAGS+= -fPIC#-std=c++11 5 | INCLUDE= -I$(INCLUDEDIR) -I$(UTILITYDIR) -I$(FILETYPESDIR) #-I$(MATRIXDIR) 6 | OBJFILES= $(UTILITYDIR)/*.o $(FILETYPESDIR)/*.o ./*.o 7 | LIBS= -lstdc++ -lm 8 | LMACROS= 9 | 10 | ifneq (,$(findstring withEigen,$(MACROS))) 11 | INCLUDE+= -I$(EIGENDIR) 12 | LMACROS+=-D_withEigen 13 | else ifneq (,$(findstring withLapack,$(MACROS))) 14 | INCLUDE+= -I$(LAPACKDIR)/LAPACKE/include/ -I$(CBLASDIR)/include 15 | OBJFILES+= $(LAPACKDIR)/liblapacke.a $(LAPACKDIR)/liblapack.a $(BLASDIR)/blas_LINUX.a $(CBLASDIR)/lib/cblas_LINUX.a 16 | LIBS+= -L$(FORTRANDIR)/lib/ -lgfortran 17 | LMACROS+=-D_withLapack 18 | endif 19 | 20 | 21 | matrices.o: *.hpp *.cpp 22 | $(CC) -c matrices.cpp $(CFLAGS) $(INCLUDE) $(LMACROS) 23 | 24 | 25 | objects: matrices.o 26 | 27 | tests: 28 | 29 | all: objects 30 | 31 | clean: 32 | @rm -f *.o 33 | 34 | anew: clean all 35 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Matrix.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "Matrix.hpp" 19 | #include "Cmatrix.hpp" 20 | 21 | 22 | Cmatrix Matrix::dot(const Matrix& x) const{ 23 | return Cmatrix(0,0); 24 | } 25 | 26 | 27 | string Matrix::str(const Dense dummy) const{ 28 | ostringstream stream; 29 | stream.precision(3); 30 | stream.setf(ios_base::fixed, ios_base::floatfield); 31 | for(int i=0; iread(i,j)<<" ";} 33 | stream<<" ]\n";} 34 | return stream.str(); 35 | } 36 | 37 | 38 | string Matrix::str(const Sparse dummy) const{ 39 | ostringstream stream; 40 | for(int i=0; i 23 | 24 | class Matrix{ 25 | public: 26 | 27 | virtual ~Matrix(){} 28 | 29 | public: // constructors 30 | 31 | Matrix(const int _nrows, const int _ncols): nrows(_nrows), ncols(_ncols) {} 32 | 33 | 34 | public: // member access 35 | 36 | virtual FIELD& operator()(const int i, const int j)=0; 37 | virtual FIELD operator()(const int i, const int j) const=0; 38 | virtual FIELD read(const int i, const int j) const=0; 39 | virtual bool isFilled(const int i, const int j) const=0; 40 | virtual int nFilled() const=0; 41 | virtual bool isSparse() const=0; 42 | 43 | //virtual void (foreach)(std::function lambda)=0; 44 | //virtual void (foreach)(std::function lambda) const=0; 45 | virtual void foreach_in_column(const int j, std::function lambda)=0; 46 | virtual void foreach_in_column(const int j, std::function lambda) const=0; 47 | 48 | public: // scalar valued operations 49 | 50 | virtual int nnz() const=0; 51 | 52 | public: 53 | 54 | virtual Cmatrix dot(const Matrix& x) const; // {}; 55 | 56 | public: 57 | 58 | virtual void saveTo(MatrixOF& file) const=0; 59 | 60 | virtual string str(const Dense dummy) const; 61 | virtual string str(const Sparse dummy) 
const; 62 | virtual string str() const{return str(Dense());} 63 | 64 | 65 | 66 | public: 67 | 68 | int nrows; 69 | int ncols; 70 | 71 | }; 72 | 73 | 74 | ostream& operator<<(ostream& stream, const Matrix& x); 75 | 76 | 77 | 78 | class SparseMatrix: public Matrix{ 79 | public: 80 | using Matrix::Matrix; 81 | bool isSparse() const {return true;} 82 | }; 83 | 84 | 85 | 86 | class DenseMatrix: public Matrix{ 87 | public: 88 | using Matrix::Matrix; 89 | bool isFilled(const int i, const int j) const {return true;} 90 | int nFilled() const {return nrows*ncols;} 91 | bool isSparse() const {return false;} 92 | }; 93 | 94 | 95 | 96 | 97 | // virtual Matrix* newof()=0; 98 | 99 | //virtual Cmatrix Cmatrix() const=0; 100 | // virtual SparseMatrixX MatrixXv() const=0; 101 | // virtual SparseMatrixX MatrixXl() const=0; 102 | // virtual SparseMatrixX MatrixXh() const=0; 103 | 104 | 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /MLGkernel/matrices/MatrixX.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "MatrixX.hpp" 19 | 20 | 21 | /* 22 | template<> 23 | void MatrixX::serialize(Bofstream& ofs) const{ 24 | ofs.tag("MatrixXv",0); 25 | ofs.write(nrows); 26 | ofs.write(ncols); 27 | for(int j=0; jserialize(ofs); 29 | }; 30 | 31 | template<> 32 | void MatrixX::serialize(Bofstream& ofs) const{ 33 | ofs.tag("MatrixXl",0); 34 | ofs.write(nrows); 35 | ofs.write(ncols); 36 | for(int j=0; jserialize(ofs); 38 | }; 39 | 40 | template<> 41 | void MatrixX::serialize(Bofstream& ofs) const{ 42 | ofs.tag("MatrixXh",0); 43 | ofs.write(nrows); 44 | ofs.write(ncols); 45 | for(int j=0; jserialize(ofs); 47 | }; 48 | */ 49 | 50 | /* 51 | template<> 52 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){ 53 | ifs.check("MatrixXv",0); 54 | ifs.read(nrows); 55 | ifs.read(ncols); 56 | for(int j=0; j 61 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){ 62 | ifs.check("MatrixXl",0); 63 | ifs.read(nrows); 64 | ifs.read(ncols); 65 | for(int j=0; j 70 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){ 71 | ifs.check("MatrixXh",0); 72 | ifs.read(nrows); 73 | ifs.read(ncols); 74 | for(int j=0; j 20 | 21 | extern default_random_engine randomNumberGenerator; 22 | 23 | 24 | Remap::Remap(const Remap& x): n(x.n){ 25 | forward=new int[n]; for(int i=0; i distr(i+1,n-1); 58 | int j=distr(randomNumberGenerator); 59 | R.swap(i,j);} 60 | return R; 61 | } 62 | 63 | 64 | /* DEPRECATED 65 | Remap::Remap(const int _n, const Random random): Remap(_n){ 66 | for(int i=0; i distr(i+1,n-1); 68 | int j=distr(randomNumberGenerator); 69 | swap(i,j); 70 | } 71 | } 72 | */ 73 | 74 | 75 | string Remap::str() const{ 76 | ostringstream stream; 77 | for(int i=0; i "< 20 | 21 | 22 | string 
Vector::str(const Dense dummy) const{ 23 | ostringstream stream; 24 | for(int i=0; i 23 | 24 | class Vector{ //: public Serializable{ 25 | public: 26 | 27 | Vector(const int _n): n(_n){} 28 | 29 | public: 30 | 31 | virtual FIELD& operator()(const int n)=0; 32 | virtual FIELD operator()(const int n) const=0; 33 | virtual FIELD read(const int i) const {return (*this)(i);} 34 | 35 | //virtual void (foreach)(std::function lambda)=0; 36 | //virtual void (foreach)(std::function lambda) const=0; 37 | 38 | virtual bool isFilled (const int i)const =0; 39 | virtual int nFilled() const=0; 40 | 41 | public: 42 | 43 | virtual int nnz() const=0; 44 | 45 | virtual int argmax() const=0; 46 | virtual int argmax_abs() const=0; 47 | 48 | virtual FIELD norm2() const=0; 49 | // FIELD diff2(const VECTOR& x)=0; 50 | 51 | public: 52 | 53 | //virtual void serialize(Bofstream& ofs) const=0; 54 | //virtual void serialize(Rstream& rstream) const=0; 55 | 56 | virtual string str(const Dense dummy) const; 57 | virtual string str(const Sparse dummy) const; 58 | virtual string str() const{return str(Dense());}; 59 | 60 | public: 61 | 62 | int n; 63 | 64 | }; 65 | 66 | 67 | ostream& operator<<(ostream& stream, const Vector& x); 68 | 69 | 70 | 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Vectorh.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "Vectorv.hpp" 19 | #include "Vectorl.hpp" 20 | #include "Vectorh.hpp" 21 | #include "Cvector.hpp" 22 | #include 23 | 24 | extern default_random_engine randomNumberGenerator; 25 | 26 | 27 | /* 28 | Vectorh::Vectorh(const int _n, const class Random& dummy): SparseVector(_n){ 29 | uniform_real_distribution distr(0,1); 30 | for(int i=0; i distr(0,1); 38 | for(int i=0; i<_n; i++) 39 | if(distr(randomNumberGenerator)<=p) v[i]=distr(randomNumberGenerator); 40 | return v; 41 | } 42 | 43 | 44 | 45 | Vectorh::Vectorh(const Cvector& x): SparseVector(x.n){ 46 | for(int i=0; i 23 | 24 | extern default_random_engine randomNumberGenerator; 25 | 26 | 27 | /* 28 | Vectorl::Vectorl(const int _n, const class Random& dummy): SparseVector(_n){ 29 | uniform_real_distribution distr(0,1); 30 | for(int i=0; i distr(0,1); 38 | for(int i=0; i<_n; i++) 39 | if(distr(randomNumberGenerator)<=p) v.push_back(SVpair(i,distr(randomNumberGenerator))); 40 | return v; 41 | } 42 | 43 | 44 | Vectorl::Vectorl(const Cvector& x): SparseVector(x.n){ 45 | for(int i=0; i(x).sort(); for(auto& p:x) push_back(p);} 51 | 52 | 53 | Vectorl::Vectorl(const Vectorh& x): SparseVector(x.n){ 54 | for(auto& p:x) (*this)(p.first)=p.second;} 55 | 56 | 57 | Vectorl::Vectorl(const Vectorl& x, const class Remap& remap, const bool inverse): SparseVector(x.n){ 58 | if(!inverse) for(auto& p:x) (*this)(remap.forward[p.first])=p.second; 59 | else for(auto& p:x) (*this)(remap.backward[p.first])=p.second; 60 | } 61 | 62 | 63 | 64 | // ---- I/O ------------------------------------------------------------------------------------------------------ 65 | 66 | 67 | string Vectorl::classname(){return "Vectorl";} 68 | 69 | 70 | Vectorl::Vectorl(Bifstream& ifs): SparseVector(0){ 71 | ifs.check("Vectorl",0); 72 | ifs.read(n); 73 | ifs.read_list(*this); 74 | } 75 | 76 | 77 | void Vectorl::serialize(Bofstream& ofs) const{ 78 | ofs.tag("Vectorl",0); 79 | ofs.write(n); 80 | ofs.write_list(*this); 81 | } 82 | 83 | 84 | void Vectorl::serialize(Rstream& rstream) const{ 85 | rstream<<"Vectorl{"< 23 | 24 | extern default_random_engine randomNumberGenerator; 25 | 26 | 27 | /* 28 | Vectorv::Vectorv(const int _n, const class Random& dummy): SparseVector(_n), sorted(0){ 29 | uniform_real_distribution distr(0,1); 30 | for(int i=0; i distr(0,1); 39 | for(int i=0; i<_n; i++) 40 | if(distr(randomNumberGenerator)<=p) v.push_back(SVpair(i,distr(randomNumberGenerator))); 41 | return v; 42 | } 43 | 44 | 45 | // ---- Conversions ----------------------------------------------------------------------------------------------- 46 | 47 | 48 | Vectorv::Vectorv(const Cvector& x): SparseVector(x.n), sorted(true){ 49 | for(int i=0; i 21 | #include "pMMFbase.hpp" 22 | 23 | class LogStream{ 24 | public: 25 | 26 | //virtual void write(const string& s)=0; 27 | virtual LogStream& operator<<(const char* s)=0; 28 | virtual LogStream& operator<<(const string& s)=0; 29 | virtual LogStream& operator<<(const int& x)=0; 30 | virtual LogStream& operator<<(const double& x)=0; 31 | 32 | }; 33 | 34 | 35 | class Log{ 36 | public: 37 | 38 | Log(){startClock();} 39 | 40 | public: 41 | 42 | Log& operator<<(const string& s); 43 | Log& operator<<(const char* s); 44 | 45 | /* 46 | Log& skipline(const int n=1, const int v=0){ 47 | if(verbosity=n) return *this; 49 | for(int i=0; i=n) return *this; 57 | if(stream==nullptr) for(int i=0; i > time; 78 | 79 | chrono::time_point t; 80 | 81 | int verbosity=0; 82 | int 
skippedlines=0; 83 | 84 | LogStream* stream=nullptr; 85 | }; 86 | 87 | 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /MLGkernel/utility/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR= .. 2 | include $(ROOTDIR)/Makefile.base 3 | 4 | CFLAGS+= -I $(INCLUDEDIR) -I $(UTILITYDIR) -I $(MATRIXDIR) -fPIC # -std=c++11 5 | 6 | 7 | Log.o: Log.hpp Log.cpp 8 | $(CC) -c Log.cpp $(CFLAGS) 9 | 10 | Rstream.o: Rstream.hpp Rstream.cpp 11 | $(CC) -c Rstream.cpp $(CFLAGS) 12 | 13 | Bofstream.o: Bofstream.hpp Bofstream.cpp 14 | $(CC) -c Bofstream.cpp $(CFLAGS) 15 | 16 | Bifstream.o: Bifstream.hpp Bifstream.cpp 17 | $(CC) -c Bifstream.cpp $(CFLAGS) 18 | 19 | Serializable.o: Serializable.hpp Serializable.cpp 20 | $(CC) -c Serializable.cpp $(CFLAGS) 21 | 22 | TopkList.o: TopkList.hpp TopkList.cpp 23 | $(CC) -c TopkList.cpp $(CFLAGS) 24 | 25 | ThreadBank.o: ThreadBank.hpp ThreadBank.cpp 26 | $(CC) -c ThreadBank.cpp $(CFLAGS) 27 | 28 | ThreadManager.o: ThreadManager.hpp ThreadManager.cpp ThreadBank.hpp 29 | $(CC) -c ThreadManager.cpp $(CFLAGS) 30 | 31 | #Graph.o: Graph.cpp Graph.hpp 32 | # $(CC) -c Graph.cpp $(CFLAGS) $(INCLUDE) 33 | 34 | filetypes.o: filetypes/*.hpp filetypes/*.cpp 35 | $(CC) -c filetypes/filetypes.cpp $(CFLAGS) $(INCLUDE) -I filetypes/ $(MACROS) 36 | 37 | 38 | objects: Log.o Rstream.o Bofstream.o Bifstream.o Serializable.o TopkList.o \ 39 | ThreadBank.o ThreadManager.o filetypes.o #Graph.o 40 | 41 | tests: 42 | 43 | all: objects tests 44 | 45 | clean: 46 | @rm -f *.o 47 | 48 | anew: clean all 49 | -------------------------------------------------------------------------------- /MLGkernel/utility/Rstream.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #include "Rstream.hpp" 18 | 19 | -------------------------------------------------------------------------------- /MLGkernel/utility/Rstream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | #ifndef _Rstream 18 | #define _Rstream 19 | 20 | #include "pMMFbase.hpp" 21 | 22 | 23 | class Rstream{ 24 | public: 25 | 26 | Rstream(ostream& _out, const int _depth=16):out(_out),indent(0),depth(_depth),bol(true){} 27 | 28 | ~Rstream(){out< 33 | Rstream& operator<<(const T& x){ 34 | //if(typeid(x)==typeid(Rstream::end)) {out< 52 | Rstream& write(const T& x){ 53 | if(depth<0){out< 62 | const Rstream& var(const char* name, const T& x){ 63 | if(bol) {for(int i=0; i 21 | #include "pMMFbase.hpp" 22 | #include "ThreadManager.hpp" 23 | 24 | using namespace std; 25 | 26 | 27 | //extern mutex cout_mutex; 28 | extern ThreadManager threadManager; 29 | 30 | 31 | class ThreadBank{ 32 | public: 33 | 34 | ThreadBank()=delete; 35 | 36 | ThreadBank(const int _maxthreads=1000, const int _maxprivileged=1): 37 | maxthreads(_maxthreads), maxprivileged(_maxprivileged), nthreads(0), nprivileged(0) {gate.lock();}; 38 | 39 | ~ThreadBank(){for(auto& th:threads) th.join();} 40 | 41 | 42 | public: 43 | 44 | template 45 | void add(FUNCTION lambda, const OBJ x){ 46 | lock_guard lock(mx); // unnecessary if called from a single thread 47 | threadManager.enqueue(this); 48 | gate.lock(); // gate can only be unlocked by threadManager 49 | nthreads++; 50 | threads.push_back(thread([this,lambda](OBJ _x){lambda(_x); nthreads--; threadManager.release(this);},x)); 51 | #ifdef _THREADBANKVERBOSE 52 | printinfo(); 53 | #endif 54 | } 55 | 56 | 57 | template 58 | void add(FUNCTION lambda, const OBJ1 x1, const OBJ2 x2){ 59 | lock_guard lock(mx); 60 | threadManager.enqueue(this); 61 | gate.lock(); 62 | nthreads++; 63 | threads.push_back(thread([this,lambda](OBJ1 _x1, OBJ2 _x2){ 64 | lambda(_x1,_x2); nthreads--; threadManager.release(this);},x1,x2)); 65 | #ifdef _THREADBANKVERBOSE 66 | printinfo(); 67 | #endif 68 | } 69 | 70 | 71 | template 72 | void add(FUNCTION lambda, const OBJ1 x1, const OBJ2 x2, const OBJ3 x3){ 73 | lock_guard lock(mx); 74 | threadManager.enqueue(this); 75 | gate.lock(); 76 | nthreads++; 77 | threads.push_back(thread([this,lambda](OBJ1 _x1, OBJ2 _x2, OBJ3 _x3){ 78 | lambda(_x1,_x2,_x3); nthreads--; threadManager.release(this);},x1,x2,x3)); 79 | #ifdef _THREADBANKVERBOSE 80 | printinfo(); 81 | #endif 82 | } 83 | 84 | 85 | bool is_ready(){return nthreads nthreads; 100 | int nprivileged=0; // only to be touched by threadManager 101 | int maxthreads=4; 102 | int maxprivileged=1; 103 | 104 | vector threads; 105 | 106 | }; 107 | 108 | 109 | 110 | 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /MLGkernel/utility/ThreadManager.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | #include "ThreadManager.hpp" 18 | #include "ThreadBank.hpp" 19 | 20 | 21 | void ThreadManager::enqueue(ThreadBank* bank){ 22 | lock_guard lock(mx); 23 | if(is_runnable(bank)) launch(bank); 24 | else queue.push_back(bank); 25 | } 26 | 27 | 28 | void ThreadManager::release(ThreadBank* bank){ 29 | lock_guard lock(mx); 30 | if(bank->nprivileged>0) bank->nprivileged--; 31 | else nthreads--; 32 | for(auto it=queue.begin(); it!=queue.end(); it++) 33 | if(is_runnable(*it)){ 34 | launch(*it); 35 | it=queue.erase(it); 36 | } 37 | // auto it=find_if(queue.begin(),queue.end(),[this](ThreadBank* bank){return is_runnable(bank);}); 38 | // if(it==queue.end()) return; 39 | // ThreadBank* bank=*it; 40 | // queue.erase(it); 41 | // launch(bank); 42 | } 43 | 44 | 45 | bool ThreadManager::is_runnable(ThreadBank* bank){ 46 | return bank->is_ready() && (bank->nprivilegedmaxprivileged || nthreadsnprivilegedmaxprivileged) bank->nprivileged++; 52 | else nthreads++; 53 | bank->gate.unlock(); 54 | } 55 | 56 | 57 | /* 58 | void addBank(const ThreadBank* bank){ 59 | lock_guard lock(mx); 60 | banks.push_front(bank); 61 | } 62 | 63 | void removeBank(const ThreadBank* bank){ 64 | lock_guard lock(mx); 65 | banks.remove(bank); 66 | } 67 | */ 68 | -------------------------------------------------------------------------------- /MLGkernel/utility/ThreadManager.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #ifndef _ThreadManager 18 | #define _ThreadManager 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | class ThreadBank; 25 | 26 | using namespace std; 27 | 28 | 29 | class ThreadManager{ 30 | public: 31 | 32 | ThreadManager(const int _maxthreads):maxthreads(_maxthreads),nthreads(0){} 33 | ~ThreadManager(){} 34 | 35 | public: 36 | 37 | void enqueue(ThreadBank* bank); 38 | void release(ThreadBank* bank); 39 | 40 | int get_nthreads(){lock_guard lock(mx); return nthreads;} 41 | 42 | private: 43 | 44 | bool is_runnable(ThreadBank* bank); 45 | void launch(ThreadBank* bank); 46 | 47 | public: 48 | 49 | int maxthreads; 50 | 51 | private: 52 | 53 | mutex mx; 54 | int nthreads; 55 | list queue; 56 | 57 | }; 58 | 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /MLGkernel/utility/TopkList.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 
6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #include "TopkList.hpp" 18 | 19 | -------------------------------------------------------------------------------- /MLGkernel/utility/TopkList.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #ifndef _TopkList 18 | #define _TopkList 19 | 20 | #include 21 | #include "pMMFbase.hpp" 22 | //#include "DenseVector.hpp" 23 | 24 | struct TopkListPair{ 25 | TopkListPair(const INDEX& _first, const FIELD& _second):first(_first),second(_second){}; 26 | INDEX first; 27 | FIELD second; 28 | }; 29 | 30 | 31 | class TopkList: public list{ 32 | public: 33 | 34 | TopkList(const int _k): k(_k), lowestv(numeric_limits::lowest()){} 35 | 36 | // TopkList(const DenseVector& v, const int _k): k(_k), lowestv(-10000){ 37 | // for(int i=0; ilowestv) insert(i,v(i));} 38 | 39 | public: 40 | 41 | void insert(int index, FIELD value){ 42 | auto it=begin(); 43 | while(it!=end() && it->second>=value){it++;} 44 | list::insert(it,TopkListPair(index,value)); 45 | if(size()>k) pop_back(); 46 | if(size()>=k) lowestv=back().second; 47 | } 48 | 49 | void consider(int index, FIELD value){ 50 | if(value>lowestv || size()<second>=value){it++;} 53 | list::insert(it,TopkListPair(index,value)); 54 | if(size()>k) pop_back(); 55 | if(size()>=k) lowestv=back().second; 56 | } 57 | } 58 | 59 | IndexSet indices() const{ 60 | IndexSet I(size()); int i=0; 61 | for(auto& p:*this) I[i++]=p.first; 62 | return I; 63 | } 64 | 65 | 66 | public: 67 | 68 | int k; 69 | FIELD lowestv; 70 | int lowestp; 71 | 72 | }; 73 | 74 | 75 | 76 | #endif 77 | 78 | 79 | /* 80 | // vector version 81 | void insert(int index, FIELD value){ 82 | if(size() 21 | 22 | class MatrixIF{ 23 | public: 24 | 25 | ~MatrixIF(){ifs.close();} 26 | 27 | public: 28 | 29 | virtual void rewind(){} 30 | 31 | virtual MatrixIF& operator>>(FIELD& v){ 32 | cout<<"Error: operator>>(FIELD& ) not supported in sparse matrix input files."<>(IndexValueTriple& t){ 36 | cout<<"Error: operator>>(IndexValueTriple& ) not supported in dense matrix input files."<0); 43 | // cout<<"Line length="<>b; ncols++;} 47 | nrows=0; while(ifs.good()) {for(int i=0; i>b; nrows++;} 48 | // cout<>(IndexValueTriple& dest){ 59 | dest.i=i; dest.j=j; 60 | if(++j>=ncols) { j=0; i++; } 61 | if(ifs.good() && i<=nrows) ifs>>dest.value; 
else {dest.i=-1; return *this;} 62 | return *this; 63 | } 64 | 65 | MatrixIF& operator>>(FIELD& dest){ 66 | if(++j>=ncols) { j=0; i++; } 67 | if(ifs.good() && i<=nrows) ifs>>dest; 68 | return *this; 69 | } 70 | 71 | 72 | public: 73 | 74 | int i; 75 | int j; 76 | bool eof; 77 | 78 | }; 79 | 80 | 81 | 82 | class MatrixIF_ASCII::Sparse: public MatrixIF_ASCII{ 83 | public: 84 | 85 | Sparse(const string filename){ 86 | sparse=1; 87 | ifs.open(filename); 88 | char buffer[255]; 89 | ifs.get(buffer,255); 90 | ifs.close(); 91 | 92 | ifs.open(filename); 93 | int nextracted=0; 94 | while(ifs.good() && ifs.tellg()>b; if(!ifs.fail()) nextracted++;} 95 | if(nextracted==2){ifs.close(); ifs.open(filename); ifs>>nrows>>ncols; return;} 96 | if(nextracted==3){ 97 | ifs.close(); ifs.open(filename); 98 | nrows=0; ncols=0; 99 | int a; int b; float f; 100 | while(ifs.good()){ 101 | ifs>>a>>b>>f; 102 | if(a>nrows-1) nrows=a+1; 103 | if(b>ncols-1) ncols=b+1; 104 | } 105 | ifs.close(); ifs.open(filename); 106 | return; 107 | } 108 | cout<<"Error: could not parse first line"<>(IndexValueTriple& dest){ 115 | if(!ifs.good()){dest.i=-1; return *this;} 116 | ifs>>dest.i>>dest.j>>dest.value; return *this; 117 | } 118 | 119 | public: 120 | 121 | }; 122 | 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixIF_Boeing.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | #include "MatrixIF_Boeing.hpp" 17 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixIF_Matlab.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | #include "MatrixIF_Matlab.hpp" 17 | 18 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixIF_Matlab.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | #ifndef _MatrixIF_Matlab 17 | #define _MatrixIF_Matlab 18 | 19 | #include "MatrixIF.hpp" 20 | #include 21 | 22 | 23 | class MatrixIF_Matlab: public MatrixIF{ 24 | public: 25 | 26 | class Dense; 27 | class Sparse; 28 | 29 | }; 30 | 31 | 32 | 33 | class MatrixIF_Matlab::Dense: public MatrixIF_Matlab{ 34 | public: 35 | 36 | Dense(const string filename){ 37 | sparse=0; 38 | matfile=Mat_Open(filename.c_str(),MAT_ACC_RDONLY); 39 | if(matfile==NULL){cout<<"Error: file cannot be opened"<dims[0]; 43 | ncols=matvar->dims[1]; 44 | next=reinterpret_cast(matvar->data); 45 | 46 | //need to swap to 'transpose' the array since matIO reads it in column major order 47 | /*int n = 5; //sqrt(nrows*ncols); 48 | cout<<"size"<>(FIELD& dest){ 70 | dest = *next; 71 | next++; 72 | return *this; 73 | } 74 | 75 | public: 76 | 77 | mat_t* matfile; 78 | matvar_t* matvar; 79 | double *next; 80 | 81 | }; 82 | 83 | 84 | 85 | class MatrixIF_Matlab::Sparse: public MatrixIF_Matlab{ 86 | public: 87 | 88 | Sparse(const string filename){ 89 | sparse=1; 90 | matfile=Mat_Open(filename.c_str(),MAT_ACC_RDONLY); 91 | if(matfile==NULL){cout<<"Error: file "<class_type == MAT_C_SPARSE){ 96 | sparse = (mat_sparse_t*)matvar->data; 97 | } 98 | nrows=matvar->dims[0]; 99 | ncols=matvar->dims[1]; 100 | next=reinterpret_cast(sparse->data); 101 | //Mat_VarPrint(matvar,1); 102 | //cout<<"printed"<jc; 104 | Ir = sparse->ir; 105 | njc= sparse->njc; 106 | ndata=sparse->ndata; 107 | cout<<"read"<>(IndexValueTriple& dest){ 114 | int i = indIr; int j = indJc; int c = 0; 115 | for (; i < njc-1; i++ ) { 116 | c= 0; 117 | for (; j0){ 124 | break; 125 | } 126 | indIr = i; 127 | } 128 | dest.value = *next++; 129 | if(!dest.value || i>=njc-1){dest.i=-1; return *this;} 130 | return *this; 131 | } 132 | 133 | 134 | public: 135 | 136 | mat_t* matfile; 137 | matvar_t* matvar; 138 | double *next; 139 | 140 | int indIr= 0; //sparse->ir; 141 | int indJc =0; 142 | int* Ir; 143 | int* Jc; //sparse->jc; 144 | int njc; 145 | int ndata; 146 | 147 | }; 148 | 149 | 150 | #endif 151 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixOF.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram 
matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | #ifndef _MatrixOF 17 | #define _MatrixOF 18 | 19 | #include "pMMFbase.hpp" 20 | #include 21 | 22 | class MatrixOF{ 23 | public: 24 | 25 | //MatrixOF(const char* filename, const int _nrows, const int _ncols): 26 | // nrows(_nrows), ncols(_ncols), ofs(filename){} 27 | 28 | ~MatrixOF(){ofs.close();} 29 | 30 | public: 31 | 32 | virtual MatrixOF& operator<<(const FIELD& v){ 33 | cout<<"Error: operator<<(FIELD& ) not supported in sparse matrix output files."< 21 | 22 | class MatrixOF_Boeing: public MatrixOF{ 23 | public: 24 | 25 | MatrixOF_Boeing(const string filename, const int _nrows, const int _ncols){ 26 | nrows=_nrows; ncols=_ncols; sparse=1; 27 | 28 | // Write row indices, column pointers, values in separate files and concatenate 29 | rowfs.open("row.temp"); 30 | colfs.open("col.temp"); 31 | valfs.open("val.temp"); 32 | 33 | nnz=0; 34 | nnz_in_col=0; 35 | rowchars=colchars=valchars=0; 36 | colptr_val=1; 37 | 38 | colfs<t.j) return *this; // write only the upper triangular part 80 | if(t.j==current_col+1) { // no more nonzeros in previous column 81 | if (colchars+1+std::to_string(colptr_val+nnz_in_col).length() > 80) { 82 | colfs< 80) { 96 | rowfs< 80) { 103 | valfs<= num_sample else to_neigh for to_neigh in to_neighs] 43 | else: 44 | samp_neighs = to_neighs 45 | 46 | if self.gcn: 47 | samp_neighs = [samp_neigh + set([nodes[i]]) for i, samp_neigh in enumerate(samp_neighs)] 48 | unique_nodes_list = list(set.union(*samp_neighs)) 49 | unique_nodes = {n:i for i,n in enumerate(unique_nodes_list)} 50 | mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes))) 51 | column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh] 52 | row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))] 53 | mask[row_indices, column_indices] = 1 54 | if self.cuda: 55 | mask = mask.cuda() 56 | num_neigh = mask.sum(1, keepdim=True) 57 | mask = mask.div(num_neigh) 58 | if self.cuda: 59 | embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda()) 60 | else: 61 | embed_matrix = self.features(torch.LongTensor(unique_nodes_list)) 62 | to_feats = mask.mm(embed_matrix) 63 | return to_feats 64 | -------------------------------------------------------------------------------- /diffpool/cross_val.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import torch 4 | 5 | import pickle 6 | import random 7 | 8 | from graph_sampler import GraphSampler 9 | 10 | def prepare_val_data(graphs, args, val_idx, max_nodes=0): 11 | 12 | random.shuffle(graphs) 13 | val_size = len(graphs) // 10 14 | train_graphs = graphs[:val_idx * val_size] 15 | if val_idx < 9: 16 | train_graphs = train_graphs + graphs[(val_idx+1) * val_size :] 17 | val_graphs = graphs[val_idx*val_size: (val_idx+1)*val_size] 18 | print('Num training graphs: ', len(train_graphs), 19 | '; Num validation graphs: ', len(val_graphs)) 20 | 21 | print('Number of graphs: ', len(graphs)) 
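    # Added annotation (not part of the original cross_val.py): the slicing above is a
    # leave-one-fold-out split. With val_size = len(graphs) // 10, fold `val_idx`
    # (0..9) of the shuffled list becomes the validation set and the remaining nine
    # folds are concatenated into the training set; e.g. for 100 graphs and
    # val_idx = 3, train = graphs[0:30] + graphs[40:100] and val = graphs[30:40].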
22 | print('Number of edges: ', sum([G.number_of_edges() for G in graphs])) 23 | print('Max, avg, std of graph size: ', 24 | max([G.number_of_nodes() for G in graphs]), ', ' 25 | "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', ' 26 | "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs]))) 27 | 28 | # minibatch 29 | dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes, 30 | features=args.feature_type) 31 | train_dataset_loader = torch.utils.data.DataLoader( 32 | dataset_sampler, 33 | batch_size=args.batch_size, 34 | shuffle=True, 35 | num_workers=args.num_workers) 36 | 37 | dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes, 38 | features=args.feature_type) 39 | val_dataset_loader = torch.utils.data.DataLoader( 40 | dataset_sampler, 41 | batch_size=args.batch_size, 42 | shuffle=False, 43 | num_workers=args.num_workers) 44 | 45 | return train_dataset_loader, val_dataset_loader, \ 46 | dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim 47 | 48 | -------------------------------------------------------------------------------- /diffpool/gen/feat.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import networkx as nx 3 | import numpy as np 4 | import random 5 | 6 | class FeatureGen(metaclass=abc.ABCMeta): 7 | @abc.abstractmethod 8 | def gen_node_features(self, G): 9 | pass 10 | 11 | class ConstFeatureGen(FeatureGen): 12 | def __init__(self, val): 13 | self.val = val 14 | 15 | def gen_node_features(self, G): 16 | feat_dict = {i:{'feat': self.val} for i in G.nodes()} 17 | nx.set_node_attributes(G, feat_dict) 18 | 19 | class GaussianFeatureGen(FeatureGen): 20 | def __init__(self, mu, sigma): 21 | self.mu = mu 22 | self.sigma = sigma 23 | 24 | def gen_node_features(self, G): 25 | feat = np.random.multivariate_normal(mu, sigma, G.number_of_nodes()) 26 | feat_dict = {i:{'feat': feat[i]} for i in range(feat.shape[0])} 27 | nx.set_node_attributes(G, feat_dict) 28 | 29 | -------------------------------------------------------------------------------- /diffpool/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | # grid search for ENZYMES 3 | for method in 'base-set2set' 'base' 4 | do 5 | for gc in 4 8 16 2 6 | do 7 | python -m train --datadir=../data --bmname=ENZYMES --cuda=0 --max-nodes 1000 --epochs=100 --num-classes=3 --output-dim 512 --lr 0.001 --num-gc-layers $gc --method $method 8 | done 9 | done 10 | 11 | # DD 12 | #python -m train --datadir=data --bmname=DD --cuda=0 --max-nodes=500 --epochs=1000 13 | -------------------------------------------------------------------------------- /diffpool/graph_embedding.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /diffpool/graphsage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | import numpy as np 6 | 7 | class SupervisedGraphSage(nn.Module): 8 | ''' GraphSage embeddings 9 | ''' 10 | 11 | def __init__(self, num_classes, enc): 12 | super(SupervisedGraphSage, self).__init__() 13 | self.enc = enc 14 | self.xent = nn.CrossEntropyLoss() 15 | 16 | self.weight = nn.Parameter(torch.FloatTensor(enc.embed_dim, num_classes)) 17 | init.xavier_uniform(self.weight) 18 | 19 | def 
forward(self, nodes): 20 | embeds = self.enc(nodes) 21 | scores = embeds.mm(self.weight) 22 | return scores 23 | 24 | def loss(self, nodes, labels): 25 | scores = self.forward(nodes) 26 | return self.xent(nn.softmax(scores), labels.squeeze()) 27 | 28 | -------------------------------------------------------------------------------- /diffpool/load_data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy as sc 4 | import os 5 | import re 6 | 7 | def read_graphfile(datadir, dataname, max_nodes=None): 8 | ''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets 9 | graph index starts with 1 in file 10 | 11 | Returns: 12 | List of networkx objects with graph and node labels 13 | ''' 14 | prefix = os.path.join(datadir, dataname, dataname) 15 | filename_graph_indic = prefix + '_graph_indicator.txt' 16 | # index of graphs that a given node belongs to 17 | graph_indic={} 18 | with open(filename_graph_indic) as f: 19 | i=1 20 | for line in f: 21 | line=line.strip("\n") 22 | graph_indic[i]=int(line) 23 | i+=1 24 | 25 | filename_nodes=prefix + '_node_labels.txt' 26 | node_labels=[] 27 | try: 28 | with open(filename_nodes) as f: 29 | for line in f: 30 | line=line.strip("\n") 31 | node_labels+=[int(line) - 1] 32 | num_unique_node_labels = max(node_labels) + 1 33 | except IOError: 34 | print('No node labels') 35 | 36 | filename_node_attrs=prefix + '_node_attributes.txt' 37 | node_attrs=[] 38 | try: 39 | with open(filename_node_attrs) as f: 40 | for line in f: 41 | line = line.strip("\s\n") 42 | attrs = [float(attr) for attr in re.split("[,\s]+", line) if not attr == ''] 43 | node_attrs.append(np.array(attrs)) 44 | except IOError: 45 | print('No node attributes') 46 | 47 | label_has_zero = False 48 | filename_graphs=prefix + '_graph_labels.txt' 49 | graph_labels=[] 50 | with open(filename_graphs) as f: 51 | for line in f: 52 | line=line.strip("\n") 53 | val = int(line) 54 | if val == 0: 55 | label_has_zero = True 56 | graph_labels.append(val - 1) 57 | graph_labels = np.array(graph_labels) 58 | if label_has_zero: 59 | graph_labels += 1 60 | 61 | filename_adj=prefix + '_A.txt' 62 | adj_list={i:[] for i in range(1,len(graph_labels)+1)} 63 | index_graph={i:[] for i in range(1,len(graph_labels)+1)} 64 | num_edges = 0 65 | with open(filename_adj) as f: 66 | for line in f: 67 | line=line.strip("\n").split(",") 68 | e0,e1=(int(line[0].strip(" ")),int(line[1].strip(" "))) 69 | adj_list[graph_indic[e0]].append((e0,e1)) 70 | index_graph[graph_indic[e0]]+=[e0,e1] 71 | num_edges += 1 72 | for k in index_graph.keys(): 73 | index_graph[k]=[u-1 for u in set(index_graph[k])] 74 | 75 | graphs=[] 76 | for i in range(1,1+len(adj_list)): 77 | # indexed from 1 here 78 | G=nx.from_edgelist(adj_list[i]) 79 | if max_nodes is not None and G.number_of_nodes() > max_nodes: 80 | continue 81 | 82 | # add features and labels 83 | G.graph['label'] = graph_labels[i-1] 84 | for u in G.nodes(): 85 | if len(node_labels) > 0: 86 | node_label_one_hot = [0] * num_unique_node_labels 87 | node_label = node_labels[u-1] 88 | node_label_one_hot[node_label] = 1 89 | G.node[u]['label'] = node_label_one_hot 90 | if len(node_attrs) > 0: 91 | G.node[u]['feat'] = node_attrs[u-1] 92 | if len(node_attrs) > 0: 93 | G.graph['feat_dim'] = node_attrs[0].shape[0] 94 | 95 | # relabeling 96 | mapping={} 97 | it=0 98 | if float(nx.__version__)<2.0: 99 | for n in G.nodes(): 100 | mapping[n]=it 101 | it+=1 102 | else: 103 | for n in 
G.nodes: 104 | mapping[n]=it 105 | it+=1 106 | 107 | # indexed from 0 108 | graphs.append(nx.relabel_nodes(G, mapping)) 109 | return graphs 110 | 111 | -------------------------------------------------------------------------------- /diffpool/partition.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | def partition(embeddings): 5 | ''' Compute a partition of embeddings, where each partition is pooled together. 6 | Args: 7 | embeddings: N-by-D matrix, where N is the number of node embeddings, and D 8 | is the embedding dimension. 9 | ''' 10 | dist = np.dot(embeddings) 11 | 12 | def kruskal(adj): 13 | # initialize MST 14 | MST = set() 15 | edges = set() 16 | num_nodes = adj.shape[0] 17 | # collect all edges from graph G 18 | for j in range(num_nodes): 19 | for k in range(num_nodes): 20 | if G.graph[j][k] != 0 and (k, j) not in edges: 21 | edges.add((j, k)) 22 | # sort all edges in graph G by weights from smallest to largest 23 | sorted_edges = sorted(edges, key=lambda e:G.graph[e[0]][e[1]]) 24 | uf = UF(G.vertices) 25 | for e in sorted_edges: 26 | u, v = e 27 | # if u, v already connected, abort this edge 28 | if uf.connected(u, v): 29 | continue 30 | # if not, connect them and add this edge to the MST 31 | uf.union(u, v) 32 | MST.add(e) 33 | return MST 34 | 35 | -------------------------------------------------------------------------------- /diffpool/set2set.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | import torch.nn.functional as F 5 | 6 | import numpy as np 7 | 8 | class Set2Set(nn.Module): 9 | def __init__(self, input_dim, hidden_dim, act_fn=nn.ReLU, num_layers=1): 10 | ''' 11 | Args: 12 | input_dim: input dim of Set2Set. 13 | hidden_dim: the dim of set representation, which is also the INPUT dimension of 14 | the LSTM in Set2Set. 15 | This is a concatenation of weighted sum of embedding (dim input_dim), and the LSTM 16 | hidden/output (dim: self.lstm_output_dim). 
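        Shape note (added annotation, not in the original docstring): as written,
        the recursion only lines up when hidden_dim == 2 * input_dim, since the
        LSTM output q (dim input_dim) is concatenated with the attention readout
        r (dim input_dim) to form the next LSTM input of size hidden_dim.
        Hypothetical usage, assuming a CUDA device because the hidden state and
        q_star below are allocated with .cuda():
            s2s = Set2Set(input_dim=64, hidden_dim=128).cuda()
            out = s2s(torch.randn(8, 20, 64).cuda())  # -> [8, 64]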
17 | ''' 18 | super(Set2Set, self).__init__() 19 | self.input_dim = input_dim 20 | self.hidden_dim = hidden_dim 21 | self.num_layers = num_layers 22 | if hidden_dim <= input_dim: 23 | print('ERROR: Set2Set output_dim should be larger than input_dim') 24 | # the hidden is a concatenation of weighted sum of embedding and LSTM output 25 | self.lstm_output_dim = hidden_dim - input_dim 26 | self.lstm = nn.LSTM(hidden_dim, input_dim, num_layers=num_layers, batch_first=True) 27 | 28 | # convert back to dim of input_dim 29 | self.pred = nn.Linear(hidden_dim, input_dim) 30 | self.act = act_fn() 31 | 32 | def forward(self, embedding): 33 | ''' 34 | Args: 35 | embedding: [batch_size x n x d] embedding matrix 36 | Returns: 37 | aggregated: [batch_size x d] vector representation of all embeddings 38 | ''' 39 | batch_size = embedding.size()[0] 40 | n = embedding.size()[1] 41 | 42 | hidden = (torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda(), 43 | torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda()) 44 | 45 | q_star = torch.zeros(batch_size, 1, self.hidden_dim).cuda() 46 | for i in range(n): 47 | # q: batch_size x 1 x input_dim 48 | q, hidden = self.lstm(q_star, hidden) 49 | # e: batch_size x n x 1 50 | e = embedding @ torch.transpose(q, 1, 2) 51 | a = nn.Softmax(dim=1)(e) 52 | r = torch.sum(a * embedding, dim=1, keepdim=True) 53 | q_star = torch.cat((q, r), dim=2) 54 | q_star = torch.squeeze(q_star, dim=1) 55 | out = self.act(self.pred(q_star)) 56 | 57 | return out 58 | -------------------------------------------------------------------------------- /diffpool/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from glob import glob 3 | import numpy as np 4 | import sys 5 | """ 6 | files = glob(f'{sys.argv[1]}/*/*') 7 | 8 | for f in files: 9 | print(f) 10 | accs100 = [] 11 | accs200 = [] 12 | accs500 = [] 13 | accs1000 = [] 14 | cnt = 0 15 | for event in tf.train.summary_iterator(f): 16 | 17 | for value in event.summary.value: 18 | 19 | if value.tag == 'acc/val_acc': 20 | val_acc = value.simple_value 21 | else: 22 | continue 23 | 24 | 25 | cnt += 1 26 | if (cnt-100) % 1000 == 0: 27 | accs100.append(val_acc) 28 | 29 | print(cnt) 30 | 31 | print(np.mean(accs100), np.std(accs100)) 32 | 33 | # print(value.tag) 34 | # if value.HasField('simple_value'): 35 | # print(value.simple_value) 36 | """ 37 | 38 | if __name__ == '__main__': 39 | # DS = sys.argv[1] 40 | 41 | df = pd.read_csv('log_') 42 | # df = df[df.DS == DS] 43 | gcs = df.gc.unique() 44 | types = df.method.unique() 45 | for gc in gcs: 46 | for tpe in types: 47 | tmpdf = df[(df.gc == gc) & (df.method == tpe)] 48 | for i in range(10, 110, 10): 49 | print(gc, tpe, i, tmpdf[str(i)+'-mean'].mean(), tmpdf[str(i) + '-mean'].std()) 50 | -------------------------------------------------------------------------------- /graph2vec_tf/README.md: -------------------------------------------------------------------------------- 1 | # graph2vec 2 | 3 | This repository contains the "tensorflow" implementation of our paper "graph2vec: Learning distributed representations of graphs". 4 | The paper could be found at: https://arxiv.org/pdf/1707.05005.pdf 5 | 6 | 7 | #### Dependencies 8 | This code is developed in python 2.7. It is ran and tested on Ubuntu 16.04. 9 | It uses the following python packages: 10 | 1. tensorflow (version == 1.4.0) 11 | 2. networkx (version <= 2.0) 12 | 4. 
scikit-learn (+scipy, +numpy) 13 | 14 | ##### The procedure for setting up graph2vec is as follows: 15 | 1. git clone the repository (command: git clone https://github.com/MLDroid/graph2vec_tf.git ) 16 | 2. untar the data.tar.gz tarball 17 | 18 | ##### The procedure for obtaining rooted graph vectors using graph2vec and performing graph classification is as follows: 19 | 1. move to the folder "src" (command: cd src) (also make sure that kdd 2015 paper's (Deep Graph Kernels) datasets are available in '../data/kdd_datasets/dir_graphs/') 20 | 2. run main.py --corpus --class_labels_file_name file to: 21 | *Generate the weisfeiler-lehman kernel's rooted subgraphs from all the graphs 22 | *Train skipgram model to learn graph embeddings. The same will be dumped in ../embeddings/ folder 23 | *Perform graph classification using the graph embeddings generated in the above step 24 | 3. example: 25 | *python main.py --corpus ../data/kdd_datasets/mutag --class_labels_file_name ../data/kdd_datasets/mutag.Labels 26 | *python main.py --corpus ../data/kdd_datasets/proteins --class_labels_file_name ../data/kdd_datasets/proteins.Labels --batch_size 16 --embedding_size 128 --num_negsample 5 27 | 28 | 29 | #### Other command line args: 30 | optional arguments: 31 | -h, --help show this help message and exit 32 | -c CORPUS, --corpus CORPUS 33 | Path to directory containing graph files to be used 34 | for graph classification or clustering 35 | -l CLASS_LABELS_FILE_NAME, --class_labels_file_name CLASS_LABELS_FILE_NAME 36 | File name containg the name of the sample and the 37 | class labels 38 | -o OUTPUT_DIR, --output_dir OUTPUT_DIR 39 | Path to directory for storing output embeddings 40 | -b BATCH_SIZE, --batch_size BATCH_SIZE 41 | Number of samples per training batch 42 | -e EPOCHS, --epochs EPOCHS 43 | Number of iterations the whole dataset of graphs is 44 | traversed 45 | -d EMBEDDING_SIZE, --embedding_size EMBEDDING_SIZE 46 | Intended graph embedding size to be learnt 47 | -neg NUM_NEGSAMPLE, --num_negsample NUM_NEGSAMPLE 48 | Number of negative samples to be used for training 49 | -lr LEARNING_RATE, --learning_rate LEARNING_RATE 50 | Learning rate to optimize the loss function 51 | 52 | --wlk_h WLK_H Height of WL kernel (i.e., degree of rooted subgraph 53 | features to be considered for representation learning) 54 | -lf LABEL_FILED_NAME, --label_filed_name LABEL_FILED_NAME 55 | Label field to be used for coloring nodes in graphs 56 | using WL kenrel 57 | 58 | ## Contact ## 59 | In case of queries, please email: annamala002@e.ntu.edu.sg OR XZHANG048@e.ntu.edu.sg 60 | 61 | #### Reference 62 | 63 | Please consider citing the follow paper when you use this code. 
64 | @article{narayanangraph2vec, 65 | title={graph2vec: Learning distributed representations of graphs}, 66 | author={Narayanan, Annamalai and Chandramohan, Mahinthan and Venkatesan, Rajasekar and Chen, Lihui and Liu, Yang} 67 | } 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /graph2vec_tf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunfanyunn/graph-classification/0d9b60102b6456cdc0607b43c8852d860b2f53c0/graph2vec_tf/__init__.py -------------------------------------------------------------------------------- /graph2vec_tf/classify.py: -------------------------------------------------------------------------------- 1 | import time,json 2 | from utils import get_files 3 | from sklearn.feature_extraction.text import CountVectorizer 4 | from sklearn.preprocessing import Normalizer 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import accuracy_score, classification_report 7 | from sklearn.svm import SVC,LinearSVC 8 | from random import randint 9 | import numpy as np 10 | import logging 11 | from sklearn.model_selection import GridSearchCV 12 | 13 | from utils import get_class_labels 14 | 15 | import os 16 | logger = logging.getLogger() 17 | logger.setLevel("INFO") 18 | 19 | def subgraph2vec_tokenizer (s): 20 | ''' 21 | Tokenize the string from subgraph2vec sentence (i.e. ...). Just target is to be used 22 | and context strings to be ignored. 23 | :param s: context of graph2vec file. 24 | :return: List of targets from graph2vec file. 25 | ''' 26 | return [line.split(' ')[0] for line in s.split('\n')] 27 | 28 | 29 | def linear_svm_classify (X_train, X_test, Y_train, Y_test): 30 | ''' 31 | Classifier with graph embeddings 32 | :param X_train: training feature vectors 33 | :param X_test: testing feature vectors 34 | :param Y_train: training set labels 35 | :param Y_test: test set labels 36 | :return: None 37 | ''' 38 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 39 | classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy',verbose=0) 40 | classifier.fit(X_train,Y_train) 41 | Y_pred = classifier.predict(X_test) 42 | acc = accuracy_score(Y_test, Y_pred) 43 | return acc 44 | 45 | 46 | def perform_classification (corpus_dir, extn, embeddings, class_labels_fname): 47 | ''' 48 | Perform classification from 49 | :param corpus_dir: folder containing subgraph2vec sentence files 50 | :param extn: extension of subgraph2vec sentence files 51 | :param embedding_fname: file containing subgraph vectors in word2vec format (refer Mikolov et al (2013) code) 52 | :param class_labels_fname: files containing labels of each graph 53 | :return: None 54 | ''' 55 | 56 | wlk_files = get_files(corpus_dir, extn) 57 | 58 | Y = np.array(get_class_labels(wlk_files, class_labels_fname)) 59 | # logging.info('Y (label) matrix shape: {}'.format(Y.shape)) 60 | 61 | seed = randint(0, 1000) 62 | 63 | # with open(embedding_fname,'r') as fh: 64 | # graph_embedding_dict = json.load(fh) 65 | 66 | wlk_files = [os.path.basename(x) for x in wlk_files] 67 | # graph_embedding_dict = {os.path.basename(x):y for x, y in graph_embedding_dict.iteritems()} 68 | 69 | # X = np.array([graph_embedding_dict[fname] for fname in wlk_files]) 70 | X = embeddings 71 | 72 | from sklearn.model_selection import StratifiedKFold 73 | kf = StratifiedKFold(10, shuffle=True, random_state=None) 74 | accs = [] 75 | for train_index, test_index in kf.split(X, Y): 76 | 77 | 
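        # Added annotation: each of the 10 stratified folds is scored independently.
        # linear_svm_classify refits a LinearSVC whose C is picked by an inner
        # 5-fold grid search on the training split, and the mean/std of the
        # per-fold test accuracies is what gets reported at the end.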
X_train, X_test = X[train_index], X[test_index] 78 | Y_train, Y_test = Y[train_index], Y[test_index] 79 | # logging.info('Train and Test matrix shapes: {}, {}, {}, {} '.format(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)) 80 | 81 | acc = linear_svm_classify(X_train, X_test, Y_train, Y_test) 82 | accs.append(acc) 83 | print(np.mean(accs), np.std(accs)) 84 | return np.mean(accs) 85 | 86 | 87 | if __name__ == '__main__': 88 | pass 89 | -------------------------------------------------------------------------------- /graph2vec_tf/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # run preprocess 4 | #python preprocess.py IMDB-BINARY 5 | #python preprocess.py IMDB-MULTI 6 | #python preprocess.py COLLAB 7 | #python preprocess.py DD 8 | #python preprocess.py REDDIT-BINARY 9 | #python preprocess.py REDDIT-MULTI-5K 10 | 11 | for i in 1 2 3 4 5 12 | do 13 | for DS in 'MUTAG' 'PTC_MR' 'PROTEINS_full' 'IMDB-BINARY' 'IMDB-MULTI' 'REDDIT-BINARY' 'REDDIT-MULTI-5K' 14 | do 15 | python3 preprocess.py $DS 16 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.001 17 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.01 18 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.1 19 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.5 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /graph2vec_tf/main.py: -------------------------------------------------------------------------------- 1 | import argparse,os,logging,psutil,time 2 | from joblib import Parallel,delayed 3 | 4 | from utils import get_files 5 | from train_utils import train_skipgram 6 | from classify import perform_classification 7 | from make_graph2vec_corpus import * 8 | from time import time 9 | 10 | logger = logging.getLogger() 11 | logger.setLevel("INFO") 12 | 13 | 14 | def main(args): 15 | ''' 16 | :param args: arguments for 17 | 1. training the skigram model for learning subgraph representations 18 | 2. construct the deep WL kernel using the learnt subgraph representations 19 | 3. 
performing graph classification using the WL and deep WL kernel 20 | :return: None 21 | ''' 22 | corpus_dir = args.corpus 23 | output_dir = args.output_dir 24 | batch_size = args.batch_size 25 | epochs = args.epochs 26 | embedding_size = args.embedding_size 27 | num_negsample = args.num_negsample 28 | learning_rate = args.learning_rate 29 | wlk_h = args.wlk_h 30 | label_filed_name = args.label_filed_name 31 | class_labels_fname = args.class_labels_file_name 32 | 33 | wl_extn = 'g2v'+str(wlk_h) 34 | assert os.path.exists(corpus_dir), "File {} does not exist".format(corpus_dir) 35 | # assert os.path.exists(output_dir), "Dir {} does not exist".format(output_dir) 36 | 37 | graph_files = get_files(dirname=corpus_dir, extn='.gexf', max_files=0) 38 | logging.info('Loaded {} graph file names form {}'.format(len(graph_files),corpus_dir)) 39 | 40 | 41 | t0 = time() 42 | wlk_relabel_and_dump_memory_version(graph_files, max_h=wlk_h, node_label_attr_name=label_filed_name) 43 | logging.info('dumped sg2vec sentences in {} sec.'.format(time() - t0)) 44 | 45 | t0 = time() 46 | embedding_fname = train_skipgram(corpus_dir, wl_extn, learning_rate, embedding_size, num_negsample, 47 | epochs, batch_size, output_dir, class_labels_fname) 48 | # logging.info('Trained the skipgram model in {} sec.'.format(round(time()-t0, 2))) 49 | 50 | # embedding_fname = '../embeddings/_dims_512_epochs_2_lr_0.5_embeddings.txt' 51 | # perform_classification (corpus_dir, wl_extn, embedding_fname, class_labels_fname) 52 | 53 | 54 | 55 | 56 | def parse_args(): 57 | ''' 58 | Usual pythonic way of parsing command line arguments 59 | :return: all command line arguments read 60 | ''' 61 | args = argparse.ArgumentParser("graph2vec") 62 | args.add_argument("-c","--corpus", 63 | help="Path to directory containing graph files to be used for graph classification or clustering") 64 | 65 | args.add_argument('-l','--class_labels_file_name', 66 | help='File name containg the name of the sample and the class labels') 67 | 68 | args.add_argument('-o', "--output_dir", default = "../embeddings", 69 | help="Path to directory for storing output embeddings") 70 | 71 | args.add_argument('-b',"--batch_size", default=128, type=int, 72 | help="Number of samples per training batch") 73 | 74 | args.add_argument('-e',"--epochs", default=1000, type=int, 75 | help="Number of iterations the whole dataset of graphs is traversed") 76 | 77 | args.add_argument('-d',"--embedding_size", default=1024, type=int, 78 | help="Intended graph embedding size to be learnt") 79 | 80 | args.add_argument('-neg', "--num_negsample", default=10, type=int, 81 | help="Number of negative samples to be used for training") 82 | 83 | args.add_argument('-lr', "--learning_rate", default=0.3, type=float, 84 | help="Learning rate to optimize the loss function") 85 | 86 | args.add_argument("--wlk_h", default=3, type=int, help="Height of WL kernel (i.e., degree of rooted subgraph " 87 | "features to be considered for representation learning)") 88 | 89 | args.add_argument('-lf', '--label_filed_name', default='Label', help='Label field to be used ' 90 | 'for coloring nodes in graphs using WL kenrel') 91 | 92 | return args.parse_args() 93 | 94 | 95 | 96 | if __name__=="__main__": 97 | args = parse_args() 98 | main(args) 99 | 100 | -------------------------------------------------------------------------------- /graph2vec_tf/preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | from glob import glob 4 | 
from tqdm import tqdm 5 | import os 6 | import subprocess 7 | from sklearn.model_selection import cross_val_score 8 | from sklearn.model_selection import GridSearchCV, StratifiedKFold 9 | from sklearn.svm import SVC, LinearSVC 10 | from sklearn.ensemble import RandomForestClassifier 11 | from sklearn import preprocessing 12 | from sklearn.metrics import accuracy_score 13 | 14 | def load_data(ds_name, use_node_labels): 15 | node2graph = {} 16 | Gs = [] 17 | 18 | with open("../data/%s/%s_graph_indicator.txt"%(ds_name,ds_name), "r") as f: 19 | c = 1 20 | for line in f: 21 | node2graph[c] = int(line[:-1]) 22 | if not node2graph[c] == len(Gs): 23 | Gs.append(nx.Graph()) 24 | Gs[-1].add_node(c) 25 | c += 1 26 | 27 | with open("../data/%s/%s_A.txt"%(ds_name,ds_name), "r") as f: 28 | for line in f: 29 | edge = line[:-1].split(",") 30 | edge[1] = edge[1].replace(" ", "") 31 | Gs[node2graph[int(edge[0])]-1].add_edge(int(edge[0]), int(edge[1])) 32 | 33 | if use_node_labels: 34 | with open("../data/%s/%s_node_labels.txt"%(ds_name,ds_name), "r") as f: 35 | c = 1 36 | for line in f: 37 | node_label = int(line[:-1]) 38 | Gs[node2graph[c]-1].node[c]['label'] = node_label 39 | c += 1 40 | 41 | labels = [] 42 | with open("../data/%s/%s_graph_labels.txt"%(ds_name,ds_name), "r") as f: 43 | for line in f: 44 | labels.append(int(line[:-1])) 45 | 46 | labels = np.array(labels, dtype = np.float) 47 | return Gs, labels 48 | 49 | def preprocess(DS): 50 | Gs, labels = load_data(DS, False) 51 | print('number of graphs', len(Gs)) 52 | 53 | datadir = '../data/{}'.format(DS) 54 | try: 55 | os.mkdir(datadir) 56 | except Exception as e: 57 | print(e) 58 | 59 | assert len(Gs) == len(labels) 60 | f = open('../data/{}.Labels'.format(DS), 'w') 61 | for graphidx, G in tqdm(enumerate(Gs)): 62 | nx.write_gexf(G, '{}/{}.gexf'.format(datadir, graphidx)) 63 | f.write('{}.gexf {}\n'.format(graphidx, int(labels[graphidx]))) 64 | f.close() 65 | 66 | 67 | 68 | if __name__ == '__main__': 69 | import sys 70 | preprocess(sys.argv[1]) 71 | # preprocess('ENZYMES') 72 | # preprocess('DD') 73 | # preprocess('REDDIT-BINARY') 74 | # preprocess('COLLAB') 75 | # preprocess('REDDIT-MULTI-5K') 76 | # preprocess('IMDB-BINARY') 77 | # preprocess('IMDB-MULTI') 78 | -------------------------------------------------------------------------------- /graph2vec_tf/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.read_csv('log') 3 | DSs = df.DS.unique() 4 | for DS in DSs: 5 | tmpdf = df[df.DS == DS] 6 | for i in [500, 900, 1000]: 7 | print(DS, i, tmpdf[str(i)].mean(), tmpdf[str(i)].std()) 8 | 9 | -------------------------------------------------------------------------------- /graph2vec_tf/train_utils.py: -------------------------------------------------------------------------------- 1 | import os,logging 2 | import numpy as np 3 | from corpus_parser import Corpus 4 | from utils import save_graph_embeddings 5 | from skipgram import skipgram 6 | 7 | 8 | def train_skipgram (corpus_dir, extn, learning_rate, embedding_size, num_negsample, epochs, batch_size, output_dir, class_labels_fname): 9 | ''' 10 | 11 | :param corpus_dir: folder containing WL kernel relabeled files. All the files in this folder will be relabled 12 | according to WL relabeling strategy and the format of each line in these folders shall be: .... 
13 | :param extn: Extension of the WL relabled file 14 | :param learning_rate: learning rate for the skipgram model (will involve a linear decay) 15 | :param embedding_size: number of dimensions to be used for learning subgraph representations 16 | :param num_negsample: number of negative samples to be used by the skipgram model 17 | :param epochs: number of iterations the dataset is traversed by the skipgram model 18 | :param batch_size: size of each batch for the skipgram model 19 | :param output_dir: the folder where embedding file will be stored 20 | :return: name of the file that contains the subgraph embeddings (in word2vec format proposed by Mikolov et al (2013)) 21 | ''' 22 | 23 | op_fname = '_'.join([os.path.basename(corpus_dir), 'dims', str(embedding_size), 'epochs', 24 | str(epochs),'lr',str(learning_rate),'embeddings.txt']) 25 | op_fname = os.path.join(output_dir, op_fname) 26 | # if os.path.isfile(op_fname): 27 | # logging.info('The embedding file: {} is already present, hence NOT training skipgram model ' 28 | # 'for subgraph vectors'.format(op_fname)) 29 | # return op_fname 30 | 31 | logging.info("Initializing SKIPGRAM...") 32 | corpus = Corpus(corpus_dir, extn = extn, max_files=0) # just load 'max_files' files from this folder 33 | corpus.scan_and_load_corpus() 34 | 35 | model_skipgram = skipgram( 36 | corpus_dir=corpus_dir, 37 | extn=extn, 38 | class_labels_fname=class_labels_fname, 39 | num_graphs=corpus.num_graphs, 40 | num_subgraphs=corpus.num_subgraphs, 41 | learning_rate=learning_rate, 42 | embedding_size=embedding_size, 43 | num_negsample=num_negsample, 44 | num_steps=epochs, # no. of time the training set will be iterated through 45 | corpus=corpus, # data set of (target,context) tuples 46 | ) 47 | 48 | # final_embeddings = model_skipgram.train(corpus=corpus,batch_size=batch_size) 49 | model_skipgram.train(corpus=corpus,batch_size=batch_size) 50 | 51 | # logging.info('Write the matrix to a word2vec format file') 52 | # save_graph_embeddings(corpus, final_embeddings, op_fname) 53 | # logging.info('Completed writing the final embeddings, pls check file: {} for the same'.format(op_fname)) 54 | # return op_fname 55 | 56 | # perform_classification(corpus_dir, extn, embeddings, class_labels_fname) 57 | 58 | 59 | 60 | if __name__ == '__main__': 61 | pass 62 | -------------------------------------------------------------------------------- /graph2vec_tf/utils.py: -------------------------------------------------------------------------------- 1 | import os,json 2 | 3 | 4 | def get_files(dirname, extn, max_files=0): 5 | all_files = [os.path.join(dirname, f) for f in os.listdir(dirname) if f.endswith(extn)] 6 | for root, dirs, files in os.walk(dirname): 7 | for f in files: 8 | if f.endswith(extn): 9 | all_files.append(os.path.join(root, f)) 10 | 11 | all_files = list(set(all_files)) 12 | all_files.sort() 13 | if max_files: 14 | return all_files[:max_files] 15 | else: 16 | return all_files 17 | 18 | 19 | def save_graph_embeddings(corpus, final_embeddings, opfname): 20 | dict_to_save = {} 21 | for i in range(len(final_embeddings)): 22 | graph_fname = corpus._id_to_graph_name_map[i] 23 | graph_embedding = final_embeddings[i,:].tolist() 24 | dict_to_save[graph_fname] = graph_embedding 25 | 26 | with open(opfname, 'w') as fh: 27 | json.dump(dict_to_save,fh,indent=4) 28 | 29 | 30 | def get_class_labels(graph_files, class_labels_fname): 31 | graph_to_class_label_map = {l.split()[0].split('.')[0]: int(l.split()[1].strip()) for l in open (class_labels_fname)} 32 | labels = 
[graph_to_class_label_map[os.path.basename(g).split('.')[0]] for g in graph_files] 33 | 34 | return labels 35 | 36 | if __name__ == '__main__': 37 | print('nothing to do') 38 | -------------------------------------------------------------------------------- /kcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Kernel Graph Convolutional Neural Networks 2 | Code for the paper [Kernel Graph Convolutional Neural Networks](https://arxiv.org/pdf/1710.10689.pdf). 3 | 4 | ### Requirements 5 | Code is written in Python 3.6 and requires: 6 | * PyTorch 0.3 7 | * NetworkX 1.11 8 | * igraph 0.7 9 | * scikit-learn 0.18 10 | 11 | ### Datasets 12 | Use the following link to download datasets: 13 | ``` 14 | https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets 15 | ``` 16 | Extract the datasets into the `datasets` folder. 17 | 18 | ### Run the model 19 | First, specify the dataset and the hyperparameters in the `main.py` file. Then, use the following command: 20 | 21 | ``` 22 | $ python main.py 23 | ``` 24 | 25 | ### Cite 26 | Please cite our paper if you use this code: 27 | ``` 28 | @article{nikolentzos2017kernel, 29 | title={Kernel Graph Convolutional Neural Networks}, 30 | author={Nikolentzos, Giannis and Meladianos, Polykarpos and Tixier, Antoine Jean-Pierre and Skianis, Konstantinos and Vazirgiannis, Michalis}, 31 | journal={arXiv preprint arXiv:1710.10689}, 32 | year={2017} 33 | } 34 | ``` 35 | 36 | ----------- 37 | 38 | Provided for academic use only 39 | -------------------------------------------------------------------------------- /kcnn/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # TEST is the virtualenv 4 | ./TEST/bin/python3 main.py --d 512 --dataset $@ 5 | -------------------------------------------------------------------------------- /kcnn/graph_kernels_labeled.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | from collections import defaultdict 4 | import copy 5 | 6 | 7 | def sp_kernel(g1, g2=None): 8 | if g2 != None: 9 | graphs = [] 10 | for g in g1: 11 | graphs.append(g) 12 | for g in g2: 13 | graphs.append(g) 14 | else: 15 | graphs = g1 16 | 17 | N = len(graphs) 18 | all_paths = {} 19 | sp_counts = {} 20 | for i in range(N): 21 | sp_lengths = nx.shortest_path_length(graphs[i]) 22 | sp_counts[i] = {} 23 | nodes = graphs[i].nodes() 24 | for v1 in nodes: 25 | for v2 in nodes: 26 | if v2 in sp_lengths[v1]: 27 | label = tuple(sorted([graphs[i].node[v1]['label'], graphs[i].node[v2]['label']]) + [sp_lengths[v1][v2]]) 28 | if label in sp_counts[i]: 29 | sp_counts[i][label] += 1 30 | else: 31 | sp_counts[i][label] = 1 32 | 33 | if label not in all_paths: 34 | all_paths[label] = len(all_paths) 35 | 36 | phi = np.zeros((N,len(all_paths))) 37 | 38 | for i in range(N): 39 | for label in sp_counts[i]: 40 | phi[i,all_paths[label]] = sp_counts[i][label] 41 | 42 | if g2 != None: 43 | K = np.dot(phi[:len(g1),:],phi[len(g1):,:].T) 44 | else: 45 | K = np.dot(phi,phi.T) 46 | 47 | return K 48 | 49 | 50 | def wl_kernel(g1, g2=None, h=6): 51 | if g2 != None: 52 | graphs = [] 53 | for g in g1: 54 | graphs.append(g) 55 | for g in g2: 56 | graphs.append(g) 57 | else: 58 | graphs = g1 59 | 60 | labels = {} 61 | label_lookup = {} 62 | label_counter = 0 63 | 64 | N = len(graphs) 65 | 66 | orig_graph_map = {it: {i: defaultdict(lambda: 0) for i in range(N)} for it in range(-1, h)} 67 | 68 | # 
initial labeling 69 | ind = 0 70 | for G in graphs: 71 | labels[ind] = np.zeros(G.number_of_nodes(), dtype = np.int32) 72 | node2index = {} 73 | for node in G.nodes(): 74 | node2index[node] = len(node2index) 75 | 76 | for node in G.nodes(): 77 | label = G.node[node]['label'] 78 | if not (label in label_lookup): 79 | label_lookup[label] = len(label_lookup) 80 | 81 | labels[ind][node2index[node]] = label_lookup[label] 82 | orig_graph_map[-1][ind][label] = orig_graph_map[-1][ind].get(label, 0) + 1 83 | 84 | ind += 1 85 | 86 | compressed_labels = copy.deepcopy(labels) 87 | 88 | # WL iterations 89 | for it in range(h): 90 | unique_labels_per_h = set() 91 | label_lookup = {} 92 | ind = 0 93 | for G in graphs: 94 | node2index = {} 95 | for node in G.nodes(): 96 | node2index[node] = len(node2index) 97 | 98 | for node in G.nodes(): 99 | node_label = tuple([labels[ind][node2index[node]]]) 100 | neighbors = G.neighbors(node) 101 | if len(neighbors) > 0: 102 | neighbors_label = tuple([labels[ind][node2index[neigh]] for neigh in neighbors]) 103 | node_label = str(node_label) + "-" + str(sorted(neighbors_label)) 104 | if not (node_label in label_lookup): 105 | label_lookup[node_label] = len(label_lookup) 106 | 107 | compressed_labels[ind][node2index[node]] = label_lookup[node_label] 108 | orig_graph_map[it][ind][node_label] = orig_graph_map[it][ind].get(node_label, 0) + 1 109 | 110 | ind +=1 111 | 112 | labels = copy.deepcopy(compressed_labels) 113 | 114 | if g2 != None: 115 | K = np.zeros((len(g1), len(g2))) 116 | for it in range(-1, h): 117 | for i in range(len(g1)): 118 | for j in range(len(g2)): 119 | common_keys = set(orig_graph_map[it][i].keys()) & set(orig_graph_map[it][len(g1)+j].keys()) 120 | K[i][j] += sum([orig_graph_map[it][i].get(k,0)*orig_graph_map[it][len(g1)+j].get(k,0) for k in common_keys]) 121 | else: 122 | K = np.zeros((N, N)) 123 | for it in range(-1, h): 124 | for i in range(N): 125 | for j in range(N): 126 | common_keys = set(orig_graph_map[it][i].keys()) & set(orig_graph_map[it][j].keys()) 127 | K[i][j] += sum([orig_graph_map[it][i].get(k,0)*orig_graph_map[it][j].get(k,0) for k in common_keys]) 128 | 129 | return K -------------------------------------------------------------------------------- /kcnn/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torch.nn.init import xavier_normal,xavier_uniform 4 | 5 | # CNN Model 6 | class CNN(nn.Module): 7 | def __init__(self, input_size, hidden_size, n_classes, d, n_kernels, max_n_communities): 8 | super(CNN, self).__init__() 9 | self.max_n_communities = max_n_communities 10 | self.conv = nn.Conv3d(1, input_size, (1, 1, d), padding=0) 11 | self.fc1 = nn.Linear(input_size*n_kernels, hidden_size) 12 | self.fc2 = nn.Linear(hidden_size, n_classes) 13 | self.init_weights() 14 | 15 | def init_weights(self): 16 | xavier_uniform(self.conv.weight.data) 17 | xavier_normal(self.fc1.weight.data) 18 | xavier_normal(self.fc2.weight.data) 19 | 20 | def forward(self, x_in): 21 | out = F.relu(F.max_pool3d(self.conv(x_in), (1, self.max_n_communities,1))) 22 | out = out.view(out.size(0), -1) 23 | out = F.relu(self.fc1(out)) 24 | out = F.dropout(out, training=self.training) 25 | out = self.fc2(out) 26 | return F.log_softmax(out, dim=1) 27 | -------------------------------------------------------------------------------- /kcnn/nystrom.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
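# Descriptive note on the class defined in this file: Nystrom builds a rank-m approximation
# of the full N x N graph-kernel matrix. With W = kernel(basis, basis) for m randomly sampled
# "basis" graphs and C = kernel(X, basis) for the input graphs X, it uses K ~= C W^-1 C^T,
# i.e. the explicit feature map phi(X) = C W^(-1/2). fit() computes W^(-1/2) from an SVD of W
# (stored as normalization_), and transform() returns C.dot(normalization_.T), so the dot
# product of two transformed rows approximates the kernel value between the corresponding
# graphs. The structure mirrors scikit-learn's Nystroem transformer, except that "kernel"
# here is a graph-kernel function operating on lists of graphs rather than feature vectors.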
import scipy.sparse as sp 3 | from scipy.linalg import svd 4 | from sklearn.utils import check_random_state 5 | 6 | class Nystrom(): 7 | 8 | def __init__(self, kernel, kernel_params=None, n_components=100, random_state=None): 9 | self.kernel = kernel 10 | self.kernel_params = kernel_params 11 | self.n_components = n_components 12 | self.random_state = random_state 13 | 14 | def fit(self, graphs, y=None): 15 | rnd = check_random_state(self.random_state) 16 | n_samples = len(graphs) 17 | 18 | # get basis vectors 19 | if self.n_components > n_samples: 20 | n_components = n_samples 21 | else: 22 | n_components = self.n_components 23 | n_components = min(n_samples, n_components) 24 | inds = rnd.permutation(n_samples) 25 | basis_inds = inds[:n_components] 26 | basis = [] 27 | for ind in basis_inds: 28 | basis.append(graphs[ind]) 29 | 30 | basis_kernel = self.kernel(basis, basis, **self._get_kernel_params()) 31 | 32 | # sqrt of kernel matrix on basis vectors 33 | U, S, V = svd(basis_kernel) 34 | S = np.maximum(S, 1e-12) 35 | self.normalization_ = np.dot(U * 1. / np.sqrt(S), V) 36 | self.components_ = basis 37 | self.component_indices_ = inds 38 | return self 39 | 40 | def transform(self, graphs): 41 | embedded = self.kernel(graphs, self.components_, **self._get_kernel_params()) 42 | return np.dot(embedded, self.normalization_.T) 43 | 44 | def _get_kernel_params(self): 45 | params = self.kernel_params 46 | if params is None: 47 | params = {} 48 | 49 | return params 50 | 51 | -------------------------------------------------------------------------------- /kernel_methods/README.md: -------------------------------------------------------------------------------- 1 | # Graph Kernels 2 | 3 | Please install the GraKeL library first. 4 | -------------------------------------------------------------------------------- /kernel_methods/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | python3 main.py REDDIT-BINARY walk 4 | python3 main.py REDDIT-BINARY shortest 5 | python3 main.py REDDIT-MULTI-5K wl 6 | python3 main.py REDDIT-MULTI-5K shortest 7 | python3 main.py REDDIT-MULTI-5K walk 8 | python3 main.py IMDB-BINARY wl 9 | python3 main.py IMDB-MULTI wl 10 | python3 main.py IMDB-BINARY shortest 11 | python3 main.py IMDB-MULTI shortest 12 | python3 main.py IMDB-BINARY walk 13 | python3 main.py IMDB-MULTI walk 14 | python3 main.py REDDIT-MULTI-5K shortest 15 | 16 | 17 | -------------------------------------------------------------------------------- /kernel_methods/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import networkx as nx 4 | import pickle 5 | import json 6 | from glob import glob 7 | import graph_tool 8 | from graph_tool import load_graph 9 | from tqdm import tqdm 10 | import operator 11 | import re 12 | 13 | def read_graphfile(datadir, dataname, max_nodes=None): 14 | ''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets 15 | graph index starts with 1 in file 16 | 17 | Returns: 18 | List of networkx objects with graph and node labels 19 | ''' 20 | prefix = os.path.join(datadir, dataname, dataname) 21 | filename_graph_indic = prefix + '_graph_indicator.txt' 22 | # index of graphs that a given node belongs to 23 | graph_indic={} 24 | with open(filename_graph_indic) as f: 25 | i=1 26 | for line in f: 27 | line=line.strip("\n") 28 | graph_indic[i]=int(line) 29 | i+=1 30 | 31 | filename_nodes=prefix + '_node_labels.txt' 32 | node_labels=[] 33 | try:
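        # <dataname>_node_labels.txt is optional in this benchmark format: line i holds a single
        # integer, the label of node i, where node ids run from 1 consecutively across the whole
        # dataset (matching graph_indic above). The IOError handler below simply skips datasets,
        # such as the REDDIT and IMDB collections, that ship without node labels.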
34 | with open(filename_nodes) as f: 35 | for line in f: 36 | line=line.strip("\n") 37 | node_labels+=[int(line)] 38 | # node_labels = LabelEncoder().fit_transform(node_labels) 39 | except IOError: 40 | print('No node labels') 41 | 42 | filename_node_attrs=prefix + '_node_attributes.txt' 43 | node_attrs=[] 44 | try: 45 | with open(filename_node_attrs) as f: 46 | for line in f: 47 | line = line.strip("\s\n") 48 | attrs = [float(attr) for attr in re.split("[,\s]+", line) if not attr == ''] 49 | node_attrs.append(np.array(attrs)) 50 | except IOError: 51 | print('No node attributes') 52 | 53 | label_has_zero = False 54 | filename_graphs=prefix + '_graph_labels.txt' 55 | graph_labels=[] 56 | with open(filename_graphs) as f: 57 | for line in f: 58 | line=line.strip("\n") 59 | val = int(line) 60 | if val == 0: 61 | label_has_zero = True 62 | graph_labels.append(val - 1) 63 | graph_labels = np.array(graph_labels) 64 | if label_has_zero: 65 | graph_labels += 1 66 | 67 | filename_adj=prefix + '_A.txt' 68 | adj_list={i:[] for i in range(1,len(graph_labels)+1)} 69 | # index_graph={i:[] for i in range(1,len(graph_labels)+1)} 70 | num_edges = 0 71 | with open(filename_adj) as f: 72 | for line in f: 73 | line=line.strip("\n").split(",") 74 | e0,e1=(int(line[0].strip(" ")),int(line[1].strip(" "))) 75 | adj_list[graph_indic[e0]].append((e0,e1)) 76 | # index_graph[graph_indic[e0]]+=[e0,e1] 77 | num_edges += 1 78 | # for k in index_graph.keys(): 79 | # index_graph[k]=[u-1 for u in set(index_graph[k])] 80 | 81 | 82 | graphs=[] 83 | for i in range(1,1+len(adj_list)): 84 | # indexed from 1 here 85 | G=nx.from_edgelist(adj_list[i]) 86 | graphs.append(G) 87 | 88 | # add features and labels 89 | for nodeid, nl in enumerate(node_labels): 90 | nodeid += 1 91 | graphs[graph_indic[nodeid]-1].add_node(nodeid) 92 | # graphs[graph_indic[nodeid]-1][nodeid]['label'] = nl 93 | 94 | for idx, G in enumerate(graphs): 95 | # no graph labels needed 96 | G.graph['label'] = graph_labels[idx] 97 | for u in G.nodes(): 98 | if len(node_labels) > 0: 99 | G.node[u]['label'] = node_labels[u-1] 100 | if len(node_attrs) > 0: 101 | G.node[u]['feat'] = node_attrs[u-1] 102 | 103 | graphs[idx] = G 104 | 105 | # relabeling 106 | for idx, G in enumerate(graphs): 107 | mapping={} 108 | it=0 109 | if float(nx.__version__)<2.0: 110 | for n in G.nodes(): 111 | mapping[n]=it 112 | it+=1 113 | else: 114 | for n in G.nodes: 115 | mapping[n]=it 116 | it+=1 117 | 118 | # indexed from 0 119 | G = nx.relabel_nodes(G, mapping) 120 | 121 | graphs[idx] = G 122 | 123 | return graphs, graph_labels 124 | -------------------------------------------------------------------------------- /sub2vec/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | for i in 1 2 3 4 5 3 | do 4 | for DS in 'IMDB-BINARY' 'IMDB-MULTI' 5 | do 6 | python3 src/main.py --input ../data/$DS --preprocessed-input preprocessed_dataset/$DS --d 512 --property n 7 | python3 src/main.py --input ../data/$DS --preprocessed-input preprocessed_dataset/$DS --d 512 --property s 8 | done 9 | done 10 | -------------------------------------------------------------------------------- /sub2vec/preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | from glob import glob 4 | import os 5 | import subprocess 6 | from sklearn.model_selection import cross_val_score 7 | from sklearn.model_selection import GridSearchCV, StratifiedKFold 8 | from sklearn.svm import 
SVC, LinearSVC 9 | from sklearn.linear_model import LogisticRegression 10 | from sklearn.ensemble import RandomForestClassifier 11 | from sklearn import preprocessing 12 | from sklearn.metrics import accuracy_score 13 | 14 | def load_data(dir_name, use_node_labels): 15 | node2graph = {} 16 | Gs = [] 17 | 18 | ds_name = os.path.basename(dir_name) 19 | with open("%s/%s_graph_indicator.txt"%(dir_name,ds_name), "r") as f: 20 | c = 1 21 | for line in f: 22 | node2graph[c] = int(line[:-1]) 23 | if not node2graph[c] == len(Gs): 24 | Gs.append(nx.Graph()) 25 | Gs[-1].add_node(c) 26 | c += 1 27 | 28 | with open("%s/%s_A.txt"%(dir_name,ds_name), "r") as f: 29 | for line in f: 30 | edge = line[:-1].split(",") 31 | edge[1] = edge[1].replace(" ", "") 32 | Gs[node2graph[int(edge[0])]-1].add_edge(int(edge[0]), int(edge[1])) 33 | 34 | if use_node_labels: 35 | with open("%s/%s_node_labels.txt"%(dir_name,ds_name), "r") as f: 36 | c = 1 37 | for line in f: 38 | node_label = int(line[:-1]) 39 | Gs[node2graph[c]-1].node[c]['label'] = node_label 40 | c += 1 41 | 42 | # for idx, g in enumerate(Gs): 43 | # for n in g.nodes(): 44 | # _ = (g.node[n]['label']) 45 | 46 | labels = [] 47 | with open("%s/%s_graph_labels.txt"%(dir_name,ds_name), "r") as f: 48 | for line in f: 49 | labels.append(int(line[:-1])) 50 | 51 | labels = np.array(labels, dtype = np.float) 52 | return Gs, labels 53 | 54 | def evaluate(DS, embeddings): 55 | graphs, labels = load_data(DS, False) 56 | x, y = np.array(embeddings), np.array(labels) 57 | 58 | kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None) 59 | accs=[] 60 | accuracies = [] 61 | for train_index, test_index in kf.split(x, y): 62 | best_acc1 = 0 63 | 64 | x_train, x_test = x[train_index], x[test_index] 65 | y_train, y_test = y[train_index], y[test_index] 66 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 67 | classifier = GridSearchCV(SVC(), params, cv=10, scoring='accuracy', verbose=0) 68 | classifier.fit(x_train, y_train) 69 | accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) 70 | 71 | print('SVC', np.mean(accuracies)) 72 | return np.mean(accuracies) 73 | 74 | def preprocess(DS, preprocessed_input): 75 | Gs, labels = load_data(DS, False) 76 | print('number of graphs', len(Gs)) 77 | try: 78 | os.makedirs(preprocessed_input) 79 | except Exception as e: 80 | print(e) 81 | 82 | for i in range(len(Gs)): 83 | with open('{}/{}'.format(preprocessed_input, i), 'w+') as f: 84 | for e in Gs[i].edges(): 85 | f.write('{} {}\n'.format(e[0], e[1])) 86 | print('done preprocessing') 87 | 88 | if __name__ == '__main__': 89 | ds_name='MUTAG' 90 | print('classification') 91 | classification(ds_name, ds_name+'.vec') 92 | classification('ENZYMES', 'output') 93 | -------------------------------------------------------------------------------- /sub2vec/src/graphUtils_n.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import random 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def getGraph(filename): 7 | G=nx.Graph() 8 | 9 | f=open(filename,'r') 10 | lines=f.readlines() 11 | for line in lines: 12 | if(line[0]=='#'): 13 | continue 14 | else: 15 | temp=line.split() 16 | index1=int(temp[0]) 17 | index2=int(temp[1]) 18 | G.add_edge(index1,index2) 19 | f.close() 20 | return G 21 | 22 | 23 | def randomWalk(G, walkSize): 24 | walkList= [] 25 | curNode = random.choice(G.nodes()) 26 | 27 | while(len(walkList) < walkSize): 28 | walkList.append(curNode) 29 | curNode = random.choice(G.neighbors(curNode)) 30 | return 
walkList 31 | 32 | def getStats(G): 33 | stats ={} 34 | stats['num_nodes'] = nx.number_of_nodes(G) 35 | stats['num_edges'] = nx.number_of_edges(G) 36 | stats['is_Connected'] = nx.is_connected(G) 37 | return stats 38 | 39 | def drawGraph(G): 40 | pos = nx.spring_layout(G) 41 | nx.draw_networkx(G, pos) 42 | plt.savefig("graph.pdf") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /sub2vec/src/graphUtils_s.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import random 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def writeGraph(filename, G): 7 | 8 | file = open(filename, 'w') 9 | for edge in G.edges(): 10 | node1 = str(G.node[edge[0]]['label']) 11 | node2 = str(G.node[edge[1]]['label']) 12 | file.write(node1+'\t'+node2+'\n') 13 | file.close() 14 | 15 | 16 | def getGraph(filename): 17 | G=nx.Graph() 18 | mode = 0 19 | f=open(filename,'r') 20 | lines=f.readlines() 21 | labels = {} 22 | for line in lines: 23 | temp=line.split() 24 | index1=int(temp[0]) 25 | index2=int(temp[1]) 26 | G.add_edge(index1,index2) 27 | f.close() 28 | nx.set_node_attributes(G, 'label', labels) 29 | return G 30 | 31 | 32 | def randomWalk(G, walkSize): 33 | walkList= [] 34 | curNode = random.choice(G.nodes()) 35 | 36 | while(len(walkList) < walkSize): 37 | walkList.append(G.node[curNode]['label']) 38 | curNode = random.choice(G.neighbors(curNode)) 39 | return walkList 40 | 41 | def getStats(G): 42 | stats ={} 43 | stats['num_nodes'] = nx.number_of_nodes(G) 44 | stats['num_edges'] = nx.number_of_edges(G) 45 | stats['is_Connected'] = nx.is_connected(G) 46 | return stats 47 | 48 | def drawGraph(G): 49 | plt.figure() 50 | pos = nx.spring_layout(G) 51 | nx.draw_networkx(G, pos) 52 | plt.savefig("graph.pdf") 53 | plt.show() 54 | -------------------------------------------------------------------------------- /sub2vec/src/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | from structural import structural_embedding 5 | from neighborhood import neighborhood_embedding 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser(description="sub2vec.") 9 | parser.add_argument('--input', nargs='?', required=True, help='Input directory') 10 | parser.add_argument('--preprocessed-input', required=True) 11 | 12 | parser.add_argument('--property', default='n', choices=['n', 's'], required=True, help='Type of subgraph property to preserve.
Use "--property n" for the neighborhood property and "--property s" for the structural property.') 13 | 14 | parser.add_argument('--walkLength', default=100000, type=int, help='length of the random walk on each subgraph') 15 | 16 | # parser.add_argument('--output', required=True, help='Output representation file') 17 | 18 | parser.add_argument('--d', default=300, type=int, help='dimension of the learned features for each subgraph.') 19 | 20 | parser.add_argument('--iter', default=20, type=int, help='training iterations') 21 | 22 | parser.add_argument('--windowSize', default=2, type=int, 23 | help='Window size of the model.') 24 | 25 | parser.add_argument('--p', default=0.5, type=float, 26 | help='meta parameter.') 27 | 28 | parser.add_argument('--model', default='dm', choices=['dbon', 'dm'], 29 | help='model for learning vectors: SV-DM (dm) or SV-DBON (dbon).') 30 | 31 | args = parser.parse_args() 32 | from preprocess import preprocess 33 | print('start preprocessing ...') 34 | preprocess(args.input, args.preprocessed_input) 35 | 36 | if args.property == 's': 37 | structural_embedding(args) 38 | else: 39 | neighborhood_embedding(args) 40 | 41 | 42 | 43 | if __name__=='__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /sub2vec/src/neighborhood.py: -------------------------------------------------------------------------------- 1 | import gensim.models.doc2vec as doc 2 | import os 3 | import graphUtils_n 4 | from tqdm import tqdm 5 | 6 | 7 | def arr2str(arr): 8 | result = "" 9 | for i in arr: 10 | result += " "+str(i) 11 | return result 12 | 13 | 14 | def generateWalkFile(dirName, walkLength): 15 | walkFile = open(dirName+'.walk', 'w') 16 | indexToName = {} 17 | 18 | for root, dirs, files in os.walk(dirName): 19 | index = 0 20 | for name in tqdm(files): 21 | # print(name) 22 | subgraph = graphUtils_n.getGraph(os.path.join(root, name)) 23 | walk = graphUtils_n.randomWalk(subgraph, walkLength) 24 | walkFile.write(arr2str(walk) +"\n") 25 | indexToName[index] = name 26 | index += 1 27 | walkFile.close() 28 | 29 | return indexToName 30 | 31 | def saveVectors(vectors, outputfile, IdToName): 32 | print(len(vectors), outputfile, IdToName) 33 | output = open(outputfile, 'w') 34 | 35 | output.write(str(len(vectors)) +"\n") 36 | for i in range(len(vectors)): 37 | output.write(str(IdToName[i])) 38 | for j in vectors[i]: 39 | output.write('\t'+ str(j)) 40 | output.write('\n') 41 | output.close() 42 | 43 | def neighborhood_embedding(args): 44 | inputDir = args.preprocessed_input 45 | # outputFile = args.output 46 | iterations = args.iter 47 | dimensions = args.d 48 | window = args.windowSize 49 | dm = 1 if args.model == 'dm' else 0 50 | indexToName = generateWalkFile(inputDir, args.walkLength) 51 | # print(indexToName) 52 | sentences = doc.TaggedLineDocument(inputDir+'.walk') 53 | 54 | with open('log', 'a+') as f: 55 | results = [] 56 | # for epochs in range(10, 110, 10): 57 | # print('epochs', epochs) 58 | model = doc.Doc2Vec(sentences, vector_size = dimensions, dm = dm, window = window ) 59 | vectors = model.docvecs 60 | embeddings = [[] for _ in range(len(vectors))] 61 | for i in range(len(vectors)): 62 | embeddings[int(indexToName[i])] = vectors[i] 63 | 64 | from preprocess import evaluate 65 | res = evaluate(args.input, embeddings) 66 | results.append(str(res)) 67 | print(res) 68 | 69 | f.write(inputDir + ',n,' + ','.join(results) + '\n') 70 | -------------------------------------------------------------------------------- /sub2vec/src/structural.py:
-------------------------------------------------------------------------------- 1 | import gensim.models.doc2vec as doc 2 | import os 3 | import graphUtils_s 4 | import random 5 | import networkx as nx 6 | from tqdm import tqdm 7 | 8 | 9 | def arr2str(arr): 10 | result = "" 11 | for i in arr: 12 | result += " "+str(i) 13 | return result 14 | 15 | 16 | def generateDegreeWalk(Graph, walkSize): 17 | g = Graph 18 | walk = randomWalkDegreeLabels(g,walkSize) 19 | #walk = serializeEdge(g,NodeToLables) 20 | return walk 21 | 22 | def randomWalkDegreeLabels(G, walkSize): 23 | curNode = random.choice(G.nodes()) 24 | walkList= [] 25 | 26 | while(len(walkList) < walkSize): 27 | walkList.append(G.node[curNode]['label']) 28 | curNode = random.choice(G.neighbors(curNode)) 29 | return walkList 30 | 31 | def getDegreeLabelledGraph(G, rangetoLabels): 32 | degreeDict = G.degree(G.nodes()) 33 | labelDict = {} 34 | for node in degreeDict.keys(): 35 | val = degreeDict[node]/float(nx.number_of_nodes(G)) 36 | labelDict[node] = inRange(rangetoLabels, val) 37 | #val = degreeDict[node]/float(nx.number_of_nodes(G)) 38 | #labelDict[node] = degreeDict[node] 39 | 40 | nx.set_node_attributes(G, 'label', labelDict) 41 | 42 | return G 43 | 44 | def inRange(rangeDict, val): 45 | for key in rangeDict: 46 | if key[0] < val and key[1] >= val: 47 | return rangeDict[key] 48 | 49 | def generateWalkFile(dirName, walkLength, alpha): 50 | walkFile = open(dirName+'.walk', 'w') 51 | indexToName = {} 52 | rangetoLabels = {(0, 0.05):'z',(0.05, 0.1):'a', (0.1, 0.15):'b', (0.15, 0.2):'c', (0.2, 0.25):'d', (0.25, 0.5):'e', (0.5, 0.75):'f',(0.75, 1.0):'g'} 53 | for root, dirs, files in os.walk(dirName): 54 | index = 0 55 | for name in tqdm(files): 56 | subgraph = graphUtils_s.getGraph(os.path.join(root, name)) 57 | degreeGraph = getDegreeLabelledGraph(subgraph, rangetoLabels) 58 | degreeWalk = generateDegreeWalk(degreeGraph, int(walkLength* (1- alpha))) 59 | walk = graphUtils_s.randomWalk(subgraph, int(alpha * walkLength)) 60 | walkFile.write(arr2str(walk)+ arr2str(degreeWalk) +"\n") 61 | indexToName[index] = name 62 | index += 1 63 | walkFile.close() 64 | 65 | return indexToName 66 | 67 | def saveVectors(vectors, outputfile, IdToName): 68 | output = open(outputfile, 'w') 69 | 70 | output.write(str(len(vectors)) +"\n") 71 | for i in range(len(vectors)): 72 | output.write(str(IdToName[i])) 73 | for j in vectors[i]: 74 | output.write('\t'+ str(j)) 75 | output.write('\n') 76 | output.close() 77 | 78 | 79 | def structural_embedding(args): 80 | 81 | inputDir = args.preprocessed_input 82 | # outputFile = args.output 83 | iterations = args.iter 84 | dimensions = args.d 85 | window = args.windowSize 86 | dm = 1 if args.model == 'dm' else 0 87 | indexToName = generateWalkFile(inputDir, args.walkLength, args.p) 88 | sentences = doc.TaggedLineDocument(inputDir+'.walk') 89 | 90 | with open('log', 'a+') as f: 91 | results = [] 92 | # for epochs in range(10, 110, 10): 93 | # print('epochs', epochs) 94 | model = doc.Doc2Vec(sentences, vector_size = dimensions, dm = dm, window = window, workers=8) 95 | vectors = model.docvecs 96 | embeddings = [[] for _ in range(len(vectors))] 97 | for i in range(len(vectors)): 98 | embeddings[int(indexToName[i])] = vectors[i] 99 | 100 | from preprocess import evaluate 101 | res = evaluate(args.input, embeddings) 102 | print(res) 103 | results.append(str(res)) 104 | 105 | f.write(inputDir + ',s,' + ','.join(results) + '\n') 106 | -------------------------------------------------------------------------------- 
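The sketch below is a minimal, self-contained illustration of what structural.py does; it is not the repository's code. It assumes NetworkX 2.x and gensim, and the toy graph generator, bin edges and hyperparameters (BINS, walk length 200, vector_size=32) are made up for the example. Nodes are relabelled by normalized-degree bins, one random walk over those labels becomes the "document" for a graph, and Doc2Vec turns each document into a fixed-size structural embedding.

```
import random

import networkx as nx
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# Coarser bins than structural.py's rangetoLabels, purely for illustration.
BINS = {(0.0, 0.05): 'z', (0.05, 0.25): 'a', (0.25, 0.5): 'b', (0.5, 1.0): 'c'}

def degree_label(G, v):
    # normalized degree mapped to a discrete symbol (cf. getDegreeLabelledGraph / inRange)
    x = G.degree(v) / float(G.number_of_nodes())
    return next(lab for (lo, hi), lab in BINS.items() if lo < x <= hi)

def degree_walk(G, length):
    # random walk that records degree labels instead of node ids (cf. randomWalkDegreeLabels)
    v = random.choice(list(G.nodes()))
    walk = []
    while len(walk) < length:
        walk.append(degree_label(G, v))
        v = random.choice(list(G.neighbors(v)))
    return walk

# one walk "document" per toy graph (connected, so every node has at least one neighbor)
graphs = [nx.connected_watts_strogatz_graph(20, 4, 0.3) for _ in range(5)]
docs = [TaggedDocument(words=degree_walk(G, 200), tags=[i]) for i, G in enumerate(graphs)]
model = Doc2Vec(docs, vector_size=32, window=2, dm=1, epochs=20)
vecs = model.dv if hasattr(model, 'dv') else model.docvecs  # gensim 4.x vs 3.x
print(vecs[0][:5])  # first dimensions of graph 0's structural embedding
```

Unlike structural.py, this sketch skips the mixed plain/degree walk controlled by --p and writes no .walk file; it only shows the degree-binning plus Doc2Vec pipeline end to end.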
/sub2vec/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | 4 | if __name__ == '__main__': 5 | df = pd.read_csv(sys.argv[1]) 6 | DSs = df.DS.unique() 7 | for DS in DSs: 8 | tmpdf = df[df.DS == DS] 9 | for tpe in ['n', 's']: 10 | m, s = tmpdf[(tmpdf.type == tpe)]['result'].mean(), tmpdf[(tmpdf.type == tpe)]['result'].std() 11 | print(DS, tpe, m, s) 12 | --------------------------------------------------------------------------------
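For reference, test.py relies on the CSV it is given having a header row with DS, type and result columns (it groups rows by dataset and by the n/s property and prints the mean and standard deviation over the five repetitions that go.sh runs). The log file written by neighborhood.py and structural.py contains only raw comma-separated rows, so a header has to be supplied; a hypothetical input could look like the following, where the accuracy values are placeholders rather than reported results:

```
DS,type,result
preprocessed_dataset/IMDB-BINARY,n,0.712
preprocessed_dataset/IMDB-BINARY,s,0.689
preprocessed_dataset/IMDB-MULTI,n,0.446
preprocessed_dataset/IMDB-MULTI,s,0.431
```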