├── .gitignore
├── DGK
├── README
├── canonical_maps
│ ├── canonical_map_n2.p
│ ├── canonical_map_n3.p
│ ├── canonical_map_n4.p
│ ├── canonical_map_n5.p
│ ├── canonical_map_n6.p
│ ├── canonical_map_n7.p
│ └── canonical_map_n8.p
├── datasets
│ └── README
├── deep_kernel.py
├── go.sh
└── graphlet_counter_maps
│ ├── graphlet_counter_nodebased_n2.p
│ ├── graphlet_counter_nodebased_n3.p
│ ├── graphlet_counter_nodebased_n4.p
│ ├── graphlet_counter_nodebased_n5.p
│ ├── graphlet_counter_nodebased_n6.p
│ ├── graphlet_counter_nodebased_n7.p
│ └── graphlet_counter_nodebased_n8.p
├── MLGkernel
├── LICENSE
├── MLGkernel
│ ├── FLGinstance.cpp
│ ├── FLGinstance.hpp
│ ├── FLGkernel.cpp
│ ├── FLGkernel.hpp
│ ├── Kernel.hpp
│ ├── Linearizer.hpp
│ ├── MLG_base.hpp
│ ├── MLGdataset.cpp
│ ├── MLGdataset.hpp
│ ├── MLGgraph.cpp
│ ├── MLGgraph.hpp
│ ├── Makefile
│ ├── RandomSelection.hpp
│ ├── params.hpp
│ ├── runMLG
│ ├── runMLG.cpp
│ └── swig
│ │ ├── MLGK.cpp
│ │ ├── MLGK.i
│ │ ├── Makefile
│ │ ├── README.txt
│ │ └── test.py
├── Makefile
├── Makefile.base
├── Makefile.options
├── README.md
├── data_utils.py
├── evaluate_embedding.py
├── go.sh
├── include
│ ├── pMMFbase.hpp
│ └── pMMFglobal.inc
├── matrices
│ ├── Activemap.cpp
│ ├── Activemap.hpp
│ ├── AtomicCmatrix.hpp
│ ├── Cmatrix.cpp
│ ├── Cmatrix.hpp
│ ├── Cvector.cpp
│ ├── Cvector.hpp
│ ├── DenseVector.hpp
│ ├── EigenInterface.hpp
│ ├── GramMatrix.cpp
│ ├── GramMatrix.hpp
│ ├── LapackInterface.hpp
│ ├── Makefile
│ ├── Matrix.cpp
│ ├── Matrix.hpp
│ ├── MatrixX.cpp
│ ├── MatrixX.hpp
│ ├── Remap.cpp
│ ├── Remap.hpp
│ ├── SparseVector.hpp
│ ├── Vector.cpp
│ ├── Vector.hpp
│ ├── Vectorh.cpp
│ ├── Vectorh.hpp
│ ├── Vectorl.cpp
│ ├── Vectorl.hpp
│ ├── Vectorv.cpp
│ ├── Vectorv.hpp
│ ├── matrices.cpp
│ └── matrices.o
├── preprocess.py
├── test.py
└── utility
│ ├── Bifstream.cpp
│ ├── Bifstream.hpp
│ ├── Bofstream.cpp
│ ├── Bofstream.hpp
│ ├── Graph.cpp
│ ├── Graph.hpp
│ ├── Log.cpp
│ ├── Log.hpp
│ ├── Makefile
│ ├── Rstream.cpp
│ ├── Rstream.hpp
│ ├── Serializable.cpp
│ ├── Serializable.hpp
│ ├── ThreadBank.cpp
│ ├── ThreadBank.hpp
│ ├── ThreadManager.cpp
│ ├── ThreadManager.hpp
│ ├── TopkList.cpp
│ ├── TopkList.hpp
│ └── filetypes
│ ├── Makefile
│ ├── MatrixIF.hpp
│ ├── MatrixIF_ASCII.cpp
│ ├── MatrixIF_ASCII.hpp
│ ├── MatrixIF_Boeing.cpp
│ ├── MatrixIF_Boeing.hpp
│ ├── MatrixIF_Matlab.cpp
│ ├── MatrixIF_Matlab.hpp
│ ├── MatrixOF.hpp
│ ├── MatrixOF_ASCII.cpp
│ ├── MatrixOF_ASCII.hpp
│ ├── MatrixOF_Boeing.cpp
│ ├── MatrixOF_Boeing.hpp
│ ├── MatrixOF_Matlab.cpp
│ ├── MatrixOF_Matlab.hpp
│ └── filetypes.cpp
├── README.md
├── data
└── PTC_MR
│ ├── PTC_MR_A.txt
│ ├── PTC_MR_edge_labels.txt
│ ├── PTC_MR_graph_indicator.txt
│ ├── PTC_MR_graph_labels.txt
│ └── PTC_MR_node_labels.txt
├── diffpool
├── aggregators.py
├── cross_val.py
├── encoders.py
├── gen
│ ├── data.py
│ └── feat.py
├── go.sh
├── graph_embedding.py
├── graph_sampler.py
├── graphsage.py
├── load_data.py
├── partition.py
├── set2set.py
├── test.py
├── train.py
└── util.py
├── graph2vec_tf
├── README.md
├── __init__.py
├── classify.py
├── corpus_parser.py
├── go.sh
├── main.py
├── make_graph2vec_corpus.py
├── preprocess.py
├── skipgram.py
├── test.py
├── train_utils.py
└── utils.py
├── kcnn
├── README.md
├── go.sh
├── graph_kernels.py
├── graph_kernels_labeled.py
├── main.py
├── model.py
├── nystrom.py
└── utils.py
├── kernel_methods
├── README.md
├── go.sh
├── main.py
└── utils.py
└── sub2vec
├── go.sh
├── preprocess.py
├── src
├── graphUtils_n.py
├── graphUtils_s.py
├── main.py
├── neighborhood.py
└──
structural.py └── test.py /.gitignore: -------------------------------------------------------------------------------- 1 | log 2 | __pycache__ 3 | *.pyc 4 | *.walk 5 | preprocessed_dataset 6 | TEST 7 | checkpoint 8 | tmp 9 | *.nexf 10 | results 11 | 12 | -------------------------------------------------------------------------------- /DGK/canonical_maps/canonical_map_n2.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'' 3 | p1 4 | (dp2 5 | S'graph' 6 | p3 7 | (tsS'idx' 8 | p4 9 | I0 10 | sS'n' 11 | p5 12 | I0 13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00' 14 | p6 15 | (dp7 16 | g3 17 | (tsg4 18 | I1 19 | sg5 20 | I1 21 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80' 22 | p8 23 | (dp9 24 | g3 25 | (I1 26 | tp10 27 | sg4 28 | I3 29 | sg5 30 | I2 31 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 32 | p11 33 | (dp12 34 | g3 35 | (I0 36 | tp13 37 | sg4 38 | I2 39 | sg5 40 | I2 41 | ss. -------------------------------------------------------------------------------- /DGK/canonical_maps/canonical_map_n3.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'' 3 | p1 4 | (dp2 5 | S'graph' 6 | p3 7 | (tsS'idx' 8 | p4 9 | I0 10 | sS'n' 11 | p5 12 | I0 13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 14 | p6 15 | (dp7 16 | g3 17 | (I0 18 | tp8 19 | sg4 20 | I2 21 | sg5 22 | I2 23 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00' 24 | p9 25 | (dp10 26 | g3 27 | (tsg4 28 | I1 29 | sg5 30 | I1 31 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80' 32 | p11 33 | (dp12 34 | g3 35 | (I1 36 | tp13 37 | sg4 38 | I3 39 | sg5 40 | I2 41 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 42 | p14 43 | (dp15 44 | g3 45 | (I0 46 | I0 47 | I0 48 | tp16 49 | sg4 50 | I4 51 | sg5 52 | I3 53 | ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\xc0' 54 | p17 55 | (dp18 56 | g3 57 | (I1 58 | I1 59 | I1 60 | tp19 61 | sg4 62 | I7 63 | sg5 64 | I3 65 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00@' 66 | p20 67 | (dp21 68 | g3 69 | (I0 70 | I0 71 | I1 72 | tp22 73 | sg4 74 | I5 75 | sg5 76 | I3 77 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\xc0' 78 | p23 79 | (dp24 80 | g3 81 | (I0 82 | I1 83 | I1 84 | tp25 85 | sg4 86 | I6 87 | sg5 88 | I3 89 | ss. 
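The canonical_map_n*.p files in this directory are Python 2 pickles (protocol 0). Reading the dumps, each one maps a packed encoding of a canonically labelled graphlet adjacency matrix to a small record: 'graph' (the upper-triangular adjacency entries as a tuple of 0/1), 'idx' (a running graphlet index) and 'n' (the number of nodes in the graphlet). A minimal inspection sketch, assuming Python 3 and the DGK/ paths from the tree above; the encoding argument is only needed because the files were written by Python 2, and under Python 2 (which the DGK go.sh further down uses) a plain pickle.load(f) suffices:

import pickle

with open('DGK/canonical_maps/canonical_map_n3.p', 'rb') as f:
    # keys: packed canonical adjacency encodings; values: dicts with 'graph', 'idx', 'n'
    canonical_map = pickle.load(f, encoding='latin1')

for key, record in canonical_map.items():
    print(record['n'], record['idx'], record['graph'])

How deep_kernel.py consumes these lookup tables is not shown in this section.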
-------------------------------------------------------------------------------- /DGK/canonical_maps/canonical_map_n4.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | S'' 3 | p1 4 | (dp2 5 | S'graph' 6 | p3 7 | (tsS'idx' 8 | p4 9 | I0 10 | sS'n' 11 | p5 12 | I0 13 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 14 | p6 15 | (dp7 16 | g3 17 | (I0 18 | tp8 19 | sg4 20 | I2 21 | sg5 22 | I2 23 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00 ' 24 | p9 25 | (dp10 26 | g3 27 | (I0 28 | I0 29 | I1 30 | I1 31 | I0 32 | I0 33 | tp11 34 | sg4 35 | I13 36 | sg5 37 | I4 38 | ssS'\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\x00\xe0' 39 | p12 40 | (dp13 41 | g3 42 | (I0 43 | I1 44 | I1 45 | I1 46 | I1 47 | I1 48 | tp14 49 | sg4 50 | I17 51 | sg5 52 | I4 53 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 54 | p15 55 | (dp16 56 | g3 57 | (I0 58 | I0 59 | I0 60 | I0 61 | I0 62 | I0 63 | tp17 64 | sg4 65 | I8 66 | sg5 67 | I4 68 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00' 69 | p18 70 | (dp19 71 | g3 72 | (tsg4 73 | I1 74 | sg5 75 | I1 76 | ssS'\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\xe0' 77 | p20 78 | (dp21 79 | g3 80 | (I0 81 | I0 82 | I1 83 | I1 84 | I1 85 | I1 86 | tp22 87 | sg4 88 | I15 89 | sg5 90 | I4 91 | ssS'\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\xe0' 92 | p23 93 | (dp24 94 | g3 95 | (I0 96 | I0 97 | I1 98 | I0 99 | I1 100 | I1 101 | tp25 102 | sg4 103 | I12 104 | sg5 105 | I4 106 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00 ' 107 | p26 108 | (dp27 109 | g3 110 | (I0 111 | I0 112 | I0 113 | I0 114 | I0 115 | I1 116 | tp28 117 | sg4 118 | I9 119 | sg5 120 | I4 121 | ssS'\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00\x80' 122 | p29 123 | (dp30 124 | g3 125 | (I1 126 | tp31 127 | sg4 128 | I3 129 | sg5 130 | I2 131 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' 132 | p32 133 | (dp33 134 | g3 135 | (I0 136 | I0 137 | I0 138 | tp34 139 | sg4 140 | I4 141 | sg5 142 | I3 143 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00`' 144 | p35 145 | (dp36 146 | g3 147 | (I0 148 | I0 149 | I0 150 | I0 151 | I1 152 | I1 153 | tp37 154 | sg4 155 | I10 156 | sg5 157 | I4 158 | ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\xa0\x00\x00\x00\x00\x00\x00\x00\xc0' 159 | p38 160 | (dp39 161 | g3 162 | (I1 163 | I1 164 | I1 165 | tp40 166 | sg4 167 | I7 168 | sg5 169 | I3 170 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x000\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00`' 171 | p41 172 | (dp42 173 | g3 174 | (I0 175 | I0 176 | I0 177 | I1 178 | I1 179 | I1 180 | tp43 181 | sg4 182 | I11 183 | sg5 184 | I4 185 | ssS'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00@' 186 | p44 187 | (dp45 188 | g3 189 | (I0 190 | I0 191 | I1 192 | tp46 193 | sg4 194 | I5 195 | sg5 196 | I3 197 | 
ssS'\x00\x00\x00\x00\x00\x00\x00`\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00`' 198 | p47 199 | (dp48 200 | g3 201 | (I0 202 | I1 203 | I1 204 | I1 205 | I1 206 | I0 207 | tp49 208 | sg4 209 | I16 210 | sg5 211 | I4 212 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\xc0' 213 | p50 214 | (dp51 215 | g3 216 | (I0 217 | I1 218 | I1 219 | tp52 220 | sg4 221 | I6 222 | sg5 223 | I3 224 | ssS'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x90\x00\x00\x00\x00\x00\x00\x00`' 225 | p53 226 | (dp54 227 | g3 228 | (I0 229 | I0 230 | I1 231 | I1 232 | I0 233 | I1 234 | tp55 235 | sg4 236 | I14 237 | sg5 238 | I4 239 | ssS'\x00\x00\x00\x00\x00\x00\x00p\x00\x00\x00\x00\x00\x00\x00\xb0\x00\x00\x00\x00\x00\x00\x00\xd0\x00\x00\x00\x00\x00\x00\x00\xe0' 240 | p56 241 | (dp57 242 | g3 243 | (I1 244 | I1 245 | I1 246 | I1 247 | I1 248 | I1 249 | tp58 250 | sg4 251 | I18 252 | sg5 253 | I4 254 | ss. -------------------------------------------------------------------------------- /DGK/datasets/README: -------------------------------------------------------------------------------- 1 | Please refer to the README under the main folder 2 | for more information on how to obtain the datasets. 3 | -------------------------------------------------------------------------------- /DGK/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # Fill in the name of the dataset 4 | DS= 5 | 6 | # Run multiple trials 7 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 8 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 9 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 10 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 11 | python2 deep_kernel.py 512 3 2 $DS 5 1 1 7 100 12 | -------------------------------------------------------------------------------- /DGK/graphlet_counter_maps/graphlet_counter_nodebased_n2.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | I0 3 | ccollections 4 | Counter 5 | p1 6 | ((dp2 7 | I1 8 | I1 9 | stp3 10 | Rp4 11 | sI1 12 | g1 13 | ((dp5 14 | I2 15 | I2 16 | sI3 17 | I2 18 | stp6 19 | Rp7 20 | sI2 21 | g1 22 | ((dp8 23 | tp9 24 | Rp10 25 | sI3 26 | g1 27 | ((dp11 28 | tp12 29 | Rp13 30 | s. -------------------------------------------------------------------------------- /DGK/graphlet_counter_maps/graphlet_counter_nodebased_n3.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | I0 3 | ccollections 4 | Counter 5 | p1 6 | ((dp2 7 | I1 8 | I1 9 | stp3 10 | Rp4 11 | sI1 12 | g1 13 | ((dp5 14 | I2 15 | I2 16 | sI3 17 | I2 18 | stp6 19 | Rp7 20 | sI2 21 | g1 22 | ((dp8 23 | I4 24 | I3 25 | sI5 26 | I2 27 | sI6 28 | I1 29 | stp9 30 | Rp10 31 | sI3 32 | g1 33 | ((dp11 34 | I5 35 | I1 36 | sI6 37 | I2 38 | sI7 39 | I3 40 | stp12 41 | Rp13 42 | sI4 43 | g1 44 | ((dp14 45 | tp15 46 | Rp16 47 | sI5 48 | g1 49 | ((dp17 50 | tp18 51 | Rp19 52 | sI6 53 | g1 54 | ((dp20 55 | tp21 56 | Rp22 57 | sI7 58 | g1 59 | ((dp23 60 | tp24 61 | Rp25 62 | s. 
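The graphlet_counter_nodebased_n*.p pickles in this directory decode to an ordinary dict that maps a graphlet index to a collections.Counter over related graphlet indices (the Counters for the largest graphlets in each map are empty); how deep_kernel.py uses these counts is not shown in this section. A minimal sketch along the same lines as the one above, again assuming Python 3:

import pickle
from collections import Counter

with open('DGK/graphlet_counter_maps/graphlet_counter_nodebased_n3.p', 'rb') as f:
    counter_map = pickle.load(f, encoding='latin1')  # written by Python 2, protocol 0

assert isinstance(counter_map[1], Counter)  # e.g. Counter({2: 2, 3: 2}) in the n3 map above
for idx, counts in sorted(counter_map.items()):
    print(idx, dict(counts))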
-------------------------------------------------------------------------------- /DGK/graphlet_counter_maps/graphlet_counter_nodebased_n4.p: -------------------------------------------------------------------------------- 1 | (dp0 2 | I0 3 | ccollections 4 | Counter 5 | p1 6 | ((dp2 7 | I1 8 | I1 9 | stp3 10 | Rp4 11 | sI1 12 | g1 13 | ((dp5 14 | I2 15 | I2 16 | sI3 17 | I2 18 | stp6 19 | Rp7 20 | sI2 21 | g1 22 | ((dp8 23 | I4 24 | I3 25 | sI5 26 | I2 27 | sI6 28 | I1 29 | stp9 30 | Rp10 31 | sI3 32 | g1 33 | ((dp11 34 | I5 35 | I1 36 | sI6 37 | I2 38 | sI7 39 | I3 40 | stp12 41 | Rp13 42 | sI4 43 | g1 44 | ((dp14 45 | I8 46 | I4 47 | sI9 48 | I2 49 | sI10 50 | I1 51 | sI12 52 | I1 53 | stp15 54 | Rp16 55 | sI5 56 | g1 57 | ((dp17 58 | I9 59 | I2 60 | sI10 61 | I2 62 | sI11 63 | I3 64 | sI13 65 | I4 66 | sI14 67 | I2 68 | sI15 69 | I1 70 | stp18 71 | Rp19 72 | sI6 73 | g1 74 | ((dp20 75 | I16 76 | I4 77 | sI17 78 | I2 79 | sI10 80 | I1 81 | sI12 82 | I3 83 | sI14 84 | I2 85 | sI15 86 | I2 87 | stp21 88 | Rp22 89 | sI7 90 | g1 91 | ((dp23 92 | I17 93 | I2 94 | sI18 95 | I4 96 | sI11 97 | I1 98 | sI15 99 | I1 100 | stp24 101 | Rp25 102 | sI8 103 | g1 104 | ((dp26 105 | tp27 106 | Rp28 107 | sI9 108 | g1 109 | ((dp29 110 | tp30 111 | Rp31 112 | sI10 113 | g1 114 | ((dp32 115 | tp33 116 | Rp34 117 | sI11 118 | g1 119 | ((dp35 120 | tp36 121 | Rp37 122 | sI12 123 | g1 124 | ((dp38 125 | tp39 126 | Rp40 127 | sI13 128 | g1 129 | ((dp41 130 | tp42 131 | Rp43 132 | sI14 133 | g1 134 | ((dp44 135 | tp45 136 | Rp46 137 | sI15 138 | g1 139 | ((dp47 140 | tp48 141 | Rp49 142 | sI16 143 | g1 144 | ((dp50 145 | tp51 146 | Rp52 147 | sI17 148 | g1 149 | ((dp53 150 | tp54 151 | Rp55 152 | sI18 153 | g1 154 | ((dp56 155 | tp57 156 | Rp58 157 | s. -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/FLGinstance.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 
22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include"FLGinstance.hpp" 28 | 29 | 30 | void FLGinstance::precompute(const double gamma){ 31 | if(Sinv.nrows>0) return; 32 | 33 | //cout<<"L="<&& _labels): 38 | // L(move(_L)), labels(_labels){}; 39 | 40 | FLGinstance(){} 41 | 42 | FLGinstance(Cmatrix&& _L, Cmatrix&& _U): 43 | L(move(_L)), U(move(_U)){}; 44 | 45 | 46 | public: 47 | 48 | void precompute(const double gamma); 49 | 50 | bool operator==(const FLGinstance& x) const{ 51 | if(L!=x.L) return false; 52 | if(U!=x.U) return false; 53 | //if(labels.size()!=x.labels.size()) return false; 54 | //if(labels!=x.labels) return false; 55 | return true; 56 | } 57 | 58 | string str(){ 59 | ostringstream oss; oss< labels; 69 | Cmatrix U; 70 | 71 | Cmatrix Sinv; // actually Sinv/2 72 | //double detS; 73 | double log_detS; 74 | 75 | // Cvector linearization; 76 | 77 | }; 78 | 79 | 80 | 81 | 82 | namespace std{ 83 | template<> 84 | class hash{ 85 | public: 86 | size_t operator()(const FLGinstance& x) const{ 87 | size_t h=hash()(x.L)^hash()(x.U); 88 | //for(auto& p: G.labels) h=(h<<1)^hash()(p); 89 | return h; 90 | } 91 | }; 92 | }; 93 | 94 | 95 | #endif 96 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/FLGkernel.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 
22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include"FLGkernel.hpp" 28 | 29 | 30 | double FLGkernel::operator()(const FLGinstance& x1, const FLGinstance& x2) const 31 | { 32 | 33 | if(x1.Sinv.nrows==0) const_cast(x1).precompute(gamma); 34 | if(x2.Sinv.nrows==0) const_cast(x2).precompute(gamma); 35 | 36 | Cvector lambda=(x1.Sinv+x2.Sinv).eigenvalues(); 37 | //double detS=1; for(int i=0; i{ 37 | public: 38 | 39 | FLGkernel(const double _gamma): gamma(_gamma){} 40 | 41 | double operator()(const FLGinstance& x1, const FLGinstance& x2) const; 42 | 43 | double operator()(const MLGgraph& x1, const MLGgraph& x2) const{return (*this)(x1.flg,x2.flg);}; 44 | 45 | public: 46 | 47 | double gamma=0.1; 48 | 49 | }; 50 | 51 | #endif 52 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/Kernel.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _Kernel 28 | #define _Kernel 29 | 30 | #include "pMMFbase.hpp" 31 | 32 | 33 | template 34 | class Kernel{ 35 | public: 36 | 37 | virtual double operator()(const TYPE& x1, const TYPE& x2) const =0; 38 | 39 | }; 40 | 41 | 42 | 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/MLG_base.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 
22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include "pMMFbase.hpp" 28 | 29 | 30 | typedef Cvector VertexFeatures; 31 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/MLGdataset.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _MLGdataset 28 | #define _MLGdataset 29 | 30 | #include "MLGgraph.hpp" 31 | #include 32 | 33 | class MLGdataset{ 34 | public: 35 | 36 | MLGdataset(){} 37 | MLGdataset(const std::string filename, double eta, double gamma, bool grow): gamma(gamma), grow(grow), eta(eta){ 38 | loadGraphs(filename); 39 | } 40 | ~MLGdataset() {for(auto p:graphs) delete p;} 41 | 42 | public: 43 | 44 | void condense(const int nlevels, const int leaf_radius=2); 45 | void computeGram(const int levels, const int radius); 46 | 47 | public: 48 | 49 | void loadGraphs(std::string filename); 50 | void loadDiscreteFeatures(std::string filename, int numFeatures); 51 | void loadFeatures(std::string filename); 52 | void saveGram(std::string filename); 53 | void fillGram(double *npmatrix, int rows, int cols); 54 | 55 | public: 56 | 57 | vector graphs; 58 | double gamma; // regularizer constant 59 | double eta; // regularizer constant 60 | int levels; 61 | int radius; 62 | bool grow; // 1 to grow by the leaf radius, 0 to double 63 | Cmatrix gram; 64 | }; 65 | 66 | #endif 67 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/MLGgraph.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 
19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _MLGgraph 28 | #define _MLGgraph 29 | 30 | #include 31 | 32 | #include "Cmatrix.hpp" 33 | #include "Graph.hpp" 34 | #include "FLGinstance.hpp" 35 | #include "Linearizer.hpp" 36 | 37 | 38 | class MLGgraph{ 39 | public: 40 | 41 | MLGgraph(const MLGgraph& x): n(x.n), adj(x.adj.copy()), labels(x.labels.size()){ 42 | for(int i=0; i(n); for(auto& p:labels) p=Cvector::Filled(1,0); init(); 49 | } 50 | MLGgraph(Graph&& G){ 51 | n=G.n; adj=move(G.adj); labels=vector(n); init(); 52 | } 53 | MLGgraph& operator=(Graph&& G){ 54 | n=G.n; adj=move(G.adj); labels=vector(n); init(); return *this; 55 | } 56 | 57 | public: 58 | 59 | void grow_subgraphs(const int radius); 60 | void double_subgraphs(); 61 | void push_to_linearizer(Linearizer& linearizer, double eta); 62 | void pull_features(); 63 | void compute_flg(); 64 | 65 | void computeDegreeFeatures(const int maxdeg); 66 | 67 | string str() const; 68 | 69 | private: 70 | 71 | void init(); 72 | Cmatrix subLaplacian(const vector& vset, double eta) const; 73 | Cmatrix FloydWarshall(const Cmatrix& A) const; 74 | 75 | public: 76 | 77 | int n; 78 | Cmatrix adj; 79 | vector labels; 80 | 81 | vector< vector > neighbors; 82 | vector< unordered_set > subgraphs; 83 | vector*> subinstances; 84 | 85 | FLGinstance flg; 86 | 87 | Cmatrix dist; 88 | 89 | 90 | }; 91 | 92 | 93 | namespace std{ 94 | template<> 95 | class hash< Hwrapper >{ 96 | public: 97 | size_t operator()(const Hwrapper& x) const{ 98 | return hash()(*x.ptr);} 99 | }; 100 | }; 101 | 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR= ../ 2 | include $(ROOTDIR)/Makefile.base 3 | 4 | CFLAGS= -std=c++11 -I $(INCLUDEDIR) -I $(UTILITYDIR) -I $(FILETYPESDIR) -I $(MATRIXDIR) -fPIC 5 | LIBS= -lstdc++ -lm -lpthread 6 | 7 | ALLOBJECTS= $(UTILITYDIR)/*.o $(MATRIXDIR)/*.o *.o #$(BLOCKEDDIR)/*.o 8 | 9 | FLGinstance.o: FLGinstance.hpp FLGinstance.cpp 10 | $(CC) -c FLGinstance.cpp $(CFLAGS) 11 | 12 | FLGkernel.o: FLGkernel.hpp FLGkernel.cpp FLGinstance.hpp 13 | $(CC) -c FLGkernel.cpp $(CFLAGS) 14 | 15 | MLGgraph.o: MLGgraph.hpp MLGgraph.cpp FLGinstance.hpp FLGkernel.hpp Linearizer.hpp 16 | $(CC) -c MLGgraph.cpp $(CFLAGS) 17 | 18 | MLGdataset.o: MLGdataset.hpp MLGdataset.cpp FLGinstance.hpp FLGkernel.hpp Linearizer.hpp MLGgraph.hpp 19 | $(CC) -c MLGdataset.cpp $(CFLAGS) 20 | 21 | runMLG: runMLG.cpp params.hpp MLGgraph.o MLGdataset.o FLGinstance.o 22 | $(CC) -o runMLG runMLG.cpp $(CFLAGS) $(ALLOBJECTS) $(LIBS) 23 | 24 | all: FLGinstance.o FLGkernel.o MLGgraph.o MLGdataset.o runMLG 25 | 26 | clean: 27 | @rm -f runMLG *.o 28 | 29 | anew: clean all 30 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/RandomSelection.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 
6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #ifndef _RandomSelection 28 | #define _RandomSelection 29 | 30 | #include 31 | 32 | 33 | #include "Activemap.hpp" 34 | #include "pMMFbase.hpp" 35 | 36 | extern default_random_engine randomNumberGenerator; 37 | 38 | 39 | class RandomSelection: public vector{ 40 | public: 41 | 42 | RandomSelection(const int k, const int n): vector(k){ 43 | assert(k<=n); 44 | 45 | if(k<0.3*n){ 46 | uniform_int_distribution distri(0,n-1); 47 | for(int i=0; i distri(0,n-i); 58 | int j=amap(distri(randomNumberGenerator)); 59 | amap.remove(j); 60 | (*this)[i]=j; 61 | } 62 | 63 | } 64 | 65 | public: 66 | 67 | unordered_set selected; 68 | //Activemap activemap; 69 | 70 | }; 71 | 72 | #endif 73 | 74 | 75 | //do{x=distri(randomNumberGenerator); 76 | //}while(selected.find(x)!=selected.end()); 77 | -------------------------------------------------------------------------------- /MLGkernel/MLGkernel/params.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 8 | 9 | 10 | This program is free software; you can redistribute it and/or 11 | modify it under the terms of the GNU General Public License 12 | as published by the Free Software Foundation; either version 2 13 | of the License, or (at your option) any later version. 14 | 15 | This program is distributed in the hope that it will be useful, 16 | but WITHOUT ANY WARRANTY; without even the implied warranty of 17 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 | GNU General Public License for more details. 19 | 20 | You should have received a copy of the GNU General Public License 21 | along with this program; if not, see . 22 | 23 | ----------------------------------------------------------------------------- */ 24 | 25 | 26 | 27 | #include 28 | #include 29 | using namespace std; 30 | 31 | class Params{ 32 | /** 33 | A simple object that holds all the parameters necessary for the MLGkernel including 34 | file paths for where the dataset and dataset features are stored and file path to 35 | save the resulting gram matrix. 36 | **/ 37 | public: 38 | // Constructer that inits the model variables. 39 | // set the data, feature and save paths separately. 
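// The single-letter constructor arguments below map onto the members they initialise:
// e -> eta and g -> gamma (the two regularizer constants), r -> radius, l -> levels,
// t -> num_threads, b -> grow_or_double (1 = grow the subgraphs by the leaf radius at
// each level, 0 = double them), matching the options documented in go.sh further down.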
40 | Params(double e, double g, int r, int l, int t, bool b): 41 | eta(e), gamma(g), radius(r), levels(l), num_threads(t), grow_or_double(b) {} 42 | 43 | public: 44 | void set_paths(string data, string features){ 45 | data_path = data; 46 | features_path = features; 47 | } 48 | 49 | void set_save_path(string save){ 50 | save_path = save; 51 | } 52 | 53 | void show() { 54 | cout << "Current parameter settings:" << endl; 55 | cout << " -eta : " << eta < max_nodes: 82 | continue 83 | 84 | # add features and labels 85 | G.graph['label'] = graph_labels[i-1] 86 | for u in G.nodes(): 87 | if len(node_labels) > 0: 88 | node_label_one_hot = [0] * num_unique_node_labels 89 | node_label = node_labels[u-1] 90 | node_label_one_hot[node_label] = 1 91 | G.node[u]['label'] = node_label_one_hot 92 | if len(node_attrs) > 0: 93 | G.node[u]['feat'] = node_attrs[u-1] 94 | if len(node_attrs) > 0: 95 | G.graph['feat_dim'] = node_attrs[0].shape[0] 96 | 97 | # relabeling 98 | mapping={} 99 | it=0 100 | if float(nx.__version__)<2.0: 101 | for n in G.nodes(): 102 | mapping[n]=it 103 | it+=1 104 | else: 105 | for n in G.nodes: 106 | mapping[n]=it 107 | it+=1 108 | 109 | # indexed from 0 110 | graphs.append(nx.relabel_nodes(G, mapping)) 111 | 112 | 113 | np.random.shuffle(graphs) 114 | #idx = np.random.RandomState(seed=2).permutation(len(graphs)) 115 | #graphs = [graphs[i] for i in idx] 116 | 117 | return graphs 118 | 119 | -------------------------------------------------------------------------------- /MLGkernel/evaluate_embedding.py: -------------------------------------------------------------------------------- 1 | from data_utils import read_graphfile 2 | import numpy as np 3 | import pandas as pd 4 | import os 5 | import sys 6 | 7 | from sklearn.model_selection import cross_val_score 8 | from sklearn.model_selection import GridSearchCV, KFold, StratifiedKFold 9 | from sklearn.svm import SVC, LinearSVC 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.ensemble import RandomForestClassifier 12 | from sklearn import preprocessing 13 | from sklearn.metrics import accuracy_score 14 | from sklearn.manifold import TSNE 15 | 16 | def evaluate_embedding(embeddings, labels): 17 | 18 | labels = preprocessing.LabelEncoder().fit_transform(labels) 19 | x, y = np.array(embeddings), np.array(labels) 20 | print(x.shape, y.shape) 21 | 22 | kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None) 23 | accuracies = [] 24 | for train_index, test_index in kf.split(x, y): 25 | 26 | x_train, x_test = x[train_index], x[test_index] 27 | y_train, y_test = y[train_index], y[test_index] 28 | search=True 29 | if search: 30 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 31 | classifier = GridSearchCV(SVC(), params, cv=5, scoring='accuracy', verbose=0) 32 | else: 33 | classifier = SVC(C=10) 34 | classifier.fit(x_train, y_train) 35 | accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) 36 | 37 | svm_accuracies = np.mean(accuracies) 38 | 39 | accuracies = [] 40 | for train_index, test_index in kf.split(x, y): 41 | 42 | x_train, x_test = x[train_index], x[test_index] 43 | y_train, y_test = y[train_index], y[test_index] 44 | search=True 45 | if search: 46 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 47 | classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy', verbose=0) 48 | else: 49 | classifier = LinearSVC(C=10) 50 | classifier.fit(x_train, y_train) 51 | accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) 52 | print('LinearSvc', np.mean(accuracies)) 53 
| print('svc', svm_accuracies) 54 | 55 | # def get_mutag(): 56 | # emb = [] 57 | # with open('data/results/output.txt', 'r') as f: 58 | # for line in f: 59 | # emb.append(list(map(float, [x for x in line.strip().split()]))) 60 | 61 | # ret = [] 62 | # for i in range(188): 63 | # with open('./data/mutag/mutag_{}.graph'.format(i+1), 'r') as f: 64 | # x = f.readlines() 65 | # ret.append(int(x[-1].strip())) 66 | # return emb, ret 67 | 68 | 69 | if __name__ == '__main__': 70 | # x, y = get_mutag() 71 | emb = [] 72 | with open('data/results/{}_output.txt'.format(sys.argv[1]), 'r') as f: 73 | for line in f: 74 | emb.append(list(map(float, [x for x in line.strip().split()]))) 75 | 76 | with open('../data/{}_label.txt'.format(sys.argv[1]), 'r') as f: 77 | y = f.readlines() 78 | y = [int(x.strip()) for x in y] 79 | 80 | evaluate_embedding(emb, y) 81 | -------------------------------------------------------------------------------- /MLGkernel/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | # Run the MLG kernel on the MUTAG dataset with parameters: 3 | # radius = 1 4 | # levels = 2 5 | # eta = 0.1 6 | # gamma = 0.01 7 | # num threads = 32 8 | # grow = 1 # if you want the subgraphs to double in size at each level, set this equal to 0 9 | 10 | # Replace MUTAG with the dataset name of your choice(PTC/PROTEINS/NCI1/NCI109). 11 | BASE=`pwd` 12 | dset=$1 13 | data=$BASE/../data/$dset.txt 14 | feats=$BASE/..//data/$dset\_nodelabels.txt 15 | save=$BASE//data/results/output.txt 16 | mkdir -p $BASE/data/results/ 17 | 18 | ~/ENV/bin/python3 preprocess.py $dset 19 | 20 | for r in 1 2 3 4 21 | do 22 | for l in 1 2 3 4 23 | do 24 | for g in 0.01 0.1 1 25 | do 26 | for e in 0.01 0.1 1 27 | do 28 | 29 | cd MLGkernel 30 | ./runMLG -d $data -f $feats -s $save -r $r -l $l -e $e -g $g -t 32 -m 1 31 | cd ../ 32 | ~/ENV/bin/python3 evaluate_embedding.py $dset >> $dset.log 33 | done 34 | done 35 | done 36 | done 37 | -------------------------------------------------------------------------------- /MLGkernel/include/pMMFglobal.inc: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "pMMFbase.hpp" 19 | #include "Vectorv.hpp" 20 | #include "Vectorl.hpp" 21 | #include "Vectorh.hpp" 22 | #include "ThreadManager.hpp" 23 | 24 | bool multithreading=true; 25 | ThreadManager threadManager(4); 26 | mutex cout_mutex; 27 | mutex CoutLock::mx; 28 | 29 | std::default_random_engine randomNumberGenerator; 30 | 31 | FIELD Vectorv::dummyZero=0; 32 | FIELD Vectorl::dummyZero=0; 33 | FIELD Vectorh::dummyZero=0; 34 | 35 | char strbuffer[255]; 36 | 37 | Log mlog; 38 | 39 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Activemap.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "Activemap.hpp" 19 | 20 | extern default_random_engine randomNumberGenerator; 21 | 22 | 23 | int Activemap::random(){ 24 | uniform_int_distribution distri(0,nactive-1); 25 | return forward[distri(randomNumberGenerator)]; 26 | } 27 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Activemap.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #ifndef _Activemap 19 | #define _Activemap 20 | 21 | #include "Remap.hpp" 22 | 23 | class Activemap: public Remap{ 24 | public: 25 | 26 | Activemap(const int n=1): Remap(n), nactive(n){} 27 | 28 | public: 29 | 30 | int random(); 31 | 32 | void remove(const int i){ 33 | if(backward[i]!=nactive-1) swap(backward[i],nactive-1); 34 | nactive--; 35 | } 36 | 37 | bool isactive(const int i) const {return(backward[i] lock(mx); 40 | assert(x.nrows==nrows); assert(x.ncols==ncols); 41 | for(int i=0; i distr; 34 | for(int i=0; i distr; 41 | for(int i=0; i list): DenseVector(list.size()){ 46 | array=new FIELD[n]; int i=0; for(FIELD v:list) array[i++]=v; 47 | } 48 | 49 | 50 | Cvector::Cvector(const int _n, const FIELD* _array): DenseVector(_n){ 51 | array=new FIELD[n]; for(int i=0; i 87 | Eigen::VectorXd Cvector::convert() const{ 88 | Eigen::VectorXd v(n); 89 | for(int i=0; i 22 | #include 23 | //#include 24 | 25 | // The purpose of these adaptors is to avoid having to include Eigen/Dense or Eigen/Core in any of the 26 | // header files of the native vector/matrix classes, which would slow down compilation. 27 | 28 | typedef Eigen::SparseMatrix EigenSparseMatrix; 29 | 30 | class EigenVectorXdAdaptor: public Eigen::VectorXd{ 31 | public: 32 | EigenVectorXdAdaptor(const Eigen::VectorXd& M): Eigen::VectorXd(M){} 33 | }; 34 | 35 | class EigenMatrixXdAdaptor: public Eigen::MatrixXd{ 36 | public: 37 | EigenMatrixXdAdaptor(const Eigen::MatrixXd& M): Eigen::MatrixXd(M){} 38 | }; 39 | 40 | 41 | #endif 42 | -------------------------------------------------------------------------------- /MLGkernel/matrices/GramMatrix.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include"GramMatrix.hpp" 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /MLGkernel/matrices/GramMatrix.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #ifndef _GramMatrix 19 | #define _GramMatrix 20 | 21 | #include 22 | #include "MatrixX.hpp" 23 | 24 | template 25 | class GramMatrix: public MATRIX{ 26 | public: 27 | 28 | using class MATRIX::MATRIX; 29 | 30 | public: 31 | 32 | template 33 | GramMatrix(MatrixX& A) 34 | 35 | 36 | public: 37 | 38 | 39 | }; 40 | 41 | 42 | template 43 | template 44 | GramMatrix::GramMatrix(MatrixX& A): MATRIX(MATRIX::Zero(A.nrows,A.nrows)){ 45 | assert(A.nrows==A.ncols); // assumption: A is symmetric 46 | for(int i=0; inFilled>0.2*nrows){ 48 | for(int j=0; j<=i; j++){ 49 | (*this)(i,j)=A.column[i]->dot(*A.column[j]); 50 | (*this)(j,i)=(*this)(i,j); 51 | } 52 | }else{ 53 | unordered_map neighbors; 54 | A.column[i]->for_each([&A,&neighbors](int j, FIELD dummy){ 55 | A.column[j]->for_each([&neighbors](int k, FIELD dummy){neighbors.insert(k);}); 56 | }); 57 | for(auto j:neighbors){ 58 | (*this)(i,j)=A.column[i]->dot(*A.column[j]); 59 | (*this)(j,i)=(*this)(i,j); 60 | } 61 | } 62 | } 63 | 64 | } 65 | 66 | 67 | 68 | #endif 69 | -------------------------------------------------------------------------------- /MLGkernel/matrices/LapackInterface.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #ifndef _LapackInterface 19 | #define _LapackInterface 20 | 21 | #include 22 | #include 23 | 24 | // The purpose of these adaptors is to avoid having to include lapacke/include in any of the 25 | // header files of the native vector/matrix classes, which would slow down compilation. 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR=.. 
2 | include $(ROOTDIR)/Makefile.base 3 | 4 | CFLAGS+= -fPIC#-std=c++11 5 | INCLUDE= -I$(INCLUDEDIR) -I$(UTILITYDIR) -I$(FILETYPESDIR) #-I$(MATRIXDIR) 6 | OBJFILES= $(UTILITYDIR)/*.o $(FILETYPESDIR)/*.o ./*.o 7 | LIBS= -lstdc++ -lm 8 | LMACROS= 9 | 10 | ifneq (,$(findstring withEigen,$(MACROS))) 11 | INCLUDE+= -I$(EIGENDIR) 12 | LMACROS+=-D_withEigen 13 | else ifneq (,$(findstring withLapack,$(MACROS))) 14 | INCLUDE+= -I$(LAPACKDIR)/LAPACKE/include/ -I$(CBLASDIR)/include 15 | OBJFILES+= $(LAPACKDIR)/liblapacke.a $(LAPACKDIR)/liblapack.a $(BLASDIR)/blas_LINUX.a $(CBLASDIR)/lib/cblas_LINUX.a 16 | LIBS+= -L$(FORTRANDIR)/lib/ -lgfortran 17 | LMACROS+=-D_withLapack 18 | endif 19 | 20 | 21 | matrices.o: *.hpp *.cpp 22 | $(CC) -c matrices.cpp $(CFLAGS) $(INCLUDE) $(LMACROS) 23 | 24 | 25 | objects: matrices.o 26 | 27 | tests: 28 | 29 | all: objects 30 | 31 | clean: 32 | @rm -f *.o 33 | 34 | anew: clean all 35 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Matrix.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "Matrix.hpp" 19 | #include "Cmatrix.hpp" 20 | 21 | 22 | Cmatrix Matrix::dot(const Matrix& x) const{ 23 | return Cmatrix(0,0); 24 | } 25 | 26 | 27 | string Matrix::str(const Dense dummy) const{ 28 | ostringstream stream; 29 | stream.precision(3); 30 | stream.setf(ios_base::fixed, ios_base::floatfield); 31 | for(int i=0; iread(i,j)<<" ";} 33 | stream<<" ]\n";} 34 | return stream.str(); 35 | } 36 | 37 | 38 | string Matrix::str(const Sparse dummy) const{ 39 | ostringstream stream; 40 | for(int i=0; i 23 | 24 | class Matrix{ 25 | public: 26 | 27 | virtual ~Matrix(){} 28 | 29 | public: // constructors 30 | 31 | Matrix(const int _nrows, const int _ncols): nrows(_nrows), ncols(_ncols) {} 32 | 33 | 34 | public: // member access 35 | 36 | virtual FIELD& operator()(const int i, const int j)=0; 37 | virtual FIELD operator()(const int i, const int j) const=0; 38 | virtual FIELD read(const int i, const int j) const=0; 39 | virtual bool isFilled(const int i, const int j) const=0; 40 | virtual int nFilled() const=0; 41 | virtual bool isSparse() const=0; 42 | 43 | //virtual void (foreach)(std::function lambda)=0; 44 | //virtual void (foreach)(std::function lambda) const=0; 45 | virtual void foreach_in_column(const int j, std::function lambda)=0; 46 | virtual void foreach_in_column(const int j, std::function lambda) const=0; 47 | 48 | public: // scalar valued operations 49 | 50 | virtual int nnz() const=0; 51 | 52 | public: 53 | 54 | virtual Cmatrix dot(const Matrix& x) const; // {}; 55 | 56 | public: 57 | 58 | virtual void saveTo(MatrixOF& file) const=0; 59 | 60 | virtual string str(const Dense dummy) const; 61 | virtual string str(const Sparse dummy) 
const; 62 | virtual string str() const{return str(Dense());} 63 | 64 | 65 | 66 | public: 67 | 68 | int nrows; 69 | int ncols; 70 | 71 | }; 72 | 73 | 74 | ostream& operator<<(ostream& stream, const Matrix& x); 75 | 76 | 77 | 78 | class SparseMatrix: public Matrix{ 79 | public: 80 | using Matrix::Matrix; 81 | bool isSparse() const {return true;} 82 | }; 83 | 84 | 85 | 86 | class DenseMatrix: public Matrix{ 87 | public: 88 | using Matrix::Matrix; 89 | bool isFilled(const int i, const int j) const {return true;} 90 | int nFilled() const {return nrows*ncols;} 91 | bool isSparse() const {return false;} 92 | }; 93 | 94 | 95 | 96 | 97 | // virtual Matrix* newof()=0; 98 | 99 | //virtual Cmatrix Cmatrix() const=0; 100 | // virtual SparseMatrixX MatrixXv() const=0; 101 | // virtual SparseMatrixX MatrixXl() const=0; 102 | // virtual SparseMatrixX MatrixXh() const=0; 103 | 104 | 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /MLGkernel/matrices/MatrixX.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "MatrixX.hpp" 19 | 20 | 21 | /* 22 | template<> 23 | void MatrixX::serialize(Bofstream& ofs) const{ 24 | ofs.tag("MatrixXv",0); 25 | ofs.write(nrows); 26 | ofs.write(ncols); 27 | for(int j=0; jserialize(ofs); 29 | }; 30 | 31 | template<> 32 | void MatrixX::serialize(Bofstream& ofs) const{ 33 | ofs.tag("MatrixXl",0); 34 | ofs.write(nrows); 35 | ofs.write(ncols); 36 | for(int j=0; jserialize(ofs); 38 | }; 39 | 40 | template<> 41 | void MatrixX::serialize(Bofstream& ofs) const{ 42 | ofs.tag("MatrixXh",0); 43 | ofs.write(nrows); 44 | ofs.write(ncols); 45 | for(int j=0; jserialize(ofs); 47 | }; 48 | */ 49 | 50 | /* 51 | template<> 52 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){ 53 | ifs.check("MatrixXv",0); 54 | ifs.read(nrows); 55 | ifs.read(ncols); 56 | for(int j=0; j 61 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){ 62 | ifs.check("MatrixXl",0); 63 | ifs.read(nrows); 64 | ifs.read(ncols); 65 | for(int j=0; j 70 | MatrixX::MatrixX(Bifstream& ifs):SparseMatrix(0,0){ 71 | ifs.check("MatrixXh",0); 72 | ifs.read(nrows); 73 | ifs.read(ncols); 74 | for(int j=0; j 20 | 21 | extern default_random_engine randomNumberGenerator; 22 | 23 | 24 | Remap::Remap(const Remap& x): n(x.n){ 25 | forward=new int[n]; for(int i=0; i distr(i+1,n-1); 58 | int j=distr(randomNumberGenerator); 59 | R.swap(i,j);} 60 | return R; 61 | } 62 | 63 | 64 | /* DEPRECATED 65 | Remap::Remap(const int _n, const Random random): Remap(_n){ 66 | for(int i=0; i distr(i+1,n-1); 68 | int j=distr(randomNumberGenerator); 69 | swap(i,j); 70 | } 71 | } 72 | */ 73 | 74 | 75 | string Remap::str() const{ 76 | ostringstream stream; 77 | for(int i=0; i "< 20 | 21 | 22 | string 
Vector::str(const Dense dummy) const{ 23 | ostringstream stream; 24 | for(int i=0; i 23 | 24 | class Vector{ //: public Serializable{ 25 | public: 26 | 27 | Vector(const int _n): n(_n){} 28 | 29 | public: 30 | 31 | virtual FIELD& operator()(const int n)=0; 32 | virtual FIELD operator()(const int n) const=0; 33 | virtual FIELD read(const int i) const {return (*this)(i);} 34 | 35 | //virtual void (foreach)(std::function lambda)=0; 36 | //virtual void (foreach)(std::function lambda) const=0; 37 | 38 | virtual bool isFilled (const int i)const =0; 39 | virtual int nFilled() const=0; 40 | 41 | public: 42 | 43 | virtual int nnz() const=0; 44 | 45 | virtual int argmax() const=0; 46 | virtual int argmax_abs() const=0; 47 | 48 | virtual FIELD norm2() const=0; 49 | // FIELD diff2(const VECTOR& x)=0; 50 | 51 | public: 52 | 53 | //virtual void serialize(Bofstream& ofs) const=0; 54 | //virtual void serialize(Rstream& rstream) const=0; 55 | 56 | virtual string str(const Dense dummy) const; 57 | virtual string str(const Sparse dummy) const; 58 | virtual string str() const{return str(Dense());}; 59 | 60 | public: 61 | 62 | int n; 63 | 64 | }; 65 | 66 | 67 | ostream& operator<<(ostream& stream, const Vector& x); 68 | 69 | 70 | 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /MLGkernel/matrices/Vectorh.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | 18 | #include "Vectorv.hpp" 19 | #include "Vectorl.hpp" 20 | #include "Vectorh.hpp" 21 | #include "Cvector.hpp" 22 | #include 23 | 24 | extern default_random_engine randomNumberGenerator; 25 | 26 | 27 | /* 28 | Vectorh::Vectorh(const int _n, const class Random& dummy): SparseVector(_n){ 29 | uniform_real_distribution distr(0,1); 30 | for(int i=0; i distr(0,1); 38 | for(int i=0; i<_n; i++) 39 | if(distr(randomNumberGenerator)<=p) v[i]=distr(randomNumberGenerator); 40 | return v; 41 | } 42 | 43 | 44 | 45 | Vectorh::Vectorh(const Cvector& x): SparseVector(x.n){ 46 | for(int i=0; i 23 | 24 | extern default_random_engine randomNumberGenerator; 25 | 26 | 27 | /* 28 | Vectorl::Vectorl(const int _n, const class Random& dummy): SparseVector(_n){ 29 | uniform_real_distribution distr(0,1); 30 | for(int i=0; i distr(0,1); 38 | for(int i=0; i<_n; i++) 39 | if(distr(randomNumberGenerator)<=p) v.push_back(SVpair(i,distr(randomNumberGenerator))); 40 | return v; 41 | } 42 | 43 | 44 | Vectorl::Vectorl(const Cvector& x): SparseVector(x.n){ 45 | for(int i=0; i(x).sort(); for(auto& p:x) push_back(p);} 51 | 52 | 53 | Vectorl::Vectorl(const Vectorh& x): SparseVector(x.n){ 54 | for(auto& p:x) (*this)(p.first)=p.second;} 55 | 56 | 57 | Vectorl::Vectorl(const Vectorl& x, const class Remap& remap, const bool inverse): SparseVector(x.n){ 58 | if(!inverse) for(auto& p:x) (*this)(remap.forward[p.first])=p.second; 59 | else for(auto& p:x) (*this)(remap.backward[p.first])=p.second; 60 | } 61 | 62 | 63 | 64 | // ---- I/O ------------------------------------------------------------------------------------------------------ 65 | 66 | 67 | string Vectorl::classname(){return "Vectorl";} 68 | 69 | 70 | Vectorl::Vectorl(Bifstream& ifs): SparseVector(0){ 71 | ifs.check("Vectorl",0); 72 | ifs.read(n); 73 | ifs.read_list(*this); 74 | } 75 | 76 | 77 | void Vectorl::serialize(Bofstream& ofs) const{ 78 | ofs.tag("Vectorl",0); 79 | ofs.write(n); 80 | ofs.write_list(*this); 81 | } 82 | 83 | 84 | void Vectorl::serialize(Rstream& rstream) const{ 85 | rstream<<"Vectorl{"< 23 | 24 | extern default_random_engine randomNumberGenerator; 25 | 26 | 27 | /* 28 | Vectorv::Vectorv(const int _n, const class Random& dummy): SparseVector(_n), sorted(0){ 29 | uniform_real_distribution distr(0,1); 30 | for(int i=0; i distr(0,1); 39 | for(int i=0; i<_n; i++) 40 | if(distr(randomNumberGenerator)<=p) v.push_back(SVpair(i,distr(randomNumberGenerator))); 41 | return v; 42 | } 43 | 44 | 45 | // ---- Conversions ----------------------------------------------------------------------------------------------- 46 | 47 | 48 | Vectorv::Vectorv(const Cvector& x): SparseVector(x.n), sorted(true){ 49 | for(int i=0; i 21 | #include "pMMFbase.hpp" 22 | 23 | class LogStream{ 24 | public: 25 | 26 | //virtual void write(const string& s)=0; 27 | virtual LogStream& operator<<(const char* s)=0; 28 | virtual LogStream& operator<<(const string& s)=0; 29 | virtual LogStream& operator<<(const int& x)=0; 30 | virtual LogStream& operator<<(const double& x)=0; 31 | 32 | }; 33 | 34 | 35 | class Log{ 36 | public: 37 | 38 | Log(){startClock();} 39 | 40 | public: 41 | 42 | Log& operator<<(const string& s); 43 | Log& operator<<(const char* s); 44 | 45 | /* 46 | Log& skipline(const int n=1, const int v=0){ 47 | if(verbosity=n) return *this; 49 | for(int i=0; i=n) return *this; 57 | if(stream==nullptr) for(int i=0; i > time; 78 | 79 | chrono::time_point t; 80 | 81 | int verbosity=0; 82 | int 
skippedlines=0; 83 | 84 | LogStream* stream=nullptr; 85 | }; 86 | 87 | 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /MLGkernel/utility/Makefile: -------------------------------------------------------------------------------- 1 | ROOTDIR= .. 2 | include $(ROOTDIR)/Makefile.base 3 | 4 | CFLAGS+= -I $(INCLUDEDIR) -I $(UTILITYDIR) -I $(MATRIXDIR) -fPIC # -std=c++11 5 | 6 | 7 | Log.o: Log.hpp Log.cpp 8 | $(CC) -c Log.cpp $(CFLAGS) 9 | 10 | Rstream.o: Rstream.hpp Rstream.cpp 11 | $(CC) -c Rstream.cpp $(CFLAGS) 12 | 13 | Bofstream.o: Bofstream.hpp Bofstream.cpp 14 | $(CC) -c Bofstream.cpp $(CFLAGS) 15 | 16 | Bifstream.o: Bifstream.hpp Bifstream.cpp 17 | $(CC) -c Bifstream.cpp $(CFLAGS) 18 | 19 | Serializable.o: Serializable.hpp Serializable.cpp 20 | $(CC) -c Serializable.cpp $(CFLAGS) 21 | 22 | TopkList.o: TopkList.hpp TopkList.cpp 23 | $(CC) -c TopkList.cpp $(CFLAGS) 24 | 25 | ThreadBank.o: ThreadBank.hpp ThreadBank.cpp 26 | $(CC) -c ThreadBank.cpp $(CFLAGS) 27 | 28 | ThreadManager.o: ThreadManager.hpp ThreadManager.cpp ThreadBank.hpp 29 | $(CC) -c ThreadManager.cpp $(CFLAGS) 30 | 31 | #Graph.o: Graph.cpp Graph.hpp 32 | # $(CC) -c Graph.cpp $(CFLAGS) $(INCLUDE) 33 | 34 | filetypes.o: filetypes/*.hpp filetypes/*.cpp 35 | $(CC) -c filetypes/filetypes.cpp $(CFLAGS) $(INCLUDE) -I filetypes/ $(MACROS) 36 | 37 | 38 | objects: Log.o Rstream.o Bofstream.o Bifstream.o Serializable.o TopkList.o \ 39 | ThreadBank.o ThreadManager.o filetypes.o #Graph.o 40 | 41 | tests: 42 | 43 | all: objects tests 44 | 45 | clean: 46 | @rm -f *.o 47 | 48 | anew: clean all 49 | -------------------------------------------------------------------------------- /MLGkernel/utility/Rstream.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #include "Rstream.hpp" 18 | 19 | -------------------------------------------------------------------------------- /MLGkernel/utility/Rstream.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | #ifndef _Rstream 18 | #define _Rstream 19 | 20 | #include "pMMFbase.hpp" 21 | 22 | 23 | class Rstream{ 24 | public: 25 | 26 | Rstream(ostream& _out, const int _depth=16):out(_out),indent(0),depth(_depth),bol(true){} 27 | 28 | ~Rstream(){out< 33 | Rstream& operator<<(const T& x){ 34 | //if(typeid(x)==typeid(Rstream::end)) {out< 52 | Rstream& write(const T& x){ 53 | if(depth<0){out< 62 | const Rstream& var(const char* name, const T& x){ 63 | if(bol) {for(int i=0; i 21 | #include "pMMFbase.hpp" 22 | #include "ThreadManager.hpp" 23 | 24 | using namespace std; 25 | 26 | 27 | //extern mutex cout_mutex; 28 | extern ThreadManager threadManager; 29 | 30 | 31 | class ThreadBank{ 32 | public: 33 | 34 | ThreadBank()=delete; 35 | 36 | ThreadBank(const int _maxthreads=1000, const int _maxprivileged=1): 37 | maxthreads(_maxthreads), maxprivileged(_maxprivileged), nthreads(0), nprivileged(0) {gate.lock();}; 38 | 39 | ~ThreadBank(){for(auto& th:threads) th.join();} 40 | 41 | 42 | public: 43 | 44 | template 45 | void add(FUNCTION lambda, const OBJ x){ 46 | lock_guard lock(mx); // unnecessary if called from a single thread 47 | threadManager.enqueue(this); 48 | gate.lock(); // gate can only be unlocked by threadManager 49 | nthreads++; 50 | threads.push_back(thread([this,lambda](OBJ _x){lambda(_x); nthreads--; threadManager.release(this);},x)); 51 | #ifdef _THREADBANKVERBOSE 52 | printinfo(); 53 | #endif 54 | } 55 | 56 | 57 | template 58 | void add(FUNCTION lambda, const OBJ1 x1, const OBJ2 x2){ 59 | lock_guard lock(mx); 60 | threadManager.enqueue(this); 61 | gate.lock(); 62 | nthreads++; 63 | threads.push_back(thread([this,lambda](OBJ1 _x1, OBJ2 _x2){ 64 | lambda(_x1,_x2); nthreads--; threadManager.release(this);},x1,x2)); 65 | #ifdef _THREADBANKVERBOSE 66 | printinfo(); 67 | #endif 68 | } 69 | 70 | 71 | template 72 | void add(FUNCTION lambda, const OBJ1 x1, const OBJ2 x2, const OBJ3 x3){ 73 | lock_guard lock(mx); 74 | threadManager.enqueue(this); 75 | gate.lock(); 76 | nthreads++; 77 | threads.push_back(thread([this,lambda](OBJ1 _x1, OBJ2 _x2, OBJ3 _x3){ 78 | lambda(_x1,_x2,_x3); nthreads--; threadManager.release(this);},x1,x2,x3)); 79 | #ifdef _THREADBANKVERBOSE 80 | printinfo(); 81 | #endif 82 | } 83 | 84 | 85 | bool is_ready(){return nthreads nthreads; 100 | int nprivileged=0; // only to be touched by threadManager 101 | int maxthreads=4; 102 | int maxprivileged=1; 103 | 104 | vector threads; 105 | 106 | }; 107 | 108 | 109 | 110 | 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /MLGkernel/utility/ThreadManager.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | 17 | #include "ThreadManager.hpp" 18 | #include "ThreadBank.hpp" 19 | 20 | 21 | void ThreadManager::enqueue(ThreadBank* bank){ 22 | lock_guard lock(mx); 23 | if(is_runnable(bank)) launch(bank); 24 | else queue.push_back(bank); 25 | } 26 | 27 | 28 | void ThreadManager::release(ThreadBank* bank){ 29 | lock_guard lock(mx); 30 | if(bank->nprivileged>0) bank->nprivileged--; 31 | else nthreads--; 32 | for(auto it=queue.begin(); it!=queue.end(); it++) 33 | if(is_runnable(*it)){ 34 | launch(*it); 35 | it=queue.erase(it); 36 | } 37 | // auto it=find_if(queue.begin(),queue.end(),[this](ThreadBank* bank){return is_runnable(bank);}); 38 | // if(it==queue.end()) return; 39 | // ThreadBank* bank=*it; 40 | // queue.erase(it); 41 | // launch(bank); 42 | } 43 | 44 | 45 | bool ThreadManager::is_runnable(ThreadBank* bank){ 46 | return bank->is_ready() && (bank->nprivilegedmaxprivileged || nthreadsnprivilegedmaxprivileged) bank->nprivileged++; 52 | else nthreads++; 53 | bank->gate.unlock(); 54 | } 55 | 56 | 57 | /* 58 | void addBank(const ThreadBank* bank){ 59 | lock_guard lock(mx); 60 | banks.push_front(bank); 61 | } 62 | 63 | void removeBank(const ThreadBank* bank){ 64 | lock_guard lock(mx); 65 | banks.remove(bank); 66 | } 67 | */ 68 | -------------------------------------------------------------------------------- /MLGkernel/utility/ThreadManager.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #ifndef _ThreadManager 18 | #define _ThreadManager 19 | 20 | #include 21 | #include 22 | #include 23 | 24 | class ThreadBank; 25 | 26 | using namespace std; 27 | 28 | 29 | class ThreadManager{ 30 | public: 31 | 32 | ThreadManager(const int _maxthreads):maxthreads(_maxthreads),nthreads(0){} 33 | ~ThreadManager(){} 34 | 35 | public: 36 | 37 | void enqueue(ThreadBank* bank); 38 | void release(ThreadBank* bank); 39 | 40 | int get_nthreads(){lock_guard lock(mx); return nthreads;} 41 | 42 | private: 43 | 44 | bool is_runnable(ThreadBank* bank); 45 | void launch(ThreadBank* bank); 46 | 47 | public: 48 | 49 | int maxthreads; 50 | 51 | private: 52 | 53 | mutex mx; 54 | int nthreads; 55 | list queue; 56 | 57 | }; 58 | 59 | 60 | #endif 61 | -------------------------------------------------------------------------------- /MLGkernel/utility/TopkList.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 
6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #include "TopkList.hpp" 18 | 19 | -------------------------------------------------------------------------------- /MLGkernel/utility/TopkList.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | 17 | #ifndef _TopkList 18 | #define _TopkList 19 | 20 | #include 21 | #include "pMMFbase.hpp" 22 | //#include "DenseVector.hpp" 23 | 24 | struct TopkListPair{ 25 | TopkListPair(const INDEX& _first, const FIELD& _second):first(_first),second(_second){}; 26 | INDEX first; 27 | FIELD second; 28 | }; 29 | 30 | 31 | class TopkList: public list{ 32 | public: 33 | 34 | TopkList(const int _k): k(_k), lowestv(numeric_limits::lowest()){} 35 | 36 | // TopkList(const DenseVector& v, const int _k): k(_k), lowestv(-10000){ 37 | // for(int i=0; ilowestv) insert(i,v(i));} 38 | 39 | public: 40 | 41 | void insert(int index, FIELD value){ 42 | auto it=begin(); 43 | while(it!=end() && it->second>=value){it++;} 44 | list::insert(it,TopkListPair(index,value)); 45 | if(size()>k) pop_back(); 46 | if(size()>=k) lowestv=back().second; 47 | } 48 | 49 | void consider(int index, FIELD value){ 50 | if(value>lowestv || size()<second>=value){it++;} 53 | list::insert(it,TopkListPair(index,value)); 54 | if(size()>k) pop_back(); 55 | if(size()>=k) lowestv=back().second; 56 | } 57 | } 58 | 59 | IndexSet indices() const{ 60 | IndexSet I(size()); int i=0; 61 | for(auto& p:*this) I[i++]=p.first; 62 | return I; 63 | } 64 | 65 | 66 | public: 67 | 68 | int k; 69 | FIELD lowestv; 70 | int lowestp; 71 | 72 | }; 73 | 74 | 75 | 76 | #endif 77 | 78 | 79 | /* 80 | // vector version 81 | void insert(int index, FIELD value){ 82 | if(size() 21 | 22 | class MatrixIF{ 23 | public: 24 | 25 | ~MatrixIF(){ifs.close();} 26 | 27 | public: 28 | 29 | virtual void rewind(){} 30 | 31 | virtual MatrixIF& operator>>(FIELD& v){ 32 | cout<<"Error: operator>>(FIELD& ) not supported in sparse matrix input files."<>(IndexValueTriple& t){ 36 | cout<<"Error: operator>>(IndexValueTriple& ) not supported in dense matrix input files."<0); 43 | // cout<<"Line length="<>b; ncols++;} 47 | nrows=0; while(ifs.good()) {for(int i=0; i>b; nrows++;} 48 | // cout<>(IndexValueTriple& dest){ 59 | dest.i=i; dest.j=j; 60 | if(++j>=ncols) { j=0; i++; } 61 | if(ifs.good() && i<=nrows) ifs>>dest.value; 
else {dest.i=-1; return *this;} 62 | return *this; 63 | } 64 | 65 | MatrixIF& operator>>(FIELD& dest){ 66 | if(++j>=ncols) { j=0; i++; } 67 | if(ifs.good() && i<=nrows) ifs>>dest; 68 | return *this; 69 | } 70 | 71 | 72 | public: 73 | 74 | int i; 75 | int j; 76 | bool eof; 77 | 78 | }; 79 | 80 | 81 | 82 | class MatrixIF_ASCII::Sparse: public MatrixIF_ASCII{ 83 | public: 84 | 85 | Sparse(const string filename){ 86 | sparse=1; 87 | ifs.open(filename); 88 | char buffer[255]; 89 | ifs.get(buffer,255); 90 | ifs.close(); 91 | 92 | ifs.open(filename); 93 | int nextracted=0; 94 | while(ifs.good() && ifs.tellg()>b; if(!ifs.fail()) nextracted++;} 95 | if(nextracted==2){ifs.close(); ifs.open(filename); ifs>>nrows>>ncols; return;} 96 | if(nextracted==3){ 97 | ifs.close(); ifs.open(filename); 98 | nrows=0; ncols=0; 99 | int a; int b; float f; 100 | while(ifs.good()){ 101 | ifs>>a>>b>>f; 102 | if(a>nrows-1) nrows=a+1; 103 | if(b>ncols-1) ncols=b+1; 104 | } 105 | ifs.close(); ifs.open(filename); 106 | return; 107 | } 108 | cout<<"Error: could not parse first line"<>(IndexValueTriple& dest){ 115 | if(!ifs.good()){dest.i=-1; return *this;} 116 | ifs>>dest.i>>dest.j>>dest.value; return *this; 117 | } 118 | 119 | public: 120 | 121 | }; 122 | 123 | 124 | #endif 125 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixIF_Boeing.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | #include "MatrixIF_Boeing.hpp" 17 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixIF_Matlab.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 
15 | ----------------------------------------------------------------------------- */ 16 | #include "MatrixIF_Matlab.hpp" 17 | 18 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixIF_Matlab.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | #ifndef _MatrixIF_Matlab 17 | #define _MatrixIF_Matlab 18 | 19 | #include "MatrixIF.hpp" 20 | #include 21 | 22 | 23 | class MatrixIF_Matlab: public MatrixIF{ 24 | public: 25 | 26 | class Dense; 27 | class Sparse; 28 | 29 | }; 30 | 31 | 32 | 33 | class MatrixIF_Matlab::Dense: public MatrixIF_Matlab{ 34 | public: 35 | 36 | Dense(const string filename){ 37 | sparse=0; 38 | matfile=Mat_Open(filename.c_str(),MAT_ACC_RDONLY); 39 | if(matfile==NULL){cout<<"Error: file cannot be opened"<dims[0]; 43 | ncols=matvar->dims[1]; 44 | next=reinterpret_cast(matvar->data); 45 | 46 | //need to swap to 'transpose' the array since matIO reads it in column major order 47 | /*int n = 5; //sqrt(nrows*ncols); 48 | cout<<"size"<>(FIELD& dest){ 70 | dest = *next; 71 | next++; 72 | return *this; 73 | } 74 | 75 | public: 76 | 77 | mat_t* matfile; 78 | matvar_t* matvar; 79 | double *next; 80 | 81 | }; 82 | 83 | 84 | 85 | class MatrixIF_Matlab::Sparse: public MatrixIF_Matlab{ 86 | public: 87 | 88 | Sparse(const string filename){ 89 | sparse=1; 90 | matfile=Mat_Open(filename.c_str(),MAT_ACC_RDONLY); 91 | if(matfile==NULL){cout<<"Error: file "<class_type == MAT_C_SPARSE){ 96 | sparse = (mat_sparse_t*)matvar->data; 97 | } 98 | nrows=matvar->dims[0]; 99 | ncols=matvar->dims[1]; 100 | next=reinterpret_cast(sparse->data); 101 | //Mat_VarPrint(matvar,1); 102 | //cout<<"printed"<jc; 104 | Ir = sparse->ir; 105 | njc= sparse->njc; 106 | ndata=sparse->ndata; 107 | cout<<"read"<>(IndexValueTriple& dest){ 114 | int i = indIr; int j = indJc; int c = 0; 115 | for (; i < njc-1; i++ ) { 116 | c= 0; 117 | for (; j0){ 124 | break; 125 | } 126 | indIr = i; 127 | } 128 | dest.value = *next++; 129 | if(!dest.value || i>=njc-1){dest.i=-1; return *this;} 130 | return *this; 131 | } 132 | 133 | 134 | public: 135 | 136 | mat_t* matfile; 137 | matvar_t* matvar; 138 | double *next; 139 | 140 | int indIr= 0; //sparse->ir; 141 | int indJc =0; 142 | int* Ir; 143 | int* Jc; //sparse->jc; 144 | int njc; 145 | int ndata; 146 | 147 | }; 148 | 149 | 150 | #endif 151 | -------------------------------------------------------------------------------- /MLGkernel/utility/filetypes/MatrixOF.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | ----------------------------------------------------------------------------- 3 | 4 | MLGkernel is an open source implementation of the Multiscale Laplacian Graph 5 | Kernel for computing the gram 
matrix of a collection of graphs. 6 | 7 | 8 | Copyright (C) 2016 Imre Risi Kondor, Horace Pan 9 | Copyright (C) 2016 Imre Risi Kondor, Nedelina Teneva, Pramod K Mudrakarta 10 | 11 | 12 | The following code is a derivative work of the code from the pMMF library(https://github.com/risi-kondor/pMMF) 13 | which is licensed under the GNU Public License, version 3. 14 | This code therefore is also licensed under the terms of the GNU Public License, version 3. 15 | ----------------------------------------------------------------------------- */ 16 | #ifndef _MatrixOF 17 | #define _MatrixOF 18 | 19 | #include "pMMFbase.hpp" 20 | #include 21 | 22 | class MatrixOF{ 23 | public: 24 | 25 | //MatrixOF(const char* filename, const int _nrows, const int _ncols): 26 | // nrows(_nrows), ncols(_ncols), ofs(filename){} 27 | 28 | ~MatrixOF(){ofs.close();} 29 | 30 | public: 31 | 32 | virtual MatrixOF& operator<<(const FIELD& v){ 33 | cout<<"Error: operator<<(FIELD& ) not supported in sparse matrix output files."< 21 | 22 | class MatrixOF_Boeing: public MatrixOF{ 23 | public: 24 | 25 | MatrixOF_Boeing(const string filename, const int _nrows, const int _ncols){ 26 | nrows=_nrows; ncols=_ncols; sparse=1; 27 | 28 | // Write row indices, column pointers, values in separate files and concatenate 29 | rowfs.open("row.temp"); 30 | colfs.open("col.temp"); 31 | valfs.open("val.temp"); 32 | 33 | nnz=0; 34 | nnz_in_col=0; 35 | rowchars=colchars=valchars=0; 36 | colptr_val=1; 37 | 38 | colfs<t.j) return *this; // write only the upper triangular part 80 | if(t.j==current_col+1) { // no more nonzeros in previous column 81 | if (colchars+1+std::to_string(colptr_val+nnz_in_col).length() > 80) { 82 | colfs< 80) { 96 | rowfs< 80) { 103 | valfs<= num_sample else to_neigh for to_neigh in to_neighs] 43 | else: 44 | samp_neighs = to_neighs 45 | 46 | if self.gcn: 47 | samp_neighs = [samp_neigh + set([nodes[i]]) for i, samp_neigh in enumerate(samp_neighs)] 48 | unique_nodes_list = list(set.union(*samp_neighs)) 49 | unique_nodes = {n:i for i,n in enumerate(unique_nodes_list)} 50 | mask = Variable(torch.zeros(len(samp_neighs), len(unique_nodes))) 51 | column_indices = [unique_nodes[n] for samp_neigh in samp_neighs for n in samp_neigh] 52 | row_indices = [i for i in range(len(samp_neighs)) for j in range(len(samp_neighs[i]))] 53 | mask[row_indices, column_indices] = 1 54 | if self.cuda: 55 | mask = mask.cuda() 56 | num_neigh = mask.sum(1, keepdim=True) 57 | mask = mask.div(num_neigh) 58 | if self.cuda: 59 | embed_matrix = self.features(torch.LongTensor(unique_nodes_list).cuda()) 60 | else: 61 | embed_matrix = self.features(torch.LongTensor(unique_nodes_list)) 62 | to_feats = mask.mm(embed_matrix) 63 | return to_feats 64 | -------------------------------------------------------------------------------- /diffpool/cross_val.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import torch 4 | 5 | import pickle 6 | import random 7 | 8 | from graph_sampler import GraphSampler 9 | 10 | def prepare_val_data(graphs, args, val_idx, max_nodes=0): 11 | 12 | random.shuffle(graphs) 13 | val_size = len(graphs) // 10 14 | train_graphs = graphs[:val_idx * val_size] 15 | if val_idx < 9: 16 | train_graphs = train_graphs + graphs[(val_idx+1) * val_size :] 17 | val_graphs = graphs[val_idx*val_size: (val_idx+1)*val_size] 18 | print('Num training graphs: ', len(train_graphs), 19 | '; Num validation graphs: ', len(val_graphs)) 20 | 21 | print('Number of graphs: ', len(graphs)) 
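    # Added annotation (not part of the original cross_val.py): the slicing above is a
    # leave-one-fold-out split. With val_size = len(graphs) // 10, fold `val_idx`
    # (0..9) of the shuffled list becomes the validation set and the remaining nine
    # folds are concatenated into the training set; e.g. for 100 graphs and
    # val_idx = 3, train = graphs[0:30] + graphs[40:100] and val = graphs[30:40].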
22 | print('Number of edges: ', sum([G.number_of_edges() for G in graphs])) 23 | print('Max, avg, std of graph size: ', 24 | max([G.number_of_nodes() for G in graphs]), ', ' 25 | "{0:.2f}".format(np.mean([G.number_of_nodes() for G in graphs])), ', ' 26 | "{0:.2f}".format(np.std([G.number_of_nodes() for G in graphs]))) 27 | 28 | # minibatch 29 | dataset_sampler = GraphSampler(train_graphs, normalize=False, max_num_nodes=max_nodes, 30 | features=args.feature_type) 31 | train_dataset_loader = torch.utils.data.DataLoader( 32 | dataset_sampler, 33 | batch_size=args.batch_size, 34 | shuffle=True, 35 | num_workers=args.num_workers) 36 | 37 | dataset_sampler = GraphSampler(val_graphs, normalize=False, max_num_nodes=max_nodes, 38 | features=args.feature_type) 39 | val_dataset_loader = torch.utils.data.DataLoader( 40 | dataset_sampler, 41 | batch_size=args.batch_size, 42 | shuffle=False, 43 | num_workers=args.num_workers) 44 | 45 | return train_dataset_loader, val_dataset_loader, \ 46 | dataset_sampler.max_num_nodes, dataset_sampler.feat_dim, dataset_sampler.assign_feat_dim 47 | 48 | -------------------------------------------------------------------------------- /diffpool/gen/feat.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import networkx as nx 3 | import numpy as np 4 | import random 5 | 6 | class FeatureGen(metaclass=abc.ABCMeta): 7 | @abc.abstractmethod 8 | def gen_node_features(self, G): 9 | pass 10 | 11 | class ConstFeatureGen(FeatureGen): 12 | def __init__(self, val): 13 | self.val = val 14 | 15 | def gen_node_features(self, G): 16 | feat_dict = {i:{'feat': self.val} for i in G.nodes()} 17 | nx.set_node_attributes(G, feat_dict) 18 | 19 | class GaussianFeatureGen(FeatureGen): 20 | def __init__(self, mu, sigma): 21 | self.mu = mu 22 | self.sigma = sigma 23 | 24 | def gen_node_features(self, G): 25 | feat = np.random.multivariate_normal(mu, sigma, G.number_of_nodes()) 26 | feat_dict = {i:{'feat': feat[i]} for i in range(feat.shape[0])} 27 | nx.set_node_attributes(G, feat_dict) 28 | 29 | -------------------------------------------------------------------------------- /diffpool/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | # grid search for ENZYMES 3 | for method in 'base-set2set' 'base' 4 | do 5 | for gc in 4 8 16 2 6 | do 7 | python -m train --datadir=../data --bmname=ENZYMES --cuda=0 --max-nodes 1000 --epochs=100 --num-classes=3 --output-dim 512 --lr 0.001 --num-gc-layers $gc --method $method 8 | done 9 | done 10 | 11 | # DD 12 | #python -m train --datadir=data --bmname=DD --cuda=0 --max-nodes=500 --epochs=1000 13 | -------------------------------------------------------------------------------- /diffpool/graph_embedding.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /diffpool/graphsage.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | import numpy as np 6 | 7 | class SupervisedGraphSage(nn.Module): 8 | ''' GraphSage embeddings 9 | ''' 10 | 11 | def __init__(self, num_classes, enc): 12 | super(SupervisedGraphSage, self).__init__() 13 | self.enc = enc 14 | self.xent = nn.CrossEntropyLoss() 15 | 16 | self.weight = nn.Parameter(torch.FloatTensor(enc.embed_dim, num_classes)) 17 | init.xavier_uniform(self.weight) 18 | 19 | def 
forward(self, nodes): 20 | embeds = self.enc(nodes) 21 | scores = embeds.mm(self.weight) 22 | return scores 23 | 24 | def loss(self, nodes, labels): 25 | scores = self.forward(nodes) 26 | return self.xent(nn.softmax(scores), labels.squeeze()) 27 | 28 | -------------------------------------------------------------------------------- /diffpool/load_data.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import scipy as sc 4 | import os 5 | import re 6 | 7 | def read_graphfile(datadir, dataname, max_nodes=None): 8 | ''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets 9 | graph index starts with 1 in file 10 | 11 | Returns: 12 | List of networkx objects with graph and node labels 13 | ''' 14 | prefix = os.path.join(datadir, dataname, dataname) 15 | filename_graph_indic = prefix + '_graph_indicator.txt' 16 | # index of graphs that a given node belongs to 17 | graph_indic={} 18 | with open(filename_graph_indic) as f: 19 | i=1 20 | for line in f: 21 | line=line.strip("\n") 22 | graph_indic[i]=int(line) 23 | i+=1 24 | 25 | filename_nodes=prefix + '_node_labels.txt' 26 | node_labels=[] 27 | try: 28 | with open(filename_nodes) as f: 29 | for line in f: 30 | line=line.strip("\n") 31 | node_labels+=[int(line) - 1] 32 | num_unique_node_labels = max(node_labels) + 1 33 | except IOError: 34 | print('No node labels') 35 | 36 | filename_node_attrs=prefix + '_node_attributes.txt' 37 | node_attrs=[] 38 | try: 39 | with open(filename_node_attrs) as f: 40 | for line in f: 41 | line = line.strip("\s\n") 42 | attrs = [float(attr) for attr in re.split("[,\s]+", line) if not attr == ''] 43 | node_attrs.append(np.array(attrs)) 44 | except IOError: 45 | print('No node attributes') 46 | 47 | label_has_zero = False 48 | filename_graphs=prefix + '_graph_labels.txt' 49 | graph_labels=[] 50 | with open(filename_graphs) as f: 51 | for line in f: 52 | line=line.strip("\n") 53 | val = int(line) 54 | if val == 0: 55 | label_has_zero = True 56 | graph_labels.append(val - 1) 57 | graph_labels = np.array(graph_labels) 58 | if label_has_zero: 59 | graph_labels += 1 60 | 61 | filename_adj=prefix + '_A.txt' 62 | adj_list={i:[] for i in range(1,len(graph_labels)+1)} 63 | index_graph={i:[] for i in range(1,len(graph_labels)+1)} 64 | num_edges = 0 65 | with open(filename_adj) as f: 66 | for line in f: 67 | line=line.strip("\n").split(",") 68 | e0,e1=(int(line[0].strip(" ")),int(line[1].strip(" "))) 69 | adj_list[graph_indic[e0]].append((e0,e1)) 70 | index_graph[graph_indic[e0]]+=[e0,e1] 71 | num_edges += 1 72 | for k in index_graph.keys(): 73 | index_graph[k]=[u-1 for u in set(index_graph[k])] 74 | 75 | graphs=[] 76 | for i in range(1,1+len(adj_list)): 77 | # indexed from 1 here 78 | G=nx.from_edgelist(adj_list[i]) 79 | if max_nodes is not None and G.number_of_nodes() > max_nodes: 80 | continue 81 | 82 | # add features and labels 83 | G.graph['label'] = graph_labels[i-1] 84 | for u in G.nodes(): 85 | if len(node_labels) > 0: 86 | node_label_one_hot = [0] * num_unique_node_labels 87 | node_label = node_labels[u-1] 88 | node_label_one_hot[node_label] = 1 89 | G.node[u]['label'] = node_label_one_hot 90 | if len(node_attrs) > 0: 91 | G.node[u]['feat'] = node_attrs[u-1] 92 | if len(node_attrs) > 0: 93 | G.graph['feat_dim'] = node_attrs[0].shape[0] 94 | 95 | # relabeling 96 | mapping={} 97 | it=0 98 | if float(nx.__version__)<2.0: 99 | for n in G.nodes(): 100 | mapping[n]=it 101 | it+=1 102 | else: 103 | for n in 
G.nodes: 104 | mapping[n]=it 105 | it+=1 106 | 107 | # indexed from 0 108 | graphs.append(nx.relabel_nodes(G, mapping)) 109 | return graphs 110 | 111 | -------------------------------------------------------------------------------- /diffpool/partition.py: -------------------------------------------------------------------------------- 1 | import networkx 2 | import numpy as np 3 | 4 | def partition(embeddings): 5 | ''' Compute a partition of embeddings, where each partition is pooled together. 6 | Args: 7 | embeddings: N-by-D matrix, where N is the number of node embeddings, and D 8 | is the embedding dimension. 9 | ''' 10 | dist = np.dot(embeddings) 11 | 12 | def kruskal(adj): 13 | # initialize MST 14 | MST = set() 15 | edges = set() 16 | num_nodes = adj.shape[0] 17 | # collect all edges from graph G 18 | for j in range(num_nodes): 19 | for k in range(num_nodes): 20 | if G.graph[j][k] != 0 and (k, j) not in edges: 21 | edges.add((j, k)) 22 | # sort all edges in graph G by weights from smallest to largest 23 | sorted_edges = sorted(edges, key=lambda e:G.graph[e[0]][e[1]]) 24 | uf = UF(G.vertices) 25 | for e in sorted_edges: 26 | u, v = e 27 | # if u, v already connected, abort this edge 28 | if uf.connected(u, v): 29 | continue 30 | # if not, connect them and add this edge to the MST 31 | uf.union(u, v) 32 | MST.add(e) 33 | return MST 34 | 35 | -------------------------------------------------------------------------------- /diffpool/set2set.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import init 4 | import torch.nn.functional as F 5 | 6 | import numpy as np 7 | 8 | class Set2Set(nn.Module): 9 | def __init__(self, input_dim, hidden_dim, act_fn=nn.ReLU, num_layers=1): 10 | ''' 11 | Args: 12 | input_dim: input dim of Set2Set. 13 | hidden_dim: the dim of set representation, which is also the INPUT dimension of 14 | the LSTM in Set2Set. 15 | This is a concatenation of weighted sum of embedding (dim input_dim), and the LSTM 16 | hidden/output (dim: self.lstm_output_dim). 
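        Shape note (added annotation, not in the original docstring): as written,
        the recursion only lines up when hidden_dim == 2 * input_dim, since the
        LSTM output q (dim input_dim) is concatenated with the attention readout
        r (dim input_dim) to form the next LSTM input of size hidden_dim.
        Hypothetical usage, assuming a CUDA device because the hidden state and
        q_star below are allocated with .cuda():
            s2s = Set2Set(input_dim=64, hidden_dim=128).cuda()
            out = s2s(torch.randn(8, 20, 64).cuda())  # -> [8, 64]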
17 | ''' 18 | super(Set2Set, self).__init__() 19 | self.input_dim = input_dim 20 | self.hidden_dim = hidden_dim 21 | self.num_layers = num_layers 22 | if hidden_dim <= input_dim: 23 | print('ERROR: Set2Set output_dim should be larger than input_dim') 24 | # the hidden is a concatenation of weighted sum of embedding and LSTM output 25 | self.lstm_output_dim = hidden_dim - input_dim 26 | self.lstm = nn.LSTM(hidden_dim, input_dim, num_layers=num_layers, batch_first=True) 27 | 28 | # convert back to dim of input_dim 29 | self.pred = nn.Linear(hidden_dim, input_dim) 30 | self.act = act_fn() 31 | 32 | def forward(self, embedding): 33 | ''' 34 | Args: 35 | embedding: [batch_size x n x d] embedding matrix 36 | Returns: 37 | aggregated: [batch_size x d] vector representation of all embeddings 38 | ''' 39 | batch_size = embedding.size()[0] 40 | n = embedding.size()[1] 41 | 42 | hidden = (torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda(), 43 | torch.zeros(self.num_layers, batch_size, self.lstm_output_dim).cuda()) 44 | 45 | q_star = torch.zeros(batch_size, 1, self.hidden_dim).cuda() 46 | for i in range(n): 47 | # q: batch_size x 1 x input_dim 48 | q, hidden = self.lstm(q_star, hidden) 49 | # e: batch_size x n x 1 50 | e = embedding @ torch.transpose(q, 1, 2) 51 | a = nn.Softmax(dim=1)(e) 52 | r = torch.sum(a * embedding, dim=1, keepdim=True) 53 | q_star = torch.cat((q, r), dim=2) 54 | q_star = torch.squeeze(q_star, dim=1) 55 | out = self.act(self.pred(q_star)) 56 | 57 | return out 58 | -------------------------------------------------------------------------------- /diffpool/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from glob import glob 3 | import numpy as np 4 | import sys 5 | """ 6 | files = glob(f'{sys.argv[1]}/*/*') 7 | 8 | for f in files: 9 | print(f) 10 | accs100 = [] 11 | accs200 = [] 12 | accs500 = [] 13 | accs1000 = [] 14 | cnt = 0 15 | for event in tf.train.summary_iterator(f): 16 | 17 | for value in event.summary.value: 18 | 19 | if value.tag == 'acc/val_acc': 20 | val_acc = value.simple_value 21 | else: 22 | continue 23 | 24 | 25 | cnt += 1 26 | if (cnt-100) % 1000 == 0: 27 | accs100.append(val_acc) 28 | 29 | print(cnt) 30 | 31 | print(np.mean(accs100), np.std(accs100)) 32 | 33 | # print(value.tag) 34 | # if value.HasField('simple_value'): 35 | # print(value.simple_value) 36 | """ 37 | 38 | if __name__ == '__main__': 39 | # DS = sys.argv[1] 40 | 41 | df = pd.read_csv('log_') 42 | # df = df[df.DS == DS] 43 | gcs = df.gc.unique() 44 | types = df.method.unique() 45 | for gc in gcs: 46 | for tpe in types: 47 | tmpdf = df[(df.gc == gc) & (df.method == tpe)] 48 | for i in range(10, 110, 10): 49 | print(gc, tpe, i, tmpdf[str(i)+'-mean'].mean(), tmpdf[str(i) + '-mean'].std()) 50 | -------------------------------------------------------------------------------- /graph2vec_tf/README.md: -------------------------------------------------------------------------------- 1 | # graph2vec 2 | 3 | This repository contains the "tensorflow" implementation of our paper "graph2vec: Learning distributed representations of graphs". 4 | The paper could be found at: https://arxiv.org/pdf/1707.05005.pdf 5 | 6 | 7 | #### Dependencies 8 | This code is developed in python 2.7. It is ran and tested on Ubuntu 16.04. 9 | It uses the following python packages: 10 | 1. tensorflow (version == 1.4.0) 11 | 2. networkx (version <= 2.0) 12 | 4. 
scikit-learn (+scipy, +numpy) 13 | 14 | ##### The procedure for setting up graph2vec is as follows: 15 | 1. git clone the repository (command: git clone https://github.com/MLDroid/graph2vec_tf.git ) 16 | 2. untar the data.tar.gz tarball 17 | 18 | ##### The procedure for obtaining rooted graph vectors using graph2vec and performing graph classification is as follows: 19 | 1. move to the folder "src" (command: cd src) (also make sure that kdd 2015 paper's (Deep Graph Kernels) datasets are available in '../data/kdd_datasets/dir_graphs/') 20 | 2. run main.py --corpus --class_labels_file_name file to: 21 | *Generate the weisfeiler-lehman kernel's rooted subgraphs from all the graphs 22 | *Train skipgram model to learn graph embeddings. The same will be dumped in ../embeddings/ folder 23 | *Perform graph classification using the graph embeddings generated in the above step 24 | 3. example: 25 | *python main.py --corpus ../data/kdd_datasets/mutag --class_labels_file_name ../data/kdd_datasets/mutag.Labels 26 | *python main.py --corpus ../data/kdd_datasets/proteins --class_labels_file_name ../data/kdd_datasets/proteins.Labels --batch_size 16 --embedding_size 128 --num_negsample 5 27 | 28 | 29 | #### Other command line args: 30 | optional arguments: 31 | -h, --help show this help message and exit 32 | -c CORPUS, --corpus CORPUS 33 | Path to directory containing graph files to be used 34 | for graph classification or clustering 35 | -l CLASS_LABELS_FILE_NAME, --class_labels_file_name CLASS_LABELS_FILE_NAME 36 | File name containg the name of the sample and the 37 | class labels 38 | -o OUTPUT_DIR, --output_dir OUTPUT_DIR 39 | Path to directory for storing output embeddings 40 | -b BATCH_SIZE, --batch_size BATCH_SIZE 41 | Number of samples per training batch 42 | -e EPOCHS, --epochs EPOCHS 43 | Number of iterations the whole dataset of graphs is 44 | traversed 45 | -d EMBEDDING_SIZE, --embedding_size EMBEDDING_SIZE 46 | Intended graph embedding size to be learnt 47 | -neg NUM_NEGSAMPLE, --num_negsample NUM_NEGSAMPLE 48 | Number of negative samples to be used for training 49 | -lr LEARNING_RATE, --learning_rate LEARNING_RATE 50 | Learning rate to optimize the loss function 51 | 52 | --wlk_h WLK_H Height of WL kernel (i.e., degree of rooted subgraph 53 | features to be considered for representation learning) 54 | -lf LABEL_FILED_NAME, --label_filed_name LABEL_FILED_NAME 55 | Label field to be used for coloring nodes in graphs 56 | using WL kenrel 57 | 58 | ## Contact ## 59 | In case of queries, please email: annamala002@e.ntu.edu.sg OR XZHANG048@e.ntu.edu.sg 60 | 61 | #### Reference 62 | 63 | Please consider citing the follow paper when you use this code. 
64 | @article{narayanangraph2vec, 65 | title={graph2vec: Learning distributed representations of graphs}, 66 | author={Narayanan, Annamalai and Chandramohan, Mahinthan and Venkatesan, Rajasekar and Chen, Lihui and Liu, Yang} 67 | } 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /graph2vec_tf/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sunfanyunn/graph-classification/0d9b60102b6456cdc0607b43c8852d860b2f53c0/graph2vec_tf/__init__.py -------------------------------------------------------------------------------- /graph2vec_tf/classify.py: -------------------------------------------------------------------------------- 1 | import time,json 2 | from utils import get_files 3 | from sklearn.feature_extraction.text import CountVectorizer 4 | from sklearn.preprocessing import Normalizer 5 | from sklearn.model_selection import train_test_split 6 | from sklearn.metrics import accuracy_score, classification_report 7 | from sklearn.svm import SVC,LinearSVC 8 | from random import randint 9 | import numpy as np 10 | import logging 11 | from sklearn.model_selection import GridSearchCV 12 | 13 | from utils import get_class_labels 14 | 15 | import os 16 | logger = logging.getLogger() 17 | logger.setLevel("INFO") 18 | 19 | def subgraph2vec_tokenizer (s): 20 | ''' 21 | Tokenize the string from subgraph2vec sentence (i.e. ...). Just target is to be used 22 | and context strings to be ignored. 23 | :param s: context of graph2vec file. 24 | :return: List of targets from graph2vec file. 25 | ''' 26 | return [line.split(' ')[0] for line in s.split('\n')] 27 | 28 | 29 | def linear_svm_classify (X_train, X_test, Y_train, Y_test): 30 | ''' 31 | Classifier with graph embeddings 32 | :param X_train: training feature vectors 33 | :param X_test: testing feature vectors 34 | :param Y_train: training set labels 35 | :param Y_test: test set labels 36 | :return: None 37 | ''' 38 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 39 | classifier = GridSearchCV(LinearSVC(), params, cv=5, scoring='accuracy',verbose=0) 40 | classifier.fit(X_train,Y_train) 41 | Y_pred = classifier.predict(X_test) 42 | acc = accuracy_score(Y_test, Y_pred) 43 | return acc 44 | 45 | 46 | def perform_classification (corpus_dir, extn, embeddings, class_labels_fname): 47 | ''' 48 | Perform classification from 49 | :param corpus_dir: folder containing subgraph2vec sentence files 50 | :param extn: extension of subgraph2vec sentence files 51 | :param embedding_fname: file containing subgraph vectors in word2vec format (refer Mikolov et al (2013) code) 52 | :param class_labels_fname: files containing labels of each graph 53 | :return: None 54 | ''' 55 | 56 | wlk_files = get_files(corpus_dir, extn) 57 | 58 | Y = np.array(get_class_labels(wlk_files, class_labels_fname)) 59 | # logging.info('Y (label) matrix shape: {}'.format(Y.shape)) 60 | 61 | seed = randint(0, 1000) 62 | 63 | # with open(embedding_fname,'r') as fh: 64 | # graph_embedding_dict = json.load(fh) 65 | 66 | wlk_files = [os.path.basename(x) for x in wlk_files] 67 | # graph_embedding_dict = {os.path.basename(x):y for x, y in graph_embedding_dict.iteritems()} 68 | 69 | # X = np.array([graph_embedding_dict[fname] for fname in wlk_files]) 70 | X = embeddings 71 | 72 | from sklearn.model_selection import StratifiedKFold 73 | kf = StratifiedKFold(10, shuffle=True, random_state=None) 74 | accs = [] 75 | for train_index, test_index in kf.split(X, Y): 76 | 77 | 
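        # Added annotation: each of the 10 stratified folds is scored independently.
        # linear_svm_classify refits a LinearSVC whose C is picked by an inner
        # 5-fold grid search on the training split, and the mean/std of the
        # per-fold test accuracies is what gets reported at the end.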
X_train, X_test = X[train_index], X[test_index] 78 | Y_train, Y_test = Y[train_index], Y[test_index] 79 | # logging.info('Train and Test matrix shapes: {}, {}, {}, {} '.format(X_train.shape, X_test.shape, Y_train.shape, Y_test.shape)) 80 | 81 | acc = linear_svm_classify(X_train, X_test, Y_train, Y_test) 82 | accs.append(acc) 83 | print(np.mean(accs), np.std(accs)) 84 | return np.mean(accs) 85 | 86 | 87 | if __name__ == '__main__': 88 | pass 89 | -------------------------------------------------------------------------------- /graph2vec_tf/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # run preprocess 4 | #python preprocess.py IMDB-BINARY 5 | #python preprocess.py IMDB-MULTI 6 | #python preprocess.py COLLAB 7 | #python preprocess.py DD 8 | #python preprocess.py REDDIT-BINARY 9 | #python preprocess.py REDDIT-MULTI-5K 10 | 11 | for i in 1 2 3 4 5 12 | do 13 | for DS in 'MUTAG' 'PTC_MR' 'PROTEINS_full' 'IMDB-BINARY' 'IMDB-MULTI' 'REDDIT-BINARY' 'REDDIT-MULTI-5K' 14 | do 15 | python3 preprocess.py $DS 16 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.001 17 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.01 18 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.1 19 | main.py -c ../data/$DS -l ../data/$DS.Labels -d 512 --wlk_h 3 -e 1000 -lr 0.5 20 | done 21 | done 22 | -------------------------------------------------------------------------------- /graph2vec_tf/main.py: -------------------------------------------------------------------------------- 1 | import argparse,os,logging,psutil,time 2 | from joblib import Parallel,delayed 3 | 4 | from utils import get_files 5 | from train_utils import train_skipgram 6 | from classify import perform_classification 7 | from make_graph2vec_corpus import * 8 | from time import time 9 | 10 | logger = logging.getLogger() 11 | logger.setLevel("INFO") 12 | 13 | 14 | def main(args): 15 | ''' 16 | :param args: arguments for 17 | 1. training the skigram model for learning subgraph representations 18 | 2. construct the deep WL kernel using the learnt subgraph representations 19 | 3. 
performing graph classification using the WL and deep WL kernel 20 | :return: None 21 | ''' 22 | corpus_dir = args.corpus 23 | output_dir = args.output_dir 24 | batch_size = args.batch_size 25 | epochs = args.epochs 26 | embedding_size = args.embedding_size 27 | num_negsample = args.num_negsample 28 | learning_rate = args.learning_rate 29 | wlk_h = args.wlk_h 30 | label_filed_name = args.label_filed_name 31 | class_labels_fname = args.class_labels_file_name 32 | 33 | wl_extn = 'g2v'+str(wlk_h) 34 | assert os.path.exists(corpus_dir), "File {} does not exist".format(corpus_dir) 35 | # assert os.path.exists(output_dir), "Dir {} does not exist".format(output_dir) 36 | 37 | graph_files = get_files(dirname=corpus_dir, extn='.gexf', max_files=0) 38 | logging.info('Loaded {} graph file names form {}'.format(len(graph_files),corpus_dir)) 39 | 40 | 41 | t0 = time() 42 | wlk_relabel_and_dump_memory_version(graph_files, max_h=wlk_h, node_label_attr_name=label_filed_name) 43 | logging.info('dumped sg2vec sentences in {} sec.'.format(time() - t0)) 44 | 45 | t0 = time() 46 | embedding_fname = train_skipgram(corpus_dir, wl_extn, learning_rate, embedding_size, num_negsample, 47 | epochs, batch_size, output_dir, class_labels_fname) 48 | # logging.info('Trained the skipgram model in {} sec.'.format(round(time()-t0, 2))) 49 | 50 | # embedding_fname = '../embeddings/_dims_512_epochs_2_lr_0.5_embeddings.txt' 51 | # perform_classification (corpus_dir, wl_extn, embedding_fname, class_labels_fname) 52 | 53 | 54 | 55 | 56 | def parse_args(): 57 | ''' 58 | Usual pythonic way of parsing command line arguments 59 | :return: all command line arguments read 60 | ''' 61 | args = argparse.ArgumentParser("graph2vec") 62 | args.add_argument("-c","--corpus", 63 | help="Path to directory containing graph files to be used for graph classification or clustering") 64 | 65 | args.add_argument('-l','--class_labels_file_name', 66 | help='File name containg the name of the sample and the class labels') 67 | 68 | args.add_argument('-o', "--output_dir", default = "../embeddings", 69 | help="Path to directory for storing output embeddings") 70 | 71 | args.add_argument('-b',"--batch_size", default=128, type=int, 72 | help="Number of samples per training batch") 73 | 74 | args.add_argument('-e',"--epochs", default=1000, type=int, 75 | help="Number of iterations the whole dataset of graphs is traversed") 76 | 77 | args.add_argument('-d',"--embedding_size", default=1024, type=int, 78 | help="Intended graph embedding size to be learnt") 79 | 80 | args.add_argument('-neg', "--num_negsample", default=10, type=int, 81 | help="Number of negative samples to be used for training") 82 | 83 | args.add_argument('-lr', "--learning_rate", default=0.3, type=float, 84 | help="Learning rate to optimize the loss function") 85 | 86 | args.add_argument("--wlk_h", default=3, type=int, help="Height of WL kernel (i.e., degree of rooted subgraph " 87 | "features to be considered for representation learning)") 88 | 89 | args.add_argument('-lf', '--label_filed_name', default='Label', help='Label field to be used ' 90 | 'for coloring nodes in graphs using WL kenrel') 91 | 92 | return args.parse_args() 93 | 94 | 95 | 96 | if __name__=="__main__": 97 | args = parse_args() 98 | main(args) 99 | 100 | -------------------------------------------------------------------------------- /graph2vec_tf/preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | from glob import glob 4 | 
from tqdm import tqdm 5 | import os 6 | import subprocess 7 | from sklearn.model_selection import cross_val_score 8 | from sklearn.model_selection import GridSearchCV, StratifiedKFold 9 | from sklearn.svm import SVC, LinearSVC 10 | from sklearn.ensemble import RandomForestClassifier 11 | from sklearn import preprocessing 12 | from sklearn.metrics import accuracy_score 13 | 14 | def load_data(ds_name, use_node_labels): 15 | node2graph = {} 16 | Gs = [] 17 | 18 | with open("../data/%s/%s_graph_indicator.txt"%(ds_name,ds_name), "r") as f: 19 | c = 1 20 | for line in f: 21 | node2graph[c] = int(line[:-1]) 22 | if not node2graph[c] == len(Gs): 23 | Gs.append(nx.Graph()) 24 | Gs[-1].add_node(c) 25 | c += 1 26 | 27 | with open("../data/%s/%s_A.txt"%(ds_name,ds_name), "r") as f: 28 | for line in f: 29 | edge = line[:-1].split(",") 30 | edge[1] = edge[1].replace(" ", "") 31 | Gs[node2graph[int(edge[0])]-1].add_edge(int(edge[0]), int(edge[1])) 32 | 33 | if use_node_labels: 34 | with open("../data/%s/%s_node_labels.txt"%(ds_name,ds_name), "r") as f: 35 | c = 1 36 | for line in f: 37 | node_label = int(line[:-1]) 38 | Gs[node2graph[c]-1].node[c]['label'] = node_label 39 | c += 1 40 | 41 | labels = [] 42 | with open("../data/%s/%s_graph_labels.txt"%(ds_name,ds_name), "r") as f: 43 | for line in f: 44 | labels.append(int(line[:-1])) 45 | 46 | labels = np.array(labels, dtype = np.float) 47 | return Gs, labels 48 | 49 | def preprocess(DS): 50 | Gs, labels = load_data(DS, False) 51 | print('number of graphs', len(Gs)) 52 | 53 | datadir = '../data/{}'.format(DS) 54 | try: 55 | os.mkdir(datadir) 56 | except Exception as e: 57 | print(e) 58 | 59 | assert len(Gs) == len(labels) 60 | f = open('../data/{}.Labels'.format(DS), 'w') 61 | for graphidx, G in tqdm(enumerate(Gs)): 62 | nx.write_gexf(G, '{}/{}.gexf'.format(datadir, graphidx)) 63 | f.write('{}.gexf {}\n'.format(graphidx, int(labels[graphidx]))) 64 | f.close() 65 | 66 | 67 | 68 | if __name__ == '__main__': 69 | import sys 70 | preprocess(sys.argv[1]) 71 | # preprocess('ENZYMES') 72 | # preprocess('DD') 73 | # preprocess('REDDIT-BINARY') 74 | # preprocess('COLLAB') 75 | # preprocess('REDDIT-MULTI-5K') 76 | # preprocess('IMDB-BINARY') 77 | # preprocess('IMDB-MULTI') 78 | -------------------------------------------------------------------------------- /graph2vec_tf/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | df = pd.read_csv('log') 3 | DSs = df.DS.unique() 4 | for DS in DSs: 5 | tmpdf = df[df.DS == DS] 6 | for i in [500, 900, 1000]: 7 | print(DS, i, tmpdf[str(i)].mean(), tmpdf[str(i)].std()) 8 | 9 | -------------------------------------------------------------------------------- /graph2vec_tf/train_utils.py: -------------------------------------------------------------------------------- 1 | import os,logging 2 | import numpy as np 3 | from corpus_parser import Corpus 4 | from utils import save_graph_embeddings 5 | from skipgram import skipgram 6 | 7 | 8 | def train_skipgram (corpus_dir, extn, learning_rate, embedding_size, num_negsample, epochs, batch_size, output_dir, class_labels_fname): 9 | ''' 10 | 11 | :param corpus_dir: folder containing WL kernel relabeled files. All the files in this folder will be relabled 12 | according to WL relabeling strategy and the format of each line in these folders shall be: .... 
13 | :param extn: Extension of the WL relabled file 14 | :param learning_rate: learning rate for the skipgram model (will involve a linear decay) 15 | :param embedding_size: number of dimensions to be used for learning subgraph representations 16 | :param num_negsample: number of negative samples to be used by the skipgram model 17 | :param epochs: number of iterations the dataset is traversed by the skipgram model 18 | :param batch_size: size of each batch for the skipgram model 19 | :param output_dir: the folder where embedding file will be stored 20 | :return: name of the file that contains the subgraph embeddings (in word2vec format proposed by Mikolov et al (2013)) 21 | ''' 22 | 23 | op_fname = '_'.join([os.path.basename(corpus_dir), 'dims', str(embedding_size), 'epochs', 24 | str(epochs),'lr',str(learning_rate),'embeddings.txt']) 25 | op_fname = os.path.join(output_dir, op_fname) 26 | # if os.path.isfile(op_fname): 27 | # logging.info('The embedding file: {} is already present, hence NOT training skipgram model ' 28 | # 'for subgraph vectors'.format(op_fname)) 29 | # return op_fname 30 | 31 | logging.info("Initializing SKIPGRAM...") 32 | corpus = Corpus(corpus_dir, extn = extn, max_files=0) # just load 'max_files' files from this folder 33 | corpus.scan_and_load_corpus() 34 | 35 | model_skipgram = skipgram( 36 | corpus_dir=corpus_dir, 37 | extn=extn, 38 | class_labels_fname=class_labels_fname, 39 | num_graphs=corpus.num_graphs, 40 | num_subgraphs=corpus.num_subgraphs, 41 | learning_rate=learning_rate, 42 | embedding_size=embedding_size, 43 | num_negsample=num_negsample, 44 | num_steps=epochs, # no. of time the training set will be iterated through 45 | corpus=corpus, # data set of (target,context) tuples 46 | ) 47 | 48 | # final_embeddings = model_skipgram.train(corpus=corpus,batch_size=batch_size) 49 | model_skipgram.train(corpus=corpus,batch_size=batch_size) 50 | 51 | # logging.info('Write the matrix to a word2vec format file') 52 | # save_graph_embeddings(corpus, final_embeddings, op_fname) 53 | # logging.info('Completed writing the final embeddings, pls check file: {} for the same'.format(op_fname)) 54 | # return op_fname 55 | 56 | # perform_classification(corpus_dir, extn, embeddings, class_labels_fname) 57 | 58 | 59 | 60 | if __name__ == '__main__': 61 | pass 62 | -------------------------------------------------------------------------------- /graph2vec_tf/utils.py: -------------------------------------------------------------------------------- 1 | import os,json 2 | 3 | 4 | def get_files(dirname, extn, max_files=0): 5 | all_files = [os.path.join(dirname, f) for f in os.listdir(dirname) if f.endswith(extn)] 6 | for root, dirs, files in os.walk(dirname): 7 | for f in files: 8 | if f.endswith(extn): 9 | all_files.append(os.path.join(root, f)) 10 | 11 | all_files = list(set(all_files)) 12 | all_files.sort() 13 | if max_files: 14 | return all_files[:max_files] 15 | else: 16 | return all_files 17 | 18 | 19 | def save_graph_embeddings(corpus, final_embeddings, opfname): 20 | dict_to_save = {} 21 | for i in range(len(final_embeddings)): 22 | graph_fname = corpus._id_to_graph_name_map[i] 23 | graph_embedding = final_embeddings[i,:].tolist() 24 | dict_to_save[graph_fname] = graph_embedding 25 | 26 | with open(opfname, 'w') as fh: 27 | json.dump(dict_to_save,fh,indent=4) 28 | 29 | 30 | def get_class_labels(graph_files, class_labels_fname): 31 | graph_to_class_label_map = {l.split()[0].split('.')[0]: int(l.split()[1].strip()) for l in open (class_labels_fname)} 32 | labels = 
[graph_to_class_label_map[os.path.basename(g).split('.')[0]] for g in graph_files] 33 | 34 | return labels 35 | 36 | if __name__ == '__main__': 37 | print('nothing to do') 38 | -------------------------------------------------------------------------------- /kcnn/README.md: -------------------------------------------------------------------------------- 1 | ## Kernel Graph Convolutional Neural Networks 2 | Code for the paper [Kernel Graph Convolutional Neural Networks](https://arxiv.org/pdf/1710.10689.pdf). 3 | 4 | ### Requirements 5 | Code is written in Python 3.6 and requires: 6 | * PyTorch 0.3 7 | * NetworkX 1.11 8 | * igraph 0.7 9 | * scikit-learn 0.18 10 | 11 | ### Datasets 12 | Use the following link to download datasets: 13 | ``` 14 | https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets 15 | ``` 16 | Extract the datasets into the `datasets` folder. 17 | 18 | ### Run the model 19 | First, specify the dataset and the hyperparameters in the `main.py` file. Then, use the following command: 20 | 21 | ``` 22 | $ python main.py 23 | ``` 24 | 25 | ### Cite 26 | Please cite our paper if you use this code: 27 | ``` 28 | @article{nikolentzos2017kernel, 29 | title={Kernel Graph Convolutional Neural Networks}, 30 | author={Nikolentzos, Giannis and Meladianos, Polykarpos and Tixier, Antoine Jean-Pierre and Skianis, Konstantinos and Vazirgiannis, Michalis}, 31 | journal={arXiv preprint arXiv:1710.10689}, 32 | year={2017} 33 | } 34 | ``` 35 | 36 | ----------- 37 | 38 | Provided for academic use only 39 | -------------------------------------------------------------------------------- /kcnn/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | # TEST is the virtualenv 4 | ./TEST/bin/python3 main.py --d 512 --dataset $@ 5 | -------------------------------------------------------------------------------- /kcnn/graph_kernels_labeled.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | from collections import defaultdict 4 | import copy 5 | 6 | 7 | def sp_kernel(g1, g2=None): 8 | if g2 != None: 9 | graphs = [] 10 | for g in g1: 11 | graphs.append(g) 12 | for g in g2: 13 | graphs.append(g) 14 | else: 15 | graphs = g1 16 | 17 | N = len(graphs) 18 | all_paths = {} 19 | sp_counts = {} 20 | for i in range(N): 21 | sp_lengths = nx.shortest_path_length(graphs[i]) 22 | sp_counts[i] = {} 23 | nodes = graphs[i].nodes() 24 | for v1 in nodes: 25 | for v2 in nodes: 26 | if v2 in sp_lengths[v1]: 27 | label = tuple(sorted([graphs[i].node[v1]['label'], graphs[i].node[v2]['label']]) + [sp_lengths[v1][v2]]) 28 | if label in sp_counts[i]: 29 | sp_counts[i][label] += 1 30 | else: 31 | sp_counts[i][label] = 1 32 | 33 | if label not in all_paths: 34 | all_paths[label] = len(all_paths) 35 | 36 | phi = np.zeros((N,len(all_paths))) 37 | 38 | for i in range(N): 39 | for label in sp_counts[i]: 40 | phi[i,all_paths[label]] = sp_counts[i][label] 41 | 42 | if g2 != None: 43 | K = np.dot(phi[:len(g1),:],phi[len(g1):,:].T) 44 | else: 45 | K = np.dot(phi,phi.T) 46 | 47 | return K 48 | 49 | 50 | def wl_kernel(g1, g2=None, h=6): 51 | if g2 != None: 52 | graphs = [] 53 | for g in g1: 54 | graphs.append(g) 55 | for g in g2: 56 | graphs.append(g) 57 | else: 58 | graphs = g1 59 | 60 | labels = {} 61 | label_lookup = {} 62 | label_counter = 0 63 | 64 | N = len(graphs) 65 | 66 | orig_graph_map = {it: {i: defaultdict(lambda: 0) for i in range(N)} for it in range(-1, h)} 67 | 68 | # 
initial labeling 69 | ind = 0 70 | for G in graphs: 71 | labels[ind] = np.zeros(G.number_of_nodes(), dtype = np.int32) 72 | node2index = {} 73 | for node in G.nodes(): 74 | node2index[node] = len(node2index) 75 | 76 | for node in G.nodes(): 77 | label = G.node[node]['label'] 78 | if not (label in label_lookup): 79 | label_lookup[label] = len(label_lookup) 80 | 81 | labels[ind][node2index[node]] = label_lookup[label] 82 | orig_graph_map[-1][ind][label] = orig_graph_map[-1][ind].get(label, 0) + 1 83 | 84 | ind += 1 85 | 86 | compressed_labels = copy.deepcopy(labels) 87 | 88 | # WL iterations 89 | for it in range(h): 90 | unique_labels_per_h = set() 91 | label_lookup = {} 92 | ind = 0 93 | for G in graphs: 94 | node2index = {} 95 | for node in G.nodes(): 96 | node2index[node] = len(node2index) 97 | 98 | for node in G.nodes(): 99 | node_label = tuple([labels[ind][node2index[node]]]) 100 | neighbors = G.neighbors(node) 101 | if len(neighbors) > 0: 102 | neighbors_label = tuple([labels[ind][node2index[neigh]] for neigh in neighbors]) 103 | node_label = str(node_label) + "-" + str(sorted(neighbors_label)) 104 | if not (node_label in label_lookup): 105 | label_lookup[node_label] = len(label_lookup) 106 | 107 | compressed_labels[ind][node2index[node]] = label_lookup[node_label] 108 | orig_graph_map[it][ind][node_label] = orig_graph_map[it][ind].get(node_label, 0) + 1 109 | 110 | ind +=1 111 | 112 | labels = copy.deepcopy(compressed_labels) 113 | 114 | if g2 != None: 115 | K = np.zeros((len(g1), len(g2))) 116 | for it in range(-1, h): 117 | for i in range(len(g1)): 118 | for j in range(len(g2)): 119 | common_keys = set(orig_graph_map[it][i].keys()) & set(orig_graph_map[it][len(g1)+j].keys()) 120 | K[i][j] += sum([orig_graph_map[it][i].get(k,0)*orig_graph_map[it][len(g1)+j].get(k,0) for k in common_keys]) 121 | else: 122 | K = np.zeros((N, N)) 123 | for it in range(-1, h): 124 | for i in range(N): 125 | for j in range(N): 126 | common_keys = set(orig_graph_map[it][i].keys()) & set(orig_graph_map[it][j].keys()) 127 | K[i][j] += sum([orig_graph_map[it][i].get(k,0)*orig_graph_map[it][j].get(k,0) for k in common_keys]) 128 | 129 | return K -------------------------------------------------------------------------------- /kcnn/model.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from torch.nn.init import xavier_normal,xavier_uniform 4 | 5 | # CNN Model 6 | class CNN(nn.Module): 7 | def __init__(self, input_size, hidden_size, n_classes, d, n_kernels, max_n_communities): 8 | super(CNN, self).__init__() 9 | self.max_n_communities = max_n_communities 10 | self.conv = nn.Conv3d(1, input_size, (1, 1, d), padding=0) 11 | self.fc1 = nn.Linear(input_size*n_kernels, hidden_size) 12 | self.fc2 = nn.Linear(hidden_size, n_classes) 13 | self.init_weights() 14 | 15 | def init_weights(self): 16 | xavier_uniform(self.conv.weight.data) 17 | xavier_normal(self.fc1.weight.data) 18 | xavier_normal(self.fc2.weight.data) 19 | 20 | def forward(self, x_in): 21 | out = F.relu(F.max_pool3d(self.conv(x_in), (1, self.max_n_communities,1))) 22 | out = out.view(out.size(0), -1) 23 | out = F.relu(self.fc1(out)) 24 | out = F.dropout(out, training=self.training) 25 | out = self.fc2(out) 26 | return F.log_softmax(out, dim=1) 27 | -------------------------------------------------------------------------------- /kcnn/nystrom.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 
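# Descriptive note on the class defined in this file: Nystrom builds a rank-m approximation
# of the full N x N graph-kernel matrix. With W = kernel(basis, basis) for m randomly sampled
# "basis" graphs and C = kernel(X, basis) for the input graphs X, it uses K ~= C W^-1 C^T,
# i.e. the explicit feature map phi(X) = C W^(-1/2). fit() computes W^(-1/2) from an SVD of W
# (stored as normalization_), and transform() returns C.dot(normalization_.T), so the dot
# product of two transformed rows approximates the kernel value between the corresponding
# graphs. The structure mirrors scikit-learn's Nystroem transformer, except that "kernel"
# here is a graph-kernel function operating on lists of graphs rather than feature vectors.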
import scipy.sparse as sp 3 | from scipy.linalg import svd 4 | from sklearn.utils import check_random_state 5 | 6 | class Nystrom(): 7 | 8 | def __init__(self, kernel, kernel_params=None, n_components=100, random_state=None): 9 | self.kernel = kernel 10 | self.kernel_params = kernel_params 11 | self.n_components = n_components 12 | self.random_state = random_state 13 | 14 | def fit(self, graphs, y=None): 15 | rnd = check_random_state(self.random_state) 16 | n_samples = len(graphs) 17 | 18 | # get basis vectors 19 | if self.n_components > n_samples: 20 | n_components = n_samples 21 | else: 22 | n_components = self.n_components 23 | n_components = min(n_samples, n_components) 24 | inds = rnd.permutation(n_samples) 25 | basis_inds = inds[:n_components] 26 | basis = [] 27 | for ind in basis_inds: 28 | basis.append(graphs[ind]) 29 | 30 | basis_kernel = self.kernel(basis, basis, **self._get_kernel_params()) 31 | 32 | # sqrt of kernel matrix on basis vectors 33 | U, S, V = svd(basis_kernel) 34 | S = np.maximum(S, 1e-12) 35 | self.normalization_ = np.dot(U * 1. / np.sqrt(S), V) 36 | self.components_ = basis 37 | self.component_indices_ = inds 38 | return self 39 | 40 | def transform(self, graphs): 41 | embedded = self.kernel(graphs, self.components_, **self._get_kernel_params()) 42 | return np.dot(embedded, self.normalization_.T) 43 | 44 | def _get_kernel_params(self): 45 | params = self.kernel_params 46 | if params is None: 47 | params = {} 48 | 49 | return params 50 | 51 | -------------------------------------------------------------------------------- /kernel_methods/README.md: -------------------------------------------------------------------------------- 1 | # Graph Kernels 2 | 3 | Please install the GraKeL library first. 4 | -------------------------------------------------------------------------------- /kernel_methods/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | 3 | python3 main.py REDDIT-BINARY walk 4 | python3 main.py REDDIT-BINARY shortest 5 | python3 main.py REDDIT-MULTI-5K wl 6 | python3 main.py REDDIT-MULTI-5K shortest 7 | python3 main.py REDDIT-MULTI-5K walk 8 | python3 main.py IMDB-BINARY wl 9 | python3 main.py IMDB-MULTI wl 10 | python3 main.py IMDB-BINARY shortest 11 | python3 main.py IMDB-MULTI shortest 12 | python3 main.py IMDB-BINARY walk 13 | python3 main.py IMDB-MULTI walk 14 | python3 main.py REDDIT-MULTI-5K shortest 15 | 16 | 17 | -------------------------------------------------------------------------------- /kernel_methods/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import networkx as nx 4 | import pickle 5 | import json 6 | from glob import glob 7 | import graph_tool 8 | from graph_tool import load_graph 9 | from tqdm import tqdm 10 | import operator 11 | import re 12 | 13 | def read_graphfile(datadir, dataname, max_nodes=None): 14 | ''' Read data from https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets 15 | graph index starts with 1 in file 16 | 17 | Returns: 18 | List of networkx objects with graph and node labels 19 | ''' 20 | prefix = os.path.join(datadir, dataname, dataname) 21 | filename_graph_indic = prefix + '_graph_indicator.txt' 22 | # index of graphs that a given node belongs to 23 | graph_indic={} 24 | with open(filename_graph_indic) as f: 25 | i=1 26 | for line in f: 27 | line=line.strip("\n") 28 | graph_indic[i]=int(line) 29 | i+=1 30 | 31 | filename_nodes=prefix + '_node_labels.txt' 32 | node_labels=[] 33 | try:
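        # <dataname>_node_labels.txt is optional in this benchmark format: line i holds a single
        # integer, the label of node i, where node ids run from 1 consecutively across the whole
        # dataset (matching graph_indic above). The IOError handler below simply skips datasets,
        # such as the REDDIT and IMDB collections, that ship without node labels.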
34 | with open(filename_nodes) as f: 35 | for line in f: 36 | line=line.strip("\n") 37 | node_labels+=[int(line)] 38 | # node_labels = LabelEncoder().fit_transform(node_labels) 39 | except IOError: 40 | print('No node labels') 41 | 42 | filename_node_attrs=prefix + '_node_attributes.txt' 43 | node_attrs=[] 44 | try: 45 | with open(filename_node_attrs) as f: 46 | for line in f: 47 | line = line.strip("\s\n") 48 | attrs = [float(attr) for attr in re.split("[,\s]+", line) if not attr == ''] 49 | node_attrs.append(np.array(attrs)) 50 | except IOError: 51 | print('No node attributes') 52 | 53 | label_has_zero = False 54 | filename_graphs=prefix + '_graph_labels.txt' 55 | graph_labels=[] 56 | with open(filename_graphs) as f: 57 | for line in f: 58 | line=line.strip("\n") 59 | val = int(line) 60 | if val == 0: 61 | label_has_zero = True 62 | graph_labels.append(val - 1) 63 | graph_labels = np.array(graph_labels) 64 | if label_has_zero: 65 | graph_labels += 1 66 | 67 | filename_adj=prefix + '_A.txt' 68 | adj_list={i:[] for i in range(1,len(graph_labels)+1)} 69 | # index_graph={i:[] for i in range(1,len(graph_labels)+1)} 70 | num_edges = 0 71 | with open(filename_adj) as f: 72 | for line in f: 73 | line=line.strip("\n").split(",") 74 | e0,e1=(int(line[0].strip(" ")),int(line[1].strip(" "))) 75 | adj_list[graph_indic[e0]].append((e0,e1)) 76 | # index_graph[graph_indic[e0]]+=[e0,e1] 77 | num_edges += 1 78 | # for k in index_graph.keys(): 79 | # index_graph[k]=[u-1 for u in set(index_graph[k])] 80 | 81 | 82 | graphs=[] 83 | for i in range(1,1+len(adj_list)): 84 | # indexed from 1 here 85 | G=nx.from_edgelist(adj_list[i]) 86 | graphs.append(G) 87 | 88 | # add features and labels 89 | for nodeid, nl in enumerate(node_labels): 90 | nodeid += 1 91 | graphs[graph_indic[nodeid]-1].add_node(nodeid) 92 | # graphs[graph_indic[nodeid]-1][nodeid]['label'] = nl 93 | 94 | for idx, G in enumerate(graphs): 95 | # no graph labels needed 96 | G.graph['label'] = graph_labels[idx] 97 | for u in G.nodes(): 98 | if len(node_labels) > 0: 99 | G.node[u]['label'] = node_labels[u-1] 100 | if len(node_attrs) > 0: 101 | G.node[u]['feat'] = node_attrs[u-1] 102 | 103 | graphs[idx] = G 104 | 105 | # relabeling 106 | for idx, G in enumerate(graphs): 107 | mapping={} 108 | it=0 109 | if float(nx.__version__)<2.0: 110 | for n in G.nodes(): 111 | mapping[n]=it 112 | it+=1 113 | else: 114 | for n in G.nodes: 115 | mapping[n]=it 116 | it+=1 117 | 118 | # indexed from 0 119 | G = nx.relabel_nodes(G, mapping) 120 | 121 | graphs[idx] = G 122 | 123 | return graphs, graph_labels 124 | -------------------------------------------------------------------------------- /sub2vec/go.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -ex 2 | for i in 1 2 3 4 5 3 | do 4 | for DS in 'IMDB-BINARY' 'IMDB-MULTI' 5 | do 6 | python3 src/main.py --input ../data/$DS --preprocessed-input preprocessed_dataset/$DS --d 512 --property n 7 | python3 src/main.py --input ../data/$DS --preprocessed-input preprocessed_dataset/$DS --d 512 --property s 8 | done 9 | done 10 | -------------------------------------------------------------------------------- /sub2vec/preprocess.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import networkx as nx 3 | from glob import glob 4 | import os 5 | import subprocess 6 | from sklearn.model_selection import cross_val_score 7 | from sklearn.model_selection import GridSearchCV, StratifiedKFold 8 | from sklearn.svm import 
SVC, LinearSVC 9 | from sklearn.linear_model import LogisticRegression 10 | from sklearn.ensemble import RandomForestClassifier 11 | from sklearn import preprocessing 12 | from sklearn.metrics import accuracy_score 13 | 14 | def load_data(dir_name, use_node_labels): 15 | node2graph = {} 16 | Gs = [] 17 | 18 | ds_name = os.path.basename(dir_name) 19 | with open("%s/%s_graph_indicator.txt"%(dir_name,ds_name), "r") as f: 20 | c = 1 21 | for line in f: 22 | node2graph[c] = int(line[:-1]) 23 | if not node2graph[c] == len(Gs): 24 | Gs.append(nx.Graph()) 25 | Gs[-1].add_node(c) 26 | c += 1 27 | 28 | with open("%s/%s_A.txt"%(dir_name,ds_name), "r") as f: 29 | for line in f: 30 | edge = line[:-1].split(",") 31 | edge[1] = edge[1].replace(" ", "") 32 | Gs[node2graph[int(edge[0])]-1].add_edge(int(edge[0]), int(edge[1])) 33 | 34 | if use_node_labels: 35 | with open("%s/%s_node_labels.txt"%(dir_name,ds_name), "r") as f: 36 | c = 1 37 | for line in f: 38 | node_label = int(line[:-1]) 39 | Gs[node2graph[c]-1].node[c]['label'] = node_label 40 | c += 1 41 | 42 | # for idx, g in enumerate(Gs): 43 | # for n in g.nodes(): 44 | # _ = (g.node[n]['label']) 45 | 46 | labels = [] 47 | with open("%s/%s_graph_labels.txt"%(dir_name,ds_name), "r") as f: 48 | for line in f: 49 | labels.append(int(line[:-1])) 50 | 51 | labels = np.array(labels, dtype = np.float) 52 | return Gs, labels 53 | 54 | def evaluate(DS, embeddings): 55 | graphs, labels = load_data(DS, False) 56 | x, y = np.array(embeddings), np.array(labels) 57 | 58 | kf = StratifiedKFold(n_splits=10, shuffle=True, random_state=None) 59 | accs=[] 60 | accuracies = [] 61 | for train_index, test_index in kf.split(x, y): 62 | best_acc1 = 0 63 | 64 | x_train, x_test = x[train_index], x[test_index] 65 | y_train, y_test = y[train_index], y[test_index] 66 | params = {'C':[0.001, 0.01,0.1,1,10,100,1000]} 67 | classifier = GridSearchCV(SVC(), params, cv=10, scoring='accuracy', verbose=0) 68 | classifier.fit(x_train, y_train) 69 | accuracies.append(accuracy_score(y_test, classifier.predict(x_test))) 70 | 71 | print('SVC', np.mean(accuracies)) 72 | return np.mean(accuracies) 73 | 74 | def preprocess(DS, preprocessed_input): 75 | Gs, labels = load_data(DS, False) 76 | print('number of graphs', len(Gs)) 77 | try: 78 | os.makedirs(preprocessed_input) 79 | except Exception as e: 80 | print(e) 81 | 82 | for i in range(len(Gs)): 83 | with open('{}/{}'.format(preprocessed_input, i), 'w+') as f: 84 | for e in Gs[i].edges(): 85 | f.write('{} {}\n'.format(e[0], e[1])) 86 | print('done preprocessing') 87 | 88 | if __name__ == '__main__': 89 | ds_name='MUTAG' 90 | print('classification') 91 | classification(ds_name, ds_name+'.vec') 92 | classification('ENZYMES', 'output') 93 | -------------------------------------------------------------------------------- /sub2vec/src/graphUtils_n.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import random 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def getGraph(filename): 7 | G=nx.Graph() 8 | 9 | f=open(filename,'r') 10 | lines=f.readlines() 11 | for line in lines: 12 | if(line[0]=='#'): 13 | continue 14 | else: 15 | temp=line.split() 16 | index1=int(temp[0]) 17 | index2=int(temp[1]) 18 | G.add_edge(index1,index2) 19 | f.close() 20 | return G 21 | 22 | 23 | def randomWalk(G, walkSize): 24 | walkList= [] 25 | curNode = random.choice(G.nodes()) 26 | 27 | while(len(walkList) < walkSize): 28 | walkList.append(curNode) 29 | curNode = random.choice(G.neighbors(curNode)) 30 | return 
walkList 31 | 32 | def getStats(G): 33 | stats ={} 34 | stats['num_nodes'] = nx.number_of_nodes(G) 35 | stats['num_edges'] = nx.number_of_edges(G) 36 | stats['is_Connected'] = nx.is_connected(G) 37 | return stats 38 | 39 | def drawGraph(G): 40 | pos = nx.spring_layout(G) 41 | nx.draw_networkx(G, pos) 42 | plt.savefig("graph.pdf") 43 | plt.show() 44 | -------------------------------------------------------------------------------- /sub2vec/src/graphUtils_s.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import random 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def writeGraph(filename, G): 7 | 8 | file = open(filename, 'w') 9 | for edge in G.edges(): 10 | node1 = str(G.node[edge[0]]['label']) 11 | node2 = str(G.node[edge[1]]['label']) 12 | file.write(node1+'\t'+node2+'\n') 13 | file.close() 14 | 15 | 16 | def getGraph(filename): 17 | G=nx.Graph() 18 | mode = 0 19 | f=open(filename,'r') 20 | lines=f.readlines() 21 | labels = {} 22 | for line in lines: 23 | temp=line.split() 24 | index1=int(temp[0]) 25 | index2=int(temp[1]) 26 | G.add_edge(index1,index2) 27 | f.close() 28 | nx.set_node_attributes(G, 'label', labels) 29 | return G 30 | 31 | 32 | def randomWalk(G, walkSize): 33 | walkList= [] 34 | curNode = random.choice(G.nodes()) 35 | 36 | while(len(walkList) < walkSize): 37 | walkList.append(G.node[curNode]['label']) 38 | curNode = random.choice(G.neighbors(curNode)) 39 | return walkList 40 | 41 | def getStats(G): 42 | stats ={} 43 | stats['num_nodes'] = nx.number_of_nodes(G) 44 | stats['num_edges'] = nx.number_of_edges(G) 45 | stats['is_Connected'] = nx.is_connected(G) 46 | return stats 47 | 48 | def drawGraph(G): 49 | plt.figure() 50 | pos = nx.spring_layout(G) 51 | nx.draw_networkx(G, pos) 52 | plt.savefig("graph.pdf") 53 | plt.show() 54 | -------------------------------------------------------------------------------- /sub2vec/src/main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | 4 | from structural import structural_embedding 5 | from neighborhood import neighborhood_embedding 6 | 7 | def main(): 8 | parser = argparse.ArgumentParser(description="sub2vec.") 9 | parser.add_argument('--input', nargs='?', required=True, help='Input directory') 10 | parser.add_argument('--preprocessed-input', required=True) 11 | 12 | parser.add_argument('--property', default='n', choices=['n', 's'], required=True, help='Type of subgraph property to preserve.
Use "--property n" for the neighborhood property and "--property s" for the structural property.') 13 | 14 | parser.add_argument('--walkLength', default=100000, type=int, help='length of the random walk on each subgraph') 15 | 16 | # parser.add_argument('--output', required=True, help='Output representation file') 17 | 18 | parser.add_argument('--d', default=300, type=int, help='dimension of the learned features for each subgraph.') 19 | 20 | parser.add_argument('--iter', default=20, type=int, help='training iterations') 21 | 22 | parser.add_argument('--windowSize', default=2, type=int, 23 | help='Window size of the model.') 24 | 25 | parser.add_argument('--p', default=0.5, type=float, 26 | help='meta parameter.') 27 | 28 | parser.add_argument('--model', default='dm', choices=['dbon', 'dm'], 29 | help='model for learning vectors: SV-DM (dm) or SV-DBON (dbon).') 30 | 31 | args = parser.parse_args() 32 | from preprocess import preprocess 33 | print('start preprocessing ...') 34 | preprocess(args.input, args.preprocessed_input) 35 | 36 | if args.property == 's': 37 | structural_embedding(args) 38 | else: 39 | neighborhood_embedding(args) 40 | 41 | 42 | 43 | if __name__=='__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /sub2vec/src/neighborhood.py: -------------------------------------------------------------------------------- 1 | import gensim.models.doc2vec as doc 2 | import os 3 | import graphUtils_n 4 | from tqdm import tqdm 5 | 6 | 7 | def arr2str(arr): 8 | result = "" 9 | for i in arr: 10 | result += " "+str(i) 11 | return result 12 | 13 | 14 | def generateWalkFile(dirName, walkLength): 15 | walkFile = open(dirName+'.walk', 'w') 16 | indexToName = {} 17 | 18 | for root, dirs, files in os.walk(dirName): 19 | index = 0 20 | for name in tqdm(files): 21 | # print(name) 22 | subgraph = graphUtils_n.getGraph(os.path.join(root, name)) 23 | walk = graphUtils_n.randomWalk(subgraph, walkLength) 24 | walkFile.write(arr2str(walk) +"\n") 25 | indexToName[index] = name 26 | index += 1 27 | walkFile.close() 28 | 29 | return indexToName 30 | 31 | def saveVectors(vectors, outputfile, IdToName): 32 | print(len(vectors), outputfile, IdToName) 33 | output = open(outputfile, 'w') 34 | 35 | output.write(str(len(vectors)) +"\n") 36 | for i in range(len(vectors)): 37 | output.write(str(IdToName[i])) 38 | for j in vectors[i]: 39 | output.write('\t'+ str(j)) 40 | output.write('\n') 41 | output.close() 42 | 43 | def neighborhood_embedding(args): 44 | inputDir = args.preprocessed_input 45 | # outputFile = args.output 46 | iterations = args.iter 47 | dimensions = args.d 48 | window = args.windowSize 49 | dm = 1 if args.model == 'dm' else 0 50 | indexToName = generateWalkFile(inputDir, args.walkLength) 51 | # print(indexToName) 52 | sentences = doc.TaggedLineDocument(inputDir+'.walk') 53 | 54 | with open('log', 'a+') as f: 55 | results = [] 56 | # for epochs in range(10, 110, 10): 57 | # print('epochs', epochs) 58 | model = doc.Doc2Vec(sentences, vector_size = dimensions, dm = dm, window = window ) 59 | vectors = model.docvecs 60 | embeddings = [[] for _ in range(len(vectors))] 61 | for i in range(len(vectors)): 62 | embeddings[int(indexToName[i])] = vectors[i] 63 | 64 | from preprocess import evaluate 65 | res = evaluate(args.input, embeddings) 66 | results.append(str(res)) 67 | print(res) 68 | 69 | f.write(inputDir + ',n,' + ','.join(results) + '\n') 70 | -------------------------------------------------------------------------------- /sub2vec/src/structural.py:
-------------------------------------------------------------------------------- 1 | import gensim.models.doc2vec as doc 2 | import os 3 | import graphUtils_s 4 | import random 5 | import networkx as nx 6 | from tqdm import tqdm 7 | 8 | 9 | def arr2str(arr): 10 | result = "" 11 | for i in arr: 12 | result += " "+str(i) 13 | return result 14 | 15 | 16 | def generateDegreeWalk(Graph, walkSize): 17 | g = Graph 18 | walk = randomWalkDegreeLabels(g,walkSize) 19 | #walk = serializeEdge(g,NodeToLables) 20 | return walk 21 | 22 | def randomWalkDegreeLabels(G, walkSize): 23 | curNode = random.choice(G.nodes()) 24 | walkList= [] 25 | 26 | while(len(walkList) < walkSize): 27 | walkList.append(G.node[curNode]['label']) 28 | curNode = random.choice(G.neighbors(curNode)) 29 | return walkList 30 | 31 | def getDegreeLabelledGraph(G, rangetoLabels): 32 | degreeDict = G.degree(G.nodes()) 33 | labelDict = {} 34 | for node in degreeDict.keys(): 35 | val = degreeDict[node]/float(nx.number_of_nodes(G)) 36 | labelDict[node] = inRange(rangetoLabels, val) 37 | #val = degreeDict[node]/float(nx.number_of_nodes(G)) 38 | #labelDict[node] = degreeDict[node] 39 | 40 | nx.set_node_attributes(G, 'label', labelDict) 41 | 42 | return G 43 | 44 | def inRange(rangeDict, val): 45 | for key in rangeDict: 46 | if key[0] < val and key[1] >= val: 47 | return rangeDict[key] 48 | 49 | def generateWalkFile(dirName, walkLength, alpha): 50 | walkFile = open(dirName+'.walk', 'w') 51 | indexToName = {} 52 | rangetoLabels = {(0, 0.05):'z',(0.05, 0.1):'a', (0.1, 0.15):'b', (0.15, 0.2):'c', (0.2, 0.25):'d', (0.25, 0.5):'e', (0.5, 0.75):'f',(0.75, 1.0):'g'} 53 | for root, dirs, files in os.walk(dirName): 54 | index = 0 55 | for name in tqdm(files): 56 | subgraph = graphUtils_s.getGraph(os.path.join(root, name)) 57 | degreeGraph = getDegreeLabelledGraph(subgraph, rangetoLabels) 58 | degreeWalk = generateDegreeWalk(degreeGraph, int(walkLength* (1- alpha))) 59 | walk = graphUtils_s.randomWalk(subgraph, int(alpha * walkLength)) 60 | walkFile.write(arr2str(walk)+ arr2str(degreeWalk) +"\n") 61 | indexToName[index] = name 62 | index += 1 63 | walkFile.close() 64 | 65 | return indexToName 66 | 67 | def saveVectors(vectors, outputfile, IdToName): 68 | output = open(outputfile, 'w') 69 | 70 | output.write(str(len(vectors)) +"\n") 71 | for i in range(len(vectors)): 72 | output.write(str(IdToName[i])) 73 | for j in vectors[i]: 74 | output.write('\t'+ str(j)) 75 | output.write('\n') 76 | output.close() 77 | 78 | 79 | def structural_embedding(args): 80 | 81 | inputDir = args.preprocessed_input 82 | # outputFile = args.output 83 | iterations = args.iter 84 | dimensions = args.d 85 | window = args.windowSize 86 | dm = 1 if args.model == 'dm' else 0 87 | indexToName = generateWalkFile(inputDir, args.walkLength, args.p) 88 | sentences = doc.TaggedLineDocument(inputDir+'.walk') 89 | 90 | with open('log', 'a+') as f: 91 | results = [] 92 | # for epochs in range(10, 110, 10): 93 | # print('epochs', epochs) 94 | model = doc.Doc2Vec(sentences, vector_size = dimensions, dm = dm, window = window, workers=8) 95 | vectors = model.docvecs 96 | embeddings = [[] for _ in range(len(vectors))] 97 | for i in range(len(vectors)): 98 | embeddings[int(indexToName[i])] = vectors[i] 99 | 100 | from preprocess import evaluate 101 | res = evaluate(args.input, embeddings) 102 | print(res) 103 | results.append(str(res)) 104 | 105 | f.write(inputDir + ',s,' + ','.join(results) + '\n') 106 | -------------------------------------------------------------------------------- 
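The sketch below is a minimal, self-contained illustration of what structural.py does; it is not the repository's code. It assumes NetworkX 2.x and gensim, and the toy graph generator, bin edges and hyperparameters (BINS, walk length 200, vector_size=32) are made up for the example. Nodes are relabelled by normalized-degree bins, one random walk over those labels becomes the "document" for a graph, and Doc2Vec turns each document into a fixed-size structural embedding.

```
import random

import networkx as nx
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

# Coarser bins than structural.py's rangetoLabels, purely for illustration.
BINS = {(0.0, 0.05): 'z', (0.05, 0.25): 'a', (0.25, 0.5): 'b', (0.5, 1.0): 'c'}

def degree_label(G, v):
    # normalized degree mapped to a discrete symbol (cf. getDegreeLabelledGraph / inRange)
    x = G.degree(v) / float(G.number_of_nodes())
    return next(lab for (lo, hi), lab in BINS.items() if lo < x <= hi)

def degree_walk(G, length):
    # random walk that records degree labels instead of node ids (cf. randomWalkDegreeLabels)
    v = random.choice(list(G.nodes()))
    walk = []
    while len(walk) < length:
        walk.append(degree_label(G, v))
        v = random.choice(list(G.neighbors(v)))
    return walk

# one walk "document" per toy graph (connected, so every node has at least one neighbor)
graphs = [nx.connected_watts_strogatz_graph(20, 4, 0.3) for _ in range(5)]
docs = [TaggedDocument(words=degree_walk(G, 200), tags=[i]) for i, G in enumerate(graphs)]
model = Doc2Vec(docs, vector_size=32, window=2, dm=1, epochs=20)
vecs = model.dv if hasattr(model, 'dv') else model.docvecs  # gensim 4.x vs 3.x
print(vecs[0][:5])  # first dimensions of graph 0's structural embedding
```

Unlike structural.py, this sketch skips the mixed plain/degree walk controlled by --p and writes no .walk file; it only shows the degree-binning plus Doc2Vec pipeline end to end.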
/sub2vec/test.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import sys 3 | 4 | if __name__ == '__main__': 5 | df = pd.read_csv(sys.argv[1]) 6 | DSs = df.DS.unique() 7 | for DS in DSs: 8 | tmpdf = df[df.DS == DS] 9 | for tpe in ['n', 's']: 10 | m, s = tmpdf[(tmpdf.type == tpe)]['result'].mean(), tmpdf[(tmpdf.type == tpe)]['result'].std() 11 | print(DS, tpe, m, s) 12 | --------------------------------------------------------------------------------
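For reference, test.py relies on the CSV it is given having a header row with DS, type and result columns (it groups rows by dataset and by the n/s property and prints the mean and standard deviation over the five repetitions that go.sh runs). The log file written by neighborhood.py and structural.py contains only raw comma-separated rows, so a header has to be supplied; a hypothetical input could look like the following, where the accuracy values are placeholders rather than reported results:

```
DS,type,result
preprocessed_dataset/IMDB-BINARY,n,0.712
preprocessed_dataset/IMDB-BINARY,s,0.689
preprocessed_dataset/IMDB-MULTI,n,0.446
preprocessed_dataset/IMDB-MULTI,s,0.431
```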