├── HeterophilousDatasets └── data │ ├── actor.npz │ ├── amazon_ratings.npz │ ├── chameleon.npz │ ├── chameleon_filtered.npz │ ├── cornell.npz │ ├── dataread │ ├── minesweeper.npz │ ├── questions.npz │ ├── roman_empire.npz │ ├── squirrel.npz │ ├── squirrel_filtered.npz │ ├── texas.npz │ ├── wiki_cooc.npz │ ├── wisconsin.npz │ └── workers.npz ├── LICENSE ├── README.md ├── environment.yml └── src ├── 11 ├── GNN_he.py ├── GNN_heter.py ├── GNN_plot.py ├── base_classes.py ├── best_log ├── amazon-ratingsbelconvconstant1.020230116-211847.txt ├── amazon-ratingsgatconvNone1.020230113-171121.txt ├── cornellbelconveulerNone20230115-175939.txt ├── cornelllapconveulerNone20230115-190111.txt ├── minesweeperbelconvattention3.020230118-201621.txt ├── minesweepergatconvconstant4.020230118-011000.txt ├── questionsbelconvconstant1.020230116-173925.txt ├── questionsgatconvNone3.020230113-193655.txt ├── roman-empirebelconvconstant1.020230116-170240.txt ├── roman-empiregatconvconstant3.020230117-145044.txt ├── texasbelconveulerNone20230115-175910.txt ├── texaslapconveulerNone20230115-190052.txt ├── wiki-coocbelconvconstant1.020230116-202725.txt ├── wiki-cooctransconvattention1.020230117-230603.txt ├── wisconsinbelconveulerNone20230115-180013.txt ├── wisconsinlapconveulerNone20230115-190125.txt ├── workersbelconvattention3.020230114-120652.txt └── workersgatconvconstant1.020230117-174152.txt ├── best_params.py ├── best_params_discrete.py ├── best_params_graphocn.py ├── block_constant.py ├── block_transformer_attention.py ├── data.py ├── discrete_models.py ├── early_stop_solver.py ├── function_GAT_attention.py ├── function_GAT_convection.py ├── function_beltrami_convection.py ├── function_beltrami_gat.py ├── function_beltrami_trans.py ├── function_beltrami_van.py ├── function_beltramitrans_convection.py ├── function_laplacian_convection.py ├── function_laplacian_diffusion.py ├── function_transformer_attention.py ├── function_transformer_convection.py ├── graphcon_models.py ├── heterophilic.py ├── model_configurations.py ├── run_GNN_raw.py └── utils.py /HeterophilousDatasets/data/actor.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/actor.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/amazon_ratings.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/amazon_ratings.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/chameleon.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/chameleon.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/chameleon_filtered.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/chameleon_filtered.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/cornell.npz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/cornell.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/dataread: -------------------------------------------------------------------------------- 1 | Dataset used, copied from 2 | - [HeterophilousDatasets](https://github.com/heterophily-submit/HeterophilousDatasets) 3 | -------------------------------------------------------------------------------- /HeterophilousDatasets/data/minesweeper.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/minesweeper.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/questions.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/questions.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/roman_empire.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/roman_empire.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/squirrel.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/squirrel.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/squirrel_filtered.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/squirrel_filtered.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/texas.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/texas.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/wiki_cooc.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/wiki_cooc.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/wisconsin.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/wisconsin.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/workers.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/workers.npz -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 zknus 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph Neural Convection-Diffusion with Heterophily 2 | 3 | This repository contains the code for our IJCAI 2023 accepted paper, *[Graph Neural Convection-Diffusion with Heterophily](https://arxiv.org/abs/2305.16780)*. 
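The `.npz` graph files listed under `HeterophilousDatasets/data` above can be inspected directly with NumPy before training. A minimal sketch, assuming the array layout of the upstream HeterophilousDatasets release (node features, node labels, an edge list, and train/val/test masks); `src/heterophilic.py` contains the loader this repository actually uses:

```python
import numpy as np

# Inspect one of the bundled heterophilous graphs. The file path and the
# expected keys ('node_features', 'node_labels', 'edges', plus split masks)
# are assumptions based on the upstream HeterophilousDatasets format and
# may differ per file; print whatever arrays are actually stored.
with np.load("HeterophilousDatasets/data/roman_empire.npz") as data:
    for key in data.files:
        print(key, data[key].shape, data[key].dtype)
```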
4 | 5 | ## Table of Contents 6 | 7 | - [Requirements](#requirements) 8 | - [Datasets](#datasets) 9 | - [Reproducing Results](#reproducing-results) 10 | - [Reference](#reference) 11 | - [Citation](#citation) 12 | 13 | ## Requirements 14 | 15 | To install the required dependencies, refer to the `environment.yml` file. 16 | 17 | 18 | 27 | 28 | ## Reproducing Results 29 | 30 | To reproduce the results in Table 2, run the following commands: 31 | 32 | ```bash 33 | python run_GNN_raw.py --dataset amazon-ratings --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant 34 | 35 | python run_GNN_raw.py --dataset amazon-ratings --function gatconv --time 1 --epoch 1000 --step_size 0.5 --dropout 0.2 --lr 0.01 --method euler --no_early --random_split --cuda 2 --hidden_dim 64 36 | 37 | python run_GNN_raw.py --dataset minesweeper --function belconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001 38 | 39 | python run_GNN_raw.py --dataset minesweeper --function gatconv --time 4 --epoch 600 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001 40 | 41 | python run_GNN_raw.py --dataset questions --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant 42 | 43 | python run_GNN_raw.py --dataset questions --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 3 44 | 45 | python run_GNN_raw.py --dataset roman-empire --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 256 --block constant 46 | 47 | python run_GNN_raw.py --dataset roman-empire --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001 48 | 49 | python run_GNN_raw.py --dataset wiki-cooc --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant 50 | 51 | python run_GNN_raw.py --dataset wiki-cooc --function transconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001 52 | ``` 53 | 54 | ## Reference 55 | 56 | Our code is developed based on the following repository: 57 | https://github.com/twitter-research/graph-neural-pde 58 | 59 | 60 | 61 | ## Citation 62 | 63 | If you find our work helpful, please consider citing us: 64 | ```bibtex 65 | @inproceedings{zhao2023graph, 66 | title={Graph neural convection-diffusion with heterophily}, 67 | author={Zhao, K. and Kang, Q. and Song, Y. and She, R. and Wang, S. and Tay, W. P.}, 68 | booktitle={Proc.
International Joint Conference on Artificial Intelligence}, 69 | year={2023}, 70 | month={Aug}, 71 | address={Macao, China} 72 | } 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: graph-cde 2 | channels: 3 | - soumith 4 | - pytorch 5 | - defaults 6 | dependencies: 7 | - blas=1.0 8 | - ca-certificates=2020.12.8 9 | - certifi=2020.12.5 10 | - cycler=0.10.0 11 | - freetype=2.10.4 12 | - intel-openmp=2020.2 13 | - joblib=1.0.0 14 | - jpeg=9b 15 | - kiwisolver=1.3.0 16 | - lcms2=2.11 17 | - libcxx=10.0.0 18 | - libedit=3.1.20191231 19 | - libffi=3.3 20 | - libgfortran 21 | - libllvm9=9.0.1 22 | - libpng=1.6.37 23 | - libtiff=4.1.0 24 | - libuv=1.40.0 25 | - llvm-openmp=10.0.0 26 | - lz4-c=1.9.2 27 | - matplotlib=3.3.2 28 | - matplotlib-base=3.3.2 29 | - mkl=2019.4 30 | - mkl-service=2.3.0 31 | - mkl_fft=1.2.0 32 | - mkl_random=1.1.1 33 | - ncurses=6.2 34 | - ninja=1.10.2 35 | - numba=0.50.1 36 | - numpy=1.19.2 37 | - numpy-base=1.19.2 38 | - olefile=0.46 39 | - openssl=1.1.1i 40 | - pillow=8.1.0 41 | - pip=20.3.3 42 | - pyparsing=2.4.7 43 | - python=3.8.5 44 | - python-dateutil=2.8.1 45 | - pytorch=1.7.1 46 | - readline=8.0 47 | - setuptools=51.1.2 48 | - six=1.15.0 49 | - sqlite=3.33.0 50 | - tbb=2020.3 51 | - threadpoolctl=2.1.0 52 | - tk=8.6.10 53 | - torchvision=0.2.1 54 | - tornado=6.1 55 | - typing_extensions=3.7.4.3 56 | - wheel=0.36.2 57 | - xz=5.2.5 58 | - zlib=1.2.11 59 | - zstd=1.4.5 60 | - pip: 61 | - ase==3.20.1 62 | - boltons==20.2.1 63 | - chardet==4.0.0 64 | - decorator==4.4.2 65 | - et-xmlfile==1.0.1 66 | - googledrivedownloader==0.4 67 | - h5py==3.1.0 68 | - idna==2.10 69 | - isodate==0.6.0 70 | - jdcal==1.4.1 71 | - jinja2==2.11.2 72 | - littleutils==0.2.2 73 | - llvmlite==0.33.0 74 | - markupsafe==1.1.1 75 | - networkx==2.5 76 | - ogb==1.2.4 77 | - openpyxl==3.0.6 78 | - outdated==0.2.0 79 | - pandas==1.2.0 80 | - pykeops==1.4.2 81 | - python-louvain==0.15 82 | - pytz==2020.5 83 | - rdflib==5.0.0 84 | - requests==2.25.1 85 | - scikit-learn==0.24.0 86 | - scipy==1.5.4 87 | - torch-cluster==1.5.8 88 | - torch-geometric==1.6.3 89 | - torch-scatter==2.0.5 90 | - torch-sparse==0.6.8 91 | - torch-spline-conv==1.2.0 92 | - torchdiffeq==0.1.1 93 | - torchsde==0.2.4 94 | - tqdm==4.56.0 95 | - trampoline==0.1.2 96 | - urllib3==1.26.2 97 | 98 | -------------------------------------------------------------------------------- /src/11: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/GNN_he.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from base_classes import BaseGNN 5 | from model_configurations import set_block, set_function 6 | 7 | 8 | # Define the GNN model. 
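# The class below wires a standard encode-evolve-decode pipeline: input
# dropout and the linear encoder m1 (plus the optional m11/m12 residual MLP
# and batch norm inherited from BaseGNN), followed by an ODE block that
# integrates the dynamics self.f over [0, opt['time']], and finally ReLU,
# optional fc/dropout, and the linear classifier m2 in forward().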
9 | class GNNhe(BaseGNN): 10 | def __init__(self, opt, dataset, device=torch.device('cpu')): 11 | super(GNNhe, self).__init__(opt, dataset, device) 12 | self.f = set_function(opt) 13 | block = set_block(opt) 14 | time_tensor = torch.tensor([0, self.T]).to(device) 15 | self.odeblock = block(self.f, opt, dataset.data, device, t=time_tensor).to(device) 16 | 17 | if opt["use_mlp"]: 18 | self.reset_parameters() 19 | 20 | self.output_normalization =nn.LayerNorm(opt['hidden_dim']) 21 | 22 | def reset_parameters(self): 23 | torch.nn.init.xavier_normal_(self.m11.weight, gain=1.414) 24 | torch.nn.init.xavier_normal_(self.m12.weight, gain=1.414) 25 | 26 | def forward(self, x, pos_encoding=None): 27 | # Encode each node based on its feature. 28 | 29 | 30 | 31 | x = F.dropout(x, self.opt['input_dropout'], training=self.training) 32 | x = self.m1(x) 33 | 34 | if self.opt['use_mlp']: 35 | x = F.dropout(x, self.opt['dropout'], training=self.training) 36 | x = F.dropout(x + self.m11(F.relu(x)), self.opt['dropout'], training=self.training) 37 | x = F.dropout(x + self.m12(F.relu(x)), self.opt['dropout'], training=self.training) 38 | # todo investigate if some input non-linearity solves the problem with smooth deformations identified in the ANODE paper 39 | 40 | 41 | 42 | if self.opt['batch_norm']: 43 | x = self.bn_in(x) 44 | 45 | 46 | 47 | self.odeblock.set_x0(x) 48 | 49 | 50 | z = self.odeblock(x) 51 | 52 | 53 | 54 | # Activation. 55 | z = F.relu(z) 56 | 57 | if self.opt['fc_out']: 58 | z = self.fc(z) 59 | z = F.relu(z) 60 | 61 | # Dropout. 62 | z = F.dropout(z, self.opt['dropout'], training=self.training) 63 | 64 | # Decode each node embedding to get node label. 65 | # z = self.output_normalization(z) 66 | 67 | z = self.m2(z) 68 | return z 69 | -------------------------------------------------------------------------------- /src/GNN_plot.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from base_classes import BaseGNN 5 | from model_configurations import set_block, set_function 6 | 7 | 8 | # Define the GNN model. 9 | class GNNplot(BaseGNN): 10 | def __init__(self, opt, dataset, device=torch.device('cpu')): 11 | super(GNNplot, self).__init__(opt, dataset, device) 12 | self.f = set_function(opt) 13 | block = set_block(opt) 14 | time_tensor = torch.tensor([0, self.T]).to(device) 15 | self.odeblock = block(self.f, self.regularization_fns, opt, dataset.data, device, t=time_tensor).to(device) 16 | 17 | if opt["use_mlp"]: 18 | self.reset_parameters() 19 | 20 | self.output_normalization =nn.LayerNorm(opt['hidden_dim']) 21 | 22 | def reset_parameters(self): 23 | torch.nn.init.xavier_normal_(self.m11.weight, gain=1.414) 24 | torch.nn.init.xavier_normal_(self.m12.weight, gain=1.414) 25 | 26 | def forward(self, x, pos_encoding=None): 27 | # Encode each node based on its feature. 
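# Two encoding paths follow: with opt['beltrami'] enabled, node features and
# the positional encoding are dropped out and projected separately (mx, mp)
# and concatenated into one state; otherwise only the feature encoder m1 is
# applied before the ODE block.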
28 | 29 | 30 | if self.opt['beltrami']: 31 | x = F.dropout(x, self.opt['input_dropout'], training=self.training) 32 | x = self.mx(x) 33 | p = F.dropout(pos_encoding, self.opt['input_dropout'], training=self.training) 34 | p = self.mp(p) 35 | x = torch.cat([x, p], dim=1) 36 | else: 37 | x = F.dropout(x, self.opt['input_dropout'], training=self.training) 38 | x = self.m1(x) 39 | 40 | if self.opt['use_mlp']: 41 | x = F.dropout(x, self.opt['dropout'], training=self.training) 42 | x = F.dropout(x + self.m11(F.relu(x)), self.opt['dropout'], training=self.training) 43 | x = F.dropout(x + self.m12(F.relu(x)), self.opt['dropout'], training=self.training) 44 | # todo investigate if some input non-linearity solves the problem with smooth deformations identified in the ANODE paper 45 | 46 | 47 | 48 | if self.opt['batch_norm']: 49 | x = self.bn_in(x) 50 | 51 | 52 | 53 | self.odeblock.set_x0(x) 54 | 55 | if self.training and self.odeblock.nreg > 0: 56 | z, self.reg_states = self.odeblock(x) 57 | else: 58 | z, att, edge= self.odeblock(x) 59 | 60 | 61 | 62 | # Activation. 63 | z = F.relu(z) 64 | 65 | if self.opt['fc_out']: 66 | z = self.fc(z) 67 | z = F.relu(z) 68 | 69 | # Dropout. 70 | z = F.dropout(z, self.opt['dropout'], training=self.training) 71 | 72 | # Decode each node embedding to get node label. 73 | # z = self.output_normalization(z) 74 | 75 | z = self.m2(z) 76 | return z,att, edge 77 | -------------------------------------------------------------------------------- /src/base_classes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.nn.conv import MessagePassing 4 | from utils import Meter 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | class ODEblock(nn.Module): 13 | def __init__(self, odefunc, opt, data, device, t): 14 | super(ODEblock, self).__init__() 15 | self.opt = opt 16 | self.t = t 17 | 18 | self.aug_dim = 1 19 | self.odefunc = odefunc(self.aug_dim * opt['hidden_dim'], self.aug_dim * opt['hidden_dim'], opt, data, device) 20 | 21 | 22 | 23 | 24 | if opt['adjoint']: 25 | from torchdiffeq import odeint_adjoint as odeint 26 | else: 27 | from torchdiffeq import odeint 28 | self.train_integrator = odeint 29 | self.test_integrator = None 30 | self.set_tol() 31 | 32 | def set_x0(self, x0): 33 | self.odefunc.x0 = x0.clone().detach() 34 | 35 | 36 | def set_tol(self): 37 | self.atol = self.opt['tol_scale'] * 1e-7 38 | self.rtol = self.opt['tol_scale'] * 1e-9 39 | if self.opt['adjoint']: 40 | self.atol_adjoint = self.opt['tol_scale_adjoint'] * 1e-7 41 | self.rtol_adjoint = self.opt['tol_scale_adjoint'] * 1e-9 42 | 43 | def reset_tol(self): 44 | self.atol = 1e-7 45 | self.rtol = 1e-9 46 | self.atol_adjoint = 1e-7 47 | self.rtol_adjoint = 1e-9 48 | 49 | def set_time(self, time): 50 | self.t = torch.tensor([0, time]).to(self.device) 51 | 52 | def __repr__(self): 53 | return self.__class__.__name__ + '( Time Interval ' + str(self.t[0].item()) + ' -> ' + str(self.t[1].item()) \ 54 | + ")" 55 | 56 | 57 | class ODEFunc(MessagePassing): 58 | 59 | # currently requires in_features = out_features 60 | def __init__(self, opt, data, device): 61 | super(ODEFunc, self).__init__() 62 | self.opt = opt 63 | self.device = device 64 | self.edge_index = None 65 | self.edge_weight = None 66 | self.attention_weights = None 67 | self.alpha_train = nn.Parameter(torch.tensor(0.0)) 68 | self.beta_train = nn.Parameter(torch.tensor(0.0)) 69 | self.x0 = None 70 | self.nfe = 0 71 | self.alpha_sc = nn.Parameter(torch.ones(1)) 72 | 
self.beta_sc = nn.Parameter(torch.ones(1)) 73 | 74 | def __repr__(self): 75 | return self.__class__.__name__ 76 | 77 | 78 | class BaseGNN(MessagePassing): 79 | def __init__(self, opt, dataset, device=torch.device('cpu')): 80 | super(BaseGNN, self).__init__() 81 | self.opt = opt 82 | self.T = opt['time'] 83 | self.num_classes = dataset.num_classes 84 | self.num_features = dataset.data.num_features 85 | self.num_nodes = dataset.data.num_nodes 86 | self.device = device 87 | self.fm = Meter() 88 | self.bm = Meter() 89 | 90 | 91 | self.m1 = nn.Linear(self.num_features, opt['hidden_dim']) 92 | 93 | if self.opt['use_mlp']: 94 | self.m11 = nn.Linear(opt['hidden_dim'], opt['hidden_dim']) 95 | self.m12 = nn.Linear(opt['hidden_dim'], opt['hidden_dim']) 96 | 97 | self.hidden_dim = opt['hidden_dim'] 98 | if opt['fc_out']: 99 | self.fc = nn.Linear(opt['hidden_dim'], opt['hidden_dim']) 100 | self.m2 = nn.Linear(opt['hidden_dim'], dataset.num_classes) 101 | if self.opt['batch_norm']: 102 | self.bn_in = torch.nn.BatchNorm1d(opt['hidden_dim']) 103 | self.bn_out = torch.nn.BatchNorm1d(opt['hidden_dim']) 104 | 105 | 106 | 107 | def getNFE(self): 108 | return self.odeblock.odefunc.nfe 109 | 110 | def resetNFE(self): 111 | self.odeblock.odefunc.nfe = 0 112 | 113 | 114 | def reset(self): 115 | self.m1.reset_parameters() 116 | self.m2.reset_parameters() 117 | 118 | def __repr__(self): 119 | return self.__class__.__name__ 120 | -------------------------------------------------------------------------------- /src/best_log/amazon-ratingsbelconvconstant1.020230116-211847.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset amazon-ratings --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant" 2 | 0.4487996080352768 3 | 0.45141270618977625 4 | 0.45843540747999345 5 | 0.45337252980565085 6 | 0.467254613751429 7 | 0.4515760248244325 8 | 0.4484729707659644 9 | 0.4486362894006206 10 | 0.44569655397680874 11 | 0.4487996080352768 12 | 45.2245631226523,0.5999131633978108 13 | train acc list: [0.8871468234525559, 0.8638739180140454, 0.8897599216070554, 0.8525232729054385, 0.8975175567532255, 0.8975175567532255, 0.8733463988241058, 0.8837171321247754, 0.8739996733627307, 0.8966193042626164] 14 | val acc list: [0.4515760248244325, 0.4551690347868692, 0.46153846153846156, 0.4613751429038053, 0.4669279764821166, 0.45337252980565085, 0.46088518699983666, 0.4556589906908378, 0.45827208884533727, 0.44308345582230935] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "amazon-ratings", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 
821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/amazon-ratingsgatconvNone1.020230113-171121.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset amazon-ratings --function gatconv --time 1 --epoch 1000 --step_size 0.5 --dropout 0.2 --lr 0.01 --method euler --no_early --random_split --cuda 2 --hidden_dim 64" 2 | 0.47885023681202027 3 | 0.4750939082149273 4 | 0.48097337906255105 5 | 0.4700310305405847 6 | 0.4783602809080516 7 | 0.4700310305405847 8 | 0.4775436877347705 9 | 0.4713375796178344 10 | 0.4781969622733954 11 | 0.4827698840437694 12 | 47.63187979748489,0.42926711688506275 13 | train acc list: [0.772823779193206, 0.7094561489465948, 0.7701290217213784, 0.7486526212640863, 0.7634329577004736, 0.7396700963579944, 0.7581251020741466, 0.769802384452066, 0.776661767107627, 0.7822962600032664] 14 | val acc list: [0.48554630083292505, 0.4804834231585824, 0.4829332026784256, 0.4791768740813327, 0.4817899722358321, 0.4829332026784256, 0.48815939898742444, 0.47770700636942676, 0.4803201045239262, 0.4829332026784256] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": 
"amazon-ratings", 19 | "data_norm": "rw", 20 | "self_loop_weight": 0, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": true, 27 | "edge_homo": 0.1, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 0.5, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/cornellbelconveulerNone20230115-175939.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset cornell --function belconv --method euler --no_early 
--cuda 2 --epoch 600" 2 | { 3 | "test_result": 77.83783783783784, 4 | "test_std": 6.922296472900378, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 5.765610194206237, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 81.62162162162163, 15 | "test_std": 7.2319395460862985, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 5.342220187187195, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 64 23 | } 24 | { 25 | "test_result": 82.16216216216216, 26 | "test_std": 6.751349187457728, 27 | "dropout": 0.2, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 44.54893562793732, 31 | "time": 0.2, 32 | "step_size": 0.5, 33 | "hidden_dim": 32 34 | } 35 | { 36 | "test_result": 82.70270270270268, 37 | "test_std": 6.964377690121693, 38 | "dropout": 0.4, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 56.194859743118286, 42 | "time": 0.2, 43 | "step_size": 0.5, 44 | "hidden_dim": 256 45 | } 46 | { 47 | "test_result": 83.24324324324323, 48 | "test_std": 4.954135886438745, 49 | "dropout": 0.4, 50 | "weight_decay": 0.0001, 51 | "lr": 0.005, 52 | "runtime_average": 91.36924576759338, 53 | "time": 1, 54 | "step_size": 0.2, 55 | "hidden_dim": 64 56 | } 57 | { 58 | "test_result": 83.51351351351352, 59 | "test_std": 4.750917792228907, 60 | "dropout": 0.4, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 18.978949880599977, 64 | "time": 1, 65 | "step_size": 0.2, 66 | "hidden_dim": 64 67 | } 68 | { 69 | "test_result": 84.32432432432431, 70 | "test_std": 6.139360373838138, 71 | "dropout": 0.4, 72 | "weight_decay": 0.001, 73 | "lr": 0.005, 74 | "runtime_average": 9.309138369560241, 75 | "time": 1, 76 | "step_size": 0.5, 77 | "hidden_dim": 256 78 | } 79 | { 80 | "test_result": 84.59459459459458, 81 | "test_std": 6.948627098476995, 82 | "dropout": 0.2, 83 | "weight_decay": 0.01, 84 | "lr": 0.005, 85 | "runtime_average": 5.897371053695679, 86 | "time": 0.2, 87 | "step_size": 0.2, 88 | "hidden_dim": 128 89 | } 90 | { 91 | "test_result": 84.86486486486486, 92 | "test_std": 6.41856329029077, 93 | "dropout": 0.4, 94 | "weight_decay": 0.01, 95 | "lr": 0.005, 96 | "runtime_average": 12.7184077501297, 97 | "time": 1.5, 98 | "step_size": 0.5, 99 | "hidden_dim": 256 100 | } 101 | { 102 | "test_result": 85.13513513513513, 103 | "test_std": 5.952085282579796, 104 | "dropout": 0.6, 105 | "weight_decay": 0.01, 106 | "lr": 0.005, 107 | "runtime_average": 33.56719207763672, 108 | "time": 1.5, 109 | "step_size": 0.2, 110 | "hidden_dim": 128 111 | } 112 | -------------------------------------------------------------------------------- /src/best_log/cornelllapconveulerNone20230115-190111.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset cornell --function lapconv --method euler --no_early --cuda 1 --epoch 600" 2 | { 3 | "test_result": 79.1891891891892, 4 | "test_std": 5.545482305049514, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 20.32029731273651, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 79.45945945945945, 15 | "test_std": 5.565205481614593, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 21.358316016197204, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 64 23 | } 24 | { 25 | "test_result": 79.72972972972973, 26 | "test_std": 
5.952085282579798, 27 | "dropout": 0.4, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 32.16159210205078, 31 | "time": 0.5, 32 | "step_size": 0.2, 33 | "hidden_dim": 64 34 | } 35 | { 36 | "test_result": 79.72972972972974, 37 | "test_std": 6.073568933579522, 38 | "dropout": 0.6, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 20.645365118980408, 42 | "time": 1, 43 | "step_size": 1, 44 | "hidden_dim": 64 45 | } 46 | { 47 | "test_result": 81.35135135135134, 48 | "test_std": 7.591660489290884, 49 | "dropout": 0, 50 | "weight_decay": 0.001, 51 | "lr": 0.005, 52 | "runtime_average": 25.48646306991577, 53 | "time": 0.2, 54 | "step_size": 0.2, 55 | "hidden_dim": 16 56 | } 57 | { 58 | "test_result": 83.24324324324323, 59 | "test_std": 8.529585858410544, 60 | "dropout": 0, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 13.805762386322021, 64 | "time": 0.2, 65 | "step_size": 0.2, 66 | "hidden_dim": 256 67 | } 68 | { 69 | "test_result": 83.78378378378378, 70 | "test_std": 5.538892305924107, 71 | "dropout": 0.4, 72 | "weight_decay": 0.001, 73 | "lr": 0.005, 74 | "runtime_average": 7.33641140460968, 75 | "time": 0.2, 76 | "step_size": 0.2, 77 | "hidden_dim": 128 78 | } 79 | { 80 | "test_result": 84.05405405405405, 81 | "test_std": 5.853083196677764, 82 | "dropout": 0, 83 | "weight_decay": 0.01, 84 | "lr": 0.005, 85 | "runtime_average": 10.523886942863465, 86 | "time": 0.5, 87 | "step_size": 0.2, 88 | "hidden_dim": 256 89 | } 90 | { 91 | "test_result": 84.32432432432431, 92 | "test_std": 7.130219436904283, 93 | "dropout": 0.2, 94 | "weight_decay": 0.01, 95 | "lr": 0.005, 96 | "runtime_average": 10.521282386779784, 97 | "time": 0.5, 98 | "step_size": 0.2, 99 | "hidden_dim": 256 100 | } 101 | { 102 | "test_result": 85.40540540540539, 103 | "test_std": 5.565205481614596, 104 | "dropout": 0.2, 105 | "weight_decay": 0.01, 106 | "lr": 0.005, 107 | "runtime_average": 13.455496621131896, 108 | "time": 1, 109 | "step_size": 0.2, 110 | "hidden_dim": 64 111 | } 112 | { 113 | "test_result": 86.21621621621621, 114 | "test_std": 5.049065322234977, 115 | "dropout": 0.6, 116 | "weight_decay": 0.01, 117 | "lr": 0.005, 118 | "runtime_average": 13.325262236595155, 119 | "time": 1, 120 | "step_size": 0.2, 121 | "hidden_dim": 128 122 | } 123 | -------------------------------------------------------------------------------- /src/best_log/minesweeperbelconvattention3.020230118-201621.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset minesweeper --function belconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001" 2 | 0.928993 3 | 0.9452619999999999 4 | 0.9367850000000001 5 | 0.933376 6 | 0.9373509999999999 7 | 0.9464109999999999 8 | 0.937468 9 | 0.9464170000000001 10 | 0.940732 11 | 0.9455669999999999 12 | 93.98362,0.5744757067100388 13 | train acc list: [0.9491600000000001, 0.96114225, 0.9527565, 0.9618617500000001, 0.954907, 0.96515775, 0.95813275, 0.9605132499999999, 0.95401125, 0.9673325] 14 | val acc list: [0.920393, 0.9448479999999999, 0.943635, 0.9386559999999999, 0.941713, 0.944002, 0.94882, 0.946468, 0.9447450000000001, 0.946731] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "minesweeper", 19 | "data_norm": "gcn", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": 
false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.2, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "attention", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "rk4", 49 | "step_size": 1, 50 | "max_iters": 1000, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/minesweepergatconvconstant4.020230118-011000.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset minesweeper --function gatconv --time 4 --epoch 600 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001" 2 | 0.977259 3 | 0.9743330000000001 4 | 0.979973 5 | 0.9754499999999999 6 | 
0.973902 7 | 0.9802029999999999 8 | 0.9739869999999998 9 | 0.975565 10 | 0.9778269999999999 11 | 0.978157 12 | 97.66655999999999,0.22432064193916582 13 | train acc list: [0.9898334999999999, 0.9858706249999999, 0.9888985000000001, 0.99088775, 0.9895455, 0.99146575, 0.98837625, 0.9845142499999999, 0.9895122500000001, 0.985397] 14 | val acc list: [0.9770789999999999, 0.976502, 0.97763, 0.9750365, 0.976732, 0.974698, 0.978675, 0.9753859999999999, 0.9783069999999999, 0.974014] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": "minesweeper", 19 | "data_norm": "gcn", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.2, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 600, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 4.0, 47 | "augment": false, 48 | "method": "rk4", 49 | "step_size": 1, 50 | "max_iters": 1000, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | 
"max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/questionsbelconvconstant1.020230116-173925.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset questions --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant" 2 | 0.72697069328968 3 | 0.730413937369115 4 | 0.7180077070668123 5 | 0.6988274314318104 6 | 0.7368228321277093 7 | 0.7192944501268734 8 | 0.7013276103705995 9 | 0.7389217264014369 10 | 0.7124791911504955 11 | 0.7278471470230358 12 | 72.10912726357567,1.3060300210075217 13 | train acc list: [0.7822344122519902, 0.755272760334586, 0.7941171801488204, 0.8998120984361742, 0.7753958632792431, 0.8778854868411179, 0.7713146759491771, 0.7832984950556777, 0.8427026075310715, 0.7954768543373223] 14 | val acc list: [0.7175811209439529, 0.7409935980696073, 0.7143170023494911, 0.7131849748021406, 0.7310133984494513, 0.7104989349358364, 0.7281456338141998, 0.7386065843478863, 0.7438031738334805, 0.7336534875800242] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "questions", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 
100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/questionsgatconvNone3.020230113-193655.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset questions --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 3" 2 | 0.7594055999759876 3 | 0.7609594813314893 4 | 0.7525871085569683 5 | 0.7374343410088454 6 | 0.7515671574592078 7 | 0.7651656973186889 8 | 0.7456307765481669 9 | 0.7637708983188989 10 | 0.7425674599234835 11 | 0.7378839968691484 12 | 75.16972517310884,0.9937706575189349 13 | train acc list: [0.8224468478141651, 0.8624602693544384, 0.8374133083952455, 0.8454756997962234, 0.8677887074335129, 0.817008988102454, 0.844050072447454, 0.7853427543886994, 0.8247994562111425, 0.8639757200006927] 14 | val acc list: [0.7626469009230556, 0.7657107066368795, 0.7587769946140657, 0.7613943392849927, 0.7663074889308372, 0.7474302801493976, 0.7581697059961092, 0.7521627441132837, 0.7654696384554549, 0.7681211575429057] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 3, 18 | "dataset": "questions", 19 | "data_norm": "rw", 20 | "self_loop_weight": 0, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.1, 28 | "hidden_dim": 16, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | 
"attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/roman-empirebelconvconstant1.020230116-170240.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset roman-empire --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 256 --block constant" 2 | 0.8512177903282739 3 | 0.8600423579244617 4 | 0.854041651959054 5 | 0.8513942816801977 6 | 0.8512177903282739 7 | 0.8621602541475468 8 | 0.8552770914225203 9 | 0.8469819978821038 10 | 0.8579244617013766 11 | 0.8487469114013413 12 | 85.3900458877515,0.4681381126880635 13 | train acc list: [0.9969993822257524, 0.9928514694201748, 0.9971758891536493, 0.9964698614420616, 0.9984996911128762, 0.9956755802665255, 0.9971758891536493, 0.9949695525549378, 0.9973523960815462, 0.9952343129467831] 14 | val acc list: [0.8593115622241836, 0.8619593998234775, 0.8527802294792586, 0.863901147396293, 0.8614298323036187, 0.8564872021182701, 0.8570167696381289, 0.8570167696381289, 0.8533097969991174, 0.8571932921447485] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "roman-empire", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 256, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 
33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/roman-empiregatconvconstant3.020230117-145044.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset roman-empire --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001" 2 | 0.9174020472996823 3 | 0.9193434521708437 4 | 0.9191669608189199 5 | 0.9126367807977409 6 | 0.9179315213554535 7 | 0.9196964348746911 8 | 0.9174020472996823 9 | 0.9135192375573596 10 | 0.9114013413342746 11 | 0.9152841510765972 12 | 
91.63783974585246,0.2832452516386811 13 | train acc list: [0.9968228752978554, 0.9972641426175978, 0.9974406495454946, 0.9968228752978554, 0.9984114376489277, 0.9958520871944224, 0.9940870179154532, 0.9971758891536493, 0.9887918100785456, 0.9973523960815462] 14 | val acc list: [0.9205648720211828, 0.9214474845542807, 0.918270079435128, 0.9276257722859664, 0.9251544571932921, 0.9205648720211828, 0.918270079435128, 0.9210944395410415, 0.91738746690203, 0.9198587819947043] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": "roman-empire", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | 
"max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/texasbelconveulerNone20230115-175910.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset texas --function belconv --method euler --no_early --cuda 3 --epoch 600" 2 | { 3 | "test_result": 79.45945945945945, 4 | "test_std": 4.391372110614032, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 5.350853610038757, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 80.0, 15 | "test_std": 4.391372110614035, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 5.039187932014466, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 32 23 | } 24 | { 25 | "test_result": 84.05405405405403, 26 | "test_std": 4.750917792228907, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 5.045219922065735, 31 | "time": 0.2, 32 | "step_size": 0.2, 33 | "hidden_dim": 64 34 | } 35 | { 36 | "test_result": 85.94594594594594, 37 | "test_std": 4.490066952928694, 38 | "dropout": 0, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 5.194399738311768, 42 | "time": 0.2, 43 | "step_size": 0.2, 44 | "hidden_dim": 128 45 | } 46 | { 47 | "test_result": 86.21621621621621, 48 | "test_std": 4.594594594594594, 49 | "dropout": 0.2, 50 | "weight_decay": 0.0001, 51 | "lr": 0.005, 52 | "runtime_average": 37.9852658033371, 53 | "time": 0.2, 54 | "step_size": 0.5, 55 | "hidden_dim": 128 56 | } 57 | { 58 | "test_result": 86.48648648648648, 59 | "test_std": 5.268537483680524, 60 | "dropout": 0.2, 61 | "weight_decay": 0.0001, 62 | "lr": 0.005, 63 | "runtime_average": 30.911461496353148, 64 | "time": 0.5, 65 | "step_size": 1, 66 | "hidden_dim": 128 67 | } 68 | { 69 | "test_result": 87.56756756756756, 70 | "test_std": 3.2432432432432456, 71 | "dropout": 0.4, 72 | "weight_decay": 0.0001, 73 | "lr": 0.005, 74 | "runtime_average": 60.836114573478696, 75 | "time": 0.5, 76 | "step_size": 0.2, 77 | "hidden_dim": 64 78 | } 79 | -------------------------------------------------------------------------------- /src/best_log/texaslapconveulerNone20230115-190052.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset texas --function lapconv --method euler --no_early --cuda 3 --epoch 600" 2 | { 3 | "test_result": 80.54054054054055, 4 | "test_std": 6.019204716572999, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 9.118959641456604, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 80.8108108108108, 15 | "test_std": 7.875028261801876, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 8.814443945884705, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 32 23 | } 24 | { 25 | "test_result": 83.78378378378378, 26 | "test_std": 3.6260561797293906, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 8.797356653213502, 31 | "time": 0.2, 32 | "step_size": 0.2, 33 | "hidden_dim": 64 34 | } 35 | { 36 | "test_result": 84.32432432432431, 37 | "test_std": 
4.954135886438749, 38 | "dropout": 0.8, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 24.91836953163147, 42 | "time": 0.5, 43 | "step_size": 0.2, 44 | "hidden_dim": 256 45 | } 46 | { 47 | "test_result": 85.40540540540539, 48 | "test_std": 4.221756581571168, 49 | "dropout": 0, 50 | "weight_decay": 0.001, 51 | "lr": 0.005, 52 | "runtime_average": 25.02802336215973, 53 | "time": 0.5, 54 | "step_size": 0.2, 55 | "hidden_dim": 128 56 | } 57 | { 58 | "test_result": 85.94594594594594, 59 | "test_std": 5.51245352820842, 60 | "dropout": 0.6, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 22.686385536193846, 64 | "time": 0.5, 65 | "step_size": 0.2, 66 | "hidden_dim": 256 67 | } 68 | { 69 | "test_result": 86.21621621621621, 70 | "test_std": 3.2990690853334352, 71 | "dropout": 0.8, 72 | "weight_decay": 0.001, 73 | "lr": 0.005, 74 | "runtime_average": 23.397251343727113, 75 | "time": 0.5, 76 | "step_size": 0.2, 77 | "hidden_dim": 256 78 | } 79 | -------------------------------------------------------------------------------- /src/best_log/wiki-coocbelconvconstant1.020230116-202725.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset wiki-cooc --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant" 2 | 0.9796 3 | 0.9784 4 | 0.9808 5 | 0.9816 6 | 0.9716 7 | 0.9772 8 | 0.9748 9 | 0.984 10 | 0.9796 11 | 0.9712 12 | 97.788,0.40001999950002803 13 | train acc list: [0.9994, 0.9996, 0.9992, 0.9992, 0.9996, 0.9986, 0.9978, 0.9996, 0.9992, 0.9998] 14 | val acc list: [0.9784, 0.9816, 0.9768, 0.978, 0.9804, 0.9828, 0.9832, 0.9812, 0.9816, 0.9796] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "wiki-cooc", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": false, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 
86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/wiki-cooctransconvattention1.020230117-230603.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset wiki-cooc --function transconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001" 2 | 0.9812 3 | 0.9796 4 | 0.9884 5 | 0.9804 6 | 0.974 7 | 0.9812 8 | 0.9764 9 | 0.982 10 | 0.9804 11 | 0.9804 12 | 98.03999999999999,0.35417509793885704 13 | train acc list: [0.9978, 0.9982, 0.9988, 0.9994, 0.998, 0.9986, 1.0, 1.0, 0.9984, 0.999] 14 | val acc list: [0.9816, 0.9836, 0.9808, 0.9804, 0.9848, 0.9872, 0.9876, 0.984, 0.9832, 0.9828] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "wiki-cooc", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "attention", 42 | "function": "transconv", 43 | "use_mlp": true, 44 | "add_source": false, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 
66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/wisconsinbelconveulerNone20230115-180013.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset wisconsin --function belconv --method euler --no_early --cuda 1 --epoch 600" 2 | { 3 | "test_result": 83.52941176470588, 4 | "test_std": 4.13162892268735, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 5.726944208145142, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 84.11764705882354, 15 | "test_std": 4.995191844257645, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 5.316670346260071, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 64 23 | } 24 | { 25 | "test_result": 84.90196078431373, 26 | "test_std": 4.475573415887581, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 5.399200391769409, 31 | "time": 0.2, 32 | "step_size": 0.5, 33 | "hidden_dim": 32 34 | } 35 | { 36 | "test_result": 85.29411764705881, 37 | "test_std": 3.7460731714789826, 38 | "dropout": 0, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 5.319157719612122, 42 | "time": 0.2, 43 | "step_size": 1, 44 | "hidden_dim": 64 45 | } 46 | { 47 | "test_result": 85.29411764705883, 48 | "test_std": 4.318179518734362, 49 | "dropout": 0.2, 50 | "weight_decay": 0.0001, 51 | "lr": 
0.005, 52 | "runtime_average": 31.17937104701996, 53 | "time": 0.2, 54 | "step_size": 0.2, 55 | "hidden_dim": 256 56 | } 57 | { 58 | "test_result": 85.88235294117648, 59 | "test_std": 5.3912655234774585, 60 | "dropout": 0.2, 61 | "weight_decay": 0.0001, 62 | "lr": 0.005, 63 | "runtime_average": 30.308173513412477, 64 | "time": 0.2, 65 | "step_size": 0.5, 66 | "hidden_dim": 256 67 | } 68 | { 69 | "test_result": 86.47058823529413, 70 | "test_std": 4.50980392156863, 71 | "dropout": 0.2, 72 | "weight_decay": 0.0001, 73 | "lr": 0.005, 74 | "runtime_average": 132.80444235801696, 75 | "time": 1.5, 76 | "step_size": 0.2, 77 | "hidden_dim": 128 78 | } 79 | { 80 | "test_result": 86.66666666666667, 81 | "test_std": 3.802101848953985, 82 | "dropout": 0.4, 83 | "weight_decay": 0.0001, 84 | "lr": 0.005, 85 | "runtime_average": 37.766193342208865, 86 | "time": 0.5, 87 | "step_size": 0.5, 88 | "hidden_dim": 256 89 | } 90 | { 91 | "test_result": 87.84313725490196, 92 | "test_std": 4.866538684702295, 93 | "dropout": 0.4, 94 | "weight_decay": 0.0001, 95 | "lr": 0.005, 96 | "runtime_average": 75.86288826465606, 97 | "time": 1, 98 | "step_size": 0.2, 99 | "hidden_dim": 64 100 | } 101 | -------------------------------------------------------------------------------- /src/best_log/wisconsinlapconveulerNone20230115-190125.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset wisconsin --function lapconv --method euler --no_early --cuda 0 --epoch 600" 2 | { 3 | "test_result": 82.35294117647058, 4 | "test_std": 5.333910003425664, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 18.909467649459838, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 84.90196078431373, 15 | "test_std": 3.6208206495332176, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 18.475360369682313, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 32 23 | } 24 | { 25 | "test_result": 85.09803921568627, 26 | "test_std": 4.898037645802665, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 23.430826544761658, 31 | "time": 1.5, 32 | "step_size": 1, 33 | "hidden_dim": 128 34 | } 35 | { 36 | "test_result": 85.88235294117646, 37 | "test_std": 3.594177015651642, 38 | "dropout": 0.4, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 16.001456832885744, 42 | "time": 1, 43 | "step_size": 1, 44 | "hidden_dim": 128 45 | } 46 | { 47 | "test_result": 86.07843137254902, 48 | "test_std": 1.849800221971885, 49 | "dropout": 0.4, 50 | "weight_decay": 0.0001, 51 | "lr": 0.005, 52 | "runtime_average": 35.787513732910156, 53 | "time": 1.5, 54 | "step_size": 0.2, 55 | "hidden_dim": 256 56 | } 57 | { 58 | "test_result": 86.27450980392157, 59 | "test_std": 2.90831313219438, 60 | "dropout": 0, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 18.373980784416197, 64 | "time": 0.2, 65 | "step_size": 0.2, 66 | "hidden_dim": 64 67 | } 68 | { 69 | "test_result": 86.47058823529412, 70 | "test_std": 4.594264515239208, 71 | "dropout": 0.2, 72 | "weight_decay": 0.01, 73 | "lr": 0.005, 74 | "runtime_average": 13.467104196548462, 75 | "time": 1.5, 76 | "step_size": 1, 77 | "hidden_dim": 16 78 | } 79 | { 80 | "test_result": 87.05882352941177, 81 | "test_std": 3.304372460069162, 82 | "dropout": 0.4, 83 | "weight_decay": 0.01, 84 | "lr": 0.005, 85 | "runtime_average": 9.68775908946991, 86 | "time": 0.5, 87 | 
"step_size": 0.5, 88 | "hidden_dim": 32 89 | } 90 | { 91 | "test_result": 87.45098039215688, 92 | "test_std": 4.401949866792872, 93 | "dropout": 0.6, 94 | "weight_decay": 0.01, 95 | "lr": 0.005, 96 | "runtime_average": 13.179940152168275, 97 | "time": 0.5, 98 | "step_size": 0.2, 99 | "hidden_dim": 128 100 | } 101 | -------------------------------------------------------------------------------- /src/best_log/workersbelconvattention3.020230114-120652.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset workers --function belconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --random_split --cuda 3 --hidden_dim 64 --block attention" 2 | 0.8129475312407647 3 | 0.8062150752787878 4 | 0.8026341475317832 5 | 0.82135217133143 6 | 0.8175861984822237 7 | 0.8076337543956684 8 | 0.8116159521078875 9 | 0.8112492510079197 10 | 0.8151507880345634 11 | 0.8239102673596707 12 | 81.302951367707,0.6338053733195014 13 | train acc list: [0.8416701601650287, 0.8234789545553522, 0.8240209555633793, 0.8394800249903843, 0.8468909899624668, 0.8461019002596043, 0.842648339706424, 0.8465870895224217, 0.8530692248571566, 0.8334028641259776] 14 | val acc list: [0.8242404369804035, 0.8177506995841192, 0.798109731177759, 0.8136356785864123, 0.8095325379594819, 0.834769161001427, 0.8107966094100685, 0.8161027224378792, 0.8186661670122158, 0.8153186179666507] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 3, 18 | "dataset": "workers", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": true, 27 | "edge_homo": 0.1, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "attention", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | 
"KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/workersgatconvconstant1.020230117-174152.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset workers --function gatconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001" 2 | 0.7980819024534405 3 | 0.8008182653750112 4 | 0.8178444482402414 5 | 0.8132905086096809 6 | 0.8210810429765555 7 | 0.7904021918016209 8 | 0.8120484018339121 9 | 0.79523708818992 10 | 0.8083637674911681 11 | 0.8217114164016387 12 | 80.7887903337319,1.0596868582754906 13 | train acc list: [0.8508128658422012, 0.831333559901965, 0.8523991854382849, 0.838000714980053, 0.8525191684524209, 0.8444870899972663, 0.8444729294577888, 0.8519340074767651, 0.8535531422152307, 0.8503690558803718] 14 | val acc list: [0.8026846922440867, 0.8150884782127576, 0.8046052390398488, 0.8006972759328128, 0.8028007804385283, 0.8185466844261238, 0.8019705122408551, 0.8033975144906578, 0.8200324096514775, 0.803147687265193] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": "workers", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | 
"attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_params_discrete.py: -------------------------------------------------------------------------------- 1 | best_params_dict = {'cornell': {'model': 'lap_gcn', 'lr': 0.00721, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 1, 'drop': 0.15, 'weight_decay': 0.0012708787092020595, 'res_version': 1}, 2 | 'wisconsin': {'model': 'lap_gcn', 'lr': 0.00356, 'nhid': 64, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.23, 'weight_decay': 0.008126619200091946, 'res_version': 2}, 3 | 'texas': {'model': 'lap_gcn', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.68, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 4 | 5 | } 6 | 7 | # best_params_dict = {'cornell': {'model': 'HAMCON_GCN', 'lr': 0.00721, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 1, 'drop': 0.15, 'weight_decay': 0.0012708787092020595, 'res_version': 1}, 8 | # 'wisconsin': {'model': 'HAMCON_GCN', 'lr': 0.00356, 'nhid': 64, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.23, 'weight_decay': 0.008126619200091946, 'res_version': 2}, 9 | # 'texas': {'model': 'HAMCON_GCN', 'lr': 0.00155, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.68, 'weight_decay': 0.0008549327066268375, 'res_version': 2} 10 | # } -------------------------------------------------------------------------------- /src/best_params_graphocn.py: 
-------------------------------------------------------------------------------- 1 | best_params_dict = {'cornell': {'model': 'GraphCON_GCN', 'lr': 0.00721, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 1, 'drop': 0.15, 'weight_decay': 0.0012708787092020595, 'res_version': 1}, 2 | 'wisconsin': {'model': 'GraphCON_GCN', 'lr': 0.00356, 'nhid': 64, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.23, 'weight_decay': 0.008126619200091946, 'res_version': 2}, 3 | 'texas': {'model': 'GraphCON_GCN', 'lr': 0.00155, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.68, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 4 | 'film': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 5 | 'chameleon': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 6 | 'squirrel': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 7 | 'wiki-cooc': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 8 | 'roman-empire': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 9 | 'amazon-ratings': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 10 | 'minesweeper': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 11 | 'workers': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 12 | 'questions': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/block_constant.py: -------------------------------------------------------------------------------- 1 | from base_classes import ODEblock 2 | import torch 3 | from utils import get_rw_adj, gcn_norm_fill_val 4 | 5 | 6 | class ConstantODEblock(ODEblock): 7 | def __init__(self, odefunc, opt, data, device, t=torch.tensor([0, 1])): 8 | super(ConstantODEblock, self).__init__(odefunc, opt, data, device, t) 9 | 10 | self.aug_dim = 2 if opt['augment'] else 1 11 | self.odefunc = odefunc(self.aug_dim * opt['hidden_dim'], self.aug_dim * opt['hidden_dim'], opt, data, device) 12 | if opt['data_norm'] == 'rw': 13 | edge_index, edge_weight = get_rw_adj(data.edge_index, edge_weight=data.edge_attr, norm_dim=1, 14 | fill_value=opt['self_loop_weight'], 15 | num_nodes=data.num_nodes, 16 | dtype=data.x.dtype) 17 | else: 18 | edge_index, edge_weight = gcn_norm_fill_val(data.edge_index, edge_weight=data.edge_attr, 19 | fill_value=opt['self_loop_weight'], 20 | num_nodes=data.num_nodes, 21 | dtype=data.x.dtype) 22 | self.odefunc.edge_index = edge_index.to(device) 23 | self.odefunc.edge_weight = edge_weight.to(device) 24 | 25 | 26 | if opt['adjoint']: 27 | from torchdiffeq import odeint_adjoint as odeint 28 | else: 
29 | from torchdiffeq import odeint 30 | 31 | self.train_integrator = odeint 32 | self.test_integrator = odeint 33 | self.set_tol() 34 | 35 | def forward(self, x): 36 | t = self.t.type_as(x) 37 | 38 | integrator = self.train_integrator if self.training else self.test_integrator 39 | 40 | 41 | 42 | func = self.odefunc 43 | state = x 44 | 45 | if self.opt["adjoint"] and self.training: 46 | state_dt = integrator( 47 | func, state, t, 48 | method=self.opt['method'], 49 | options=dict(step_size=self.opt['step_size'], max_iters=self.opt['max_iters']), 50 | adjoint_method=self.opt['adjoint_method'], 51 | adjoint_options=dict(step_size = self.opt['adjoint_step_size'], max_iters=self.opt['max_iters']), 52 | atol=self.atol, 53 | rtol=self.rtol, 54 | adjoint_atol=self.atol_adjoint, 55 | adjoint_rtol=self.rtol_adjoint) 56 | else: 57 | state_dt = integrator( 58 | func, state, t, 59 | method=self.opt['method'], 60 | options=dict(step_size=self.opt['step_size'], ), 61 | atol=self.atol, 62 | rtol=self.rtol) 63 | 64 | 65 | z = state_dt[1] 66 | return z 67 | 68 | def __repr__(self): 69 | return self.__class__.__name__ + '( Time Interval ' + str(self.t[0].item()) + ' -> ' + str(self.t[1].item()) \ 70 | + ")" 71 | -------------------------------------------------------------------------------- /src/block_transformer_attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from function_transformer_attention import SpGraphTransAttentionLayer 3 | from base_classes import ODEblock 4 | from utils import get_rw_adj 5 | 6 | 7 | class AttODEblock(ODEblock): 8 | def __init__(self, odefunc, opt, data, device, t=torch.tensor([0, 1]), gamma=0.5): 9 | super(AttODEblock, self).__init__(odefunc, opt, data, device, t) 10 | 11 | self.odefunc = odefunc(self.aug_dim * opt['hidden_dim'], self.aug_dim * opt['hidden_dim'], opt, data, device) 12 | # self.odefunc.edge_index, self.odefunc.edge_weight = data.edge_index, edge_weight=data.edge_attr 13 | edge_index, edge_weight = get_rw_adj(data.edge_index, edge_weight=data.edge_attr, norm_dim=1, 14 | fill_value=opt['self_loop_weight'], 15 | num_nodes=data.num_nodes, 16 | dtype=data.x.dtype) 17 | self.odefunc.edge_index = edge_index.to(device) 18 | self.odefunc.edge_weight = edge_weight.to(device) 19 | 20 | 21 | if opt['adjoint']: 22 | from torchdiffeq import odeint_adjoint as odeint 23 | else: 24 | from torchdiffeq import odeint 25 | self.train_integrator = odeint 26 | self.test_integrator = odeint 27 | self.set_tol() 28 | # parameter trading off between attention and the Laplacian 29 | self.multihead_att_layer = SpGraphTransAttentionLayer(opt['hidden_dim'], opt['hidden_dim'], opt, 30 | device, edge_weights=self.odefunc.edge_weight).to(device) 31 | 32 | def get_attention_weights(self, x): 33 | attention, values = self.multihead_att_layer(x, self.odefunc.edge_index) 34 | return attention 35 | 36 | def forward(self, x): 37 | t = self.t.type_as(x) 38 | self.odefunc.attention_weights = self.get_attention_weights(x) 39 | 40 | integrator = self.train_integrator if self.training else self.test_integrator 41 | 42 | func = self.odefunc 43 | 44 | 45 | state = x 46 | 47 | if self.opt["adjoint"] and self.training: 48 | state_dt = integrator( 49 | func, state, t, 50 | method=self.opt['method'], 51 | options={'step_size': self.opt['step_size']}, 52 | adjoint_method=self.opt['adjoint_method'], 53 | adjoint_options={'step_size': self.opt['adjoint_step_size']}, 54 | atol=self.atol, 55 | rtol=self.rtol, 56 | adjoint_atol=self.atol_adjoint, 57 | 
adjoint_rtol=self.rtol_adjoint) 58 | else: 59 | state_dt = integrator( 60 | func, state, t, 61 | method=self.opt['method'], 62 | options={'step_size': self.opt['step_size']}, 63 | atol=self.atol, 64 | rtol=self.rtol) 65 | 66 | 67 | z = state_dt[1] 68 | return z 69 | 70 | def __repr__(self): 71 | return self.__class__.__name__ + '( Time Interval ' + str(self.t[0].item()) + ' -> ' + str(self.t[1].item()) \ 72 | + ")" 73 | 74 | -------------------------------------------------------------------------------- /src/discrete_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from torch_geometric.nn import GCNConv, GATConv 6 | from torch_scatter import scatter 7 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 8 | import torch_sparse 9 | class Lap_GCN(nn.Module): 10 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers, graph_size, dt=1., alpha=1., gamma=1., res_version=1, ): 11 | super(Lap_GCN, self).__init__() 12 | self.dropout = dropout 13 | self.nhid = nhid 14 | self.nlayers = nlayers 15 | self.enc = nn.Linear(nfeat,nhid) 16 | self.conv = GCNConv(nhid, nhid) 17 | self.dec = nn.Linear(nhid,nclass) 18 | self.res = nn.Linear(nhid,nhid) 19 | if(res_version==1): 20 | self.residual = self.res_connection_v1 21 | else: 22 | self.residual = self.res_connection_v2 23 | self.dt = dt 24 | self.act_fn = nn.ReLU() 25 | self.alpha = alpha 26 | self.gamma = gamma 27 | self.graph_size = graph_size 28 | self.epsilons = nn.ParameterList() 29 | for i in range(self.nlayers): 30 | self.epsilons.append(nn.Parameter(torch.zeros((self.nhid, 1)))) 31 | # print("self.epsilons: ", self.epsilons[0].shape) 32 | # print("self.graph_size",self.graph_size) 33 | 34 | self.reset_params() 35 | 36 | 37 | 38 | def reset_params(self): 39 | for name, param in self.named_parameters(): 40 | if 'weight' in name and 'emb' not in name and 'out' not in name: 41 | stdv = 1. 
/ math.sqrt(self.nhid) 42 | param.data.uniform_(-stdv, stdv) 43 | 44 | def res_connection_v1(self, X): 45 | res = - self.res(self.conv.lin(X)) 46 | return res 47 | 48 | def res_connection_v2(self, X): 49 | res = - self.conv.lin(X) + self.res(X) 50 | return res 51 | 52 | def forward(self, data): 53 | input = data.x 54 | edge_index = data.edge_index 55 | input = F.dropout(input, self.dropout, training=self.training) 56 | X = self.act_fn(self.enc(input)) 57 | 58 | 59 | X = F.dropout(X, self.dropout, training=self.training) 60 | X0 =X 61 | for i in range(self.nlayers): 62 | 63 | # coeff = (1 + torch.tanh(self.epsilons[i]).tile(self.graph_size, 1)) 64 | coeff = (1 + torch.tanh(self.epsilons[i])).T 65 | coeff = coeff.tile(self.graph_size, 1) 66 | # print("coeff shape: ", coeff.shape) 67 | # print("X0 shape: ", X0.shape) 68 | X0 = X0 * coeff + self.dt * (self.act_fn(self.conv(X, edge_index) + self.residual(X)) - self.alpha * X) 69 | X = X0 70 | 71 | 72 | # X = X + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*X) 73 | # X = X + self.dt * (self.act_fn(self.conv(X, edge_index)) - self.alpha * X) 74 | # X = X + self.dt * (self.act_fn(self.conv(X, edge_index) + self.residual(X)) ) 75 | X = F.dropout(X, self.dropout, training=self.training) 76 | 77 | X = self.dec(X) 78 | 79 | return X 80 | 81 | 82 | class Lap_conv_GCN(nn.Module): 83 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers,graph_size, dt=1., alpha=1., gamma=1., res_version=1): 84 | super(Lap_conv_GCN, self).__init__() 85 | self.dropout = dropout 86 | self.nhid = nhid 87 | self.nlayers = nlayers 88 | self.enc = nn.Linear(nfeat,nhid) 89 | self.conv = GCNConv(nhid, nhid) 90 | self.dec = nn.Linear(nhid,nclass) 91 | self.res = nn.Linear(nhid,nhid) 92 | if(res_version==1): 93 | self.residual = self.res_connection_v1 94 | else: 95 | self.residual = self.res_connection_v2 96 | self.dt = dt 97 | self.act_fn = nn.ReLU() 98 | self.alpha = alpha 99 | self.gamma = gamma 100 | self.reset_params() 101 | 102 | self.gate = nn.Linear(2 * nhid, 1) 103 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 104 | 105 | self.lin1 = nn.Linear(nhid, nhid) 106 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 107 | 108 | self.lin2 = nn.Linear(nhid * 2, nhid) 109 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 110 | 111 | self.weight_low, self.weight_high, self.weight_mlp = ( 112 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 113 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 114 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 115 | ) 116 | 117 | self.output_low, self.output_high, self.output_mlp = ( 118 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 119 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 120 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 121 | ) 122 | 123 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 124 | 125 | self.weight_low.data.uniform_(-stdv, stdv) 126 | self.weight_high.data.uniform_(-stdv, stdv) 127 | self.weight_mlp.data.uniform_(-stdv, stdv) 128 | 129 | self.output_low.data.uniform_(-stdv, stdv) 130 | self.output_high.data.uniform_(-stdv, stdv) 131 | self.output_mlp.data.uniform_(-stdv, stdv) 132 | 133 | self.epsilons = nn.ParameterList() 134 | for i in range(self.nlayers): 135 | self.epsilons.append(nn.Parameter(torch.zeros((self.nhid, 1)))) 136 | self.lamda = nn.ParameterList() 137 | for i in range(self.nlayers): 138 | self.lamda .append(nn.Parameter(torch.zeros((self.nhid, 1)))) 139 | self.graph_size = graph_size 140 | 141 | 142 | 143 | def reset_params(self): 144 | for name, param in 
self.named_parameters(): 145 | if 'weight' in name and 'emb' not in name and 'out' not in name: 146 | stdv = 1. / math.sqrt(self.nhid) 147 | param.data.uniform_(-stdv, stdv) 148 | 149 | def res_connection_v1(self, X): 150 | res = - self.res(self.conv.lin(X)) 151 | return res 152 | 153 | def res_connection_v2(self, X): 154 | res = - self.conv.lin(X) + self.res(X) 155 | return res 156 | 157 | def forward(self, data): 158 | input = data.x 159 | # edge_index = data.edge_index 160 | input = F.dropout(input, self.dropout, training=self.training) 161 | X = self.act_fn(self.enc(input)) 162 | self.edge_index ,self.edge_weight = add_remaining_self_loops (data.edge_index, data.edge_weight) 163 | edge_index = self.edge_index 164 | 165 | 166 | 167 | X = F.dropout(X, self.dropout, training=self.training) 168 | 169 | for i in range(self.nlayers): 170 | # X = X + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*X - self.gamma*X) 171 | 172 | src = X[self.edge_index[0, :], :] 173 | dst_k = X[self.edge_index[1, :], :] 174 | h2 = torch.cat([src, dst_k], dim=1) 175 | attention1 = torch.tanh(self.gate(h2)).squeeze() 176 | 177 | # x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 178 | x_new = torch.tanh(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 179 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 180 | 181 | # ax3 = torch_sparse.spmm(self.edge_index, attention1, x_new.shape[0], x_new.shape[0], x_new) 182 | # ax3 = scatter(ax3, self.edge_index[1, :].T, dim=0, reduce="sum") 183 | ax2 = self.act_fn(self.conv(X, edge_index) + self.residual(X)) 184 | 185 | # print("X: ", X.shape) 186 | # print("x_new: ", x_new.shape) 187 | # print("ax3: ", ax3.shape) 188 | # print("ax2: ", ax2.shape) 189 | 190 | # ax = torch.mm(ax3, self.output_high) + torch.mm(ax2, self.output_low) 191 | 192 | # ax = torch.cat([X, ax2], axis=1) 193 | # ax = self.lin2(ax) 194 | coeff_lamda = (torch.tanh(self.lamda[i])).T 195 | coeff_lamda = coeff_lamda.tile(self.graph_size, 1) 196 | 197 | ax = ax2 + coeff_lamda * ax3 198 | 199 | ax = ax - self.alpha * X 200 | 201 | coeff = (1 + torch.tanh(self.epsilons[i])).T 202 | coeff = coeff.tile(self.graph_size, 1) 203 | 204 | X = X * coeff + self.dt* ax 205 | 206 | X = F.dropout(X, self.dropout, training=self.training) 207 | 208 | X = self.dec(X) 209 | 210 | return X -------------------------------------------------------------------------------- /src/early_stop_solver.py: -------------------------------------------------------------------------------- 1 | import torchdiffeq 2 | from torchdiffeq._impl.dopri5 import _DORMAND_PRINCE_SHAMPINE_TABLEAU, DPS_C_MID 3 | from torchdiffeq._impl.solvers import FixedGridODESolver 4 | import torch 5 | from torchdiffeq._impl.misc import _check_inputs, _flat_to_shape 6 | import torch.nn.functional as F 7 | import copy 8 | 9 | from torchdiffeq._impl.interp import _interp_evaluate 10 | from torchdiffeq._impl.rk_common import RKAdaptiveStepsizeODESolver, rk4_alt_step_func 11 | from ogb.nodeproppred import Evaluator 12 | 13 | 14 | def run_evaluator(evaluator, data, y_pred): 15 | train_acc = evaluator.eval({ 16 | 'y_true': data.y[data.train_mask], 17 | 'y_pred': y_pred[data.train_mask], 18 | })['acc'] 19 | valid_acc = evaluator.eval({ 20 | 'y_true': data.y[data.val_mask], 21 | 'y_pred': y_pred[data.val_mask], 22 | })['acc'] 23 | test_acc = evaluator.eval({ 24 | 'y_true': data.y[data.test_mask], 25 | 'y_pred': y_pred[data.test_mask], 26 | })['acc'] 27 | return train_acc, valid_acc, test_acc 28 | 29 | 30 | class 
EarlyStopDopri5(RKAdaptiveStepsizeODESolver): 31 | order = 5 32 | tableau = _DORMAND_PRINCE_SHAMPINE_TABLEAU 33 | mid = DPS_C_MID 34 | 35 | def __init__(self, func, y0, rtol, atol, opt, **kwargs): 36 | super(EarlyStopDopri5, self).__init__(func, y0, rtol, atol, **kwargs) 37 | 38 | self.lf = torch.nn.CrossEntropyLoss() 39 | self.m2_weight = None 40 | self.m2_bias = None 41 | self.data = None 42 | self.best_val = 0 43 | self.best_test = 0 44 | self.max_test_steps = opt['max_test_steps'] 45 | self.best_time = 0 46 | self.ode_test = self.test_OGB if opt['dataset'] == 'ogbn-arxiv' else self.test 47 | self.dataset = opt['dataset'] 48 | if opt['dataset'] == 'ogbn-arxiv': 49 | self.lf = torch.nn.functional.nll_loss 50 | self.evaluator = Evaluator(name=opt['dataset']) 51 | 52 | def set_accs(self, train, val, test, time): 53 | self.best_train = train 54 | self.best_val = val 55 | self.best_test = test 56 | self.best_time = time.item() 57 | 58 | def integrate(self, t): 59 | solution = torch.empty(len(t), *self.y0.shape, dtype=self.y0.dtype, device=self.y0.device) 60 | solution[0] = self.y0 61 | t = t.to(self.dtype) 62 | self._before_integrate(t) 63 | new_t = t 64 | for i in range(1, len(t)): 65 | new_t, y = self.advance(t[i]) 66 | solution[i] = y 67 | return new_t, solution 68 | 69 | def advance(self, next_t): 70 | """ 71 | Takes steps dt to get to the next user specified time point next_t. In practice this goes past next_t and then interpolates 72 | :param next_t: 73 | :return: The state, x(next_t) 74 | """ 75 | n_steps = 0 76 | while next_t > self.rk_state.t1 and n_steps < self.max_test_steps: 77 | self.rk_state = self._adaptive_step(self.rk_state) 78 | n_steps += 1 79 | train_acc, val_acc, test_acc = self.evaluate(self.rk_state) 80 | if val_acc > self.best_val: 81 | self.set_accs(train_acc, val_acc, test_acc, self.rk_state.t1) 82 | new_t = next_t 83 | if n_steps < self.max_test_steps: 84 | return (new_t, _interp_evaluate(self.rk_state.interp_coeff, self.rk_state.t0, self.rk_state.t1, next_t)) 85 | else: 86 | return (new_t, _interp_evaluate(self.rk_state.interp_coeff, self.rk_state.t0, self.rk_state.t1, self.rk_state.t1)) 87 | 88 | @torch.no_grad() 89 | def test(self, logits): 90 | accs = [] 91 | for _, mask in self.data('train_mask', 'val_mask', 'test_mask'): 92 | pred = logits[mask].max(1)[1] 93 | acc = pred.eq(self.data.y[mask]).sum().item() / mask.sum().item() 94 | accs.append(acc) 95 | return accs 96 | 97 | @torch.no_grad() 98 | def test_OGB(self, logits): 99 | evaluator = self.evaluator 100 | data = self.data 101 | y_pred = logits.argmax(dim=-1, keepdim=True) 102 | train_acc, valid_acc, test_acc = run_evaluator(evaluator, data, y_pred) 103 | return [train_acc, valid_acc, test_acc] 104 | 105 | @torch.no_grad() 106 | def evaluate(self, rkstate): 107 | # Activation. 
108 | z = rkstate.y1 109 | if not self.m2_weight.shape[1] == z.shape[1]: # system has been augmented 110 | z = torch.split(z, self.m2_weight.shape[1], dim=1)[0] 111 | z = F.relu(z) 112 | z = F.linear(z, self.m2_weight, self.m2_bias) 113 | t0, t1 = float(self.rk_state.t0), float(self.rk_state.t1) 114 | if self.dataset == 'ogbn-arxiv': 115 | z = z.log_softmax(dim=-1) 116 | loss = self.lf(z[self.data.train_mask], self.data.y.squeeze()[self.data.train_mask]) 117 | else: 118 | loss = self.lf(z[self.data.train_mask], self.data.y[self.data.train_mask]) 119 | train_acc, val_acc, test_acc = self.ode_test(z) 120 | log = 'ODE eval t0 {:.3f}, t1 {:.3f} Loss: {:.4f}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}' 121 | # print(log.format(t0, t1, loss, train_acc, val_acc, tmp_test_acc)) 122 | return train_acc, val_acc, test_acc 123 | 124 | def set_m2(self, m2): 125 | self.m2 = copy.deepcopy(m2) 126 | 127 | def set_data(self, data): 128 | if self.data is None: 129 | self.data = data 130 | 131 | class EarlyStopRK4(FixedGridODESolver): 132 | order = 4 133 | 134 | def __init__(self, func, y0, opt, eps=0, **kwargs): 135 | super(EarlyStopRK4, self).__init__(func, y0, **kwargs) 136 | self.eps = torch.as_tensor(eps, dtype=self.dtype, device=self.device) 137 | self.lf = torch.nn.CrossEntropyLoss() 138 | self.m2_weight = None 139 | self.m2_bias = None 140 | self.data = None 141 | self.best_val = 0 142 | self.best_test = 0 143 | self.best_time = 0 144 | self.ode_test = self.test_OGB if opt['dataset'] == 'ogbn-arxiv' else self.test 145 | self.dataset = opt['dataset'] 146 | if opt['dataset'] == 'ogbn-arxiv': 147 | self.lf = torch.nn.functional.nll_loss 148 | self.evaluator = Evaluator(name=opt['dataset']) 149 | 150 | def _step_func(self, func, t, dt, t1, y): 151 | ver = torchdiffeq.__version__[0] + torchdiffeq.__version__[2] + torchdiffeq.__version__[4] 152 | if int(ver) >= 22: # '0.2.2' 153 | return rk4_alt_step_func(func, t + self.eps, dt - 2 * self.eps, t1, y) 154 | else: 155 | return rk4_alt_step_func(func, t + self.eps, dt - 2 * self.eps, y) 156 | 157 | def set_accs(self, train, val, test, time): 158 | self.best_train = train 159 | self.best_val = val 160 | self.best_test = test 161 | self.best_time = time.item() 162 | 163 | def integrate(self, t): 164 | time_grid = self.grid_constructor(self.func, self.y0, t) 165 | assert time_grid[0] == t[0] and time_grid[-1] == t[-1] 166 | 167 | solution = torch.empty(len(t), *self.y0.shape, dtype=self.y0.dtype, device=self.y0.device) 168 | solution[0] = self.y0 169 | 170 | j = 1 171 | y0 = self.y0 172 | for t0, t1 in zip(time_grid[:-1], time_grid[1:]): 173 | dy = self._step_func(self.func, t0, t1 - t0, t1, y0) 174 | y1 = y0 + dy 175 | train_acc, val_acc, test_acc = self.evaluate(y1, t0, t1) 176 | if val_acc > self.best_val: 177 | self.set_accs(train_acc, val_acc, test_acc, t1) 178 | 179 | while j < len(t) and t1 >= t[j]: 180 | solution[j] = self._linear_interp(t0, t1, y0, y1, t[j]) 181 | j += 1 182 | y0 = y1 183 | 184 | return t1, solution 185 | 186 | @torch.no_grad() 187 | def test(self, logits): 188 | accs = [] 189 | for _, mask in self.data('train_mask', 'val_mask', 'test_mask'): 190 | pred = logits[mask].max(1)[1] 191 | acc = pred.eq(self.data.y[mask]).sum().item() / mask.sum().item() 192 | accs.append(acc) 193 | return accs 194 | 195 | @torch.no_grad() 196 | def test_OGB(self, logits): 197 | evaluator = self.evaluator 198 | data = self.data 199 | y_pred = logits.argmax(dim=-1, keepdim=True) 200 | train_acc, valid_acc, test_acc = run_evaluator(evaluator, data, y_pred) 201 | 
return [train_acc, valid_acc, test_acc] 202 | 203 | @torch.no_grad() 204 | def evaluate(self, z, t0, t1): 205 | # Activation. 206 | if not self.m2_weight.shape[1] == z.shape[1]: # system has been augmented 207 | z = torch.split(z, self.m2_weight.shape[1], dim=1)[0] 208 | z = F.relu(z) 209 | z = F.linear(z, self.m2_weight, self.m2_bias) 210 | if self.dataset == 'ogbn-arxiv': 211 | z = z.log_softmax(dim=-1) 212 | loss = self.lf(z[self.data.train_mask], self.data.y.squeeze()[self.data.train_mask]) 213 | else: 214 | loss = self.lf(z[self.data.train_mask], self.data.y[self.data.train_mask]) 215 | train_acc, val_acc, test_acc = self.ode_test(z) 216 | log = 'ODE eval t0 {:.3f}, t1 {:.3f} Loss: {:.4f}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}' 217 | # print(log.format(t0, t1, loss, train_acc, val_acc, tmp_test_acc)) 218 | return train_acc, val_acc, test_acc 219 | 220 | def set_m2(self, m2): 221 | self.m2 = copy.deepcopy(m2) 222 | 223 | def set_data(self, data): 224 | if self.data is None: 225 | self.data = data 226 | 227 | 228 | SOLVERS = { 229 | 'dopri5': EarlyStopDopri5, 230 | 'rk4': EarlyStopRK4 231 | } 232 | 233 | 234 | class EarlyStopInt(torch.nn.Module): 235 | def __init__(self, t, opt, device=None): 236 | super(EarlyStopInt, self).__init__() 237 | self.device = device 238 | self.solver = None 239 | self.data = None 240 | self.max_test_steps = opt['max_test_steps'] 241 | self.m2_weight = None 242 | self.m2_bias = None 243 | self.opt = opt 244 | self.t = torch.tensor([0, opt['earlystopxT'] * t], dtype=torch.float).to(self.device) 245 | 246 | def __call__(self, func, y0, t, method=None, rtol=1e-7, atol=1e-9, 247 | adjoint_method="dopri5", adjoint_atol=1e-9, adjoint_rtol=1e-7, options=None): 248 | """Integrate a system of ordinary differential equations. 249 | 250 | Solves the initial value problem for a non-stiff system of first order ODEs: 251 | ``` 252 | dy/dt = func(t, y), y(t[0]) = y0 253 | ``` 254 | where y is a Tensor of any shape. 255 | 256 | Output dtypes and numerical precision are based on the dtypes of the inputs `y0`. 257 | 258 | Args: 259 | func: Function that maps a Tensor holding the state `y` and a scalar Tensor 260 | `t` into a Tensor of state derivatives with respect to time. 261 | y0: N-D Tensor giving starting value of `y` at time point `t[0]`. May 262 | have any floating point or complex dtype. 263 | t: 1-D Tensor holding a sequence of time points for which to solve for 264 | `y`. The initial time point should be the first element of this sequence, 265 | and each time must be larger than the previous time. May have any floating 266 | point dtype. Converted to a Tensor with float64 dtype. 267 | rtol: optional float64 Tensor specifying an upper bound on relative error, 268 | per element of `y`. 269 | atol: optional float64 Tensor specifying an upper bound on absolute error, 270 | per element of `y`. 271 | method: optional string indicating the integration method to use. 272 | options: optional dict of configuring options for the indicated integration 273 | method. Can only be provided if a `method` is explicitly set. 274 | name: Optional name for this operation. 275 | 276 | Returns: 277 | y: Tensor, where the first dimension corresponds to different 278 | time points. Contains the solved value of y for each desired time point in 279 | `t`, with the initial value `y0` being the first element along the first 280 | dimension. 281 | 282 | Raises: 283 | ValueError: if an invalid `method` is provided. 
284 | TypeError: if `options` is supplied without `method`, or if `t` or `y0` has 285 | an invalid dtype. 286 | """ 287 | method = self.opt['method'] 288 | # assert method in ['rk4', 'dopri5'], "Only dopri5 and rk4 implemented with early stopping" 289 | 290 | ver = torchdiffeq.__version__ 291 | if int(ver[0] + ver[2] + ver[4]) >= 20: # 0.2.0 change of signature on this release for event_fn 292 | event_fn = None 293 | shapes, func, y0, t, rtol, atol, method, options, event_fn, t_is_reversed = _check_inputs(func, y0, self.t, rtol, 294 | atol, method, options, 295 | event_fn, SOLVERS) 296 | else: 297 | shapes, func, y0, t, rtol, atol, method, options = _check_inputs(func, y0, self.t, rtol, atol, method, options, 298 | SOLVERS) 299 | 300 | self.solver = SOLVERS[method](func, y0, rtol=rtol, atol=atol, opt=self.opt, **options) 301 | if self.solver.data is None: 302 | self.solver.data = self.data 303 | self.solver.m2_weight = self.m2_weight 304 | self.solver.m2_bias = self.m2_bias 305 | t, solution = self.solver.integrate(t) 306 | if shapes is not None: 307 | solution = _flat_to_shape(solution, (len(t),), shapes) 308 | return solution 309 | -------------------------------------------------------------------------------- /src/function_GAT_attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops 6 | from data import get_dataset 7 | from utils import MaxNFEException 8 | from base_classes import ODEFunc 9 | 10 | 11 | class ODEFuncAtt(ODEFunc): 12 | 13 | def __init__(self, in_features, out_features, opt, data, device): 14 | super(ODEFuncAtt, self).__init__(opt, data, device) 15 | 16 | if opt['self_loop_weight'] > 0: 17 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 18 | fill_value=opt['self_loop_weight']) 19 | else: 20 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 21 | 22 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt, 23 | device).to(device) 24 | try: 25 | self.attention_dim = opt['attention_dim'] 26 | except KeyError: 27 | self.attention_dim = out_features 28 | 29 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 30 | self.d_k = self.attention_dim // opt['heads'] 31 | 32 | def multiply_attention(self, x, attention, wx): 33 | if self.opt['mix_features']: 34 | wx = torch.mean(torch.stack( 35 | [torch_sparse.spmm(self.edge_index, attention[:, idx], wx.shape[0], wx.shape[0], wx) for idx in 36 | range(self.opt['heads'])], dim=0), 37 | dim=0) 38 | ax = torch.mm(wx, self.multihead_att_layer.Wout) 39 | else: 40 | ax = torch.mean(torch.stack( 41 | [torch_sparse.spmm(self.edge_index, attention[:, idx], x.shape[0], x.shape[0], x) for idx in 42 | range(self.opt['heads'])], dim=0), 43 | dim=0) 44 | return ax 45 | 46 | def forward(self, t, x): # t is needed when called by the integrator 47 | 48 | if self.nfe > self.opt["max_nfe"]: 49 | raise MaxNFEException 50 | 51 | self.nfe += 1 52 | 53 | attention, wx = self.multihead_att_layer(x, self.edge_index) 54 | ax = self.multiply_attention(x, attention, wx) 55 | # todo would be nice if this was more efficient 56 | 57 | if not self.opt['no_alpha_sigmoid']: 58 | alpha = torch.sigmoid(self.alpha_train) 59 | else: 60 | alpha = self.alpha_train 61 | 62 | f = alpha * (ax - x) 63 | if 
self.opt['add_source']: 64 | f = f + self.beta_train * self.x0 65 | return f 66 | 67 | def __repr__(self): 68 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 69 | 70 | 71 | class SpGraphAttentionLayer(nn.Module): 72 | """ 73 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 74 | """ 75 | 76 | def __init__(self, in_features, out_features, opt, device, concat=True): 77 | super(SpGraphAttentionLayer, self).__init__() 78 | self.in_features = in_features 79 | self.out_features = out_features 80 | self.alpha = opt['leaky_relu_slope'] 81 | self.concat = concat 82 | self.device = device 83 | self.opt = opt 84 | self.h = opt['heads'] 85 | 86 | try: 87 | self.attention_dim = opt['attention_dim'] 88 | except KeyError: 89 | self.attention_dim = out_features 90 | 91 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 92 | self.d_k = self.attention_dim // opt['heads'] 93 | 94 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 95 | nn.init.xavier_normal_(self.W.data, gain=1.414) 96 | 97 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 98 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 99 | 100 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 101 | nn.init.xavier_normal_(self.a.data, gain=1.414) 102 | 103 | self.leakyrelu = nn.LeakyReLU(self.alpha) 104 | 105 | def forward(self, x, edge): 106 | wx = torch.mm(x, self.W) # h: N x out 107 | h = wx.view(-1, self.h, self.d_k) 108 | h = h.transpose(1, 2) 109 | 110 | # Self-attention on the nodes - Shared attention mechanism 111 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 112 | self.device) # edge: 2*D x E 113 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 114 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 115 | return attention, wx 116 | 117 | def __repr__(self): 118 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 119 | 120 | 121 | if __name__ == '__main__': 122 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 123 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'beta_dim': 'vc', 'heads': 2, 'K': 10, 'attention_norm_idx': 0, 124 | 'add_source':False, 'alpha_dim': 'sc', 'beta_dim': 'vc', 'max_nfe':1000, 'mix_features': False} 125 | dataset = get_dataset(opt, '../data', False) 126 | t = 1 127 | func = ODEFuncAtt(dataset.data.num_features, 6, opt, dataset.data, device) 128 | out = func(t, dataset.data.x) 129 | -------------------------------------------------------------------------------- /src/function_GAT_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | from data import get_dataset 7 | from utils import MaxNFEException 8 | from base_classes import ODEFunc 9 | from torch_scatter import scatter 10 | import math 11 | from torch_geometric.utils import get_laplacian 12 | import torch.nn.functional as F 13 | 14 | class ODEFuncAttConv(ODEFunc): 15 | 16 | def __init__(self, in_features, out_features, opt, data, device): 17 | super(ODEFuncAttConv, self).__init__(opt, data, device) 18 
| 19 | if opt['self_loop_weight'] > 0: 20 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 21 | fill_value=opt['self_loop_weight']) 22 | else: 23 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 24 | 25 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 26 | 27 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt, 28 | device).to(device) 29 | try: 30 | self.attention_dim = opt['attention_dim'] 31 | except KeyError: 32 | self.attention_dim = out_features 33 | 34 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 35 | self.d_k = self.attention_dim // opt['heads'] 36 | 37 | self.device = device 38 | 39 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 40 | 41 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 42 | self.edge_index_lap = self.edge_index_lap.to(device) 43 | self.edge_weight_lap = self.edge_weight_lap.to(device) 44 | 45 | self.gate = nn.Linear(2 * in_features, 1) 46 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 47 | 48 | 49 | 50 | self.lin2 = nn.Linear(in_features * 2, out_features) 51 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 52 | 53 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 54 | 55 | 56 | self.output_low, self.output_high = ( 57 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 58 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 59 | ) 60 | 61 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 62 | 63 | 64 | self.weight_mlp.data.uniform_(-stdv, stdv) 65 | 66 | self.output_low.data.uniform_(-stdv, stdv) 67 | self.output_high.data.uniform_(-stdv, stdv) 68 | 69 | 70 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 71 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 72 | 73 | self.lamda1 = nn.Parameter(torch.tensor(0.0),requires_grad=True) 74 | 75 | 76 | def multiply_attention(self, x, attention, wx): 77 | if self.opt['mix_features']: 78 | wx = torch.mean(torch.stack( 79 | [torch_sparse.spmm(self.edge_index, attention[:, idx], wx.shape[0], wx.shape[0], wx) for idx in 80 | range(self.opt['heads'])], dim=0), 81 | dim=0) 82 | ax = torch.mm(wx, self.multihead_att_layer.Wout) 83 | else: 84 | ax = torch.mean(torch.stack( 85 | [torch_sparse.spmm(self.edge_index, attention[:, idx], x.shape[0], x.shape[0], x) for idx in 86 | range(self.opt['heads'])], dim=0), 87 | dim=0) 88 | return ax 89 | 90 | def forward(self, t, x): # t is needed when called by the integrator 91 | 92 | if self.nfe > self.opt["max_nfe"]: 93 | raise MaxNFEException 94 | 95 | self.nfe += 1 96 | 97 | attention, wx = self.multihead_att_layer(x, self.edge_index) 98 | ax2 = self.multiply_attention(x, attention, wx) 99 | # todo would be nice if this was more efficient 100 | 101 | 102 | 103 | src = x[self.edge_index[0, :], :] 104 | dst_k = x[self.edge_index[1, :], :] 105 | 106 | 107 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 108 | 109 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 110 | 111 | ax = self.lamda1 * torch.mm(ax3, self.output_high) +torch.mm(ax2, self.output_low) 112 | 113 | 114 | 115 | 116 | 117 | ax = torch.cat([x, ax], dim=1) 118 | ax = F.relu(self.lin2(ax)) 119 | 120 | if not self.opt['no_alpha_sigmoid']: 121 | alpha = 
torch.sigmoid(self.alpha_train) 122 | else: 123 | alpha = self.alpha_train 124 | 125 | f = alpha * (ax - x) 126 | if self.opt['add_source']: 127 | f = f + self.beta_train * self.x0 128 | return f 129 | 130 | def __repr__(self): 131 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 132 | 133 | 134 | class SpGraphAttentionLayer(nn.Module): 135 | """ 136 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 137 | """ 138 | 139 | def __init__(self, in_features, out_features, opt, device, concat=True): 140 | super(SpGraphAttentionLayer, self).__init__() 141 | self.in_features = in_features 142 | self.out_features = out_features 143 | self.alpha = opt['leaky_relu_slope'] 144 | self.concat = concat 145 | self.device = device 146 | self.opt = opt 147 | self.h = opt['heads'] 148 | 149 | try: 150 | self.attention_dim = opt['attention_dim'] 151 | except KeyError: 152 | self.attention_dim = out_features 153 | 154 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 155 | self.d_k = self.attention_dim // opt['heads'] 156 | 157 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 158 | nn.init.xavier_normal_(self.W.data, gain=1.414) 159 | 160 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 161 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 162 | 163 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 164 | nn.init.xavier_normal_(self.a.data, gain=1.414) 165 | 166 | self.leakyrelu = nn.LeakyReLU(self.alpha) 167 | 168 | def forward(self, x, edge): 169 | wx = torch.mm(x, self.W) # h: N x out 170 | h = wx.view(-1, self.h, self.d_k) 171 | h = h.transpose(1, 2) 172 | 173 | # Self-attention on the nodes - Shared attention mechanism 174 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 175 | self.device) # edge: 2*D x E 176 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 177 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 178 | return attention, wx 179 | 180 | def __repr__(self): 181 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 182 | 183 | 184 | if __name__ == '__main__': 185 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 186 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'beta_dim': 'vc', 'heads': 2, 'K': 10, 'attention_norm_idx': 0, 187 | 'add_source':False, 'alpha_dim': 'sc', 'beta_dim': 'vc', 'max_nfe':1000, 'mix_features': False} 188 | dataset = get_dataset(opt, '../data', False) 189 | t = 1 190 | func = ODEFuncAtt(dataset.data.num_features, 6, opt, dataset.data, device) 191 | out = func(t, dataset.data.x) 192 | -------------------------------------------------------------------------------- /src/function_beltrami_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | import torch.nn.functional as F 11 | from torch_scatter import scatter 12 | import math 13 | from torch_geometric.utils import 
get_laplacian 14 | 15 | class ODEFuncBeltramiCONV(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncBeltramiCONV, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | # print("self.edge_index: ", self.edge_index.shape) 26 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt,device).to( 27 | device) 28 | self.device = device 29 | 30 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 31 | 32 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 33 | self.edge_index_lap = self.edge_index_lap.to(device) 34 | self.edge_weight_lap = self.edge_weight_lap.to(device) 35 | 36 | self.gate = nn.Linear(2 * in_features, 1) 37 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 38 | 39 | self.lin1 = nn.Linear(in_features, out_features) 40 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 41 | 42 | self.lin2 = nn.Linear(in_features * 2, out_features) 43 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 44 | 45 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 46 | 47 | 48 | self.output_low, self.output_high = ( 49 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 50 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 51 | ) 52 | 53 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 54 | 55 | 56 | self.weight_mlp.data.uniform_(-stdv, stdv) 57 | 58 | self.output_low.data.uniform_(-stdv, stdv) 59 | self.output_high.data.uniform_(-stdv, stdv) 60 | 61 | 62 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 63 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 64 | 65 | def multiply_attention(self, x, attention=None, v=None): 66 | num_heads = 4 67 | mix_features = 0 68 | if mix_features: 69 | vx = torch.mean(torch.stack( 70 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 71 | range(num_heads)], dim=0), 72 | dim=0) 73 | ax = self.multihead_att_layer.Wout(vx) 74 | else: 75 | mean_attention = attention.mean(dim=1) 76 | # mean_attention = self.edge_weight 77 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 78 | grad_x_abs = torch.abs(grad_x) 79 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 80 | grad_x_norm_inv = 1 / grad_x_norm 81 | gu = grad_x_norm_inv[self.edge_index[0, :]] 82 | gv = grad_x_norm_inv[self.edge_index[1, :]] 83 | attention2 = gu * gu + gu * gv 84 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 85 | # Da = torch.diag(grad_x_norm_inv) 86 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 87 | rowsum = torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 88 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 89 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 90 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 91 | return ax - dx 92 | 93 | def forward(self, t, x): # t is needed when called by the integrator 94 | 95 | attention, values = 
self.multihead_att_layer(x, self.edge_index) 96 | ax = self.multiply_attention(x, attention, values) 97 | 98 | 99 | src = x[self.edge_index[0, :], :] 100 | dst_k = x[self.edge_index[1, :], :] 101 | # h2 = torch.cat([src, dst_k], dim=1) 102 | # attention1 = torch.tanh(self.gate(h2)).squeeze() 103 | 104 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 105 | # print("x_new: ", x_new.shape) 106 | 107 | # ax3 = torch_sparse.spmm(self.edge_index, attention1, x_new.shape[0], x_new.shape[0], x_new) 108 | 109 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 110 | 111 | ax3 = self.bn_in_1(ax3) 112 | ax = self.bn_in_2(ax) 113 | 114 | ax = torch.mm(ax3, self.output_high) + torch.mm(ax, self.output_low) 115 | 116 | 117 | 118 | ax = torch.cat([x, ax], axis=1) 119 | ax = self.lin2(ax) 120 | 121 | if not self.opt['no_alpha_sigmoid']: 122 | alpha = torch.sigmoid(self.alpha_train) 123 | else: 124 | alpha = self.alpha_train 125 | f = alpha * (ax - x) 126 | if self.opt['add_source']: 127 | f = f + self.beta_train * self.x0 128 | 129 | # f = ax - x 130 | return f 131 | 132 | def __repr__(self): 133 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 134 | 135 | 136 | class SpGraphAttentionLayer(nn.Module): 137 | """ 138 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 139 | """ 140 | 141 | def __init__(self, in_features, out_features, opt, device, concat=True): 142 | super(SpGraphAttentionLayer, self).__init__() 143 | self.in_features = in_features 144 | self.out_features = out_features 145 | self.alpha = opt['leaky_relu_slope'] 146 | self.concat = concat 147 | self.device = device 148 | self.opt = opt 149 | self.h = opt['heads'] 150 | 151 | try: 152 | self.attention_dim = opt['attention_dim'] 153 | except KeyError: 154 | self.attention_dim = out_features 155 | 156 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 157 | self.d_k = self.attention_dim // opt['heads'] 158 | 159 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 160 | nn.init.xavier_normal_(self.W.data, gain=1.414) 161 | 162 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 163 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 164 | 165 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 166 | nn.init.xavier_normal_(self.a.data, gain=1.414) 167 | 168 | self.leakyrelu = nn.LeakyReLU(self.alpha) 169 | 170 | def forward(self, x, edge): 171 | wx = torch.mm(x, self.W) # h: N x out 172 | h = wx.view(-1, self.h, self.d_k) 173 | h = h.transpose(1, 2) 174 | 175 | # Self-attention on the nodes - Shared attention mechanism 176 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 177 | self.device) # edge: 2*D x E 178 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 179 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 180 | return attention, wx 181 | 182 | def __repr__(self): 183 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 184 | 185 | 186 | if __name__ == '__main__': 187 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 188 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 189 | 'attention_norm_idx': 0, 'add_source': False, 190 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 
1000, 'mix_features': False 191 | } 192 | dataset = get_dataset(opt, '../data', False) 193 | t = 1 194 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 195 | out = func(t, dataset.data.x) 196 | -------------------------------------------------------------------------------- /src/function_beltrami_gat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | import torch.nn.functional as F 11 | from torch_scatter import scatter 12 | import math 13 | from torch_geometric.utils import get_laplacian 14 | 15 | class ODEFuncBeltramiGAT(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncBeltramiGAT, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | # print("self.edge_index: ", self.edge_index.shape) 26 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt,device,).to( 27 | device) 28 | self.device = device 29 | 30 | # self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 31 | # 32 | # self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 33 | # self.edge_index_lap = self.edge_index_lap.to(device) 34 | # self.edge_weight_lap = self.edge_weight_lap.to(device) 35 | 36 | self.gate = nn.Linear(2 * in_features, 1) 37 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 38 | 39 | self.lin1 = nn.Linear(in_features, out_features) 40 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 41 | 42 | self.lin2 = nn.Linear(in_features * 2, out_features) 43 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 44 | 45 | self.weight_low, self.weight_high, self.weight_mlp = ( 46 | nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)), 47 | nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)), 48 | nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)), 49 | ) 50 | 51 | self.output_low, self.output_high, self.output_mlp = ( 52 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 53 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 54 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 55 | ) 56 | 57 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 58 | 59 | self.weight_low.data.uniform_(-stdv, stdv) 60 | self.weight_high.data.uniform_(-stdv, stdv) 61 | self.weight_mlp.data.uniform_(-stdv, stdv) 62 | 63 | self.output_low.data.uniform_(-stdv, stdv) 64 | self.output_high.data.uniform_(-stdv, stdv) 65 | self.output_mlp.data.uniform_(-stdv, stdv) 66 | 67 | def multiply_attention(self, x, attention=None, v=None): 68 | num_heads = 4 69 | mix_features = 0 70 | if mix_features: 71 | vx = torch.mean(torch.stack( 72 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 73 | range(num_heads)], dim=0), 74 | dim=0) 75 | ax = 
self.multihead_att_layer.Wout(vx) 76 | else: 77 | mean_attention = attention.mean(dim=1) 78 | # mean_attention = self.edge_weight 79 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 80 | grad_x_abs = torch.abs(grad_x) 81 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 82 | grad_x_norm_inv = 1 / grad_x_norm 83 | gu = grad_x_norm_inv[self.edge_index[0, :]] 84 | gv = grad_x_norm_inv[self.edge_index[1, :]] 85 | attention2 = gu * gu + gu * gv 86 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 87 | # Da = torch.diag(grad_x_norm_inv) 88 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 89 | rowsum = torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 90 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 91 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 92 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 93 | return ax - dx 94 | 95 | def forward(self, t, x): # t is needed when called by the integrator 96 | 97 | attention, values = self.multihead_att_layer(x, self.edge_index) 98 | ax = self.multiply_attention(x, attention, values) 99 | # ax = self.multiply_attention(x,) 100 | # src = x[self.edge_index[0, :], :] 101 | # dst_k = x[self.edge_index[1, :], :] 102 | # h2 = torch.cat([src, dst_k], dim=1) 103 | # attention1 = torch.tanh(self.gate(h2)).squeeze() 104 | # 105 | # x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 106 | # ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 107 | # 108 | # ax = torch.mm(ax3, self.output_high) + torch.mm(ax, self.output_low) 109 | 110 | # ax = torch.cat([x, ax], axis=1) 111 | # ax = self.lin2(ax) 112 | 113 | if not self.opt['no_alpha_sigmoid']: 114 | alpha = torch.sigmoid(self.alpha_train) 115 | else: 116 | alpha = self.alpha_train 117 | f = alpha * (ax - x) 118 | if self.opt['add_source']: 119 | f = f + self.beta_train * self.x0 120 | 121 | # f = ax - x 122 | return f 123 | 124 | def __repr__(self): 125 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 126 | 127 | 128 | class SpGraphAttentionLayer(nn.Module): 129 | """ 130 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 131 | """ 132 | 133 | def __init__(self, in_features, out_features, opt, device, concat=True): 134 | super(SpGraphAttentionLayer, self).__init__() 135 | self.in_features = in_features 136 | self.out_features = out_features 137 | self.alpha = opt['leaky_relu_slope'] 138 | self.concat = concat 139 | self.device = device 140 | self.opt = opt 141 | self.h = opt['heads'] 142 | 143 | try: 144 | self.attention_dim = opt['attention_dim'] 145 | except KeyError: 146 | self.attention_dim = out_features 147 | 148 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 149 | self.d_k = self.attention_dim // opt['heads'] 150 | 151 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 152 | nn.init.xavier_normal_(self.W.data, gain=1.414) 153 | 154 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 155 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 156 | 157 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 158 | nn.init.xavier_normal_(self.a.data, gain=1.414) 159 | 160 | 
self.leakyrelu = nn.LeakyReLU(self.alpha) 161 | 162 | def forward(self, x, edge): 163 | wx = torch.mm(x, self.W) # h: N x out 164 | h = wx.view(-1, self.h, self.d_k) 165 | h = h.transpose(1, 2) 166 | 167 | # Self-attention on the nodes - Shared attention mechanism 168 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 169 | self.device) # edge: 2*D x E 170 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 171 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 172 | return attention, wx 173 | 174 | def __repr__(self): 175 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 176 | 177 | 178 | if __name__ == '__main__': 179 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 180 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 181 | 'attention_norm_idx': 0, 'add_source': False, 182 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 183 | } 184 | dataset = get_dataset(opt, '../data', False) 185 | t = 1 186 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 187 | out = func(t, dataset.data.x) 188 | -------------------------------------------------------------------------------- /src/function_beltrami_van.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | 11 | 12 | class ODEFuncBektramiAtt(ODEFunc): 13 | 14 | def __init__(self, in_features, out_features, opt, data, device): 15 | super(ODEFuncBektramiAtt, self).__init__(opt, data, device) 16 | 17 | if opt['self_loop_weight'] > 0: 18 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 19 | fill_value=opt['self_loop_weight']) 20 | else: 21 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 22 | # print("self.edge_index: ", self.edge_index.shape) 23 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt,device,edge_weights=self.edge_weight).to( 24 | device) 25 | self.device = device 26 | 27 | def multiply_attention(self, x, attention, v=None): 28 | num_heads = 4 29 | mix_features = 0 30 | if mix_features: 31 | vx = torch.mean(torch.stack( 32 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 33 | range(num_heads)], dim=0), 34 | dim=0) 35 | ax = self.multihead_att_layer.Wout(vx) 36 | else: 37 | mean_attention = attention.mean(dim=1) 38 | # mean_attention = self.edge_weight 39 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 40 | grad_x_abs = torch.abs(grad_x) 41 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 42 | grad_x_norm_inv = 1 / grad_x_norm 43 | gu = grad_x_norm_inv[self.edge_index[0, :]] 44 | gv = grad_x_norm_inv[self.edge_index[1, :]] 45 | attention2 = gu * gu + gu * gv 46 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 47 | # Da = torch.diag(grad_x_norm_inv) 48 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 49 | rowsum = 
torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 50 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 51 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 52 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 53 | return ax - dx 54 | 55 | def forward(self, t, x): # t is needed when called by the integrator 56 | 57 | attention, values = self.multihead_att_layer(x, self.edge_index) 58 | ax = self.multiply_attention(x, attention, values) 59 | 60 | if not self.opt['no_alpha_sigmoid']: 61 | alpha = torch.sigmoid(self.alpha_train) 62 | else: 63 | alpha = self.alpha_train 64 | f = alpha * (ax - x) 65 | if self.opt['add_source']: 66 | f = f + self.beta_train * self.x0 67 | 68 | # f = ax - x 69 | return f 70 | 71 | def __repr__(self): 72 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 73 | 74 | 75 | class SpGraphTransAttentionLayer(nn.Module): 76 | """ 77 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 78 | """ 79 | 80 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 81 | super(SpGraphTransAttentionLayer, self).__init__() 82 | self.in_features = in_features 83 | self.out_features = out_features 84 | self.alpha = opt['leaky_relu_slope'] 85 | self.concat = concat 86 | self.device = device 87 | self.opt = opt 88 | self.h = int(opt['heads']) 89 | self.edge_weights = edge_weights 90 | 91 | try: 92 | self.attention_dim = opt['attention_dim'] 93 | except KeyError: 94 | self.attention_dim = out_features 95 | 96 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 97 | self.h, self.attention_dim) 98 | self.d_k = self.attention_dim // self.h 99 | 100 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 101 | self.output_var_x = nn.Parameter(torch.ones(1)) 102 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 103 | self.output_var_p = nn.Parameter(torch.ones(1)) 104 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 105 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 106 | self.init_weights(self.Qx) 107 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 108 | self.init_weights(self.Vx) 109 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 110 | self.init_weights(self.Kx) 111 | 112 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 113 | self.init_weights(self.Qp) 114 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 115 | self.init_weights(self.Vp) 116 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 117 | self.init_weights(self.Kp) 118 | 119 | else: 120 | if self.opt['attention_type'] == "exp_kernel": 121 | self.output_var = nn.Parameter(torch.ones(1)) 122 | self.lengthscale = nn.Parameter(torch.ones(1)) 123 | 124 | self.Q = nn.Linear(in_features, self.attention_dim) 125 | self.init_weights(self.Q) 126 | 127 | self.V = nn.Linear(in_features, self.attention_dim) 128 | self.init_weights(self.V) 129 | 130 | self.K = nn.Linear(in_features, self.attention_dim) 131 | self.init_weights(self.K) 132 | 133 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 134 | 135 | self.Wout = nn.Linear(self.d_k, in_features) 136 | self.init_weights(self.Wout) 137 | 138 | def init_weights(self, m): 139 | if type(m) == 
nn.Linear: 140 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 141 | # m.bias.data.fill_(0.01) 142 | nn.init.constant_(m.weight, 1e-5) 143 | 144 | def forward(self, x, edge): 145 | """ 146 | x might be [features, augmentation, positional encoding, labels] 147 | """ 148 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 149 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 150 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 151 | p = x[:, self.opt['feat_hidden_dim']: label_index] 152 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 153 | 154 | qx = self.Qx(x) 155 | kx = self.Kx(x) 156 | vx = self.Vx(x) 157 | # perform linear operation and split into h heads 158 | kx = kx.view(-1, self.h, self.d_k) 159 | qx = qx.view(-1, self.h, self.d_k) 160 | vx = vx.view(-1, self.h, self.d_k) 161 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 162 | kx = kx.transpose(1, 2) 163 | qx = qx.transpose(1, 2) 164 | vx = vx.transpose(1, 2) 165 | src_x = qx[edge[0, :], :, :] 166 | dst_x = kx[edge[1, :], :, :] 167 | 168 | qp = self.Qp(p) 169 | kp = self.Kp(p) 170 | vp = self.Vp(p) 171 | # perform linear operation and split into h heads 172 | kp = kp.view(-1, self.h, self.d_k) 173 | qp = qp.view(-1, self.h, self.d_k) 174 | vp = vp.view(-1, self.h, self.d_k) 175 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 176 | kp = kp.transpose(1, 2) 177 | qp = qp.transpose(1, 2) 178 | vp = vp.transpose(1, 2) 179 | src_p = qp[edge[0, :], :, :] 180 | dst_p = kp[edge[1, :], :, :] 181 | 182 | prods = self.output_var_x ** 2 * torch.exp( 183 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 184 | * self.output_var_p ** 2 * torch.exp( 185 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 186 | 187 | v = None 188 | 189 | else: 190 | q = self.Q(x) 191 | k = self.K(x) 192 | v = self.V(x) 193 | 194 | # perform linear operation and split into h heads 195 | 196 | k = k.view(-1, self.h, self.d_k) 197 | q = q.view(-1, self.h, self.d_k) 198 | v = v.view(-1, self.h, self.d_k) 199 | 200 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 201 | 202 | k = k.transpose(1, 2) 203 | q = q.transpose(1, 2) 204 | v = v.transpose(1, 2) 205 | 206 | src = q[edge[0, :], :, :] 207 | dst_k = k[edge[1, :], :, :] 208 | 209 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 210 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 211 | elif self.opt['attention_type'] == "scaled_dot": 212 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 213 | elif self.opt['attention_type'] == "cosine_sim": 214 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 215 | prods = cos(src, dst_k) 216 | elif self.opt['attention_type'] == "pearson": 217 | src_mu = torch.mean(src, dim=1, keepdim=True) 218 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 219 | src = src - src_mu 220 | dst_k = dst_k - dst_mu 221 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 222 | prods = cos(src, dst_k) 223 | 224 | if self.opt['reweight_attention'] and self.edge_weights is not None: 225 | prods = prods * self.edge_weights.unsqueeze(dim=1) 226 | if self.opt['square_plus']: 227 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 228 | else: 229 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 230 | return attention, (v, prods) 231 | 232 | def __repr__(self): 233 
| return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 234 | 235 | 236 | if __name__ == '__main__': 237 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 238 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 239 | 'attention_norm_idx': 0, 'add_source': False, 240 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 241 | } 242 | dataset = get_dataset(opt, '../data', False) 243 | t = 1 244 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 245 | out = func(t, dataset.data.x) 246 | -------------------------------------------------------------------------------- /src/function_beltramitrans_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | import torch.nn.functional as F 11 | from torch_scatter import scatter 12 | import math 13 | from torch_geometric.utils import get_laplacian 14 | 15 | class ODEFuncBeltramiTRANSCONV(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncBeltramiTRANSCONV, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | # print("self.edge_index: ", self.edge_index.shape) 26 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt,device).to( 27 | device) 28 | self.device = device 29 | 30 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 31 | 32 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 33 | self.edge_index_lap = self.edge_index_lap.to(device) 34 | self.edge_weight_lap = self.edge_weight_lap.to(device) 35 | 36 | self.gate = nn.Linear(2 * in_features, 1) 37 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 38 | 39 | self.lin1 = nn.Linear(in_features, out_features) 40 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 41 | 42 | self.lin2 = nn.Linear(in_features * 2, out_features) 43 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 44 | 45 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 46 | 47 | 48 | self.output_low, self.output_high = ( 49 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 50 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 51 | ) 52 | 53 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 54 | 55 | 56 | self.weight_mlp.data.uniform_(-stdv, stdv) 57 | 58 | self.output_low.data.uniform_(-stdv, stdv) 59 | self.output_high.data.uniform_(-stdv, stdv) 60 | 61 | 62 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 63 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 64 | 65 | def multiply_attention(self, x, attention=None, v=None): 66 | num_heads = 4 67 | mix_features = 0 68 | if mix_features: 69 | vx = torch.mean(torch.stack( 70 | 
[torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 71 | range(num_heads)], dim=0), 72 | dim=0) 73 | ax = self.multihead_att_layer.Wout(vx) 74 | else: 75 | mean_attention = attention.mean(dim=1) 76 | # mean_attention = self.edge_weight 77 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 78 | grad_x_abs = torch.abs(grad_x) 79 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 80 | grad_x_norm_inv = 1 / grad_x_norm 81 | gu = grad_x_norm_inv[self.edge_index[0, :]] 82 | gv = grad_x_norm_inv[self.edge_index[1, :]] 83 | attention2 = gu * gu + gu * gv 84 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 85 | # Da = torch.diag(grad_x_norm_inv) 86 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 87 | rowsum = torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 88 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 89 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 90 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 91 | return ax - dx 92 | 93 | def forward(self, t, x): # t is needed when called by the integrator 94 | 95 | attention, values = self.multihead_att_layer(x, self.edge_index) 96 | ax = self.multiply_attention(x, attention, values) 97 | 98 | 99 | src = x[self.edge_index[0, :], :] 100 | dst_k = x[self.edge_index[1, :], :] 101 | 102 | 103 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 104 | 105 | 106 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 107 | 108 | ax3 = self.bn_in_1(ax3) 109 | ax = self.bn_in_2(ax) 110 | 111 | ax = torch.mm(ax3, self.output_high) + torch.mm(ax, self.output_low) 112 | 113 | 114 | 115 | ax = torch.cat([x, ax], axis=1) 116 | ax = self.lin2(ax) 117 | 118 | if not self.opt['no_alpha_sigmoid']: 119 | alpha = torch.sigmoid(self.alpha_train) 120 | else: 121 | alpha = self.alpha_train 122 | f = alpha * (ax - x) 123 | if self.opt['add_source']: 124 | f = f + self.beta_train * self.x0 125 | 126 | # f = ax - x 127 | return f 128 | 129 | def __repr__(self): 130 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 131 | 132 | 133 | 134 | 135 | class SpGraphTransAttentionLayer(nn.Module): 136 | """ 137 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 138 | """ 139 | 140 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 141 | super(SpGraphTransAttentionLayer, self).__init__() 142 | self.in_features = in_features 143 | self.out_features = out_features 144 | self.alpha = opt['leaky_relu_slope'] 145 | self.concat = concat 146 | self.device = device 147 | self.opt = opt 148 | self.h = int(opt['heads']) 149 | self.edge_weights = edge_weights 150 | 151 | try: 152 | self.attention_dim = opt['attention_dim'] 153 | except KeyError: 154 | self.attention_dim = out_features 155 | 156 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 157 | self.h, self.attention_dim) 158 | self.d_k = self.attention_dim // self.h 159 | 160 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 161 | self.output_var_x = nn.Parameter(torch.ones(1)) 162 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 163 | self.output_var_p = 
nn.Parameter(torch.ones(1)) 164 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 165 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 166 | self.init_weights(self.Qx) 167 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 168 | self.init_weights(self.Vx) 169 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 170 | self.init_weights(self.Kx) 171 | 172 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 173 | self.init_weights(self.Qp) 174 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 175 | self.init_weights(self.Vp) 176 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 177 | self.init_weights(self.Kp) 178 | 179 | else: 180 | if self.opt['attention_type'] == "exp_kernel": 181 | self.output_var = nn.Parameter(torch.ones(1)) 182 | self.lengthscale = nn.Parameter(torch.ones(1)) 183 | 184 | self.Q = nn.Linear(in_features, self.attention_dim) 185 | self.init_weights(self.Q) 186 | 187 | self.V = nn.Linear(in_features, self.attention_dim) 188 | self.init_weights(self.V) 189 | 190 | self.K = nn.Linear(in_features, self.attention_dim) 191 | self.init_weights(self.K) 192 | 193 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 194 | 195 | self.Wout = nn.Linear(self.d_k, in_features) 196 | self.init_weights(self.Wout) 197 | 198 | def init_weights(self, m): 199 | if type(m) == nn.Linear: 200 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 201 | # m.bias.data.fill_(0.01) 202 | nn.init.constant_(m.weight, 1e-5) 203 | 204 | def forward(self, x, edge): 205 | """ 206 | x might be [features, augmentation, positional encoding, labels] 207 | """ 208 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 209 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 210 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 211 | p = x[:, self.opt['feat_hidden_dim']: label_index] 212 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 213 | 214 | qx = self.Qx(x) 215 | kx = self.Kx(x) 216 | vx = self.Vx(x) 217 | # perform linear operation and split into h heads 218 | kx = kx.view(-1, self.h, self.d_k) 219 | qx = qx.view(-1, self.h, self.d_k) 220 | vx = vx.view(-1, self.h, self.d_k) 221 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 222 | kx = kx.transpose(1, 2) 223 | qx = qx.transpose(1, 2) 224 | vx = vx.transpose(1, 2) 225 | src_x = qx[edge[0, :], :, :] 226 | dst_x = kx[edge[1, :], :, :] 227 | 228 | qp = self.Qp(p) 229 | kp = self.Kp(p) 230 | vp = self.Vp(p) 231 | # perform linear operation and split into h heads 232 | kp = kp.view(-1, self.h, self.d_k) 233 | qp = qp.view(-1, self.h, self.d_k) 234 | vp = vp.view(-1, self.h, self.d_k) 235 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 236 | kp = kp.transpose(1, 2) 237 | qp = qp.transpose(1, 2) 238 | vp = vp.transpose(1, 2) 239 | src_p = qp[edge[0, :], :, :] 240 | dst_p = kp[edge[1, :], :, :] 241 | 242 | prods = self.output_var_x ** 2 * torch.exp( 243 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 244 | * self.output_var_p ** 2 * torch.exp( 245 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 246 | 247 | v = None 248 | 249 | else: 250 | q = self.Q(x) 251 | k = self.K(x) 252 | v = self.V(x) 253 | 254 | # perform linear operation and split into h heads 255 | 256 | k = k.view(-1, self.h, self.d_k) 257 | q = 
q.view(-1, self.h, self.d_k) 258 | v = v.view(-1, self.h, self.d_k) 259 | 260 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 261 | 262 | k = k.transpose(1, 2) 263 | q = q.transpose(1, 2) 264 | v = v.transpose(1, 2) 265 | 266 | src = q[edge[0, :], :, :] 267 | dst_k = k[edge[1, :], :, :] 268 | 269 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 270 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 271 | elif self.opt['attention_type'] == "scaled_dot": 272 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 273 | elif self.opt['attention_type'] == "cosine_sim": 274 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 275 | prods = cos(src, dst_k) 276 | elif self.opt['attention_type'] == "pearson": 277 | src_mu = torch.mean(src, dim=1, keepdim=True) 278 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 279 | src = src - src_mu 280 | dst_k = dst_k - dst_mu 281 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 282 | prods = cos(src, dst_k) 283 | 284 | if self.opt['reweight_attention'] and self.edge_weights is not None: 285 | prods = prods * self.edge_weights.unsqueeze(dim=1) 286 | if self.opt['square_plus']: 287 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 288 | else: 289 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 290 | return attention, (v, prods) 291 | 292 | def __repr__(self): 293 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 294 | 295 | 296 | 297 | if __name__ == '__main__': 298 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 299 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 300 | 'attention_norm_idx': 0, 'add_source': False, 301 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 302 | } 303 | dataset = get_dataset(opt, '../data', False) 304 | t = 1 305 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 306 | out = func(t, dataset.data.x) 307 | -------------------------------------------------------------------------------- /src/function_laplacian_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | from torch_scatter import scatter 11 | import math 12 | from torch_geometric.utils import get_laplacian 13 | import torch.nn.functional as F 14 | 15 | 16 | class ODEFuncLapCONV(ODEFunc): 17 | 18 | def __init__(self, in_features, out_features, opt, data, device): 19 | super(ODEFuncLapCONV, self).__init__(opt, data, device) 20 | 21 | if opt['self_loop_weight'] > 0: 22 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 23 | fill_value=opt['self_loop_weight']) 24 | else: 25 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 26 | # print("self.edge_index: ", self.edge_index.shape) 27 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt,device,edge_weights=self.edge_weight).to( 28 | device) 29 | self.device = device 30 | 31 | self.edge_index,self.edge_weight = 
remove_self_loops(self.edge_index, self.edge_weight) 32 | 33 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 34 | self.edge_index_lap = self.edge_index_lap.to(device) 35 | self.edge_weight_lap = self.edge_weight_lap.to(device) 36 | 37 | 38 | 39 | self.gate = nn.Linear(2 * in_features, 1) 40 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 41 | 42 | 43 | self.lin2 = nn.Linear(in_features * 2, out_features) 44 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 45 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 46 | self.output_low, self.output_high = ( 47 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 48 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)) 49 | ) 50 | 51 | 52 | 53 | 54 | 55 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 56 | 57 | 58 | 59 | self.weight_mlp.data.uniform_(-stdv, stdv) 60 | 61 | self.output_low.data.uniform_(-stdv, stdv) 62 | self.output_high.data.uniform_(-stdv, stdv) 63 | 64 | 65 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 66 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 67 | 68 | self.lamda = nn.Parameter(torch.tensor(0.0)) 69 | 70 | 71 | 72 | def forward(self, t, x): # t is needed when called by the integrator 73 | 74 | 75 | x2 = x 76 | 77 | src = x[self.edge_index[0, :], :] 78 | dst_k = x[self.edge_index[1, :], :] 79 | h2 = torch.cat([src, dst_k], dim=1) 80 | attention1 = torch.tanh(self.gate(h2)).squeeze() 81 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 82 | # x_new is v_ij elementwise product with x_j in the paper 83 | 84 | # print("x_new: ", x_new.shape) 85 | ax3 = torch_sparse.spmm(self.edge_index, attention1, x_new.shape[0], x_new.shape[0], x_new) 86 | ax3 = scatter(ax3, self.edge_index[1, :].T, dim=0, reduce="sum") 87 | # ax3 is the divergence of the V elementwise product with X in the paper 88 | 89 | 90 | 91 | 92 | ax2 = torch_sparse.spmm(self.edge_index, self.edge_weight, x.shape[0], x.shape[0], x2) 93 | 94 | # ax2 is the diffusion term in the paper 95 | 96 | ax3 = self.bn_in_1(ax3) 97 | ax2 = self.bn_in_2(ax2) 98 | 99 | ax = torch.mm(ax3, self.output_high) + torch.mm(ax2, self.output_low) 100 | 101 | 102 | 103 | ax = torch.cat([x, ax], axis=1) 104 | ax = self.lin2(ax) 105 | 106 | if not self.opt['no_alpha_sigmoid']: 107 | alpha = torch.sigmoid(self.alpha_train) 108 | else: 109 | alpha = self.alpha_train 110 | f = alpha * (ax - x) 111 | if self.opt['add_source']: 112 | f = f + self.beta_train * self.x0 113 | 114 | # f = ax - x 115 | return f 116 | 117 | def __repr__(self): 118 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 119 | 120 | 121 | class SpGraphTransAttentionLayer(nn.Module): 122 | """ 123 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 124 | """ 125 | 126 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 127 | super(SpGraphTransAttentionLayer, self).__init__() 128 | self.in_features = in_features 129 | self.out_features = out_features 130 | self.alpha = opt['leaky_relu_slope'] 131 | self.concat = concat 132 | self.device = device 133 | self.opt = opt 134 | self.h = int(opt['heads']) 135 | self.edge_weights = edge_weights 136 | 137 | try: 138 | self.attention_dim = opt['attention_dim'] 139 | except KeyError: 140 | self.attention_dim = out_features 141 | 142 | assert self.attention_dim % self.h == 0, "Number of
heads ({}) must be a factor of the dimension size ({})".format( 143 | self.h, self.attention_dim) 144 | self.d_k = self.attention_dim // self.h 145 | 146 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 147 | self.output_var_x = nn.Parameter(torch.ones(1)) 148 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 149 | self.output_var_p = nn.Parameter(torch.ones(1)) 150 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 151 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 152 | self.init_weights(self.Qx) 153 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 154 | self.init_weights(self.Vx) 155 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 156 | self.init_weights(self.Kx) 157 | 158 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 159 | self.init_weights(self.Qp) 160 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 161 | self.init_weights(self.Vp) 162 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 163 | self.init_weights(self.Kp) 164 | 165 | else: 166 | if self.opt['attention_type'] == "exp_kernel": 167 | self.output_var = nn.Parameter(torch.ones(1)) 168 | self.lengthscale = nn.Parameter(torch.ones(1)) 169 | 170 | self.Q = nn.Linear(in_features, self.attention_dim) 171 | self.init_weights(self.Q) 172 | 173 | self.V = nn.Linear(in_features, self.attention_dim) 174 | self.init_weights(self.V) 175 | 176 | self.K = nn.Linear(in_features, self.attention_dim) 177 | self.init_weights(self.K) 178 | 179 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 180 | 181 | self.Wout = nn.Linear(self.d_k, in_features) 182 | self.init_weights(self.Wout) 183 | 184 | def init_weights(self, m): 185 | if type(m) == nn.Linear: 186 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 187 | # m.bias.data.fill_(0.01) 188 | nn.init.constant_(m.weight, 1e-5) 189 | 190 | def forward(self, x, edge): 191 | """ 192 | x might be [features, augmentation, positional encoding, labels] 193 | """ 194 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 195 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 196 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 197 | p = x[:, self.opt['feat_hidden_dim']: label_index] 198 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 199 | 200 | qx = self.Qx(x) 201 | kx = self.Kx(x) 202 | vx = self.Vx(x) 203 | # perform linear operation and split into h heads 204 | kx = kx.view(-1, self.h, self.d_k) 205 | qx = qx.view(-1, self.h, self.d_k) 206 | vx = vx.view(-1, self.h, self.d_k) 207 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 208 | kx = kx.transpose(1, 2) 209 | qx = qx.transpose(1, 2) 210 | vx = vx.transpose(1, 2) 211 | src_x = qx[edge[0, :], :, :] 212 | dst_x = kx[edge[1, :], :, :] 213 | 214 | qp = self.Qp(p) 215 | kp = self.Kp(p) 216 | vp = self.Vp(p) 217 | # perform linear operation and split into h heads 218 | kp = kp.view(-1, self.h, self.d_k) 219 | qp = qp.view(-1, self.h, self.d_k) 220 | vp = vp.view(-1, self.h, self.d_k) 221 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 222 | kp = kp.transpose(1, 2) 223 | qp = qp.transpose(1, 2) 224 | vp = vp.transpose(1, 2) 225 | src_p = qp[edge[0, :], :, :] 226 | dst_p = kp[edge[1, :], :, :] 227 | 228 | prods = self.output_var_x ** 2 * torch.exp( 229 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * 
self.lengthscale_x ** 2)) \ 230 | * self.output_var_p ** 2 * torch.exp( 231 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 232 | 233 | v = None 234 | 235 | else: 236 | q = self.Q(x) 237 | k = self.K(x) 238 | v = self.V(x) 239 | 240 | # perform linear operation and split into h heads 241 | 242 | k = k.view(-1, self.h, self.d_k) 243 | q = q.view(-1, self.h, self.d_k) 244 | v = v.view(-1, self.h, self.d_k) 245 | 246 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 247 | 248 | k = k.transpose(1, 2) 249 | q = q.transpose(1, 2) 250 | v = v.transpose(1, 2) 251 | 252 | src = q[edge[0, :], :, :] 253 | dst_k = k[edge[1, :], :, :] 254 | 255 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 256 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 257 | elif self.opt['attention_type'] == "scaled_dot": 258 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 259 | elif self.opt['attention_type'] == "cosine_sim": 260 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 261 | prods = cos(src, dst_k) 262 | elif self.opt['attention_type'] == "pearson": 263 | src_mu = torch.mean(src, dim=1, keepdim=True) 264 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 265 | src = src - src_mu 266 | dst_k = dst_k - dst_mu 267 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 268 | prods = cos(src, dst_k) 269 | 270 | if self.opt['reweight_attention'] and self.edge_weights is not None: 271 | prods = prods * self.edge_weights.unsqueeze(dim=1) 272 | if self.opt['square_plus']: 273 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 274 | else: 275 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 276 | return attention, (v, prods) 277 | 278 | def __repr__(self): 279 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 280 | 281 | 282 | if __name__ == '__main__': 283 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 284 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 285 | 'attention_norm_idx': 0, 'add_source': False, 286 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 287 | } 288 | dataset = get_dataset(opt, '../data', False) 289 | t = 1 290 | func = ODEFuncBelFA(dataset.data.num_features, 6, opt, dataset.data, device) 291 | out = func(t, dataset.data.x) 292 | -------------------------------------------------------------------------------- /src/function_laplacian_diffusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch_sparse 4 | 5 | from base_classes import ODEFunc 6 | from utils import MaxNFEException 7 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 8 | 9 | # Define the ODE function. 10 | # Input: 11 | # --- t: A tensor with shape [], meaning the current time. 12 | # --- x: A tensor with shape [#batches, dims], meaning the value of x at t. 13 | # Output: 14 | # --- dx/dt: A tensor with shape [#batches, dims], meaning the derivative of x at t. 
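# A minimal usage sketch (an assumption about how a block class would drive a function with
# this signature, not code from this file): a torchdiffeq-style solver integrates dx/dt = f(t, x),
# roughly as follows
#
#   from torchdiffeq import odeint
#   func = LaplacianODEFunc(in_features, out_features, opt, data, device)
#   func.x0 = x0                      # source term used when opt['add_source'] is set
#   t = torch.tensor([0.0, T])        # T: total integration time (assumed hyperparameter)
#   x_T = odeint(func, x0, t)[-1]     # node features after diffusing for time T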
15 | class LaplacianODEFunc(ODEFunc): 16 | 17 | # currently requires in_features = out_features 18 | def __init__(self, in_features, out_features, opt, data, device): 19 | super(LaplacianODEFunc, self).__init__(opt, data, device) 20 | 21 | self.in_features = in_features 22 | self.out_features = out_features 23 | self.w = nn.Parameter(torch.eye(opt['hidden_dim'])) 24 | self.d = nn.Parameter(torch.zeros(opt['hidden_dim']) + 1) 25 | self.alpha_sc = nn.Parameter(torch.ones(1)) 26 | self.beta_sc = nn.Parameter(torch.ones(1)) 27 | 28 | # if opt['self_loop_weight'] > 0: 29 | # self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 30 | # fill_value=opt['self_loop_weight']) 31 | # else: 32 | # self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 33 | # 34 | # self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 35 | # self.lin2 = nn.Linear(in_features * 2, out_features) 36 | # nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 37 | 38 | def sparse_multiply(self, x): 39 | if self.opt['block'] in ['attention']: # adj is a multihead attention 40 | mean_attention = self.attention_weights.mean(dim=1) 41 | ax = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) 42 | elif self.opt['block'] in ['mixed', 'hard_attention']: # adj is a torch sparse matrix 43 | ax = torch_sparse.spmm(self.edge_index, self.attention_weights, x.shape[0], x.shape[0], x) 44 | else: # adj is a torch sparse matrix 45 | ax = torch_sparse.spmm(self.edge_index, self.edge_weight, x.shape[0], x.shape[0], x) 46 | return ax 47 | 48 | def forward(self, t, x): # the t param is needed by the ODE solver. 49 | if self.nfe > self.opt["max_nfe"]: 50 | raise MaxNFEException 51 | self.nfe += 1 52 | ax = self.sparse_multiply(x) 53 | 54 | # ax = torch.cat([x, ax], axis=1) 55 | # ax = self.lin2(ax) 56 | 57 | if not self.opt['no_alpha_sigmoid']: 58 | alpha = torch.sigmoid(self.alpha_train) 59 | else: 60 | alpha = self.alpha_train 61 | 62 | f = alpha * (ax - x) 63 | if self.opt['add_source']: 64 | f = f + self.beta_train * self.x0 65 | return f 66 | -------------------------------------------------------------------------------- /src/function_transformer_attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | 11 | 12 | class ODEFuncTransformerAtt(ODEFunc): 13 | 14 | def __init__(self, in_features, out_features, opt, data, device): 15 | super(ODEFuncTransformerAtt, self).__init__(opt, data, device) 16 | 17 | if opt['self_loop_weight'] > 0: 18 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 19 | fill_value=opt['self_loop_weight']) 20 | else: 21 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 22 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt, 23 | device, edge_weights=self.edge_weight).to(device) 24 | 25 | def multiply_attention(self, x, attention, v=None): 26 | # todo would be nice if this was more efficient 27 | if self.opt['mix_features']: 28 | vx = torch.mean(torch.stack( 29 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, 
idx]) for idx in 30 | range(self.opt['heads'])], dim=0), 31 | dim=0) 32 | ax = self.multihead_att_layer.Wout(vx) 33 | else: 34 | mean_attention = attention.mean(dim=1) 35 | ax = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) 36 | return ax 37 | 38 | def forward(self, t, x): # t is needed when called by the integrator 39 | if self.nfe > self.opt["max_nfe"]: 40 | raise MaxNFEException 41 | 42 | self.nfe += 1 43 | attention, values = self.multihead_att_layer(x, self.edge_index) 44 | ax = self.multiply_attention(x, attention, values) 45 | 46 | if not self.opt['no_alpha_sigmoid']: 47 | alpha = torch.sigmoid(self.alpha_train) 48 | else: 49 | alpha = self.alpha_train 50 | f = alpha * (ax - x) 51 | if self.opt['add_source']: 52 | f = f + self.beta_train * self.x0 53 | return f 54 | 55 | def __repr__(self): 56 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 57 | 58 | 59 | class SpGraphTransAttentionLayer(nn.Module): 60 | """ 61 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 62 | """ 63 | 64 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 65 | super(SpGraphTransAttentionLayer, self).__init__() 66 | self.in_features = in_features 67 | self.out_features = out_features 68 | self.alpha = opt['leaky_relu_slope'] 69 | self.concat = concat 70 | self.device = device 71 | self.opt = opt 72 | self.h = int(opt['heads']) 73 | self.edge_weights = edge_weights 74 | 75 | try: 76 | self.attention_dim = opt['attention_dim'] 77 | except KeyError: 78 | self.attention_dim = out_features 79 | 80 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 81 | self.h, self.attention_dim) 82 | self.d_k = self.attention_dim // self.h 83 | 84 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 85 | self.output_var_x = nn.Parameter(torch.ones(1)) 86 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 87 | self.output_var_p = nn.Parameter(torch.ones(1)) 88 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 89 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 90 | self.init_weights(self.Qx) 91 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 92 | self.init_weights(self.Vx) 93 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 94 | self.init_weights(self.Kx) 95 | 96 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 97 | self.init_weights(self.Qp) 98 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 99 | self.init_weights(self.Vp) 100 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 101 | self.init_weights(self.Kp) 102 | 103 | else: 104 | if self.opt['attention_type'] == "exp_kernel": 105 | self.output_var = nn.Parameter(torch.ones(1)) 106 | self.lengthscale = nn.Parameter(torch.ones(1)) 107 | 108 | self.Q = nn.Linear(in_features, self.attention_dim) 109 | self.init_weights(self.Q) 110 | 111 | self.V = nn.Linear(in_features, self.attention_dim) 112 | self.init_weights(self.V) 113 | 114 | self.K = nn.Linear(in_features, self.attention_dim) 115 | self.init_weights(self.K) 116 | 117 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 118 | 119 | self.Wout = nn.Linear(self.d_k, in_features) 120 | self.init_weights(self.Wout) 121 | 122 | def init_weights(self, m): 123 | if type(m) == nn.Linear: 124 | # 
nn.init.xavier_uniform_(m.weight, gain=1.414) 125 | # m.bias.data.fill_(0.01) 126 | nn.init.constant_(m.weight, 1e-5) 127 | 128 | def forward(self, x, edge): 129 | """ 130 | x might be [features, augmentation, positional encoding, labels] 131 | """ 132 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 133 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 134 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 135 | p = x[:, self.opt['feat_hidden_dim']: label_index] 136 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 137 | 138 | qx = self.Qx(x) 139 | kx = self.Kx(x) 140 | vx = self.Vx(x) 141 | # perform linear operation and split into h heads 142 | kx = kx.view(-1, self.h, self.d_k) 143 | qx = qx.view(-1, self.h, self.d_k) 144 | vx = vx.view(-1, self.h, self.d_k) 145 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 146 | kx = kx.transpose(1, 2) 147 | qx = qx.transpose(1, 2) 148 | vx = vx.transpose(1, 2) 149 | src_x = qx[edge[0, :], :, :] 150 | dst_x = kx[edge[1, :], :, :] 151 | 152 | qp = self.Qp(p) 153 | kp = self.Kp(p) 154 | vp = self.Vp(p) 155 | # perform linear operation and split into h heads 156 | kp = kp.view(-1, self.h, self.d_k) 157 | qp = qp.view(-1, self.h, self.d_k) 158 | vp = vp.view(-1, self.h, self.d_k) 159 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 160 | kp = kp.transpose(1, 2) 161 | qp = qp.transpose(1, 2) 162 | vp = vp.transpose(1, 2) 163 | src_p = qp[edge[0, :], :, :] 164 | dst_p = kp[edge[1, :], :, :] 165 | 166 | prods = self.output_var_x ** 2 * torch.exp( 167 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 168 | * self.output_var_p ** 2 * torch.exp( 169 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 170 | 171 | v = None 172 | 173 | else: 174 | q = self.Q(x) 175 | k = self.K(x) 176 | v = self.V(x) 177 | 178 | # perform linear operation and split into h heads 179 | 180 | k = k.view(-1, self.h, self.d_k) 181 | q = q.view(-1, self.h, self.d_k) 182 | v = v.view(-1, self.h, self.d_k) 183 | 184 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 185 | 186 | k = k.transpose(1, 2) 187 | q = q.transpose(1, 2) 188 | v = v.transpose(1, 2) 189 | 190 | src = q[edge[0, :], :, :] 191 | dst_k = k[edge[1, :], :, :] 192 | 193 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 194 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 195 | elif self.opt['attention_type'] == "scaled_dot": 196 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 197 | elif self.opt['attention_type'] == "cosine_sim": 198 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 199 | prods = cos(src, dst_k) 200 | elif self.opt['attention_type'] == "pearson": 201 | src_mu = torch.mean(src, dim=1, keepdim=True) 202 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 203 | src = src - src_mu 204 | dst_k = dst_k - dst_mu 205 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 206 | prods = cos(src, dst_k) 207 | 208 | if self.opt['reweight_attention'] and self.edge_weights is not None: 209 | prods = prods * self.edge_weights.unsqueeze(dim=1) 210 | if self.opt['square_plus']: 211 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 212 | else: 213 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 214 | return attention, (v, prods) 215 | 216 | def __repr__(self): 217 | return 
self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 218 | 219 | 220 | if __name__ == '__main__': 221 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 222 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 223 | 'attention_norm_idx': 0, 'add_source': False, 224 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 225 | } 226 | dataset = get_dataset(opt, '../data', False) 227 | t = 1 228 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 229 | out = func(t, dataset.data.x) 230 | -------------------------------------------------------------------------------- /src/function_transformer_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | from data import get_dataset 7 | from utils import MaxNFEException, squareplus 8 | from base_classes import ODEFunc 9 | from torch_scatter import scatter 10 | import math 11 | from torch_geometric.utils import get_laplacian 12 | import torch.nn.functional as F 13 | import numpy as np 14 | 15 | class ODEFuncTransConv(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncTransConv, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | 26 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 27 | 28 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt, 29 | device, edge_weights=self.edge_weight).to(device) 30 | 31 | 32 | 33 | 34 | 35 | self.device = device 36 | 37 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 38 | 39 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 40 | self.edge_index_lap = self.edge_index_lap.to(device) 41 | self.edge_weight_lap = self.edge_weight_lap.to(device) 42 | 43 | self.gate = nn.Linear(2 * in_features, 1) 44 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 45 | 46 | 47 | self.lin2 = nn.Linear(in_features * 2, out_features) 48 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 49 | 50 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 51 | 52 | 53 | self.output_low, self.output_high = ( 54 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 55 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 56 | ) 57 | 58 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 59 | 60 | self.weight_mlp.data.uniform_(-stdv, stdv) 61 | 62 | self.output_low.data.uniform_(-stdv, stdv) 63 | self.output_high.data.uniform_(-stdv, stdv) 64 | 65 | 66 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 67 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 68 | 69 | self.lamda1 = nn.Parameter(torch.tensor(0.0),requires_grad=True) 70 | 71 | def multiply_attention(self, x, attention, v=None): 72 | # todo would be nice if this was more efficient 73 | if self.opt['mix_features']: 74 
| vx = torch.mean(torch.stack( 75 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 76 | range(self.opt['heads'])], dim=0), 77 | dim=0) 78 | ax = self.multihead_att_layer.Wout(vx) 79 | else: 80 | mean_attention = attention.mean(dim=1) 81 | ax = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) 82 | return ax 83 | 84 | def forward(self, t, x): # t is needed when called by the integrator 85 | 86 | if self.nfe > self.opt["max_nfe"]: 87 | raise MaxNFEException 88 | 89 | self.nfe += 1 90 | 91 | attention, wx = self.multihead_att_layer(x, self.edge_index) 92 | ax2 = self.multiply_attention(x, attention, wx) 93 | # todo would be nice if this was more efficient 94 | 95 | 96 | 97 | src = x[self.edge_index[0, :], :] 98 | dst_k = x[self.edge_index[1, :], :] 99 | 100 | 101 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 102 | 103 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 104 | 105 | 106 | ax = self.lamda1 * torch.mm(ax3, self.output_high) +torch.mm(ax2, self.output_low) 107 | 108 | 109 | ax = torch.cat([x, ax], dim=1) 110 | ax = F.relu(self.lin2(ax)) 111 | 112 | if not self.opt['no_alpha_sigmoid']: 113 | alpha = torch.sigmoid(self.alpha_train) 114 | else: 115 | alpha = self.alpha_train 116 | 117 | f = alpha * (ax - x) 118 | if self.opt['add_source']: 119 | f = f + self.beta_train * self.x0 120 | return f 121 | 122 | def __repr__(self): 123 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 124 | 125 | 126 | class SpGraphTransAttentionLayer(nn.Module): 127 | """ 128 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 129 | """ 130 | 131 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 132 | super(SpGraphTransAttentionLayer, self).__init__() 133 | self.in_features = in_features 134 | self.out_features = out_features 135 | self.alpha = opt['leaky_relu_slope'] 136 | self.concat = concat 137 | self.device = device 138 | self.opt = opt 139 | self.h = int(opt['heads']) 140 | self.edge_weights = edge_weights 141 | 142 | try: 143 | self.attention_dim = opt['attention_dim'] 144 | except KeyError: 145 | self.attention_dim = out_features 146 | 147 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 148 | self.h, self.attention_dim) 149 | self.d_k = self.attention_dim // self.h 150 | 151 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 152 | self.output_var_x = nn.Parameter(torch.ones(1)) 153 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 154 | self.output_var_p = nn.Parameter(torch.ones(1)) 155 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 156 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 157 | self.init_weights(self.Qx) 158 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 159 | self.init_weights(self.Vx) 160 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 161 | self.init_weights(self.Kx) 162 | 163 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 164 | self.init_weights(self.Qp) 165 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 166 | self.init_weights(self.Vp) 167 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 168 | self.init_weights(self.Kp) 169 | 170 | else: 171 | if 
self.opt['attention_type'] == "exp_kernel": 172 | self.output_var = nn.Parameter(torch.ones(1)) 173 | self.lengthscale = nn.Parameter(torch.ones(1)) 174 | 175 | self.Q = nn.Linear(in_features, self.attention_dim) 176 | self.init_weights(self.Q) 177 | 178 | self.V = nn.Linear(in_features, self.attention_dim) 179 | self.init_weights(self.V) 180 | 181 | self.K = nn.Linear(in_features, self.attention_dim) 182 | self.init_weights(self.K) 183 | 184 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 185 | 186 | self.Wout = nn.Linear(self.d_k, in_features) 187 | self.init_weights(self.Wout) 188 | 189 | def init_weights(self, m): 190 | if type(m) == nn.Linear: 191 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 192 | # m.bias.data.fill_(0.01) 193 | nn.init.constant_(m.weight, 1e-5) 194 | 195 | def forward(self, x, edge): 196 | """ 197 | x might be [features, augmentation, positional encoding, labels] 198 | """ 199 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 200 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 201 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 202 | p = x[:, self.opt['feat_hidden_dim']: label_index] 203 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 204 | 205 | qx = self.Qx(x) 206 | kx = self.Kx(x) 207 | vx = self.Vx(x) 208 | # perform linear operation and split into h heads 209 | kx = kx.view(-1, self.h, self.d_k) 210 | qx = qx.view(-1, self.h, self.d_k) 211 | vx = vx.view(-1, self.h, self.d_k) 212 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 213 | kx = kx.transpose(1, 2) 214 | qx = qx.transpose(1, 2) 215 | vx = vx.transpose(1, 2) 216 | src_x = qx[edge[0, :], :, :] 217 | dst_x = kx[edge[1, :], :, :] 218 | 219 | qp = self.Qp(p) 220 | kp = self.Kp(p) 221 | vp = self.Vp(p) 222 | # perform linear operation and split into h heads 223 | kp = kp.view(-1, self.h, self.d_k) 224 | qp = qp.view(-1, self.h, self.d_k) 225 | vp = vp.view(-1, self.h, self.d_k) 226 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 227 | kp = kp.transpose(1, 2) 228 | qp = qp.transpose(1, 2) 229 | vp = vp.transpose(1, 2) 230 | src_p = qp[edge[0, :], :, :] 231 | dst_p = kp[edge[1, :], :, :] 232 | 233 | prods = self.output_var_x ** 2 * torch.exp( 234 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 235 | * self.output_var_p ** 2 * torch.exp( 236 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 237 | 238 | v = None 239 | 240 | else: 241 | q = self.Q(x) 242 | k = self.K(x) 243 | v = self.V(x) 244 | 245 | # perform linear operation and split into h heads 246 | 247 | k = k.view(-1, self.h, self.d_k) 248 | q = q.view(-1, self.h, self.d_k) 249 | v = v.view(-1, self.h, self.d_k) 250 | 251 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 252 | 253 | k = k.transpose(1, 2) 254 | q = q.transpose(1, 2) 255 | v = v.transpose(1, 2) 256 | 257 | src = q[edge[0, :], :, :] 258 | dst_k = k[edge[1, :], :, :] 259 | 260 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 261 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 262 | elif self.opt['attention_type'] == "scaled_dot": 263 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 264 | elif self.opt['attention_type'] == "cosine_sim": 265 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 266 | prods = cos(src, dst_k) 267 | elif self.opt['attention_type'] 
== "pearson": 268 | src_mu = torch.mean(src, dim=1, keepdim=True) 269 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 270 | src = src - src_mu 271 | dst_k = dst_k - dst_mu 272 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 273 | prods = cos(src, dst_k) 274 | 275 | if self.opt['reweight_attention'] and self.edge_weights is not None: 276 | prods = prods * self.edge_weights.unsqueeze(dim=1) 277 | if self.opt['square_plus']: 278 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 279 | else: 280 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 281 | return attention, (v, prods) 282 | 283 | def __repr__(self): 284 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 285 | 286 | 287 | 288 | if __name__ == '__main__': 289 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 290 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'beta_dim': 'vc', 'heads': 2, 'K': 10, 'attention_norm_idx': 0, 291 | 'add_source':False, 'alpha_dim': 'sc', 'beta_dim': 'vc', 'max_nfe':1000, 'mix_features': False} 292 | dataset = get_dataset(opt, '../data', False) 293 | t = 1 294 | func = ODEFuncAtt(dataset.data.num_features, 6, opt, dataset.data, device) 295 | out = func(t, dataset.data.x) 296 | -------------------------------------------------------------------------------- /src/graphcon_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from torch_geometric.nn import GCNConv, GATConv 6 | from torch_scatter import scatter 7 | 8 | def batch_jacobian(func, x, create_graph=False): 9 | # x in shape (Batch, Length) 10 | def _func_sum(x): 11 | return func(x).sum(dim=0) 12 | 13 | return torch.autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1, 2, 0) 14 | 15 | 16 | 17 | 18 | class attention_H(nn.Module): 19 | """"replace this module by a aggregation function """ 20 | 21 | def __init__(self, size_in, edge_index): 22 | super().__init__() 23 | self.dim = size_in 24 | 25 | self.layer1 =GCNConv(size_in*2, size_in*2, normalize=True) 26 | self.edge_index = edge_index 27 | self.layer2 =GCNConv(size_in*2,size_in, normalize=True) 28 | 29 | self.layer3 = GCNConv(size_in , 1, normalize=True) 30 | def forward(self, x): 31 | 32 | out = self.layer1(x,self.edge_index) 33 | out = torch.tanh(out) 34 | out = self.layer2(out,self.edge_index) 35 | out = torch.tanh(out) 36 | out = self.layer3(out, self.edge_index) 37 | return out 38 | 39 | class HAMCON_GCN(nn.Module): 40 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers,data,device, dt=1., alpha=1., gamma=1., res_version=1,): 41 | super(HAMCON_GCN, self).__init__() 42 | self.dropout = dropout 43 | self.nhid = nhid 44 | self.nlayers = nlayers 45 | self.enc = nn.Linear(nfeat,nhid) 46 | self.conv = GCNConv(nhid, nhid) 47 | self.dec = nn.Linear(nhid,nclass) 48 | self.res = nn.Linear(nhid,nhid) 49 | if(res_version==1): 50 | self.residual = self.res_connection_v1 51 | else: 52 | self.residual = self.res_connection_v2 53 | self.dt = dt 54 | self.act_fn = nn.ReLU() 55 | self.alpha = alpha 56 | self.gamma = gamma 57 | self.reset_params() 58 | self.in_features = nhid 59 | 60 | self.edge_index = data.edge_index.to(device) 61 | self.H = attention_H(self.in_features, self.edge_index) 62 | def reset_params(self): 63 | for name, param in self.named_parameters(): 64 | if 'weight' in name and 'emb' not in 
name and 'out' not in name: 65 | stdv = 1. / math.sqrt(self.nhid) 66 | param.data.uniform_(-stdv, stdv) 67 | 68 | def res_connection_v1(self, X): 69 | res = - self.res(self.conv.lin(X)) 70 | return res 71 | 72 | def res_connection_v2(self, X): 73 | res = - self.conv.lin(X) + self.res(X) 74 | return res 75 | 76 | def forward(self, data): 77 | input = data.x 78 | edge_index = data.edge_index 79 | input = F.dropout(input, self.dropout, training=self.training) 80 | Y = self.act_fn(self.enc(input)) 81 | X = Y 82 | Y = F.dropout(Y, self.dropout, training=self.training) 83 | X = F.dropout(X, self.dropout, training=self.training) 84 | 85 | for i in range(self.nlayers): 86 | x_full = torch.hstack([X, Y]) 87 | f_full = batch_jacobian(lambda xx: self.H(xx), x_full, create_graph=True).squeeze() 88 | dx = f_full[..., self.in_features:] 89 | dv = -1 * f_full[..., 0:self.in_features] 90 | 91 | # Y = Y + self.dt*( dv- self.alpha*Y - self.gamma*X) 92 | Y = Y + self.dt * (dv) ###v1 93 | # Y = Y + self.dt * (dv - self.alpha * Y ) ##v2 94 | X = X + self.dt*dx 95 | Y = F.dropout(Y, self.dropout, training=self.training) 96 | X = F.dropout(X, self.dropout, training=self.training) 97 | 98 | X = self.dec(X) 99 | 100 | return X 101 | 102 | 103 | class GraphCON_GCN(nn.Module): 104 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers, dt=1., alpha=1., gamma=1., res_version=1): 105 | super(GraphCON_GCN, self).__init__() 106 | self.dropout = dropout 107 | self.nhid = nhid 108 | self.nlayers = nlayers 109 | self.enc = nn.Linear(nfeat,nhid) 110 | self.conv = GCNConv(nhid, nhid) 111 | self.dec = nn.Linear(nhid,nclass) 112 | self.res = nn.Linear(nhid,nhid) 113 | if(res_version==1): 114 | self.residual = self.res_connection_v1 115 | else: 116 | self.residual = self.res_connection_v2 117 | self.dt = dt 118 | self.act_fn = nn.ReLU() 119 | self.alpha = alpha 120 | self.gamma = gamma 121 | self.reset_params() 122 | 123 | def reset_params(self): 124 | for name, param in self.named_parameters(): 125 | if 'weight' in name and 'emb' not in name and 'out' not in name: 126 | stdv = 1. 
/ math.sqrt(self.nhid) 127 | param.data.uniform_(-stdv, stdv) 128 | 129 | def res_connection_v1(self, X): 130 | res = - self.res(self.conv.lin(X)) 131 | return res 132 | 133 | def res_connection_v2(self, X): 134 | res = - self.conv.lin(X) + self.res(X) 135 | return res 136 | 137 | def forward(self, data): 138 | input = data.x 139 | edge_index = data.edge_index 140 | input = F.dropout(input, self.dropout, training=self.training) 141 | Y = self.act_fn(self.enc(input)) 142 | X = Y 143 | Y = F.dropout(Y, self.dropout, training=self.training) 144 | X = F.dropout(X, self.dropout, training=self.training) 145 | 146 | for i in range(self.nlayers): 147 | Y = Y + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*Y - self.gamma*X) 148 | X = X + self.dt*Y 149 | Y = F.dropout(Y, self.dropout, training=self.training) 150 | X = F.dropout(X, self.dropout, training=self.training) 151 | 152 | X = self.dec(X) 153 | 154 | return X 155 | 156 | class GraphCON_GAT(nn.Module): 157 | def __init__(self, nfeat, nhid, nclass, nlayers, dropout, dt=1., alpha=1., gamma=1., nheads=4): 158 | super(GraphCON_GAT, self).__init__() 159 | self.alpha = alpha 160 | self.gamma = gamma 161 | self.dropout = dropout 162 | self.nheads = nheads 163 | self.nhid = nhid 164 | self.nlayers = nlayers 165 | self.act_fn = nn.ReLU() 166 | self.res = nn.Linear(nhid, nheads * nhid) 167 | self.enc = nn.Linear(nfeat,nhid) 168 | self.conv = GATConv(nhid, nhid, heads=nheads) 169 | self.dec = nn.Linear(nhid,nclass) 170 | self.dt = dt 171 | 172 | def res_connection(self, X): 173 | res = self.res(X) 174 | return res 175 | 176 | def forward(self, data): 177 | input = data.x 178 | n_nodes = input.size(0) 179 | edge_index = data.edge_index 180 | input = F.dropout(input, self.dropout, training=self.training) 181 | Y = self.act_fn(self.enc(input)) 182 | X = Y 183 | Y = F.dropout(Y, self.dropout, training=self.training) 184 | X = F.dropout(X, self.dropout, training=self.training) 185 | 186 | for i in range(self.nlayers): 187 | Y = Y + self.dt*(F.elu(self.conv(X, edge_index) + self.res_connection(X)).view(n_nodes, -1, self.nheads).mean(dim=-1) - self.alpha*Y - self.gamma*X) 188 | X = X + self.dt*Y 189 | Y = F.dropout(Y, self.dropout, training=self.training) 190 | X = F.dropout(X, self.dropout, training=self.training) 191 | 192 | X = self.dec(X) 193 | 194 | return X 195 | 196 | 197 | class GraphCON_GCN_conv(nn.Module): 198 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers,graph_size, dt=1., alpha=1., gamma=1., res_version=1): 199 | super(GraphCON_GCN_conv, self).__init__() 200 | self.dropout = dropout 201 | self.nhid = nhid 202 | self.nlayers = nlayers 203 | self.enc = nn.Linear(nfeat,nhid) 204 | self.conv = GCNConv(nhid, nhid) 205 | self.dec = nn.Linear(nhid,nclass) 206 | self.res = nn.Linear(nhid,nhid) 207 | if(res_version==1): 208 | self.residual = self.res_connection_v1 209 | else: 210 | self.residual = self.res_connection_v2 211 | self.dt = dt 212 | self.act_fn = nn.ReLU() 213 | self.alpha = alpha 214 | self.gamma = gamma 215 | self.reset_params() 216 | self.lamda1 = nn.Parameter(torch.tensor(1.0),requires_grad=True) 217 | 218 | self.gate = nn.Linear(2 * nhid, 1) 219 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 220 | 221 | self.lin1 = nn.Linear(nhid, nhid) 222 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 223 | 224 | self.lin2 = nn.Linear(nhid * 2, nhid) 225 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 226 | 227 | self.weight_low, self.weight_high, self.weight_mlp = ( 228 | 
nn.Parameter(torch.FloatTensor(nhid, nhid)), 229 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 230 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 231 | ) 232 | 233 | self.output_low, self.output_high, self.output_mlp = ( 234 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 235 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 236 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 237 | ) 238 | 239 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 240 | 241 | self.weight_low.data.uniform_(-stdv, stdv) 242 | self.weight_high.data.uniform_(-stdv, stdv) 243 | self.weight_mlp.data.uniform_(-stdv, stdv) 244 | 245 | self.output_low.data.uniform_(-stdv, stdv) 246 | self.output_high.data.uniform_(-stdv, stdv) 247 | self.output_mlp.data.uniform_(-stdv, stdv) 248 | self.lamda = nn.ParameterList() 249 | for i in range(self.nlayers): 250 | self.lamda .append(nn.Parameter(torch.zeros((self.nhid, 1)),requires_grad=True)) 251 | self.graph_size = graph_size 252 | 253 | def reset_params(self): 254 | for name, param in self.named_parameters(): 255 | if 'weight' in name and 'emb' not in name and 'out' not in name: 256 | stdv = 1. / math.sqrt(self.nhid) 257 | param.data.uniform_(-stdv, stdv) 258 | 259 | def res_connection_v1(self, X): 260 | res = - self.res(self.conv.lin(X)) 261 | return res 262 | 263 | def res_connection_v2(self, X): 264 | res = - self.conv.lin(X) + self.res(X) 265 | return res 266 | 267 | def forward(self, data): 268 | input = data.x 269 | edge_index = data.edge_index 270 | self.edge_index = data.edge_index 271 | input = F.dropout(input, self.dropout, training=self.training) 272 | Y = self.act_fn(self.enc(input)) 273 | X = Y 274 | Y = F.dropout(Y, self.dropout, training=self.training) 275 | X = F.dropout(X, self.dropout, training=self.training) 276 | 277 | 278 | 279 | for i in range(self.nlayers): 280 | coeff_lamda = (torch.tanh(self.lamda[i])).T 281 | coeff_lamda = coeff_lamda.tile(self.graph_size, 1) 282 | 283 | # src = X[self.edge_index[0, :], :] 284 | # dst_k = X[self.edge_index[1, :], :] 285 | # x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 286 | # ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 287 | 288 | src = X[self.edge_index[0, :], :] 289 | dst_k = X[self.edge_index[1, :], :] 290 | x_new = torch.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 291 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 292 | 293 | 294 | Y = Y + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*Y - self.gamma*X ) 295 | X = X + self.dt* (Y + self.lamda1 * ax3) 296 | Y = F.dropout(Y, self.dropout, training=self.training) 297 | X = F.dropout(X, self.dropout, training=self.training) 298 | 299 | X = self.dec(X) 300 | 301 | return X 302 | -------------------------------------------------------------------------------- /src/model_configurations.py: -------------------------------------------------------------------------------- 1 | from function_transformer_attention import ODEFuncTransformerAtt 2 | from function_GAT_attention import ODEFuncAtt 3 | from function_laplacian_diffusion import LaplacianODEFunc 4 | from block_transformer_attention import AttODEblock 5 | from block_constant import ConstantODEblock 6 | 7 | from function_beltrami_trans import ODEFuncBektramiAtt 8 | 9 | from function_beltrami_fa import ODEFuncBelFA 10 | 11 | from function_laplacian_convection import ODEFuncLapCONV 12 | from function_beltrami_convection import ODEFuncBeltramiCONV 13 | 14 | from function_GAT_convection import ODEFuncAttConv 15 | from 
function_beltrami_gat import ODEFuncBeltramiGAT 16 | 17 | from function_transformer_convection import ODEFuncTransConv 18 | 19 | from function_beltramitrans_convection import ODEFuncBeltramiTRANSCONV 20 | 21 | 22 | class BlockNotDefined(Exception): 23 | pass 24 | 25 | class FunctionNotDefined(Exception): 26 | pass 27 | 28 | 29 | def set_block(opt): 30 | ode_str = opt['block'] 31 | if ode_str == 'attention': 32 | block = AttODEblock 33 | 34 | 35 | elif ode_str == 'constant': 36 | block = ConstantODEblock 37 | 38 | else: 39 | raise BlockNotDefined 40 | return block 41 | 42 | 43 | def set_function(opt): 44 | ode_str = opt['function'] 45 | if ode_str == 'laplacian': 46 | f = LaplacianODEFunc 47 | elif ode_str == 'GAT': 48 | f = ODEFuncAtt 49 | elif ode_str == 'transformer': 50 | f = ODEFuncTransformerAtt 51 | elif ode_str == 'beltrami': 52 | f = ODEFuncBektramiAtt 53 | 54 | 55 | elif ode_str == 'lapconv': 56 | f = ODEFuncLapCONV 57 | elif ode_str == 'belconv': 58 | f = ODEFuncBeltramiCONV 59 | 60 | elif ode_str == 'gatconv': 61 | f = ODEFuncAttConv 62 | elif ode_str == 'belgat': 63 | f = ODEFuncBeltramiGAT 64 | elif ode_str == 'transconv': 65 | f = ODEFuncTransConv 66 | elif ode_str == 'beltransconv': 67 | f = ODEFuncBeltramiTRANSCONV 68 | 69 | 70 | 71 | else: 72 | raise FunctionNotDefined 73 | return f 74 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | utility functions 3 | """ 4 | import os 5 | 6 | import scipy 7 | from scipy.stats import sem 8 | import numpy as np 9 | from torch_scatter import scatter_add 10 | from torch_geometric.utils import add_remaining_self_loops 11 | from torch_geometric.utils.num_nodes import maybe_num_nodes 12 | from torch_geometric.utils.convert import to_scipy_sparse_matrix 13 | from sklearn.preprocessing import normalize 14 | from torch_geometric.nn.conv.gcn_conv import gcn_norm 15 | 16 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 17 | 18 | class MaxNFEException(Exception): pass 19 | 20 | 21 | def rms_norm(tensor): 22 | return tensor.pow(2).mean().sqrt() 23 | 24 | 25 | def make_norm(state): 26 | if isinstance(state, tuple): 27 | state = state[0] 28 | state_size = state.numel() 29 | 30 | def norm(aug_state): 31 | y = aug_state[1:1 + state_size] 32 | adj_y = aug_state[1 + state_size:1 + 2 * state_size] 33 | return max(rms_norm(y), rms_norm(adj_y)) 34 | 35 | return norm 36 | 37 | 38 | def print_model_params(model): 39 | total_num_params = 0 40 | print(model) 41 | for name, param in model.named_parameters(): 42 | if param.requires_grad: 43 | print(name) 44 | print(param.data.shape) 45 | total_num_params += param.numel() 46 | print("Model has a total of {} params".format(total_num_params)) 47 | 48 | 49 | def adjust_learning_rate(optimizer, lr, epoch, burnin=50): 50 | if epoch <= burnin: 51 | for param_group in optimizer.param_groups: 52 | param_group["lr"] = lr * epoch / burnin 53 | 54 | 55 | def gcn_norm_fill_val(edge_index, edge_weight=None, fill_value=0., num_nodes=None, dtype=None): 56 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 57 | 58 | if edge_weight is None: 59 | edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, 60 | device=edge_index.device) 61 | 62 | if not int(fill_value) == 0: 63 | edge_index, tmp_edge_weight = add_remaining_self_loops( 64 | edge_index, edge_weight, fill_value, num_nodes) 65 | assert tmp_edge_weight is not None 66 | edge_weight = tmp_edge_weight 67 
| 68 | row, col = edge_index[0], edge_index[1] 69 | deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes) 70 | deg_inv_sqrt = deg.pow_(-0.5) 71 | deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0) 72 | return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col] 73 | 74 | 75 | def coo2tensor(coo, device=None): 76 | indices = np.vstack((coo.row, coo.col)) 77 | i = torch.LongTensor(indices) 78 | values = coo.data 79 | v = torch.FloatTensor(values) 80 | shape = coo.shape 81 | print('adjacency matrix generated with shape {}'.format(shape)) 82 | # test 83 | return torch.sparse.FloatTensor(i, v, torch.Size(shape)).to(device) 84 | 85 | 86 | def get_sym_adj(data, opt, improved=False): 87 | edge_index, edge_weight = gcn_norm( # yapf: disable 88 | data.edge_index, data.edge_attr, data.num_nodes, 89 | improved, opt['self_loop_weight'] > 0, dtype=data.x.dtype) 90 | coo = to_scipy_sparse_matrix(edge_index, edge_weight) 91 | return coo2tensor(coo) 92 | 93 | 94 | def get_rw_adj_old(data, opt): 95 | if opt['self_loop_weight'] > 0: 96 | edge_index, edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 97 | fill_value=opt['self_loop_weight']) 98 | else: 99 | edge_index, edge_weight = data.edge_index, data.edge_attr 100 | coo = to_scipy_sparse_matrix(edge_index, edge_weight) 101 | normed_csc = normalize(coo, norm='l1', axis=0) 102 | return coo2tensor(normed_csc.tocoo()) 103 | 104 | 105 | def get_rw_adj(edge_index, edge_weight=None, norm_dim=1, fill_value=0., num_nodes=None, dtype=None): 106 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 107 | 108 | if edge_weight is None: 109 | edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, 110 | device=edge_index.device) 111 | 112 | if not fill_value == 0: 113 | edge_index, tmp_edge_weight = add_remaining_self_loops( 114 | edge_index, edge_weight, fill_value, num_nodes) 115 | assert tmp_edge_weight is not None 116 | edge_weight = tmp_edge_weight 117 | 118 | row, col = edge_index[0], edge_index[1] 119 | indices = row if norm_dim == 0 else col 120 | deg = scatter_add(edge_weight, indices, dim=0, dim_size=num_nodes) 121 | deg_inv_sqrt = deg.pow_(-1) 122 | edge_weight = deg_inv_sqrt[indices] * edge_weight if norm_dim == 0 else edge_weight * deg_inv_sqrt[indices] 123 | return edge_index, edge_weight 124 | 125 | 126 | def mean_confidence_interval(data, confidence=0.95): 127 | """ 128 | As number of samples will be < 10 use t-test for the mean confidence intervals 129 | :param data: NDarray of metric means 130 | :param confidence: The desired confidence interval 131 | :return: Float confidence interval 132 | """ 133 | if len(data) < 2: 134 | return 0 135 | a = 1.0 * np.array(data) 136 | n = len(a) 137 | _, se = np.mean(a), scipy.stats.sem(a) 138 | h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1) 139 | return h 140 | 141 | 142 | def sparse_dense_mul(s, d): 143 | i = s._indices() 144 | v = s._values() 145 | return torch.sparse.FloatTensor(i, v * d, s.size()) 146 | 147 | 148 | def get_sem(vec): 149 | """ 150 | wrapper around the scipy standard error metric 151 | :param vec: List of metric means 152 | :return: 153 | """ 154 | if len(vec) > 1: 155 | retval = sem(vec) 156 | else: 157 | retval = 0. 158 | return retval 159 | 160 | 161 | def get_full_adjacency(num_nodes): 162 | # what is the format of the edge index? 
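# (PyG convention: edge_index is a LongTensor of shape [2, num_edges] in COO form, with row 0
#  holding source nodes and row 1 holding target nodes; the loop below fills in every (i, j)
#  pair to build the fully connected graph.)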
163 | edge_index = torch.zeros((2, num_nodes ** 2),dtype=torch.long) 164 | for idx in range(num_nodes): 165 | edge_index[0][idx * num_nodes: (idx + 1) * num_nodes] = idx 166 | edge_index[1][idx * num_nodes: (idx + 1) * num_nodes] = torch.arange(0, num_nodes,dtype=torch.long) 167 | return edge_index 168 | 169 | 170 | 171 | from typing import Optional 172 | import torch 173 | from torch import Tensor 174 | from torch_scatter import scatter, segment_csr, gather_csr 175 | 176 | 177 | # https://twitter.com/jon_barron/status/1387167648669048833?s=12 178 | # @torch.jit.script 179 | def squareplus(src: Tensor, index: Optional[Tensor], ptr: Optional[Tensor] = None, 180 | num_nodes: Optional[int] = None) -> Tensor: 181 | r"""Computes a sparsely evaluated softmax. 182 | Given a value tensor :attr:`src`, this function first groups the values 183 | along the first dimension based on the indices specified in :attr:`index`, 184 | and then proceeds to compute the softmax individually for each group. 185 | 186 | Args: 187 | src (Tensor): The source tensor. 188 | index (LongTensor): The indices of elements for applying the softmax. 189 | ptr (LongTensor, optional): If given, computes the softmax based on 190 | sorted inputs in CSR representation. (default: :obj:`None`) 191 | num_nodes (int, optional): The number of nodes, *i.e.* 192 | :obj:`max_val + 1` of :attr:`index`. (default: :obj:`None`) 193 | 194 | :rtype: :class:`Tensor` 195 | """ 196 | out = src - src.max() 197 | # out = out.exp() 198 | out = (out + torch.sqrt(out ** 2 + 4)) / 2 199 | 200 | if ptr is not None: 201 | out_sum = gather_csr(segment_csr(out, ptr, reduce='sum'), ptr) 202 | elif index is not None: 203 | N = maybe_num_nodes(index, num_nodes) 204 | out_sum = scatter(out, index, dim=0, dim_size=N, reduce='sum')[index] 205 | else: 206 | raise NotImplementedError 207 | 208 | return out / (out_sum + 1e-16) 209 | 210 | 211 | # Counter of forward and backward passes. 212 | class Meter(object): 213 | 214 | def __init__(self): 215 | self.reset() 216 | 217 | def reset(self): 218 | self.val = None 219 | self.sum = 0 220 | self.cnt = 0 221 | 222 | def update(self, val): 223 | self.val = val 224 | self.sum += val 225 | self.cnt += 1 226 | 227 | def get_average(self): 228 | if self.cnt == 0: 229 | return 0 230 | return self.sum / self.cnt 231 | 232 | def get_value(self): 233 | return self.val 234 | 235 | 236 | class DummyDataset(object): 237 | def __init__(self, data, num_classes): 238 | self.data = data 239 | self.num_classes = num_classes 240 | 241 | 242 | class DummyData(object): 243 | def __init__(self, edge_index=None, edge_Attr=None, num_nodes=None): 244 | self.edge_index = edge_index 245 | self.edge_attr = edge_Attr 246 | self.num_nodes = num_nodes 247 | --------------------------------------------------------------------------------
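A minimal sketch of how the squareplus normalisation in src/utils.py is intended to be used: per-edge scores are normalised over the edges that share a destination node, like a sparse softmax. The tensors below are made-up illustrations rather than repo data, and the snippet assumes it is run from inside src/ so that utils.py is importable.

    import torch
    from utils import squareplus

    scores = torch.tensor([0.5, 1.0, -0.3, 2.0])  # one attention score per edge
    dst = torch.tensor([0, 0, 1, 1])              # destination node of each edge
    att = squareplus(scores, dst)                 # softmax-like weights via the squareplus kernel
    # att sums to (approximately) 1 within each destination-node group:
    # att[0] + att[1] ~= 1 and att[2] + att[3] ~= 1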