├── HeterophilousDatasets └── data │ ├── actor.npz │ ├── amazon_ratings.npz │ ├── chameleon.npz │ ├── chameleon_filtered.npz │ ├── cornell.npz │ ├── dataread │ ├── minesweeper.npz │ ├── questions.npz │ ├── roman_empire.npz │ ├── squirrel.npz │ ├── squirrel_filtered.npz │ ├── texas.npz │ ├── wiki_cooc.npz │ ├── wisconsin.npz │ └── workers.npz ├── LICENSE ├── README.md ├── environment.yml └── src ├── 11 ├── GNN_he.py ├── GNN_heter.py ├── GNN_plot.py ├── base_classes.py ├── best_log ├── amazon-ratingsbelconvconstant1.020230116-211847.txt ├── amazon-ratingsgatconvNone1.020230113-171121.txt ├── cornellbelconveulerNone20230115-175939.txt ├── cornelllapconveulerNone20230115-190111.txt ├── minesweeperbelconvattention3.020230118-201621.txt ├── minesweepergatconvconstant4.020230118-011000.txt ├── questionsbelconvconstant1.020230116-173925.txt ├── questionsgatconvNone3.020230113-193655.txt ├── roman-empirebelconvconstant1.020230116-170240.txt ├── roman-empiregatconvconstant3.020230117-145044.txt ├── texasbelconveulerNone20230115-175910.txt ├── texaslapconveulerNone20230115-190052.txt ├── wiki-coocbelconvconstant1.020230116-202725.txt ├── wiki-cooctransconvattention1.020230117-230603.txt ├── wisconsinbelconveulerNone20230115-180013.txt ├── wisconsinlapconveulerNone20230115-190125.txt ├── workersbelconvattention3.020230114-120652.txt └── workersgatconvconstant1.020230117-174152.txt ├── best_params.py ├── best_params_discrete.py ├── best_params_graphocn.py ├── block_constant.py ├── block_transformer_attention.py ├── data.py ├── discrete_models.py ├── early_stop_solver.py ├── function_GAT_attention.py ├── function_GAT_convection.py ├── function_beltrami_convection.py ├── function_beltrami_gat.py ├── function_beltrami_trans.py ├── function_beltrami_van.py ├── function_beltramitrans_convection.py ├── function_laplacian_convection.py ├── function_laplacian_diffusion.py ├── function_transformer_attention.py ├── function_transformer_convection.py ├── graphcon_models.py ├── heterophilic.py ├── model_configurations.py ├── run_GNN_raw.py └── utils.py /HeterophilousDatasets/data/actor.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/actor.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/amazon_ratings.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/amazon_ratings.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/chameleon.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/chameleon.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/chameleon_filtered.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/chameleon_filtered.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/cornell.npz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/cornell.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/dataread: -------------------------------------------------------------------------------- 1 | Dataset used, copied from 2 | - [HeterophilousDatasets](https://github.com/heterophily-submit/HeterophilousDatasets) 3 | -------------------------------------------------------------------------------- /HeterophilousDatasets/data/minesweeper.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/minesweeper.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/questions.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/questions.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/roman_empire.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/roman_empire.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/squirrel.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/squirrel.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/squirrel_filtered.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/squirrel_filtered.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/texas.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/texas.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/wiki_cooc.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/wiki_cooc.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/wisconsin.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/wisconsin.npz -------------------------------------------------------------------------------- /HeterophilousDatasets/data/workers.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zknus/Graph-Diffusion-CDE/2097b6393b139469be27c4850ec233d6ba26bb44/HeterophilousDatasets/data/workers.npz -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 zknus 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Graph Neural Convection-Diffusion with Heterophily 2 | 3 | This repository contains the code for our IJCAI 2023 accepted paper, *[Graph Neural Convection-Diffusion with Heterophily](https://arxiv.org/abs/2305.16780)*. 
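The `.npz` graph files listed under `HeterophilousDatasets/data` above can be inspected directly with NumPy before training. A minimal sketch, assuming the array layout of the upstream HeterophilousDatasets release (node features, node labels, an edge list, and train/val/test masks); `src/heterophilic.py` contains the loader this repository actually uses:

```python
import numpy as np

# Inspect one of the bundled heterophilous graphs. The file path and the
# expected keys ('node_features', 'node_labels', 'edges', plus split masks)
# are assumptions based on the upstream HeterophilousDatasets format and
# may differ per file; print whatever arrays are actually stored.
with np.load("HeterophilousDatasets/data/roman_empire.npz") as data:
    for key in data.files:
        print(key, data[key].shape, data[key].dtype)
```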
4 | 5 | ## Table of Contents 6 | 7 | - [Requirements](#requirements) 8 | - [Datasets](#datasets) 9 | - [Reproducing Results](#reproducing-results) 10 | - [Reference](#reference) 11 | - [Citation](#citation) 12 | 13 | ## Requirements 14 | 15 | To install the required dependencies, refer to the `environment.yml` file. 16 | 17 | 18 | 27 | 28 | ## Reproducing Results 29 | 30 | To reproduce the results in Table 2, run the following commands: 31 | 32 | ```bash 33 | python run_GNN_raw.py --dataset amazon-ratings --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant 34 | 35 | python run_GNN_raw.py --dataset amazon-ratings --function gatconv --time 1 --epoch 1000 --step_size 0.5 --dropout 0.2 --lr 0.01 --method euler --no_early --random_split --cuda 2 --hidden_dim 64 36 | 37 | python run_GNN_raw.py --dataset minesweeper --function belconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001 38 | 39 | python run_GNN_raw.py --dataset minesweeper --function gatconv --time 4 --epoch 600 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001 40 | 41 | python run_GNN_raw.py --dataset questions --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant 42 | 43 | python run_GNN_raw.py --dataset questions --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 3 44 | 45 | python run_GNN_raw.py --dataset roman-empire --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 256 --block constant 46 | 47 | python run_GNN_raw.py --dataset roman-empire --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001 48 | 49 | python run_GNN_raw.py --dataset wiki-cooc --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant 50 | 51 | python run_GNN_raw.py --dataset wiki-cooc --function transconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001 52 | ``` 53 | 54 | ## Reference 55 | 56 | Our code is developed based on the following repository: 57 | https://github.com/twitter-research/graph-neural-pde 58 | 59 | 60 | 61 | ## Citation 62 | 63 | If you find our work helpful, please consider citing us: 64 | ```bibtex 65 | @inproceedings{zhao2023graph, 66 | title={Graph neural convection-diffusion with heterophily}, 67 | author={Zhao, K. and Kang, Q. and Song, Y. and She, R. and Wang, S. and Tay, W. P.}, 68 | booktitle={Proc.
International Joint Conference on Artificial Intelligence}, 69 | year={2023}, 70 | month={Aug}, 71 | address={Macao, China} 72 | } 73 | 74 | 75 | 76 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: graph-cde 2 | channels: 3 | - soumith 4 | - pytorch 5 | - defaults 6 | dependencies: 7 | - blas=1.0 8 | - ca-certificates=2020.12.8 9 | - certifi=2020.12.5 10 | - cycler=0.10.0 11 | - freetype=2.10.4 12 | - intel-openmp=2020.2 13 | - joblib=1.0.0 14 | - jpeg=9b 15 | - kiwisolver=1.3.0 16 | - lcms2=2.11 17 | - libcxx=10.0.0 18 | - libedit=3.1.20191231 19 | - libffi=3.3 20 | - libgfortran 21 | - libllvm9=9.0.1 22 | - libpng=1.6.37 23 | - libtiff=4.1.0 24 | - libuv=1.40.0 25 | - llvm-openmp=10.0.0 26 | - lz4-c=1.9.2 27 | - matplotlib=3.3.2 28 | - matplotlib-base=3.3.2 29 | - mkl=2019.4 30 | - mkl-service=2.3.0 31 | - mkl_fft=1.2.0 32 | - mkl_random=1.1.1 33 | - ncurses=6.2 34 | - ninja=1.10.2 35 | - numba=0.50.1 36 | - numpy=1.19.2 37 | - numpy-base=1.19.2 38 | - olefile=0.46 39 | - openssl=1.1.1i 40 | - pillow=8.1.0 41 | - pip=20.3.3 42 | - pyparsing=2.4.7 43 | - python=3.8.5 44 | - python-dateutil=2.8.1 45 | - pytorch=1.7.1 46 | - readline=8.0 47 | - setuptools=51.1.2 48 | - six=1.15.0 49 | - sqlite=3.33.0 50 | - tbb=2020.3 51 | - threadpoolctl=2.1.0 52 | - tk=8.6.10 53 | - torchvision=0.2.1 54 | - tornado=6.1 55 | - typing_extensions=3.7.4.3 56 | - wheel=0.36.2 57 | - xz=5.2.5 58 | - zlib=1.2.11 59 | - zstd=1.4.5 60 | - pip: 61 | - ase==3.20.1 62 | - boltons==20.2.1 63 | - chardet==4.0.0 64 | - decorator==4.4.2 65 | - et-xmlfile==1.0.1 66 | - googledrivedownloader==0.4 67 | - h5py==3.1.0 68 | - idna==2.10 69 | - isodate==0.6.0 70 | - jdcal==1.4.1 71 | - jinja2==2.11.2 72 | - littleutils==0.2.2 73 | - llvmlite==0.33.0 74 | - markupsafe==1.1.1 75 | - networkx==2.5 76 | - ogb==1.2.4 77 | - openpyxl==3.0.6 78 | - outdated==0.2.0 79 | - pandas==1.2.0 80 | - pykeops==1.4.2 81 | - python-louvain==0.15 82 | - pytz==2020.5 83 | - rdflib==5.0.0 84 | - requests==2.25.1 85 | - scikit-learn==0.24.0 86 | - scipy==1.5.4 87 | - torch-cluster==1.5.8 88 | - torch-geometric==1.6.3 89 | - torch-scatter==2.0.5 90 | - torch-sparse==0.6.8 91 | - torch-spline-conv==1.2.0 92 | - torchdiffeq==0.1.1 93 | - torchsde==0.2.4 94 | - tqdm==4.56.0 95 | - trampoline==0.1.2 96 | - urllib3==1.26.2 97 | 98 | -------------------------------------------------------------------------------- /src/11: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /src/GNN_he.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from base_classes import BaseGNN 5 | from model_configurations import set_block, set_function 6 | 7 | 8 | # Define the GNN model. 
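# The class below wires a standard encode-evolve-decode pipeline: input
# dropout and the linear encoder m1 (plus the optional m11/m12 residual MLP
# and batch norm inherited from BaseGNN), followed by an ODE block that
# integrates the dynamics self.f over [0, opt['time']], and finally ReLU,
# optional fc/dropout, and the linear classifier m2 in forward().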
9 | class GNNhe(BaseGNN): 10 | def __init__(self, opt, dataset, device=torch.device('cpu')): 11 | super(GNNhe, self).__init__(opt, dataset, device) 12 | self.f = set_function(opt) 13 | block = set_block(opt) 14 | time_tensor = torch.tensor([0, self.T]).to(device) 15 | self.odeblock = block(self.f, opt, dataset.data, device, t=time_tensor).to(device) 16 | 17 | if opt["use_mlp"]: 18 | self.reset_parameters() 19 | 20 | self.output_normalization =nn.LayerNorm(opt['hidden_dim']) 21 | 22 | def reset_parameters(self): 23 | torch.nn.init.xavier_normal_(self.m11.weight, gain=1.414) 24 | torch.nn.init.xavier_normal_(self.m12.weight, gain=1.414) 25 | 26 | def forward(self, x, pos_encoding=None): 27 | # Encode each node based on its feature. 28 | 29 | 30 | 31 | x = F.dropout(x, self.opt['input_dropout'], training=self.training) 32 | x = self.m1(x) 33 | 34 | if self.opt['use_mlp']: 35 | x = F.dropout(x, self.opt['dropout'], training=self.training) 36 | x = F.dropout(x + self.m11(F.relu(x)), self.opt['dropout'], training=self.training) 37 | x = F.dropout(x + self.m12(F.relu(x)), self.opt['dropout'], training=self.training) 38 | # todo investigate if some input non-linearity solves the problem with smooth deformations identified in the ANODE paper 39 | 40 | 41 | 42 | if self.opt['batch_norm']: 43 | x = self.bn_in(x) 44 | 45 | 46 | 47 | self.odeblock.set_x0(x) 48 | 49 | 50 | z = self.odeblock(x) 51 | 52 | 53 | 54 | # Activation. 55 | z = F.relu(z) 56 | 57 | if self.opt['fc_out']: 58 | z = self.fc(z) 59 | z = F.relu(z) 60 | 61 | # Dropout. 62 | z = F.dropout(z, self.opt['dropout'], training=self.training) 63 | 64 | # Decode each node embedding to get node label. 65 | # z = self.output_normalization(z) 66 | 67 | z = self.m2(z) 68 | return z 69 | -------------------------------------------------------------------------------- /src/GNN_plot.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from base_classes import BaseGNN 5 | from model_configurations import set_block, set_function 6 | 7 | 8 | # Define the GNN model. 9 | class GNNplot(BaseGNN): 10 | def __init__(self, opt, dataset, device=torch.device('cpu')): 11 | super(GNNplot, self).__init__(opt, dataset, device) 12 | self.f = set_function(opt) 13 | block = set_block(opt) 14 | time_tensor = torch.tensor([0, self.T]).to(device) 15 | self.odeblock = block(self.f, self.regularization_fns, opt, dataset.data, device, t=time_tensor).to(device) 16 | 17 | if opt["use_mlp"]: 18 | self.reset_parameters() 19 | 20 | self.output_normalization =nn.LayerNorm(opt['hidden_dim']) 21 | 22 | def reset_parameters(self): 23 | torch.nn.init.xavier_normal_(self.m11.weight, gain=1.414) 24 | torch.nn.init.xavier_normal_(self.m12.weight, gain=1.414) 25 | 26 | def forward(self, x, pos_encoding=None): 27 | # Encode each node based on its feature. 
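# Two encoding paths follow: with opt['beltrami'] enabled, node features and
# the positional encoding are dropped out and projected separately (mx, mp)
# and concatenated into one state; otherwise only the feature encoder m1 is
# applied before the ODE block.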
28 | 29 | 30 | if self.opt['beltrami']: 31 | x = F.dropout(x, self.opt['input_dropout'], training=self.training) 32 | x = self.mx(x) 33 | p = F.dropout(pos_encoding, self.opt['input_dropout'], training=self.training) 34 | p = self.mp(p) 35 | x = torch.cat([x, p], dim=1) 36 | else: 37 | x = F.dropout(x, self.opt['input_dropout'], training=self.training) 38 | x = self.m1(x) 39 | 40 | if self.opt['use_mlp']: 41 | x = F.dropout(x, self.opt['dropout'], training=self.training) 42 | x = F.dropout(x + self.m11(F.relu(x)), self.opt['dropout'], training=self.training) 43 | x = F.dropout(x + self.m12(F.relu(x)), self.opt['dropout'], training=self.training) 44 | # todo investigate if some input non-linearity solves the problem with smooth deformations identified in the ANODE paper 45 | 46 | 47 | 48 | if self.opt['batch_norm']: 49 | x = self.bn_in(x) 50 | 51 | 52 | 53 | self.odeblock.set_x0(x) 54 | 55 | if self.training and self.odeblock.nreg > 0: 56 | z, self.reg_states = self.odeblock(x) 57 | else: 58 | z, att, edge= self.odeblock(x) 59 | 60 | 61 | 62 | # Activation. 63 | z = F.relu(z) 64 | 65 | if self.opt['fc_out']: 66 | z = self.fc(z) 67 | z = F.relu(z) 68 | 69 | # Dropout. 70 | z = F.dropout(z, self.opt['dropout'], training=self.training) 71 | 72 | # Decode each node embedding to get node label. 73 | # z = self.output_normalization(z) 74 | 75 | z = self.m2(z) 76 | return z,att, edge 77 | -------------------------------------------------------------------------------- /src/base_classes.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.nn.conv import MessagePassing 4 | from utils import Meter 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | class ODEblock(nn.Module): 13 | def __init__(self, odefunc, opt, data, device, t): 14 | super(ODEblock, self).__init__() 15 | self.opt = opt 16 | self.t = t 17 | 18 | self.aug_dim = 1 19 | self.odefunc = odefunc(self.aug_dim * opt['hidden_dim'], self.aug_dim * opt['hidden_dim'], opt, data, device) 20 | 21 | 22 | 23 | 24 | if opt['adjoint']: 25 | from torchdiffeq import odeint_adjoint as odeint 26 | else: 27 | from torchdiffeq import odeint 28 | self.train_integrator = odeint 29 | self.test_integrator = None 30 | self.set_tol() 31 | 32 | def set_x0(self, x0): 33 | self.odefunc.x0 = x0.clone().detach() 34 | 35 | 36 | def set_tol(self): 37 | self.atol = self.opt['tol_scale'] * 1e-7 38 | self.rtol = self.opt['tol_scale'] * 1e-9 39 | if self.opt['adjoint']: 40 | self.atol_adjoint = self.opt['tol_scale_adjoint'] * 1e-7 41 | self.rtol_adjoint = self.opt['tol_scale_adjoint'] * 1e-9 42 | 43 | def reset_tol(self): 44 | self.atol = 1e-7 45 | self.rtol = 1e-9 46 | self.atol_adjoint = 1e-7 47 | self.rtol_adjoint = 1e-9 48 | 49 | def set_time(self, time): 50 | self.t = torch.tensor([0, time]).to(self.device) 51 | 52 | def __repr__(self): 53 | return self.__class__.__name__ + '( Time Interval ' + str(self.t[0].item()) + ' -> ' + str(self.t[1].item()) \ 54 | + ")" 55 | 56 | 57 | class ODEFunc(MessagePassing): 58 | 59 | # currently requires in_features = out_features 60 | def __init__(self, opt, data, device): 61 | super(ODEFunc, self).__init__() 62 | self.opt = opt 63 | self.device = device 64 | self.edge_index = None 65 | self.edge_weight = None 66 | self.attention_weights = None 67 | self.alpha_train = nn.Parameter(torch.tensor(0.0)) 68 | self.beta_train = nn.Parameter(torch.tensor(0.0)) 69 | self.x0 = None 70 | self.nfe = 0 71 | self.alpha_sc = nn.Parameter(torch.ones(1)) 72 | 
self.beta_sc = nn.Parameter(torch.ones(1)) 73 | 74 | def __repr__(self): 75 | return self.__class__.__name__ 76 | 77 | 78 | class BaseGNN(MessagePassing): 79 | def __init__(self, opt, dataset, device=torch.device('cpu')): 80 | super(BaseGNN, self).__init__() 81 | self.opt = opt 82 | self.T = opt['time'] 83 | self.num_classes = dataset.num_classes 84 | self.num_features = dataset.data.num_features 85 | self.num_nodes = dataset.data.num_nodes 86 | self.device = device 87 | self.fm = Meter() 88 | self.bm = Meter() 89 | 90 | 91 | self.m1 = nn.Linear(self.num_features, opt['hidden_dim']) 92 | 93 | if self.opt['use_mlp']: 94 | self.m11 = nn.Linear(opt['hidden_dim'], opt['hidden_dim']) 95 | self.m12 = nn.Linear(opt['hidden_dim'], opt['hidden_dim']) 96 | 97 | self.hidden_dim = opt['hidden_dim'] 98 | if opt['fc_out']: 99 | self.fc = nn.Linear(opt['hidden_dim'], opt['hidden_dim']) 100 | self.m2 = nn.Linear(opt['hidden_dim'], dataset.num_classes) 101 | if self.opt['batch_norm']: 102 | self.bn_in = torch.nn.BatchNorm1d(opt['hidden_dim']) 103 | self.bn_out = torch.nn.BatchNorm1d(opt['hidden_dim']) 104 | 105 | 106 | 107 | def getNFE(self): 108 | return self.odeblock.odefunc.nfe 109 | 110 | def resetNFE(self): 111 | self.odeblock.odefunc.nfe = 0 112 | 113 | 114 | def reset(self): 115 | self.m1.reset_parameters() 116 | self.m2.reset_parameters() 117 | 118 | def __repr__(self): 119 | return self.__class__.__name__ 120 | -------------------------------------------------------------------------------- /src/best_log/amazon-ratingsbelconvconstant1.020230116-211847.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset amazon-ratings --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant" 2 | 0.4487996080352768 3 | 0.45141270618977625 4 | 0.45843540747999345 5 | 0.45337252980565085 6 | 0.467254613751429 7 | 0.4515760248244325 8 | 0.4484729707659644 9 | 0.4486362894006206 10 | 0.44569655397680874 11 | 0.4487996080352768 12 | 45.2245631226523,0.5999131633978108 13 | train acc list: [0.8871468234525559, 0.8638739180140454, 0.8897599216070554, 0.8525232729054385, 0.8975175567532255, 0.8975175567532255, 0.8733463988241058, 0.8837171321247754, 0.8739996733627307, 0.8966193042626164] 14 | val acc list: [0.4515760248244325, 0.4551690347868692, 0.46153846153846156, 0.4613751429038053, 0.4669279764821166, 0.45337252980565085, 0.46088518699983666, 0.4556589906908378, 0.45827208884533727, 0.44308345582230935] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "amazon-ratings", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 
821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/amazon-ratingsgatconvNone1.020230113-171121.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset amazon-ratings --function gatconv --time 1 --epoch 1000 --step_size 0.5 --dropout 0.2 --lr 0.01 --method euler --no_early --random_split --cuda 2 --hidden_dim 64" 2 | 0.47885023681202027 3 | 0.4750939082149273 4 | 0.48097337906255105 5 | 0.4700310305405847 6 | 0.4783602809080516 7 | 0.4700310305405847 8 | 0.4775436877347705 9 | 0.4713375796178344 10 | 0.4781969622733954 11 | 0.4827698840437694 12 | 47.63187979748489,0.42926711688506275 13 | train acc list: [0.772823779193206, 0.7094561489465948, 0.7701290217213784, 0.7486526212640863, 0.7634329577004736, 0.7396700963579944, 0.7581251020741466, 0.769802384452066, 0.776661767107627, 0.7822962600032664] 14 | val acc list: [0.48554630083292505, 0.4804834231585824, 0.4829332026784256, 0.4791768740813327, 0.4817899722358321, 0.4829332026784256, 0.48815939898742444, 0.47770700636942676, 0.4803201045239262, 0.4829332026784256] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": 
"amazon-ratings", 19 | "data_norm": "rw", 20 | "self_loop_weight": 0, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": true, 27 | "edge_homo": 0.1, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 0.5, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/cornellbelconveulerNone20230115-175939.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset cornell --function belconv --method euler --no_early 
--cuda 2 --epoch 600" 2 | { 3 | "test_result": 77.83783783783784, 4 | "test_std": 6.922296472900378, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 5.765610194206237, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 81.62162162162163, 15 | "test_std": 7.2319395460862985, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 5.342220187187195, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 64 23 | } 24 | { 25 | "test_result": 82.16216216216216, 26 | "test_std": 6.751349187457728, 27 | "dropout": 0.2, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 44.54893562793732, 31 | "time": 0.2, 32 | "step_size": 0.5, 33 | "hidden_dim": 32 34 | } 35 | { 36 | "test_result": 82.70270270270268, 37 | "test_std": 6.964377690121693, 38 | "dropout": 0.4, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 56.194859743118286, 42 | "time": 0.2, 43 | "step_size": 0.5, 44 | "hidden_dim": 256 45 | } 46 | { 47 | "test_result": 83.24324324324323, 48 | "test_std": 4.954135886438745, 49 | "dropout": 0.4, 50 | "weight_decay": 0.0001, 51 | "lr": 0.005, 52 | "runtime_average": 91.36924576759338, 53 | "time": 1, 54 | "step_size": 0.2, 55 | "hidden_dim": 64 56 | } 57 | { 58 | "test_result": 83.51351351351352, 59 | "test_std": 4.750917792228907, 60 | "dropout": 0.4, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 18.978949880599977, 64 | "time": 1, 65 | "step_size": 0.2, 66 | "hidden_dim": 64 67 | } 68 | { 69 | "test_result": 84.32432432432431, 70 | "test_std": 6.139360373838138, 71 | "dropout": 0.4, 72 | "weight_decay": 0.001, 73 | "lr": 0.005, 74 | "runtime_average": 9.309138369560241, 75 | "time": 1, 76 | "step_size": 0.5, 77 | "hidden_dim": 256 78 | } 79 | { 80 | "test_result": 84.59459459459458, 81 | "test_std": 6.948627098476995, 82 | "dropout": 0.2, 83 | "weight_decay": 0.01, 84 | "lr": 0.005, 85 | "runtime_average": 5.897371053695679, 86 | "time": 0.2, 87 | "step_size": 0.2, 88 | "hidden_dim": 128 89 | } 90 | { 91 | "test_result": 84.86486486486486, 92 | "test_std": 6.41856329029077, 93 | "dropout": 0.4, 94 | "weight_decay": 0.01, 95 | "lr": 0.005, 96 | "runtime_average": 12.7184077501297, 97 | "time": 1.5, 98 | "step_size": 0.5, 99 | "hidden_dim": 256 100 | } 101 | { 102 | "test_result": 85.13513513513513, 103 | "test_std": 5.952085282579796, 104 | "dropout": 0.6, 105 | "weight_decay": 0.01, 106 | "lr": 0.005, 107 | "runtime_average": 33.56719207763672, 108 | "time": 1.5, 109 | "step_size": 0.2, 110 | "hidden_dim": 128 111 | } 112 | -------------------------------------------------------------------------------- /src/best_log/cornelllapconveulerNone20230115-190111.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset cornell --function lapconv --method euler --no_early --cuda 1 --epoch 600" 2 | { 3 | "test_result": 79.1891891891892, 4 | "test_std": 5.545482305049514, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 20.32029731273651, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 79.45945945945945, 15 | "test_std": 5.565205481614593, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 21.358316016197204, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 64 23 | } 24 | { 25 | "test_result": 79.72972972972973, 26 | "test_std": 
5.952085282579798, 27 | "dropout": 0.4, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 32.16159210205078, 31 | "time": 0.5, 32 | "step_size": 0.2, 33 | "hidden_dim": 64 34 | } 35 | { 36 | "test_result": 79.72972972972974, 37 | "test_std": 6.073568933579522, 38 | "dropout": 0.6, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 20.645365118980408, 42 | "time": 1, 43 | "step_size": 1, 44 | "hidden_dim": 64 45 | } 46 | { 47 | "test_result": 81.35135135135134, 48 | "test_std": 7.591660489290884, 49 | "dropout": 0, 50 | "weight_decay": 0.001, 51 | "lr": 0.005, 52 | "runtime_average": 25.48646306991577, 53 | "time": 0.2, 54 | "step_size": 0.2, 55 | "hidden_dim": 16 56 | } 57 | { 58 | "test_result": 83.24324324324323, 59 | "test_std": 8.529585858410544, 60 | "dropout": 0, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 13.805762386322021, 64 | "time": 0.2, 65 | "step_size": 0.2, 66 | "hidden_dim": 256 67 | } 68 | { 69 | "test_result": 83.78378378378378, 70 | "test_std": 5.538892305924107, 71 | "dropout": 0.4, 72 | "weight_decay": 0.001, 73 | "lr": 0.005, 74 | "runtime_average": 7.33641140460968, 75 | "time": 0.2, 76 | "step_size": 0.2, 77 | "hidden_dim": 128 78 | } 79 | { 80 | "test_result": 84.05405405405405, 81 | "test_std": 5.853083196677764, 82 | "dropout": 0, 83 | "weight_decay": 0.01, 84 | "lr": 0.005, 85 | "runtime_average": 10.523886942863465, 86 | "time": 0.5, 87 | "step_size": 0.2, 88 | "hidden_dim": 256 89 | } 90 | { 91 | "test_result": 84.32432432432431, 92 | "test_std": 7.130219436904283, 93 | "dropout": 0.2, 94 | "weight_decay": 0.01, 95 | "lr": 0.005, 96 | "runtime_average": 10.521282386779784, 97 | "time": 0.5, 98 | "step_size": 0.2, 99 | "hidden_dim": 256 100 | } 101 | { 102 | "test_result": 85.40540540540539, 103 | "test_std": 5.565205481614596, 104 | "dropout": 0.2, 105 | "weight_decay": 0.01, 106 | "lr": 0.005, 107 | "runtime_average": 13.455496621131896, 108 | "time": 1, 109 | "step_size": 0.2, 110 | "hidden_dim": 64 111 | } 112 | { 113 | "test_result": 86.21621621621621, 114 | "test_std": 5.049065322234977, 115 | "dropout": 0.6, 116 | "weight_decay": 0.01, 117 | "lr": 0.005, 118 | "runtime_average": 13.325262236595155, 119 | "time": 1, 120 | "step_size": 0.2, 121 | "hidden_dim": 128 122 | } 123 | -------------------------------------------------------------------------------- /src/best_log/minesweeperbelconvattention3.020230118-201621.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset minesweeper --function belconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001" 2 | 0.928993 3 | 0.9452619999999999 4 | 0.9367850000000001 5 | 0.933376 6 | 0.9373509999999999 7 | 0.9464109999999999 8 | 0.937468 9 | 0.9464170000000001 10 | 0.940732 11 | 0.9455669999999999 12 | 93.98362,0.5744757067100388 13 | train acc list: [0.9491600000000001, 0.96114225, 0.9527565, 0.9618617500000001, 0.954907, 0.96515775, 0.95813275, 0.9605132499999999, 0.95401125, 0.9673325] 14 | val acc list: [0.920393, 0.9448479999999999, 0.943635, 0.9386559999999999, 0.941713, 0.944002, 0.94882, 0.946468, 0.9447450000000001, 0.946731] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "minesweeper", 19 | "data_norm": "gcn", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": 
false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.2, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "attention", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "rk4", 49 | "step_size": 1, 50 | "max_iters": 1000, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/minesweepergatconvconstant4.020230118-011000.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset minesweeper --function gatconv --time 4 --epoch 600 --step_size 1 --dropout 0.2 --lr 0.01 --method rk4 --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001" 2 | 0.977259 3 | 0.9743330000000001 4 | 0.979973 5 | 0.9754499999999999 6 | 
0.973902 7 | 0.9802029999999999 8 | 0.9739869999999998 9 | 0.975565 10 | 0.9778269999999999 11 | 0.978157 12 | 97.66655999999999,0.22432064193916582 13 | train acc list: [0.9898334999999999, 0.9858706249999999, 0.9888985000000001, 0.99088775, 0.9895455, 0.99146575, 0.98837625, 0.9845142499999999, 0.9895122500000001, 0.985397] 14 | val acc list: [0.9770789999999999, 0.976502, 0.97763, 0.9750365, 0.976732, 0.974698, 0.978675, 0.9753859999999999, 0.9783069999999999, 0.974014] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": "minesweeper", 19 | "data_norm": "gcn", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.2, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 600, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 4.0, 47 | "augment": false, 48 | "method": "rk4", 49 | "step_size": 1, 50 | "max_iters": 1000, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | 
"max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/questionsbelconvconstant1.020230116-173925.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset questions --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant" 2 | 0.72697069328968 3 | 0.730413937369115 4 | 0.7180077070668123 5 | 0.6988274314318104 6 | 0.7368228321277093 7 | 0.7192944501268734 8 | 0.7013276103705995 9 | 0.7389217264014369 10 | 0.7124791911504955 11 | 0.7278471470230358 12 | 72.10912726357567,1.3060300210075217 13 | train acc list: [0.7822344122519902, 0.755272760334586, 0.7941171801488204, 0.8998120984361742, 0.7753958632792431, 0.8778854868411179, 0.7713146759491771, 0.7832984950556777, 0.8427026075310715, 0.7954768543373223] 14 | val acc list: [0.7175811209439529, 0.7409935980696073, 0.7143170023494911, 0.7131849748021406, 0.7310133984494513, 0.7104989349358364, 0.7281456338141998, 0.7386065843478863, 0.7438031738334805, 0.7336534875800242] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "questions", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 
100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/questionsgatconvNone3.020230113-193655.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset questions --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 3" 2 | 0.7594055999759876 3 | 0.7609594813314893 4 | 0.7525871085569683 5 | 0.7374343410088454 6 | 0.7515671574592078 7 | 0.7651656973186889 8 | 0.7456307765481669 9 | 0.7637708983188989 10 | 0.7425674599234835 11 | 0.7378839968691484 12 | 75.16972517310884,0.9937706575189349 13 | train acc list: [0.8224468478141651, 0.8624602693544384, 0.8374133083952455, 0.8454756997962234, 0.8677887074335129, 0.817008988102454, 0.844050072447454, 0.7853427543886994, 0.8247994562111425, 0.8639757200006927] 14 | val acc list: [0.7626469009230556, 0.7657107066368795, 0.7587769946140657, 0.7613943392849927, 0.7663074889308372, 0.7474302801493976, 0.7581697059961092, 0.7521627441132837, 0.7654696384554549, 0.7681211575429057] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 3, 18 | "dataset": "questions", 19 | "data_norm": "rw", 20 | "self_loop_weight": 0, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.1, 28 | "hidden_dim": 16, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | 
"attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/roman-empirebelconvconstant1.020230116-170240.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset roman-empire --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 256 --block constant" 2 | 0.8512177903282739 3 | 0.8600423579244617 4 | 0.854041651959054 5 | 0.8513942816801977 6 | 0.8512177903282739 7 | 0.8621602541475468 8 | 0.8552770914225203 9 | 0.8469819978821038 10 | 0.8579244617013766 11 | 0.8487469114013413 12 | 85.3900458877515,0.4681381126880635 13 | train acc list: [0.9969993822257524, 0.9928514694201748, 0.9971758891536493, 0.9964698614420616, 0.9984996911128762, 0.9956755802665255, 0.9971758891536493, 0.9949695525549378, 0.9973523960815462, 0.9952343129467831] 14 | val acc list: [0.8593115622241836, 0.8619593998234775, 0.8527802294792586, 0.863901147396293, 0.8614298323036187, 0.8564872021182701, 0.8570167696381289, 0.8570167696381289, 0.8533097969991174, 0.8571932921447485] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "roman-empire", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 256, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 
33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/roman-empiregatconvconstant3.020230117-145044.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset roman-empire --function gatconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001" 2 | 0.9174020472996823 3 | 0.9193434521708437 4 | 0.9191669608189199 5 | 0.9126367807977409 6 | 0.9179315213554535 7 | 0.9196964348746911 8 | 0.9174020472996823 9 | 0.9135192375573596 10 | 0.9114013413342746 11 | 0.9152841510765972 12 | 
91.63783974585246,0.2832452516386811 13 | train acc list: [0.9968228752978554, 0.9972641426175978, 0.9974406495454946, 0.9968228752978554, 0.9984114376489277, 0.9958520871944224, 0.9940870179154532, 0.9971758891536493, 0.9887918100785456, 0.9973523960815462] 14 | val acc list: [0.9205648720211828, 0.9214474845542807, 0.918270079435128, 0.9276257722859664, 0.9251544571932921, 0.9205648720211828, 0.918270079435128, 0.9210944395410415, 0.91738746690203, 0.9198587819947043] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": "roman-empire", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | 
"max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/texasbelconveulerNone20230115-175910.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset texas --function belconv --method euler --no_early --cuda 3 --epoch 600" 2 | { 3 | "test_result": 79.45945945945945, 4 | "test_std": 4.391372110614032, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 5.350853610038757, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 80.0, 15 | "test_std": 4.391372110614035, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 5.039187932014466, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 32 23 | } 24 | { 25 | "test_result": 84.05405405405403, 26 | "test_std": 4.750917792228907, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 5.045219922065735, 31 | "time": 0.2, 32 | "step_size": 0.2, 33 | "hidden_dim": 64 34 | } 35 | { 36 | "test_result": 85.94594594594594, 37 | "test_std": 4.490066952928694, 38 | "dropout": 0, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 5.194399738311768, 42 | "time": 0.2, 43 | "step_size": 0.2, 44 | "hidden_dim": 128 45 | } 46 | { 47 | "test_result": 86.21621621621621, 48 | "test_std": 4.594594594594594, 49 | "dropout": 0.2, 50 | "weight_decay": 0.0001, 51 | "lr": 0.005, 52 | "runtime_average": 37.9852658033371, 53 | "time": 0.2, 54 | "step_size": 0.5, 55 | "hidden_dim": 128 56 | } 57 | { 58 | "test_result": 86.48648648648648, 59 | "test_std": 5.268537483680524, 60 | "dropout": 0.2, 61 | "weight_decay": 0.0001, 62 | "lr": 0.005, 63 | "runtime_average": 30.911461496353148, 64 | "time": 0.5, 65 | "step_size": 1, 66 | "hidden_dim": 128 67 | } 68 | { 69 | "test_result": 87.56756756756756, 70 | "test_std": 3.2432432432432456, 71 | "dropout": 0.4, 72 | "weight_decay": 0.0001, 73 | "lr": 0.005, 74 | "runtime_average": 60.836114573478696, 75 | "time": 0.5, 76 | "step_size": 0.2, 77 | "hidden_dim": 64 78 | } 79 | -------------------------------------------------------------------------------- /src/best_log/texaslapconveulerNone20230115-190052.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset texas --function lapconv --method euler --no_early --cuda 3 --epoch 600" 2 | { 3 | "test_result": 80.54054054054055, 4 | "test_std": 6.019204716572999, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 9.118959641456604, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 80.8108108108108, 15 | "test_std": 7.875028261801876, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 8.814443945884705, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 32 23 | } 24 | { 25 | "test_result": 83.78378378378378, 26 | "test_std": 3.6260561797293906, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 8.797356653213502, 31 | "time": 0.2, 32 | "step_size": 0.2, 33 | "hidden_dim": 64 34 | } 35 | { 36 | "test_result": 84.32432432432431, 37 | "test_std": 
4.954135886438749, 38 | "dropout": 0.8, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 24.91836953163147, 42 | "time": 0.5, 43 | "step_size": 0.2, 44 | "hidden_dim": 256 45 | } 46 | { 47 | "test_result": 85.40540540540539, 48 | "test_std": 4.221756581571168, 49 | "dropout": 0, 50 | "weight_decay": 0.001, 51 | "lr": 0.005, 52 | "runtime_average": 25.02802336215973, 53 | "time": 0.5, 54 | "step_size": 0.2, 55 | "hidden_dim": 128 56 | } 57 | { 58 | "test_result": 85.94594594594594, 59 | "test_std": 5.51245352820842, 60 | "dropout": 0.6, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 22.686385536193846, 64 | "time": 0.5, 65 | "step_size": 0.2, 66 | "hidden_dim": 256 67 | } 68 | { 69 | "test_result": 86.21621621621621, 70 | "test_std": 3.2990690853334352, 71 | "dropout": 0.8, 72 | "weight_decay": 0.001, 73 | "lr": 0.005, 74 | "runtime_average": 23.397251343727113, 75 | "time": 0.5, 76 | "step_size": 0.2, 77 | "hidden_dim": 256 78 | } 79 | -------------------------------------------------------------------------------- /src/best_log/wiki-coocbelconvconstant1.020230116-202725.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset wiki-cooc --function belconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block constant" 2 | 0.9796 3 | 0.9784 4 | 0.9808 5 | 0.9816 6 | 0.9716 7 | 0.9772 8 | 0.9748 9 | 0.984 10 | 0.9796 11 | 0.9712 12 | 97.788,0.40001999950002803 13 | train acc list: [0.9994, 0.9996, 0.9992, 0.9992, 0.9996, 0.9986, 0.9978, 0.9996, 0.9992, 0.9998] 14 | val acc list: [0.9784, 0.9816, 0.9768, 0.978, 0.9804, 0.9828, 0.9832, 0.9812, 0.9816, 0.9796] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "wiki-cooc", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": false, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 
86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/wiki-cooctransconvattention1.020230117-230603.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset wiki-cooc --function transconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 1 --hidden_dim 64 --block attention --decay 0.001" 2 | 0.9812 3 | 0.9796 4 | 0.9884 5 | 0.9804 6 | 0.974 7 | 0.9812 8 | 0.9764 9 | 0.982 10 | 0.9804 11 | 0.9804 12 | 98.03999999999999,0.35417509793885704 13 | train acc list: [0.9978, 0.9982, 0.9988, 0.9994, 0.998, 0.9986, 1.0, 1.0, 0.9984, 0.999] 14 | val acc list: [0.9816, 0.9836, 0.9808, 0.9804, 0.9848, 0.9872, 0.9876, 0.984, 0.9832, 0.9828] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 1, 18 | "dataset": "wiki-cooc", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "attention", 42 | "function": "transconv", 43 | "use_mlp": true, 44 | "add_source": false, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 
66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/wisconsinbelconveulerNone20230115-180013.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset wisconsin --function belconv --method euler --no_early --cuda 1 --epoch 600" 2 | { 3 | "test_result": 83.52941176470588, 4 | "test_std": 4.13162892268735, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 5.726944208145142, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 84.11764705882354, 15 | "test_std": 4.995191844257645, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 5.316670346260071, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 64 23 | } 24 | { 25 | "test_result": 84.90196078431373, 26 | "test_std": 4.475573415887581, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 5.399200391769409, 31 | "time": 0.2, 32 | "step_size": 0.5, 33 | "hidden_dim": 32 34 | } 35 | { 36 | "test_result": 85.29411764705881, 37 | "test_std": 3.7460731714789826, 38 | "dropout": 0, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 5.319157719612122, 42 | "time": 0.2, 43 | "step_size": 1, 44 | "hidden_dim": 64 45 | } 46 | { 47 | "test_result": 85.29411764705883, 48 | "test_std": 4.318179518734362, 49 | "dropout": 0.2, 50 | "weight_decay": 0.0001, 51 | "lr": 
0.005, 52 | "runtime_average": 31.17937104701996, 53 | "time": 0.2, 54 | "step_size": 0.2, 55 | "hidden_dim": 256 56 | } 57 | { 58 | "test_result": 85.88235294117648, 59 | "test_std": 5.3912655234774585, 60 | "dropout": 0.2, 61 | "weight_decay": 0.0001, 62 | "lr": 0.005, 63 | "runtime_average": 30.308173513412477, 64 | "time": 0.2, 65 | "step_size": 0.5, 66 | "hidden_dim": 256 67 | } 68 | { 69 | "test_result": 86.47058823529413, 70 | "test_std": 4.50980392156863, 71 | "dropout": 0.2, 72 | "weight_decay": 0.0001, 73 | "lr": 0.005, 74 | "runtime_average": 132.80444235801696, 75 | "time": 1.5, 76 | "step_size": 0.2, 77 | "hidden_dim": 128 78 | } 79 | { 80 | "test_result": 86.66666666666667, 81 | "test_std": 3.802101848953985, 82 | "dropout": 0.4, 83 | "weight_decay": 0.0001, 84 | "lr": 0.005, 85 | "runtime_average": 37.766193342208865, 86 | "time": 0.5, 87 | "step_size": 0.5, 88 | "hidden_dim": 256 89 | } 90 | { 91 | "test_result": 87.84313725490196, 92 | "test_std": 4.866538684702295, 93 | "dropout": 0.4, 94 | "weight_decay": 0.0001, 95 | "lr": 0.005, 96 | "runtime_average": 75.86288826465606, 97 | "time": 1, 98 | "step_size": 0.2, 99 | "hidden_dim": 64 100 | } 101 | -------------------------------------------------------------------------------- /src/best_log/wisconsinlapconveulerNone20230115-190125.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_sweep.py --dataset wisconsin --function lapconv --method euler --no_early --cuda 0 --epoch 600" 2 | { 3 | "test_result": 82.35294117647058, 4 | "test_std": 5.333910003425664, 5 | "dropout": 0, 6 | "weight_decay": 0.0001, 7 | "lr": 0.005, 8 | "runtime_average": 18.909467649459838, 9 | "time": 0.2, 10 | "step_size": 0.2, 11 | "hidden_dim": 16 12 | } 13 | { 14 | "test_result": 84.90196078431373, 15 | "test_std": 3.6208206495332176, 16 | "dropout": 0, 17 | "weight_decay": 0.0001, 18 | "lr": 0.005, 19 | "runtime_average": 18.475360369682313, 20 | "time": 0.2, 21 | "step_size": 0.2, 22 | "hidden_dim": 32 23 | } 24 | { 25 | "test_result": 85.09803921568627, 26 | "test_std": 4.898037645802665, 27 | "dropout": 0, 28 | "weight_decay": 0.0001, 29 | "lr": 0.005, 30 | "runtime_average": 23.430826544761658, 31 | "time": 1.5, 32 | "step_size": 1, 33 | "hidden_dim": 128 34 | } 35 | { 36 | "test_result": 85.88235294117646, 37 | "test_std": 3.594177015651642, 38 | "dropout": 0.4, 39 | "weight_decay": 0.0001, 40 | "lr": 0.005, 41 | "runtime_average": 16.001456832885744, 42 | "time": 1, 43 | "step_size": 1, 44 | "hidden_dim": 128 45 | } 46 | { 47 | "test_result": 86.07843137254902, 48 | "test_std": 1.849800221971885, 49 | "dropout": 0.4, 50 | "weight_decay": 0.0001, 51 | "lr": 0.005, 52 | "runtime_average": 35.787513732910156, 53 | "time": 1.5, 54 | "step_size": 0.2, 55 | "hidden_dim": 256 56 | } 57 | { 58 | "test_result": 86.27450980392157, 59 | "test_std": 2.90831313219438, 60 | "dropout": 0, 61 | "weight_decay": 0.001, 62 | "lr": 0.005, 63 | "runtime_average": 18.373980784416197, 64 | "time": 0.2, 65 | "step_size": 0.2, 66 | "hidden_dim": 64 67 | } 68 | { 69 | "test_result": 86.47058823529412, 70 | "test_std": 4.594264515239208, 71 | "dropout": 0.2, 72 | "weight_decay": 0.01, 73 | "lr": 0.005, 74 | "runtime_average": 13.467104196548462, 75 | "time": 1.5, 76 | "step_size": 1, 77 | "hidden_dim": 16 78 | } 79 | { 80 | "test_result": 87.05882352941177, 81 | "test_std": 3.304372460069162, 82 | "dropout": 0.4, 83 | "weight_decay": 0.01, 84 | "lr": 0.005, 85 | "runtime_average": 9.68775908946991, 86 | "time": 0.5, 87 | 
"step_size": 0.5, 88 | "hidden_dim": 32 89 | } 90 | { 91 | "test_result": 87.45098039215688, 92 | "test_std": 4.401949866792872, 93 | "dropout": 0.6, 94 | "weight_decay": 0.01, 95 | "lr": 0.005, 96 | "runtime_average": 13.179940152168275, 97 | "time": 0.5, 98 | "step_size": 0.2, 99 | "hidden_dim": 128 100 | } 101 | -------------------------------------------------------------------------------- /src/best_log/workersbelconvattention3.020230114-120652.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset workers --function belconv --time 3 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --random_split --cuda 3 --hidden_dim 64 --block attention" 2 | 0.8129475312407647 3 | 0.8062150752787878 4 | 0.8026341475317832 5 | 0.82135217133143 6 | 0.8175861984822237 7 | 0.8076337543956684 8 | 0.8116159521078875 9 | 0.8112492510079197 10 | 0.8151507880345634 11 | 0.8239102673596707 12 | 81.302951367707,0.6338053733195014 13 | train acc list: [0.8416701601650287, 0.8234789545553522, 0.8240209555633793, 0.8394800249903843, 0.8468909899624668, 0.8461019002596043, 0.842648339706424, 0.8465870895224217, 0.8530692248571566, 0.8334028641259776] 14 | val acc list: [0.8242404369804035, 0.8177506995841192, 0.798109731177759, 0.8136356785864123, 0.8095325379594819, 0.834769161001427, 0.8107966094100685, 0.8161027224378792, 0.8186661670122158, 0.8153186179666507] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 3, 18 | "dataset": "workers", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": true, 27 | "edge_homo": 0.1, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "attention", 42 | "function": "belconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 3.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | "attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | 
"KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_log/workersgatconvconstant1.020230117-174152.txt: -------------------------------------------------------------------------------- 1 | "run_GNN_raw.py --dataset workers --function gatconv --time 1 --epoch 1000 --step_size 1 --dropout 0.2 --lr 0.01 --method euler --no_early --cuda 2 --hidden_dim 64 --block constant --decay 0.001" 2 | 0.7980819024534405 3 | 0.8008182653750112 4 | 0.8178444482402414 5 | 0.8132905086096809 6 | 0.8210810429765555 7 | 0.7904021918016209 8 | 0.8120484018339121 9 | 0.79523708818992 10 | 0.8083637674911681 11 | 0.8217114164016387 12 | 80.7887903337319,1.0596868582754906 13 | train acc list: [0.8508128658422012, 0.831333559901965, 0.8523991854382849, 0.838000714980053, 0.8525191684524209, 0.8444870899972663, 0.8444729294577888, 0.8519340074767651, 0.8535531422152307, 0.8503690558803718] 14 | val acc list: [0.8026846922440867, 0.8150884782127576, 0.8046052390398488, 0.8006972759328128, 0.8028007804385283, 0.8185466844261238, 0.8019705122408551, 0.8033975144906578, 0.8200324096514775, 0.803147687265193] 15 | { 16 | "use_cora_defaults": false, 17 | "cuda": 2, 18 | "dataset": "workers", 19 | "data_norm": "rw", 20 | "self_loop_weight": 1, 21 | "use_labels": false, 22 | "geom_gcn_splits": true, 23 | "num_splits": 1, 24 | "label_rate": 0.5, 25 | "planetoid_split": false, 26 | "random_splits": false, 27 | "edge_homo": 0.0, 28 | "hidden_dim": 64, 29 | "fc_out": false, 30 | "input_dropout": 0.0, 31 | "dropout": 0.2, 32 | "batch_norm": false, 33 | "optimizer": "adam", 34 | "lr": 0.005, 35 | "decay": 0.001, 36 | "epoch": 1000, 37 | "alpha": 1.0, 38 | "alpha_dim": "sc", 39 | "no_alpha_sigmoid": false, 40 | "beta_dim": "sc", 41 | "block": "constant", 42 | "function": "gatconv", 43 | "use_mlp": true, 44 | "add_source": true, 45 | "cgnn": false, 46 | "time": 1.0, 47 | "augment": false, 48 | "method": "euler", 49 | "step_size": 1, 50 | "max_iters": 100, 51 | "adjoint_method": "adaptive_heun", 52 | "adjoint": false, 53 | "adjoint_step_size": 1, 54 | "tol_scale": 821.9773048827274, 55 | "tol_scale_adjoint": 1.0, 56 | "ode_blocks": 1, 57 | "max_nfe": 2000, 58 | "no_early": true, 59 | "earlystopxT": 3, 60 | "max_test_steps": 100, 61 | "leaky_relu_slope": 0.2, 62 | "attention_dropout": 0.0, 63 | "heads": 8, 64 | 
"attention_norm_idx": 1, 65 | "attention_dim": 16, 66 | "mix_features": false, 67 | "reweight_attention": false, 68 | "attention_type": "scaled_dot", 69 | "square_plus": true, 70 | "jacobian_norm2": null, 71 | "total_deriv": null, 72 | "kinetic_energy": null, 73 | "directional_penalty": null, 74 | "not_lcc": true, 75 | "rewiring": null, 76 | "gdc_method": "ppr", 77 | "gdc_sparsification": "topk", 78 | "gdc_k": 64, 79 | "gdc_threshold": 0.01, 80 | "gdc_avg_degree": 64, 81 | "ppr_alpha": 0.05, 82 | "heat_time": 3.0, 83 | "att_samp_pct": 1, 84 | "use_flux": false, 85 | "exact": true, 86 | "M_nodes": 64, 87 | "new_edges": "k_hop_att", 88 | "sparsify": "S_hat", 89 | "threshold_type": "addD_rvR", 90 | "rw_addD": 0.02, 91 | "rw_rmvR": 0.02, 92 | "rewire_KNN": false, 93 | "rewire_KNN_T": "T0", 94 | "rewire_KNN_epoch": 10, 95 | "rewire_KNN_k": 64, 96 | "rewire_KNN_sym": false, 97 | "KNN_online": false, 98 | "KNN_online_reps": 4, 99 | "KNN_space": "pos_distance", 100 | "beltrami": false, 101 | "fa_layer": false, 102 | "pos_enc_type": "GDC", 103 | "pos_enc_orientation": "row", 104 | "feat_hidden_dim": 64, 105 | "pos_enc_hidden_dim": 16, 106 | "edge_sampling": false, 107 | "edge_sampling_T": "T0", 108 | "edge_sampling_epoch": 5, 109 | "edge_sampling_add": 0.64, 110 | "edge_sampling_add_type": "importance", 111 | "edge_sampling_rmv": 0.32, 112 | "edge_sampling_sym": false, 113 | "edge_sampling_online": false, 114 | "edge_sampling_online_reps": 4, 115 | "edge_sampling_space": "attention", 116 | "symmetric_attention": false, 117 | "fa_layer_edge_sampling_rmv": 0.8, 118 | "gpu": 0, 119 | "pos_enc_csv": false, 120 | "pos_dist_quantile": 0.001, 121 | "adaptive": false, 122 | "attention_rewiring": false, 123 | "baseline": false, 124 | "cpus": 1, 125 | "dt": 0.001, 126 | "dt_min": 1e-05, 127 | "gpus": 0.5, 128 | "grace_period": 20, 129 | "max_epochs": 1000, 130 | "metric": "accuracy", 131 | "name": "cora_beltrami_splits", 132 | "num_init": 1, 133 | "num_samples": 1000, 134 | "patience": 100, 135 | "reduction_factor": 10, 136 | "regularise": false, 137 | "use_lcc": false 138 | } -------------------------------------------------------------------------------- /src/best_params_discrete.py: -------------------------------------------------------------------------------- 1 | best_params_dict = {'cornell': {'model': 'lap_gcn', 'lr': 0.00721, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 1, 'drop': 0.15, 'weight_decay': 0.0012708787092020595, 'res_version': 1}, 2 | 'wisconsin': {'model': 'lap_gcn', 'lr': 0.00356, 'nhid': 64, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.23, 'weight_decay': 0.008126619200091946, 'res_version': 2}, 3 | 'texas': {'model': 'lap_gcn', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.68, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 4 | 5 | } 6 | 7 | # best_params_dict = {'cornell': {'model': 'HAMCON_GCN', 'lr': 0.00721, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 1, 'drop': 0.15, 'weight_decay': 0.0012708787092020595, 'res_version': 1}, 8 | # 'wisconsin': {'model': 'HAMCON_GCN', 'lr': 0.00356, 'nhid': 64, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.23, 'weight_decay': 0.008126619200091946, 'res_version': 2}, 9 | # 'texas': {'model': 'HAMCON_GCN', 'lr': 0.00155, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.68, 'weight_decay': 0.0008549327066268375, 'res_version': 2} 10 | # } -------------------------------------------------------------------------------- /src/best_params_graphocn.py: 
-------------------------------------------------------------------------------- 1 | best_params_dict = {'cornell': {'model': 'GraphCON_GCN', 'lr': 0.00721, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 1, 'drop': 0.15, 'weight_decay': 0.0012708787092020595, 'res_version': 1}, 2 | 'wisconsin': {'model': 'GraphCON_GCN', 'lr': 0.00356, 'nhid': 64, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.23, 'weight_decay': 0.008126619200091946, 'res_version': 2}, 3 | 'texas': {'model': 'GraphCON_GCN', 'lr': 0.00155, 'nhid': 256, 'alpha': 0, 'gamma': 0, 'nlayers': 2, 'drop': 0.68, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 4 | 'film': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 5 | 'chameleon': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 6 | 'squirrel': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 7 | 'wiki-cooc': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 8 | 'roman-empire': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 9 | 'amazon-ratings': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 10 | 'minesweeper': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 11 | 'workers': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 12 | 'questions': {'model': 'GraphCON_GCN', 'lr': 0.001, 'nhid': 256, 'alpha': 1, 'gamma': 1, 'nlayers': 2, 'drop': 0.4, 'weight_decay': 0.0008549327066268375, 'res_version': 2}, 13 | } 14 | 15 | -------------------------------------------------------------------------------- /src/block_constant.py: -------------------------------------------------------------------------------- 1 | from base_classes import ODEblock 2 | import torch 3 | from utils import get_rw_adj, gcn_norm_fill_val 4 | 5 | 6 | class ConstantODEblock(ODEblock): 7 | def __init__(self, odefunc, opt, data, device, t=torch.tensor([0, 1])): 8 | super(ConstantODEblock, self).__init__(odefunc, opt, data, device, t) 9 | 10 | self.aug_dim = 2 if opt['augment'] else 1 11 | self.odefunc = odefunc(self.aug_dim * opt['hidden_dim'], self.aug_dim * opt['hidden_dim'], opt, data, device) 12 | if opt['data_norm'] == 'rw': 13 | edge_index, edge_weight = get_rw_adj(data.edge_index, edge_weight=data.edge_attr, norm_dim=1, 14 | fill_value=opt['self_loop_weight'], 15 | num_nodes=data.num_nodes, 16 | dtype=data.x.dtype) 17 | else: 18 | edge_index, edge_weight = gcn_norm_fill_val(data.edge_index, edge_weight=data.edge_attr, 19 | fill_value=opt['self_loop_weight'], 20 | num_nodes=data.num_nodes, 21 | dtype=data.x.dtype) 22 | self.odefunc.edge_index = edge_index.to(device) 23 | self.odefunc.edge_weight = edge_weight.to(device) 24 | 25 | 26 | if opt['adjoint']: 27 | from torchdiffeq import odeint_adjoint as odeint 28 | else: 
29 | from torchdiffeq import odeint 30 | 31 | self.train_integrator = odeint 32 | self.test_integrator = odeint 33 | self.set_tol() 34 | 35 | def forward(self, x): 36 | t = self.t.type_as(x) 37 | 38 | integrator = self.train_integrator if self.training else self.test_integrator 39 | 40 | 41 | 42 | func = self.odefunc 43 | state = x 44 | 45 | if self.opt["adjoint"] and self.training: 46 | state_dt = integrator( 47 | func, state, t, 48 | method=self.opt['method'], 49 | options=dict(step_size=self.opt['step_size'], max_iters=self.opt['max_iters']), 50 | adjoint_method=self.opt['adjoint_method'], 51 | adjoint_options=dict(step_size = self.opt['adjoint_step_size'], max_iters=self.opt['max_iters']), 52 | atol=self.atol, 53 | rtol=self.rtol, 54 | adjoint_atol=self.atol_adjoint, 55 | adjoint_rtol=self.rtol_adjoint) 56 | else: 57 | state_dt = integrator( 58 | func, state, t, 59 | method=self.opt['method'], 60 | options=dict(step_size=self.opt['step_size'], ), 61 | atol=self.atol, 62 | rtol=self.rtol) 63 | 64 | 65 | z = state_dt[1] 66 | return z 67 | 68 | def __repr__(self): 69 | return self.__class__.__name__ + '( Time Interval ' + str(self.t[0].item()) + ' -> ' + str(self.t[1].item()) \ 70 | + ")" 71 | -------------------------------------------------------------------------------- /src/block_transformer_attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from function_transformer_attention import SpGraphTransAttentionLayer 3 | from base_classes import ODEblock 4 | from utils import get_rw_adj 5 | 6 | 7 | class AttODEblock(ODEblock): 8 | def __init__(self, odefunc, opt, data, device, t=torch.tensor([0, 1]), gamma=0.5): 9 | super(AttODEblock, self).__init__(odefunc, opt, data, device, t) 10 | 11 | self.odefunc = odefunc(self.aug_dim * opt['hidden_dim'], self.aug_dim * opt['hidden_dim'], opt, data, device) 12 | # self.odefunc.edge_index, self.odefunc.edge_weight = data.edge_index, edge_weight=data.edge_attr 13 | edge_index, edge_weight = get_rw_adj(data.edge_index, edge_weight=data.edge_attr, norm_dim=1, 14 | fill_value=opt['self_loop_weight'], 15 | num_nodes=data.num_nodes, 16 | dtype=data.x.dtype) 17 | self.odefunc.edge_index = edge_index.to(device) 18 | self.odefunc.edge_weight = edge_weight.to(device) 19 | 20 | 21 | if opt['adjoint']: 22 | from torchdiffeq import odeint_adjoint as odeint 23 | else: 24 | from torchdiffeq import odeint 25 | self.train_integrator = odeint 26 | self.test_integrator = odeint 27 | self.set_tol() 28 | # parameter trading off between attention and the Laplacian 29 | self.multihead_att_layer = SpGraphTransAttentionLayer(opt['hidden_dim'], opt['hidden_dim'], opt, 30 | device, edge_weights=self.odefunc.edge_weight).to(device) 31 | 32 | def get_attention_weights(self, x): 33 | attention, values = self.multihead_att_layer(x, self.odefunc.edge_index) 34 | return attention 35 | 36 | def forward(self, x): 37 | t = self.t.type_as(x) 38 | self.odefunc.attention_weights = self.get_attention_weights(x) 39 | 40 | integrator = self.train_integrator if self.training else self.test_integrator 41 | 42 | func = self.odefunc 43 | 44 | 45 | state = x 46 | 47 | if self.opt["adjoint"] and self.training: 48 | state_dt = integrator( 49 | func, state, t, 50 | method=self.opt['method'], 51 | options={'step_size': self.opt['step_size']}, 52 | adjoint_method=self.opt['adjoint_method'], 53 | adjoint_options={'step_size': self.opt['adjoint_step_size']}, 54 | atol=self.atol, 55 | rtol=self.rtol, 56 | adjoint_atol=self.atol_adjoint, 57 | 
adjoint_rtol=self.rtol_adjoint) 58 | else: 59 | state_dt = integrator( 60 | func, state, t, 61 | method=self.opt['method'], 62 | options={'step_size': self.opt['step_size']}, 63 | atol=self.atol, 64 | rtol=self.rtol) 65 | 66 | 67 | z = state_dt[1] 68 | return z 69 | 70 | def __repr__(self): 71 | return self.__class__.__name__ + '( Time Interval ' + str(self.t[0].item()) + ' -> ' + str(self.t[1].item()) \ 72 | + ")" 73 | 74 | -------------------------------------------------------------------------------- /src/discrete_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from torch_geometric.nn import GCNConv, GATConv 6 | from torch_scatter import scatter 7 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 8 | import torch_sparse 9 | class Lap_GCN(nn.Module): 10 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers, graph_size, dt=1., alpha=1., gamma=1., res_version=1, ): 11 | super(Lap_GCN, self).__init__() 12 | self.dropout = dropout 13 | self.nhid = nhid 14 | self.nlayers = nlayers 15 | self.enc = nn.Linear(nfeat,nhid) 16 | self.conv = GCNConv(nhid, nhid) 17 | self.dec = nn.Linear(nhid,nclass) 18 | self.res = nn.Linear(nhid,nhid) 19 | if(res_version==1): 20 | self.residual = self.res_connection_v1 21 | else: 22 | self.residual = self.res_connection_v2 23 | self.dt = dt 24 | self.act_fn = nn.ReLU() 25 | self.alpha = alpha 26 | self.gamma = gamma 27 | self.graph_size = graph_size 28 | self.epsilons = nn.ParameterList() 29 | for i in range(self.nlayers): 30 | self.epsilons.append(nn.Parameter(torch.zeros((self.nhid, 1)))) 31 | # print("self.epsilons: ", self.epsilons[0].shape) 32 | # print("self.graph_size",self.graph_size) 33 | 34 | self.reset_params() 35 | 36 | 37 | 38 | def reset_params(self): 39 | for name, param in self.named_parameters(): 40 | if 'weight' in name and 'emb' not in name and 'out' not in name: 41 | stdv = 1. 
/ math.sqrt(self.nhid) 42 | param.data.uniform_(-stdv, stdv) 43 | 44 | def res_connection_v1(self, X): 45 | res = - self.res(self.conv.lin(X)) 46 | return res 47 | 48 | def res_connection_v2(self, X): 49 | res = - self.conv.lin(X) + self.res(X) 50 | return res 51 | 52 | def forward(self, data): 53 | input = data.x 54 | edge_index = data.edge_index 55 | input = F.dropout(input, self.dropout, training=self.training) 56 | X = self.act_fn(self.enc(input)) 57 | 58 | 59 | X = F.dropout(X, self.dropout, training=self.training) 60 | X0 =X 61 | for i in range(self.nlayers): 62 | 63 | # coeff = (1 + torch.tanh(self.epsilons[i]).tile(self.graph_size, 1)) 64 | coeff = (1 + torch.tanh(self.epsilons[i])).T 65 | coeff = coeff.tile(self.graph_size, 1) 66 | # print("coeff shape: ", coeff.shape) 67 | # print("X0 shape: ", X0.shape) 68 | X0 = X0 * coeff + self.dt * (self.act_fn(self.conv(X, edge_index) + self.residual(X)) - self.alpha * X) 69 | X = X0 70 | 71 | 72 | # X = X + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*X) 73 | # X = X + self.dt * (self.act_fn(self.conv(X, edge_index)) - self.alpha * X) 74 | # X = X + self.dt * (self.act_fn(self.conv(X, edge_index) + self.residual(X)) ) 75 | X = F.dropout(X, self.dropout, training=self.training) 76 | 77 | X = self.dec(X) 78 | 79 | return X 80 | 81 | 82 | class Lap_conv_GCN(nn.Module): 83 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers,graph_size, dt=1., alpha=1., gamma=1., res_version=1): 84 | super(Lap_conv_GCN, self).__init__() 85 | self.dropout = dropout 86 | self.nhid = nhid 87 | self.nlayers = nlayers 88 | self.enc = nn.Linear(nfeat,nhid) 89 | self.conv = GCNConv(nhid, nhid) 90 | self.dec = nn.Linear(nhid,nclass) 91 | self.res = nn.Linear(nhid,nhid) 92 | if(res_version==1): 93 | self.residual = self.res_connection_v1 94 | else: 95 | self.residual = self.res_connection_v2 96 | self.dt = dt 97 | self.act_fn = nn.ReLU() 98 | self.alpha = alpha 99 | self.gamma = gamma 100 | self.reset_params() 101 | 102 | self.gate = nn.Linear(2 * nhid, 1) 103 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 104 | 105 | self.lin1 = nn.Linear(nhid, nhid) 106 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 107 | 108 | self.lin2 = nn.Linear(nhid * 2, nhid) 109 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 110 | 111 | self.weight_low, self.weight_high, self.weight_mlp = ( 112 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 113 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 114 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 115 | ) 116 | 117 | self.output_low, self.output_high, self.output_mlp = ( 118 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 119 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 120 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 121 | ) 122 | 123 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 124 | 125 | self.weight_low.data.uniform_(-stdv, stdv) 126 | self.weight_high.data.uniform_(-stdv, stdv) 127 | self.weight_mlp.data.uniform_(-stdv, stdv) 128 | 129 | self.output_low.data.uniform_(-stdv, stdv) 130 | self.output_high.data.uniform_(-stdv, stdv) 131 | self.output_mlp.data.uniform_(-stdv, stdv) 132 | 133 | self.epsilons = nn.ParameterList() 134 | for i in range(self.nlayers): 135 | self.epsilons.append(nn.Parameter(torch.zeros((self.nhid, 1)))) 136 | self.lamda = nn.ParameterList() 137 | for i in range(self.nlayers): 138 | self.lamda .append(nn.Parameter(torch.zeros((self.nhid, 1)))) 139 | self.graph_size = graph_size 140 | 141 | 142 | 143 | def reset_params(self): 144 | for name, param in 
self.named_parameters(): 145 | if 'weight' in name and 'emb' not in name and 'out' not in name: 146 | stdv = 1. / math.sqrt(self.nhid) 147 | param.data.uniform_(-stdv, stdv) 148 | 149 | def res_connection_v1(self, X): 150 | res = - self.res(self.conv.lin(X)) 151 | return res 152 | 153 | def res_connection_v2(self, X): 154 | res = - self.conv.lin(X) + self.res(X) 155 | return res 156 | 157 | def forward(self, data): 158 | input = data.x 159 | # edge_index = data.edge_index 160 | input = F.dropout(input, self.dropout, training=self.training) 161 | X = self.act_fn(self.enc(input)) 162 | self.edge_index ,self.edge_weight = add_remaining_self_loops (data.edge_index, data.edge_weight) 163 | edge_index = self.edge_index 164 | 165 | 166 | 167 | X = F.dropout(X, self.dropout, training=self.training) 168 | 169 | for i in range(self.nlayers): 170 | # X = X + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*X - self.gamma*X) 171 | 172 | src = X[self.edge_index[0, :], :] 173 | dst_k = X[self.edge_index[1, :], :] 174 | h2 = torch.cat([src, dst_k], dim=1) 175 | attention1 = torch.tanh(self.gate(h2)).squeeze() 176 | 177 | # x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 178 | x_new = torch.tanh(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 179 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 180 | 181 | # ax3 = torch_sparse.spmm(self.edge_index, attention1, x_new.shape[0], x_new.shape[0], x_new) 182 | # ax3 = scatter(ax3, self.edge_index[1, :].T, dim=0, reduce="sum") 183 | ax2 = self.act_fn(self.conv(X, edge_index) + self.residual(X)) 184 | 185 | # print("X: ", X.shape) 186 | # print("x_new: ", x_new.shape) 187 | # print("ax3: ", ax3.shape) 188 | # print("ax2: ", ax2.shape) 189 | 190 | # ax = torch.mm(ax3, self.output_high) + torch.mm(ax2, self.output_low) 191 | 192 | # ax = torch.cat([X, ax2], axis=1) 193 | # ax = self.lin2(ax) 194 | coeff_lamda = (torch.tanh(self.lamda[i])).T 195 | coeff_lamda = coeff_lamda.tile(self.graph_size, 1) 196 | 197 | ax = ax2 + coeff_lamda * ax3 198 | 199 | ax = ax - self.alpha * X 200 | 201 | coeff = (1 + torch.tanh(self.epsilons[i])).T 202 | coeff = coeff.tile(self.graph_size, 1) 203 | 204 | X = X * coeff + self.dt* ax 205 | 206 | X = F.dropout(X, self.dropout, training=self.training) 207 | 208 | X = self.dec(X) 209 | 210 | return X -------------------------------------------------------------------------------- /src/early_stop_solver.py: -------------------------------------------------------------------------------- 1 | import torchdiffeq 2 | from torchdiffeq._impl.dopri5 import _DORMAND_PRINCE_SHAMPINE_TABLEAU, DPS_C_MID 3 | from torchdiffeq._impl.solvers import FixedGridODESolver 4 | import torch 5 | from torchdiffeq._impl.misc import _check_inputs, _flat_to_shape 6 | import torch.nn.functional as F 7 | import copy 8 | 9 | from torchdiffeq._impl.interp import _interp_evaluate 10 | from torchdiffeq._impl.rk_common import RKAdaptiveStepsizeODESolver, rk4_alt_step_func 11 | from ogb.nodeproppred import Evaluator 12 | 13 | 14 | def run_evaluator(evaluator, data, y_pred): 15 | train_acc = evaluator.eval({ 16 | 'y_true': data.y[data.train_mask], 17 | 'y_pred': y_pred[data.train_mask], 18 | })['acc'] 19 | valid_acc = evaluator.eval({ 20 | 'y_true': data.y[data.val_mask], 21 | 'y_pred': y_pred[data.val_mask], 22 | })['acc'] 23 | test_acc = evaluator.eval({ 24 | 'y_true': data.y[data.test_mask], 25 | 'y_pred': y_pred[data.test_mask], 26 | })['acc'] 27 | return train_acc, valid_acc, test_acc 28 | 29 | 30 | class 
EarlyStopDopri5(RKAdaptiveStepsizeODESolver): 31 | order = 5 32 | tableau = _DORMAND_PRINCE_SHAMPINE_TABLEAU 33 | mid = DPS_C_MID 34 | 35 | def __init__(self, func, y0, rtol, atol, opt, **kwargs): 36 | super(EarlyStopDopri5, self).__init__(func, y0, rtol, atol, **kwargs) 37 | 38 | self.lf = torch.nn.CrossEntropyLoss() 39 | self.m2_weight = None 40 | self.m2_bias = None 41 | self.data = None 42 | self.best_val = 0 43 | self.best_test = 0 44 | self.max_test_steps = opt['max_test_steps'] 45 | self.best_time = 0 46 | self.ode_test = self.test_OGB if opt['dataset'] == 'ogbn-arxiv' else self.test 47 | self.dataset = opt['dataset'] 48 | if opt['dataset'] == 'ogbn-arxiv': 49 | self.lf = torch.nn.functional.nll_loss 50 | self.evaluator = Evaluator(name=opt['dataset']) 51 | 52 | def set_accs(self, train, val, test, time): 53 | self.best_train = train 54 | self.best_val = val 55 | self.best_test = test 56 | self.best_time = time.item() 57 | 58 | def integrate(self, t): 59 | solution = torch.empty(len(t), *self.y0.shape, dtype=self.y0.dtype, device=self.y0.device) 60 | solution[0] = self.y0 61 | t = t.to(self.dtype) 62 | self._before_integrate(t) 63 | new_t = t 64 | for i in range(1, len(t)): 65 | new_t, y = self.advance(t[i]) 66 | solution[i] = y 67 | return new_t, solution 68 | 69 | def advance(self, next_t): 70 | """ 71 | Takes steps dt to get to the next user specified time point next_t. In practice this goes past next_t and then interpolates 72 | :param next_t: 73 | :return: The state, x(next_t) 74 | """ 75 | n_steps = 0 76 | while next_t > self.rk_state.t1 and n_steps < self.max_test_steps: 77 | self.rk_state = self._adaptive_step(self.rk_state) 78 | n_steps += 1 79 | train_acc, val_acc, test_acc = self.evaluate(self.rk_state) 80 | if val_acc > self.best_val: 81 | self.set_accs(train_acc, val_acc, test_acc, self.rk_state.t1) 82 | new_t = next_t 83 | if n_steps < self.max_test_steps: 84 | return (new_t, _interp_evaluate(self.rk_state.interp_coeff, self.rk_state.t0, self.rk_state.t1, next_t)) 85 | else: 86 | return (new_t, _interp_evaluate(self.rk_state.interp_coeff, self.rk_state.t0, self.rk_state.t1, self.rk_state.t1)) 87 | 88 | @torch.no_grad() 89 | def test(self, logits): 90 | accs = [] 91 | for _, mask in self.data('train_mask', 'val_mask', 'test_mask'): 92 | pred = logits[mask].max(1)[1] 93 | acc = pred.eq(self.data.y[mask]).sum().item() / mask.sum().item() 94 | accs.append(acc) 95 | return accs 96 | 97 | @torch.no_grad() 98 | def test_OGB(self, logits): 99 | evaluator = self.evaluator 100 | data = self.data 101 | y_pred = logits.argmax(dim=-1, keepdim=True) 102 | train_acc, valid_acc, test_acc = run_evaluator(evaluator, data, y_pred) 103 | return [train_acc, valid_acc, test_acc] 104 | 105 | @torch.no_grad() 106 | def evaluate(self, rkstate): 107 | # Activation. 
108 | z = rkstate.y1 109 | if not self.m2_weight.shape[1] == z.shape[1]: # system has been augmented 110 | z = torch.split(z, self.m2_weight.shape[1], dim=1)[0] 111 | z = F.relu(z) 112 | z = F.linear(z, self.m2_weight, self.m2_bias) 113 | t0, t1 = float(self.rk_state.t0), float(self.rk_state.t1) 114 | if self.dataset == 'ogbn-arxiv': 115 | z = z.log_softmax(dim=-1) 116 | loss = self.lf(z[self.data.train_mask], self.data.y.squeeze()[self.data.train_mask]) 117 | else: 118 | loss = self.lf(z[self.data.train_mask], self.data.y[self.data.train_mask]) 119 | train_acc, val_acc, test_acc = self.ode_test(z) 120 | log = 'ODE eval t0 {:.3f}, t1 {:.3f} Loss: {:.4f}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}' 121 | # print(log.format(t0, t1, loss, train_acc, val_acc, tmp_test_acc)) 122 | return train_acc, val_acc, test_acc 123 | 124 | def set_m2(self, m2): 125 | self.m2 = copy.deepcopy(m2) 126 | 127 | def set_data(self, data): 128 | if self.data is None: 129 | self.data = data 130 | 131 | class EarlyStopRK4(FixedGridODESolver): 132 | order = 4 133 | 134 | def __init__(self, func, y0, opt, eps=0, **kwargs): 135 | super(EarlyStopRK4, self).__init__(func, y0, **kwargs) 136 | self.eps = torch.as_tensor(eps, dtype=self.dtype, device=self.device) 137 | self.lf = torch.nn.CrossEntropyLoss() 138 | self.m2_weight = None 139 | self.m2_bias = None 140 | self.data = None 141 | self.best_val = 0 142 | self.best_test = 0 143 | self.best_time = 0 144 | self.ode_test = self.test_OGB if opt['dataset'] == 'ogbn-arxiv' else self.test 145 | self.dataset = opt['dataset'] 146 | if opt['dataset'] == 'ogbn-arxiv': 147 | self.lf = torch.nn.functional.nll_loss 148 | self.evaluator = Evaluator(name=opt['dataset']) 149 | 150 | def _step_func(self, func, t, dt, t1, y): 151 | ver = torchdiffeq.__version__[0] + torchdiffeq.__version__[2] + torchdiffeq.__version__[4] 152 | if int(ver) >= 22: # '0.2.2' 153 | return rk4_alt_step_func(func, t + self.eps, dt - 2 * self.eps, t1, y) 154 | else: 155 | return rk4_alt_step_func(func, t + self.eps, dt - 2 * self.eps, y) 156 | 157 | def set_accs(self, train, val, test, time): 158 | self.best_train = train 159 | self.best_val = val 160 | self.best_test = test 161 | self.best_time = time.item() 162 | 163 | def integrate(self, t): 164 | time_grid = self.grid_constructor(self.func, self.y0, t) 165 | assert time_grid[0] == t[0] and time_grid[-1] == t[-1] 166 | 167 | solution = torch.empty(len(t), *self.y0.shape, dtype=self.y0.dtype, device=self.y0.device) 168 | solution[0] = self.y0 169 | 170 | j = 1 171 | y0 = self.y0 172 | for t0, t1 in zip(time_grid[:-1], time_grid[1:]): 173 | dy = self._step_func(self.func, t0, t1 - t0, t1, y0) 174 | y1 = y0 + dy 175 | train_acc, val_acc, test_acc = self.evaluate(y1, t0, t1) 176 | if val_acc > self.best_val: 177 | self.set_accs(train_acc, val_acc, test_acc, t1) 178 | 179 | while j < len(t) and t1 >= t[j]: 180 | solution[j] = self._linear_interp(t0, t1, y0, y1, t[j]) 181 | j += 1 182 | y0 = y1 183 | 184 | return t1, solution 185 | 186 | @torch.no_grad() 187 | def test(self, logits): 188 | accs = [] 189 | for _, mask in self.data('train_mask', 'val_mask', 'test_mask'): 190 | pred = logits[mask].max(1)[1] 191 | acc = pred.eq(self.data.y[mask]).sum().item() / mask.sum().item() 192 | accs.append(acc) 193 | return accs 194 | 195 | @torch.no_grad() 196 | def test_OGB(self, logits): 197 | evaluator = self.evaluator 198 | data = self.data 199 | y_pred = logits.argmax(dim=-1, keepdim=True) 200 | train_acc, valid_acc, test_acc = run_evaluator(evaluator, data, y_pred) 201 | 
return [train_acc, valid_acc, test_acc] 202 | 203 | @torch.no_grad() 204 | def evaluate(self, z, t0, t1): 205 | # Activation. 206 | if not self.m2_weight.shape[1] == z.shape[1]: # system has been augmented 207 | z = torch.split(z, self.m2_weight.shape[1], dim=1)[0] 208 | z = F.relu(z) 209 | z = F.linear(z, self.m2_weight, self.m2_bias) 210 | if self.dataset == 'ogbn-arxiv': 211 | z = z.log_softmax(dim=-1) 212 | loss = self.lf(z[self.data.train_mask], self.data.y.squeeze()[self.data.train_mask]) 213 | else: 214 | loss = self.lf(z[self.data.train_mask], self.data.y[self.data.train_mask]) 215 | train_acc, val_acc, test_acc = self.ode_test(z) 216 | log = 'ODE eval t0 {:.3f}, t1 {:.3f} Loss: {:.4f}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}' 217 | # print(log.format(t0, t1, loss, train_acc, val_acc, tmp_test_acc)) 218 | return train_acc, val_acc, test_acc 219 | 220 | def set_m2(self, m2): 221 | self.m2 = copy.deepcopy(m2) 222 | 223 | def set_data(self, data): 224 | if self.data is None: 225 | self.data = data 226 | 227 | 228 | SOLVERS = { 229 | 'dopri5': EarlyStopDopri5, 230 | 'rk4': EarlyStopRK4 231 | } 232 | 233 | 234 | class EarlyStopInt(torch.nn.Module): 235 | def __init__(self, t, opt, device=None): 236 | super(EarlyStopInt, self).__init__() 237 | self.device = device 238 | self.solver = None 239 | self.data = None 240 | self.max_test_steps = opt['max_test_steps'] 241 | self.m2_weight = None 242 | self.m2_bias = None 243 | self.opt = opt 244 | self.t = torch.tensor([0, opt['earlystopxT'] * t], dtype=torch.float).to(self.device) 245 | 246 | def __call__(self, func, y0, t, method=None, rtol=1e-7, atol=1e-9, 247 | adjoint_method="dopri5", adjoint_atol=1e-9, adjoint_rtol=1e-7, options=None): 248 | """Integrate a system of ordinary differential equations. 249 | 250 | Solves the initial value problem for a non-stiff system of first order ODEs: 251 | ``` 252 | dy/dt = func(t, y), y(t[0]) = y0 253 | ``` 254 | where y is a Tensor of any shape. 255 | 256 | Output dtypes and numerical precision are based on the dtypes of the inputs `y0`. 257 | 258 | Args: 259 | func: Function that maps a Tensor holding the state `y` and a scalar Tensor 260 | `t` into a Tensor of state derivatives with respect to time. 261 | y0: N-D Tensor giving starting value of `y` at time point `t[0]`. May 262 | have any floating point or complex dtype. 263 | t: 1-D Tensor holding a sequence of time points for which to solve for 264 | `y`. The initial time point should be the first element of this sequence, 265 | and each time must be larger than the previous time. May have any floating 266 | point dtype. Converted to a Tensor with float64 dtype. 267 | rtol: optional float64 Tensor specifying an upper bound on relative error, 268 | per element of `y`. 269 | atol: optional float64 Tensor specifying an upper bound on absolute error, 270 | per element of `y`. 271 | method: optional string indicating the integration method to use. 272 | options: optional dict of configuring options for the indicated integration 273 | method. Can only be provided if a `method` is explicitly set. 274 | name: Optional name for this operation. 275 | 276 | Returns: 277 | y: Tensor, where the first dimension corresponds to different 278 | time points. Contains the solved value of y for each desired time point in 279 | `t`, with the initial value `y0` being the first element along the first 280 | dimension. 281 | 282 | Raises: 283 | ValueError: if an invalid `method` is provided. 
284 | TypeError: if `options` is supplied without `method`, or if `t` or `y0` has 285 | an invalid dtype. 286 | """ 287 | method = self.opt['method'] 288 | # assert method in ['rk4', 'dopri5'], "Only dopri5 and rk4 implemented with early stopping" 289 | 290 | ver = torchdiffeq.__version__ 291 | if int(ver[0] + ver[2] + ver[4]) >= 20: # 0.2.0 change of signature on this release for event_fn 292 | event_fn = None 293 | shapes, func, y0, t, rtol, atol, method, options, event_fn, t_is_reversed = _check_inputs(func, y0, self.t, rtol, 294 | atol, method, options, 295 | event_fn, SOLVERS) 296 | else: 297 | shapes, func, y0, t, rtol, atol, method, options = _check_inputs(func, y0, self.t, rtol, atol, method, options, 298 | SOLVERS) 299 | 300 | self.solver = SOLVERS[method](func, y0, rtol=rtol, atol=atol, opt=self.opt, **options) 301 | if self.solver.data is None: 302 | self.solver.data = self.data 303 | self.solver.m2_weight = self.m2_weight 304 | self.solver.m2_bias = self.m2_bias 305 | t, solution = self.solver.integrate(t) 306 | if shapes is not None: 307 | solution = _flat_to_shape(solution, (len(t),), shapes) 308 | return solution 309 | -------------------------------------------------------------------------------- /src/function_GAT_attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops 6 | from data import get_dataset 7 | from utils import MaxNFEException 8 | from base_classes import ODEFunc 9 | 10 | 11 | class ODEFuncAtt(ODEFunc): 12 | 13 | def __init__(self, in_features, out_features, opt, data, device): 14 | super(ODEFuncAtt, self).__init__(opt, data, device) 15 | 16 | if opt['self_loop_weight'] > 0: 17 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 18 | fill_value=opt['self_loop_weight']) 19 | else: 20 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 21 | 22 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt, 23 | device).to(device) 24 | try: 25 | self.attention_dim = opt['attention_dim'] 26 | except KeyError: 27 | self.attention_dim = out_features 28 | 29 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 30 | self.d_k = self.attention_dim // opt['heads'] 31 | 32 | def multiply_attention(self, x, attention, wx): 33 | if self.opt['mix_features']: 34 | wx = torch.mean(torch.stack( 35 | [torch_sparse.spmm(self.edge_index, attention[:, idx], wx.shape[0], wx.shape[0], wx) for idx in 36 | range(self.opt['heads'])], dim=0), 37 | dim=0) 38 | ax = torch.mm(wx, self.multihead_att_layer.Wout) 39 | else: 40 | ax = torch.mean(torch.stack( 41 | [torch_sparse.spmm(self.edge_index, attention[:, idx], x.shape[0], x.shape[0], x) for idx in 42 | range(self.opt['heads'])], dim=0), 43 | dim=0) 44 | return ax 45 | 46 | def forward(self, t, x): # t is needed when called by the integrator 47 | 48 | if self.nfe > self.opt["max_nfe"]: 49 | raise MaxNFEException 50 | 51 | self.nfe += 1 52 | 53 | attention, wx = self.multihead_att_layer(x, self.edge_index) 54 | ax = self.multiply_attention(x, attention, wx) 55 | # todo would be nice if this was more efficient 56 | 57 | if not self.opt['no_alpha_sigmoid']: 58 | alpha = torch.sigmoid(self.alpha_train) 59 | else: 60 | alpha = self.alpha_train 61 | 62 | f = alpha * (ax - x) 63 | if 
self.opt['add_source']: 64 | f = f + self.beta_train * self.x0 65 | return f 66 | 67 | def __repr__(self): 68 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 69 | 70 | 71 | class SpGraphAttentionLayer(nn.Module): 72 | """ 73 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 74 | """ 75 | 76 | def __init__(self, in_features, out_features, opt, device, concat=True): 77 | super(SpGraphAttentionLayer, self).__init__() 78 | self.in_features = in_features 79 | self.out_features = out_features 80 | self.alpha = opt['leaky_relu_slope'] 81 | self.concat = concat 82 | self.device = device 83 | self.opt = opt 84 | self.h = opt['heads'] 85 | 86 | try: 87 | self.attention_dim = opt['attention_dim'] 88 | except KeyError: 89 | self.attention_dim = out_features 90 | 91 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 92 | self.d_k = self.attention_dim // opt['heads'] 93 | 94 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 95 | nn.init.xavier_normal_(self.W.data, gain=1.414) 96 | 97 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 98 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 99 | 100 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 101 | nn.init.xavier_normal_(self.a.data, gain=1.414) 102 | 103 | self.leakyrelu = nn.LeakyReLU(self.alpha) 104 | 105 | def forward(self, x, edge): 106 | wx = torch.mm(x, self.W) # h: N x out 107 | h = wx.view(-1, self.h, self.d_k) 108 | h = h.transpose(1, 2) 109 | 110 | # Self-attention on the nodes - Shared attention mechanism 111 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 112 | self.device) # edge: 2*D x E 113 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 114 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 115 | return attention, wx 116 | 117 | def __repr__(self): 118 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 119 | 120 | 121 | if __name__ == '__main__': 122 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 123 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'beta_dim': 'vc', 'heads': 2, 'K': 10, 'attention_norm_idx': 0, 124 | 'add_source':False, 'alpha_dim': 'sc', 'beta_dim': 'vc', 'max_nfe':1000, 'mix_features': False} 125 | dataset = get_dataset(opt, '../data', False) 126 | t = 1 127 | func = ODEFuncAtt(dataset.data.num_features, 6, opt, dataset.data, device) 128 | out = func(t, dataset.data.x) 129 | -------------------------------------------------------------------------------- /src/function_GAT_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | from data import get_dataset 7 | from utils import MaxNFEException 8 | from base_classes import ODEFunc 9 | from torch_scatter import scatter 10 | import math 11 | from torch_geometric.utils import get_laplacian 12 | import torch.nn.functional as F 13 | 14 | class ODEFuncAttConv(ODEFunc): 15 | 16 | def __init__(self, in_features, out_features, opt, data, device): 17 | super(ODEFuncAttConv, self).__init__(opt, data, device) 18 
| 19 | if opt['self_loop_weight'] > 0: 20 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 21 | fill_value=opt['self_loop_weight']) 22 | else: 23 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 24 | 25 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 26 | 27 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt, 28 | device).to(device) 29 | try: 30 | self.attention_dim = opt['attention_dim'] 31 | except KeyError: 32 | self.attention_dim = out_features 33 | 34 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 35 | self.d_k = self.attention_dim // opt['heads'] 36 | 37 | self.device = device 38 | 39 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 40 | 41 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 42 | self.edge_index_lap = self.edge_index_lap.to(device) 43 | self.edge_weight_lap = self.edge_weight_lap.to(device) 44 | 45 | self.gate = nn.Linear(2 * in_features, 1) 46 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 47 | 48 | 49 | 50 | self.lin2 = nn.Linear(in_features * 2, out_features) 51 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 52 | 53 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 54 | 55 | 56 | self.output_low, self.output_high = ( 57 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 58 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 59 | ) 60 | 61 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 62 | 63 | 64 | self.weight_mlp.data.uniform_(-stdv, stdv) 65 | 66 | self.output_low.data.uniform_(-stdv, stdv) 67 | self.output_high.data.uniform_(-stdv, stdv) 68 | 69 | 70 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 71 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 72 | 73 | self.lamda1 = nn.Parameter(torch.tensor(0.0),requires_grad=True) 74 | 75 | 76 | def multiply_attention(self, x, attention, wx): 77 | if self.opt['mix_features']: 78 | wx = torch.mean(torch.stack( 79 | [torch_sparse.spmm(self.edge_index, attention[:, idx], wx.shape[0], wx.shape[0], wx) for idx in 80 | range(self.opt['heads'])], dim=0), 81 | dim=0) 82 | ax = torch.mm(wx, self.multihead_att_layer.Wout) 83 | else: 84 | ax = torch.mean(torch.stack( 85 | [torch_sparse.spmm(self.edge_index, attention[:, idx], x.shape[0], x.shape[0], x) for idx in 86 | range(self.opt['heads'])], dim=0), 87 | dim=0) 88 | return ax 89 | 90 | def forward(self, t, x): # t is needed when called by the integrator 91 | 92 | if self.nfe > self.opt["max_nfe"]: 93 | raise MaxNFEException 94 | 95 | self.nfe += 1 96 | 97 | attention, wx = self.multihead_att_layer(x, self.edge_index) 98 | ax2 = self.multiply_attention(x, attention, wx) 99 | # todo would be nice if this was more efficient 100 | 101 | 102 | 103 | src = x[self.edge_index[0, :], :] 104 | dst_k = x[self.edge_index[1, :], :] 105 | 106 | 107 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 108 | 109 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 110 | 111 | ax = self.lamda1 * torch.mm(ax3, self.output_high) +torch.mm(ax2, self.output_low) 112 | 113 | 114 | 115 | 116 | 117 | ax = torch.cat([x, ax], dim=1) 118 | ax = F.relu(self.lin2(ax)) 119 | 120 | if not self.opt['no_alpha_sigmoid']: 121 | alpha = 
torch.sigmoid(self.alpha_train) 122 | else: 123 | alpha = self.alpha_train 124 | 125 | f = alpha * (ax - x) 126 | if self.opt['add_source']: 127 | f = f + self.beta_train * self.x0 128 | return f 129 | 130 | def __repr__(self): 131 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 132 | 133 | 134 | class SpGraphAttentionLayer(nn.Module): 135 | """ 136 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 137 | """ 138 | 139 | def __init__(self, in_features, out_features, opt, device, concat=True): 140 | super(SpGraphAttentionLayer, self).__init__() 141 | self.in_features = in_features 142 | self.out_features = out_features 143 | self.alpha = opt['leaky_relu_slope'] 144 | self.concat = concat 145 | self.device = device 146 | self.opt = opt 147 | self.h = opt['heads'] 148 | 149 | try: 150 | self.attention_dim = opt['attention_dim'] 151 | except KeyError: 152 | self.attention_dim = out_features 153 | 154 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 155 | self.d_k = self.attention_dim // opt['heads'] 156 | 157 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 158 | nn.init.xavier_normal_(self.W.data, gain=1.414) 159 | 160 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 161 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 162 | 163 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 164 | nn.init.xavier_normal_(self.a.data, gain=1.414) 165 | 166 | self.leakyrelu = nn.LeakyReLU(self.alpha) 167 | 168 | def forward(self, x, edge): 169 | wx = torch.mm(x, self.W) # h: N x out 170 | h = wx.view(-1, self.h, self.d_k) 171 | h = h.transpose(1, 2) 172 | 173 | # Self-attention on the nodes - Shared attention mechanism 174 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 175 | self.device) # edge: 2*D x E 176 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 177 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 178 | return attention, wx 179 | 180 | def __repr__(self): 181 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 182 | 183 | 184 | if __name__ == '__main__': 185 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 186 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'beta_dim': 'vc', 'heads': 2, 'K': 10, 'attention_norm_idx': 0, 187 | 'add_source':False, 'alpha_dim': 'sc', 'beta_dim': 'vc', 'max_nfe':1000, 'mix_features': False} 188 | dataset = get_dataset(opt, '../data', False) 189 | t = 1 190 | func = ODEFuncAtt(dataset.data.num_features, 6, opt, dataset.data, device) 191 | out = func(t, dataset.data.x) 192 | -------------------------------------------------------------------------------- /src/function_beltrami_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | import torch.nn.functional as F 11 | from torch_scatter import scatter 12 | import math 13 | from torch_geometric.utils import 
get_laplacian 14 | 15 | class ODEFuncBeltramiCONV(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncBeltramiCONV, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | # print("self.edge_index: ", self.edge_index.shape) 26 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt,device).to( 27 | device) 28 | self.device = device 29 | 30 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 31 | 32 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 33 | self.edge_index_lap = self.edge_index_lap.to(device) 34 | self.edge_weight_lap = self.edge_weight_lap.to(device) 35 | 36 | self.gate = nn.Linear(2 * in_features, 1) 37 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 38 | 39 | self.lin1 = nn.Linear(in_features, out_features) 40 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 41 | 42 | self.lin2 = nn.Linear(in_features * 2, out_features) 43 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 44 | 45 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 46 | 47 | 48 | self.output_low, self.output_high = ( 49 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 50 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 51 | ) 52 | 53 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 54 | 55 | 56 | self.weight_mlp.data.uniform_(-stdv, stdv) 57 | 58 | self.output_low.data.uniform_(-stdv, stdv) 59 | self.output_high.data.uniform_(-stdv, stdv) 60 | 61 | 62 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 63 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 64 | 65 | def multiply_attention(self, x, attention=None, v=None): 66 | num_heads = 4 67 | mix_features = 0 68 | if mix_features: 69 | vx = torch.mean(torch.stack( 70 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 71 | range(num_heads)], dim=0), 72 | dim=0) 73 | ax = self.multihead_att_layer.Wout(vx) 74 | else: 75 | mean_attention = attention.mean(dim=1) 76 | # mean_attention = self.edge_weight 77 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 78 | grad_x_abs = torch.abs(grad_x) 79 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 80 | grad_x_norm_inv = 1 / grad_x_norm 81 | gu = grad_x_norm_inv[self.edge_index[0, :]] 82 | gv = grad_x_norm_inv[self.edge_index[1, :]] 83 | attention2 = gu * gu + gu * gv 84 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 85 | # Da = torch.diag(grad_x_norm_inv) 86 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 87 | rowsum = torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 88 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 89 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 90 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 91 | return ax - dx 92 | 93 | def forward(self, t, x): # t is needed when called by the integrator 94 | 95 | attention, values = 
self.multihead_att_layer(x, self.edge_index) 96 | ax = self.multiply_attention(x, attention, values) 97 | 98 | 99 | src = x[self.edge_index[0, :], :] 100 | dst_k = x[self.edge_index[1, :], :] 101 | # h2 = torch.cat([src, dst_k], dim=1) 102 | # attention1 = torch.tanh(self.gate(h2)).squeeze() 103 | 104 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 105 | # print("x_new: ", x_new.shape) 106 | 107 | # ax3 = torch_sparse.spmm(self.edge_index, attention1, x_new.shape[0], x_new.shape[0], x_new) 108 | 109 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 110 | 111 | ax3 = self.bn_in_1(ax3) 112 | ax = self.bn_in_2(ax) 113 | 114 | ax = torch.mm(ax3, self.output_high) + torch.mm(ax, self.output_low) 115 | 116 | 117 | 118 | ax = torch.cat([x, ax], axis=1) 119 | ax = self.lin2(ax) 120 | 121 | if not self.opt['no_alpha_sigmoid']: 122 | alpha = torch.sigmoid(self.alpha_train) 123 | else: 124 | alpha = self.alpha_train 125 | f = alpha * (ax - x) 126 | if self.opt['add_source']: 127 | f = f + self.beta_train * self.x0 128 | 129 | # f = ax - x 130 | return f 131 | 132 | def __repr__(self): 133 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 134 | 135 | 136 | class SpGraphAttentionLayer(nn.Module): 137 | """ 138 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 139 | """ 140 | 141 | def __init__(self, in_features, out_features, opt, device, concat=True): 142 | super(SpGraphAttentionLayer, self).__init__() 143 | self.in_features = in_features 144 | self.out_features = out_features 145 | self.alpha = opt['leaky_relu_slope'] 146 | self.concat = concat 147 | self.device = device 148 | self.opt = opt 149 | self.h = opt['heads'] 150 | 151 | try: 152 | self.attention_dim = opt['attention_dim'] 153 | except KeyError: 154 | self.attention_dim = out_features 155 | 156 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 157 | self.d_k = self.attention_dim // opt['heads'] 158 | 159 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 160 | nn.init.xavier_normal_(self.W.data, gain=1.414) 161 | 162 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 163 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 164 | 165 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 166 | nn.init.xavier_normal_(self.a.data, gain=1.414) 167 | 168 | self.leakyrelu = nn.LeakyReLU(self.alpha) 169 | 170 | def forward(self, x, edge): 171 | wx = torch.mm(x, self.W) # h: N x out 172 | h = wx.view(-1, self.h, self.d_k) 173 | h = h.transpose(1, 2) 174 | 175 | # Self-attention on the nodes - Shared attention mechanism 176 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 177 | self.device) # edge: 2*D x E 178 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 179 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 180 | return attention, wx 181 | 182 | def __repr__(self): 183 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 184 | 185 | 186 | if __name__ == '__main__': 187 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 188 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 189 | 'attention_norm_idx': 0, 'add_source': False, 190 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 
1000, 'mix_features': False 191 | } 192 | dataset = get_dataset(opt, '../data', False) 193 | t = 1 194 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 195 | out = func(t, dataset.data.x) 196 | -------------------------------------------------------------------------------- /src/function_beltrami_gat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | import torch.nn.functional as F 11 | from torch_scatter import scatter 12 | import math 13 | from torch_geometric.utils import get_laplacian 14 | 15 | class ODEFuncBeltramiGAT(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncBeltramiGAT, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | # print("self.edge_index: ", self.edge_index.shape) 26 | self.multihead_att_layer = SpGraphAttentionLayer(in_features, out_features, opt,device,).to( 27 | device) 28 | self.device = device 29 | 30 | # self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 31 | # 32 | # self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 33 | # self.edge_index_lap = self.edge_index_lap.to(device) 34 | # self.edge_weight_lap = self.edge_weight_lap.to(device) 35 | 36 | self.gate = nn.Linear(2 * in_features, 1) 37 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 38 | 39 | self.lin1 = nn.Linear(in_features, out_features) 40 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 41 | 42 | self.lin2 = nn.Linear(in_features * 2, out_features) 43 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 44 | 45 | self.weight_low, self.weight_high, self.weight_mlp = ( 46 | nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)), 47 | nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)), 48 | nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)), 49 | ) 50 | 51 | self.output_low, self.output_high, self.output_mlp = ( 52 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 53 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 54 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 55 | ) 56 | 57 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 58 | 59 | self.weight_low.data.uniform_(-stdv, stdv) 60 | self.weight_high.data.uniform_(-stdv, stdv) 61 | self.weight_mlp.data.uniform_(-stdv, stdv) 62 | 63 | self.output_low.data.uniform_(-stdv, stdv) 64 | self.output_high.data.uniform_(-stdv, stdv) 65 | self.output_mlp.data.uniform_(-stdv, stdv) 66 | 67 | def multiply_attention(self, x, attention=None, v=None): 68 | num_heads = 4 69 | mix_features = 0 70 | if mix_features: 71 | vx = torch.mean(torch.stack( 72 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 73 | range(num_heads)], dim=0), 74 | dim=0) 75 | ax = 
self.multihead_att_layer.Wout(vx) 76 | else: 77 | mean_attention = attention.mean(dim=1) 78 | # mean_attention = self.edge_weight 79 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 80 | grad_x_abs = torch.abs(grad_x) 81 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 82 | grad_x_norm_inv = 1 / grad_x_norm 83 | gu = grad_x_norm_inv[self.edge_index[0, :]] 84 | gv = grad_x_norm_inv[self.edge_index[1, :]] 85 | attention2 = gu * gu + gu * gv 86 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 87 | # Da = torch.diag(grad_x_norm_inv) 88 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 89 | rowsum = torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 90 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 91 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 92 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 93 | return ax - dx 94 | 95 | def forward(self, t, x): # t is needed when called by the integrator 96 | 97 | attention, values = self.multihead_att_layer(x, self.edge_index) 98 | ax = self.multiply_attention(x, attention, values) 99 | # ax = self.multiply_attention(x,) 100 | # src = x[self.edge_index[0, :], :] 101 | # dst_k = x[self.edge_index[1, :], :] 102 | # h2 = torch.cat([src, dst_k], dim=1) 103 | # attention1 = torch.tanh(self.gate(h2)).squeeze() 104 | # 105 | # x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 106 | # ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 107 | # 108 | # ax = torch.mm(ax3, self.output_high) + torch.mm(ax, self.output_low) 109 | 110 | # ax = torch.cat([x, ax], axis=1) 111 | # ax = self.lin2(ax) 112 | 113 | if not self.opt['no_alpha_sigmoid']: 114 | alpha = torch.sigmoid(self.alpha_train) 115 | else: 116 | alpha = self.alpha_train 117 | f = alpha * (ax - x) 118 | if self.opt['add_source']: 119 | f = f + self.beta_train * self.x0 120 | 121 | # f = ax - x 122 | return f 123 | 124 | def __repr__(self): 125 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 126 | 127 | 128 | class SpGraphAttentionLayer(nn.Module): 129 | """ 130 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 131 | """ 132 | 133 | def __init__(self, in_features, out_features, opt, device, concat=True): 134 | super(SpGraphAttentionLayer, self).__init__() 135 | self.in_features = in_features 136 | self.out_features = out_features 137 | self.alpha = opt['leaky_relu_slope'] 138 | self.concat = concat 139 | self.device = device 140 | self.opt = opt 141 | self.h = opt['heads'] 142 | 143 | try: 144 | self.attention_dim = opt['attention_dim'] 145 | except KeyError: 146 | self.attention_dim = out_features 147 | 148 | assert self.attention_dim % opt['heads'] == 0, "Number of heads must be a factor of the dimension size" 149 | self.d_k = self.attention_dim // opt['heads'] 150 | 151 | self.W = nn.Parameter(torch.zeros(size=(in_features, self.attention_dim))).to(device) 152 | nn.init.xavier_normal_(self.W.data, gain=1.414) 153 | 154 | self.Wout = nn.Parameter(torch.zeros(size=(self.attention_dim, self.in_features))).to(device) 155 | nn.init.xavier_normal_(self.Wout.data, gain=1.414) 156 | 157 | self.a = nn.Parameter(torch.zeros(size=(2 * self.d_k, 1, 1))).to(device) 158 | nn.init.xavier_normal_(self.a.data, gain=1.414) 159 | 160 | 
self.leakyrelu = nn.LeakyReLU(self.alpha) 161 | 162 | def forward(self, x, edge): 163 | wx = torch.mm(x, self.W) # h: N x out 164 | h = wx.view(-1, self.h, self.d_k) 165 | h = h.transpose(1, 2) 166 | 167 | # Self-attention on the nodes - Shared attention mechanism 168 | edge_h = torch.cat((h[edge[0, :], :, :], h[edge[1, :], :, :]), dim=1).transpose(0, 1).to( 169 | self.device) # edge: 2*D x E 170 | edge_e = self.leakyrelu(torch.sum(self.a * edge_h, dim=0)).to(self.device) 171 | attention = softmax(edge_e, edge[self.opt['attention_norm_idx']]) 172 | return attention, wx 173 | 174 | def __repr__(self): 175 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 176 | 177 | 178 | if __name__ == '__main__': 179 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 180 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 181 | 'attention_norm_idx': 0, 'add_source': False, 182 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 183 | } 184 | dataset = get_dataset(opt, '../data', False) 185 | t = 1 186 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 187 | out = func(t, dataset.data.x) 188 | -------------------------------------------------------------------------------- /src/function_beltrami_van.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | 11 | 12 | class ODEFuncBektramiAtt(ODEFunc): 13 | 14 | def __init__(self, in_features, out_features, opt, data, device): 15 | super(ODEFuncBektramiAtt, self).__init__(opt, data, device) 16 | 17 | if opt['self_loop_weight'] > 0: 18 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 19 | fill_value=opt['self_loop_weight']) 20 | else: 21 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 22 | # print("self.edge_index: ", self.edge_index.shape) 23 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt,device,edge_weights=self.edge_weight).to( 24 | device) 25 | self.device = device 26 | 27 | def multiply_attention(self, x, attention, v=None): 28 | num_heads = 4 29 | mix_features = 0 30 | if mix_features: 31 | vx = torch.mean(torch.stack( 32 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 33 | range(num_heads)], dim=0), 34 | dim=0) 35 | ax = self.multihead_att_layer.Wout(vx) 36 | else: 37 | mean_attention = attention.mean(dim=1) 38 | # mean_attention = self.edge_weight 39 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 40 | grad_x_abs = torch.abs(grad_x) 41 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 42 | grad_x_norm_inv = 1 / grad_x_norm 43 | gu = grad_x_norm_inv[self.edge_index[0, :]] 44 | gv = grad_x_norm_inv[self.edge_index[1, :]] 45 | attention2 = gu * gu + gu * gv 46 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 47 | # Da = torch.diag(grad_x_norm_inv) 48 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 49 | rowsum = 
torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 50 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 51 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 52 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 53 | return ax - dx 54 | 55 | def forward(self, t, x): # t is needed when called by the integrator 56 | 57 | attention, values = self.multihead_att_layer(x, self.edge_index) 58 | ax = self.multiply_attention(x, attention, values) 59 | 60 | if not self.opt['no_alpha_sigmoid']: 61 | alpha = torch.sigmoid(self.alpha_train) 62 | else: 63 | alpha = self.alpha_train 64 | f = alpha * (ax - x) 65 | if self.opt['add_source']: 66 | f = f + self.beta_train * self.x0 67 | 68 | # f = ax - x 69 | return f 70 | 71 | def __repr__(self): 72 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 73 | 74 | 75 | class SpGraphTransAttentionLayer(nn.Module): 76 | """ 77 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 78 | """ 79 | 80 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 81 | super(SpGraphTransAttentionLayer, self).__init__() 82 | self.in_features = in_features 83 | self.out_features = out_features 84 | self.alpha = opt['leaky_relu_slope'] 85 | self.concat = concat 86 | self.device = device 87 | self.opt = opt 88 | self.h = int(opt['heads']) 89 | self.edge_weights = edge_weights 90 | 91 | try: 92 | self.attention_dim = opt['attention_dim'] 93 | except KeyError: 94 | self.attention_dim = out_features 95 | 96 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 97 | self.h, self.attention_dim) 98 | self.d_k = self.attention_dim // self.h 99 | 100 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 101 | self.output_var_x = nn.Parameter(torch.ones(1)) 102 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 103 | self.output_var_p = nn.Parameter(torch.ones(1)) 104 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 105 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 106 | self.init_weights(self.Qx) 107 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 108 | self.init_weights(self.Vx) 109 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 110 | self.init_weights(self.Kx) 111 | 112 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 113 | self.init_weights(self.Qp) 114 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 115 | self.init_weights(self.Vp) 116 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 117 | self.init_weights(self.Kp) 118 | 119 | else: 120 | if self.opt['attention_type'] == "exp_kernel": 121 | self.output_var = nn.Parameter(torch.ones(1)) 122 | self.lengthscale = nn.Parameter(torch.ones(1)) 123 | 124 | self.Q = nn.Linear(in_features, self.attention_dim) 125 | self.init_weights(self.Q) 126 | 127 | self.V = nn.Linear(in_features, self.attention_dim) 128 | self.init_weights(self.V) 129 | 130 | self.K = nn.Linear(in_features, self.attention_dim) 131 | self.init_weights(self.K) 132 | 133 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 134 | 135 | self.Wout = nn.Linear(self.d_k, in_features) 136 | self.init_weights(self.Wout) 137 | 138 | def init_weights(self, m): 139 | if type(m) == 
nn.Linear: 140 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 141 | # m.bias.data.fill_(0.01) 142 | nn.init.constant_(m.weight, 1e-5) 143 | 144 | def forward(self, x, edge): 145 | """ 146 | x might be [features, augmentation, positional encoding, labels] 147 | """ 148 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 149 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 150 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 151 | p = x[:, self.opt['feat_hidden_dim']: label_index] 152 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 153 | 154 | qx = self.Qx(x) 155 | kx = self.Kx(x) 156 | vx = self.Vx(x) 157 | # perform linear operation and split into h heads 158 | kx = kx.view(-1, self.h, self.d_k) 159 | qx = qx.view(-1, self.h, self.d_k) 160 | vx = vx.view(-1, self.h, self.d_k) 161 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 162 | kx = kx.transpose(1, 2) 163 | qx = qx.transpose(1, 2) 164 | vx = vx.transpose(1, 2) 165 | src_x = qx[edge[0, :], :, :] 166 | dst_x = kx[edge[1, :], :, :] 167 | 168 | qp = self.Qp(p) 169 | kp = self.Kp(p) 170 | vp = self.Vp(p) 171 | # perform linear operation and split into h heads 172 | kp = kp.view(-1, self.h, self.d_k) 173 | qp = qp.view(-1, self.h, self.d_k) 174 | vp = vp.view(-1, self.h, self.d_k) 175 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 176 | kp = kp.transpose(1, 2) 177 | qp = qp.transpose(1, 2) 178 | vp = vp.transpose(1, 2) 179 | src_p = qp[edge[0, :], :, :] 180 | dst_p = kp[edge[1, :], :, :] 181 | 182 | prods = self.output_var_x ** 2 * torch.exp( 183 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 184 | * self.output_var_p ** 2 * torch.exp( 185 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 186 | 187 | v = None 188 | 189 | else: 190 | q = self.Q(x) 191 | k = self.K(x) 192 | v = self.V(x) 193 | 194 | # perform linear operation and split into h heads 195 | 196 | k = k.view(-1, self.h, self.d_k) 197 | q = q.view(-1, self.h, self.d_k) 198 | v = v.view(-1, self.h, self.d_k) 199 | 200 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 201 | 202 | k = k.transpose(1, 2) 203 | q = q.transpose(1, 2) 204 | v = v.transpose(1, 2) 205 | 206 | src = q[edge[0, :], :, :] 207 | dst_k = k[edge[1, :], :, :] 208 | 209 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 210 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 211 | elif self.opt['attention_type'] == "scaled_dot": 212 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 213 | elif self.opt['attention_type'] == "cosine_sim": 214 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 215 | prods = cos(src, dst_k) 216 | elif self.opt['attention_type'] == "pearson": 217 | src_mu = torch.mean(src, dim=1, keepdim=True) 218 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 219 | src = src - src_mu 220 | dst_k = dst_k - dst_mu 221 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 222 | prods = cos(src, dst_k) 223 | 224 | if self.opt['reweight_attention'] and self.edge_weights is not None: 225 | prods = prods * self.edge_weights.unsqueeze(dim=1) 226 | if self.opt['square_plus']: 227 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 228 | else: 229 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 230 | return attention, (v, prods) 231 | 232 | def __repr__(self): 233 
| return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 234 | 235 | 236 | if __name__ == '__main__': 237 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 238 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 239 | 'attention_norm_idx': 0, 'add_source': False, 240 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 241 | } 242 | dataset = get_dataset(opt, '../data', False) 243 | t = 1 244 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 245 | out = func(t, dataset.data.x) 246 | -------------------------------------------------------------------------------- /src/function_beltramitrans_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | import torch.nn.functional as F 11 | from torch_scatter import scatter 12 | import math 13 | from torch_geometric.utils import get_laplacian 14 | 15 | class ODEFuncBeltramiTRANSCONV(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncBeltramiTRANSCONV, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | # print("self.edge_index: ", self.edge_index.shape) 26 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt,device).to( 27 | device) 28 | self.device = device 29 | 30 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 31 | 32 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 33 | self.edge_index_lap = self.edge_index_lap.to(device) 34 | self.edge_weight_lap = self.edge_weight_lap.to(device) 35 | 36 | self.gate = nn.Linear(2 * in_features, 1) 37 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 38 | 39 | self.lin1 = nn.Linear(in_features, out_features) 40 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 41 | 42 | self.lin2 = nn.Linear(in_features * 2, out_features) 43 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 44 | 45 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 46 | 47 | 48 | self.output_low, self.output_high = ( 49 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 50 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 51 | ) 52 | 53 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 54 | 55 | 56 | self.weight_mlp.data.uniform_(-stdv, stdv) 57 | 58 | self.output_low.data.uniform_(-stdv, stdv) 59 | self.output_high.data.uniform_(-stdv, stdv) 60 | 61 | 62 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 63 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 64 | 65 | def multiply_attention(self, x, attention=None, v=None): 66 | num_heads = 4 67 | mix_features = 0 68 | if mix_features: 69 | vx = torch.mean(torch.stack( 70 | 
[torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 71 | range(num_heads)], dim=0), 72 | dim=0) 73 | ax = self.multihead_att_layer.Wout(vx) 74 | else: 75 | mean_attention = attention.mean(dim=1) 76 | # mean_attention = self.edge_weight 77 | grad_x = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) - x 78 | grad_x_abs = torch.abs(grad_x) 79 | grad_x_norm = torch.sqrt(torch.sum(torch.clamp(grad_x_abs * grad_x_abs, min=1e-1), 1)) 80 | grad_x_norm_inv = 1 / grad_x_norm 81 | gu = grad_x_norm_inv[self.edge_index[0, :]] 82 | gv = grad_x_norm_inv[self.edge_index[1, :]] 83 | attention2 = gu * gu + gu * gv 84 | new_attn = mean_attention * softmax(attention2, self.edge_index[0]) 85 | # Da = torch.diag(grad_x_norm_inv) 86 | W = torch.sparse.FloatTensor(self.edge_index, new_attn, (x.shape[0], x.shape[0])).coalesce() 87 | rowsum = torch.sparse.mm(W, torch.ones((W.shape[0], 1), device=self.device)).flatten() 88 | diag_index = torch.stack((torch.arange(x.shape[0]), torch.arange(x.shape[0]))).to(self.device) 89 | dx = torch_sparse.spmm(diag_index, rowsum, x.shape[0], x.shape[0], x) 90 | ax = torch_sparse.spmm(self.edge_index, new_attn, x.shape[0], x.shape[0], x) 91 | return ax - dx 92 | 93 | def forward(self, t, x): # t is needed when called by the integrator 94 | 95 | attention, values = self.multihead_att_layer(x, self.edge_index) 96 | ax = self.multiply_attention(x, attention, values) 97 | 98 | 99 | src = x[self.edge_index[0, :], :] 100 | dst_k = x[self.edge_index[1, :], :] 101 | 102 | 103 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 104 | 105 | 106 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 107 | 108 | ax3 = self.bn_in_1(ax3) 109 | ax = self.bn_in_2(ax) 110 | 111 | ax = torch.mm(ax3, self.output_high) + torch.mm(ax, self.output_low) 112 | 113 | 114 | 115 | ax = torch.cat([x, ax], axis=1) 116 | ax = self.lin2(ax) 117 | 118 | if not self.opt['no_alpha_sigmoid']: 119 | alpha = torch.sigmoid(self.alpha_train) 120 | else: 121 | alpha = self.alpha_train 122 | f = alpha * (ax - x) 123 | if self.opt['add_source']: 124 | f = f + self.beta_train * self.x0 125 | 126 | # f = ax - x 127 | return f 128 | 129 | def __repr__(self): 130 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 131 | 132 | 133 | 134 | 135 | class SpGraphTransAttentionLayer(nn.Module): 136 | """ 137 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 138 | """ 139 | 140 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 141 | super(SpGraphTransAttentionLayer, self).__init__() 142 | self.in_features = in_features 143 | self.out_features = out_features 144 | self.alpha = opt['leaky_relu_slope'] 145 | self.concat = concat 146 | self.device = device 147 | self.opt = opt 148 | self.h = int(opt['heads']) 149 | self.edge_weights = edge_weights 150 | 151 | try: 152 | self.attention_dim = opt['attention_dim'] 153 | except KeyError: 154 | self.attention_dim = out_features 155 | 156 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 157 | self.h, self.attention_dim) 158 | self.d_k = self.attention_dim // self.h 159 | 160 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 161 | self.output_var_x = nn.Parameter(torch.ones(1)) 162 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 163 | self.output_var_p = 
nn.Parameter(torch.ones(1)) 164 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 165 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 166 | self.init_weights(self.Qx) 167 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 168 | self.init_weights(self.Vx) 169 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 170 | self.init_weights(self.Kx) 171 | 172 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 173 | self.init_weights(self.Qp) 174 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 175 | self.init_weights(self.Vp) 176 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 177 | self.init_weights(self.Kp) 178 | 179 | else: 180 | if self.opt['attention_type'] == "exp_kernel": 181 | self.output_var = nn.Parameter(torch.ones(1)) 182 | self.lengthscale = nn.Parameter(torch.ones(1)) 183 | 184 | self.Q = nn.Linear(in_features, self.attention_dim) 185 | self.init_weights(self.Q) 186 | 187 | self.V = nn.Linear(in_features, self.attention_dim) 188 | self.init_weights(self.V) 189 | 190 | self.K = nn.Linear(in_features, self.attention_dim) 191 | self.init_weights(self.K) 192 | 193 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 194 | 195 | self.Wout = nn.Linear(self.d_k, in_features) 196 | self.init_weights(self.Wout) 197 | 198 | def init_weights(self, m): 199 | if type(m) == nn.Linear: 200 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 201 | # m.bias.data.fill_(0.01) 202 | nn.init.constant_(m.weight, 1e-5) 203 | 204 | def forward(self, x, edge): 205 | """ 206 | x might be [features, augmentation, positional encoding, labels] 207 | """ 208 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 209 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 210 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 211 | p = x[:, self.opt['feat_hidden_dim']: label_index] 212 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 213 | 214 | qx = self.Qx(x) 215 | kx = self.Kx(x) 216 | vx = self.Vx(x) 217 | # perform linear operation and split into h heads 218 | kx = kx.view(-1, self.h, self.d_k) 219 | qx = qx.view(-1, self.h, self.d_k) 220 | vx = vx.view(-1, self.h, self.d_k) 221 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 222 | kx = kx.transpose(1, 2) 223 | qx = qx.transpose(1, 2) 224 | vx = vx.transpose(1, 2) 225 | src_x = qx[edge[0, :], :, :] 226 | dst_x = kx[edge[1, :], :, :] 227 | 228 | qp = self.Qp(p) 229 | kp = self.Kp(p) 230 | vp = self.Vp(p) 231 | # perform linear operation and split into h heads 232 | kp = kp.view(-1, self.h, self.d_k) 233 | qp = qp.view(-1, self.h, self.d_k) 234 | vp = vp.view(-1, self.h, self.d_k) 235 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 236 | kp = kp.transpose(1, 2) 237 | qp = qp.transpose(1, 2) 238 | vp = vp.transpose(1, 2) 239 | src_p = qp[edge[0, :], :, :] 240 | dst_p = kp[edge[1, :], :, :] 241 | 242 | prods = self.output_var_x ** 2 * torch.exp( 243 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 244 | * self.output_var_p ** 2 * torch.exp( 245 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 246 | 247 | v = None 248 | 249 | else: 250 | q = self.Q(x) 251 | k = self.K(x) 252 | v = self.V(x) 253 | 254 | # perform linear operation and split into h heads 255 | 256 | k = k.view(-1, self.h, self.d_k) 257 | q = 
q.view(-1, self.h, self.d_k) 258 | v = v.view(-1, self.h, self.d_k) 259 | 260 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 261 | 262 | k = k.transpose(1, 2) 263 | q = q.transpose(1, 2) 264 | v = v.transpose(1, 2) 265 | 266 | src = q[edge[0, :], :, :] 267 | dst_k = k[edge[1, :], :, :] 268 | 269 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 270 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 271 | elif self.opt['attention_type'] == "scaled_dot": 272 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 273 | elif self.opt['attention_type'] == "cosine_sim": 274 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 275 | prods = cos(src, dst_k) 276 | elif self.opt['attention_type'] == "pearson": 277 | src_mu = torch.mean(src, dim=1, keepdim=True) 278 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 279 | src = src - src_mu 280 | dst_k = dst_k - dst_mu 281 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 282 | prods = cos(src, dst_k) 283 | 284 | if self.opt['reweight_attention'] and self.edge_weights is not None: 285 | prods = prods * self.edge_weights.unsqueeze(dim=1) 286 | if self.opt['square_plus']: 287 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 288 | else: 289 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 290 | return attention, (v, prods) 291 | 292 | def __repr__(self): 293 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 294 | 295 | 296 | 297 | if __name__ == '__main__': 298 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 299 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 300 | 'attention_norm_idx': 0, 'add_source': False, 301 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 302 | } 303 | dataset = get_dataset(opt, '../data', False) 304 | t = 1 305 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 306 | out = func(t, dataset.data.x) 307 | -------------------------------------------------------------------------------- /src/function_laplacian_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | from torch_scatter import scatter 11 | import math 12 | from torch_geometric.utils import get_laplacian 13 | import torch.nn.functional as F 14 | 15 | 16 | class ODEFuncLapCONV(ODEFunc): 17 | 18 | def __init__(self, in_features, out_features, opt, data, device): 19 | super(ODEFuncLapCONV, self).__init__(opt, data, device) 20 | 21 | if opt['self_loop_weight'] > 0: 22 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 23 | fill_value=opt['self_loop_weight']) 24 | else: 25 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 26 | # print("self.edge_index: ", self.edge_index.shape) 27 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt,device,edge_weights=self.edge_weight).to( 28 | device) 29 | self.device = device 30 | 31 | self.edge_index,self.edge_weight = 
remove_self_loops(self.edge_index, self.edge_weight) 32 | 33 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 34 | self.edge_index_lap = self.edge_index_lap.to(device) 35 | self.edge_weight_lap = self.edge_weight_lap.to(device) 36 | 37 | 38 | 39 | self.gate = nn.Linear(2 * in_features, 1) 40 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 41 | 42 | 43 | self.lin2 = nn.Linear(in_features * 2, out_features) 44 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 45 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 46 | self.output_low, self.output_high = ( 47 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 48 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)) 49 | ) 50 | 51 | 52 | 53 | 54 | 55 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 56 | 57 | 58 | 59 | self.weight_mlp.data.uniform_(-stdv, stdv) 60 | 61 | self.output_low.data.uniform_(-stdv, stdv) 62 | self.output_high.data.uniform_(-stdv, stdv) 63 | 64 | 65 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 66 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 67 | 68 | self.lamda = nn.Parameter(torch.tensor(0.0)) 69 | 70 | 71 | 72 | def forward(self, t, x): # t is needed when called by the integrator 73 | 74 | 75 | x2 = x 76 | 77 | src = x[self.edge_index[0, :], :] 78 | dst_k = x[self.edge_index[1, :], :] 79 | h2 = torch.cat([src, dst_k], dim=1) 80 | attention1 = torch.tanh(self.gate(h2)).squeeze() 81 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 82 | # x_new is v_ij elementwise product with x_j in the paper 83 | 84 | # print("x_new: ", x_new.shape) 85 | ax3 = torch_sparse.spmm(self.edge_index, attention1, x_new.shape[0], x_new.shape[0], x_new) 86 | ax3 = scatter(ax3, self.edge_index[1, :].T, dim=0, reduce="sum") 87 | # ax3 is the divergence of the V elementwise product with X in the paper 88 | 89 | 90 | 91 | 92 | ax2 = torch_sparse.spmm(self.edge_index, self.edge_weight, x.shape[0], x.shape[0], x2) 93 | 94 | # ax2 is the diffusion term in the paper 95 | 96 | ax3 = self.bn_in_1(ax3) 97 | ax2 = self.bn_in_2(ax2) 98 | 99 | ax = torch.mm(ax3, self.output_high) + torch.mm(ax2, self.output_low) 100 | 101 | 102 | 103 | ax = torch.cat([x, ax], axis=1) 104 | ax = self.lin2(ax) 105 | 106 | if not self.opt['no_alpha_sigmoid']: 107 | alpha = torch.sigmoid(self.alpha_train) 108 | else: 109 | alpha = self.alpha_train 110 | f = alpha * (ax - x) 111 | if self.opt['add_source']: 112 | f = f + self.beta_train * self.x0 113 | 114 | # f = ax - x 115 | return f 116 | 117 | def __repr__(self): 118 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 119 | 120 | 121 | class SpGraphTransAttentionLayer(nn.Module): 122 | """ 123 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 124 | """ 125 | 126 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 127 | super(SpGraphTransAttentionLayer, self).__init__() 128 | self.in_features = in_features 129 | self.out_features = out_features 130 | self.alpha = opt['leaky_relu_slope'] 131 | self.concat = concat 132 | self.device = device 133 | self.opt = opt 134 | self.h = int(opt['heads']) 135 | self.edge_weights = edge_weights 136 | 137 | try: 138 | self.attention_dim = opt['attention_dim'] 139 | except KeyError: 140 | self.attention_dim = out_features 141 | 142 | assert self.attention_dim % self.h == 0, "Number of
heads ({}) must be a factor of the dimension size ({})".format( 143 | self.h, self.attention_dim) 144 | self.d_k = self.attention_dim // self.h 145 | 146 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 147 | self.output_var_x = nn.Parameter(torch.ones(1)) 148 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 149 | self.output_var_p = nn.Parameter(torch.ones(1)) 150 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 151 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 152 | self.init_weights(self.Qx) 153 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 154 | self.init_weights(self.Vx) 155 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 156 | self.init_weights(self.Kx) 157 | 158 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 159 | self.init_weights(self.Qp) 160 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 161 | self.init_weights(self.Vp) 162 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 163 | self.init_weights(self.Kp) 164 | 165 | else: 166 | if self.opt['attention_type'] == "exp_kernel": 167 | self.output_var = nn.Parameter(torch.ones(1)) 168 | self.lengthscale = nn.Parameter(torch.ones(1)) 169 | 170 | self.Q = nn.Linear(in_features, self.attention_dim) 171 | self.init_weights(self.Q) 172 | 173 | self.V = nn.Linear(in_features, self.attention_dim) 174 | self.init_weights(self.V) 175 | 176 | self.K = nn.Linear(in_features, self.attention_dim) 177 | self.init_weights(self.K) 178 | 179 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 180 | 181 | self.Wout = nn.Linear(self.d_k, in_features) 182 | self.init_weights(self.Wout) 183 | 184 | def init_weights(self, m): 185 | if type(m) == nn.Linear: 186 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 187 | # m.bias.data.fill_(0.01) 188 | nn.init.constant_(m.weight, 1e-5) 189 | 190 | def forward(self, x, edge): 191 | """ 192 | x might be [features, augmentation, positional encoding, labels] 193 | """ 194 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 195 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 196 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 197 | p = x[:, self.opt['feat_hidden_dim']: label_index] 198 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 199 | 200 | qx = self.Qx(x) 201 | kx = self.Kx(x) 202 | vx = self.Vx(x) 203 | # perform linear operation and split into h heads 204 | kx = kx.view(-1, self.h, self.d_k) 205 | qx = qx.view(-1, self.h, self.d_k) 206 | vx = vx.view(-1, self.h, self.d_k) 207 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 208 | kx = kx.transpose(1, 2) 209 | qx = qx.transpose(1, 2) 210 | vx = vx.transpose(1, 2) 211 | src_x = qx[edge[0, :], :, :] 212 | dst_x = kx[edge[1, :], :, :] 213 | 214 | qp = self.Qp(p) 215 | kp = self.Kp(p) 216 | vp = self.Vp(p) 217 | # perform linear operation and split into h heads 218 | kp = kp.view(-1, self.h, self.d_k) 219 | qp = qp.view(-1, self.h, self.d_k) 220 | vp = vp.view(-1, self.h, self.d_k) 221 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 222 | kp = kp.transpose(1, 2) 223 | qp = qp.transpose(1, 2) 224 | vp = vp.transpose(1, 2) 225 | src_p = qp[edge[0, :], :, :] 226 | dst_p = kp[edge[1, :], :, :] 227 | 228 | prods = self.output_var_x ** 2 * torch.exp( 229 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * 
self.lengthscale_x ** 2)) \ 230 | * self.output_var_p ** 2 * torch.exp( 231 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 232 | 233 | v = None 234 | 235 | else: 236 | q = self.Q(x) 237 | k = self.K(x) 238 | v = self.V(x) 239 | 240 | # perform linear operation and split into h heads 241 | 242 | k = k.view(-1, self.h, self.d_k) 243 | q = q.view(-1, self.h, self.d_k) 244 | v = v.view(-1, self.h, self.d_k) 245 | 246 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 247 | 248 | k = k.transpose(1, 2) 249 | q = q.transpose(1, 2) 250 | v = v.transpose(1, 2) 251 | 252 | src = q[edge[0, :], :, :] 253 | dst_k = k[edge[1, :], :, :] 254 | 255 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 256 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 257 | elif self.opt['attention_type'] == "scaled_dot": 258 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 259 | elif self.opt['attention_type'] == "cosine_sim": 260 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 261 | prods = cos(src, dst_k) 262 | elif self.opt['attention_type'] == "pearson": 263 | src_mu = torch.mean(src, dim=1, keepdim=True) 264 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 265 | src = src - src_mu 266 | dst_k = dst_k - dst_mu 267 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 268 | prods = cos(src, dst_k) 269 | 270 | if self.opt['reweight_attention'] and self.edge_weights is not None: 271 | prods = prods * self.edge_weights.unsqueeze(dim=1) 272 | if self.opt['square_plus']: 273 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 274 | else: 275 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 276 | return attention, (v, prods) 277 | 278 | def __repr__(self): 279 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 280 | 281 | 282 | if __name__ == '__main__': 283 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 284 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 285 | 'attention_norm_idx': 0, 'add_source': False, 286 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 287 | } 288 | dataset = get_dataset(opt, '../data', False) 289 | t = 1 290 | func = ODEFuncBelFA(dataset.data.num_features, 6, opt, dataset.data, device) 291 | out = func(t, dataset.data.x) 292 | -------------------------------------------------------------------------------- /src/function_laplacian_diffusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch_sparse 4 | 5 | from base_classes import ODEFunc 6 | from utils import MaxNFEException 7 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 8 | 9 | # Define the ODE function. 10 | # Input: 11 | # --- t: A tensor with shape [], meaning the current time. 12 | # --- x: A tensor with shape [#batches, dims], meaning the value of x at t. 13 | # Output: 14 | # --- dx/dt: A tensor with shape [#batches, dims], meaning the derivative of x at t. 
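# A minimal usage sketch (an assumption about how a block class would drive a function with
# this signature, not code from this file): a torchdiffeq-style solver integrates dx/dt = f(t, x),
# roughly as follows
#
#   from torchdiffeq import odeint
#   func = LaplacianODEFunc(in_features, out_features, opt, data, device)
#   func.x0 = x0                      # source term used when opt['add_source'] is set
#   t = torch.tensor([0.0, T])        # T: total integration time (assumed hyperparameter)
#   x_T = odeint(func, x0, t)[-1]     # node features after diffusing for time T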
15 | class LaplacianODEFunc(ODEFunc): 16 | 17 | # currently requires in_features = out_features 18 | def __init__(self, in_features, out_features, opt, data, device): 19 | super(LaplacianODEFunc, self).__init__(opt, data, device) 20 | 21 | self.in_features = in_features 22 | self.out_features = out_features 23 | self.w = nn.Parameter(torch.eye(opt['hidden_dim'])) 24 | self.d = nn.Parameter(torch.zeros(opt['hidden_dim']) + 1) 25 | self.alpha_sc = nn.Parameter(torch.ones(1)) 26 | self.beta_sc = nn.Parameter(torch.ones(1)) 27 | 28 | # if opt['self_loop_weight'] > 0: 29 | # self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 30 | # fill_value=opt['self_loop_weight']) 31 | # else: 32 | # self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 33 | # 34 | # self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 35 | # self.lin2 = nn.Linear(in_features * 2, out_features) 36 | # nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 37 | 38 | def sparse_multiply(self, x): 39 | if self.opt['block'] in ['attention']: # adj is a multihead attention 40 | mean_attention = self.attention_weights.mean(dim=1) 41 | ax = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) 42 | elif self.opt['block'] in ['mixed', 'hard_attention']: # adj is a torch sparse matrix 43 | ax = torch_sparse.spmm(self.edge_index, self.attention_weights, x.shape[0], x.shape[0], x) 44 | else: # adj is a torch sparse matrix 45 | ax = torch_sparse.spmm(self.edge_index, self.edge_weight, x.shape[0], x.shape[0], x) 46 | return ax 47 | 48 | def forward(self, t, x): # the t param is needed by the ODE solver. 49 | if self.nfe > self.opt["max_nfe"]: 50 | raise MaxNFEException 51 | self.nfe += 1 52 | ax = self.sparse_multiply(x) 53 | 54 | # ax = torch.cat([x, ax], axis=1) 55 | # ax = self.lin2(ax) 56 | 57 | if not self.opt['no_alpha_sigmoid']: 58 | alpha = torch.sigmoid(self.alpha_train) 59 | else: 60 | alpha = self.alpha_train 61 | 62 | f = alpha * (ax - x) 63 | if self.opt['add_source']: 64 | f = f + self.beta_train * self.x0 65 | return f 66 | -------------------------------------------------------------------------------- /src/function_transformer_attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops 6 | import numpy as np 7 | from data import get_dataset 8 | from utils import MaxNFEException, squareplus 9 | from base_classes import ODEFunc 10 | 11 | 12 | class ODEFuncTransformerAtt(ODEFunc): 13 | 14 | def __init__(self, in_features, out_features, opt, data, device): 15 | super(ODEFuncTransformerAtt, self).__init__(opt, data, device) 16 | 17 | if opt['self_loop_weight'] > 0: 18 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 19 | fill_value=opt['self_loop_weight']) 20 | else: 21 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 22 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt, 23 | device, edge_weights=self.edge_weight).to(device) 24 | 25 | def multiply_attention(self, x, attention, v=None): 26 | # todo would be nice if this was more efficient 27 | if self.opt['mix_features']: 28 | vx = torch.mean(torch.stack( 29 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, 
idx]) for idx in 30 | range(self.opt['heads'])], dim=0), 31 | dim=0) 32 | ax = self.multihead_att_layer.Wout(vx) 33 | else: 34 | mean_attention = attention.mean(dim=1) 35 | ax = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) 36 | return ax 37 | 38 | def forward(self, t, x): # t is needed when called by the integrator 39 | if self.nfe > self.opt["max_nfe"]: 40 | raise MaxNFEException 41 | 42 | self.nfe += 1 43 | attention, values = self.multihead_att_layer(x, self.edge_index) 44 | ax = self.multiply_attention(x, attention, values) 45 | 46 | if not self.opt['no_alpha_sigmoid']: 47 | alpha = torch.sigmoid(self.alpha_train) 48 | else: 49 | alpha = self.alpha_train 50 | f = alpha * (ax - x) 51 | if self.opt['add_source']: 52 | f = f + self.beta_train * self.x0 53 | return f 54 | 55 | def __repr__(self): 56 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 57 | 58 | 59 | class SpGraphTransAttentionLayer(nn.Module): 60 | """ 61 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 62 | """ 63 | 64 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 65 | super(SpGraphTransAttentionLayer, self).__init__() 66 | self.in_features = in_features 67 | self.out_features = out_features 68 | self.alpha = opt['leaky_relu_slope'] 69 | self.concat = concat 70 | self.device = device 71 | self.opt = opt 72 | self.h = int(opt['heads']) 73 | self.edge_weights = edge_weights 74 | 75 | try: 76 | self.attention_dim = opt['attention_dim'] 77 | except KeyError: 78 | self.attention_dim = out_features 79 | 80 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 81 | self.h, self.attention_dim) 82 | self.d_k = self.attention_dim // self.h 83 | 84 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 85 | self.output_var_x = nn.Parameter(torch.ones(1)) 86 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 87 | self.output_var_p = nn.Parameter(torch.ones(1)) 88 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 89 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 90 | self.init_weights(self.Qx) 91 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 92 | self.init_weights(self.Vx) 93 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 94 | self.init_weights(self.Kx) 95 | 96 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 97 | self.init_weights(self.Qp) 98 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 99 | self.init_weights(self.Vp) 100 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 101 | self.init_weights(self.Kp) 102 | 103 | else: 104 | if self.opt['attention_type'] == "exp_kernel": 105 | self.output_var = nn.Parameter(torch.ones(1)) 106 | self.lengthscale = nn.Parameter(torch.ones(1)) 107 | 108 | self.Q = nn.Linear(in_features, self.attention_dim) 109 | self.init_weights(self.Q) 110 | 111 | self.V = nn.Linear(in_features, self.attention_dim) 112 | self.init_weights(self.V) 113 | 114 | self.K = nn.Linear(in_features, self.attention_dim) 115 | self.init_weights(self.K) 116 | 117 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 118 | 119 | self.Wout = nn.Linear(self.d_k, in_features) 120 | self.init_weights(self.Wout) 121 | 122 | def init_weights(self, m): 123 | if type(m) == nn.Linear: 124 | # 
nn.init.xavier_uniform_(m.weight, gain=1.414) 125 | # m.bias.data.fill_(0.01) 126 | nn.init.constant_(m.weight, 1e-5) 127 | 128 | def forward(self, x, edge): 129 | """ 130 | x might be [features, augmentation, positional encoding, labels] 131 | """ 132 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 133 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 134 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 135 | p = x[:, self.opt['feat_hidden_dim']: label_index] 136 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 137 | 138 | qx = self.Qx(x) 139 | kx = self.Kx(x) 140 | vx = self.Vx(x) 141 | # perform linear operation and split into h heads 142 | kx = kx.view(-1, self.h, self.d_k) 143 | qx = qx.view(-1, self.h, self.d_k) 144 | vx = vx.view(-1, self.h, self.d_k) 145 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 146 | kx = kx.transpose(1, 2) 147 | qx = qx.transpose(1, 2) 148 | vx = vx.transpose(1, 2) 149 | src_x = qx[edge[0, :], :, :] 150 | dst_x = kx[edge[1, :], :, :] 151 | 152 | qp = self.Qp(p) 153 | kp = self.Kp(p) 154 | vp = self.Vp(p) 155 | # perform linear operation and split into h heads 156 | kp = kp.view(-1, self.h, self.d_k) 157 | qp = qp.view(-1, self.h, self.d_k) 158 | vp = vp.view(-1, self.h, self.d_k) 159 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 160 | kp = kp.transpose(1, 2) 161 | qp = qp.transpose(1, 2) 162 | vp = vp.transpose(1, 2) 163 | src_p = qp[edge[0, :], :, :] 164 | dst_p = kp[edge[1, :], :, :] 165 | 166 | prods = self.output_var_x ** 2 * torch.exp( 167 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 168 | * self.output_var_p ** 2 * torch.exp( 169 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 170 | 171 | v = None 172 | 173 | else: 174 | q = self.Q(x) 175 | k = self.K(x) 176 | v = self.V(x) 177 | 178 | # perform linear operation and split into h heads 179 | 180 | k = k.view(-1, self.h, self.d_k) 181 | q = q.view(-1, self.h, self.d_k) 182 | v = v.view(-1, self.h, self.d_k) 183 | 184 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 185 | 186 | k = k.transpose(1, 2) 187 | q = q.transpose(1, 2) 188 | v = v.transpose(1, 2) 189 | 190 | src = q[edge[0, :], :, :] 191 | dst_k = k[edge[1, :], :, :] 192 | 193 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 194 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 195 | elif self.opt['attention_type'] == "scaled_dot": 196 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 197 | elif self.opt['attention_type'] == "cosine_sim": 198 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 199 | prods = cos(src, dst_k) 200 | elif self.opt['attention_type'] == "pearson": 201 | src_mu = torch.mean(src, dim=1, keepdim=True) 202 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 203 | src = src - src_mu 204 | dst_k = dst_k - dst_mu 205 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 206 | prods = cos(src, dst_k) 207 | 208 | if self.opt['reweight_attention'] and self.edge_weights is not None: 209 | prods = prods * self.edge_weights.unsqueeze(dim=1) 210 | if self.opt['square_plus']: 211 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 212 | else: 213 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 214 | return attention, (v, prods) 215 | 216 | def __repr__(self): 217 | return 
self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 218 | 219 | 220 | if __name__ == '__main__': 221 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 222 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'heads': 2, 'K': 10, 223 | 'attention_norm_idx': 0, 'add_source': False, 224 | 'alpha_dim': 'sc', 'beta_dim': 'sc', 'max_nfe': 1000, 'mix_features': False 225 | } 226 | dataset = get_dataset(opt, '../data', False) 227 | t = 1 228 | func = ODEFuncTransformerAtt(dataset.data.num_features, 6, opt, dataset.data, device) 229 | out = func(t, dataset.data.x) 230 | -------------------------------------------------------------------------------- /src/function_transformer_convection.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.utils import softmax 4 | import torch_sparse 5 | from torch_geometric.utils.loop import add_remaining_self_loops,remove_self_loops 6 | from data import get_dataset 7 | from utils import MaxNFEException, squareplus 8 | from base_classes import ODEFunc 9 | from torch_scatter import scatter 10 | import math 11 | from torch_geometric.utils import get_laplacian 12 | import torch.nn.functional as F 13 | import numpy as np 14 | 15 | class ODEFuncTransConv(ODEFunc): 16 | 17 | def __init__(self, in_features, out_features, opt, data, device): 18 | super(ODEFuncTransConv, self).__init__(opt, data, device) 19 | 20 | if opt['self_loop_weight'] > 0: 21 | self.edge_index, self.edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 22 | fill_value=opt['self_loop_weight']) 23 | else: 24 | self.edge_index, self.edge_weight = data.edge_index, data.edge_attr 25 | 26 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 27 | 28 | self.multihead_att_layer = SpGraphTransAttentionLayer(in_features, out_features, opt, 29 | device, edge_weights=self.edge_weight).to(device) 30 | 31 | 32 | 33 | 34 | 35 | self.device = device 36 | 37 | self.edge_index, self.edge_weight = remove_self_loops(self.edge_index, self.edge_weight) 38 | 39 | self.edge_index_lap, self.edge_weight_lap = get_laplacian(self.edge_index, self.edge_weight, normalization='sym') 40 | self.edge_index_lap = self.edge_index_lap.to(device) 41 | self.edge_weight_lap = self.edge_weight_lap.to(device) 42 | 43 | self.gate = nn.Linear(2 * in_features, 1) 44 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 45 | 46 | 47 | self.lin2 = nn.Linear(in_features * 2, out_features) 48 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 49 | 50 | self.weight_mlp = nn.Parameter(torch.FloatTensor(in_features, out_features).to(device)) 51 | 52 | 53 | self.output_low, self.output_high = ( 54 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 55 | nn.Parameter(torch.FloatTensor(out_features, out_features).to(device)), 56 | ) 57 | 58 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 59 | 60 | self.weight_mlp.data.uniform_(-stdv, stdv) 61 | 62 | self.output_low.data.uniform_(-stdv, stdv) 63 | self.output_high.data.uniform_(-stdv, stdv) 64 | 65 | 66 | self.bn_in_1 = torch.nn.BatchNorm1d(opt['hidden_dim']) 67 | self.bn_in_2 = torch.nn.BatchNorm1d(opt['hidden_dim']) 68 | 69 | self.lamda1 = nn.Parameter(torch.tensor(0.0),requires_grad=True) 70 | 71 | def multiply_attention(self, x, attention, v=None): 72 | # todo would be nice if this was more efficient 73 | if self.opt['mix_features']: 74 
| vx = torch.mean(torch.stack( 75 | [torch_sparse.spmm(self.edge_index, attention[:, idx], v.shape[0], v.shape[0], v[:, :, idx]) for idx in 76 | range(self.opt['heads'])], dim=0), 77 | dim=0) 78 | ax = self.multihead_att_layer.Wout(vx) 79 | else: 80 | mean_attention = attention.mean(dim=1) 81 | ax = torch_sparse.spmm(self.edge_index, mean_attention, x.shape[0], x.shape[0], x) 82 | return ax 83 | 84 | def forward(self, t, x): # t is needed when called by the integrator 85 | 86 | if self.nfe > self.opt["max_nfe"]: 87 | raise MaxNFEException 88 | 89 | self.nfe += 1 90 | 91 | attention, wx = self.multihead_att_layer(x, self.edge_index) 92 | ax2 = self.multiply_attention(x, attention, wx) 93 | # todo would be nice if this was more efficient 94 | 95 | 96 | 97 | src = x[self.edge_index[0, :], :] 98 | dst_k = x[self.edge_index[1, :], :] 99 | 100 | 101 | x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 102 | 103 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 104 | 105 | 106 | ax = self.lamda1 * torch.mm(ax3, self.output_high) +torch.mm(ax2, self.output_low) 107 | 108 | 109 | ax = torch.cat([x, ax], dim=1) 110 | ax = F.relu(self.lin2(ax)) 111 | 112 | if not self.opt['no_alpha_sigmoid']: 113 | alpha = torch.sigmoid(self.alpha_train) 114 | else: 115 | alpha = self.alpha_train 116 | 117 | f = alpha * (ax - x) 118 | if self.opt['add_source']: 119 | f = f + self.beta_train * self.x0 120 | return f 121 | 122 | def __repr__(self): 123 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 124 | 125 | 126 | class SpGraphTransAttentionLayer(nn.Module): 127 | """ 128 | Sparse version GAT layer, similar to https://arxiv.org/abs/1710.10903 129 | """ 130 | 131 | def __init__(self, in_features, out_features, opt, device, concat=True, edge_weights=None): 132 | super(SpGraphTransAttentionLayer, self).__init__() 133 | self.in_features = in_features 134 | self.out_features = out_features 135 | self.alpha = opt['leaky_relu_slope'] 136 | self.concat = concat 137 | self.device = device 138 | self.opt = opt 139 | self.h = int(opt['heads']) 140 | self.edge_weights = edge_weights 141 | 142 | try: 143 | self.attention_dim = opt['attention_dim'] 144 | except KeyError: 145 | self.attention_dim = out_features 146 | 147 | assert self.attention_dim % self.h == 0, "Number of heads ({}) must be a factor of the dimension size ({})".format( 148 | self.h, self.attention_dim) 149 | self.d_k = self.attention_dim // self.h 150 | 151 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 152 | self.output_var_x = nn.Parameter(torch.ones(1)) 153 | self.lengthscale_x = nn.Parameter(torch.ones(1)) 154 | self.output_var_p = nn.Parameter(torch.ones(1)) 155 | self.lengthscale_p = nn.Parameter(torch.ones(1)) 156 | self.Qx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 157 | self.init_weights(self.Qx) 158 | self.Vx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 159 | self.init_weights(self.Vx) 160 | self.Kx = nn.Linear(opt['hidden_dim']-opt['pos_enc_hidden_dim'], self.attention_dim) 161 | self.init_weights(self.Kx) 162 | 163 | self.Qp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 164 | self.init_weights(self.Qp) 165 | self.Vp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 166 | self.init_weights(self.Vp) 167 | self.Kp = nn.Linear(opt['pos_enc_hidden_dim'], self.attention_dim) 168 | self.init_weights(self.Kp) 169 | 170 | else: 171 | if 
self.opt['attention_type'] == "exp_kernel": 172 | self.output_var = nn.Parameter(torch.ones(1)) 173 | self.lengthscale = nn.Parameter(torch.ones(1)) 174 | 175 | self.Q = nn.Linear(in_features, self.attention_dim) 176 | self.init_weights(self.Q) 177 | 178 | self.V = nn.Linear(in_features, self.attention_dim) 179 | self.init_weights(self.V) 180 | 181 | self.K = nn.Linear(in_features, self.attention_dim) 182 | self.init_weights(self.K) 183 | 184 | self.activation = nn.Sigmoid() # nn.LeakyReLU(self.alpha) 185 | 186 | self.Wout = nn.Linear(self.d_k, in_features) 187 | self.init_weights(self.Wout) 188 | 189 | def init_weights(self, m): 190 | if type(m) == nn.Linear: 191 | # nn.init.xavier_uniform_(m.weight, gain=1.414) 192 | # m.bias.data.fill_(0.01) 193 | nn.init.constant_(m.weight, 1e-5) 194 | 195 | def forward(self, x, edge): 196 | """ 197 | x might be [features, augmentation, positional encoding, labels] 198 | """ 199 | # if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 200 | if self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 201 | label_index = self.opt['feat_hidden_dim'] + self.opt['pos_enc_hidden_dim'] 202 | p = x[:, self.opt['feat_hidden_dim']: label_index] 203 | x = torch.cat((x[:, :self.opt['feat_hidden_dim']], x[:, label_index:]), dim=1) 204 | 205 | qx = self.Qx(x) 206 | kx = self.Kx(x) 207 | vx = self.Vx(x) 208 | # perform linear operation and split into h heads 209 | kx = kx.view(-1, self.h, self.d_k) 210 | qx = qx.view(-1, self.h, self.d_k) 211 | vx = vx.view(-1, self.h, self.d_k) 212 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 213 | kx = kx.transpose(1, 2) 214 | qx = qx.transpose(1, 2) 215 | vx = vx.transpose(1, 2) 216 | src_x = qx[edge[0, :], :, :] 217 | dst_x = kx[edge[1, :], :, :] 218 | 219 | qp = self.Qp(p) 220 | kp = self.Kp(p) 221 | vp = self.Vp(p) 222 | # perform linear operation and split into h heads 223 | kp = kp.view(-1, self.h, self.d_k) 224 | qp = qp.view(-1, self.h, self.d_k) 225 | vp = vp.view(-1, self.h, self.d_k) 226 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 227 | kp = kp.transpose(1, 2) 228 | qp = qp.transpose(1, 2) 229 | vp = vp.transpose(1, 2) 230 | src_p = qp[edge[0, :], :, :] 231 | dst_p = kp[edge[1, :], :, :] 232 | 233 | prods = self.output_var_x ** 2 * torch.exp( 234 | -torch.sum((src_x - dst_x) ** 2, dim=1) / (2 * self.lengthscale_x ** 2)) \ 235 | * self.output_var_p ** 2 * torch.exp( 236 | -torch.sum((src_p - dst_p) ** 2, dim=1) / (2 * self.lengthscale_p ** 2)) 237 | 238 | v = None 239 | 240 | else: 241 | q = self.Q(x) 242 | k = self.K(x) 243 | v = self.V(x) 244 | 245 | # perform linear operation and split into h heads 246 | 247 | k = k.view(-1, self.h, self.d_k) 248 | q = q.view(-1, self.h, self.d_k) 249 | v = v.view(-1, self.h, self.d_k) 250 | 251 | # transpose to get dimensions [n_nodes, attention_dim, n_heads] 252 | 253 | k = k.transpose(1, 2) 254 | q = q.transpose(1, 2) 255 | v = v.transpose(1, 2) 256 | 257 | src = q[edge[0, :], :, :] 258 | dst_k = k[edge[1, :], :, :] 259 | 260 | if not self.opt['beltrami'] and self.opt['attention_type'] == "exp_kernel": 261 | prods = self.output_var ** 2 * torch.exp(-(torch.sum((src - dst_k) ** 2, dim=1) / (2 * self.lengthscale ** 2))) 262 | elif self.opt['attention_type'] == "scaled_dot": 263 | prods = torch.sum(src * dst_k, dim=1) / np.sqrt(self.d_k) 264 | elif self.opt['attention_type'] == "cosine_sim": 265 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 266 | prods = cos(src, dst_k) 267 | elif self.opt['attention_type'] 
== "pearson": 268 | src_mu = torch.mean(src, dim=1, keepdim=True) 269 | dst_mu = torch.mean(dst_k, dim=1, keepdim=True) 270 | src = src - src_mu 271 | dst_k = dst_k - dst_mu 272 | cos = torch.nn.CosineSimilarity(dim=1, eps=1e-5) 273 | prods = cos(src, dst_k) 274 | 275 | if self.opt['reweight_attention'] and self.edge_weights is not None: 276 | prods = prods * self.edge_weights.unsqueeze(dim=1) 277 | if self.opt['square_plus']: 278 | attention = squareplus(prods, edge[self.opt['attention_norm_idx']]) 279 | else: 280 | attention = softmax(prods, edge[self.opt['attention_norm_idx']]) 281 | return attention, (v, prods) 282 | 283 | def __repr__(self): 284 | return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')' 285 | 286 | 287 | 288 | if __name__ == '__main__': 289 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 290 | opt = {'dataset': 'Cora', 'self_loop_weight': 1, 'leaky_relu_slope': 0.2, 'beta_dim': 'vc', 'heads': 2, 'K': 10, 'attention_norm_idx': 0, 291 | 'add_source':False, 'alpha_dim': 'sc', 'beta_dim': 'vc', 'max_nfe':1000, 'mix_features': False} 292 | dataset = get_dataset(opt, '../data', False) 293 | t = 1 294 | func = ODEFuncAtt(dataset.data.num_features, 6, opt, dataset.data, device) 295 | out = func(t, dataset.data.x) 296 | -------------------------------------------------------------------------------- /src/graphcon_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from torch_geometric.nn import GCNConv, GATConv 6 | from torch_scatter import scatter 7 | 8 | def batch_jacobian(func, x, create_graph=False): 9 | # x in shape (Batch, Length) 10 | def _func_sum(x): 11 | return func(x).sum(dim=0) 12 | 13 | return torch.autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1, 2, 0) 14 | 15 | 16 | 17 | 18 | class attention_H(nn.Module): 19 | """"replace this module by a aggregation function """ 20 | 21 | def __init__(self, size_in, edge_index): 22 | super().__init__() 23 | self.dim = size_in 24 | 25 | self.layer1 =GCNConv(size_in*2, size_in*2, normalize=True) 26 | self.edge_index = edge_index 27 | self.layer2 =GCNConv(size_in*2,size_in, normalize=True) 28 | 29 | self.layer3 = GCNConv(size_in , 1, normalize=True) 30 | def forward(self, x): 31 | 32 | out = self.layer1(x,self.edge_index) 33 | out = torch.tanh(out) 34 | out = self.layer2(out,self.edge_index) 35 | out = torch.tanh(out) 36 | out = self.layer3(out, self.edge_index) 37 | return out 38 | 39 | class HAMCON_GCN(nn.Module): 40 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers,data,device, dt=1., alpha=1., gamma=1., res_version=1,): 41 | super(HAMCON_GCN, self).__init__() 42 | self.dropout = dropout 43 | self.nhid = nhid 44 | self.nlayers = nlayers 45 | self.enc = nn.Linear(nfeat,nhid) 46 | self.conv = GCNConv(nhid, nhid) 47 | self.dec = nn.Linear(nhid,nclass) 48 | self.res = nn.Linear(nhid,nhid) 49 | if(res_version==1): 50 | self.residual = self.res_connection_v1 51 | else: 52 | self.residual = self.res_connection_v2 53 | self.dt = dt 54 | self.act_fn = nn.ReLU() 55 | self.alpha = alpha 56 | self.gamma = gamma 57 | self.reset_params() 58 | self.in_features = nhid 59 | 60 | self.edge_index = data.edge_index.to(device) 61 | self.H = attention_H(self.in_features, self.edge_index) 62 | def reset_params(self): 63 | for name, param in self.named_parameters(): 64 | if 'weight' in name and 'emb' not in 
name and 'out' not in name: 65 | stdv = 1. / math.sqrt(self.nhid) 66 | param.data.uniform_(-stdv, stdv) 67 | 68 | def res_connection_v1(self, X): 69 | res = - self.res(self.conv.lin(X)) 70 | return res 71 | 72 | def res_connection_v2(self, X): 73 | res = - self.conv.lin(X) + self.res(X) 74 | return res 75 | 76 | def forward(self, data): 77 | input = data.x 78 | edge_index = data.edge_index 79 | input = F.dropout(input, self.dropout, training=self.training) 80 | Y = self.act_fn(self.enc(input)) 81 | X = Y 82 | Y = F.dropout(Y, self.dropout, training=self.training) 83 | X = F.dropout(X, self.dropout, training=self.training) 84 | 85 | for i in range(self.nlayers): 86 | x_full = torch.hstack([X, Y]) 87 | f_full = batch_jacobian(lambda xx: self.H(xx), x_full, create_graph=True).squeeze() 88 | dx = f_full[..., self.in_features:] 89 | dv = -1 * f_full[..., 0:self.in_features] 90 | 91 | # Y = Y + self.dt*( dv- self.alpha*Y - self.gamma*X) 92 | Y = Y + self.dt * (dv) ###v1 93 | # Y = Y + self.dt * (dv - self.alpha * Y ) ##v2 94 | X = X + self.dt*dx 95 | Y = F.dropout(Y, self.dropout, training=self.training) 96 | X = F.dropout(X, self.dropout, training=self.training) 97 | 98 | X = self.dec(X) 99 | 100 | return X 101 | 102 | 103 | class GraphCON_GCN(nn.Module): 104 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers, dt=1., alpha=1., gamma=1., res_version=1): 105 | super(GraphCON_GCN, self).__init__() 106 | self.dropout = dropout 107 | self.nhid = nhid 108 | self.nlayers = nlayers 109 | self.enc = nn.Linear(nfeat,nhid) 110 | self.conv = GCNConv(nhid, nhid) 111 | self.dec = nn.Linear(nhid,nclass) 112 | self.res = nn.Linear(nhid,nhid) 113 | if(res_version==1): 114 | self.residual = self.res_connection_v1 115 | else: 116 | self.residual = self.res_connection_v2 117 | self.dt = dt 118 | self.act_fn = nn.ReLU() 119 | self.alpha = alpha 120 | self.gamma = gamma 121 | self.reset_params() 122 | 123 | def reset_params(self): 124 | for name, param in self.named_parameters(): 125 | if 'weight' in name and 'emb' not in name and 'out' not in name: 126 | stdv = 1. 
/ math.sqrt(self.nhid) 127 | param.data.uniform_(-stdv, stdv) 128 | 129 | def res_connection_v1(self, X): 130 | res = - self.res(self.conv.lin(X)) 131 | return res 132 | 133 | def res_connection_v2(self, X): 134 | res = - self.conv.lin(X) + self.res(X) 135 | return res 136 | 137 | def forward(self, data): 138 | input = data.x 139 | edge_index = data.edge_index 140 | input = F.dropout(input, self.dropout, training=self.training) 141 | Y = self.act_fn(self.enc(input)) 142 | X = Y 143 | Y = F.dropout(Y, self.dropout, training=self.training) 144 | X = F.dropout(X, self.dropout, training=self.training) 145 | 146 | for i in range(self.nlayers): 147 | Y = Y + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*Y - self.gamma*X) 148 | X = X + self.dt*Y 149 | Y = F.dropout(Y, self.dropout, training=self.training) 150 | X = F.dropout(X, self.dropout, training=self.training) 151 | 152 | X = self.dec(X) 153 | 154 | return X 155 | 156 | class GraphCON_GAT(nn.Module): 157 | def __init__(self, nfeat, nhid, nclass, nlayers, dropout, dt=1., alpha=1., gamma=1., nheads=4): 158 | super(GraphCON_GAT, self).__init__() 159 | self.alpha = alpha 160 | self.gamma = gamma 161 | self.dropout = dropout 162 | self.nheads = nheads 163 | self.nhid = nhid 164 | self.nlayers = nlayers 165 | self.act_fn = nn.ReLU() 166 | self.res = nn.Linear(nhid, nheads * nhid) 167 | self.enc = nn.Linear(nfeat,nhid) 168 | self.conv = GATConv(nhid, nhid, heads=nheads) 169 | self.dec = nn.Linear(nhid,nclass) 170 | self.dt = dt 171 | 172 | def res_connection(self, X): 173 | res = self.res(X) 174 | return res 175 | 176 | def forward(self, data): 177 | input = data.x 178 | n_nodes = input.size(0) 179 | edge_index = data.edge_index 180 | input = F.dropout(input, self.dropout, training=self.training) 181 | Y = self.act_fn(self.enc(input)) 182 | X = Y 183 | Y = F.dropout(Y, self.dropout, training=self.training) 184 | X = F.dropout(X, self.dropout, training=self.training) 185 | 186 | for i in range(self.nlayers): 187 | Y = Y + self.dt*(F.elu(self.conv(X, edge_index) + self.res_connection(X)).view(n_nodes, -1, self.nheads).mean(dim=-1) - self.alpha*Y - self.gamma*X) 188 | X = X + self.dt*Y 189 | Y = F.dropout(Y, self.dropout, training=self.training) 190 | X = F.dropout(X, self.dropout, training=self.training) 191 | 192 | X = self.dec(X) 193 | 194 | return X 195 | 196 | 197 | class GraphCON_GCN_conv(nn.Module): 198 | def __init__(self, nfeat, nhid, nclass, dropout, nlayers,graph_size, dt=1., alpha=1., gamma=1., res_version=1): 199 | super(GraphCON_GCN_conv, self).__init__() 200 | self.dropout = dropout 201 | self.nhid = nhid 202 | self.nlayers = nlayers 203 | self.enc = nn.Linear(nfeat,nhid) 204 | self.conv = GCNConv(nhid, nhid) 205 | self.dec = nn.Linear(nhid,nclass) 206 | self.res = nn.Linear(nhid,nhid) 207 | if(res_version==1): 208 | self.residual = self.res_connection_v1 209 | else: 210 | self.residual = self.res_connection_v2 211 | self.dt = dt 212 | self.act_fn = nn.ReLU() 213 | self.alpha = alpha 214 | self.gamma = gamma 215 | self.reset_params() 216 | self.lamda1 = nn.Parameter(torch.tensor(1.0),requires_grad=True) 217 | 218 | self.gate = nn.Linear(2 * nhid, 1) 219 | nn.init.xavier_normal_(self.gate.weight, gain=1.414) 220 | 221 | self.lin1 = nn.Linear(nhid, nhid) 222 | nn.init.xavier_normal_(self.lin1.weight, gain=1.414) 223 | 224 | self.lin2 = nn.Linear(nhid * 2, nhid) 225 | nn.init.xavier_normal_(self.lin2.weight, gain=1.414) 226 | 227 | self.weight_low, self.weight_high, self.weight_mlp = ( 228 | 
nn.Parameter(torch.FloatTensor(nhid, nhid)), 229 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 230 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 231 | ) 232 | 233 | self.output_low, self.output_high, self.output_mlp = ( 234 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 235 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 236 | nn.Parameter(torch.FloatTensor(nhid, nhid)), 237 | ) 238 | 239 | stdv = 1.0 / math.sqrt(self.weight_mlp.size(1)) 240 | 241 | self.weight_low.data.uniform_(-stdv, stdv) 242 | self.weight_high.data.uniform_(-stdv, stdv) 243 | self.weight_mlp.data.uniform_(-stdv, stdv) 244 | 245 | self.output_low.data.uniform_(-stdv, stdv) 246 | self.output_high.data.uniform_(-stdv, stdv) 247 | self.output_mlp.data.uniform_(-stdv, stdv) 248 | self.lamda = nn.ParameterList() 249 | for i in range(self.nlayers): 250 | self.lamda .append(nn.Parameter(torch.zeros((self.nhid, 1)),requires_grad=True)) 251 | self.graph_size = graph_size 252 | 253 | def reset_params(self): 254 | for name, param in self.named_parameters(): 255 | if 'weight' in name and 'emb' not in name and 'out' not in name: 256 | stdv = 1. / math.sqrt(self.nhid) 257 | param.data.uniform_(-stdv, stdv) 258 | 259 | def res_connection_v1(self, X): 260 | res = - self.res(self.conv.lin(X)) 261 | return res 262 | 263 | def res_connection_v2(self, X): 264 | res = - self.conv.lin(X) + self.res(X) 265 | return res 266 | 267 | def forward(self, data): 268 | input = data.x 269 | edge_index = data.edge_index 270 | self.edge_index = data.edge_index 271 | input = F.dropout(input, self.dropout, training=self.training) 272 | Y = self.act_fn(self.enc(input)) 273 | X = Y 274 | Y = F.dropout(Y, self.dropout, training=self.training) 275 | X = F.dropout(X, self.dropout, training=self.training) 276 | 277 | 278 | 279 | for i in range(self.nlayers): 280 | coeff_lamda = (torch.tanh(self.lamda[i])).T 281 | coeff_lamda = coeff_lamda.tile(self.graph_size, 1) 282 | 283 | # src = X[self.edge_index[0, :], :] 284 | # dst_k = X[self.edge_index[1, :], :] 285 | # x_new = F.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 286 | # ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 287 | 288 | src = X[self.edge_index[0, :], :] 289 | dst_k = X[self.edge_index[1, :], :] 290 | x_new = torch.relu(torch.mm(src - dst_k, self.weight_mlp)) * dst_k 291 | ax3 = scatter(x_new, self.edge_index[1, :].T, dim=0, reduce="sum") 292 | 293 | 294 | Y = Y + self.dt*(self.act_fn(self.conv(X,edge_index) + self.residual(X)) - self.alpha*Y - self.gamma*X ) 295 | X = X + self.dt* (Y + self.lamda1 * ax3) 296 | Y = F.dropout(Y, self.dropout, training=self.training) 297 | X = F.dropout(X, self.dropout, training=self.training) 298 | 299 | X = self.dec(X) 300 | 301 | return X 302 | -------------------------------------------------------------------------------- /src/model_configurations.py: -------------------------------------------------------------------------------- 1 | from function_transformer_attention import ODEFuncTransformerAtt 2 | from function_GAT_attention import ODEFuncAtt 3 | from function_laplacian_diffusion import LaplacianODEFunc 4 | from block_transformer_attention import AttODEblock 5 | from block_constant import ConstantODEblock 6 | 7 | from function_beltrami_trans import ODEFuncBektramiAtt 8 | 9 | from function_beltrami_fa import ODEFuncBelFA 10 | 11 | from function_laplacian_convection import ODEFuncLapCONV 12 | from function_beltrami_convection import ODEFuncBeltramiCONV 13 | 14 | from function_GAT_convection import ODEFuncAttConv 15 | from 
function_beltrami_gat import ODEFuncBeltramiGAT 16 | 17 | from function_transformer_convection import ODEFuncTransConv 18 | 19 | from function_beltramitrans_convection import ODEFuncBeltramiTRANSCONV 20 | 21 | 22 | class BlockNotDefined(Exception): 23 | pass 24 | 25 | class FunctionNotDefined(Exception): 26 | pass 27 | 28 | 29 | def set_block(opt): 30 | ode_str = opt['block'] 31 | if ode_str == 'attention': 32 | block = AttODEblock 33 | 34 | 35 | elif ode_str == 'constant': 36 | block = ConstantODEblock 37 | 38 | else: 39 | raise BlockNotDefined 40 | return block 41 | 42 | 43 | def set_function(opt): 44 | ode_str = opt['function'] 45 | if ode_str == 'laplacian': 46 | f = LaplacianODEFunc 47 | elif ode_str == 'GAT': 48 | f = ODEFuncAtt 49 | elif ode_str == 'transformer': 50 | f = ODEFuncTransformerAtt 51 | elif ode_str == 'beltrami': 52 | f = ODEFuncBektramiAtt 53 | 54 | 55 | elif ode_str == 'lapconv': 56 | f = ODEFuncLapCONV 57 | elif ode_str == 'belconv': 58 | f = ODEFuncBeltramiCONV 59 | 60 | elif ode_str == 'gatconv': 61 | f = ODEFuncAttConv 62 | elif ode_str == 'belgat': 63 | f = ODEFuncBeltramiGAT 64 | elif ode_str == 'transconv': 65 | f = ODEFuncTransConv 66 | elif ode_str == 'beltransconv': 67 | f = ODEFuncBeltramiTRANSCONV 68 | 69 | 70 | 71 | else: 72 | raise FunctionNotDefined 73 | return f 74 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | utility functions 3 | """ 4 | import os 5 | 6 | import scipy 7 | from scipy.stats import sem 8 | import numpy as np 9 | from torch_scatter import scatter_add 10 | from torch_geometric.utils import add_remaining_self_loops 11 | from torch_geometric.utils.num_nodes import maybe_num_nodes 12 | from torch_geometric.utils.convert import to_scipy_sparse_matrix 13 | from sklearn.preprocessing import normalize 14 | from torch_geometric.nn.conv.gcn_conv import gcn_norm 15 | 16 | ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 17 | 18 | class MaxNFEException(Exception): pass 19 | 20 | 21 | def rms_norm(tensor): 22 | return tensor.pow(2).mean().sqrt() 23 | 24 | 25 | def make_norm(state): 26 | if isinstance(state, tuple): 27 | state = state[0] 28 | state_size = state.numel() 29 | 30 | def norm(aug_state): 31 | y = aug_state[1:1 + state_size] 32 | adj_y = aug_state[1 + state_size:1 + 2 * state_size] 33 | return max(rms_norm(y), rms_norm(adj_y)) 34 | 35 | return norm 36 | 37 | 38 | def print_model_params(model): 39 | total_num_params = 0 40 | print(model) 41 | for name, param in model.named_parameters(): 42 | if param.requires_grad: 43 | print(name) 44 | print(param.data.shape) 45 | total_num_params += param.numel() 46 | print("Model has a total of {} params".format(total_num_params)) 47 | 48 | 49 | def adjust_learning_rate(optimizer, lr, epoch, burnin=50): 50 | if epoch <= burnin: 51 | for param_group in optimizer.param_groups: 52 | param_group["lr"] = lr * epoch / burnin 53 | 54 | 55 | def gcn_norm_fill_val(edge_index, edge_weight=None, fill_value=0., num_nodes=None, dtype=None): 56 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 57 | 58 | if edge_weight is None: 59 | edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, 60 | device=edge_index.device) 61 | 62 | if not int(fill_value) == 0: 63 | edge_index, tmp_edge_weight = add_remaining_self_loops( 64 | edge_index, edge_weight, fill_value, num_nodes) 65 | assert tmp_edge_weight is not None 66 | edge_weight = tmp_edge_weight 67 
| 68 | row, col = edge_index[0], edge_index[1] 69 | deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes) 70 | deg_inv_sqrt = deg.pow_(-0.5) 71 | deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0) 72 | return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col] 73 | 74 | 75 | def coo2tensor(coo, device=None): 76 | indices = np.vstack((coo.row, coo.col)) 77 | i = torch.LongTensor(indices) 78 | values = coo.data 79 | v = torch.FloatTensor(values) 80 | shape = coo.shape 81 | print('adjacency matrix generated with shape {}'.format(shape)) 82 | # test 83 | return torch.sparse.FloatTensor(i, v, torch.Size(shape)).to(device) 84 | 85 | 86 | def get_sym_adj(data, opt, improved=False): 87 | edge_index, edge_weight = gcn_norm( # yapf: disable 88 | data.edge_index, data.edge_attr, data.num_nodes, 89 | improved, opt['self_loop_weight'] > 0, dtype=data.x.dtype) 90 | coo = to_scipy_sparse_matrix(edge_index, edge_weight) 91 | return coo2tensor(coo) 92 | 93 | 94 | def get_rw_adj_old(data, opt): 95 | if opt['self_loop_weight'] > 0: 96 | edge_index, edge_weight = add_remaining_self_loops(data.edge_index, data.edge_attr, 97 | fill_value=opt['self_loop_weight']) 98 | else: 99 | edge_index, edge_weight = data.edge_index, data.edge_attr 100 | coo = to_scipy_sparse_matrix(edge_index, edge_weight) 101 | normed_csc = normalize(coo, norm='l1', axis=0) 102 | return coo2tensor(normed_csc.tocoo()) 103 | 104 | 105 | def get_rw_adj(edge_index, edge_weight=None, norm_dim=1, fill_value=0., num_nodes=None, dtype=None): 106 | num_nodes = maybe_num_nodes(edge_index, num_nodes) 107 | 108 | if edge_weight is None: 109 | edge_weight = torch.ones((edge_index.size(1),), dtype=dtype, 110 | device=edge_index.device) 111 | 112 | if not fill_value == 0: 113 | edge_index, tmp_edge_weight = add_remaining_self_loops( 114 | edge_index, edge_weight, fill_value, num_nodes) 115 | assert tmp_edge_weight is not None 116 | edge_weight = tmp_edge_weight 117 | 118 | row, col = edge_index[0], edge_index[1] 119 | indices = row if norm_dim == 0 else col 120 | deg = scatter_add(edge_weight, indices, dim=0, dim_size=num_nodes) 121 | deg_inv_sqrt = deg.pow_(-1) 122 | edge_weight = deg_inv_sqrt[indices] * edge_weight if norm_dim == 0 else edge_weight * deg_inv_sqrt[indices] 123 | return edge_index, edge_weight 124 | 125 | 126 | def mean_confidence_interval(data, confidence=0.95): 127 | """ 128 | As number of samples will be < 10 use t-test for the mean confidence intervals 129 | :param data: NDarray of metric means 130 | :param confidence: The desired confidence interval 131 | :return: Float confidence interval 132 | """ 133 | if len(data) < 2: 134 | return 0 135 | a = 1.0 * np.array(data) 136 | n = len(a) 137 | _, se = np.mean(a), scipy.stats.sem(a) 138 | h = se * scipy.stats.t.ppf((1 + confidence) / 2., n - 1) 139 | return h 140 | 141 | 142 | def sparse_dense_mul(s, d): 143 | i = s._indices() 144 | v = s._values() 145 | return torch.sparse.FloatTensor(i, v * d, s.size()) 146 | 147 | 148 | def get_sem(vec): 149 | """ 150 | wrapper around the scipy standard error metric 151 | :param vec: List of metric means 152 | :return: 153 | """ 154 | if len(vec) > 1: 155 | retval = sem(vec) 156 | else: 157 | retval = 0. 158 | return retval 159 | 160 | 161 | def get_full_adjacency(num_nodes): 162 | # what is the format of the edge index? 
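# (PyG convention: edge_index is a LongTensor of shape [2, num_edges] in COO form, with row 0
#  holding source nodes and row 1 holding target nodes; the loop below fills in every (i, j)
#  pair to build the fully connected graph.)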
163 | edge_index = torch.zeros((2, num_nodes ** 2),dtype=torch.long) 164 | for idx in range(num_nodes): 165 | edge_index[0][idx * num_nodes: (idx + 1) * num_nodes] = idx 166 | edge_index[1][idx * num_nodes: (idx + 1) * num_nodes] = torch.arange(0, num_nodes,dtype=torch.long) 167 | return edge_index 168 | 169 | 170 | 171 | from typing import Optional 172 | import torch 173 | from torch import Tensor 174 | from torch_scatter import scatter, segment_csr, gather_csr 175 | 176 | 177 | # https://twitter.com/jon_barron/status/1387167648669048833?s=12 178 | # @torch.jit.script 179 | def squareplus(src: Tensor, index: Optional[Tensor], ptr: Optional[Tensor] = None, 180 | num_nodes: Optional[int] = None) -> Tensor: 181 | r"""Computes a sparsely evaluated softmax. 182 | Given a value tensor :attr:`src`, this function first groups the values 183 | along the first dimension based on the indices specified in :attr:`index`, 184 | and then proceeds to compute the softmax individually for each group. 185 | 186 | Args: 187 | src (Tensor): The source tensor. 188 | index (LongTensor): The indices of elements for applying the softmax. 189 | ptr (LongTensor, optional): If given, computes the softmax based on 190 | sorted inputs in CSR representation. (default: :obj:`None`) 191 | num_nodes (int, optional): The number of nodes, *i.e.* 192 | :obj:`max_val + 1` of :attr:`index`. (default: :obj:`None`) 193 | 194 | :rtype: :class:`Tensor` 195 | """ 196 | out = src - src.max() 197 | # out = out.exp() 198 | out = (out + torch.sqrt(out ** 2 + 4)) / 2 199 | 200 | if ptr is not None: 201 | out_sum = gather_csr(segment_csr(out, ptr, reduce='sum'), ptr) 202 | elif index is not None: 203 | N = maybe_num_nodes(index, num_nodes) 204 | out_sum = scatter(out, index, dim=0, dim_size=N, reduce='sum')[index] 205 | else: 206 | raise NotImplementedError 207 | 208 | return out / (out_sum + 1e-16) 209 | 210 | 211 | # Counter of forward and backward passes. 212 | class Meter(object): 213 | 214 | def __init__(self): 215 | self.reset() 216 | 217 | def reset(self): 218 | self.val = None 219 | self.sum = 0 220 | self.cnt = 0 221 | 222 | def update(self, val): 223 | self.val = val 224 | self.sum += val 225 | self.cnt += 1 226 | 227 | def get_average(self): 228 | if self.cnt == 0: 229 | return 0 230 | return self.sum / self.cnt 231 | 232 | def get_value(self): 233 | return self.val 234 | 235 | 236 | class DummyDataset(object): 237 | def __init__(self, data, num_classes): 238 | self.data = data 239 | self.num_classes = num_classes 240 | 241 | 242 | class DummyData(object): 243 | def __init__(self, edge_index=None, edge_Attr=None, num_nodes=None): 244 | self.edge_index = edge_index 245 | self.edge_attr = edge_Attr 246 | self.num_nodes = num_nodes 247 | --------------------------------------------------------------------------------
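A minimal sketch of how the squareplus normalisation in src/utils.py is intended to be used: per-edge scores are normalised over the edges that share a destination node, like a sparse softmax. The tensors below are made-up illustrations rather than repo data, and the snippet assumes it is run from inside src/ so that utils.py is importable.

    import torch
    from utils import squareplus

    scores = torch.tensor([0.5, 1.0, -0.3, 2.0])  # one attention score per edge
    dst = torch.tensor([0, 0, 1, 1])              # destination node of each edge
    att = squareplus(scores, dst)                 # softmax-like weights via the squareplus kernel
    # att sums to (approximately) 1 within each destination-node group:
    # att[0] + att[1] ~= 1 and att[2] + att[3] ~= 1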