├── LICENSE
├── README.md
├── SL
    ├── Graph_Classification
    │   ├── ID_MLP_f.py
    │   ├── ID_MLP_s.py
    │   ├── README.md
    │   ├── configs
    │   │   └── LRGB-tuned
    │   │   │   ├── peptides-func-GCN.yaml
    │   │   │   └── peptides-struct-GCN.yaml
    │   ├── graphgps
    │   │   ├── __init__.py
    │   │   ├── act
    │   │   │   ├── __init__.py
    │   │   │   └── example.py
    │   │   ├── agg_runs.py
    │   │   ├── config
    │   │   │   ├── __init__.py
    │   │   │   ├── custom_gnn_config.py
    │   │   │   ├── dataset_config.py
    │   │   │   ├── defaults_config.py
    │   │   │   ├── example.py
    │   │   │   ├── graphormer_config.py
    │   │   │   ├── gt_config.py
    │   │   │   ├── optimizers_config.py
    │   │   │   ├── posenc_config.py
    │   │   │   ├── pretrained_config.py
    │   │   │   ├── split_config.py
    │   │   │   └── wandb_config.py
    │   │   ├── encoder
    │   │   │   ├── __init__.py
    │   │   │   ├── ast_encoder.py
    │   │   │   ├── composed_encoders.py
    │   │   │   ├── dummy_edge_encoder.py
    │   │   │   ├── equivstable_laplace_pos_encoder.py
    │   │   │   ├── example.py
    │   │   │   ├── graphormer_encoder.py
    │   │   │   ├── kernel_pos_encoder.py
    │   │   │   ├── laplace_pos_encoder.py
    │   │   │   ├── linear_edge_encoder.py
    │   │   │   ├── linear_node_encoder.py
    │   │   │   ├── ppa_encoder.py
    │   │   │   ├── rwse_edge_encoder.py
    │   │   │   ├── signnet_pos_encoder.py
    │   │   │   ├── type_dict_encoder.py
    │   │   │   └── voc_superpixels_encoder.py
    │   │   ├── finetuning.py
    │   │   ├── head
    │   │   │   ├── __init__.py
    │   │   │   ├── example.py
    │   │   │   ├── graphormer_graph.py
    │   │   │   ├── inductive_edge.py
    │   │   │   ├── inductive_node.py
    │   │   │   ├── infer_links.py
    │   │   │   ├── mlp_graph.py
    │   │   │   ├── ogb_code_graph.py
    │   │   │   └── san_graph.py
    │   │   ├── layer
    │   │   │   ├── __init__.py
    │   │   │   ├── bigbird_layer.py
    │   │   │   ├── example.py
    │   │   │   ├── gatedgcn_layer.py
    │   │   │   ├── gcn_conv_layer.py
    │   │   │   ├── gine_conv_layer.py
    │   │   │   ├── gps_layer.py
    │   │   │   ├── graphormer_layer.py
    │   │   │   ├── performer_layer.py
    │   │   │   ├── san2_layer.py
    │   │   │   └── san_layer.py
    │   │   ├── loader
    │   │   │   ├── __init__.py
    │   │   │   ├── dataset
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── __pycache__
    │   │   │   │   │   ├── __init__.cpython-39.pyc
    │   │   │   │   │   ├── aqsol_molecules.cpython-39.pyc
    │   │   │   │   │   ├── coco_superpixels.cpython-39.pyc
    │   │   │   │   │   ├── malnet_tiny.cpython-39.pyc
    │   │   │   │   │   ├── peptides_functional.cpython-39.pyc
    │   │   │   │   │   ├── peptides_structural.cpython-39.pyc
    │   │   │   │   │   └── voc_superpixels.cpython-39.pyc
    │   │   │   │   ├── aqsol_molecules.py
    │   │   │   │   ├── coco_superpixels.py
    │   │   │   │   ├── malnet_tiny.py
    │   │   │   │   ├── pcqm4mv2_contact.py
    │   │   │   │   ├── peptides_functional.py
    │   │   │   │   ├── peptides_structural.py
    │   │   │   │   └── voc_superpixels.py
    │   │   │   ├── master_loader.py
    │   │   │   ├── ogbg_code2_utils.py
    │   │   │   └── split_generator.py
    │   │   ├── logger.py
    │   │   ├── loss
    │   │   │   ├── __init__.py
    │   │   │   ├── l1.py
    │   │   │   ├── multilabel_classification_loss.py
    │   │   │   ├── subtoken_prediction_loss.py
    │   │   │   └── weighted_cross_entropy.py
    │   │   ├── metric_wrapper.py
    │   │   ├── metrics_ogb.py
    │   │   ├── network
    │   │   │   ├── __init__.py
    │   │   │   ├── big_bird.py
    │   │   │   ├── custom_gnn.py
    │   │   │   ├── example.py
    │   │   │   ├── gps_model.py
    │   │   │   ├── graphormer.py
    │   │   │   ├── performer.py
    │   │   │   ├── san_transformer.py
    │   │   │   └── vq.py
    │   │   ├── optimizer
    │   │   │   ├── __init__.py
    │   │   │   └── extra_optimizers.py
    │   │   ├── pooling
    │   │   │   ├── __init__.py
    │   │   │   ├── example.py
    │   │   │   └── graph_token.py
    │   │   ├── stage
    │   │   │   ├── __init__.py
    │   │   │   └── example.py
    │   │   ├── train
    │   │   │   ├── __init__.py
    │   │   │   ├── custom_train.py
    │   │   │   ├── custom_train_bechmark.py
    │   │   │   └── example.py
    │   │   ├── transform
    │   │   │   ├── __init__.py
    │   │   │   ├── posenc_stats.py
    │   │   │   ├── task_preprocessing.py
    │   │   │   └── transforms.py
    │   │   └── utils.py
    │   └── main.py
    ├── Link_Prediction
    │   ├── ID_MLP.py
    │   ├── ID_pretrain.py
    │   ├── README.md
    │   ├── env.yaml
    │   ├── model.py
    │   ├── ogbdataset.py
    │   ├── run.sh
    │   ├── run_citeseer.sh
    │   ├── run_cora.sh
    │   ├── run_pubmed.sh
    │   ├── utils.py
    │   └── vq.py
    └── Node_Classification
    │   ├── ID_MLP.py
    │   ├── README.md
    │   ├── cora_citeseer_pubmed_analysis
    │       ├── ID_MLP.py
    │       ├── README.md
    │       ├── data_utils.py
    │       ├── dataset.py
    │       ├── dataset_large.py
    │       ├── eval.py
    │       ├── logger.py
    │       ├── main.py
    │       ├── models.py
    │       ├── parse.py
    │       ├── run.sh
    │       └── vq.py
    │   ├── data
    │       ├── amazon-computer_split.npz
    │       ├── amazon-photo_split.npz
    │       ├── coauthor-cs_split.npz
    │       └── coauthor-physics_split.npz
    │   ├── data_utils.py
    │   ├── dataset.py
    │   ├── eval.py
    │   ├── large_graph
    │       ├── ID_MLP.py
    │       ├── arxiv.sh
    │       ├── arxiv_ID_MLP.py
    │       ├── data
    │       │   └── pokec
    │       │   │   └── pokec-splits.npy
    │       ├── lg_model.py
    │       ├── lg_parse.py
    │       ├── logger_ copy.py
    │       ├── logger_.py
    │       ├── main-arxiv.py
    │       ├── main-batch.py
    │       ├── pokec.sh
    │       ├── product.sh
    │       ├── product_ID_MLP.py
    │       ├── product_pre.py
    │       ├── protein.sh
    │       ├── protein_ID_MLP.py
    │       ├── protein_pre.py
    │       └── vq.py
    │   ├── logger.py
    │   ├── main.py
    │   ├── model.py
    │   ├── parse.py
    │   ├── run.sh
    │   └── vq.py
└── SSL
    ├── DGCluster
        ├── README.md
        ├── env.yml
        ├── install.py
        ├── main.py
        ├── plots.py
        ├── plots_num_clusters.py
        ├── print_results.py
        ├── run.sh
        ├── utils.py
        └── vq.py
    ├── GraphCL
        ├── transferLearning_MoleculeNet
        │   ├── README.md
        │   ├── chem
        │   │   ├── batch.py
        │   │   ├── dataloader.py
        │   │   ├── finetune.py
        │   │   ├── finetune.sh
        │   │   ├── finetune_mutag_ptc.py
        │   │   ├── loader.py
        │   │   ├── model.py
        │   │   ├── parse_result.py
        │   │   ├── pretrain_contextpred.py
        │   │   ├── pretrain_deepgraphinfomax.py
        │   │   ├── pretrain_edgepred.py
        │   │   ├── pretrain_graphcl.py
        │   │   ├── pretrain_masking.py
        │   │   ├── pretrain_supervised.py
        │   │   ├── run.sh
        │   │   ├── splitters.py
        │   │   ├── util.py
        │   │   └── vq.py
        │   └── environment.yml
        └── unsupervised_TU
        │   ├── README.md
        │   ├── arguments.py
        │   ├── aug.py
        │   ├── cortex_DIM
        │       ├── configs
        │       │   ├── convnets.py
        │       │   └── resnets.py
        │       ├── functions
        │       │   ├── dim_losses.py
        │       │   ├── gan_losses.py
        │       │   └── misc.py
        │       └── nn_modules
        │       │   ├── convnet.py
        │       │   ├── encoder.py
        │       │   ├── mi_networks.py
        │       │   ├── misc.py
        │       │   └── resnet.py
        │   ├── deepinfomax.py
        │   ├── deepinfomax_v.py
        │   ├── evaluate_embedding.py
        │   ├── gin.py
        │   ├── go.sh
        │   ├── gsimclr.py
        │   ├── losses.py
        │   ├── model.py
        │   ├── test.py
        │   └── vq.py
    └── GraphMAE
        ├── README.md
        ├── configs.yml
        ├── graphmae
            ├── __init__.py
            ├── datasets
            │   ├── __init__.py
            │   └── data_util.py
            ├── evaluation.py
            ├── models
            │   ├── __init__.py
            │   ├── dot_gat.py
            │   ├── edcoder.py
            │   ├── gat.py
            │   ├── gcn.py
            │   ├── gin.py
            │   ├── loss_func.py
            │   └── vq.py
            └── utils.py
        ├── main_transductive.py
        └── run_transductive.sh


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Yuankai Luo
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 🔍 Research Series on Classic GNNs
 2 | 
 3 | | Benchmarking Series: Reassessing Classic GNNs | Paper |
 4 | | - | - |
 5 | | [Classic GNNs are Strong Baselines: Reassessing GNNs for Node Classification](https://github.com/LUOyk1999/tunedGNN) (NeurIPS 2024) | [Link](https://openreview.net/forum?id=xkljKdGe4E) |
 6 | | [Can Classic GNNs Be Strong Baselines for Graph-level Tasks?](https://github.com/LUOyk1999/GNNPlus) (ICML 2025) | [Link](https://arxiv.org/abs/2502.09263) | 
 7 | 
 8 | | Follow-up Studies | Paper |
 9 | | - | - |
10 | | [When Dropout Meets Graph Convolutional Networks](https://github.com/LUOyk1999/dropout-theory) (ICLR 2025)  | [Link](https://openreview.net/forum?id=PwxYoMvmvy) | 
11 | | **_[Node Identifiers: Compact, Discrete Representations for Efficient Graph Learning](https://github.com/LUOyk1999/NodeID) (ICLR 2025)_** | [Link](https://openreview.net/forum?id=t9lS1lX9FQ) | 
12 | 
13 | # Node Identifiers: Compact, Discrete Representations for Efficient Graph Learning (ICLR 2025)
14 | 
15 | [![OpenReview](https://img.shields.io/badge/OpenReview-t9lS1lX9FQ-b31b1b.svg)](https://openreview.net/forum?id=t9lS1lX9FQ) [![arXiv](https://img.shields.io/badge/arXiv-2405.16435-b31b1b.svg)](https://arxiv.org/abs/2405.16435)
16 | 
17 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/structure-aware-semantic-node-identifiers-for/node-classification-on-questions)](https://paperswithcode.com/sota/node-classification-on-questions?p=structure-aware-semantic-node-identifiers-for)
18 | 
19 | ## Python environment setup with Conda
20 | 
21 | Tested with Python 3.7, PyTorch 1.12.1, PyTorch Geometric 2.3.1, and DGL 1.0.2.
22 | ```bash
23 | pip install pandas
24 | pip install scikit_learn
25 | pip install numpy
26 | pip install scipy
27 | pip install einops
28 | pip install ogb
29 | pip install pyyaml
30 | pip install googledrivedownloader
31 | pip install networkx
32 | pip install vqtorch
33 | pip install gdown
34 | pip install tensorboardX
35 | pip install matplotlib
36 | pip install seaborn
37 | pip install rdkit
38 | pip install tensorboard
39 | ```
40 | 
41 | ## Overview
42 | 
43 | * `./SL`: Experiment code for supervised Node ID.
44 | 
45 | * `./SSL`: Experiment code for self-supervised Node ID.
46 | 
47 | ## Reference
48 | 
49 | If you find our code useful, please consider citing our work:
50 | 
51 | ```
52 | @inproceedings{
53 | luo2025node,
54 | title={Node Identifiers: Compact, Discrete Representations for Efficient Graph Learning},
55 | author={Yuankai Luo and Hongkang Li and Qijiong Liu and Lei Shi and Xiao-Ming Wu},
56 | booktitle={The Thirteenth International Conference on Learning Representations},
57 | year={2025},
58 | url={https://openreview.net/forum?id=t9lS1lX9FQ}
59 | }
60 | ```
61 | 
62 | 
63 | ## Poster
64 | 
65 | ![nodeid.png](https://raw.githubusercontent.com/LUOyk1999/images/refs/heads/main/images/nodeid.png)
66 | 
67 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/README.md:
--------------------------------------------------------------------------------
 1 | ## Python environment setup with Conda
 2 | 
 3 | ```bash
 4 | conda create -n graphgps python=3.10
 5 | conda activate graphgps
 6 | 
 7 | conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia
 8 | pip install torch_geometric==2.3.0
 9 | pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu117.html
10 | 
11 | conda install openbabel fsspec rdkit -c conda-forge
12 | 
13 | pip install pytorch-lightning yacs torchmetrics
14 | pip install performer-pytorch
15 | pip install tensorboardX
16 | pip install ogb
17 | pip install wandb
18 | 
19 | conda clean --all
20 | ```
21 | 
22 | 
23 | ## Running Training
24 | ```bash
25 | conda activate graphgps
26 | python main.py --cfg configs/LRGB-tuned/peptides-struct-GCN.yaml wandb.use False
27 | python ID_MLP_s.py
28 | python main.py --cfg configs/LRGB-tuned/peptides-func-GCN.yaml wandb.use False
29 | python ID_MLP_f.py
30 | ```
31 | 
32 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/configs/LRGB-tuned/peptides-func-GCN.yaml:
--------------------------------------------------------------------------------
 1 | out_dir: results
 2 | metric_best: ap
 3 | wandb:
 4 |   use: True
 5 |   project: peptides-func
 6 | dataset:
 7 |   format: OGB
 8 |   name: peptides-functional
 9 |   task: graph
10 |   task_type: classification_multilabel
11 |   transductive: False
12 |   node_encoder: True
13 |   node_encoder_name: Atom+RWSE
14 |   node_encoder_bn: False
15 |   edge_encoder: True
16 |   edge_encoder_name: Bond
17 |   edge_encoder_bn: False
18 | posenc_LapPE:
19 |   enable: False
20 |   eigen:
21 |     laplacian_norm: none
22 |     eigvec_norm: L2
23 |     max_freqs: 10
24 |   model: DeepSet
25 |   dim_pe: 16
26 |   layers: 2
27 |   raw_norm_type: none
28 | posenc_RWSE:
29 |   enable: True
30 |   kernel:
31 |     times_func: range(1,21)
32 |   model: Linear
33 |   dim_pe: 28
34 |   raw_norm_type: BatchNorm
35 | train:
36 |   mode: custom
37 |   batch_size: 200
38 |   eval_period: 1
39 |   ckpt_period: 100
40 | model:
41 |   type: custom_gnn
42 |   loss_fun: cross_entropy
43 |   graph_pooling: mean
44 | gnn:
45 |   head: mlp_graph
46 |   layers_pre_mp: 0
47 |   layers_mp: 6
48 |   layers_post_mp: 3
49 |   dim_inner: 235
50 |   layer_type: gcnconv
51 |   act: gelu
52 |   residual: True
53 |   dropout: 0.1
54 | optim:
55 |   clip_grad_norm: True
56 |   optimizer: adamW
57 |   weight_decay: 0.0
58 |   base_lr: 0.001
59 |   max_epoch: 500
60 |   scheduler: cosine_with_warmup
61 |   num_warmup_epochs: 5


--------------------------------------------------------------------------------
/SL/Graph_Classification/configs/LRGB-tuned/peptides-struct-GCN.yaml:
--------------------------------------------------------------------------------
 1 | out_dir: results
 2 | metric_best: mae
 3 | metric_agg: argmin
 4 | wandb:
 5 |   use: True
 6 |   project: peptides-struct
 7 | dataset:
 8 |   format: OGB
 9 |   name: peptides-structural
10 |   task: graph
11 |   task_type: regression
12 |   transductive: False
13 |   node_encoder: True
14 |   node_encoder_name: Atom+LapPE
15 |   node_encoder_bn: False
16 |   edge_encoder: True
17 |   edge_encoder_name: Bond
18 |   edge_encoder_bn: False
19 | posenc_LapPE:
20 |   enable: True
21 |   eigen:
22 |     laplacian_norm: none
23 |     eigvec_norm: L2
24 |     max_freqs: 10
25 |   model: DeepSet
26 |   dim_pe: 16
27 |   layers: 2
28 |   raw_norm_type: none
29 | posenc_RWSE:
30 |   enable: False
31 |   kernel:
32 |     times_func: range(1,21)
33 |   model: Linear
34 |   dim_pe: 28
35 |   raw_norm_type: BatchNorm
36 | train:
37 |   mode: custom
38 |   batch_size: 200
39 |   eval_period: 1
40 |   ckpt_period: 100
41 | model:
42 |   type: custom_gnn
43 |   loss_fun: l1
44 |   graph_pooling: mean
45 | gnn:
46 |   head: mlp_graph
47 |   layers_pre_mp: 0
48 |   layers_mp: 6
49 |   layers_post_mp: 3
50 |   dim_inner: 235
51 |   layer_type: gcnconv
52 |   act: gelu
53 |   residual: True
54 |   dropout: 0.1
55 | optim:
56 |   clip_grad_norm: True
57 |   optimizer: adamW
58 |   weight_decay: 0.0
59 |   base_lr: 0.001
60 |   max_epoch: 250
61 |   scheduler: cosine_with_warmup
62 |   num_warmup_epochs: 5


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/__init__.py:
--------------------------------------------------------------------------------
 1 | from .act import * # noqa
 2 | from .config import * # noqa
 3 | from .encoder import * # noqa
 4 | from .head import * # noqa
 5 | from .layer import * # noqa
 6 | from .loader import * # noqa
 7 | from .loss import * # noqa
 8 | from .network import * # noqa
 9 | from .optimizer import * # noqa
10 | from .pooling import * # noqa
11 | from .stage import * # noqa
12 | from .train import * # noqa
13 | from .transform import * # noqa
14 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/act/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | # List every sibling *.py module (all but __init__.py) in __all__ so a star-import loads them.
4 | modules = glob.glob(join(dirname(__file__), "*.py"))
5 | __all__ = [
6 |     basename(f)[:-3] for f in modules
7 |     if isfile(f) and not f.endswith('__init__.py')
8 | ]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/act/example.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | from torch_geometric.graphgym.config import cfg
 6 | from torch_geometric.graphgym.register import register_act
 7 | 
 8 | # Swish activation module: forward computes x * sigmoid(x).
 9 | class SWISH(nn.Module):
10 |     def __init__(self, inplace=False):
11 |         super().__init__()
12 |         self.inplace = inplace
13 |     # In-place mode overwrites and returns the input tensor itself; otherwise a new tensor is returned.
14 |     def forward(self, x):
15 |         if self.inplace:
16 |             x.mul_(torch.sigmoid(x))  # sigmoid(x) is evaluated before x is modified
17 |             return x
18 |         else:
19 |             return x * torch.sigmoid(x)
20 | 
21 | # Register activations by name; `cfg.mem.inplace` is read once, when this module is imported.
22 | register_act('swish', partial(SWISH, inplace=cfg.mem.inplace))
23 | register_act('lrelu_03', partial(nn.LeakyReLU, 0.3, inplace=cfg.mem.inplace))
24 | 
25 | # Add Gaussian Error Linear Unit (GELU).
26 | register_act('gelu', nn.GELU)
27 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | # List every sibling *.py module (all but __init__.py) in __all__ so a star-import loads them.
4 | modules = glob.glob(join(dirname(__file__), "*.py"))
5 | __all__ = [
6 |     basename(f)[:-3] for f in modules
7 |     if isfile(f) and not f.endswith('__init__.py')
8 | ]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/custom_gnn_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | 
 3 | 
 4 | @register_config('custom_gnn')
 5 | def custom_gnn_cfg(cfg):
 6 |     """Extend the `gnn` config group of GraphGym's built-in GNN with the
 7 |     default options used by our CustomGNN network model.
 8 |     """
 9 |     # Use residual connections between the GNN layers (off by default).
10 |     cfg.gnn.residual = False
11 |     cfg.gnn.heads = 4
12 |     cfg.gnn.attn_dropout = 0.1
13 |     # NOTE(review): `vn` presumably stands for "virtual node" - confirm in the model code.
14 |     cfg.gnn.use_vn = True
15 |     cfg.gnn.vn_pooling = 'add'
16 |     # NOTE(review): presumably selects the normalization layer type - confirm in the model code.
17 |     cfg.gnn.norm_type = 'layer'
18 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/dataset_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | 
 3 | 
 4 | @register_config('dataset_cfg')
 5 | def dataset_cfg(cfg):
 6 |     """Add dataset-specific options to the `cfg.dataset` config group.
 7 |     """
 8 | 
 9 |     # The number of node types to expect in TypeDictNodeEncoder.
10 |     cfg.dataset.node_encoder_num_types = 0
11 | 
12 |     # The number of edge types to expect in TypeDictEdgeEncoder.
13 |     cfg.dataset.edge_encoder_num_types = 0
14 | 
15 |     # VOC/COCO Superpixels dataset version based on SLIC compactness parameter.
16 |     cfg.dataset.slic_compactness = 10
17 | 
18 |     # Infer-link parameters (e.g., for an edge prediction task).
19 |     cfg.dataset.infer_link_label = "None"  # NOTE: the string "None", not Python's None
20 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/defaults_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | 
 3 | 
 4 | @register_config('overwrite_defaults')
 5 | def overwrite_defaults_cfg(cfg):
 6 |     """Overwrite the default config values that are first set by GraphGym in
 7 |     torch_geometric.graphgym.config.set_cfg
 8 | 
 9 |     WARNING: At the time of writing, the order in which custom config-setting
10 |     functions like this one are executed is random; see the referenced
11 |     `set_cfg`. Therefore, never reset custom-added config options here; only
12 |     change those that exist in core GraphGym.
13 |     """
14 | 
15 |     # Training (and validation) pipeline mode
16 |     cfg.train.mode = 'custom'  # 'standard' uses PyTorch-Lightning since PyG 2.1
17 | 
18 |     # Overwrite default dataset name
19 |     cfg.dataset.name = 'none'
20 | 
21 |     # Overwrite default rounding precision
22 |     cfg.round = 5
23 | 
24 | 
25 | @register_config('extended_cfg')
26 | def extended_cfg(cfg):
27 |     """General extended config options.
28 |     """
29 | 
30 |     # Additional name tag used in `run_dir` and `wandb_name` auto generation.
31 |     cfg.name_tag = ""
32 | 
33 |     # In training, if True (and cfg.train.enable_ckpt is also True), always
34 |     # checkpoint the current best model based on validation performance;
35 |     # when False, follow the cfg.train.eval_period checkpointing frequency.
36 |     cfg.train.ckpt_best = False
37 |     # NOTE(review): purpose of the next option is not evident from this file - confirm where it is read.
38 |     cfg.train.eval_smoothing_metrics = False
39 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/example.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | from yacs.config import CfgNode as CN
 3 | 
 4 | 
 5 | @register_config('example')
 6 | def set_cfg_example(cfg):
 7 |     r'''
 8 |     Set default values for the example customized options on `cfg`.
 9 |     The config node is modified in place; nothing is returned.
10 |     '''
11 | 
12 |     # ----------------------------------------------------------------------- #
13 |     # Customized options
14 |     # ----------------------------------------------------------------------- #
15 | 
16 |     # example argument
17 |     cfg.example_arg = 'example'
18 | 
19 |     # example argument group
20 |     cfg.example_group = CN()
21 | 
22 |     # then argument can be specified within the group
23 |     cfg.example_group.example_arg = 'example'
24 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/graphormer_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | from yacs.config import CfgNode as CN
 3 | 
 4 | # Adds the `graphormer` and `posenc_GraphormerBias` config groups with their defaults.
 5 | @register_config('cfg_graphormer')
 6 | def set_cfg_gt(cfg):
 7 |     cfg.graphormer = CN()
 8 |     cfg.graphormer.num_layers = 6
 9 |     cfg.graphormer.embed_dim = 80
10 |     cfg.graphormer.num_heads = 4
11 |     cfg.graphormer.dropout = 0.0
12 |     cfg.graphormer.attention_dropout = 0.0
13 |     cfg.graphormer.mlp_dropout = 0.0
14 |     cfg.graphormer.input_dropout = 0.0
15 |     cfg.graphormer.use_graph_token = True
16 |     # Separate config group for the GraphormerBias encoding; disabled by default.
17 |     cfg.posenc_GraphormerBias = CN()
18 |     cfg.posenc_GraphormerBias.enable = False
19 |     cfg.posenc_GraphormerBias.node_degrees_only = False
20 |     cfg.posenc_GraphormerBias.dim_pe = 0
21 |     cfg.posenc_GraphormerBias.num_spatial_types = None
22 |     cfg.posenc_GraphormerBias.num_in_degrees = None
23 |     cfg.posenc_GraphormerBias.num_out_degrees = None
24 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/gt_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | from yacs.config import CfgNode as CN
 3 | 
 4 | 
 5 | @register_config('cfg_gt')
 6 | def set_cfg_gt(cfg):
 7 |     """Configuration for Graph Transformer-style models, e.g.:
 8 |     - Spectral Attention Network (SAN) Graph Transformer.
 9 |     - "vanilla" Transformer / Performer.
10 |     - General Powerful Scalable (GPS) Model.
11 |     """
12 | 
13 |     # Graph Transformer argument group
14 |     cfg.gt = CN()
15 | 
16 |     # Type of Graph Transformer layer to use
17 |     cfg.gt.layer_type = 'SANLayer'
18 | 
19 |     # Number of Transformer layers in the model
20 |     cfg.gt.layers = 3
21 | 
22 |     # Number of attention heads in the Graph Transformer
23 |     cfg.gt.n_heads = 8
24 | 
25 |     # Size of the hidden node and edge representation
26 |     cfg.gt.dim_hidden = 64
27 | 
28 |     # Full attention SAN transformer including all possible pairwise edges
29 |     cfg.gt.full_graph = True
30 | 
31 |     # SAN real vs fake edge attention weighting coefficient
32 |     cfg.gt.gamma = 1e-5
33 | 
34 |     # Histogram of in-degrees of nodes in the training set used by PNAConv.
35 |     # Used when `gt.layer_type: PNAConv+...`. If empty it is precomputed during
36 |     # the dataset loading process.
37 |     cfg.gt.pna_degrees = []
38 | 
39 |     # Dropout in feed-forward module.
40 |     cfg.gt.dropout = 0.0
41 | 
42 |     # Dropout in self-attention.
43 |     cfg.gt.attn_dropout = 0.0
44 |     # Normalization and residual switches - NOTE(review): confirm how the layers consume them.
45 |     cfg.gt.layer_norm = False
46 | 
47 |     cfg.gt.batch_norm = True
48 | 
49 |     cfg.gt.residual = True
50 | 
51 |     # BigBird model/GPS-BigBird layer.
52 |     cfg.gt.bigbird = CN()
53 | 
54 |     cfg.gt.bigbird.attention_type = "block_sparse"
55 | 
56 |     cfg.gt.bigbird.chunk_size_feed_forward = 0
57 | 
58 |     cfg.gt.bigbird.is_decoder = False
59 | 
60 |     cfg.gt.bigbird.add_cross_attention = False
61 | 
62 |     cfg.gt.bigbird.hidden_act = "relu"
63 | 
64 |     cfg.gt.bigbird.max_position_embeddings = 128
65 | 
66 |     cfg.gt.bigbird.use_bias = False
67 | 
68 |     cfg.gt.bigbird.num_random_blocks = 3
69 | 
70 |     cfg.gt.bigbird.block_size = 3
71 | 
72 |     cfg.gt.bigbird.layer_norm_eps = 1e-6
73 |     # NOTE(review): `vn` presumably means "virtual node" - confirm against the model code.
74 |     cfg.gt.vn_pooling = 'mean'
75 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/optimizers_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | 
 3 | 
 4 | @register_config('extended_optim')
 5 | def extended_optim_cfg(cfg):
 6 |     """Extend the optimizer config group that is first set by GraphGym in
 7 |     torch_geometric.graphgym.config.set_cfg.
 8 |     """
 9 | 
10 |     # Number of batches to accumulate gradients over before updating parameters
11 |     # Requires `custom` training loop, set `train.mode: custom`
12 |     cfg.optim.batch_accumulation = 1
13 | 
14 |     # ReduceLROnPlateau: Factor by which the learning rate will be reduced
15 |     cfg.optim.reduce_factor = 0.1
16 | 
17 |     # ReduceLROnPlateau: #epochs without improvement after which LR gets reduced
18 |     cfg.optim.schedule_patience = 10
19 | 
20 |     # ReduceLROnPlateau: Lower bound on the learning rate
21 |     cfg.optim.min_lr = 0.0
22 | 
23 |     # For schedulers with warm-up phase, set the warm-up number of epochs
24 |     cfg.optim.num_warmup_epochs = 50
25 | 
26 |     # Clip gradient norms while training
27 |     cfg.optim.clip_grad_norm = False
28 |     cfg.optim.clip_grad_norm_value = 1.0  # presumably the max norm used when clipping - TODO confirm
29 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/posenc_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | from yacs.config import CfgNode as CN
 3 | 
 4 | 
 5 | @register_config('posenc')
 6 | def set_cfg_posenc(cfg):
 7 |     """Extend configuration with positional encoding options.
 8 |     """
 9 | 
10 |     # Argument group for each Positional Encoding class.
11 |     cfg.posenc_LapPE = CN()
12 |     cfg.posenc_SignNet = CN()
13 |     cfg.posenc_RWSE = CN()
14 |     cfg.posenc_RWSEEdge = CN()
15 |     cfg.posenc_HKdiagSE = CN()
16 |     cfg.posenc_ElstaticSE = CN()
17 |     cfg.posenc_EquivStableLapPE = CN()
18 | 
19 |     # Common arguments to all PE types except EquivStableLapPE (set below).
20 |     for name in ['posenc_LapPE', 'posenc_SignNet',
21 |                  'posenc_RWSE', 'posenc_RWSEEdge', 'posenc_HKdiagSE', 'posenc_ElstaticSE']:
22 |         pecfg = getattr(cfg, name)
23 | 
24 |         # Use extended positional encodings
25 |         pecfg.enable = False
26 | 
27 |         # Neural-net model type within the PE encoder:
28 |         # 'DeepSet', 'Transformer', 'Linear', 'none', ...
29 |         pecfg.model = 'none'
30 | 
31 |         # Size of Positional Encoding embedding
32 |         pecfg.dim_pe = 16
33 | 
34 |         # Number of layers in PE encoder model
35 |         pecfg.layers = 3
36 | 
37 |         # Number of attention heads in PE encoder when model == 'Transformer'
38 |         pecfg.n_heads = 4
39 | 
40 |         # Number of layers to apply in LapPE encoder post its pooling stage
41 |         pecfg.post_layers = 0
42 | 
43 |         # Choice of normalization applied to raw PE stats: 'none', 'BatchNorm'
44 |         pecfg.raw_norm_type = 'none'
45 | 
46 |         # In addition to appending PE to the node features, pass them also as
47 |         # a separate variable in the PyG graph batch object.
48 |         pecfg.pass_as_var = False
49 | 
50 |     # Config for EquivStable LapPE
51 |     cfg.posenc_EquivStableLapPE.enable = False
52 |     cfg.posenc_EquivStableLapPE.raw_norm_type = 'none'
53 | 
54 |     # Config for Laplacian Eigen-decomposition for PEs that use it.
55 |     for name in ['posenc_LapPE', 'posenc_SignNet', 'posenc_EquivStableLapPE']:
56 |         pecfg = getattr(cfg, name)
57 |         pecfg.eigen = CN()
58 | 
59 |         # The normalization scheme for the graph Laplacian: 'none', 'sym', or 'rw'
60 |         pecfg.eigen.laplacian_norm = 'sym'
61 | 
62 |         # The normalization scheme for the eigen vectors of the Laplacian
63 |         pecfg.eigen.eigvec_norm = 'L2'
64 | 
65 |         # Maximum number of top smallest frequencies & eigenvectors to use
66 |         pecfg.eigen.max_freqs = 10
67 | 
68 |     # Config for SignNet-specific options.
69 |     cfg.posenc_SignNet.phi_out_dim = 4
70 |     cfg.posenc_SignNet.phi_hidden_dim = 64
71 | 
72 |     for name in ['posenc_RWSE', 'posenc_RWSEEdge', 'posenc_HKdiagSE', 'posenc_ElstaticSE']:
73 |         pecfg = getattr(cfg, name)
74 | 
75 |         # Config for Kernel-based PE specific options.
76 |         pecfg.kernel = CN()
77 | 
78 |         # List of times to compute the heat kernel for (the time is equivalent to
79 |         # the variance of the kernel) / the number of steps for random walk kernel
80 |         # Can be overridden by `posenc.kernel.times_func`
81 |         pecfg.kernel.times = []
82 | 
83 |         # Python snippet to generate `posenc.kernel.times`, e.g. 'range(1, 17)'
84 |         # If set, it will be executed via `eval()` and override posenc.kernel.times
85 |         pecfg.kernel.times_func = ''  # eval()'d per the comment above: only load trusted configs
86 | 
87 |     # Override default, electrostatic kernel has fixed set of 10 measures.
88 |     cfg.posenc_ElstaticSE.kernel.times_func = 'range(10)'
89 |     # Options set only on the RWSEEdge group.
90 |     cfg.posenc_RWSEEdge.num_global = 2
91 |     cfg.posenc_RWSEEdge.global_edge_dropout = 0.2
92 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/pretrained_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | from yacs.config import CfgNode as CN
 3 | 
 4 | 
 5 | @register_config('cfg_pretrained')
 6 | def set_cfg_pretrained(cfg):
 7 |     """Configuration options for loading a pretrained model.
 8 |     """
 9 | 
10 |     cfg.pretrained = CN()
11 | 
12 |     # Directory path to a saved experiment, if set, load the model from there
13 |     # and fine-tune / run inference with it on a specified dataset.
14 |     cfg.pretrained.dir = ""
15 | 
16 |     # Discard pretrained weights of the prediction head and reinitialize.
17 |     cfg.pretrained.reset_prediction_head = True
18 | 
19 |     # Freeze the main pretrained 'body' of the model, learning only the new head
20 |     cfg.pretrained.freeze_main = False
21 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/split_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | 
 3 | 
 4 | @register_config('split')
 5 | def set_cfg_split(cfg):
 6 |     """Reconfigure the default config value for dataset split options.
 7 | 
 8 |     Returns:
 9 |         Reconfigured split configuration use by the experiment.
10 |     """
11 | 
12 |     # Default to selecting the standard split that ships with the dataset
13 |     cfg.dataset.split_mode = 'standard'
14 | 
15 |     # Choose a particular split to use if multiple splits are available
16 |     cfg.dataset.split_index = 0
17 | 
18 |     # Dir to cache cross-validation splits
19 |     cfg.dataset.split_dir = './splits'
20 | 
21 |     # Choose to run multiple splits in one program execution, if set,
22 |     # takes the precedence over cfg.dataset.split_index for split selection
23 |     cfg.run_multiple_splits = []
24 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/config/wandb_config.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_config
 2 | from yacs.config import CfgNode as CN
 3 | 
 4 | 
 5 | @register_config('cfg_wandb')
 6 | def set_cfg_wandb(cfg):
 7 |     """Weights & Biases tracker configuration.
 8 |     """
 9 | 
10 |     # WandB group
11 |     cfg.wandb = CN()
12 | 
13 |     # Use wandb or not
14 |     cfg.wandb.use = True
15 | 
16 |     # Wandb entity name, should exist beforehand
17 |     cfg.wandb.entity = "add-your-wandb-here"
18 | 
19 |     # Wandb project name, will be created in your team if doesn't exist already
20 |     cfg.wandb.project = "gtblueprint"
21 | 
22 |     # Optional run name
23 |     cfg.wandb.name = ""
24 | 
25 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Expose every sibling ``*.py`` module of this package (except this
# ``__init__.py`` itself) through ``__all__`` by its name without extension.
modules = glob.glob(join(dirname(__file__), "*.py"))
__all__ = [
    basename(path)[:-3]
    for path in modules
    if not path.endswith('__init__.py') and isfile(path)
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/ast_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.graphgym.register import (register_node_encoder,
 3 |                                                register_edge_encoder)
 4 | 
 5 | """
 6 | === Description of the ogbg-code2 dataset ===
 7 | 
 8 | * Node Encoder code based on OGB's:
 9 | https://github.com/snap-stanford/ogb/blob/master/examples/graphproppred/code2/utils.py
10 | 
11 | Node Encoder config parameters are set based on the OGB example:
12 | https://github.com/snap-stanford/ogb/blob/master/examples/graphproppred/code2/main_pyg.py
13 | where the following three node features are used:
14 | 1. node type
15 | 2. node attribute
16 | 3. node depth
17 | 
18 | nodetypes_mapping = pd.read_csv(os.path.join(dataset.root, 'mapping', 'typeidx2type.csv.gz'))
19 | nodeattributes_mapping = pd.read_csv(os.path.join(dataset.root, 'mapping', 'attridx2attr.csv.gz'))
20 | num_nodetypes = len(nodetypes_mapping['type'])
21 | num_nodeattributes = len(nodeattributes_mapping['attr'])
22 | max_depth = 20
23 | 
24 | * Edge attributes are generated by `augment_edge` function dynamically:
25 | edge_attr[:,0]: whether it is AST edge (0) or next-token edge (1)
26 | edge_attr[:,1]: whether it is original direction (0) or inverse direction (1)
27 | """
28 | 
# Hard-coded sizes of the embedding tables used by ASTNodeEncoder below.
# NOTE(review): the module notes above derive the first two from the OGB
# mapping files; presumably these literals match those lengths -- confirm.
num_nodetypes = 98
num_nodeattributes = 10030
# Depths larger than this are clamped to it in ASTNodeEncoder.forward.
max_depth = 20
32 | 
33 | 
@register_node_encoder('ASTNode')
class ASTNodeEncoder(torch.nn.Module):
    """Node encoder for Abstract Syntax Tree (AST) graphs (ogbg-code2).

    Sums three learned embeddings: node type, node attribute and node depth.

    Input:
        x: Default node feature. Column 0 is used as the node type index and
            column 1 as the node attribute index.
        node_depth: The depth of the node in the AST.
    Output:
        emb_dim-dimensional vector, written to ``batch.x``.
    """

    def __init__(self, emb_dim):
        super().__init__()
        self.max_depth = max_depth

        make_table = torch.nn.Embedding
        self.type_encoder = make_table(num_nodetypes, emb_dim)
        self.attribute_encoder = make_table(num_nodeattributes, emb_dim)
        # One extra row so that index `max_depth` itself is valid.
        self.depth_encoder = make_table(self.max_depth + 1, emb_dim)

    def forward(self, batch):
        node_feats = batch.x

        # Flattened view of the depths; the clamping below writes through the
        # view, so `batch.node_depth` is clamped in place as well.
        depth = batch.node_depth.view(-1)
        too_deep = depth > self.max_depth
        depth[too_deep] = self.max_depth

        type_emb = self.type_encoder(node_feats[:, 0])
        attr_emb = self.attribute_encoder(node_feats[:, 1])
        depth_emb = self.depth_encoder(depth)

        batch.x = type_emb + attr_emb + depth_emb
        return batch
61 | 
62 | 
@register_edge_encoder('ASTEdge')
class ASTEdgeEncoder(torch.nn.Module):
    """Edge encoder for Abstract Syntax Tree (AST) graphs (ogbg-code2).

    Edge attributes are generated by `augment_edge` function dynamically and
    are expected to be:
    edge_attr[:,0]: whether it is AST edge (0) or next-token edge (1)
    edge_attr[:,1]: whether it is original direction (0) or inverse direction (1)

    The output edge embedding is the sum of one learned embedding per column.

    Args:
        emb_dim (int): Output edge embedding dimension
    """

    def __init__(self, emb_dim):
        super().__init__()
        # Both columns are binary, hence two rows per table.
        self.embedding_type = torch.nn.Embedding(num_embeddings=2,
                                                 embedding_dim=emb_dim)
        self.embedding_direction = torch.nn.Embedding(num_embeddings=2,
                                                      embedding_dim=emb_dim)

    def forward(self, batch):
        attrs = batch.edge_attr
        type_emb = self.embedding_type(attrs[:, 0])
        direction_emb = self.embedding_direction(attrs[:, 1])
        batch.edge_attr = type_emb + direction_emb
        return batch
86 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/dummy_edge_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.graphgym.register import register_edge_encoder
 3 | 
 4 | 
 5 | @register_edge_encoder('DummyEdge')
 6 | class DummyEdgeEncoder(torch.nn.Module):
 7 |     def __init__(self, emb_dim):
 8 |         super().__init__()
 9 | 
10 |         self.encoder = torch.nn.Embedding(num_embeddings=1,
11 |                                           embedding_dim=emb_dim)
12 |         # torch.nn.init.xavier_uniform_(self.encoder.weight.data)
13 | 
14 |     def forward(self, batch):
15 |         dummy_attr = batch.edge_index.new_zeros(batch.edge_index.shape[1])
16 |         batch.edge_attr = self.encoder(dummy_attr)
17 |         return batch
18 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/equivstable_laplace_pos_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch_geometric.graphgym.config import cfg
 4 | from torch_geometric.graphgym.register import register_node_encoder
 5 | 
 6 | 
 7 | @register_node_encoder('EquivStableLapPE')
 8 | class EquivStableLapPENodeEncoder(torch.nn.Module):
 9 |     """Equivariant and Stable Laplace Positional Embedding node encoder.
10 | 
11 |     This encoder simply transforms the k-dim node LapPE to d-dim to be
12 |     later used at the local GNN module as edge weights.
13 |     Based on the approach proposed in paper https://openreview.net/pdf?id=e95i1IHcWj
14 |     
15 |     Args:
16 |         dim_emb: Size of final node embedding
17 |     """
18 | 
19 |     def __init__(self, dim_emb):
20 |         super().__init__()
21 | 
22 |         pecfg = cfg.posenc_EquivStableLapPE
23 |         max_freqs = pecfg.eigen.max_freqs  # Num. eigenvectors (frequencies)
24 |         norm_type = pecfg.raw_norm_type.lower()  # Raw PE normalization layer type
25 | 
26 |         if norm_type == 'batchnorm':
27 |             self.raw_norm = nn.BatchNorm1d(max_freqs)
28 |         else:
29 |             self.raw_norm = None
30 | 
31 |         self.linear_encoder_eigenvec = nn.Linear(max_freqs, dim_emb)
32 | 
33 |     def forward(self, batch):
34 |         if not (hasattr(batch, 'EigVals') and hasattr(batch, 'EigVecs')):
35 |             raise ValueError("Precomputed eigen values and vectors are "
36 |                              f"required for {self.__class__.__name__}; set "
37 |                              f"config 'posenc_EquivStableLapPE.enable' to True")
38 |         pos_enc = batch.EigVecs
39 | 
40 |         empty_mask = torch.isnan(pos_enc)  # (Num nodes) x (Num Eigenvectors)
41 |         pos_enc[empty_mask] = 0.  # (Num nodes) x (Num Eigenvectors)
42 | 
43 |         if self.raw_norm:
44 |             pos_enc = self.raw_norm(pos_enc)
45 | 
46 |         pos_enc = self.linear_encoder_eigenvec(pos_enc)
47 | 
48 |         # Keep PE separate in a variable
49 |         batch.pe_EquivStableLapPE = pos_enc
50 | 
51 |         return batch
52 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/example.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from ogb.utils.features import get_bond_feature_dims
 3 | 
 4 | from torch_geometric.graphgym.register import (
 5 |     register_edge_encoder,
 6 |     register_node_encoder,
 7 | )
 8 | 
 9 | 
@register_node_encoder('example')
class ExampleNodeEncoder(torch.nn.Module):
    """Example encoder for integer node features.

    Args:
        emb_dim: Output node embedding dimension
        num_classes: The number of classes for the embedding mapping to learn
    """

    def __init__(self, emb_dim, num_classes=None):
        super().__init__()

        table = torch.nn.Embedding(num_classes, emb_dim)
        torch.nn.init.xavier_uniform_(table.weight.data)
        self.encoder = table

    def forward(self, batch):
        # Encode just the first dimension if more exist
        first_column = batch.x[:, 0]
        batch.x = self.encoder(first_column)

        return batch
28 | 
29 | 
@register_edge_encoder('example')
class ExampleEdgeEncoder(torch.nn.Module):
    """Example encoder for integer (categorical) edge features.

    One embedding table is created per bond feature dimension reported by
    ``get_bond_feature_dims()``. The edge embedding is the sum of the
    per-column embeddings of ``batch.edge_attr``.

    Args:
        emb_dim: Output edge embedding dimension
    """

    def __init__(self, emb_dim):
        super().__init__()

        self.bond_embedding_list = torch.nn.ModuleList()
        full_bond_feature_dims = get_bond_feature_dims()

        for dim in full_bond_feature_dims:
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.bond_embedding_list.append(emb)

    def forward(self, batch):
        edge_attr = batch.edge_attr
        bond_embedding = 0
        # The column count must come from the same tensor that is indexed
        # below; the previous code read it from `batch.edge_feature`, a
        # different attribute from the one being encoded.
        for i in range(edge_attr.shape[1]):
            bond_embedding += self.bond_embedding_list[i](edge_attr[:, i])

        batch.edge_attr = bond_embedding
        return batch
51 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/linear_edge_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.graphgym import cfg
 3 | from torch_geometric.graphgym.register import register_edge_encoder
 4 | 
 5 | 
 6 | @register_edge_encoder('LinearEdge')
 7 | class LinearEdgeEncoder(torch.nn.Module):
 8 |     def __init__(self, emb_dim):
 9 |         super().__init__()
10 |         if cfg.dataset.name in ['MNIST', 'CIFAR10']:
11 |             self.in_dim = 1
12 |         else:
13 |             raise ValueError("Input edge feature dim is required to be hardset "
14 |                              "or refactored to use a cfg option.")
15 |         self.encoder = torch.nn.Linear(self.in_dim, emb_dim)
16 | 
17 |     def forward(self, batch):
18 |         batch.edge_attr = self.encoder(batch.edge_attr.view(-1, self.in_dim))
19 |         return batch
20 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/linear_node_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.graphgym import cfg
 3 | from torch_geometric.graphgym.register import register_node_encoder
 4 | 
 5 | 
 6 | @register_node_encoder('LinearNode')
 7 | class LinearNodeEncoder(torch.nn.Module):
 8 |     def __init__(self, emb_dim):
 9 |         super().__init__()
10 |         
11 |         self.encoder = torch.nn.Linear(cfg.share.dim_in, emb_dim)
12 | 
13 |     def forward(self, batch):
14 |         batch.x = self.encoder(batch.x)
15 |         return batch
16 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/ppa_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.graphgym.register import (register_node_encoder,
 3 |                                                register_edge_encoder)
 4 | 
 5 | 
 6 | @register_node_encoder('PPANode')
 7 | class PPANodeEncoder(torch.nn.Module):
 8 |     """
 9 |     Uniform input node embedding for PPA that has no node features.
10 |     """
11 | 
12 |     def __init__(self, emb_dim):
13 |         super().__init__()
14 |         self.encoder = torch.nn.Embedding(1, emb_dim)
15 | 
16 |     def forward(self, batch):
17 |         batch.x = self.encoder(batch.x)
18 |         return batch
19 | 
20 | 
@register_edge_encoder('PPAEdge')
class PPAEdgeEncoder(torch.nn.Module):
    """Linear edge encoder for PPA mapping 7 input edge features to emb_dim."""

    def __init__(self, emb_dim):
        super().__init__()
        self.encoder = torch.nn.Linear(in_features=7, out_features=emb_dim)

    def forward(self, batch):
        edge_emb = self.encoder(batch.edge_attr)
        batch.edge_attr = edge_emb
        return batch
30 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/rwse_edge_encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch_geometric.graphgym.config import cfg
 4 | from torch_geometric.graphgym.register import (register_edge_encoder,
 5 | act_dict)
 6 | 
 7 | 
 8 | @register_edge_encoder('RWSEEdge')
 9 | class RWSEEdgeEncoder(torch.nn.Module):
10 |     def __init__(self, emb_dim):
11 |         super().__init__()
12 |         pe_dim = len(cfg.posenc_RWSEEdge.kernel.times) + 1
13 |         self.pe_dim = pe_dim
14 |         self.emb_dim = emb_dim
15 | 
16 |         self.global_edge_dropout = cfg.posenc_RWSEEdge.global_edge_dropout
17 | 
18 |         self.pe_encoder = nn.Sequential(
19 |             nn.BatchNorm1d(pe_dim),
20 |             nn.Linear(pe_dim, emb_dim),
21 |             act_dict[cfg.gnn.act](),
22 |             nn.Linear(emb_dim, emb_dim),
23 |             nn.BatchNorm1d(emb_dim),
24 |         )
25 | 
26 |     def forward(self, batch):
27 |         pe_enc = torch.cat([batch.pestat_RWSEEdge, batch.pestat_RWSESelf], dim=0)
28 | 
29 |         self_loops = torch.arange(batch.num_nodes, device=pe_enc.device).view(1, -1).tile(2, 1)
30 |         edge_index = torch.cat([batch.edge_index, self_loops], dim=1)
31 | 
32 |         if 'pestat_RWSEGlobal' in batch:
33 |             global_enc = batch.pestat_RWSEGlobal
34 |             global_edge_index = batch.global_edge_index
35 | 
36 |             if self.training:
37 |                 dropout_mask = torch.rand((global_enc.shape[0],), device=global_enc.device) > self.global_edge_dropout
38 |                 global_enc = global_enc[dropout_mask]
39 |                 global_edge_index = global_edge_index[:, dropout_mask]
40 | 
41 |             pe_enc = torch.cat([pe_enc, global_enc], dim=0)
42 |             edge_index = torch.cat([edge_index, global_edge_index], dim=1)
43 | 
44 |         pe_enc = self.pe_encoder(pe_enc)
45 | 
46 |         edge_attr = pe_enc
47 |         if batch.edge_attr is not None:
48 |             edge_attr[:batch.num_edges] += batch.edge_attr
49 | 
50 |         batch.edge_index = edge_index
51 |         batch.edge_attr = edge_attr
52 |         return batch
53 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/type_dict_encoder.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch_geometric.graphgym.config import cfg
  3 | from torch_geometric.graphgym.register import (register_node_encoder,
  4 |                                                register_edge_encoder)
  5 | 
  6 | """
  7 | Generic Node and Edge encoders for datasets with node/edge features that
  8 | consist of only one type dictionary thus require a single nn.Embedding layer.
  9 | 
 10 | The number of possible Node and Edge types must be set by cfg options:
 11 | 1) cfg.dataset.node_encoder_num_types
 12 | 2) cfg.dataset.edge_encoder_num_types
 13 | 
 14 | In case of a more complex feature set, use a data-specific encoder.
 15 | 
 16 | These generic encoders can be used e.g. for:
 17 | * ZINC
 18 | cfg.dataset.node_encoder_num_types: 28
 19 | cfg.dataset.edge_encoder_num_types: 4
 20 | 
 21 | * AQSOL
 22 | cfg.dataset.node_encoder_num_types: 65
 23 | cfg.dataset.edge_encoder_num_types: 5
 24 | 
 25 | 
 26 | === Description of the ZINC dataset === 
 27 | https://github.com/graphdeeplearning/benchmarking-gnns/issues/42
 28 | The node labels are atom types and the edge labels atom bond types.
 29 | 
 30 | Node labels:
 31 | 'C': 0
 32 | 'O': 1
 33 | 'N': 2
 34 | 'F': 3
 35 | 'C H1': 4
 36 | 'S': 5
 37 | 'Cl': 6
 38 | 'O -': 7
 39 | 'N H1 +': 8
 40 | 'Br': 9
 41 | 'N H3 +': 10
 42 | 'N H2 +': 11
 43 | 'N +': 12
 44 | 'N -': 13
 45 | 'S -': 14
 46 | 'I': 15
 47 | 'P': 16
 48 | 'O H1 +': 17
 49 | 'N H1 -': 18
 50 | 'O +': 19
 51 | 'S +': 20
 52 | 'P H1': 21
 53 | 'P H2': 22
 54 | 'C H2 -': 23
 55 | 'P +': 24
 56 | 'S H1 +': 25
 57 | 'C H1 -': 26
 58 | 'P H1 +': 27
 59 | 
 60 | Edge labels:
 61 | 'NONE': 0
 62 | 'SINGLE': 1
 63 | 'DOUBLE': 2
 64 | 'TRIPLE': 3
 65 | 
 66 | 
 67 | === Description of the AQSOL dataset === 
 68 | Node labels: 
 69 | 'Br': 0, 'C': 1, 'N': 2, 'O': 3, 'Cl': 4, 'Zn': 5, 'F': 6, 'P': 7, 'S': 8, 'Na': 9, 'Al': 10,
 70 | 'Si': 11, 'Mo': 12, 'Ca': 13, 'W': 14, 'Pb': 15, 'B': 16, 'V': 17, 'Co': 18, 'Mg': 19, 'Bi': 20, 'Fe': 21,
 71 | 'Ba': 22, 'K': 23, 'Ti': 24, 'Sn': 25, 'Cd': 26, 'I': 27, 'Re': 28, 'Sr': 29, 'H': 30, 'Cu': 31, 'Ni': 32,
 72 | 'Lu': 33, 'Pr': 34, 'Te': 35, 'Ce': 36, 'Nd': 37, 'Gd': 38, 'Zr': 39, 'Mn': 40, 'As': 41, 'Hg': 42, 'Sb':
 73 | 43, 'Cr': 44, 'Se': 45, 'La': 46, 'Dy': 47, 'Y': 48, 'Pd': 49, 'Ag': 50, 'In': 51, 'Li': 52, 'Rh': 53,
 74 | 'Nb': 54, 'Hf': 55, 'Cs': 56, 'Ru': 57, 'Au': 58, 'Sm': 59, 'Ta': 60, 'Pt': 61, 'Ir': 62, 'Be': 63, 'Ge': 64
 75 |     
 76 | Edge labels: 
 77 | 'NONE': 0, 'SINGLE': 1, 'DOUBLE': 2, 'AROMATIC': 3, 'TRIPLE': 4
 78 | """
 79 | 
 80 | 
 81 | @register_node_encoder('TypeDictNode')
 82 | class TypeDictNodeEncoder(torch.nn.Module):
 83 |     def __init__(self, emb_dim):
 84 |         super().__init__()
 85 | 
 86 |         num_types = cfg.dataset.node_encoder_num_types
 87 |         if num_types < 1:
 88 |             raise ValueError(f"Invalid 'node_encoder_num_types': {num_types}")
 89 | 
 90 |         self.encoder = torch.nn.Embedding(num_embeddings=num_types,
 91 |                                           embedding_dim=emb_dim)
 92 |         # torch.nn.init.xavier_uniform_(self.encoder.weight.data)
 93 | 
 94 |     def forward(self, batch):
 95 |         # Encode just the first dimension if more exist
 96 |         batch.x = self.encoder(batch.x[:, 0])
 97 | 
 98 |         return batch
 99 | 
100 | 
@register_edge_encoder('TypeDictEdge')
class TypeDictEdgeEncoder(torch.nn.Module):
    """Generic edge encoder using one embedding table over edge types.

    The number of edge types is read from
    ``cfg.dataset.edge_encoder_num_types``.

    Args:
        emb_dim: Output edge embedding dimension

    Raises:
        ValueError: If the configured number of edge types is below 1.
    """

    def __init__(self, emb_dim):
        super().__init__()

        num_types = cfg.dataset.edge_encoder_num_types
        if num_types < 1:
            raise ValueError(f"Invalid 'edge_encoder_num_types': {num_types}")

        self.encoder = torch.nn.Embedding(num_types, emb_dim)
        # torch.nn.init.xavier_uniform_(self.encoder.weight.data)

    def forward(self, batch):
        type_ids = batch.edge_attr
        batch.edge_attr = self.encoder(type_ids)
        return batch
117 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/encoder/voc_superpixels_encoder.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch_geometric.graphgym.config import cfg
  3 | from torch_geometric.graphgym.register import (register_node_encoder,
  4 |                                                register_edge_encoder)
  5 | 
  6 | """
  7 | === Description of the VOCSuperpixels dataset === 
  8 | Each graph is a tuple (x, edge_attr, edge_index, y)
  9 | Shape of x : [num_nodes, 14]
 10 | Shape of edge_attr : [num_edges, 1] or [num_edges, 2]
 11 | Shape of edge_index : [2, num_edges]
 12 | Shape of y : [num_nodes]
 13 | """
 14 | 
# Number of input node features, matching the documented shape of x above;
# used as the input width of both the VOC and COCO node encoders.
VOC_node_input_dim = 14
# VOC_edge_input_dim = 1 or 2; defined in class VOCEdgeEncoder
 17 | 
 18 | 
 19 | @register_node_encoder('VOCNode')
 20 | class VOCNodeEncoder(torch.nn.Module):
 21 |     def __init__(self, emb_dim):
 22 |         super().__init__()
 23 | 
 24 |         node_x_mean = torch.tensor([
 25 |             4.5824501e-01, 4.3857411e-01, 4.0561178e-01, 6.7938097e-02,
 26 |             6.5604292e-02, 6.5742709e-02, 6.5212941e-01, 6.2894762e-01,
 27 |             6.0173863e-01, 2.7769071e-01, 2.6425251e-01, 2.3729359e-01,
 28 |             1.9344997e+02, 2.3472206e+02
 29 |         ])
 30 |         node_x_std = torch.tensor([
 31 |             2.5952947e-01, 2.5716761e-01, 2.7130592e-01, 5.4822665e-02,
 32 |             5.4429270e-02, 5.4474957e-02, 2.6238337e-01, 2.6600540e-01,
 33 |             2.7750680e-01, 2.5197381e-01, 2.4986187e-01, 2.6069802e-01,
 34 |             1.1768297e+02, 1.4007195e+02
 35 |         ])
 36 |         self.register_buffer('node_x_mean', node_x_mean)
 37 |         self.register_buffer('node_x_std', node_x_std)
 38 |         self.encoder = torch.nn.Linear(VOC_node_input_dim, emb_dim)
 39 | 
 40 |     def forward(self, batch):
 41 |         x = batch.x - self.node_x_mean.view(1, -1)
 42 |         x /= self.node_x_std.view(1, -1)
 43 |         batch.x = self.encoder(x)
 44 |         return batch
 45 | 
 46 | 
 47 | @register_edge_encoder('VOCEdge')
 48 | class VOCEdgeEncoder(torch.nn.Module):
 49 |     def __init__(self, emb_dim):
 50 |         super().__init__()
 51 |         edge_x_mean = torch.tensor([0.07640745, 33.73478])
 52 |         edge_x_std = torch.tensor([0.0868775, 20.945076])
 53 |         self.register_buffer('edge_x_mean', edge_x_mean)
 54 |         self.register_buffer('edge_x_std', edge_x_std)
 55 | 
 56 |         VOC_edge_input_dim = 2 if cfg.dataset.name == 'edge_wt_region_boundary' else 1
 57 |         self.encoder = torch.nn.Linear(VOC_edge_input_dim, emb_dim)
 58 |         # torch.nn.init.xavier_uniform_(self.encoder.weight.data)
 59 | 
 60 |     def forward(self, batch):
 61 |         x = batch.edge_attr - self.edge_x_mean.view(1, -1)
 62 |         x /= self.edge_x_std.view(1, -1)
 63 |         batch.edge_attr = self.encoder(x)
 64 |         return batch
 65 | 
 66 | 
 67 | @register_node_encoder('COCONode')
 68 | class COCONodeEncoder(torch.nn.Module):
 69 |     def __init__(self, emb_dim):
 70 |         super().__init__()
 71 | 
 72 |         node_x_mean = torch.tensor([
 73 |             4.6977347e-01, 4.4679317e-01, 4.0790915e-01, 7.0808627e-02,
 74 |             6.8686441e-02, 6.8498217e-02, 6.7777938e-01, 6.5244222e-01,
 75 |             6.2096798e-01, 2.7554795e-01, 2.5910738e-01, 2.2901227e-01,
 76 |             2.4261935e+02, 2.8985367e+02
 77 |         ])
 78 |         node_x_std = torch.tensor([
 79 |             2.6218116e-01, 2.5831082e-01, 2.7416739e-01, 5.7440419e-02,
 80 |             5.6832556e-02, 5.7100497e-02, 2.5929087e-01, 2.6201612e-01,
 81 |             2.7675411e-01, 2.5456995e-01, 2.5140920e-01, 2.6182330e-01,
 82 |             1.5152475e+02, 1.7630779e+02
 83 |         ])
 84 | 
 85 |         self.register_buffer('node_x_mean', node_x_mean)
 86 |         self.register_buffer('node_x_std', node_x_std)
 87 |         self.encoder = torch.nn.Linear(VOC_node_input_dim, emb_dim)
 88 | 
 89 |     def forward(self, batch):
 90 |         x = batch.x - self.node_x_mean.view(1, -1)
 91 |         x /= self.node_x_std.view(1, -1)
 92 |         batch.x = self.encoder(x)
 93 |         return batch
 94 | 
 95 | 
 96 | @register_edge_encoder('COCOEdge')
 97 | class COCOEdgeEncoder(torch.nn.Module):
 98 |     def __init__(self, emb_dim):
 99 |         super().__init__()
100 |         edge_x_mean = torch.tensor([0.07848548, 43.68736])
101 |         edge_x_std = torch.tensor([0.08902349, 28.473562])
102 |         self.register_buffer('edge_x_mean', edge_x_mean)
103 |         self.register_buffer('edge_x_std', edge_x_std)
104 |         VOC_edge_input_dim = 2 if cfg.dataset.name == 'edge_wt_region_boundary' else 1
105 |         self.encoder = torch.nn.Linear(VOC_edge_input_dim, emb_dim)
106 | 
107 |     def forward(self, batch):
108 |         x = batch.edge_attr - self.edge_x_mean.view(1, -1)
109 |         x /= self.edge_x_std.view(1, -1)
110 |         batch.edge_attr = self.encoder(x)
111 |         return batch


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Expose every sibling ``*.py`` module of this package (except this
# ``__init__.py`` itself) through ``__all__`` by its name without extension.
modules = glob.glob(join(dirname(__file__), "*.py"))
__all__ = [
    basename(path)[:-3]
    for path in modules
    if not path.endswith('__init__.py') and isfile(path)
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/example.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | from torch_geometric.graphgym.register import register_head
 4 | 
 5 | 
 6 | @register_head('head')
 7 | class ExampleNodeHead(nn.Module):
 8 |     '''Head of GNN, node prediction'''
 9 |     def __init__(self, dim_in, dim_out):
10 |         super().__init__()
11 |         self.layer_post_mp = nn.Linear(dim_in, dim_out, bias=True)
12 | 
13 |     def _apply_index(self, batch):
14 |         if batch.node_label_index.shape[0] == batch.node_label.shape[0]:
15 |             return batch.x[batch.node_label_index], batch.node_label
16 |         else:
17 |             return batch.x[batch.node_label_index], \
18 |                    batch.node_label[batch.node_label_index]
19 | 
20 |     def forward(self, batch):
21 |         batch = self.layer_post_mp(batch)
22 |         pred, label = self._apply_index(batch)
23 |         return pred, label
24 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/graphormer_graph.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | import torch_geometric.graphgym.register as register
 4 | from torch_geometric.graphgym import cfg
 5 | from torch_geometric.graphgym.register import register_head
 6 | 
 7 | 
 8 | @register_head('graphormer_graph')
 9 | class GraphormerHead(torch.nn.Module):
10 |     """
11 |     Graphormer prediction head for graph prediction tasks.
12 | 
13 |     Args:
14 |         dim_in (int): Input dimension.
15 |         dim_out (int): Output dimension. For binary prediction, dim_out=1.
16 |     """
17 | 
18 |     def __init__(self, dim_in, dim_out):
19 |         super().__init__()
20 |         print(f"Initializing {cfg.model.graph_pooling} pooling function")
21 |         self.pooling_fun = register.pooling_dict[cfg.model.graph_pooling]
22 | 
23 |         self.ln = torch.nn.LayerNorm(dim_in)
24 |         self.layers = torch.nn.Sequential(
25 |             torch.nn.Linear(dim_in, dim_out)
26 |         )
27 | 
28 |     def _apply_index(self, batch):
29 |         return batch.graph_feature, batch.y
30 | 
31 |     def forward(self, batch):
32 |         x = self.ln(batch.x)
33 |         graph_emb = self.pooling_fun(x, batch.batch)
34 |         graph_emb = self.layers(graph_emb)
35 |         batch.graph_feature = graph_emb
36 |         pred, label = self._apply_index(batch)
37 |         return pred, label
38 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/inductive_node.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch_geometric.graphgym.config import cfg
 3 | from torch_geometric.graphgym.models.layer import new_layer_config, MLP
 4 | from torch_geometric.graphgym.register import register_head
 5 | 
 6 | 
 7 | @register_head('inductive_node')
 8 | class GNNInductiveNodeHead(nn.Module):
 9 |     """
10 |     GNN prediction head for inductive node prediction tasks.
11 | 
12 |     Args:
13 |         dim_in (int): Input dimension
14 |         dim_out (int): Output dimension. For binary prediction, dim_out=1.
15 |     """
16 | 
17 |     def __init__(self, dim_in, dim_out):
18 |         super(GNNInductiveNodeHead, self).__init__()
19 |         self.layer_post_mp = MLP(
20 |             new_layer_config(dim_in, dim_out, cfg.gnn.layers_post_mp,
21 |                              has_act=False, has_bias=True, cfg=cfg))
22 | 
23 |     def _apply_index(self, batch):
24 |         return batch.x, batch.y
25 | 
26 |     def forward(self, batch):
27 |         batch = self.layer_post_mp(batch)
28 |         pred, label = self._apply_index(batch)
29 |         return pred, label
30 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/infer_links.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.graphgym import cfg
 3 | from torch_geometric.graphgym.register import register_head
 4 | 
 5 | 
 6 | @register_head('infer_links')
 7 | class InferLinksHead(torch.nn.Module):
 8 |     """
 9 |     InferLinks prediction head for graph prediction tasks.
10 | 
11 |     Args:
12 |         dim_in (int): Input dimension.
13 |         dim_out (int): Output dimension. For binary prediction, dim_out=1.
14 |     """
15 | 
16 |     def __init__(self, dim_in, dim_out):
17 |         super().__init__()
18 |         if cfg.dataset.infer_link_label == "edge":
19 |             dim_out = 2
20 |         else:
21 |             raise ValueError(f"Infer-link task {cfg.dataset.infer_link_label} not available.")
22 | 
23 |         self.predictor = torch.nn.Linear(1, dim_out)
24 | 
25 |     def forward(self, batch):
26 |         x = batch.x[batch.complete_edge_index]
27 |         x = (x[0] * x[1]).sum(1)
28 |         y = self.predictor(x.unsqueeze(1))
29 |         return y, batch.y
30 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/mlp_graph.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch_geometric.graphgym.register as register
 3 | from torch_geometric.graphgym import cfg
 4 | from torch_geometric.graphgym.register import register_head
 5 | 
 6 | 
 7 | @register_head('mlp_graph')
 8 | class MLPGraphHead(nn.Module):
 9 |     """
10 |     MLP prediction head for graph prediction tasks.
11 | 
12 |     Args:
13 |         dim_in (int): Input dimension.
14 |         dim_out (int): Output dimension. For binary prediction, dim_out=1.
15 |         L (int): Number of hidden layers.
16 |     """
17 | 
18 |     def __init__(self, dim_in, dim_out):
19 |         super().__init__()
20 |         if cfg.model.graph_pooling != 'node_ensemble':
21 |             self.pooling_fun = register.pooling_dict[cfg.model.graph_pooling]
22 |             self.node_ensemble = False
23 |         else:
24 |             self.pooling_fun = register.pooling_dict['mean']
25 |             self.node_ensemble = True
26 | 
27 |         dropout = cfg.gnn.dropout
28 |         L = cfg.gnn.layers_post_mp
29 | 
30 |         layers = []
31 |         for _ in range(L-1):
32 |             layers.append(nn.Dropout(dropout))
33 |             layers.append(nn.Linear(dim_in, dim_in, bias=True))
34 |             layers.append(register.act_dict[cfg.gnn.act]())
35 | 
36 |         layers.append(nn.Dropout(dropout))
37 |         layers.append(nn.Linear(dim_in, dim_out, bias=True))
38 |         self.mlp = nn.Sequential(*layers)
39 | 
40 |     def _scale_and_shift(self, x):
41 |         return x
42 | 
43 |     def _apply_index(self, batch):
44 |         return batch.graph_feature, batch.y
45 | 
46 |     def forward(self, batch):
47 |         if self.node_ensemble:
48 |             x = batch.x
49 |         else:
50 |             x = self.pooling_fun(batch.x, batch.batch)
51 |         y = self.mlp(x)
52 |         y = self._scale_and_shift(y)
53 | 
54 |         if self.node_ensemble:
55 |             y_graph = self.pooling_fun(y, batch.batch)
56 |             batch.graph_feature = y_graph
57 | 
58 |             _, label = self._apply_index(batch)
59 |             if self.training:
60 |                 return y, label[batch.batch]
61 |             else:
62 |                 return y_graph, label
63 | 
64 |         else:
65 |             batch.graph_feature = y
66 |             pred, label = self._apply_index(batch)
67 |             return pred, label
68 | 
69 | 
@register_head('mlp_graph_pcqm4m')
class MLPGraphHeadPCQM4M(MLPGraphHead):
    """MLP graph head whose output goes through a fixed affine map."""

    def _scale_and_shift(self, x):
        """Multiply by 1.1623 and add 5.6896."""
        # NOTE(review): constants are presumably target statistics of the
        # dataset named in the registry key -- confirm.
        scaled = x * 1.1623
        return scaled + 5.6896
75 | 
76 | 
@register_head('mlp_graph_zinc')
class MLPGraphHeadZINC(MLPGraphHead):
    """MLP graph head whose output goes through a fixed affine map."""

    def _scale_and_shift(self, x):
        """Multiply by 2.0109 and add 0.0153."""
        # NOTE(review): constants are presumably target statistics of the
        # dataset named in the registry key -- confirm.
        scaled = x * 2.0109
        return scaled + 0.0153
82 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/ogb_code_graph.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | import torch_geometric.graphgym.register as register
 4 | from torch_geometric.graphgym import cfg
 5 | from torch_geometric.graphgym.register import register_head
 6 | 
 7 | 
 8 | @register_head('ogb_code_graph')
 9 | class OGBCodeGraphHead(nn.Module):
10 |     """
11 |     Sequence prediction head for ogbg-code2 graph-level prediction tasks.
12 | 
13 |     Args:
14 |         dim_in (int): Input dimension.
15 |         dim_out (int): IGNORED, kept for GraphGym framework compatibility
16 |         L (int): Number of hidden layers.
17 |     """
18 | 
19 |     def __init__(self, dim_in, dim_out, L=1):
20 |         super().__init__()
21 |         self.pooling_fun = register.pooling_dict[cfg.model.graph_pooling]
22 |         self.L = L
23 |         num_vocab = 5002
24 |         self.max_seq_len = 5
25 | 
26 |         if self.L != 1:
27 |             raise ValueError(f"Multilayer prediction heads are not supported.")
28 | 
29 |         self.graph_pred_linear_list = nn.ModuleList()
30 |         for i in range(self.max_seq_len):
31 |             self.graph_pred_linear_list.append(nn.Linear(dim_in, num_vocab))
32 | 
33 |     def _apply_index(self, batch):
34 |         return batch.pred_list, {'y_arr': batch.y_arr, 'y': batch.y}
35 | 
36 |     def forward(self, batch):
37 |         graph_emb = self.pooling_fun(batch.x, batch.batch)
38 | 
39 |         pred_list = []
40 |         for i in range(self.max_seq_len):
41 |             pred_list.append(self.graph_pred_linear_list[i](graph_emb))
42 |         batch.pred_list = pred_list
43 | 
44 |         pred, label = self._apply_index(batch)
45 |         return pred, label
46 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/head/san_graph.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | import torch_geometric.graphgym.register as register
 4 | from torch_geometric.graphgym import cfg
 5 | from torch_geometric.graphgym.register import register_head
 6 | 
 7 | 
 8 | @register_head('san_graph')
 9 | class SANGraphHead(nn.Module):
10 |     """
11 |     SAN prediction head for graph prediction tasks.
12 | 
13 |     Args:
14 |         dim_in (int): Input dimension.
15 |         dim_out (int): Output dimension. For binary prediction, dim_out=1.
16 |         L (int): Number of hidden layers.
17 |     """
18 | 
19 |     def __init__(self, dim_in, dim_out, L=2):
20 |         super().__init__()
21 |         self.pooling_fun = register.pooling_dict[cfg.model.graph_pooling]
22 |         list_FC_layers = [
23 |             nn.Linear(dim_in // 2 ** l, dim_in // 2 ** (l + 1), bias=True)
24 |             for l in range(L)]
25 |         list_FC_layers.append(
26 |             nn.Linear(dim_in // 2 ** L, dim_out, bias=True))
27 |         self.FC_layers = nn.ModuleList(list_FC_layers)
28 |         self.L = L
29 |         self.activation = register.act_dict[cfg.gnn.act]()
30 | 
31 |     def _apply_index(self, batch):
32 |         return batch.graph_feature, batch.y
33 | 
34 |     def forward(self, batch):
35 |         graph_emb = self.pooling_fun(batch.x, batch.batch)
36 |         for l in range(self.L):
37 |             graph_emb = self.FC_layers[l](graph_emb)
38 |             graph_emb = self.activation(graph_emb)
39 |         graph_emb = self.FC_layers[self.L](graph_emb)
40 |         batch.graph_feature = graph_emb
41 |         pred, label = self._apply_index(batch)
42 |         return pred, label
43 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/layer/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Paths of all ``*.py`` files that sit next to this ``__init__.py``.
modules = glob.glob(join(dirname(__file__), "*.py"))
# Export every sibling module (file name without the ``.py`` suffix) except
# ``__init__`` itself; listing them in ``__all__`` makes a star-import of this
# package import each of them.
__all__ = [
    basename(path)[:-3]
    for path in filter(isfile, modules)
    if not path.endswith('__init__.py')
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/layer/example.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.nn import Parameter
  4 | 
  5 | from torch_geometric.graphgym.config import cfg
  6 | from torch_geometric.graphgym.register import register_layer
  7 | from torch_geometric.nn.conv import MessagePassing
  8 | from torch_geometric.nn.inits import glorot, zeros
  9 | 
 10 | # Note: A registered GNN layer should take 'batch' as input
 11 | # and 'batch' as output
 12 | 
 13 | 
 14 | # Example 1: Directly define a GraphGym format Conv
 15 | # take 'batch' as input and 'batch' as output
 16 | @register_layer('exampleconv1')
 17 | class ExampleConv1(MessagePassing):
 18 |     r"""Example GNN layer
 19 |     """
 20 |     def __init__(self, in_channels, out_channels, bias=True, **kwargs):
 21 |         super().__init__(aggr=cfg.gnn.agg, **kwargs)
 22 | 
 23 |         self.in_channels = in_channels
 24 |         self.out_channels = out_channels
 25 | 
 26 |         self.weight = Parameter(torch.Tensor(in_channels, out_channels))
 27 | 
 28 |         if bias:
 29 |             self.bias = Parameter(torch.Tensor(out_channels))
 30 |         else:
 31 |             self.register_parameter('bias', None)
 32 | 
 33 |         self.reset_parameters()
 34 | 
 35 |     def reset_parameters(self):
 36 |         glorot(self.weight)
 37 |         zeros(self.bias)
 38 | 
 39 |     def forward(self, batch):
 40 |         """"""
 41 |         x, edge_index = batch.x, batch.edge_index
 42 |         x = torch.matmul(x, self.weight)
 43 | 
 44 |         batch.x = self.propagate(edge_index, x=x)
 45 | 
 46 |         return batch
 47 | 
 48 |     def message(self, x_j):
 49 |         return x_j
 50 | 
 51 |     def update(self, aggr_out):
 52 |         if self.bias is not None:
 53 |             aggr_out = aggr_out + self.bias
 54 |         return aggr_out
 55 | 
 56 | 
 57 | # Example 2: First define a PyG format Conv layer
 58 | # Then wrap it to become GraphGym format
 59 | class ExampleConv2Layer(MessagePassing):
 60 |     r"""Example GNN layer
 61 |     """
 62 |     def __init__(self, in_channels, out_channels, bias=True, **kwargs):
 63 |         super().__init__(aggr=cfg.gnn.agg, **kwargs)
 64 | 
 65 |         self.in_channels = in_channels
 66 |         self.out_channels = out_channels
 67 | 
 68 |         self.weight = Parameter(torch.Tensor(in_channels, out_channels))
 69 | 
 70 |         if bias:
 71 |             self.bias = Parameter(torch.Tensor(out_channels))
 72 |         else:
 73 |             self.register_parameter('bias', None)
 74 | 
 75 |         self.reset_parameters()
 76 | 
 77 |     def reset_parameters(self):
 78 |         glorot(self.weight)
 79 |         zeros(self.bias)
 80 | 
 81 |     def forward(self, x, edge_index):
 82 |         """"""
 83 |         x = torch.matmul(x, self.weight)
 84 | 
 85 |         return self.propagate(edge_index, x=x)
 86 | 
 87 |     def message(self, x_j):
 88 |         return x_j
 89 | 
 90 |     def update(self, aggr_out):
 91 |         if self.bias is not None:
 92 |             aggr_out = aggr_out + self.bias
 93 |         return aggr_out
 94 | 
 95 | 
 96 | @register_layer('exampleconv2')
 97 | class ExampleConv2(nn.Module):
 98 |     def __init__(self, dim_in, dim_out, bias=False, **kwargs):
 99 |         super().__init__()
100 |         self.model = ExampleConv2Layer(dim_in, dim_out, bias=bias)
101 | 
102 |     def forward(self, batch):
103 |         batch.x = self.model(batch.x, batch.edge_index)
104 |         return batch
105 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/layer/gcn_conv_layer.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch_geometric.nn as pyg_nn
 3 | from torch_geometric.graphgym import cfg
 4 | import torch_geometric.graphgym.register as register
 5 | 
 6 | 
 7 | class GCNConvLayer(nn.Module):
 8 |     """Graph Isomorphism Network with Edge features (GINE) layer.
 9 |     """
10 |     def __init__(self, dim_in, dim_out, dropout, residual):
11 |         super().__init__()
12 |         self.dim_in = dim_in
13 |         self.dim_out = dim_out
14 |         self.dropout = dropout
15 |         self.residual = residual
16 | 
17 |         self.act = nn.Sequential(
18 |             register.act_dict[cfg.gnn.act](),
19 |             nn.Dropout(self.dropout),
20 |         )
21 |         self.model = pyg_nn.GCNConv(dim_in, dim_out, bias=True)
22 | 
23 |     def forward(self, batch):
24 |         x_in = batch.x
25 | 
26 |         batch.x = self.model(batch.x, batch.edge_index)
27 |         batch.x = self.act(batch.x)
28 | 
29 |         if self.residual:
30 |             batch.x = x_in + batch.x  # residual connection
31 | 
32 |         return batch
33 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/layer/graphormer_layer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.utils import to_dense_batch
 3 | 
 4 | 
 5 | class GraphormerLayer(torch.nn.Module):
 6 |     def __init__(self, embed_dim: int, num_heads: int, dropout: float,
 7 |                  attention_dropout: float, mlp_dropout: float):
 8 |         """Implementation of the Graphormer layer.
 9 |         This layer is based on the implementation at:
10 |         https://github.com/microsoft/Graphormer/tree/v1.0
11 |         Note that this refers to v1 of Graphormer.
12 | 
13 |         Args:
14 |             embed_dim: The number of hidden dimensions of the model
15 |             num_heads: The number of heads of the Graphormer model
16 |             dropout: Dropout applied after the attention and after the MLP
17 |             attention_dropout: Dropout applied within the attention
18 |             input_dropout: Dropout applied within the MLP
19 |         """
20 |         super().__init__()
21 |         self.attention = torch.nn.MultiheadAttention(embed_dim,
22 |                                                      num_heads,
23 |                                                      attention_dropout,
24 |                                                      batch_first=True)
25 |         self.input_norm = torch.nn.LayerNorm(embed_dim)
26 |         self.dropout = torch.nn.Dropout(dropout)
27 | 
28 |         # We follow the paper in that all hidden dims are
29 |         # equal to the embedding dim
30 |         self.mlp = torch.nn.Sequential(
31 |             torch.nn.LayerNorm(embed_dim),
32 |             torch.nn.Linear(embed_dim, embed_dim),
33 |             torch.nn.GELU(),
34 |             torch.nn.Dropout(mlp_dropout),
35 |             torch.nn.Linear(embed_dim, embed_dim),
36 |             torch.nn.Dropout(dropout),
37 |         )
38 | 
39 |     def forward(self, data):
40 |         x = self.input_norm(data.x)
41 |         x, real_nodes = to_dense_batch(x, data.batch)
42 | 
43 |         if hasattr(data, "attn_bias"):
44 |             x = self.attention(x, x, x, ~real_nodes, attn_mask=data.attn_bias)[0][real_nodes]
45 |         else:
46 |             x = self.attention(x, x, x, ~real_nodes)[0][real_nodes]
47 |         x = self.dropout(x) + data.x
48 |         data.x = self.mlp(x) + x
49 |         return data
50 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Paths of all ``*.py`` files that sit next to this ``__init__.py``.
modules = glob.glob(join(dirname(__file__), "*.py"))
# Export every sibling module (file name without the ``.py`` suffix) except
# ``__init__`` itself; listing them in ``__all__`` makes a star-import of this
# package import each of them.
__all__ = [
    basename(path)[:-3]
    for path in filter(isfile, modules)
    if not path.endswith('__init__.py')
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__init__.py


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/__init__.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/__init__.cpython-39.pyc


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/aqsol_molecules.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/aqsol_molecules.cpython-39.pyc


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/coco_superpixels.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/coco_superpixels.cpython-39.pyc


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/malnet_tiny.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/malnet_tiny.cpython-39.pyc


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_functional.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_functional.cpython-39.pyc


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_structural.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_structural.cpython-39.pyc


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/voc_superpixels.cpython-39.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/voc_superpixels.cpython-39.pyc


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loss/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Paths of all ``*.py`` files that sit next to this ``__init__.py``.
modules = glob.glob(join(dirname(__file__), "*.py"))
# Export every sibling module (file name without the ``.py`` suffix) except
# ``__init__`` itself; listing them in ``__all__`` makes a star-import of this
# package import each of them.
__all__ = [
    basename(path)[:-3]
    for path in filter(isfile, modules)
    if not path.endswith('__init__.py')
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loss/l1.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch_geometric.graphgym.config import cfg
 3 | from torch_geometric.graphgym.register import register_loss
 4 | 
 5 | 
 6 | @register_loss('l1_losses')
 7 | def l1_losses(pred, true):
 8 |     if cfg.model.loss_fun == 'l1':
 9 |         l1_loss = nn.L1Loss()
10 |         loss = l1_loss(pred, true)
11 |         return loss, pred
12 |     elif cfg.model.loss_fun == 'smoothl1':
13 |         l1_loss = nn.SmoothL1Loss()
14 |         loss = l1_loss(pred, true)
15 |         return loss, pred
16 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loss/multilabel_classification_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch_geometric.graphgym.config import cfg
 3 | from torch_geometric.graphgym.register import register_loss
 4 | 
 5 | 
 6 | @register_loss('multilabel_cross_entropy')
 7 | def multilabel_cross_entropy(pred, true):
 8 |     """Multilabel cross-entropy loss.
 9 |     """
10 |     if cfg.dataset.task_type == 'classification_multilabel':
11 |         if cfg.model.loss_fun != 'cross_entropy':
12 |             raise ValueError("Only 'cross_entropy' loss_fun supported with "
13 |                              "'classification_multilabel' task_type.")
14 |         bce_loss = nn.BCEWithLogitsLoss()
15 |         is_labeled = true == true  # Filter our nans.
16 |         return bce_loss(pred[is_labeled], true[is_labeled].float()), pred
17 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loss/subtoken_prediction_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch_geometric.graphgym.config import cfg
 3 | from torch_geometric.graphgym.register import register_loss
 4 | 
 5 | 
 6 | @register_loss('subtoken_cross_entropy')
 7 | def subtoken_cross_entropy(pred_list, true):
 8 |     """Subtoken prediction cross-entropy loss for ogbg-code2.
 9 |     """
10 |     if cfg.dataset.task_type == 'subtoken_prediction':
11 |         if cfg.model.loss_fun != 'cross_entropy':
12 |             raise ValueError("Only 'cross_entropy' loss_fun supported with "
13 |                              "'subtoken_prediction' task_type.")
14 |         multicls_criterion = torch.nn.CrossEntropyLoss()
15 |         loss = 0
16 |         for i in range(len(pred_list)):
17 |             loss += multicls_criterion(pred_list[i].to(torch.float32), true['y_arr'][:, i])
18 |         loss = loss / len(pred_list)
19 | 
20 |         return loss, pred_list
21 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/loss/weighted_cross_entropy.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn.functional as F
 3 | from torch_geometric.graphgym.config import cfg
 4 | from torch_geometric.graphgym.register import register_loss
 5 | 
 6 | 
 7 | @register_loss('weighted_cross_entropy')
 8 | def weighted_cross_entropy(pred, true):
 9 |     """Weighted cross-entropy for unbalanced classes.
10 |     """
11 |     if cfg.model.loss_fun == 'weighted_cross_entropy':
12 |         # calculating label weights for weighted loss computation
13 |         V = true.size(0)
14 |         n_classes = pred.shape[1] if pred.ndim > 1 else 2
15 |         label_count = torch.bincount(true)
16 |         label_count = label_count[label_count.nonzero(as_tuple=True)].squeeze()
17 |         cluster_sizes = torch.zeros(n_classes, device=pred.device).long()
18 |         cluster_sizes[torch.unique(true)] = label_count
19 |         weight = (V - cluster_sizes).float() / V
20 |         weight *= (cluster_sizes > 0).float()
21 |         # multiclass
22 |         if pred.ndim > 1:
23 |             pred = F.log_softmax(pred, dim=-1)
24 |             return F.nll_loss(pred, true, weight=weight), pred
25 |         # binary
26 |         else:
27 |             loss = F.binary_cross_entropy_with_logits(pred, true.float(),
28 |                                                       weight=weight[true])
29 |             return loss, torch.sigmoid(pred)
30 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/metrics_ogb.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from sklearn.metrics import roc_auc_score, average_precision_score
  3 | 
  4 | """
  5 | Evaluation functions from OGB.
  6 | https://github.com/snap-stanford/ogb/blob/master/ogb/graphproppred/evaluate.py
  7 | """
  8 | 
  9 | def eval_rocauc(y_true, y_pred):
 10 |     '''
 11 |         compute ROC-AUC averaged across tasks
 12 |     '''
 13 | 
 14 |     rocauc_list = []
 15 | 
 16 |     for i in range(y_true.shape[1]):
 17 |         # AUC is only defined when there is at least one positive data.
 18 |         if np.sum(y_true[:, i] == 1) > 0 and np.sum(y_true[:, i] == 0) > 0:
 19 |             # ignore nan values
 20 |             is_labeled = y_true[:, i] == y_true[:, i]
 21 |             rocauc_list.append(
 22 |                 roc_auc_score(y_true[is_labeled, i], y_pred[is_labeled, i]))
 23 | 
 24 |     if len(rocauc_list) == 0:
 25 |         raise RuntimeError(
 26 |             'No positively labeled data available. Cannot compute ROC-AUC.')
 27 | 
 28 |     return {'rocauc': sum(rocauc_list) / len(rocauc_list)}
 29 | 
 30 | 
 31 | def eval_ap(y_true, y_pred):
 32 |     '''
 33 |         compute Average Precision (AP) averaged across tasks
 34 |     '''
 35 | 
 36 |     ap_list = []
 37 | 
 38 |     for i in range(y_true.shape[1]):
 39 |         # AUC is only defined when there is at least one positive data.
 40 |         if np.sum(y_true[:, i] == 1) > 0 and np.sum(y_true[:, i] == 0) > 0:
 41 |             # ignore nan values
 42 |             is_labeled = y_true[:, i] == y_true[:, i]
 43 |             ap = average_precision_score(y_true[is_labeled, i],
 44 |                                          y_pred[is_labeled, i])
 45 | 
 46 |             ap_list.append(ap)
 47 | 
 48 |     if len(ap_list) == 0:
 49 |         raise RuntimeError(
 50 |             'No positively labeled data available. Cannot compute Average Precision.')
 51 | 
 52 |     return {'ap': sum(ap_list) / len(ap_list)}
 53 | 
 54 | 
 55 | def eval_rmse(y_true, y_pred):
 56 |     '''
 57 |         compute RMSE score averaged across tasks
 58 |     '''
 59 |     rmse_list = []
 60 | 
 61 |     for i in range(y_true.shape[1]):
 62 |         # ignore nan values
 63 |         is_labeled = y_true[:, i] == y_true[:, i]
 64 |         rmse_list.append(np.sqrt(
 65 |             ((y_true[is_labeled, i] - y_pred[is_labeled, i]) ** 2).mean()))
 66 | 
 67 |     return {'rmse': sum(rmse_list) / len(rmse_list)}
 68 | 
 69 | 
 70 | def eval_acc(y_true, y_pred):
 71 |     acc_list = []
 72 | 
 73 |     for i in range(y_true.shape[1]):
 74 |         is_labeled = y_true[:, i] == y_true[:, i]
 75 |         correct = y_true[is_labeled, i] == y_pred[is_labeled, i]
 76 |         acc_list.append(float(np.sum(correct)) / len(correct))
 77 | 
 78 |     return {'acc': sum(acc_list) / len(acc_list)}
 79 | 
 80 | 
 81 | def eval_F1(seq_ref, seq_pred):
 82 |     # '''
 83 |     #     compute F1 score averaged over samples
 84 |     # '''
 85 | 
 86 |     precision_list = []
 87 |     recall_list = []
 88 |     f1_list = []
 89 | 
 90 |     for l, p in zip(seq_ref, seq_pred):
 91 |         label = set(l)
 92 |         prediction = set(p)
 93 |         true_positive = len(label.intersection(prediction))
 94 |         false_positive = len(prediction - label)
 95 |         false_negative = len(label - prediction)
 96 | 
 97 |         if true_positive + false_positive > 0:
 98 |             precision = true_positive / (true_positive + false_positive)
 99 |         else:
100 |             precision = 0
101 | 
102 |         if true_positive + false_negative > 0:
103 |             recall = true_positive / (true_positive + false_negative)
104 |         else:
105 |             recall = 0
106 |         if precision + recall > 0:
107 |             f1 = 2 * precision * recall / (precision + recall)
108 |         else:
109 |             f1 = 0
110 | 
111 |         precision_list.append(precision)
112 |         recall_list.append(recall)
113 |         f1_list.append(f1)
114 | 
115 |     return {'precision': np.average(precision_list),
116 |             'recall': np.average(recall_list),
117 |             'F1': np.average(f1_list)}
118 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Paths of all ``*.py`` files that sit next to this ``__init__.py``.
modules = glob.glob(join(dirname(__file__), "*.py"))
# Export every sibling module (file name without the ``.py`` suffix) except
# ``__init__`` itself; listing them in ``__all__`` makes a star-import of this
# package import each of them.
__all__ = [
    basename(path)[:-3]
    for path in filter(isfile, modules)
    if not path.endswith('__init__.py')
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/big_bird.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch_geometric.graphgym.register as register
 3 | from torch_geometric.graphgym.config import cfg
 4 | from torch_geometric.graphgym.models.gnn import FeatureEncoder, GNNPreMP
 5 | from torch_geometric.graphgym.register import register_network
 6 | 
 7 | from graphgps.layer.bigbird_layer import BigBirdModel as BackboneBigBird
 8 | 
 9 | 
@register_network('BigBird')
class BigBird(torch.nn.Module):
    """BigBird without edge features.

    The network chains a feature encoder, an optional pre-message-passing
    block, the BigBird transformer backbone and the prediction head named by
    ``cfg.gnn.head``. Per the BigBird paper, the model applies random sparse
    attention to the input sequence.
    https://arxiv.org/abs/2007.14062

    Args:
        dim_in: Input feature dimension handed to the feature encoder.
        dim_out: Output dimension of the prediction head.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        # NOTE: sub-modules are registered in execution order because
        # forward() simply iterates over self.children().
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        assert cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in, \
            "The inner and hidden dims must match."

        # Copy main Transformer hyperparams to the BigBird config.
        for option in ('layers', 'n_heads', 'dim_hidden', 'dropout'):
            setattr(cfg.gt.bigbird, option, getattr(cfg.gt, option))
        self.trf = BackboneBigBird(config=cfg.gt.bigbird)

        head_cls = register.head_dict[cfg.gnn.head]
        self.post_mp = head_cls(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Feed ``batch`` through every child module in registration order
        and return the final output."""
        output = batch
        for stage in self.children():
            output = stage(output)
        return output
47 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/custom_gnn.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch_geometric.graphgym.models.head  # noqa, register module
 3 | import torch_geometric.graphgym.register as register
 4 | from torch_geometric.graphgym.config import cfg
 5 | from torch_geometric.graphgym.models.gnn import FeatureEncoder, GNNPreMP
 6 | from torch_geometric.graphgym.register import register_network
 7 | 
 8 | from graphgps.layer.gatedgcn_layer import GatedGCNLayer
 9 | from graphgps.layer.gine_conv_layer import GINEConvLayer
10 | from graphgps.layer.gcn_conv_layer import GCNConvLayer
11 | from torch_geometric.nn import global_add_pool
12 | 
@register_network('custom_gnn')
class CustomGNN(torch.nn.Module):
    """
    GNN model that customizes the torch_geometric.graphgym.models.gnn.GNN
    to support specific handling of new conv layers.

    Each message-passing layer is paired with a residual vector quantizer
    that is applied to that layer's node features. The forward pass returns
    the head output together with the accumulated quantizer loss and a
    per-graph sum of the code assignments.
    """

    def __init__(self, dim_in, dim_out):
        """Build encoder, optional pre-MP, conv/quantizer pairs and head.

        Args:
            dim_in: Input feature dimension handed to ``FeatureEncoder``.
            dim_out: Output dimension handed to the prediction head.

        Raises:
            ValueError: If ``cfg.gnn.dim_inner`` differs from the dimension
                after the encoder / pre-MP layers, or (via
                ``build_conv_model``) if the conv layer type is unknown.
        """
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in
        self.vqs = torch.nn.ModuleList()
        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        # Raise explicitly instead of asserting: asserts vanish under -O.
        if cfg.gnn.dim_inner != dim_in:
            raise ValueError(
                f"The inner and hidden dims must match: "
                f"dim_inner={cfg.gnn.dim_inner} dim_in={dim_in}"
            )
        # Truthy -> quantizer from graphgps.network.vq; falsy -> vqtorch.
        self.kmeans = 1
        conv_model = self.build_conv_model(cfg.gnn.layer_type)
        layers = []
        for _ in range(cfg.gnn.layers_mp):
            layers.append(conv_model(dim_in,
                                     dim_in,
                                     dropout=cfg.gnn.dropout,
                                     residual=cfg.gnn.residual))
            self.vqs.append(self._build_quantizer())
        self.gnn_layers = torch.nn.Sequential(*layers)

        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def _build_quantizer(self):
        """Create the residual vector quantizer paired with one conv layer."""
        if self.kmeans:
            # Imported lazily so only the selected backend has to be present.
            from graphgps.network.vq import ResidualVectorQuant
            print("kmeans")
            return ResidualVectorQuant(dim=cfg.gnn.dim_inner,
                                       codebook_size=16,
                                       decay=0.8,
                                       commitment_weight=0.25,
                                       use_cosine_sim=True,
                                       kmeans_init=False)

        from vqtorch.nn import ResidualVectorQuant
        print("vq")
        return ResidualVectorQuant(
            groups=3,
            feature_size=cfg.gnn.dim_inner,  # feature dimension corresponding to the vectors
            num_codes=16,        # number of codebook vectors
            beta=0.98,           # (default: 0.9) commitment trade-off
            kmeans_init=False,   # (default: False) whether to use kmeans++ init
            norm=None,           # (default: None) normalization for the input vectors
            cb_norm=None,        # (default: None) normalization for codebook vectors
            affine_lr=10.0,      # (default: 0.0) lr scale for affine parameters
            sync_nu=0.2,         # (default: 0.0) codebook synchronization contribution
            replace_freq=20,     # (default: None) frequency to replace dead codes
            dim=-1,              # (default: -1) dimension to be quantized
        )

    def build_conv_model(self, model_type):
        """Return the conv layer class registered under ``model_type``.

        Raises:
            ValueError: If ``model_type`` is not one of ``'gatedgcnconv'``,
                ``'gineconv'`` or ``'gcnconv'``.
        """
        if model_type == 'gatedgcnconv':
            return GatedGCNLayer
        elif model_type == 'gineconv':
            return GINEConvLayer
        elif model_type == 'gcnconv':
            return GCNConvLayer
        else:
            raise ValueError("Model {} unavailable".format(model_type))

    def forward(self, batch):
        """Run the network and collect the quantizer outputs of every layer.

        Args:
            batch: Batched graph exposing ``x`` (node features) and
                ``batch`` (node-to-graph assignment).

        Returns:
            A 3-tuple ``(head_output, total_commit_loss, graph_id)`` where
            ``total_commit_loss`` is the sum of the per-layer quantizer
            losses and ``graph_id`` is the per-graph sum
            (``global_add_pool``) of the code assignments concatenated over
            layers along dim 1.
        """
        batch = self.encoder(batch)
        # The conv layers are sized for the output of ``pre_mp`` whenever it
        # exists, so it has to be applied before message passing.
        if hasattr(self, 'pre_mp'):
            batch = self.pre_mp(batch)

        id_list = []
        total_commit_loss = 0
        for conv, vq in zip(self.gnn_layers, self.vqs):
            batch = conv(batch)
            if self.kmeans:
                # The quantizer returns a 5-tuple; only the code indices and
                # the commitment loss are used here.
                _quantized, indices, commit_loss, _dist, _codebook = vq(batch.x)
                id_list.append(torch.stack(indices, dim=1))
                total_commit_loss += commit_loss
            else:
                _x, vq_out = vq(batch.x)
                total_commit_loss += vq_out['loss'].mean()
                id_list.append(vq_out['q'])
        id_list_concat = torch.cat(id_list, dim=1)
        graph_id = global_add_pool(id_list_concat, batch.batch)
        batch = self.post_mp(batch)
        return batch, total_commit_loss, graph_id
97 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/example.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | import torch_geometric.graphgym.models.head  # noqa, register module
 6 | import torch_geometric.graphgym.register as register
 7 | import torch_geometric.nn as pyg_nn
 8 | from torch_geometric.graphgym.config import cfg
 9 | from torch_geometric.graphgym.register import register_network
10 | 
11 | 
@register_network('example')
class ExampleGNN(torch.nn.Module):
    """Example network: a stack of PyG convolutions followed by a head."""

    def __init__(self, dim_in, dim_out, num_layers=2, model_type='GCN'):
        """Create the conv stack and the prediction head.

        Args:
            dim_in: Feature dimension used as input and output of each conv.
            dim_out: Output dimension handed to the prediction head.
            num_layers: One conv is always created, followed by
                ``num_layers - 1`` additional ones.
            model_type: ``'GCN'``, ``'GAT'`` or ``'GraphSage'``.
        """
        super().__init__()
        conv_cls = self.build_conv_model(model_type)
        first_conv = [conv_cls(dim_in, dim_in)]
        extra_convs = [conv_cls(dim_in, dim_in) for _ in range(num_layers - 1)]
        self.convs = nn.ModuleList(first_conv + extra_convs)

        head_cls = register.head_dict[cfg.dataset.task]
        self.post_mp = head_cls(dim_in=dim_in, dim_out=dim_out)

    def build_conv_model(self, model_type):
        """Return the PyG conv class for ``model_type``; raise if unknown."""
        if model_type == 'GCN':
            return pyg_nn.GCNConv
        if model_type == 'GAT':
            return pyg_nn.GATConv
        if model_type == "GraphSage":
            return pyg_nn.SAGEConv
        raise ValueError(f'Model {model_type} unavailable')

    def forward(self, batch):
        """Apply conv -> ReLU -> dropout(p=0.1) per layer, then the head."""
        hidden = batch.x
        edge_index = batch.edge_index

        for conv in self.convs:
            hidden = F.relu(conv(hidden, edge_index))
            hidden = F.dropout(hidden, p=0.1, training=self.training)

        batch.x = hidden
        return self.post_mp(batch)
48 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/gps_model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch_geometric.graphgym.register as register
  3 | from torch_geometric.graphgym.config import cfg
  4 | from torch_geometric.graphgym.models.gnn import GNNPreMP
  5 | from torch_geometric.graphgym.models.layer import (new_layer_config,
  6 |                                                    BatchNorm1dNode)
  7 | from torch_geometric.graphgym.register import register_network
  8 | 
  9 | from graphgps.layer.gps_layer import GPSLayer
 10 | 
 11 | 
 12 | class FeatureEncoder(torch.nn.Module):
 13 |     """
 14 |     Encoding node and edge features
 15 | 
 16 |     Args:
 17 |         dim_in (int): Input feature dimension
 18 |     """
 19 |     def __init__(self, dim_in):
 20 |         super(FeatureEncoder, self).__init__()
 21 |         self.dim_in = dim_in
 22 |         if cfg.dataset.node_encoder:
 23 |             # Encode integer node features via nn.Embeddings
 24 |             NodeEncoder = register.node_encoder_dict[
 25 |                 cfg.dataset.node_encoder_name]
 26 |             self.node_encoder = NodeEncoder(cfg.gnn.dim_inner)
 27 |             if cfg.dataset.node_encoder_bn:
 28 |                 self.node_encoder_bn = BatchNorm1dNode(
 29 |                     new_layer_config(cfg.gnn.dim_inner, -1, -1, has_act=False,
 30 |                                      has_bias=False, cfg=cfg))
 31 |             # Update dim_in to reflect the new dimension of the node features
 32 |             self.dim_in = cfg.gnn.dim_inner
 33 |         if cfg.dataset.edge_encoder:
 34 |             # Hard-limit max edge dim for PNA.
 35 |             if 'PNA' in cfg.gt.layer_type:
 36 |                 cfg.gnn.dim_edge = min(128, cfg.gnn.dim_inner)
 37 |             else:
 38 |                 cfg.gnn.dim_edge = cfg.gnn.dim_inner
 39 |             # Encode integer edge features via nn.Embeddings
 40 |             EdgeEncoder = register.edge_encoder_dict[
 41 |                 cfg.dataset.edge_encoder_name]
 42 |             self.edge_encoder = EdgeEncoder(cfg.gnn.dim_edge)
 43 |             if cfg.dataset.edge_encoder_bn:
 44 |                 self.edge_encoder_bn = BatchNorm1dNode(
 45 |                     new_layer_config(cfg.gnn.dim_edge, -1, -1, has_act=False,
 46 |                                      has_bias=False, cfg=cfg))
 47 | 
 48 |     def forward(self, batch):
 49 |         for module in self.children():
 50 |             batch = module(batch)
 51 |         return batch
 52 | 
 53 | 
 54 | @register_network('GPSModel')
 55 | class GPSModel(torch.nn.Module):
 56 |     """General-Powerful-Scalable graph transformer.
 57 |     https://arxiv.org/abs/2205.12454
 58 |     Rampasek, L., Galkin, M., Dwivedi, V. P., Luu, A. T., Wolf, G., & Beaini, D.
 59 |     Recipe for a general, powerful, scalable graph transformer. (NeurIPS 2022)
 60 |     """
 61 | 
 62 |     def __init__(self, dim_in, dim_out):
 63 |         super().__init__()
 64 |         self.encoder = FeatureEncoder(dim_in)
 65 |         dim_in = self.encoder.dim_in
 66 | 
 67 |         if cfg.gnn.layers_pre_mp > 0:
 68 |             self.pre_mp = GNNPreMP(
 69 |                 dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
 70 |             dim_in = cfg.gnn.dim_inner
 71 | 
 72 |         if not cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in:
 73 |             raise ValueError(
 74 |                 f"The inner and hidden dims must match: "
 75 |                 f"embed_dim={cfg.gt.dim_hidden} dim_inner={cfg.gnn.dim_inner} "
 76 |                 f"dim_in={dim_in}"
 77 |             )
 78 | 
 79 |         try:
 80 |             local_gnn_type, global_model_type = cfg.gt.layer_type.split('+')
 81 |         except:
 82 |             raise ValueError(f"Unexpected layer type: {cfg.gt.layer_type}")
 83 |         layers = []
 84 |         for _ in range(cfg.gt.layers):
 85 |             layers.append(GPSLayer(
 86 |                 dim_h=cfg.gt.dim_hidden,
 87 |                 local_gnn_type=local_gnn_type,
 88 |                 global_model_type=global_model_type,
 89 |                 num_heads=cfg.gt.n_heads,
 90 |                 act=cfg.gnn.act,
 91 |                 pna_degrees=cfg.gt.pna_degrees,
 92 |                 equivstable_pe=cfg.posenc_EquivStableLapPE.enable,
 93 |                 dropout=cfg.gt.dropout,
 94 |                 attn_dropout=cfg.gt.attn_dropout,
 95 |                 layer_norm=cfg.gt.layer_norm,
 96 |                 batch_norm=cfg.gt.batch_norm,
 97 |                 bigbird_cfg=cfg.gt.bigbird,
 98 |                 log_attn_weights=cfg.train.mode == 'log-attn-weights',
 99 |             ))
100 |         self.layers = torch.nn.Sequential(*layers)
101 | 
102 |         GNNHead = register.head_dict[cfg.gnn.head]
103 |         self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)
104 | 
105 |     def forward(self, batch):
106 |         for module in self.children():
107 |             batch = module(batch)
108 |         return batch
109 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/graphormer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch_geometric.graphgym.register as register
 3 | from torch_geometric.graphgym.config import cfg
 4 | from torch_geometric.graphgym.models.gnn import FeatureEncoder, GNNPreMP
 5 | from torch_geometric.graphgym.register import register_network
 6 | 
 7 | from graphgps.layer.graphormer_layer import GraphormerLayer
 8 | 
 9 | 
@register_network('Graphormer')
class GraphormerModel(torch.nn.Module):
    """Graphormer port to GraphGPS.
    https://arxiv.org/abs/2106.05234
    Ying, C., Cai, T., Luo, S., Zheng, S., Ke, G., He, D., ... & Liu, T. Y.
    Do transformers really perform badly for graph representation? (NeurIPS2021)
    """

    def __init__(self, dim_in, dim_out):
        """Assemble encoder, optional pre-MP layers, Graphormer stack, head.

        Args:
            dim_in: Input feature dimension handed to ``FeatureEncoder``.
            dim_out: Output dimension handed to the prediction head.

        Raises:
            ValueError: If the embed, inner and input dimensions disagree.
        """
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        dims_agree = cfg.graphormer.embed_dim == cfg.gnn.dim_inner == dim_in
        if not dims_agree:
            raise ValueError(
                f"The inner and embed dims must match: "
                f"embed_dim={cfg.graphormer.embed_dim} "
                f"dim_inner={cfg.gnn.dim_inner} dim_in={dim_in}"
            )

        self.layers = torch.nn.Sequential(*[
            GraphormerLayer(
                embed_dim=cfg.graphormer.embed_dim,
                num_heads=cfg.graphormer.num_heads,
                dropout=cfg.graphormer.dropout,
                attention_dropout=cfg.graphormer.attention_dropout,
                mlp_dropout=cfg.graphormer.mlp_dropout,
            )
            for _ in range(cfg.graphormer.num_layers)
        ])

        head_cls = register.head_dict[cfg.gnn.head]
        self.post_mp = head_cls(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Feed ``batch`` through each child module in registration order."""
        for stage in self.children():
            batch = stage(batch)
        return batch
53 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/performer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch_geometric.graphgym.register as register
 3 | from torch_geometric.graphgym.config import cfg
 4 | from torch_geometric.graphgym.models.gnn import FeatureEncoder, GNNPreMP
 5 | from torch_geometric.graphgym.register import register_network
 6 | 
 7 | from graphgps.layer.performer_layer import Performer as BackbonePerformer
 8 | 
 9 | 
@register_network('Performer')
class Performer(torch.nn.Module):
    """Performer without edge features.
    This model disregards edge features and runs a linear transformer over a set of node features only.
    https://arxiv.org/abs/2009.14794
    """

    def __init__(self, dim_in, dim_out):
        """Build the encoder, optional pre-MP layers, backbone and head.

        Args:
            dim_in: Input feature dimension handed to ``FeatureEncoder``.
            dim_out: Output dimension handed to the prediction head.

        Raises:
            ValueError: If ``cfg.gt.dim_hidden``, ``cfg.gnn.dim_inner`` and
                the dimension after the encoder / pre-MP layers differ.
        """
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        # Raise explicitly instead of asserting: asserts are stripped when
        # Python runs with -O, which would let a mis-sized config through.
        if not cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in:
            raise ValueError(
                f"The inner and hidden dims must match: "
                f"dim_hidden={cfg.gt.dim_hidden} "
                f"dim_inner={cfg.gnn.dim_inner} dim_in={dim_in}"
            )

        self.trf = BackbonePerformer(
            dim=cfg.gt.dim_hidden,
            depth=cfg.gt.layers,
            heads=cfg.gt.n_heads,
            # Integer division: any remainder of dim_hidden / n_heads is dropped.
            dim_head=cfg.gt.dim_hidden // cfg.gt.n_heads
        )

        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Apply every registered child module to ``batch`` in turn.

        ``children()`` yields sub-modules in registration order, i.e.
        encoder, (optional) pre_mp, trf, post_mp.
        """
        for module in self.children():
            batch = module(batch)
        return batch
44 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/network/san_transformer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch_geometric.graphgym.register as register
 3 | from torch_geometric.graphgym.config import cfg
 4 | from torch_geometric.graphgym.models.gnn import FeatureEncoder, GNNPreMP
 5 | from torch_geometric.graphgym.register import register_network
 6 | 
 7 | from graphgps.layer.san_layer import SANLayer
 8 | from graphgps.layer.san2_layer import SAN2Layer
 9 | 
10 | 
@register_network('SANTransformer')
class SANTransformer(torch.nn.Module):
    """Spectral Attention Network (SAN) Graph Transformer.
    https://arxiv.org/abs/2106.03893
    """

    def __init__(self, dim_in, dim_out):
        """Build the encoder, optional pre-MP layers, SAN layers and head.

        Args:
            dim_in: Input feature dimension handed to ``FeatureEncoder``.
            dim_out: Output dimension handed to the prediction head.

        Raises:
            ValueError: If the hidden/inner/input dimensions disagree, or if
                ``cfg.gt.layer_type`` is neither ``'SANLayer'`` nor
                ``'SAN2Layer'``.
        """
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        # Raise explicitly instead of asserting: asserts are stripped when
        # Python runs with -O, which would let a mis-sized config through.
        if not cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in:
            raise ValueError(
                f"The inner and hidden dims must match: "
                f"dim_hidden={cfg.gt.dim_hidden} "
                f"dim_inner={cfg.gnn.dim_inner} dim_in={dim_in}"
            )

        # A single embedding instance is shared by all layers.
        fake_edge_emb = torch.nn.Embedding(1, cfg.gt.dim_hidden)
        # torch.nn.init.xavier_uniform_(fake_edge_emb.weight.data)
        layer_classes = {
            'SANLayer': SANLayer,
            'SAN2Layer': SAN2Layer,
        }
        Layer = layer_classes.get(cfg.gt.layer_type)
        if Layer is None:
            # Fail here with a clear message rather than later with
            # "'NoneType' object is not callable".
            raise ValueError(
                f"Unexpected layer type: {cfg.gt.layer_type}; "
                f"expected one of {sorted(layer_classes)}"
            )

        layers = []
        for _ in range(cfg.gt.layers):
            layers.append(Layer(gamma=cfg.gt.gamma,
                                in_dim=cfg.gt.dim_hidden,
                                out_dim=cfg.gt.dim_hidden,
                                num_heads=cfg.gt.n_heads,
                                full_graph=cfg.gt.full_graph,
                                fake_edge_emb=fake_edge_emb,
                                dropout=cfg.gt.dropout,
                                layer_norm=cfg.gt.layer_norm,
                                batch_norm=cfg.gt.batch_norm,
                                residual=cfg.gt.residual))
        self.trf_layers = torch.nn.Sequential(*layers)

        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Apply every registered child module to ``batch`` in turn.

        ``children()`` yields sub-modules in registration order, i.e.
        encoder, (optional) pre_mp, trf_layers, post_mp.
        """
        for module in self.children():
            batch = module(batch)
        return batch
57 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Export every sibling ``*.py`` file of this package by its bare module name;
# ``__init__.py`` itself is left out of the public list.
modules = glob.glob(join(dirname(__file__), "*.py"))
__all__ = [
    basename(path)[:-len(".py")]
    for path in modules
    if not path.endswith('__init__.py') and isfile(path)
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/pooling/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Export every sibling ``*.py`` file of this package by its bare module name;
# ``__init__.py`` itself is left out of the public list.
modules = glob.glob(join(dirname(__file__), "*.py"))
__all__ = [
    basename(path)[:-len(".py")]
    for path in modules
    if not path.endswith('__init__.py') and isfile(path)
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/pooling/example.py:
--------------------------------------------------------------------------------
1 | from torch_geometric.graphgym.register import register_pooling
2 | from torch_geometric.utils import scatter
3 | 
4 | 
@register_pooling('example')
def global_example_pool(x, batch, size=None):
    """Sum the rows of ``x`` that share the same index in ``batch``.

    Args:
        x: Values to pool; reduced along dim 0.
        batch: Index tensor assigning each row of ``x`` to an output row.
        size: Number of output rows. When ``None`` it is inferred as
            ``batch.max() + 1``.
    """
    if size is None:
        size = batch.max().item() + 1
    return scatter(x, batch, dim=0, dim_size=size, reduce='sum')
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/pooling/graph_token.py:
--------------------------------------------------------------------------------
 1 | from torch_geometric.graphgym.register import register_pooling
 2 | from torch_geometric.utils import to_dense_batch
 3 | 
 4 | 
 5 | @register_pooling('graph_token')
 6 | def graph_token_pooling(x, batch, *args):
 7 |     """Extracts the graph token from a batch to perform graph-level prediction.
 8 |     Typically used together with Graphormer when GraphormerEncoder is used and
 9 |     the global graph token is used: `cfg.graphormer.use_graph_token == True`.
10 |     """
11 |     x, _ = to_dense_batch(x, batch)
12 |     return x[:, 0, :]
13 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/stage/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Export every sibling ``*.py`` file of this package by its bare module name;
# ``__init__.py`` itself is left out of the public list.
modules = glob.glob(join(dirname(__file__), "*.py"))
__all__ = [
    basename(path)[:-len(".py")]
    for path in modules
    if not path.endswith('__init__.py') and isfile(path)
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/stage/example.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | from torch_geometric.graphgym.config import cfg
 5 | from torch_geometric.graphgym.models.layer import GeneralLayer
 6 | from torch_geometric.graphgym.register import register_stage
 7 | 
 8 | 
 9 | def GNNLayer(dim_in, dim_out, has_act=True):
10 |     return GeneralLayer(cfg.gnn.layer_type, dim_in, dim_out, has_act)
11 | 
12 | 
@register_stage('example')
class GNNStackStage(nn.Module):
    '''Simple Stage that stack GNN layers'''
    def __init__(self, dim_in, dim_out, num_layers):
        """Register ``num_layers`` layers named ``layer0``, ``layer1``, ...

        Only the first layer maps ``dim_in -> dim_out``; every later one
        maps ``dim_out -> dim_out``.
        """
        super().__init__()
        current_dim = dim_in
        for index in range(num_layers):
            self.add_module(f'layer{index}', GNNLayer(current_dim, dim_out))
            current_dim = dim_out
        self.dim_out = dim_out

    def forward(self, batch):
        """Run all layers in order; L2-normalize ``batch.x`` if configured."""
        for gnn_layer in self.children():
            batch = gnn_layer(batch)
        if not cfg.gnn.l2norm:
            return batch
        batch.x = F.normalize(batch.x, p=2, dim=-1)
        return batch
30 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/train/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Export every sibling ``*.py`` file of this package by its bare module name;
# ``__init__.py`` itself is left out of the public list.
modules = glob.glob(join(dirname(__file__), "*.py"))
__all__ = [
    basename(path)[:-len(".py")]
    for path in modules
    if not path.endswith('__init__.py') and isfile(path)
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/train/example.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import time
 3 | 
 4 | import torch
 5 | 
 6 | from torch_geometric.graphgym.checkpoint import (
 7 |     clean_ckpt,
 8 |     load_ckpt,
 9 |     save_ckpt,
10 | )
11 | from torch_geometric.graphgym.config import cfg
12 | from torch_geometric.graphgym.loss import compute_loss
13 | from torch_geometric.graphgym.register import register_train
14 | from torch_geometric.graphgym.utils.epoch import is_ckpt_epoch, is_eval_epoch
15 | 
16 | 
def train_epoch(logger, loader, model, optimizer, scheduler):
    """Train ``model`` for one pass over ``loader``.

    For every batch: zero the gradients, run the forward pass, compute the
    loss, back-propagate, step the optimizer and record statistics in
    ``logger``. The scheduler is stepped once after the whole pass.
    """
    model.train()
    batch_started = time.time()
    for batch in loader:
        optimizer.zero_grad()
        batch.to(torch.device(cfg.device))
        pred, true = model(batch)
        loss, pred_score = compute_loss(pred, true)
        loss.backward()
        optimizer.step()
        stats = dict(
            true=true.detach().cpu(),
            pred=pred_score.detach().cpu(),
            loss=loss.item(),
            lr=scheduler.get_last_lr()[0],
            time_used=time.time() - batch_started,
            params=cfg.params,
        )
        logger.update_stats(**stats)
        # Restart the timer so ``time_used`` is measured per batch.
        batch_started = time.time()
    scheduler.step()
34 | 
35 | 
@torch.no_grad()
def eval_epoch(logger, loader, model):
    """Evaluate ``model`` on every batch of ``loader`` and log the results.

    Runs with gradient tracking disabled: nothing is back-propagated during
    evaluation, so building the autograd graph would only cost memory and
    time.

    Args:
        logger: Object whose ``update_stats`` receives the per-batch
            targets, prediction scores, loss and timing.
        loader: Iterable of batches.
        model: Network called as ``model(batch)`` and expected to return a
            ``(pred, true)`` pair.
    """
    model.eval()
    time_start = time.time()
    for batch in loader:
        batch.to(torch.device(cfg.device))
        pred, true = model(batch)
        loss, pred_score = compute_loss(pred, true)
        logger.update_stats(true=true.detach().cpu(),
                            pred=pred_score.detach().cpu(), loss=loss.item(),
                            lr=0, time_used=time.time() - time_start,
                            params=cfg.params)
        # Restart the timer so ``time_used`` is measured per batch.
        time_start = time.time()
48 | 
49 | 
@register_train('example')
def train_example(loggers, loaders, model, optimizer, scheduler):
    """Full training loop with optional resume, evaluation and checkpoints.

    ``loggers[0]`` / ``loaders[0]`` are used for training; every further
    logger/loader pair is evaluated on evaluation epochs.
    """
    if cfg.train.auto_resume:
        start_epoch = load_ckpt(model, optimizer, scheduler,
                                cfg.train.epoch_resume)
    else:
        start_epoch = 0

    if start_epoch != cfg.optim.max_epoch:
        logging.info('Start from epoch %s', start_epoch)
    else:
        logging.info('Checkpoint found, Task already done')

    split_count = len(loggers)
    for epoch in range(start_epoch, cfg.optim.max_epoch):
        train_epoch(loggers[0], loaders[0], model, optimizer, scheduler)
        loggers[0].write_epoch(epoch)

        if is_eval_epoch(epoch):
            for split in range(1, split_count):
                eval_epoch(loggers[split], loaders[split], model)
                loggers[split].write_epoch(epoch)

        if is_ckpt_epoch(epoch):
            save_ckpt(model, optimizer, scheduler, epoch)

    for split_logger in loggers:
        split_logger.close()

    if cfg.train.ckpt_clean:
        clean_ckpt()

    logging.info('Task done, results saved in %s', cfg.run_dir)
77 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/transform/__init__.py:
--------------------------------------------------------------------------------
1 | from os.path import dirname, basename, isfile, join
2 | import glob
3 | 
# Export every sibling ``*.py`` file of this package by its bare module name;
# ``__init__.py`` itself is left out of the public list.
modules = glob.glob(join(dirname(__file__), "*.py"))
__all__ = [
    basename(path)[:-len(".py")]
    for path in modules
    if not path.endswith('__init__.py') and isfile(path)
]
9 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/transform/task_preprocessing.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def shuffle(tensor):
 5 |     idx = torch.randperm(len(tensor))
 6 |     return tensor[idx]
 7 | 
 8 | 
 9 | def task_specific_preprocessing(data, cfg):
10 |     """Task-specific preprocessing before the dataset is logged and finalized.
11 | 
12 |     Args:
13 |         data: PyG graph
14 |         cfg: Main configuration node
15 | 
16 |     Returns:
17 |         Extended PyG Data object.
18 |     """
19 |     if cfg.gnn.head == "infer_links":
20 |         N = data.x.size(0)
21 |         idx = torch.arange(N, dtype=torch.long)
22 |         complete_index = torch.stack([idx.repeat_interleave(N), idx.repeat(N)], 0)
23 | 
24 |         data.edge_attr = None
25 |         
26 |         if cfg.dataset.infer_link_label == "edge":
27 |             labels = torch.empty(N, N, dtype=torch.long)
28 |             non_edge_index = (complete_index.T.unsqueeze(1) != data.edge_index.T).any(2).all(1).nonzero()[:, 0]
29 |             non_edge_index = shuffle(non_edge_index)[:data.edge_index.size(1)]
30 |             edge_index = (complete_index.T.unsqueeze(1) == data.edge_index.T).all(2).any(1).nonzero()[:, 0]
31 | 
32 |             final_index = shuffle(torch.cat([edge_index, non_edge_index]))
33 |             data.complete_edge_index = complete_index[:, final_index]
34 | 
35 |             labels.fill_(0)
36 |             labels[data.edge_index[0], data.edge_index[1]] = 1
37 | 
38 |             assert labels.flatten()[final_index].mean(dtype=torch.float) == 0.5
39 |         else:
40 |             raise ValueError(f"Infer-link task {cfg.dataset.infer_link_label} not available.")
41 | 
42 |         data.y = labels.flatten()[final_index]
43 | 
44 |     supported_encoding_available = (
45 |         cfg.posenc_LapPE.enable or
46 |         cfg.posenc_RWSE.enable or
47 |         cfg.posenc_GraphormerBias.enable
48 |     )
49 | 
50 |     if cfg.dataset.name == "TRIANGLES":
51 | 
52 |         # If encodings are present they can append to the empty data.x
53 |         if not supported_encoding_available:
54 |             data.x = torch.zeros((data.x.size(0), 1))
55 |         data.y = data.y.sub(1).to(torch.long)
56 | 
57 |     if cfg.dataset.name == "CSL":
58 | 
59 |         # If encodings are present they can append to the empty data.x
60 |         if not supported_encoding_available:
61 |             data.x = torch.zeros((data.num_nodes, 1))
62 |         else:
63 |             data.x = torch.zeros((data.num_nodes, 0))
64 | 
65 |     return data
66 | 


--------------------------------------------------------------------------------
/SL/Graph_Classification/graphgps/transform/transforms.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import torch
 4 | from torch_geometric.utils import subgraph
 5 | from tqdm import tqdm
 6 | 
 7 | 
 8 | def pre_transform_in_memory(dataset, transform_func, show_progress=False):
 9 |     """Pre-transform already loaded PyG dataset object.
10 | 
11 |     Apply transform function to a loaded PyG dataset object so that
12 |     the transformed result is persistent for the lifespan of the object.
13 |     This means the result is not saved to disk, as what PyG's `pre_transform`
14 |     would do, but also the transform is applied only once and not at each
15 |     data access as what PyG's `transform` hook does.
16 | 
17 |     Implementation is based on torch_geometric.data.in_memory_dataset.copy
18 | 
19 |     Args:
20 |         dataset: PyG dataset object to modify
21 |         transform_func: transformation function to apply to each data example
22 |         show_progress: show tqdm progress bar
23 |     """
24 |     if transform_func is None:
25 |         return dataset
26 | 
27 |     data_list = [transform_func(dataset.get(i))
28 |                  for i in tqdm(range(len(dataset)),
29 |                                disable=not show_progress,
30 |                                mininterval=10,
31 |                                miniters=len(dataset)//20)]
32 |     data_list = list(filter(None, data_list))
33 | 
34 |     dataset._indices = None
35 |     dataset._data_list = data_list
36 |     dataset.data, dataset.slices = dataset.collate(data_list)
37 | 
38 | 
def typecast_x(data, type_str):
    """Cast `data.x` to the requested dtype and return `data`.

    Args:
        data: object whose `x` attribute supports `.float()` / `.long()`
        type_str: either 'float' or 'long'

    Raises:
        ValueError: if `type_str` is neither 'float' nor 'long'.
    """
    if type_str not in ('float', 'long'):
        raise ValueError(f"Unexpected type '{type_str}'.")

    features = data.x
    data.x = features.float() if type_str == 'float' else features.long()
    return data
47 | 
48 | 
def concat_x_and_pos(data):
    """Append `data.pos` to `data.x` along dimension 1 and return `data`."""
    features_with_pos = torch.cat([data.x, data.pos], dim=1)
    data.x = features_with_pos
    return data
52 | 
53 | 
def clip_graphs_to_size(data, size_limit=5000):
    """Truncate a graph to its first `size_limit` nodes.

    Graphs that already have at most `size_limit` nodes are returned
    untouched. Larger graphs are modified in place: edges are restricted to
    the sub-graph induced by nodes `0 .. size_limit - 1`, and `x` (when
    present) and the ogbg-code2 node attributes are sliced to the same
    length. Other per-node attributes are not touched.

    Args:
        data: graph object exposing `edge_index` and either `num_nodes` or `x`
        size_limit: maximum number of nodes to keep

    Returns:
        The same `data` object.
    """
    if hasattr(data, 'num_nodes'):
        N = data.num_nodes  # Explicitly given number of nodes, e.g. ogbg-ppa
    else:
        N = data.x.shape[0]  # Number of nodes, including disconnected nodes.
    if N <= size_limit:
        return data

    logging.info(f'  ...clip to {size_limit} a graph of size: {N}')
    has_edge_attr = hasattr(data, 'edge_attr')
    edge_attr = data.edge_attr if has_edge_attr else None
    edge_index, edge_attr = subgraph(list(range(size_limit)),
                                     data.edge_index, edge_attr)

    # `x` may exist as an attribute yet hold None (presumably graphs without
    # node features -- confirm against the PyG version in use); slicing None
    # would raise TypeError, so only slice a real value.
    if getattr(data, 'x', None) is not None:
        data.x = data.x[:size_limit]
    data.num_nodes = size_limit

    if hasattr(data, 'node_is_attributed'):  # for ogbg-code2 dataset
        data.node_is_attributed = data.node_is_attributed[:size_limit]
        data.node_dfs_order = data.node_dfs_order[:size_limit]
        data.node_depth = data.node_depth[:size_limit]

    data.edge_index = edge_index
    if has_edge_attr:
        data.edge_attr = edge_attr
    return data
82 | 


--------------------------------------------------------------------------------
/SL/Link_Prediction/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Prepare Datasets
 3 | 
 4 | ```
 5 | python ogbdataset.py
 6 | ```
 7 | 
 8 | ## Reproduce Results
 9 | 
10 | Cora
11 | ```
12 | python ID_pretrain.py   --xdp 0.5 --tdp 0.1 --gnndp 0.1 --gnnedp 0.1 \
13 | --predp 0.1 --preedp 0.1 --gnnlr 0.004 \
14 | --prelr 0.002  --batch_size 1152  --ln --lnnn --predictor cn1 --dataset Cora  --epochs 150 \
15 | --runs 1 --model gcn --hiddim 256 --mplayers 10  --testbs 8192  \
16 | --maskinput  --jk --codebook 32 --kmeans 1 --tailact --device 3
17 | 
18 | python ID_MLP.py  --preedp 0.4 --predp 0.4 \
19 |           --prelr 0.01  --batch_size 1152 \
20 |           --lnnn --predictor cn1 --dataset Cora  --epochs 1000 --runs 2 \
21 |           --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact  --device 3
22 | ```
23 | 
24 | Citeseer
25 | ```
26 | python ID_pretrain.py   --xdp 0.4 --tdp 0.3 --gnndp 0.3 --gnnedp 0.3 \
27 | --predp 0.3 --preedp 0.3 --gnnlr 0.01 \
28 | --prelr 0.01  --batch_size 384  --ln --lnnn --predictor cn1 --dataset Citeseer  --epochs 10 \
29 | --runs 1 --model puregcn --hiddim 256 --mplayers 10  --testbs 4096  \
30 | --maskinput  --codebook 8 --kmeans 1 --tailact --device 0
31 | 
32 | python ID_MLP.py  --preedp 0.1 --predp 0.1 \
33 |           --prelr 0.01 --batch_size 384 \
34 |           --lnnn --predictor cn1 --dataset Citeseer  --epochs 1000 --runs 2 \
35 |           --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact --device 0
36 | ```
37 | 
38 | Pubmed
39 | ```
40 | python ID_pretrain.py   --xdp 0.5  --tdp 0.0 --gnndp 0.1 --gnnedp 0.0 \
41 | --predp 0.0 --preedp 0.0 --gnnlr 0.01 \
42 | --prelr 0.002 --batch_size 2048  --ln --lnnn --predictor cn1 --dataset Pubmed  --epochs 100 \
43 | --runs 1 --model gcn --hiddim 256 --mplayers 10  --testbs 8192  \
44 | --maskinput  --jk --use_xlin --codebook 8 --kmeans 1 --tailact --device 0
45 | 
46 | python ID_MLP.py  --preedp 0.3 --predp 0.3 \
47 |   --prelr 0.001  --batch_size 4000 \
48 |   --lnnn --predictor cn1 --dataset Pubmed  --epochs 1000 --runs 2 \
49 |   --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact  --device 0
50 | ```
51 | 
52 | collab
53 | ```
54 | python ID_pretrain.py   --xdp 0.25 --tdp 0.05 --gnnedp 0.25 --preedp 0.0 --predp 0.3 --gnndp 0.1  --gnnlr 0.001 --prelr 0.001  --batch_size 65536  --ln --lnnn --predictor cn1 --dataset collab  --epochs 150 --runs 1 --model gcn --hiddim 256 --mplayers 5  --testbs 131072  --maskinput --use_valedges_as_input --jk --device 4 --tailact
55 | 
56 | python ID_MLP.py  --preedp 0.0 --predp 0.0   --prelr 0.001  --batch_size 40000   --ln --lnnn --predictor cn1 --dataset collab  --epochs 200 --runs 1 --model gcn --hiddim 256   --testbs 131072  --maskinput --use_valedges_as_input   --device 4 --num_id 15  --tailact
57 | ```
58 | 
59 | 
60 | ppa
61 | ```
62 | python ID_pretrain.py  --xdp 0.0 --tdp 0.0 --gnnedp 0.1 --preedp 0.0 --predp 0.1 --gnndp 0.0 --gnnlr 0.001 --prelr 0.001  --batch_size 16384  --ln --lnnn --predictor cn1 --dataset ppa   --epochs 60 --runs 1 --model gcn --hiddim 64 --mplayers 5 --maskinput  --tailact  --testbs 65536 --device 7 --res
63 | 
64 | python ID_MLP.py  --preedp 0.0 --predp 0.1 \
65 |  --prelr 0.005  --batch_size 56384  --ln --lnnn --predictor cn1 --dataset ppa  \
66 |   --epochs 100 --runs 1 --model gcn --hiddim 256 --maskinput  --tailact \
67 |    --testbs 65536 --device 7 --num_id 15
68 | ```
69 | 


--------------------------------------------------------------------------------
/SL/Link_Prediction/ogbdataset.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from sklearn.metrics import roc_auc_score, average_precision_score
 3 | from ogb.linkproppred import PygLinkPropPredDataset
 4 | import torch_geometric.transforms as T
 5 | from torch_sparse import SparseTensor
 6 | from torch_geometric.datasets import Planetoid
 7 | from torch_geometric.utils import train_test_split_edges, negative_sampling, to_undirected
 8 | from torch_geometric.transforms import RandomLinkSplit
 9 | 
10 | # random split dataset
def randomsplit(dataset, val_ratio: float=0.10, test_ratio: float=0.2):
    """Randomly split the edges of `dataset[0]` into train/valid/test sets.

    `train_test_split_edges` is called with `test_ratio` for both its
    validation and test fractions. Only a `val_ratio / test_ratio` share of
    that validation pool is kept for validation; the remaining positive
    edges are returned to the training set.

    Args:
        dataset: dataset whose first element has `x` and `edge_index`
        val_ratio: fraction of edges to use as positive validation edges
        test_ratio: fraction of edges to use as positive test edges

    Returns:
        dict with keys 'train', 'valid', 'test'. Each holds an 'edge' tensor
        of shape (num_edges, 2); 'valid' and 'test' also hold 'edge_neg'.
        Every undirected edge appears once, with source < target.
    """
    def removerepeated(ei):
        # Symmetrise, then keep one direction so each undirected edge is unique.
        ei = to_undirected(ei)
        ei = ei[:, ei[0]<ei[1]]
        return ei
    data = dataset[0]
    data.num_nodes = data.x.shape[0]
    data = train_test_split_edges(data, test_ratio, test_ratio)
    split_edge = {'train': {}, 'valid': {}, 'test': {}}

    pool_size = data.val_pos_edge_index.shape[1]
    num_val = min(int(pool_size * val_ratio/test_ratio), pool_size)
    # Use an explicit split point: negative slicing with num_val == 0
    # (`[:, :-0]` / `[:, -0:]`) would send every pooled edge to validation
    # instead of back to training.
    num_to_train = pool_size - num_val
    data.val_pos_edge_index = data.val_pos_edge_index[:, torch.randperm(pool_size)]

    train_edges = torch.cat((data.train_pos_edge_index,
                             data.val_pos_edge_index[:, :num_to_train]), dim=-1)
    split_edge['train']['edge'] = removerepeated(train_edges).t()
    split_edge['valid']['edge'] = removerepeated(data.val_pos_edge_index[:, num_to_train:]).t()
    split_edge['valid']['edge_neg'] = removerepeated(data.val_neg_edge_index).t()
    split_edge['test']['edge'] = removerepeated(data.test_pos_edge_index).t()
    split_edge['test']['edge_neg'] = removerepeated(data.test_neg_edge_index).t()
    return split_edge
28 | 
def loaddataset(name: str, use_valedges_as_input: bool, load=None):
    """Load a link-prediction dataset and build its sparse adjacencies.

    "Cora", "Citeseer" and "Pubmed" are loaded through Planetoid and split
    with `randomsplit`; any other name is loaded as the OGB dataset
    `ogbl-<name>` with its own edge split.

    Args:
        name: dataset name
        use_valedges_as_input: if true, `data.full_adj_t` also contains the
            positive validation edges; otherwise it is `data.adj_t`
        load: optional path given to `torch.load` to replace `data.x`.
            NOTE(review): `torch.load` unpickles, so only pass trusted files.

    Returns:
        `(data, split_edge)`. `data` gains `adj_t`, `full_adj_t` and `max_x`,
        and its `edge_weight` is set to None.
    """
    if name not in ["Cora", "Citeseer", "Pubmed"]:
        dataset = PygLinkPropPredDataset(name=f'ogbl-{name}')
        split_edge = dataset.get_edge_split()
        data = dataset[0]
        edge_index = data.edge_index
    else:
        dataset = Planetoid(root="dataset", name=name)
        split_edge = randomsplit(dataset)
        data = dataset[0]
        # Message passing only sees the training edges.
        data.edge_index = to_undirected(split_edge["train"]["edge"].t())
        edge_index = data.edge_index
        data.num_nodes = data.x.shape[0]

    data.edge_weight = None
    print(data.num_nodes, edge_index.max())
    train_adj = SparseTensor.from_edge_index(
        edge_index, sparse_sizes=(data.num_nodes, data.num_nodes))
    data.adj_t = train_adj
    data.adj_t = data.adj_t.to_symmetric().coalesce()

    # max_x stays -1 unless x is replaced by integer ids below.
    data.max_x = -1
    if name == "ppa":
        data.x = torch.argmax(data.x, dim=-1)
        data.max_x = torch.max(data.x).item()
    elif name == "ddi":
        data.x = torch.arange(data.num_nodes)
        data.max_x = data.num_nodes
    if load is not None:
        data.x = torch.load(load, map_location="cpu")
        data.max_x = -1

    print("dataset split ")
    for split_name, parts in split_edge.items():
        for part_name in parts:
            print(split_name, part_name, parts[part_name].shape[0])

    # Use training + validation edges for inference on test set.
    if not use_valedges_as_input:
        data.full_adj_t = data.adj_t
    else:
        val_edge_index = split_edge['valid']['edge'].t()
        full_edge_index = torch.cat([edge_index, val_edge_index], dim=-1)
        full_adj = SparseTensor.from_edge_index(
            full_edge_index, sparse_sizes=(data.num_nodes, data.num_nodes))
        data.full_adj_t = full_adj.coalesce()
        data.full_adj_t = data.full_adj_t.to_symmetric()
    return data, split_edge
72 | 
if __name__ == "__main__":
    # Load every supported dataset once, in this order, without adding the
    # validation edges to the inference graph.
    for dataset_name in ("Cora", "Citeseer", "Pubmed",
                         "ppa", "collab", "citation2"):
        loaddataset(dataset_name, False)


--------------------------------------------------------------------------------
/SL/Link_Prediction/run.sh:
--------------------------------------------------------------------------------
 1 | # collab
 2 | python ID_pretrain.py   --xdp 0.25 --tdp 0.05 --gnnedp 0.25 --preedp 0.0 --predp 0.3 --gnndp 0.1  --gnnlr 0.001 --prelr 0.001  --batch_size 65536  --ln --lnnn --predictor cn1 --dataset collab  --epochs 150 --runs 1 --model gcn --hiddim 256 --mplayers 5  --testbs 131072  --maskinput --use_valedges_as_input --jk --device 4 --tailact
 3 | 
 4 | python ID_MLP.py  --preedp 0.0 --predp 0.0   --prelr 0.001  --batch_size 40000   --ln --lnnn --predictor cn1 --dataset collab  --epochs 200 --runs 1 --model gcn --hiddim 256   --testbs 131072  --maskinput --use_valedges_as_input   --device 4 --num_id 15  --tailact
 5 | 
 6 | # ppa
 7 | python ID_pretrain.py  --xdp 0.0 --tdp 0.0 --gnnedp 0.1 --preedp 0.0 --predp 0.1 --gnndp 0.0 --gnnlr 0.001 --prelr 0.001  --batch_size 16384  --ln --lnnn --predictor cn1 --dataset ppa   --epochs 60 --runs 1 --model gcn --hiddim 64 --mplayers 5 --maskinput  --tailact  --testbs 65536 --device 7 --res
 8 | 
 9 | python ID_MLP.py  --preedp 0.0 --predp 0.1 \
10 |  --prelr 0.005  --batch_size 56384  --ln --lnnn --predictor cn1 --dataset ppa  \
11 |   --epochs 100 --runs 1 --model gcn --hiddim 256 --maskinput  --tailact \
12 |    --testbs 65536 --device 4 --num_id 15
13 | 
14 | # Cora
15 | python ID_pretrain.py   --xdp 0.5 --tdp 0.1 --gnndp 0.1 --gnnedp 0.1 \
16 | --predp 0.1 --preedp 0.1 --gnnlr 0.004 \
17 | --prelr 0.002  --batch_size 1152  --ln --lnnn --predictor cn1 --dataset Cora  --epochs 150 \
18 | --runs 1 --model gcn --hiddim 256 --mplayers 10  --testbs 8192  \
19 | --maskinput  --jk --codebook 32 --kmeans 1 --tailact --device 3
20 | 
21 | python ID_MLP.py  --preedp 0.4 --predp 0.4 \
22 |           --prelr 0.01  --batch_size 1152 \
23 |           --lnnn --predictor cn1 --dataset Cora  --epochs 1000 --runs 2 \
24 |           --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact  --device 3
25 | 
26 | # Citeseer
27 | python ID_pretrain.py   --xdp 0.4 --tdp 0.3 --gnndp 0.3 --gnnedp 0.3 \
28 | --predp 0.3 --preedp 0.3 --gnnlr 0.01 \
29 | --prelr 0.01  --batch_size 384  --ln --lnnn --predictor cn1 --dataset Citeseer  --epochs 10 \
30 | --runs 1 --model puregcn --hiddim 256 --mplayers 10  --testbs 4096  \
31 | --maskinput  --codebook 8 --kmeans 1 --tailact --device 0
32 | 
33 | python ID_MLP.py  --preedp 0.1 --predp 0.1 \
34 |           --prelr 0.01 --batch_size 384 \
35 |           --lnnn --predictor cn1 --dataset Citeseer  --epochs 1000 --runs 2 \
36 |           --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact --device 0
37 | 
38 | # Pubmed
39 | python ID_pretrain.py   --xdp 0.5  --tdp 0.0 --gnndp 0.1 --gnnedp 0.0 \
40 | --predp 0.0 --preedp 0.0 --gnnlr 0.01 \
41 | --prelr 0.002 --batch_size 2048  --ln --lnnn --predictor cn1 --dataset Pubmed  --epochs 100 \
42 | --runs 1 --model gcn --hiddim 256 --mplayers 10  --testbs 8192  \
43 | --maskinput  --jk --use_xlin --codebook 8 --kmeans 1 --tailact --device 0
44 | 
45 | python ID_MLP.py  --preedp 0.3 --predp 0.3 \
46 |   --prelr 0.001  --batch_size 4000 \
47 |   --lnnn --predictor cn1 --dataset Pubmed  --epochs 1000 --runs 2 \
48 |   --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact  --device 0
49 | 


--------------------------------------------------------------------------------
/SL/Link_Prediction/run_citeseer.sh:
--------------------------------------------------------------------------------
 1 | for kmeans in 1
 2 | do
 3 | for codebook in 8 16 32
 4 | do
 5 | for epoch in 30 5 100
 6 | do
 7 | for dropout1 in 0.1 0.3
 8 | do
 9 | for layer in 3 5 10
10 | do
11 | 
12 | python ID_pretrain.py   --xdp 0.4 --tdp $dropout1 --gnndp $dropout1 --gnnedp $dropout1 \
13 | --predp $dropout1 --preedp $dropout1 --gnnlr 0.01 \
14 | --prelr 0.01  --batch_size 384  --ln --lnnn --predictor cn1 --dataset Citeseer  --epochs $epoch \
15 | --runs 1 --model puregcn --hiddim 256 --mplayers $layer  --testbs 4096  \
16 | --maskinput  --jk --codebook $codebook --kmeans $kmeans --tailact  --device 1
17 | 
18 | for dropout in 0.1 0.5
19 | do
20 |   for lr in 0.01 0.001
21 |   do
22 |     for batch_size in 384
23 |     do
24 |       for hiddim in 512
25 |       do
26 | 
27 |         python ID_MLP.py  --preedp $dropout --predp $dropout \
28 |           --prelr $lr --batch_size $batch_size \
29 |           --lnnn --predictor cn1 --dataset Citeseer  --epochs 1000 --runs 2 \
30 |           --hiddim $hiddim --testbs 8192 --maskinput --num_id $((layer * 3)) --tailact --device 1
31 |           
32 |       done
33 |     done
34 |   done
35 | done
36 | 
37 | 
38 | python ID_pretrain.py   --xdp 0.4 --tdp $dropout1 --gnndp $dropout1 --gnnedp $dropout1 \
39 | --predp $dropout1 --preedp $dropout1 --gnnlr 0.01 \
40 | --prelr 0.01  --batch_size 384  --ln --lnnn --predictor cn1 --dataset Citeseer  --epochs $epoch \
41 | --runs 1 --model puregcn --hiddim 256 --mplayers $layer  --testbs 4096  \
42 | --maskinput  --codebook $codebook --kmeans $kmeans --tailact --device 1
43 | 
44 | 
45 | for dropout in 0.1 0.5
46 | do
47 |   for lr in 0.01 0.001
48 |   do
49 |     for batch_size in 384
50 |     do
51 |       for hiddim in 512
52 |       do
53 | 
54 |         python ID_MLP.py  --preedp $dropout --predp $dropout \
55 |           --prelr $lr --batch_size $batch_size \
56 |           --lnnn --predictor cn1 --dataset Citeseer  --epochs 1000 --runs 2 \
57 |           --hiddim $hiddim --testbs 8192 --maskinput --num_id $((layer * 3)) --tailact --device 1
58 |           
59 |       done
60 |     done
61 |   done
62 | done
63 | 
64 | 
65 | done
66 | done
67 | done
68 | done
69 | done
70 | 


--------------------------------------------------------------------------------
/SL/Link_Prediction/run_cora.sh:
--------------------------------------------------------------------------------
 1 | for kmeans in 0 1
 2 | do
 3 | for codebook in 16 32
 4 | do
 5 | for epoch in 150
 6 | do
 7 | for dropout1 in 0.1
 8 | do
 9 | for layer in 3 5 10
10 | do
11 | 
12 | python ID_pretrain.py   --xdp 0.5 --tdp $dropout1 --gnndp $dropout1 --gnnedp $dropout1 \
13 | --predp $dropout1 --preedp $dropout1 --gnnlr 0.004 \
14 | --prelr 0.002  --batch_size 1152  --ln --lnnn --predictor cn1 --dataset Cora  --epochs $epoch \
15 | --runs 1 --model gcn --hiddim 256 --mplayers $layer  --testbs 8192  \
16 | --maskinput  --jk --codebook $codebook --kmeans $kmeans --tailact  --device 2
17 | 
# ID_MLP sweep for the checkpoint pretrained just above.
# The learning rate comes from the `lr` loop; it used to be hard-coded to
# 0.01, which ran every configuration twice and never tried 0.001.
# `layer` is set by the enclosing loop.
for dropout in 0.1 0.4
do
  for lr in 0.01 0.001
  do
    for batch_size in 1152
    do
      for hiddim in 512
      do

        python ID_MLP.py  --preedp $dropout --predp $dropout \
          --prelr $lr  --batch_size $batch_size \
          --lnnn --predictor cn1 --dataset Cora  --epochs 1000 --runs 2 \
          --hiddim $hiddim --testbs 8192 --maskinput --num_id $((layer * 3)) --tailact  --device 2

      done
    done
  done
done
36 | 
37 | 
38 | python ID_pretrain.py   --xdp 0.5 --tdp $dropout1 --gnndp $dropout1 --gnnedp $dropout1 \
39 | --predp $dropout1 --preedp $dropout1 --gnnlr 0.004 \
40 | --prelr 0.002  --batch_size 1152  --ln --lnnn --predictor cn1 --dataset Cora  --epochs $epoch \
41 | --runs 1 --model gcn --hiddim 256 --mplayers $layer  --testbs 8192  \
42 | --maskinput  --codebook $codebook --kmeans $kmeans --tailact  --device 2
43 | 
44 | 
# ID_MLP sweep for the checkpoint pretrained just above.
# The learning rate comes from the `lr` loop; it used to be hard-coded to
# 0.01, which ran every configuration twice and never tried 0.001.
# `layer` is set by the enclosing loop.
for dropout in 0.1 0.4
do
  for lr in 0.01 0.001
  do
    for batch_size in 1152
    do
      for hiddim in 512
      do

        python ID_MLP.py  --preedp $dropout --predp $dropout \
          --prelr $lr  --batch_size $batch_size \
          --lnnn --predictor cn1 --dataset Cora  --epochs 1000 --runs 2 \
          --hiddim $hiddim --testbs 8192 --maskinput --num_id $((layer * 3)) --tailact  --device 2

      done
    done
  done
done
63 | 
64 | 
65 | done
66 | done
67 | done
68 | done
69 | done


--------------------------------------------------------------------------------
/SL/Link_Prediction/run_pubmed.sh:
--------------------------------------------------------------------------------
 1 | for kmeans in 1 0
 2 | do
 3 | for codebook in 8 16 32
 4 | do
 5 | for epoch in 100
 6 | do
 7 | for dropout1 in 0.5
 8 | do
 9 | for layer in 5 10 15 20
10 | do
11 | 
12 | python ID_pretrain.py   --xdp $dropout1 --tdp 0.0 --gnndp 0.1 --gnnedp 0.0 \
13 | --predp 0.0 --preedp 0.0 --gnnlr 0.01 \
14 | --prelr 0.002 --batch_size 2048  --ln --lnnn --predictor cn1 --dataset Pubmed  --epochs $epoch \
15 | --runs 1 --model gcn --hiddim 256 --mplayers $layer  --testbs 8192  \
16 | --maskinput  --jk --use_xlin --codebook $codebook --kmeans $kmeans --tailact --device 6
17 | 
18 | for dropout in 0.3 0.5
19 | do
20 |   for lr in 0.001
21 |   do
22 |     for batch_size in 6000
23 |     do
24 |       for hiddim in 512
25 |       do
26 |         python ID_MLP.py  --preedp $dropout --predp $dropout \
27 |           --prelr $lr  --batch_size $batch_size \
28 |           --lnnn --predictor cn1 --dataset Pubmed  --epochs 1500 --runs 2 \
29 |           --hiddim $hiddim --testbs 8192 --maskinput --num_id $((layer * 3)) --tailact  --device 6
30 |           
31 |       done
32 |     done
33 |   done
34 | done
35 | 
36 | 
37 | python ID_pretrain.py   --xdp $dropout1 --tdp 0.0 --gnndp 0.1 --gnnedp 0.0 \
38 | --predp 0.0 --preedp 0.0 --gnnlr 0.01 \
39 | --prelr 0.002 --batch_size 2048  --ln --lnnn --predictor cn1 --dataset Pubmed  --epochs $epoch \
40 | --runs 1 --model gcn --hiddim 256 --mplayers $layer  --testbs 8192  \
41 | --maskinput  --jk --codebook $codebook --kmeans $kmeans --tailact --device 6
42 | 
43 | for dropout in 0.3 0.5
44 | do
45 |   for lr in 0.001
46 |   do
47 |     for batch_size in 6000
48 |     do
49 |       for hiddim in 512
50 |       do
51 |     
52 |         python ID_MLP.py  --preedp $dropout --predp $dropout \
53 |           --prelr $lr  --batch_size $batch_size \
54 |           --lnnn --predictor cn1 --dataset Pubmed  --epochs 1500 --runs 2 \
55 |           --hiddim $hiddim --testbs 8192 --maskinput --num_id $((layer * 3)) --tailact  --device 6
56 |           
57 |       done
58 |     done
59 |   done
60 | done
61 | 
62 | done
63 | done
64 | done
65 | done
66 | done
67 | 
68 | python ID_pretrain.py   --xdp 0.5  --tdp 0.0 --gnndp 0.1 --gnnedp 0.0 \
69 | --predp 0.0 --preedp 0.0 --gnnlr 0.01 \
70 | --prelr 0.002 --batch_size 2048  --ln --lnnn --predictor cn1 --dataset Pubmed  --epochs 100 \
71 | --runs 1 --model gcn --hiddim 256 --mplayers 10  --testbs 8192  \
72 | --maskinput  --jk --use_xlin --codebook 8 --kmeans 1 --tailact --device 0
73 | 
74 | python ID_MLP.py  --preedp 0.3 --predp 0.3 \
75 |   --prelr 0.001  --batch_size 4000 \
76 |   --lnnn --predictor cn1 --dataset Pubmed  --epochs 1000 --runs 2 \
77 |   --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact  --device 0


--------------------------------------------------------------------------------
/SL/Node_Classification/README.md:
--------------------------------------------------------------------------------
 1 | ## Datasets
 2 | 
 3 | Chameleon and Squirrel: download the datasets from the Google Drive link below:
 4 | https://drive.google.com/drive/folders/1rr3kewCBUvIuVxA6MJ90wzQuF-NnCRtf?usp=drive_link (provided by Qitian Wu, Wentao Zhao, Chenxiao Yang, Hengrui Zhang, Fan Nie, Haitian Jiang, Yatao Bian, and Junchi Yan, "Simplifying and Empowering Transformers for Large-Graph Representations", in Thirty-seventh Conference on Neural Information Processing Systems, 2023.)
 5 | 
 6 | Download the geom-gcn folder, place it in `./data/` and unzip it.
 7 | 
 8 | For Chameleon and Squirrel, we use the [new splits](https://github.com/yandex-research/heterophilous-graphs/tree/main) that filter out the overlapped nodes.
 9 | Download `chameleon_filtered.npz`, put it into `./data/geom-gcn/chameleon/`.
10 | Download `squirrel_filtered.npz`, put it into `./data/geom-gcn/squirrel/`.
11 | 
12 | ## Note
13 | 
14 | ```bash
15 | sh run.sh
16 | 
17 | cd large_graph_exp
18 | bash arxiv.sh
19 | bash pokec.sh
20 | bash product.sh
21 | bash protein.sh
22 | ```


--------------------------------------------------------------------------------
/SL/Node_Classification/cora_citeseer_pubmed_analysis/README.md:
--------------------------------------------------------------------------------
1 | ## Run the code
2 | Please refer to the bash script `run.sh` to run the training and evaluation pipeline.
3 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/cora_citeseer_pubmed_analysis/logger.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | class Logger(object):
 4 |     def __init__(self, runs, info=None):
 5 |         self.info = info
 6 |         self.results = [[] for _ in range(runs)]
 7 | 
 8 |     def add_result(self, run, result):
 9 |         assert len(result) == 4
10 |         assert run >= 0 and run < len(self.results)
11 |         self.results[run].append(result)
12 | 
13 |     def print_statistics(self, run=None, mode='max_acc'):
14 |         if run is not None:
15 |             result = 100 * torch.tensor(self.results[run])
16 |             argmax = result[:, 1].argmax().item()
17 |             argmin = result[:, 3].argmin().item()
18 |             if mode == 'max_acc':
19 |                 ind = argmax
20 |             else:
21 |                 ind = argmin
22 | 
23 |             print_str=f'Run {run + 1:02d}:'+\
24 |                 f'Highest Train: {result[:, 0].max():.2f} '+\
25 |                 f'Highest Valid: {result[:, 1].max():.2f} '+\
26 |                 f'Highest Test: {result[:, 2].max():.2f} '+\
27 |                 f'Chosen epoch: {ind+1}\n'+\
28 |                 f'Final Train: {result[ind, 0]:.2f} '+\
29 |                 f'Final Test: {result[ind, 2]:.2f}'
30 |             print(print_str)
31 |             
32 |         else:
33 |             best_results = []
34 |             max_val_epoch=0
35 |             for r in self.results:
36 |                 r=100*torch.tensor(r)
37 |                 train1 = r[:, 0].max().item()
38 |                 test1 = r[:, 2].max().item()
39 |                 valid = r[:, 1].max().item()
40 |                 if mode == 'max_acc':
41 |                     train2 = r[r[:, 1].argmax(), 0].item()
42 |                     test2 = r[r[:, 1].argmax(), 2].item()
43 |                     max_val_epoch=r[:, 1].argmax()
44 |                 else:
45 |                     train2 = r[r[:, 3].argmin(), 0].item()
46 |                     test2 = r[r[:, 3].argmin(), 2].item()
47 |                 best_results.append((train1, test1, valid, train2, test2))
48 | 
49 |             best_result = torch.tensor(best_results)
50 | 
51 |             print_str=f'{len(self.results)} runs: '
52 |             r = best_result[:, 0]
53 |             print_str+=f'Highest Train: {r.mean():.2f} ± {r.std():.2f} '
54 |             print_str+=f'Highest val epoch:{max_val_epoch}\n'
55 |             r = best_result[:, 1]
56 |             print_str+=f'Highest Test: {r.mean():.2f} ± {r.std():.2f} '
57 |             r = best_result[:, 4]
58 |             print_str+=f'Final Test: {r.mean():.2f} ± {r.std():.2f}'
59 | 
60 |             self.test=r.mean()
61 |         return print_str
62 |     
63 |     def output(self,out_path,info):
64 |         with open(out_path,'a') as f:
65 |             f.write(info)
66 |             f.write(f'test acc:{self.test}\n')
67 | 
68 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/cora_citeseer_pubmed_analysis/parse.py:
--------------------------------------------------------------------------------
 1 | from models import *
 2 | 
 3 | 
 4 | def parse_method(method, args, c, d, device):
 5 |     if method == 'gcn':
 6 |         model = GCN(in_channels=d,
 7 |                     hidden_channels=args.hidden_channels,
 8 |                     out_channels=c,
 9 |                     num_layers=args.num_layers,
10 |                     dropout=args.dropout,
11 |                     use_bn=args.use_bn, num_codes=args.num_codes, kmeans=args.kmeans).to(device)
12 |     else:
13 |         raise ValueError(f'Invalid method {method}')
14 |     return model
15 | 
16 | 
def parser_add_main_args(parser):
    """Register all command-line options of the pipeline on `parser`.

    Args:
        parser: an `argparse.ArgumentParser` (or compatible object); it is
            modified in place and nothing is returned.
    """
    add = parser.add_argument

    # dataset and evaluation
    add('--data_dir', type=str, default='./data/')
    add('--dataset', type=str, default='cora')
    add('--device', type=int, default=0,
        help='which gpu to use if any (default: 0)')
    add('--seed', type=int, default=42)
    add('--cpu', action='store_true')
    add('--epochs', type=int, default=500)
    add('--runs', type=int, default=1, help='number of distinct runs')
    add('--train_prop', type=float, default=.6,
        help='training label proportion')
    add('--valid_prop', type=float, default=.2,
        help='validation label proportion')
    add('--protocol', type=str, default='semi',
        help='protocol for cora datasets, semi or supervised')
    add('--rand_split', action='store_true', help='use random splits')
    add('--rand_split_class', action='store_true',
        help='use random splits with a fixed number of labeled nodes for each class')
    add('--label_num_per_class', type=int, default=20,
        help='labeled nodes per class(randomly selected)')
    add('--valid_num', type=int, default=500,
        help='Total number of validation')
    add('--test_num', type=int, default=500, help='Total number of test')

    add('--eval_step', type=int, default=1, help='how often to evaluate')
    add('--batch_size', type=int, default=100000,
        help='mini batch training for large graphs')
    add('--metric', type=str, default='acc',
        choices=['acc', 'rocauc', 'f1'], help='evaluation metric')
    add('--kmeans', type=int, default=0)
    add('--k', type=int, default=0)
    add('--num_id', type=int, default=9)
    add('--norm_type', type=str, default='none')
    add('--num_codes', type=int, default=16)

    # model
    add('--method', type=str, default='gcn')
    add('--hidden_channels', type=int, default=32)
    add('--num_layers', type=int, default=2,
        help='number of layers for deep methods')
    add('--use_bn', action='store_true', help='use layernorm')
    add('--use_residual', action='store_true',
        help='use residual link for each GNN layer')
    add('--use_weight', action='store_true',
        help='use weight for GNN convolution')
    add('--use_init', action='store_true',
        help='use initial feat for each GNN layer')
    add('--use_act', action='store_true',
        help='use activation for each GNN layer')
    add('--patience', type=int, default=200, help='early stopping patience.')

    # training
    add('--lr', type=float, default=0.01)
    add('--weight_decay', type=float, default=0)
    add('--dropout', type=float, default=0.5)

    # display and utility
    add('--display_step', type=int, default=50, help='how often to print')
    add('--no_feat_norm', action='store_true',
        help='Not use feature normalization.')
82 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/cora_citeseer_pubmed_analysis/run.sh:
--------------------------------------------------------------------------------
 1 | # Cora
 2 | python main.py --dataset cora --lr 0.01 --num_layers 4 \
 3 |         --hidden_channels 128 --weight_decay 5e-4 --dropout 0.0 \
 4 |         --method gcn \
 5 |         --rand_split --no_feat_norm \
 6 |         --seed 123 --device 2 --runs 1 --num_codes 6 --epoch 1000 --kmeans 1
 7 | 
 8 | python ID_MLP.py --dataset cora --lr 0.001 --num_layers 5 \
 9 |                 --hidden_channels 256 --weight_decay 5e-4 --dropout 0.5 \
10 |                 --rand_split --no_feat_norm \
11 |                 --seed 123 --device 2 --runs 5 --num_id 12 --k 0 --epoch 1000
12 | 
13 | # Citeseer
14 | python main.py --dataset citeseer --lr 0.01 --num_layers 2 \
15 |         --hidden_channels 128 --weight_decay 0.01 --dropout 0.0 \
16 |         --method gcn \
17 |         --rand_split --no_feat_norm \
18 |         --seed 123 --device 4 --runs 1 --num_codes 8 --epoch 1000 --kmeans 1
19 | 
20 | python ID_MLP.py --dataset citeseer --lr 0.001 --num_layers 5 \
21 |                 --hidden_channels 256 --weight_decay 0.01 --dropout 0.5 \
22 |                 --rand_split --no_feat_norm \
23 |                 --seed 123 --device 4 --runs 5 --num_id 6 --k 0 --epoch 1000
24 | 
25 | # Pubmed
26 | python main.py --dataset pubmed --lr 0.005 --num_layers 2 \
27 |         --hidden_channels 256 --weight_decay 5e-4 --dropout 0.5 \
28 |         --method gcn \
29 |         --rand_split --no_feat_norm \
30 |         --seed 123 --device 3 --runs 1  --kmeans 1  --num_codes 16 --epoch 1000 
31 | 
32 | python ID_MLP.py --dataset pubmed --lr 0.005 --num_layers 5 \
33 |             --hidden_channels 256 --weight_decay 5e-4 --dropout 0.5 \
34 |             --rand_split --no_feat_norm \
35 |             --seed 123 --device 3 --runs 5 --num_id 6 --k 0 --epoch 1000
36 | 
37 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/data/amazon-computer_split.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/data/amazon-computer_split.npz


--------------------------------------------------------------------------------
/SL/Node_Classification/data/amazon-photo_split.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/data/amazon-photo_split.npz


--------------------------------------------------------------------------------
/SL/Node_Classification/data/coauthor-cs_split.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/data/coauthor-cs_split.npz


--------------------------------------------------------------------------------
/SL/Node_Classification/data/coauthor-physics_split.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/data/coauthor-physics_split.npz


--------------------------------------------------------------------------------
/SL/Node_Classification/eval.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn.functional as F
 3 | import numpy as np
 4 | 
 5 | @torch.no_grad()
 6 | def evaluate(model, dataset, split_idx, eval_func, criterion, args, result=None):
 7 |     if result is not None:
 8 |         out = result
 9 |     else:
10 |         model.eval()
11 |         out, total_commit_loss, id_list_concat, gnn_id = model(dataset.graph['node_feat'], dataset.graph['edge_index'])
12 |         id_list_concat = id_list_concat.detach().cpu().numpy()
13 |     
14 |     train_acc = eval_func(
15 |         dataset.label[split_idx['train']], out[split_idx['train']])
16 |     valid_acc = eval_func(
17 |         dataset.label[split_idx['valid']], out[split_idx['valid']])
18 |     test_acc = eval_func(
19 |         dataset.label[split_idx['test']], out[split_idx['test']])
20 | 
21 |     if args.dataset in ('questions'):
22 |         if dataset.label.shape[1] == 1:
23 |             true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
24 |         else:
25 |             true_label = dataset.label
26 |         valid_loss = criterion(out[split_idx['valid']], true_label.squeeze(1)[
27 |             split_idx['valid']].to(torch.float))
28 |     else:
29 |         out = F.log_softmax(out, dim=1)
30 |         valid_loss = criterion(
31 |             out[split_idx['valid']], dataset.label.squeeze(1)[split_idx['valid']])
32 | 
33 |     return train_acc, valid_acc, test_acc, valid_loss, out, id_list_concat
34 | 
@torch.no_grad()
def evaluate_cpu(model, dataset, split_idx, eval_func, criterion, args, device, result=None):
    """Evaluate on the CPU: move the model and labels there, then score all splits.

    Side effects: ``model`` is moved to the CPU and switched to eval mode, and
    ``dataset.label`` is replaced by its CPU copy. The graph tensors are used
    as stored in ``dataset.graph`` (they are not moved here).

    Args:
        model: Module invoked as ``model(node_feat, edge_index)`` returning
            ``(out, total_commit_loss, id_list_concat, gnn_id)``.
        dataset: Object exposing ``label`` and ``graph['node_feat']`` /
            ``graph['edge_index']``.
        split_idx: Mapping with ``'train'``, ``'valid'`` and ``'test'`` index
            entries.
        eval_func: Metric called as ``eval_func(true_labels, outputs)``.
        criterion: Loss used to compute the validation loss.
        args: Namespace; only ``args.dataset`` is read.
        device: Unused; kept for call-site compatibility.
        result: Unused; kept for call-site compatibility. The forward pass is
            always executed, as in the original implementation.

    Returns:
        ``(train_acc, valid_acc, test_acc, valid_loss, out, id_list_concat)``
        where ``id_list_concat`` is a NumPy array and ``out`` is log-softmaxed
        except for the ``'questions'`` dataset.
    """
    # The forward pass below always runs, so inference mode must always be
    # set; previously it was skipped whenever ``result`` was passed.
    model.eval()

    cpu = torch.device("cpu")
    model.to(cpu)
    dataset.label = dataset.label.to(cpu)
    edge_index, x = dataset.graph['edge_index'], dataset.graph['node_feat']
    out, _, id_list_concat, _ = model(x, edge_index)
    id_list_concat = id_list_concat.detach().cpu().numpy()

    train_acc = eval_func(
        dataset.label[split_idx['train']], out[split_idx['train']])
    valid_acc = eval_func(
        dataset.label[split_idx['valid']], out[split_idx['valid']])
    test_acc = eval_func(
        dataset.label[split_idx['test']], out[split_idx['test']])

    # Tuple membership, not a substring test against the string 'questions'.
    if args.dataset in ('questions',):
        if dataset.label.shape[1] == 1:
            true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
        else:
            true_label = dataset.label
        valid_loss = criterion(out[split_idx['valid']], true_label.squeeze(1)[
            split_idx['valid']].to(torch.float))
    else:
        out = F.log_softmax(out, dim=1)
        valid_loss = criterion(
            out[split_idx['valid']], dataset.label.squeeze(1)[split_idx['valid']])

    return train_acc, valid_acc, test_acc, valid_loss, out, id_list_concat
66 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/arxiv.sh:
--------------------------------------------------------------------------------
1 | python main-arxiv.py --dataset ogbn-arxiv --hidden_channels 256 --epochs 1000 --lr 0.0005 --runs 1 --local_layers 5 --post_bn --device 7
2 | 
3 | 
4 | python arxiv_ID_MLP.py --lr 0.01 --hidden_channels 256 --num_layers 4 \
5 |             --epochs 2000 --device 7 --dropout 0.5 --num_id 15 --k 0 --norm_type batch --runs 2
6 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/data/pokec/pokec-splits.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/large_graph/data/pokec/pokec-splits.npy


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/lg_parse.py:
--------------------------------------------------------------------------------
 1 | from lg_model import GAT
 2 | 
 3 | def parse_method(args, n, c, d, device):
 4 |     model = GAT(d, args.hidden_channels, c, local_layers=args.local_layers,
 5 |             in_dropout=args.in_dropout, dropout=args.dropout,
 6 |             heads=args.num_heads, pre_ln=args.pre_ln,
 7 |             post_bn=args.post_bn, local_attn=args.local_attn, kmeans=args.kmeans, num_codes=args.num_codes).to(device)
 8 |     return model
 9 | 
10 | 
def parser_add_main_args(parser):
    """Register the large-graph training command-line options on ``parser``.

    The parser is modified in place; nothing is returned.
    """
    add = parser.add_argument

    # dataset and evaluation
    add('--dataset', type=str, default='ogbn-arxiv')
    add('--data_dir', type=str, default='./data/')
    add('--device', type=int, default=0,
        help='which gpu to use if any (default: 0)')
    add('--seed', type=int, default=42)
    add('--cpu', action='store_true')
    add('--epochs', type=int, default=0)
    add('--batch_size', type=int, default=100000,
        help='batch size for mini-batch training')
    add('--runs', type=int, default=1, help='number of distinct runs')
    add('--metric', type=str, default='acc', choices=['acc', 'rocauc'],
        help='evaluation metric')

    # model
    add('--method', type=str, default='gat')
    add('--hidden_channels', type=int, default=256)
    add('--local_layers', type=int, default=7,
        help='number of layers for local attention')
    add('--num_heads', type=int, default=1,
        help='number of heads for attention')
    for flag in ('--pre_ln', '--post_bn', '--local_attn'):
        add(flag, action='store_true')

    # training
    add('--lr', type=float, default=0.001)
    add('--weight_decay', type=float, default=5e-4)
    add('--in_dropout', type=float, default=0.15)
    add('--dropout', type=float, default=0.5)

    # display and utility
    add('--display_step', type=int, default=1, help='how often to print')
    add('--eval_step', type=int, default=1, help='how often to evaluate')
    add('--eval_epoch', type=int, default=-1, help='when to evaluate')
    add('--save_model', action='store_true', help='whether to save model')
    add('--model_dir', type=str, default='./model/', help='where to save model')
    add('--save_result', action='store_true', help='whether to save result')

    # codebook / ID options
    add('--kmeans', type=int, default=1)
    add('--num_codes', type=int, default=16)
    add('--num_layers', type=int, default=5)
    add('--num_id', type=int, default=15)
    add('--norm_type', type=str, default='none')
62 | 
63 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/logger_ copy.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class Logger(object):
 5 |     def __init__(self, runs, info=None):
 6 |         self.info = info
 7 |         self.results = [[] for _ in range(runs)]
 8 | 
 9 |     def add_result(self, run, result):
10 |         assert len(result) == 3
11 |         assert run >= 0 and run < len(self.results)
12 |         self.results[run].append(result)
13 | 
14 |     def print_statistics(self, run=None):
15 |         if run is not None:
16 |             result = 100 * torch.tensor(self.results[run])
17 |             argmax = result[:, 1].argmax().item()
18 |             print(f'Run {run + 1:02d}:')
19 |             print(f'Highest Train: {result[:, 0].max():.2f}')
20 |             print(f'Highest Valid: {result[:, 1].max():.2f}')
21 |             print(f'  Final Train: {result[argmax, 0]:.2f}')
22 |             print(f'   Final Test: {result[argmax, 2]:.2f}')
23 |         else:
24 |             result = 100 * torch.tensor(self.results)
25 | 
26 |             best_results = []
27 |             for r in result:
28 |                 train1 = r[:, 0].max().item()
29 |                 valid = r[:, 1].max().item()
30 |                 train2 = r[r[:, 1].argmax(), 0].item()
31 |                 test = r[r[:, 1].argmax(), 2].item()
32 |                 best_results.append((train1, valid, train2, test))
33 | 
34 |             best_result = torch.tensor(best_results)
35 | 
36 |             print(f'All runs:')
37 |             r = best_result[:, 0]
38 |             print(f'Highest Train: {r.mean():.2f} +- {r.std():.2f}')
39 |             r = best_result[:, 1]
40 |             print(f'Highest Valid: {r.mean():.2f} +- {r.std():.2f}')
41 |             r = best_result[:, 2]
42 |             print(f'  Final Train: {r.mean():.2f} +- {r.std():.2f}')
43 |             r = best_result[:, 3]
44 |             print(f'   Final Test: {r.mean():.2f} +- {r.std():.2f}')


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/logger_.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class Logger(object):
 5 |     def __init__(self, runs, info=None):
 6 |         self.info = info
 7 |         self.results = [[] for _ in range(runs)]
 8 | 
 9 |     def add_result(self, run, result):
10 |         assert len(result) == 3
11 |         assert run >= 0 and run < len(self.results)
12 |         self.results[run].append(result)
13 | 
14 |     def print_statistics(self, run=None):
15 |         if run is not None:
16 |             result = 100 * torch.tensor(self.results[run])
17 |             argmax = result[:, 1].argmax().item()
18 |             print(f'Run {run + 1:02d}:')
19 |             print(f'Highest Train: {result[:, 0].max():.2f}')
20 |             print(f'Highest Valid: {result[:, 1].max():.2f}')
21 |             print(f'  Final Train: {result[argmax, 0]:.2f}')
22 |             print(f'   Final Test: {result[argmax, 2]:.2f}')
23 |         else:
24 |             result = 100 * torch.tensor(self.results)
25 | 
26 |             best_results = []
27 |             for r in result:
28 |                 train1 = r[:, 0].max().item()
29 |                 valid = r[:, 1].max().item()
30 |                 train2 = r[r[:, 1].argmax(), 0].item()
31 |                 test = r[r[:, 1].argmax(), 2].item()
32 |                 best_results.append((train1, valid, train2, test))
33 | 
34 |             best_result = torch.tensor(best_results)
35 | 
36 |             print(f'All runs:')
37 |             r = best_result[:, 0]
38 |             print(f'Highest Train: {r.mean():.2f} +- {r.std():.2f}')
39 |             r = best_result[:, 1]
40 |             print(f'Highest Valid: {r.mean():.2f} +- {r.std():.2f}')
41 |             r = best_result[:, 2]
42 |             print(f'  Final Train: {r.mean():.2f} +- {r.std():.2f}')
43 |             r = best_result[:, 3]
44 |             print(f'   Final Test: {r.mean():.2f} +- {r.std():.2f}')


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/main-arxiv.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import random
  3 | import numpy as np
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | from torch_geometric.utils import to_undirected, remove_self_loops, add_self_loops
  8 | 
  9 | from lg_parse import parse_method, parser_add_main_args
 10 | import sys
 11 | sys.path.append("../")
 12 | from logger import *
 13 | from dataset import load_dataset
 14 | from data_utils import eval_acc, eval_rocauc, load_fixed_splits
 15 | from eval import *
 16 | 
 17 | 
 18 | def fix_seed(seed=42):
 19 |     random.seed(seed)
 20 |     np.random.seed(seed)
 21 |     torch.manual_seed(seed)
 22 |     torch.cuda.manual_seed(seed)
 23 |     torch.cuda.manual_seed_all(seed)
 24 |     torch.backends.cudnn.deterministic = True
 25 |     torch.backends.cudnn.benchmark = False
 26 | 
### Parse args ###
# Command-line options are registered by lg_parse.parser_add_main_args.
parser = argparse.ArgumentParser(description='Training Pipeline for Node Classification')
parser_add_main_args(parser)
args = parser.parse_args()
print(args)

fix_seed(args.seed)

# Use the requested CUDA device unless --cpu is given or CUDA is unavailable.
if args.cpu:
    device = torch.device("cpu")
else:
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

### Load and preprocess data ###
dataset = load_dataset(args.data_dir, args.dataset)

# Labels are kept 2-D (one column when originally 1-D); the training loop
# squeezes dim 1 again before computing the loss.
if len(dataset.label.shape) == 1:
    dataset.label = dataset.label.unsqueeze(1)
dataset.label = dataset.label.to(device)

# One split per run, obtained from the dataset's own load_fixed_splits().
# NOTE(review): `load_fixed_splits` imported from data_utils is not used here.
split_idx_lst = [dataset.load_fixed_splits() for _ in range(args.runs)]

### Basic information of datasets ###
n = dataset.graph['num_nodes']
# Edge count as loaded, i.e. before the symmetrisation / self-loop steps below.
e = dataset.graph['edge_index'].shape[1]
# Number of outputs: largest label + 1, or the label width if that is larger.
c = max(dataset.label.max().item() + 1, dataset.label.shape[1])
d = dataset.graph['node_feat'].shape[1]

print(f"dataset {args.dataset} | num nodes {n} | num edge {e} | num node feats {d} | num classes {c}")

# Symmetrise the edges, drop existing self-loops, then add self-loops for all n nodes.
dataset.graph['edge_index'] = to_undirected(dataset.graph['edge_index'])
dataset.graph['edge_index'], _ = remove_self_loops(dataset.graph['edge_index'])
dataset.graph['edge_index'], _ = add_self_loops(dataset.graph['edge_index'], num_nodes=n)

dataset.graph['edge_index'], dataset.graph['node_feat'] = \
    dataset.graph['edge_index'].to(device), dataset.graph['node_feat'].to(device)

### Load method ###
model = parse_method(args, n, c, d, device)

# NLLLoss is paired with the explicit log_softmax applied to the model output.
criterion = nn.NLLLoss()
# NOTE(review): accuracy is always used; args.metric is not consulted here.
eval_func = eval_acc
logger = Logger(args.runs, args)

model.train()
print('MODEL:', model)
 73 | 
### Training loop ###
# Each run re-initialises the model and trains on the whole graph for args.epochs steps.
for run in range(args.runs):
    split_idx = split_idx_lst[run]
    train_idx = split_idx['train'].to(device)
    model.reset_parameters()
    optimizer = torch.optim.Adam(model.parameters(),weight_decay=args.weight_decay, lr=args.lr)
    best_val = float('-inf')
    best_test = float('-inf')
    # With --save_model a checkpoint of the freshly reset model is written
    # before any training step (it is overwritten on each validation improvement).
    if args.save_model:
        save_model(args, model, optimizer, run)

    for epoch in range(args.epochs):
        model.train()
        optimizer.zero_grad()

        # The model returns the logits plus a commitment loss and ID tensors;
        # only the logits and the commitment loss are used for the update.
        out, total_commit_loss, id_list_concat, gnn_id = model(dataset.graph['node_feat'], dataset.graph['edge_index'])
        out = F.log_softmax(out, dim=1)
        loss = criterion(
            out[train_idx], dataset.label.squeeze(1)[train_idx])
        # Optimise the classification loss together with the commitment loss.
        (loss + total_commit_loss).backward()
        optimizer.step()

        # Evaluation runs every epoch; args.eval_step / args.eval_epoch are not consulted here.
        result = evaluate(model, dataset, split_idx, eval_func, criterion, args)

        # Drop the last two entries (outputs and ID array); the remaining four
        # are (train, valid, test, valid_loss).
        logger.add_result(run, result[:-2])

        if result[1] > best_val:
            best_val = result[1]
            best_test = result[2]
            # Persist the ID array of the best-validation epoch. The file name
            # depends only on the dataset, so later runs overwrite earlier ones.
            np.savez(f"semantic_ID_{args.dataset}", result[-1])
            if args.save_model:
                save_model(args, model, optimizer, run)

        if epoch % args.display_step == 0:
            # `loss` is the classification loss only (commitment loss excluded).
            print(f'Epoch: {epoch:02d}, '
                  f'Loss: {loss:.4f}, '
                  f'Train: {100 * result[0]:.2f}%, '
                  f'Valid: {100 * result[1]:.2f}%, '
                  f'Test: {100 * result[2]:.2f}%, '
                  f'Best Valid: {100 * best_val:.2f}%, '
                  f'Best Test: {100 * best_test:.2f}%')
    logger.print_statistics(run)

results = logger.print_statistics()
### Save results ###
# NOTE(review): results are written unconditionally; args.save_result is not checked.
save_result(args, results)
120 | 
121 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/pokec.sh:
--------------------------------------------------------------------------------
1 | python main-batch.py --dataset pokec --hidden_channels 256 --epochs 2000 --batch_size 550000 --lr 0.0005 --runs 1 --local_layers 7 --in_drop 0.0 --dropout 0.2 --weight_decay 0.0 --post_bn --eval_step 9 --eval_epoch 1000 --device 0
2 | 
3 | python ID_MLP.py --dataset pokec --lr 0.001 --hidden_channels 256 --num_layers 5 \
4 |             --epochs 2000 --device 1 --dropout 0.5 --num_id 21 --k 0 --norm_type batch --runs 2 --eval_step 9 --eval_epoch 100
5 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/product.sh:
--------------------------------------------------------------------------------
1 | # ogbn-products
2 | python product_pre.py --device 7 --kmeans 1
3 | python product_ID_MLP.py --num_layers 4 --device 7 --num_id 15 --k 0 --lr 0.01
4 | 
5 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/large_graph/protein.sh:
--------------------------------------------------------------------------------
1 | # ogbn-proteins
2 | python -u protein_pre.py --gnum_layers 4 --gdropout 0.5 --device 0 --epoch 1000 --kmeans 1 --num_codes 4
3 | python protein_ID_MLP.py --hidden_channels 512 --lr 0.001 --num_layers 5 --num_id 12 --norm_type batch --device 1
4 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/logger.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | class Logger(object):
 4 |     """ Adapted from https://github.com/snap-stanford/ogb/ """
 5 |     def __init__(self, runs, info=None):
 6 |         self.info = info
 7 |         self.results = [[] for _ in range(runs)]
 8 | 
 9 |     def add_result(self, run, result):
10 |         assert len(result) == 4
11 |         assert run >= 0 and run < len(self.results)
12 |         self.results[run].append(result)
13 | 
14 |     def print_statistics(self, run=None, mode='max_acc'):
15 |         if run is not None:
16 |             result = 100 * torch.tensor(self.results[run])
17 |             argmax = result[:, 1].argmax().item()
18 |             argmin = result[:, 3].argmin().item()
19 |             if mode == 'max_acc':
20 |                 ind = argmax
21 |             else:
22 |                 ind = argmin
23 |             print(f'Run {run + 1:02d}:')
24 |             print(f'Highest Train: {result[:, 0].max():.2f}')
25 |             print(f'Highest Valid: {result[:, 1].max():.2f}')
26 |             print(f'Highest Test: {result[:, 2].max():.2f}')
27 |             print(f'Chosen epoch: {ind}')
28 |             print(f'Final Train: {result[ind, 0]:.2f}')
29 |             print(f'Final Test: {result[ind, 2]:.2f}')
30 |             self.test=result[ind, 2]
31 |         else:
32 |             result = 100 * torch.tensor(self.results)
33 | 
34 |             best_results = []
35 |             for r in result:
36 |                 train1 = r[:, 0].max().item()
37 |                 test1 = r[:, 2].max().item()
38 |                 valid = r[:, 1].max().item()
39 |                 if mode == 'max_acc':
40 |                     train2 = r[r[:, 1].argmax(), 0].item()
41 |                     test2 = r[r[:, 1].argmax(), 2].item()
42 |                 else:
43 |                     train2 = r[r[:, 3].argmin(), 0].item()
44 |                     test2 = r[r[:, 3].argmin(), 2].item()
45 |                 best_results.append((train1, test1, valid, train2, test2))
46 | 
47 |             best_result = torch.tensor(best_results)
48 | 
49 |             print(f'All runs:')
50 |             r = best_result[:, 0]
51 |             print(f'Highest Train: {r.mean():.2f} ± {r.std():.2f}')
52 |             r = best_result[:, 1]
53 |             print(f'Highest Test: {r.mean():.2f} ± {r.std():.2f}')
54 |             r = best_result[:, 2]
55 |             print(f'Highest Valid: {r.mean():.2f} ± {r.std():.2f}')
56 |             r = best_result[:, 3]
57 |             print(f'  Final Train: {r.mean():.2f} ± {r.std():.2f}')
58 |             r = best_result[:, 4]
59 |             print(f'   Final Test: {r.mean():.2f} ± {r.std():.2f}')
60 | 
61 |             self.test=r.mean()
62 |             return best_result[:, 4]
63 | 
64 |     def output(self,out_path,info):
65 |         with open(out_path,'a') as f:
66 |             f.write(info)
67 |             f.write(f'test acc:{self.test}\n')
68 | 
69 | import os
def save_model(args, model, optimizer, run):
    """Write the model and optimizer state dicts to ``models/<dataset>/<method>_<run>.pt``.

    The path is relative to the current working directory and is built from
    ``args.dataset``, ``args.method`` and ``run``. An existing checkpoint with
    the same name is overwritten.
    """
    model_dir = f'models/{args.dataset}'
    # exist_ok avoids the check-then-create race when several training
    # processes start at the same time.
    os.makedirs(model_dir, exist_ok=True)
    model_path = f'{model_dir}/{args.method}_{run}.pt'
    torch.save({'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
                }, model_path)
77 | 
def load_model(args, model, optimizer, run):
    """Restore model and optimizer state from ``models/<dataset>/<method>_<run>.pt``.

    Both objects are updated in place and also returned as
    ``(model, optimizer)``.
    """
    model_path = f'models/{args.dataset}/{args.method}_{run}.pt'
    # Deserialize onto the CPU so that a checkpoint written from a CUDA device
    # that does not exist on this machine can still be read; load_state_dict
    # then copies the values into the existing parameters.
    checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model_state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    return model, optimizer
85 | 
def save_result(args, results):
    """Append one summary line to ``results/<dataset>/<method>.csv``.

    The line holds ``args.method``, ``args.dropout``, ``args.lr`` and the
    mean and standard deviation of ``results`` (anything with ``.mean()`` and
    ``.std()``), formatted as ``mean $\\pm$ std``. The path is relative to the
    current working directory.
    """
    result_dir = f'results/{args.dataset}'
    # exist_ok avoids the check-then-create race between concurrent runs.
    os.makedirs(result_dir, exist_ok=True)
    filename = f'{result_dir}/{args.method}.csv'
    print(f"Saving results to {filename}")
    with open(filename, 'a+') as write_obj:
        # The backslash is escaped explicitly: '\p' is an invalid escape
        # sequence and triggers a warning on recent Python versions.
        write_obj.write(
            f"{args.method} {args.dropout} {args.lr} "
            f"{results.mean():.2f} $\\pm$ {results.std():.2f} \n")
95 | 
96 | 


--------------------------------------------------------------------------------
/SL/Node_Classification/parse.py:
--------------------------------------------------------------------------------
 1 | from model import GNN
 2 | 
 3 | def parse_method(args, n, c, d, device):
 4 |     if args.method == 'gcn':
 5 |         model = GNN(d, args.hidden_channels, c, local_layers=args.local_layers,
 6 |                 in_dropout=args.in_dropout, dropout=args.dropout,
 7 |                 heads=args.num_heads, pre_ln=args.pre_ln, kmeans=args.kmeans, num_codes=args.num_codes, gnn='gcn').to(device)
 8 |     else:
 9 |         model = GNN(d, args.hidden_channels, c, local_layers=args.local_layers,
10 |                 in_dropout=args.in_dropout, dropout=args.dropout,
11 |                 heads=args.num_heads, pre_ln=args.pre_ln, kmeans=args.kmeans, num_codes=args.num_codes, gnn='gat').to(device)
12 |     return model
13 | 
14 | 
def parser_add_main_args(parser):
    """Register the node-classification command-line options on ``parser``.

    The parser is modified in place; nothing is returned.
    """
    add = parser.add_argument

    # dataset and evaluation
    add('--dataset', type=str, default='roman-empire')
    add('--data_dir', type=str, default='./data/')
    add('--device', type=int, default=0,
        help='which gpu to use if any (default: 0)')
    add('--seed', type=int, default=42)
    add('--cpu', action='store_true')
    add('--epochs', type=int, default=1000)
    add('--runs', type=int, default=1, help='number of distinct runs')
    add('--metric', type=str, default='acc', choices=['acc', 'rocauc'],
        help='evaluation metric')

    # data splits
    add('--train_prop', type=float, default=.6,
        help='training label proportion')
    add('--valid_prop', type=float, default=.2,
        help='validation label proportion')
    add('--rand_split', action='store_true', help='use random splits')
    add('--rand_split_class', action='store_true',
        help='use random splits with a fixed number of labeled nodes for each class')

    add('--label_num_per_class', type=int, default=20,
        help='labeled nodes per class(randomly selected)')
    add('--valid_num', type=int, default=500,
        help='Total number of validation')
    add('--test_num', type=int, default=1000,
        help='Total number of test')

    # model
    add('--method', type=str, default='gat')
    add('--hidden_channels', type=int, default=256)
    add('--local_layers', type=int, default=7,
        help='number of layers for local attention')
    add('--num_heads', type=int, default=1,
        help='number of heads for attention')
    add('--pre_ln', action='store_true')

    # training
    add('--lr', type=float, default=0.001)
    add('--weight_decay', type=float, default=5e-4)
    add('--in_dropout', type=float, default=0.0)
    add('--dropout', type=float, default=0.5)

    # display and utility
    add('--display_step', type=int, default=50, help='how often to print')
    add('--save_model', action='store_true', help='whether to save model')
    add('--model_dir', type=str, default='./model/', help='where to save model')
    add('--save_result', action='store_true', help='whether to save result')

    # codebook / ID options
    add('--kmeans', type=int, default=1)
    add('--num_codes', type=int, default=16)
    add('--norm_type', type=str, default='none')

    add('--num_layers', type=int, default=3)
    add('--k', type=int, default=0)
    add('--num_id', type=int, default=15)
76 | 


--------------------------------------------------------------------------------
/SSL/DGCluster/README.md:
--------------------------------------------------------------------------------
1 | ## Training & Evaluation
2 | sh run.sh


--------------------------------------------------------------------------------
/SSL/DGCluster/install.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import subprocess
 3 | import sys
 4 | 
 5 | 
 6 | def install(package, file_path=None):
 7 |     if file_path is None:
 8 |         subprocess.check_call([sys.executable, "-m", "pip", "install", package])
 9 |     else:
10 |         subprocess.check_call([sys.executable, "-m", "pip", "install", package, "-f", file_path])
11 | 
12 | 
def format_pytorch_version(version):
    """Return ``version`` without its local build suffix (the part from the first '+')."""
    base, _, _ = version.partition('+')
    return base
15 | 
16 | 
17 | TORCH_version = torch.__version__
18 | TORCH = format_pytorch_version(TORCH_version)
19 | 
20 | 
def format_cuda_version(version):
    """Return the wheel-index tag for a CUDA version string.

    ``'11.8'`` becomes ``'cu118'``. ``None`` (which ``torch.version.cuda``
    reports on CPU-only builds) maps to ``'cpu'`` instead of raising
    ``AttributeError``.
    """
    if version is None:
        return 'cpu'
    return 'cu' + version.replace('.', '')
23 | 
24 | 
25 | CUDA_version = torch.version.cuda
26 | CUDA = format_cuda_version(CUDA_version)
27 | 
28 | install(f'torch-scatter', f'https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html')
29 | install(f'torch-sparse', f'https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html')
30 | install(f'torch-cluster', f'https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html')
31 | install(f'torch-spline-conv', f'https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html')
32 | install(f'torch-geometric', f'https://pytorch-geometric.com/whl/torch-{TORCH}+{CUDA}.html')
33 | install(f'torchmetrics')
34 | install('ogb')
35 | install('networkx==3.1')
36 | 


--------------------------------------------------------------------------------
/SSL/DGCluster/plots.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import numpy as np
  3 | import matplotlib.pyplot as plt
  4 | from matplotlib import rcParams
  5 | import copy
  6 | import torch
  7 | 
  8 | rcParams['pdf.fonttype'] = 42
  9 | rcParams['ps.fonttype'] = 42
 10 | 
 11 | dataset_names = ['cora', 'citeseer', 'pubmed', 'computers', 'photo', 'coauthorcs', 'coauthorphysics']
 12 | 
 13 | dataset_name_map = {
 14 |     'cora': "Cora",
 15 |     'citeseer': "CiteSeer",
 16 |     'pubmed': "PubMed",
 17 |     'computers': "Amazon PC",
 18 |     'photo': "Amazon Photo",
 19 |     'coauthorcs': "Coauthor CS",
 20 |     'coauthorphysics': "Coauthor PHY"
 21 | }
 22 | 
 23 | markers = {
 24 |     "cora": "v",
 25 |     "citeseer": "^",
 26 |     "pubmed": "<",
 27 |     "computers": ">",
 28 |     "photo": "P",
 29 |     "coauthorcs": "X",
 30 |     "coauthorphysics": "D",
 31 | }
 32 | 
 33 | colors = {
 34 |     "cora": "r",
 35 |     "citeseer": "m",
 36 |     "pubmed": "g",
 37 |     "computers": "c",
 38 |     "photo": "b",
 39 |     "coauthorcs": "y",
 40 |     "coauthorphysics": "k",
 41 | }
 42 | 
 43 | evaluation_keys = ['modularity', 'nmi']
 44 | 
 45 | results = {dataset: {} for dataset in dataset_names}
 46 | for dataset in dataset_names:
 47 |     for evaluation_key in evaluation_keys:
 48 |         result_dataset_eval = []
 49 |         result_dataset_eval_std = []
 50 |         for lam in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]:
 51 |             results_dataset = []
 52 |             for seed in range(10):
 53 |                 path = f"results/results_{dataset}_{lam}_300_gcn_{seed}.pt"
 54 |                 results_dataset.append(torch.load(path)[evaluation_key])
 55 |             result_dataset_eval.append(np.mean(results_dataset))
 56 |             result_dataset_eval_std.append(np.std(results_dataset))
 57 |         results[dataset][evaluation_key] = copy.deepcopy(result_dataset_eval)
 58 |         results[dataset][evaluation_key + '_std'] = copy.deepcopy(result_dataset_eval_std)
 59 | 
 60 | fig, axes = plt.subplots(nrows=1, ncols=len(evaluation_keys), figsize=(len(evaluation_keys) * 4, 4), sharex=True)
 61 | 
 62 | labelsize = 14
 63 | ticksize = 12
 64 | markersize = 6
 65 | linewidth = 1.5
 66 | 
 67 | xticks = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
 68 | 
 69 | title_map = {
 70 |     'conductance': r'$\mathcal{C}$',
 71 |     'modularity': r'$\mathcal{Q}$',
 72 |     'nmi': r'NMI',
 73 |     'sample_f1_score': r'F1'
 74 | }
 75 | 
 76 | for i, key in enumerate(evaluation_keys):
 77 |     ax = axes[i]
 78 |     ls = [None] * len(dataset_names)
 79 |     for i, method_key in enumerate(dataset_names):
 80 |         ls[i], = ax.plot(xticks, results[method_key][key], label=method_key, marker=markers[method_key], color=colors[method_key])
 81 |         ax.fill_between(xticks, np.array(results[method_key][key]) - np.array(results[method_key][key + '_std']), np.array(results[method_key][key]) + np.array(results[method_key][key + '_std']), alpha=0.2, color=colors[method_key])
 82 | 
 83 |     ax.minorticks_off()
 84 |     ax.set_xticks(xticks)  # , [1, 2, 4, 8, 16, 32])
 85 |     ax.set_xticklabels(xticks)
 86 |     ax.set_title(title_map[key], fontsize=labelsize)
 87 |     ax.tick_params(axis='x', labelsize=ticksize)
 88 |     ax.tick_params(axis='y', labelsize=ticksize)
 89 |     ax.grid(True)
 90 | 
 91 | fig.add_subplot(111, frameon=False)
 92 | plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
 93 | plt.xlabel(r'$\lambda$', fontsize=labelsize)
 94 | 
 95 | fig.tight_layout()
 96 | fig.subplots_adjust(left=0.035, bottom=0.16, right=0.99, wspace=0.22)
 97 | 
 98 | axes[0].legend(handles=ls, labels=[dataset_name_map[dataset_name] for dataset_name in dataset_names],
 99 |                loc='upper center', bbox_to_anchor=(1.1, -0.2), fancybox=False, shadow=False, ncol=math.ceil(len(dataset_names) / 2), fontsize=labelsize)
100 | 
101 | fig.savefig(f'plots/results_{"_".join(evaluation_keys)}.pdf', bbox_inches='tight')
102 | plt.show()
103 | 


--------------------------------------------------------------------------------
/SSL/DGCluster/plots_num_clusters.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | from matplotlib import rcParams
 4 | import copy
 5 | import torch
 6 | import math
 7 | 
 8 | rcParams['pdf.fonttype'] = 42
 9 | rcParams['ps.fonttype'] = 42
10 | 
11 | dataset_names = ['cora', 'citeseer', 'pubmed', 'computers', 'photo', 'coauthorcs', 'coauthorphysics']
12 | 
13 | dataset_name_map = {
14 |     'cora': "Cora",
15 |     'citeseer': "CiteSeer",
16 |     'pubmed': "PubMed",
17 |     'computers': "Amazon PC",
18 |     'photo': "Amazon Photo",
19 |     'coauthorcs': "Coauthor CS",
20 |     'coauthorphysics': "Coauthor PHY"
21 | }
22 | 
23 | markers = {
24 |     "cora": "v",
25 |     "citeseer": "^",
26 |     "pubmed": "<",
27 |     "computers": ">",
28 |     "photo": "P",
29 |     "coauthorcs": "X",
30 |     "coauthorphysics": "D",
31 | }
32 | 
33 | colors = {
34 |     "cora": "r",
35 |     "citeseer": "m",
36 |     "pubmed": "g",
37 |     "computers": "c",
38 |     "photo": "b",
39 |     "coauthorcs": "y",
40 |     "coauthorphysics": "k",
41 | }
42 | 
43 | evaluation_keys = ['num_clusters']
44 | 
# Collect, for every dataset and metric, the mean and standard deviation over
# the 10 seeds at each lambda value. Result files are read from ``results/``.
lambda_grid = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
results = {}
for dataset in dataset_names:
    per_metric = {}
    for evaluation_key in evaluation_keys:
        means, stds = [], []
        for lam in lambda_grid:
            seed_values = [
                torch.load(f"results/results_{dataset}_{lam}_300_gcn_{seed}.pt")[evaluation_key]
                for seed in range(10)
            ]
            means.append(np.mean(seed_values))
            stds.append(np.std(seed_values))
        # Fresh lists are built per metric, so they can be stored directly.
        per_metric[evaluation_key] = means
        per_metric[evaluation_key + '_std'] = stds
    results[dataset] = per_metric
59 | 
60 | fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(4, 3), sharex=True)
61 | 
62 | labelsize = 14
63 | ticksize = 12
64 | markersize = 6
65 | linewidth = 1.5
66 | 
67 | xticks = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
68 | 
# Single panel: number of detected communities versus lambda, one curve per
# dataset with a shaded band of +/- one standard deviation.
ax = axes
ls = []
for method_key in dataset_names:
    mean = np.array(results[method_key]['num_clusters'])
    spread = np.array(results[method_key]['num_clusters_std'])
    handle, = ax.plot(xticks, results[method_key]['num_clusters'], label=method_key,
                      marker=markers[method_key], color=colors[method_key])
    ls.append(handle)
    ax.fill_between(xticks, mean - spread, mean + spread, alpha=0.2,
                    color=colors[method_key])

ax.minorticks_off()
ax.set_xticks(xticks)
ax.set_xticklabels(xticks)
ax.set_ylabel(r'#Communities', fontsize=labelsize)
for axis_name in ('x', 'y'):
    ax.tick_params(axis=axis_name, labelsize=ticksize)
ax.grid(True)
83 | 
84 | fig.add_subplot(111, frameon=False)
85 | plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False)
86 | plt.xlabel(r'$\lambda$', fontsize=labelsize)
87 | 
88 | fig.tight_layout()
89 | fig.subplots_adjust(left=0.035, bottom=0.16, right=0.99, wspace=0.22)
90 | 
91 | axes.legend(handles=ls, labels=[dataset_name_map[dataset_name] for dataset_name in dataset_names],
92 |             loc='upper center', bbox_to_anchor=(0.45, -0.2), fancybox=False, shadow=False, ncol=math.ceil(len(dataset_names) / 2), fontsize=labelsize - 4)
93 | 
94 | fig.savefig(f'plots/results_number_clusters.pdf', bbox_inches='tight')
95 | plt.show()
96 | 


--------------------------------------------------------------------------------
/SSL/DGCluster/print_results.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | 
  4 | dataset_names = ['cora', 'citeseer', 'pubmed', 'computers', 'photo', 'coauthorcs', 'coauthorphysics']
  5 | 
  6 | # Table 1 printing
  7 | print('Table 1 Results')
  8 | print('-----------------------------------')
  9 | for lam in [0.0, 0.2, 0.8]:
 10 |     performance = "\\model (\\lambda={}) & ".format(lam)
 11 |     for dataset_name in dataset_names:
 12 |         scores_1 = []
 13 |         scores_2 = []
 14 |         for seed in range(10):
 15 |             path = f'results/results_{dataset_name}_{lam}_300_gcn_{seed}.pt'
 16 |             res = torch.load(path)
 17 |             scores_1.append(res['conductance'])
 18 |             scores_2.append(res['modularity'])
 19 |         score_1 = np.mean(scores_1)
 20 |         score_2 = np.mean(scores_2)
 21 |         performance += f"{(score_1 * 100):.1f} & {(score_2 * 100):.1f} & "
 22 |     print(performance[:-2] + '\\\\')
 23 | print('-----------------------------------\n')
 24 | 
 25 | # Table 3 printing
 26 | print('Table 3 Results')
 27 | print('-----------------------------------')
 28 | for lam in [0.0, 0.2, 0.8]:
 29 |     performance = "\\model (\\lambda={}) & ".format(lam)
 30 |     for dataset_name in dataset_names:
 31 |         scores_1 = []
 32 |         scores_2 = []
 33 |         for seed in range(10):
 34 |             path = f'results/results_{dataset_name}_{lam}_300_gcn_{seed}.pt'
 35 |             res = torch.load(path)
 36 |             scores_1.append(res['nmi'])
 37 |             scores_2.append(res['sample_f1_score'])
 38 |         score_1 = np.mean(scores_1)
 39 |         score_2 = np.mean(scores_2)
 40 |         performance += f"{(score_1 * 100):.1f} & {(score_2 * 100):.1f} & "
 41 |     print(performance[:-2] + '\\\\')
 42 | print('-----------------------------------\n')
 43 | 
 44 | # Table 4 printing
 45 | print('Table 4 Results')
 46 | print('-----------------------------------')
 47 | dataset_name_map = {
 48 |     'cora': "Cora",
 49 |     'citeseer': "CiteSeer",
 50 |     'pubmed': "PubMed",
 51 |     'computers': "Amazon PC",
 52 |     'photo': "Amazon Photo",
 53 |     'coauthorcs': "Coauthor CS",
 54 |     'coauthorphysics': "Coauthor PHYSICS"
 55 | }
 56 | lam = 0.2
 57 | all_data = []
 58 | for dataset_name in dataset_names:
 59 |     row_data = []
 60 |     performance = f"\\textsc{{{dataset_name_map[dataset_name]}}} & "
 61 |     for metric in ['conductance', 'modularity', 'nmi', 'sample_f1_score']:
 62 |         for base_model in ['gcn', 'gat', 'gin', 'sage']:
 63 |             scores = []
 64 |             for seed in range(10):
 65 |                 path = f'results/results_{dataset_name}_{lam}_300_{base_model}_{seed}.pt'
 66 |                 res = torch.load(path)
 67 |                 scores.append(res[metric])
 68 |             score = np.mean(scores)
 69 |             performance += f"{(score * 100):.1f} & "
 70 |             row_data.append(score)
 71 |     print(performance[:-2] + '\\\\')
 72 |     all_data.append(row_data)
 73 | all_data = np.array(all_data).mean(axis=0)
 74 | performance = f"\\textsc{{AVERAGE}} & "
 75 | for i in range(len(all_data)):
 76 |     performance += f"{(all_data[i] * 100):.1f} & "
 77 | print(performance[:-2] + '\\\\')
 78 | print('-----------------------------------\n')
 79 | 
 80 | # Table 5 printing
 81 | dataset_names_2 = ['cora', 'citeseer', 'pubmed']
 82 | print('Table 5 Results')
 83 | print('-----------------------------------')
 84 | for alp in [0.0, 0.5, 1.0]:
 85 |     performance = f"$\\alpha={alp}$ & "
 86 |     for dataset_name in dataset_names_2:
 87 |         scores_1 = []
 88 |         scores_2 = []
 89 |         for seed in range(10):
 90 |             if alp == 0.0:
 91 |                 path = f'results/results_{dataset_name}_{lam}_300_gcn_{seed}.pt'
 92 |             else:
 93 |                 path = f'results/results_{dataset_name}_{lam}_{alp}_300_gcn_{seed}.pt'
 94 |             res = torch.load(path)
 95 |             scores_1.append(res['modularity'])
 96 |             scores_2.append(res['sample_f1_score'])
 97 |         score_1 = np.mean(scores_1)
 98 |         score_2 = np.mean(scores_2)
 99 |         performance += f"{(score_1 * 100):.1f} & {(score_2 * 100):.1f} & "
100 |     print(performance[:-2] + '\\\\')
101 | print('-----------------------------------\n')
102 | 


--------------------------------------------------------------------------------
/SSL/DGCluster/run.sh:
--------------------------------------------------------------------------------
 1 | datasets="cora citeseer pubmed computers photo coauthorphysics"
 2 | lams="0.8"
 3 | seeds="0 1 2 3 4 5 6 7 8 9"
 4 | 
 5 | for dataset in $datasets; do
 6 |   for lam in $lams; do
 7 |     for seed in $seeds; do
 8 |       python main.py --dataset $dataset --lam $lam --seed $seed --device cuda:0
 9 |     done
10 |   done
11 | done


--------------------------------------------------------------------------------
/SSL/DGCluster/utils.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import torch
 3 | import numpy as np
 4 | 
 5 | from sklearn.metrics.cluster import normalized_mutual_info_score
 6 | from sklearn.metrics import f1_score
 7 | 
 8 | import networkx as nx
 9 | 
10 | 
def _pairwise_same_group(assignments, num_groups):
    """Return a flattened 0/1 matrix marking node pairs that share a group."""
    one_hot = np.zeros((len(assignments), num_groups))
    for row, group in enumerate(assignments):
        one_hot[row][group] = 1
    one_hot = torch.tensor(one_hot)
    # Entry (a, b) of one_hot @ one_hot^T is 1 exactly when a and b share a group.
    return torch.matmul(one_hot, torch.t(one_hot)).flatten()


def sample_f1_score(test_data, clusters, num_nodes):
    """Estimate a pairwise F1 score between ``clusters`` and ``test_data.y``.

    In each of 10 rounds a random subset of node indices is drawn. For that
    subset two binary "same group" indicators over all ordered node pairs are
    built, one from the cluster ids and one from the labels, and the F1 score
    of the cluster indicator against the label indicator is computed.

    Args:
        test_data: Object whose ``y`` attribute (after ``squeeze()``) is
            indexed by node and used as an integer class id.
        clusters: Integer cluster id per node; must support ``max()`` and
            indexing with a list of node indices.
        num_nodes: Node indices are sampled from ``range(0, num_nodes)``.

    Returns:
        The F1 score averaged over the 10 rounds.
    """
    num_rounds = 10
    # random.sample raises ValueError when asked for more items than exist,
    # so cap the sample size for graphs with fewer than 1000 nodes.
    sample_size = min(1000, num_nodes)

    # These do not depend on the drawn sample, so compute them once.
    labels = test_data.y.squeeze()
    num_label_values = max(labels) + 1
    num_cluster_values = max(clusters) + 1

    total = 0
    for _ in range(num_rounds):
        sampled = random.sample(range(0, num_nodes), sample_size)
        MM = _pairwise_same_group(clusters[sampled], num_cluster_values)
        CM = _pairwise_same_group(labels[sampled], num_label_values)
        total = total + f1_score(CM, MM)

    return total / num_rounds
41 | 
42 | 
def compute_fast_modularity(clusters, num_nodes, num_edges, torch_sparse_adj, degree, device):
    """Compute the modularity of a hard clustering with matrix products.

    With ``C`` the one-hot assignment matrix, ``A`` the adjacency, ``d`` the
    degree vector and ``m = num_edges``, this evaluates
    ``(trace(C^T A C) - ||C^T d||^2 / (2m)) / (2m)``.

    Args:
        clusters: Integer cluster id per node.
        num_nodes: Number of rows of the assignment matrix.
        num_edges: Edge count ``m`` used in the normalisation.
        torch_sparse_adj: Adjacency tensor (cast to double before use).
        degree: Per-node degree tensor (cast to double before use).
        device: Device on which the assignment matrix is placed.

    Returns:
        The modularity as a Python float.
    """
    num_clusters = max(clusters) + 1
    assignment = np.zeros((num_nodes, num_clusters))
    for node, cluster in enumerate(clusters):
        assignment[node][cluster] = 1
    assignment = torch.tensor(assignment).double().to(device)
    assignment_t = torch.t(assignment)

    # Edge weight that falls inside clusters: trace(C^T A C).
    within = torch.matmul(assignment_t, torch_sparse_adj.double())
    within = torch.trace(torch.matmul(within, assignment))

    # Sum of squared per-cluster degree totals, via the trace of an outer product.
    cluster_degree = torch.matmul(assignment_t, degree.double()).unsqueeze(dim=0)
    expected = torch.trace(torch.matmul(torch.t(cluster_degree), cluster_degree))
    expected = expected / (2 * num_edges)

    return ((within - expected) / (2 * num_edges)).item()
59 | 
60 | 
def compute_nmi(clusters, labels):
    """Return ``normalized_mutual_info_score(clusters, labels)``."""
    nmi = normalized_mutual_info_score(clusters, labels)
    return nmi
63 | 
64 | 
def compute_conductance(clusters, Graph):
    """Compute the conductance of each cluster for which it is defined.

    Args:
        clusters: Integer cluster id per node; position ``i`` is treated as
            node ``i`` of ``Graph``.
        Graph: Graph passed to ``nx.conductance`` (edge attribute ``'weight'``
            is used as the weight).

    Returns:
        A list with one conductance value per cluster that could be evaluated.
        Clusters whose evaluation fails are skipped, so the list may be
        shorter than the number of cluster ids.
    """
    comms = [[] for _ in range(max(clusters) + 1)]
    for node, cluster in enumerate(clusters):
        comms[cluster].append(node)

    conductance = []
    for com in comms:
        try:
            conductance.append(nx.conductance(Graph, com, weight='weight'))
        except (ZeroDivisionError, nx.NetworkXError):
            # Best effort: skip clusters with undefined conductance (for
            # example an empty cluster). Only these errors are caught, so
            # KeyboardInterrupt and real bugs are no longer swallowed.
            continue

    return conductance
77 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/transferLearning_MoleculeNet/README.md:
--------------------------------------------------------------------------------
 1 | ## Dependencies & Dataset
 2 | 
 3 | Please refer to https://github.com/snap-stanford/pretrain-gnns#installation for environment setup and to https://github.com/snap-stanford/pretrain-gnns#dataset-download for instructions on downloading the dataset. Place the downloaded dataset in `./chem/` and unzip it.
 4 | 
 5 | If you cannot install the old torch-geometric version, an alternative is to use a newer one (possibly ==1.6.0) and apply the modifications described in this issue: https://github.com/snap-stanford/pretrain-gnns/issues/14.
 6 | 
 7 | ## Training & Evaluation
 8 | ### Pre-training: ###
 9 | ```
10 | cd ./chem
11 | python pretrain_graphcl.py --aug1 random --aug2 none
12 | ```
13 | 
14 | ### Node ID prediction: ###
15 | ```
16 | cd ./chem
17 | ./run.sh
18 | ```


--------------------------------------------------------------------------------
/SSL/GraphCL/transferLearning_MoleculeNet/chem/dataloader.py:
--------------------------------------------------------------------------------
 1 | import torch.utils.data
 2 | from torch.utils.data.dataloader import default_collate
 3 | 
 4 | from batch import BatchSubstructContext, BatchMasking, BatchAE
 5 | 
 6 | class DataLoaderSubstructContext(torch.utils.data.DataLoader):
 7 |     r"""Data loader which merges data objects from a
 8 |     :class:`torch_geometric.data.dataset` to a mini-batch.
 9 |     Args:
10 |         dataset (Dataset): The dataset from which to load the data.
11 |         batch_size (int, optional): How may samples per batch to load.
12 |             (default: :obj:`1`)
13 |         shuffle (bool, optional): If set to :obj:`True`, the data will be
14 |             reshuffled at every epoch (default: :obj:`True`)
15 |     """
16 | 
17 |     def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
18 |         super(DataLoaderSubstructContext, self).__init__(
19 |             dataset,
20 |             batch_size,
21 |             shuffle,
22 |             collate_fn=lambda data_list: BatchSubstructContext.from_data_list(data_list),
23 |             **kwargs)
24 | 
class DataLoaderMasking(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch, using
    ``BatchMasking.from_data_list`` as the collate function.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function directly instead of wrapping it in a
        # lambda: a lambda cannot be pickled, which breaks worker processes
        # that receive the loader via pickling.
        super().__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchMasking.from_data_list,
            **kwargs)
43 | 
44 | 
class DataLoaderAE(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch, using
    ``BatchAE.from_data_list`` as the collate function.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
        **kwargs: Forwarded unchanged to :class:`torch.utils.data.DataLoader`.
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function directly instead of wrapping it in a
        # lambda: a lambda cannot be pickled, which breaks worker processes
        # that receive the loader via pickling.
        super().__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchAE.from_data_list,
            **kwargs)
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/transferLearning_MoleculeNet/chem/finetune.sh:
--------------------------------------------------------------------------------
 1 | #### GIN fine-tuning
 2 | split=scaffold
 3 | dataset=$1
 4 | 
 5 | CUDA_VISIBLE_DEVICES=0
 6 | for runseed in 0 1
 7 | do
 8 | python finetune.py --input_model_file models_graphcl/graphcl_80.pth --split $split --runseed $runseed --gnn_type gin --dataset $dataset --lr 1e-3 --epochs 100
 9 | done
10 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/transferLearning_MoleculeNet/chem/parse_result.py:
--------------------------------------------------------------------------------
 1 | ### Parsing the result!
 2 | import tensorflow as tf
 3 | import os
 4 | import numpy as np
 5 | import pickle
 6 | 
 7 | def get_test_acc(event_file):
 8 |     val_auc_list = np.zeros(100)
 9 |     test_auc_list = np.zeros(100)
10 |     for e in list(tf.train.summary_iterator(event_file)):
11 |         if len(e.summary.value) == 0:
12 |             continue
13 |         if e.summary.value[0].tag == "data/val_auc":
14 |             val_auc_list[e.step-1] = e.summary.value[0].simple_value
15 |         if e.summary.value[0].tag == "data/test_auc":
16 |             test_auc_list[e.step-1] = e.summary.value[0].simple_value
17 |     
18 |     best_epoch = np.argmax(val_auc_list)
19 | 
20 |     return test_auc_list[best_epoch]
21 | 
if __name__ == "__main__":
    # Collect the selected test score of every (seed, config, dataset) run
    # into one array and pickle it together with the axis labels.
    dataset_list = ["muv", "bace", "bbbp", "clintox", "hiv", "sider", "tox21", "toxcast"]
    # 10 random seeds
    seed_list = list(range(10))
    config_list = [
        "gin_nopretrain",
        "gin_infomax",
        "gin_edgepred",
        "gin_masking",
        "gin_contextpred",
        "gin_supervised",
        "gin_supervised_infomax",
        "gin_supervised_edgepred",
        "gin_supervised_masking",
        "gin_supervised_contextpred",
        "gcn_nopretrain",
        "gcn_supervised_contextpred",
        "graphsage_nopretrain",
        "graphsage_supervised_contextpred",
        "gat_nopretrain",
        "gat_supervised_contextpred",
    ]

    result_mat = np.zeros((len(seed_list), len(config_list), len(dataset_list)))

    for i, seed in enumerate(seed_list):
        for j, config in enumerate(config_list):
            for k, dataset in enumerate(dataset_list):
                dir_name = f"runs/finetune_cls_runseed{seed}/{dataset}/{config}"
                print(dir_name)
                # Use the first directory entry whose name contains "events".
                event_file_list = [name for name in os.listdir(dir_name) if "events" in name]
                event_file = event_file_list[0]

                result_mat[i, j, k] = get_test_acc(dir_name + "/" + event_file)

    summary = {
        "result_mat": result_mat,
        "seed_list": seed_list,
        "config_list": config_list,
        "dataset_list": dataset_list,
    }
    with open("result_summary", "wb") as f:
        pickle.dump(summary, f)
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/transferLearning_MoleculeNet/chem/run.sh:
--------------------------------------------------------------------------------
 1 | #### GIN fine-tuning
 2 | 
 3 | ./finetune.sh bace > log_bace_2
 4 | ./finetune.sh bbbp > log_bbbp_2
 5 | ./finetune.sh clintox > log_clintox_2 
 6 | ./finetune.sh sider > log_sider_2 
 7 | ./finetune.sh tox21 > log_tox21_2 
 8 | ./finetune.sh toxcast > log_toxcast_2 
 9 | ./finetune.sh hiv > log_hiv_2 
10 | ./finetune.sh muv > log_muv_2 
11 | 
12 | 
13 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/transferLearning_MoleculeNet/environment.yml:
--------------------------------------------------------------------------------
 1 | name: pregnn
 2 | channels:
 3 |   - rdkit
 4 |   - defaults
 5 | dependencies:
 6 |   - _libgcc_mutex=0.1=main
 7 |   - blas=1.0=mkl
 8 |   - bzip2=1.0.8=h7b6447c_0
 9 |   - ca-certificates=2020.1.1=0
10 |   - cairo=1.14.12=h8948797_3
11 |   - certifi=2020.4.5.1=py36_0
12 |   - fontconfig=2.13.0=h9420a91_0
13 |   - freetype=2.9.1=h8a8886c_1
14 |   - glib=2.63.1=h5a9c865_0
15 |   - icu=58.2=he6710b0_3
16 |   - intel-openmp=2020.0=166
17 |   - jpeg=9b=h024ee3a_2
18 |   - libboost=1.67.0=h46d08c1_4
19 |   - libedit=3.1=heed3624_0
20 |   - libffi=3.2.1=hd88cf55_4
21 |   - libgcc-ng=9.1.0=hdf63c60_0
22 |   - libgfortran-ng=7.3.0=hdf63c60_0
23 |   - libpng=1.6.37=hbc83047_0
24 |   - libstdcxx-ng=9.1.0=hdf63c60_0
25 |   - libtiff=4.1.0=h2733197_0
26 |   - libuuid=1.0.3=h1bed415_2
27 |   - libxcb=1.13=h1bed415_1
28 |   - libxml2=2.9.9=hea5a465_1
29 |   - mkl=2020.0=166
30 |   - mkl-service=2.3.0=py36he904b0f_0
31 |   - mkl_fft=1.0.15=py36ha843d7b_0
32 |   - mkl_random=1.1.0=py36hd6b4f25_0
33 |   - ncurses=6.0=0
34 |   - numpy=1.18.1=py36h4f9e942_0
35 |   - numpy-base=1.18.1=py36hde5b4d6_1
36 |   - olefile=0.46=py_0
37 |   - openssl=1.0.2u=h7b6447c_0
38 |   - pandas=1.0.3=py36h0573a6f_0
39 |   - pcre=8.43=he6710b0_0
40 |   - pillow=7.1.2=py36hb39fc2d_0
41 |   - pip=20.0.2=py36_1
42 |   - pixman=0.38.0=h7b6447c_0
43 |   - py-boost=1.67.0=py36h04863e7_4
44 |   - python=3.6.5=hc3d631a_2
45 |   - python-dateutil=2.8.1=py_0
46 |   - pytz=2020.1=py_0
47 |   - rdkit=2019.03.1.0=py36hc20afe1_1
48 |   - readline=7.0=ha6073c6_4
49 |   - setuptools=46.1.3=py36_0
50 |   - six=1.14.0=py36_0
51 |   - sqlite=3.23.1=he433501_0
52 |   - tk=8.6.8=hbc83047_0
53 |   - wheel=0.34.2=py36_0
54 |   - xz=5.2.5=h7b6447c_0
55 |   - zlib=1.2.11=h7b6447c_3
56 |   - zstd=1.3.7=h0b5b093_0
57 |   - pip:
58 |     - decorator==4.4.2
59 |     - isodate==0.6.0
60 |     - joblib==0.14.1
61 |     - networkx==2.4
62 |     - plyfile==0.7.2
63 |     - protobuf==3.11.3
64 |     - pyparsing==2.4.7
65 |     - rdflib==5.0.0
66 |     - scikit-learn==0.22.2.post1
67 |     - scipy==1.4.1
68 |     - sklearn==0.0
69 |     - tensorboardx==2.0
70 |     - torch==1.0.1
71 |     - torch-cluster==1.2.4
72 |     - torch-geometric==1.0.3
73 |     - torch-scatter==1.1.2
74 |     - torch-sparse==0.2.4
75 |     - torch-spline-conv==1.0.6
76 |     - tqdm==4.46.0
77 | 
78 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/README.md:
--------------------------------------------------------------------------------
1 | ## Training & Evaluation
2 | ./go.sh 0 NCI1 random2
3 | ./go.sh 1 PROTEINS random2
4 | ./go.sh 2 DD random2
5 | ./go.sh 3 MUTAG random2
6 | ./go.sh 4 COLLAB random2
7 | ./go.sh 5 IMDB-BINARY random2
8 | ./go.sh 6 REDDIT-BINARY random2
9 | ./go.sh 7 REDDIT-MULTI-5K random2


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/arguments.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | def arg_parse():
 4 |     parser = argparse.ArgumentParser(description='GcnInformax Arguments.')
 5 |     parser.add_argument('--DS', dest='DS', help='Dataset')
 6 |     parser.add_argument('--local', dest='local', action='store_const', 
 7 |             const=True, default=False)
 8 |     parser.add_argument('--glob', dest='glob', action='store_const', 
 9 |             const=True, default=False)
10 |     parser.add_argument('--prior', dest='prior', action='store_const', 
11 |             const=True, default=False)
12 | 
13 |     parser.add_argument('--lr', dest='lr', type=float,
14 |             help='Learning rate.')
15 |     parser.add_argument('--num-gc-layers', dest='num_gc_layers', type=int, default=5,
16 |             help='Number of graph convolution layers before each pooling')
17 |     parser.add_argument('--hidden-dim', dest='hidden_dim', type=int, default=32,
18 |             help='')
19 | 
20 |     parser.add_argument('--aug', type=str, default='dnodes')
21 |     parser.add_argument('--seed', type=int, default=0)
22 |     parser.add_argument('--num_code', type=int, default=16)
23 | 
24 |     return parser.parse_args()
25 | 
26 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/cortex_DIM/configs/convnets.py:
--------------------------------------------------------------------------------
 1 | '''Basic convnet hyperparameters.
 2 | 
 3 | conv_args are in format (dim_h, f_size, stride, pad, batch_norm, dropout, nonlinearity, pool)
 4 | fc_args are in format (dim_h, batch_norm, dropout, nonlinearity)
 5 | 
 6 | '''
 7 | 
 8 | from cortex_DIM.nn_modules.encoder import ConvnetEncoder, FoldedConvnetEncoder
 9 | 
10 | 
11 | # Basic DCGAN-like encoders
12 | 
# Two-layer convolutional encoder config keyed as ``basic28x28``.
# NOTE(review): the fc_args entry previously carried a fifth element (None),
# unlike the 4-field format stated in the module docstring and used by every
# other config in this file; it is dropped here for consistency. Confirm
# against the encoder's fc_args handling.
_basic28x28 = dict(
    Encoder=ConvnetEncoder,
    conv_args=[(64, 5, 2, 2, True, False, 'ReLU', None),
               (128, 5, 2, 2, True, False, 'ReLU', None)],
    fc_args=[(1024, True, False, 'ReLU')],
    local_idx=1,
    fc_idx=0
)
21 | 
22 | _basic32x32 = dict(
23 |     Encoder=ConvnetEncoder,
24 |     conv_args=[(64, 4, 2, 1, True, False, 'ReLU', None),
25 |                (128, 4, 2, 1, True, False, 'ReLU', None),
26 |                (256, 4, 2, 1, True, False, 'ReLU', None)],
27 |     fc_args=[(1024, True, False, 'ReLU')],
28 |     local_idx=1,
29 |     conv_idx=2,
30 |     fc_idx=0
31 | )
32 | 
33 | _basic64x64 = dict(
34 |     Encoder=ConvnetEncoder,
35 |     conv_args=[(64, 4, 2, 1, True, False, 'ReLU', None),
36 |                (128, 4, 2, 1, True, False, 'ReLU', None),
37 |                (256, 4, 2, 1, True, False, 'ReLU', None),
38 |                (512, 4, 2, 1, True, False, 'ReLU', None)],
39 |     fc_args=[(1024, True, False, 'ReLU')],
40 |     local_idx=2,
41 |     conv_idx=3,
42 |     fc_idx=0
43 | )
44 | 
45 | # Alexnet-like encoders
46 | 
47 | _alex64x64 = dict(
48 |     Encoder=ConvnetEncoder,
49 |     conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
50 |                (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
51 |                (384, 3, 1, 1, True, False, 'ReLU', None),
52 |                (384, 3, 1, 1, True, False, 'ReLU', None),
53 |                (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2))],
54 |     fc_args=[(4096, True, False, 'ReLU'),
55 |              (4096, True, False, 'ReLU')],
56 |     local_idx=2,
57 |     conv_idx=4,
58 |     fc_idx=1
59 | )
60 | 
61 | _foldalex64x64 = dict(
62 |     Encoder=FoldedConvnetEncoder,
63 |     crop_size=16,
64 |     conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
65 |                (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
66 |                (384, 3, 1, 1, True, False, 'ReLU', None),
67 |                (384, 3, 1, 1, True, False, 'ReLU', None),
68 |                (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2))],
69 |     fc_args=[(4096, True, False, 'ReLU'),
70 |              (4096, True, False, 'ReLU')],
71 |     local_idx=4,
72 |     fc_idx=1
73 | )
74 | 
75 | _foldmultialex64x64 = dict(
76 |     Encoder=FoldedConvnetEncoder,
77 |     crop_size=16,
78 |     conv_args=[(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
79 |                (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
80 |                (384, 3, 1, 1, True, False, 'ReLU', None),
81 |                (384, 3, 1, 1, True, False, 'ReLU', None),
82 |                (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
83 |                (192, 3, 1, 0, True, False, 'ReLU', None),
84 |                (192, 1, 1, 0, True, False, 'ReLU', None)],
85 |     fc_args=[(4096, True, False, 'ReLU')],
86 |     local_idx=4,
87 |     multi_idx=6,
88 |     fc_idx=1
89 | )
90 | 
# Registry mapping a configuration name to its encoder settings dict.
configs = {
    'basic28x28': _basic28x28,
    'basic32x32': _basic32x32,
    'basic64x64': _basic64x64,
    'alex64x64': _alex64x64,
    'foldalex64x64': _foldalex64x64,
    'foldmultialex64x64': _foldmultialex64x64,
}


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/cortex_DIM/functions/gan_losses.py:
--------------------------------------------------------------------------------
 1 | """
 2 | 
 3 | """
 4 | 
 5 | import math
 6 | 
 7 | import torch
 8 | import torch.nn.functional as F
 9 | 
10 | from cortex_DIM.functions.misc import log_sum_exp
11 | 
12 | 
def raise_measure_error(measure):
    """Reports an unsupported measure name.

    Args:
        measure: Name of the measure that was requested.

    Raises:
        NotImplementedError: Always; the message lists the supported measures.

    """
    supported_measures = ['GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1']
    message = 'Measure `{}` not supported. Supported: {}'.format(
        measure, supported_measures)
    raise NotImplementedError(message)
18 | 
19 | 
def get_positive_expectation(p_samples, measure, average=True):
    """Evaluates the positive-sample term of a divergence / difference.

    Args:
        p_samples: Scores of the positive samples.
        measure: One of 'GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1'.
        average: If True return the mean over all elements, otherwise the
            element-wise term.

    Returns:
        torch.Tensor

    Raises:
        NotImplementedError: If `measure` is not one of the supported names.

    """
    log_2 = math.log(2.)

    # One element-wise transform per supported measure.
    positive_terms = {
        'GAN': lambda p: - F.softplus(-p),
        'JSD': lambda p: log_2 - F.softplus(-p),
        'X2': lambda p: p ** 2,
        'KL': lambda p: p + 1.,
        'RKL': lambda p: -torch.exp(-p),
        'DV': lambda p: p,
        'H2': lambda p: 1. - torch.exp(-p),
        'W1': lambda p: p,
    }

    if measure not in positive_terms:
        raise_measure_error(measure)

    Ep = positive_terms[measure](p_samples)
    return Ep.mean() if average else Ep
57 | 
58 | 
def get_negative_expectation(q_samples, measure, average=True):
    """Computes the negative part of a divergence / difference.

    Args:
        q_samples: Negative samples.
        measure: Measure to compute for. One of 'GAN', 'JSD', 'X2', 'KL',
            'RKL', 'DV', 'H2', 'W1'.
        average: Average the result over samples.

    Returns:
        torch.Tensor

    Raises:
        NotImplementedError: If `measure` is not one of the supported names.

    """
    log_2 = math.log(2.)

    if measure == 'GAN':
        Eq = F.softplus(-q_samples) + q_samples
    elif measure == 'JSD':
        Eq = F.softplus(-q_samples) + q_samples - log_2
    elif measure == 'X2':
        # abs() gives the same forward value as sqrt(q ** 2) but has a
        # well-defined (zero) gradient at q == 0, where sqrt(q ** 2)
        # back-propagates NaN (inf * 0).
        Eq = -0.5 * ((torch.abs(q_samples) + 1.) ** 2)
    elif measure == 'KL':
        Eq = torch.exp(q_samples)
    elif measure == 'RKL':
        Eq = q_samples - 1.
    elif measure == 'DV':
        # Log of the mean of exp(q) taken over the first dimension.
        Eq = log_sum_exp(q_samples, 0) - math.log(q_samples.size(0))
    elif measure == 'H2':
        Eq = torch.exp(q_samples) - 1.
    elif measure == 'W1':
        Eq = q_samples
    else:
        raise_measure_error(measure)

    if average:
        return Eq.mean()
    else:
        return Eq


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/cortex_DIM/functions/misc.py:
--------------------------------------------------------------------------------
 1 | """Miscellaneous functions.
 2 | 
 3 | """
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | def log_sum_exp(x, axis=None):
 9 |     """Log sum exp function
10 | 
11 |     Args:
12 |         x: Input.
13 |         axis: Axis over which to perform sum.
14 | 
15 |     Returns:
16 |         torch.Tensor: log sum exp
17 | 
18 |     """
19 |     x_max = torch.max(x, axis)[0]
20 |     y = torch.log((torch.exp(x - x_max)).sum(axis)) + x_max
21 |     return y
22 | 
23 | 
def random_permute(X):
    """Randomly permutes a tensor.

    For every position along dimension 2, the entries are shuffled across
    dimension 0 with an independent random permutation; entries along
    dimension 1 stay together.

    Args:
        X: Input tensor with at least three dimensions.

    Returns:
        torch.Tensor: Tensor of the same shape as `X`, on the same device.

    """
    X = X.transpose(1, 2)
    n_rows, n_cols = X.size(0), X.size(1)
    # Sorting random noise along dim 0 gives one permutation of the first
    # dimension per column. The noise is drawn on the CPU and then moved so
    # the function also works when no GPU is present.
    noise = torch.rand((n_rows, n_cols)).to(X.device)
    idx = noise.sort(0)[1]
    # torch.arange replaces the deprecated, end-inclusive torch.range.
    adx = torch.arange(n_cols, device=X.device)
    X = X[idx, adx[None, :]].transpose(1, 2)
    return X
40 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/cortex_DIM/nn_modules/encoder.py:
--------------------------------------------------------------------------------
 1 | '''Basic cortex_DIM encoder.
 2 | 
 3 | '''
 4 | 
 5 | import torch
 6 | 
 7 | from cortex_DIM.nn_modules.convnet import Convnet, FoldedConvnet
 8 | from cortex_DIM.nn_modules.resnet import ResNet, FoldedResNet
 9 | 
10 | 
def create_encoder(Module):
    '''Builds an encoder class on top of the given network class.

    Args:
        Module: Parent network class. Its `forward` must accept
            `return_full_list=True` and return either `(conv_outs, fc_outs)`
            or `(conv_before_outs, res_outs, conv_after_outs, fc_outs)`.

    Returns:
        type: The `Encoder` subclass of `Module`.

    '''
    class Encoder(Module):
        '''Encoder used for cortex_DIM.

        '''

        def __init__(self, *args, local_idx=None, multi_idx=None, conv_idx=None, fc_idx=None, **kwargs):
            '''

            Args:
                args: Arguments for parent class.
                local_idx: Index in list of convolutional layers for local features.
                multi_idx: Index in list of convolutional layers for multiple globals.
                conv_idx: Index in list of convolutional layers for intermediate features.
                    Defaults to `local_idx` when not given.
                fc_idx: Index in list of fully-connected layers for intermediate features.
                kwargs: Keyword arguments for the parent class.

            Raises:
                ValueError: If `local_idx` is not set.
            '''

            super().__init__(*args, **kwargs)

            if local_idx is None:
                raise ValueError('`local_idx` must be set')

            # Compare against None explicitly so that a deliberate
            # `conv_idx=0` is not mistaken for "unset".
            if conv_idx is None:
                conv_idx = local_idx

            self.local_idx = local_idx
            self.multi_idx = multi_idx
            self.conv_idx = conv_idx
            self.fc_idx = fc_idx

        def forward(self, x: torch.Tensor):
            '''

            Args:
                x: Input tensor.

            Returns:
                local_out, conv_out, multi_out, hidden_out, global_out

            '''

            outs = super().forward(x, return_full_list=True)
            if len(outs) == 2:
                conv_out, fc_out = outs
            else:
                # Four-part output: join the three convolutional stages into
                # one list so the indices address them uniformly.
                conv_before_out, res_out, conv_after_out, fc_out = outs
                conv_out = conv_before_out + res_out + conv_after_out

            local_out = conv_out[self.local_idx]

            if self.multi_idx is not None:
                multi_out = conv_out[self.multi_idx]
            else:
                multi_out = None

            if len(fc_out) > 0:
                if self.fc_idx is not None:
                    hidden_out = fc_out[self.fc_idx]
                else:
                    hidden_out = None
                # The last fully-connected output is the global feature.
                global_out = fc_out[-1]
            else:
                hidden_out = None
                global_out = None

            conv_out = conv_out[self.conv_idx]

            return local_out, conv_out, multi_out, hidden_out, global_out

    return Encoder
81 | 
82 | 
class ConvnetEncoder(create_encoder(Convnet)):
    '''Encoder built on top of `Convnet`.

    '''
85 | 
86 | 
class FoldedConvnetEncoder(create_encoder(FoldedConvnet)):
    '''Encoder built on top of `FoldedConvnet`.

    '''
89 | 
90 | 
class ResnetEncoder(create_encoder(ResNet)):
    '''Encoder built on top of `ResNet`.

    '''
93 | 
94 | 
class FoldedResnetEncoder(create_encoder(FoldedResNet)):
    '''Encoder built on top of `FoldedResNet`.

    '''
97 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/cortex_DIM/nn_modules/mi_networks.py:
--------------------------------------------------------------------------------
  1 | """Module for networks used for computing MI.
  2 | 
  3 | """
  4 | 
  5 | import numpy as np
  6 | import torch
  7 | import torch.nn as nn
  8 | 
  9 | from cortex_DIM.nn_modules.misc import Permute
 10 | 
 11 | 
 12 | class MIFCNet(nn.Module):
 13 |     """Simple custom network for computing MI.
 14 | 
 15 |     """
 16 |     def __init__(self, n_input, n_units):
 17 |         """
 18 | 
 19 |         Args:
 20 |             n_input: Number of input units.
 21 |             n_units: Number of output units.
 22 |         """
 23 |         super().__init__()
 24 | 
 25 |         assert(n_units >= n_input)
 26 | 
 27 |         self.linear_shortcut = nn.Linear(n_input, n_units)
 28 |         self.block_nonlinear = nn.Sequential(
 29 |             nn.Linear(n_input, n_units),
 30 |             nn.BatchNorm1d(n_units),
 31 |             nn.ReLU(),
 32 |             nn.Linear(n_units, n_units)
 33 |         )
 34 | 
 35 |         # initialize the initial projection to a sort of noisy copy
 36 |         eye_mask = np.zeros((n_units, n_input), dtype=np.uint8)
 37 |         for i in range(n_input):
 38 |             eye_mask[i, i] = 1
 39 | 
 40 |         self.linear_shortcut.weight.data.uniform_(-0.01, 0.01)
 41 |         self.linear_shortcut.weight.data.masked_fill_(torch.tensor(eye_mask), 1.)
 42 | 
 43 |     def forward(self, x):
 44 |         """
 45 | 
 46 |         Args:
 47 |             x: Input tensor.
 48 | 
 49 |         Returns:
 50 |             torch.Tensor: network output.
 51 | 
 52 |         """
 53 |         h = self.block_nonlinear(x) + self.linear_shortcut(x)
 54 |         return h
 55 | 
 56 | 
 57 | class MI1x1ConvNet(nn.Module):
 58 |     """Simple custorm 1x1 convnet.
 59 | 
 60 |     """
 61 |     def __init__(self, n_input, n_units):
 62 |         """
 63 | 
 64 |         Args:
 65 |             n_input: Number of input units.
 66 |             n_units: Number of output units.
 67 |         """
 68 | 
 69 |         super().__init__()
 70 | 
 71 |         self.block_nonlinear = nn.Sequential(
 72 |             nn.Conv1d(n_input, n_units, kernel_size=1, stride=1, padding=0, bias=False),
 73 |             nn.BatchNorm1d(n_units),
 74 |             nn.ReLU(),
 75 |             nn.Conv1d(n_units, n_units, kernel_size=1, stride=1, padding=0, bias=True),
 76 |         )
 77 | 
 78 |         self.block_ln = nn.Sequential(
 79 |             Permute(0, 2, 1),
 80 |             nn.LayerNorm(n_units),
 81 |             Permute(0, 2, 1)
 82 |         )
 83 | 
 84 |         self.linear_shortcut = nn.Conv1d(n_input, n_units, kernel_size=1,
 85 |                                          stride=1, padding=0, bias=False)
 86 | 
 87 |         # initialize shortcut to be like identity (if possible)
 88 |         if n_units >= n_input:
 89 |             eye_mask = np.zeros((n_units, n_input, 1), dtype=np.uint8)
 90 |             for i in range(n_input):
 91 |                 eye_mask[i, i, 0] = 1
 92 |             self.linear_shortcut.weight.data.uniform_(-0.01, 0.01)
 93 |             self.linear_shortcut.weight.data.masked_fill_(torch.tensor(eye_mask), 1.)
 94 | 
 95 |     def forward(self, x):
 96 |         """
 97 | 
 98 |             Args:
 99 |                 x: Input tensor.
100 | 
101 |             Returns:
102 |                 torch.Tensor: network output.
103 | 
104 |         """
105 |         h = self.block_ln(self.block_nonlinear(x) + self.linear_shortcut(x))
106 |         return h
107 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/cortex_DIM/nn_modules/misc.py:
--------------------------------------------------------------------------------
  1 | '''Various miscellaneous modules
  2 | 
  3 | '''
  4 | 
  5 | import torch
  6 | 
  7 | 
  8 | class View(torch.nn.Module):
  9 |     """Basic reshape module.
 10 | 
 11 |     """
 12 |     def __init__(self, *shape):
 13 |         """
 14 | 
 15 |         Args:
 16 |             *shape: Input shape.
 17 |         """
 18 |         super().__init__()
 19 |         self.shape = shape
 20 | 
 21 |     def forward(self, input):
 22 |         """Reshapes tensor.
 23 | 
 24 |         Args:
 25 |             input: Input tensor.
 26 | 
 27 |         Returns:
 28 |             torch.Tensor: Flattened tensor.
 29 | 
 30 |         """
 31 |         return input.view(*self.shape)
 32 | 
 33 | 
 34 | class Unfold(torch.nn.Module):
 35 |     """Module for unfolding tensor.
 36 | 
 37 |     Performs strided crops on 2d (image) tensors. Stride is assumed to be half the crop size.
 38 | 
 39 |     """
 40 |     def __init__(self, img_size, fold_size):
 41 |         """
 42 | 
 43 |         Args:
 44 |             img_size: Input size.
 45 |             fold_size: Crop size.
 46 |         """
 47 |         super().__init__()
 48 | 
 49 |         fold_stride = fold_size // 2
 50 |         self.fold_size = fold_size
 51 |         self.fold_stride = fold_stride
 52 |         self.n_locs = 2 * (img_size // fold_size) - 1
 53 |         self.unfold = torch.nn.Unfold((self.fold_size, self.fold_size),
 54 |                                       stride=(self.fold_stride, self.fold_stride))
 55 | 
 56 |     def forward(self, x):
 57 |         """Unfolds tensor.
 58 | 
 59 |         Args:
 60 |             x: Input tensor.
 61 | 
 62 |         Returns:
 63 |             torch.Tensor: Unfolded tensor.
 64 | 
 65 |         """
 66 |         N = x.size(0)
 67 |         x = self.unfold(x).reshape(N, -1, self.fold_size, self.fold_size, self.n_locs * self.n_locs)\
 68 |             .permute(0, 4, 1, 2, 3)\
 69 |             .reshape(N * self.n_locs * self.n_locs, -1, self.fold_size, self.fold_size)
 70 |         return x
 71 | 
 72 | 
 73 | class Fold(torch.nn.Module):
 74 |     """Module (re)folding tensor.
 75 | 
 76 |     Undoes the strided crops above. Works only on 1x1.
 77 | 
 78 |     """
 79 |     def __init__(self, img_size, fold_size):
 80 |         """
 81 | 
 82 |         Args:
 83 |             img_size: Images size.
 84 |             fold_size: Crop size.
 85 |         """
 86 |         super().__init__()
 87 |         self.n_locs = 2 * (img_size // fold_size) - 1
 88 | 
 89 |     def forward(self, x):
 90 |         """(Re)folds tensor.
 91 | 
 92 |         Args:
 93 |             x: Input tensor.
 94 | 
 95 |         Returns:
 96 |             torch.Tensor: Refolded tensor.
 97 | 
 98 |         """
 99 |         dim_c, dim_x, dim_y = x.size()[1:]
100 |         x = x.reshape(-1, self.n_locs * self.n_locs, dim_c, dim_x * dim_y)
101 |         x = x.reshape(-1, self.n_locs * self.n_locs, dim_c, dim_x * dim_y)\
102 |             .permute(0, 2, 3, 1)\
103 |             .reshape(-1, dim_c * dim_x * dim_y, self.n_locs, self.n_locs).contiguous()
104 |         return x
105 | 
106 | 
class Permute(torch.nn.Module):
    """Module wrapper around ``Tensor.permute``.

    """
    def __init__(self, *perm):
        """

        Args:
            *perm: Desired ordering of the axes.
        """
        super().__init__()
        self.perm = perm

    def forward(self, input):
        """Reorders the axes of a tensor.

        Args:
            input: Input tensor.

        Returns:
            torch.Tensor: permuted tensor.

        """
        axis_order = self.perm
        return input.permute(axis_order)
131 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/go.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -ex
 2 | 
 3 | for layers in 3 4 5 6
 4 | do
 5 | for num_code in 16 8 4 32 20
 6 | do
 7 | for seed in 0 1 2
 8 | do
 9 |   CUDA_VISIBLE_DEVICES=$1 python gsimclr.py --num_code $num_code --DS $2 --lr 0.01 --local --num-gc-layers $layers --aug random2 --seed $seed
10 | 
11 | done
12 | done
13 | done
14 | 
15 | # dataset layers codebook_size
16 | # DD 4 4
17 | # NCI1 5 4
18 | # PROTEINS 3 8
19 | # COLLAB 5 32
20 | # IMDB-B 3 8
21 | # RDT-B 5 4
22 | # RDT-M5K 4 4
23 | # MUTAG 4 16


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/losses.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | from cortex_DIM.functions.gan_losses import get_positive_expectation, get_negative_expectation
 5 | 
 6 | def local_global_loss_(l_enc, g_enc, edge_index, batch, measure):
 7 |     '''
 8 |     Args:
 9 |         l: Local feature map.
10 |         g: Global features.
11 |         measure: Type of f-divergence. For use with mode `fd`
12 |         mode: Loss mode. Fenchel-dual `fd`, NCE `nce`, or Donsker-Vadadhan `dv`.
13 |     Returns:
14 |         torch.Tensor: Loss.
15 |     '''
16 |     num_graphs = g_enc.shape[0]
17 |     num_nodes = l_enc.shape[0]
18 | 
19 |     pos_mask = torch.zeros((num_nodes, num_graphs)).cuda()
20 |     neg_mask = torch.ones((num_nodes, num_graphs)).cuda()
21 |     for nodeidx, graphidx in enumerate(batch):
22 |         pos_mask[nodeidx][graphidx] = 1.
23 |         neg_mask[nodeidx][graphidx] = 0.
24 | 
25 |     res = torch.mm(l_enc, g_enc.t())
26 | 
27 |     E_pos = get_positive_expectation(res * pos_mask, measure, average=False).sum()
28 |     E_pos = E_pos / num_nodes
29 |     E_neg = get_negative_expectation(res * neg_mask, measure, average=False).sum()
30 |     E_neg = E_neg / (num_nodes * (num_graphs - 1))
31 | 
32 |     return E_neg - E_pos
33 | 
def adj_loss_(l_enc, g_enc, edge_index, batch):
    '''Adjacency-reconstruction loss on the node embeddings.

    The sigmoid of the pairwise inner products of `l_enc` (diagonal zeroed)
    is compared against the symmetrized adjacency matrix with binary
    cross-entropy.

    Args:
        l_enc: Local (node) features, one row per node.
        g_enc: Unused.
        edge_index: Pair of index sequences (sources, targets) of the edges.
        batch: Unused.
    Returns:
        torch.Tensor: Loss.
    '''
    num_nodes = l_enc.shape[0]
    device = l_enc.device

    src = torch.as_tensor(edge_index[0], device=device).long()
    dst = torch.as_tensor(edge_index[1], device=device).long()

    # Fill both directions of every edge with one indexed assignment each,
    # instead of a per-edge Python loop.
    adj = torch.zeros((num_nodes, num_nodes), device=device)
    adj[src, dst] = 1.
    adj[dst, src] = 1.

    # Predicted edge probabilities with self-pairs masked out.
    off_diagonal = 1. - torch.eye(num_nodes, device=device)
    res = off_diagonal * torch.sigmoid(torch.mm(l_enc, l_enc.t()))

    loss = nn.BCELoss()(res, adj)
    return loss
51 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Variable
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | import numpy as np
 6 | # from core.encoders import *
 7 | import json
 8 | from torch import optim
 9 | 
10 | from cortex_DIM.nn_modules.mi_networks import MIFCNet, MI1x1ConvNet
11 | from losses import *
12 | 
13 | 
class GlobalDiscriminator(nn.Module):
    """Discriminator over a global embedding joined with encoded features.

    NOTE(review): as written this class cannot run -- `forward` calls
    `self.encoder`, which `__init__` never creates, and `l2` expects 512
    input features while `l1` emits 32. The constructor arguments are not
    used. Looks like leftover code; confirm whether it is still needed.
    """
    def __init__(self, args, input_dim):
        """

        Args:
            args: Unused.
            input_dim: Unused.
        """
        super().__init__()
        
        self.l0 = nn.Linear(32, 32)
        self.l1 = nn.Linear(32, 32)

        # NOTE(review): in_features=512 does not match the 32 outputs of l1.
        self.l2 = nn.Linear(512, 1)
    def forward(self, y, M, data):
        """

        Args:
            y: Tensor concatenated with the encoded `M` along dim 1.
            M: Features passed through `self.encoder`.
            data: Mapping providing 'adj' and 'num_nodes' entries.

        Returns:
            Output of the final linear layer.
        """

        # The adjacency is moved to the GPU unconditionally.
        adj = Variable(data['adj'].float(), requires_grad=False).cuda()
        # h0 = Variable(data['feats'].float()).cuda()
        batch_num_nodes = data['num_nodes'].int().numpy()
        # NOTE(review): `self.encoder` is not defined anywhere in this class.
        M, _ = self.encoder(M, adj, batch_num_nodes)
        # h = F.relu(self.c0(M))
        # h = self.c1(h)
        # h = h.view(y.shape[0], -1)
        h = torch.cat((y, M), dim=1)
        h = F.relu(self.l0(h))
        h = F.relu(self.l1(h))
        return self.l2(h)
35 | 
class PriorDiscriminator(nn.Module):
    """Three-layer MLP that maps a vector to a probability in (0, 1)."""

    def __init__(self, input_dim):
        """

        Args:
            input_dim: Size of the input vectors and of both hidden layers.
        """
        super().__init__()
        self.l0 = nn.Linear(input_dim, input_dim)
        self.l1 = nn.Linear(input_dim, input_dim)
        self.l2 = nn.Linear(input_dim, 1)

    def forward(self, x):
        """

        Args:
            x: Input tensor whose last dimension is `input_dim`.

        Returns:
            torch.Tensor: Sigmoid output with a last dimension of size 1.
        """
        hidden = x
        for layer in (self.l0, self.l1):
            hidden = F.relu(layer(hidden))
        logits = self.l2(hidden)
        return torch.sigmoid(logits)
47 | 
class FF(nn.Module):
    """Feed-forward block with a linear shortcut.

    The output is the sum of a three-layer ReLU MLP and a single linear
    projection of the input; all layers keep the width `input_dim`.
    """

    def __init__(self, input_dim):
        """

        Args:
            input_dim: Width of the input, the hidden layers and the output.
        """
        super().__init__()
        stages = []
        for _ in range(3):
            stages.append(nn.Linear(input_dim, input_dim))
            stages.append(nn.ReLU())
        self.block = nn.Sequential(*stages)
        self.linear_shortcut = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        """

        Args:
            x: Input tensor whose last dimension is `input_dim`.

        Returns:
            torch.Tensor: MLP output plus the linear shortcut.
        """
        nonlinear = self.block(x)
        shortcut = self.linear_shortcut(x)
        return nonlinear + shortcut
69 | 
70 | 


--------------------------------------------------------------------------------
/SSL/GraphCL/unsupervised_TU/test.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import numpy as np
 3 | import json
 4 | import pandas as pd
 5 | import collections
 6 | 
# Summarises a results log: for each epoch setting, finds the best mean score
# per classifier over the (gc, lr, type) grid for one dataset.
#
# Usage: python test.py <results_log> <dataset>
#   sys.argv[1]: path of the comma-separated results log.
#   sys.argv[2]: dataset name to report on.
if __name__ == '__main__':

    for epoch in [20, 100]:
        print(epoch)
        # Best entry per classifier for this epoch; starts at a sentinel of -1
        # and is later replaced by [mean, std, epoch, lr, gc, idx, n_runs].
        real_res = {'logreg':[-1], 'svc':[-1], 'linearsvc':[-1], 'randomforest':[-1]}
        for gc in [3, 5, 8, 16]:
            for lr in [0.01, 0.1, 0.001]:
                for tpe in ['local', 'localprior']:
                    # res[dataset][classifier] -> list of per-run score entries.
                    res = collections.defaultdict(lambda :collections.defaultdict(list))
                    # The log is re-read from disk for every grid combination.
                    with open(sys.argv[1], 'r') as f:
                        for line in f:
                            # At most 7 fields; the last one keeps any further
                            # commas because it is a JSON document.
                            x = line.strip().split(',', 6)
                            # Fields 1, 2, 3 and 5 are matched against the
                            # type, gc, epoch and lr of the current combination.
                            if x[1] != tpe:
                                continue
                            if x[2] != str(gc):
                                continue
                            if x[3] != str(epoch):
                                continue
                            if x[5] != str(lr):
                                continue
                            tmp = json.loads(x[-1])

                            # Field 0 is used as the dataset name.
                            DS = x[0]
                            res[DS]['logreg'].append(tmp['logreg'])
                            res[DS]['svc'].append(tmp['svc'])
                            res[DS]['linearsvc'].append(tmp['linearsvc'])
                            res[DS]['randomforest'].append(tmp['randomforest'])

                    for DS, lst in res.items():
                        if DS != sys.argv[2]:
                            continue
                        # print('====================')
                        # print(DS)
                        for clf, v in lst.items():
                            # Only the first five runs enter the statistics.
                            # NOTE(review): indexing `mn[idx]` below assumes
                            # each entry is a sequence of scores -- confirm
                            # against the code that writes the log.
                            mn = np.mean(np.array(v[:5]), axis=0)
                            std = np.std(np.array(v[:5]), axis=0)

                            idx = np.argmax(mn)
                            # Keep the best mean, but only for combinations
                            # that have more than one run.
                            if mn[idx] > real_res[clf][0] and len(v) > 1:
                                real_res[clf] = [mn[idx], std[idx], epoch, lr, gc, idx, len(v)]
                                # print(epoch, lr, gc, clf, idx, mn[idx], std[idx], len(v))
        print(real_res)
49 | 
50 | 


--------------------------------------------------------------------------------
/SSL/GraphMAE/README.md:
--------------------------------------------------------------------------------
1 | ## Running the code
2 | Node Classification:
3 | Please refer to the bash script `run_transductive.sh` for running the training and evaluation pipeline.
4 | 
5 | 


--------------------------------------------------------------------------------
/SSL/GraphMAE/configs.yml:
--------------------------------------------------------------------------------
# Per-dataset hyper-parameters; one top-level key per dataset.
# NOTE(review): keys ending in `_f` (lr_f, weight_decay_f, max_epoch_f)
# presumably configure the evaluation / fine-tuning stage -- confirm against
# the training script.
# NOTE(review): PyYAML reads exponent literals without a decimal point
# (e.g. 2e-4) as strings; make sure the loader casts them to float.
# The commented-out num_hidden / num_heads / num_layers entries are passed on
# the command line in run_transductive.sh instead.
cora:
  lr: 0.001
  lr_f: 0.005
  # num_hidden: 512
  # num_heads: 2
  # num_layers: 3
  weight_decay: 2e-4
  weight_decay_f: 1e-4
  max_epoch: 1500
  max_epoch_f: 1000
  mask_rate: 0.5
  encoder: gat
  decoder: gat 
  activation: prelu
  in_drop: 0.2
  attn_drop: 0.1
  linear_prob: True
  loss_fn: sce 
  drop_edge_rate: 0.0
  optimizer: adam
  replace_rate: 0.05 
  alpha_l: 3
  scheduler: False
citeseer:
  lr: 0.001
  lr_f: 0.005
  # num_hidden: 512
  # num_heads: 4
  # num_layers: 2
  weight_decay: 2e-5
  weight_decay_f: 0.01
  max_epoch: 500
  max_epoch_f: 500
  mask_rate: 0.5
  encoder: gat
  decoder: gat
  activation: prelu
  in_drop: 0.2  
  attn_drop: 0.1
  linear_prob: True
  loss_fn: sce
  drop_edge_rate: 0.0
  optimizer: adam
  replace_rate: 0.1
  alpha_l: 1 # or 3 
  scheduler: False
pubmed:
  lr: 0.0005
  lr_f: 0.001
  # num_hidden: 1024
  # num_heads: 1
  # num_layers: 5
  weight_decay: 1e-5
  weight_decay_f: 1e-4
  max_epoch: 500
  max_epoch_f: 500
  mask_rate: 0.5
  encoder: gat
  decoder: gat
  activation: prelu
  in_drop: 0.2
  attn_drop: 0.1
  linear_prob: True
  loss_fn: sce
  drop_edge_rate: 0.0
  optimizer: adam
  replace_rate: 0.0
  alpha_l: 3
  scheduler: False
70 | 


--------------------------------------------------------------------------------
/SSL/GraphMAE/graphmae/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SSL/GraphMAE/graphmae/__init__.py


--------------------------------------------------------------------------------
/SSL/GraphMAE/graphmae/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SSL/GraphMAE/graphmae/datasets/__init__.py


--------------------------------------------------------------------------------
/SSL/GraphMAE/graphmae/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .edcoder import PreModel
 2 | 
 3 | 
 4 | def build_model(args):
 5 |     num_heads = args.num_heads
 6 |     num_out_heads = args.num_out_heads
 7 |     num_hidden = args.num_hidden
 8 |     num_layers = args.num_layers
 9 |     residual = args.residual
10 |     attn_drop = args.attn_drop
11 |     in_drop = args.in_drop
12 |     norm = args.norm
13 |     negative_slope = args.negative_slope
14 |     encoder_type = args.encoder
15 |     decoder_type = args.decoder
16 |     mask_rate = args.mask_rate
17 |     drop_edge_rate = args.drop_edge_rate
18 |     replace_rate = args.replace_rate
19 | 
20 | 
21 |     activation = args.activation
22 |     loss_fn = args.loss_fn
23 |     alpha_l = args.alpha_l
24 |     concat_hidden = args.concat_hidden
25 |     num_features = args.num_features
26 |     num_codes = args.num_codes
27 |     model = PreModel(
28 |         in_dim=num_features,
29 |         num_hidden=num_hidden,
30 |         num_layers=num_layers,
31 |         nhead=num_heads,
32 |         nhead_out=num_out_heads,
33 |         activation=activation,
34 |         feat_drop=in_drop,
35 |         attn_drop=attn_drop,
36 |         negative_slope=negative_slope,
37 |         residual=residual,
38 |         encoder_type=encoder_type,
39 |         decoder_type=decoder_type,
40 |         mask_rate=mask_rate,
41 |         norm=norm,
42 |         loss_fn=loss_fn,
43 |         drop_edge_rate=drop_edge_rate,
44 |         replace_rate=replace_rate,
45 |         alpha_l=alpha_l,
46 |         concat_hidden=concat_hidden, num_codes=num_codes
47 |     )
48 |     return model
49 | 


--------------------------------------------------------------------------------
/SSL/GraphMAE/graphmae/models/loss_func.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn.functional as F
 3 | 
 4 | 
 5 | def sce_loss(x, y, alpha=3):
 6 |     x = F.normalize(x, p=2, dim=-1)
 7 |     y = F.normalize(y, p=2, dim=-1)
 8 | 
 9 |     # loss =  - (x * y).sum(dim=-1)
10 |     # loss = (x_h - y_h).norm(dim=1).pow(alpha)
11 | 
12 |     loss = (1 - (x * y).sum(dim=-1)).pow_(alpha)
13 | 
14 |     loss = loss.mean()
15 |     return loss
16 | 
17 | 
def sig_loss(x, y):
    """Sigmoid of the negated cosine similarity, averaged over rows.

    Both inputs are L2-normalised along the last dimension and their product
    is summed over dimension 1.

    Args:
        x: First tensor.
        y: Second tensor, same shape as `x`.

    Returns:
        Scalar tensor holding the mean loss.
    """
    x_unit = F.normalize(x, p=2, dim=-1)
    y_unit = F.normalize(y, p=2, dim=-1)

    similarity = torch.sum(x_unit * y_unit, dim=1)
    return torch.sigmoid(-similarity).mean()


--------------------------------------------------------------------------------
/SSL/GraphMAE/run_transductive.sh:
--------------------------------------------------------------------------------
# Node classification results in unsupervised representation learning
#
# Runs main_transductive.py once per dataset (cora, citeseer, pubmed).
# --use_cfg is passed on every run; the remaining hyper-parameters are
# presumably taken from configs.yml -- confirm in main_transductive.py.

# cora
python main_transductive.py \
	--device 0 \
	--dataset cora \
	--num_codes 32 \
	--num_layers 2 \
	--num_heads 4 \
	--num_hidden 1024 \
	--use_cfg 

# citeseer
python main_transductive.py \
	--device 0 \
	--dataset citeseer \
	--num_codes 8 \
	--num_layers 2 \
	--num_heads 2 \
	--num_hidden 256 \
	--use_cfg 

# pubmed
# NOTE(review): this run uses device 2 while the other two use device 0;
# adjust to a GPU index that exists on your machine.
python main_transductive.py \
	--device 2 \
	--dataset pubmed \
	--num_codes 16 \
	--num_layers 2 \
	--num_heads 1 \
	--num_hidden 128 \
	--use_cfg 

# Disabled hyper-parameter sweep; when enabled it takes the dataset as $1 and
# the device as $2.
# for num_hidden in 1024 512 256 128
# do
# for num_heads in 4 2 1
# do
# for num_codes in 16 32 8
# do
#     for num_layers in 2 3 4
#     do
# python -u main_transductive.py \
# 	--device $2 \
# 	--dataset $1 \
# 	--num_codes $num_codes \
# 	--num_layers $num_layers \
# 	--num_heads $num_heads \
# 	--num_hidden $num_hidden \
# 	--use_cfg 
# done
# done
# done
# done


--------------------------------------------------------------------------------