├── LICENSE ├── README.md ├── SL ├── Graph_Classification │ ├── ID_MLP_f.py │ ├── ID_MLP_s.py │ ├── README.md │ ├── configs │ │ └── LRGB-tuned │ │ │ ├── peptides-func-GCN.yaml │ │ │ └── peptides-struct-GCN.yaml │ ├── graphgps │ │ ├── __init__.py │ │ ├── act │ │ │ ├── __init__.py │ │ │ └── example.py │ │ ├── agg_runs.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── custom_gnn_config.py │ │ │ ├── dataset_config.py │ │ │ ├── defaults_config.py │ │ │ ├── example.py │ │ │ ├── graphormer_config.py │ │ │ ├── gt_config.py │ │ │ ├── optimizers_config.py │ │ │ ├── posenc_config.py │ │ │ ├── pretrained_config.py │ │ │ ├── split_config.py │ │ │ └── wandb_config.py │ │ ├── encoder │ │ │ ├── __init__.py │ │ │ ├── ast_encoder.py │ │ │ ├── composed_encoders.py │ │ │ ├── dummy_edge_encoder.py │ │ │ ├── equivstable_laplace_pos_encoder.py │ │ │ ├── example.py │ │ │ ├── graphormer_encoder.py │ │ │ ├── kernel_pos_encoder.py │ │ │ ├── laplace_pos_encoder.py │ │ │ ├── linear_edge_encoder.py │ │ │ ├── linear_node_encoder.py │ │ │ ├── ppa_encoder.py │ │ │ ├── rwse_edge_encoder.py │ │ │ ├── signnet_pos_encoder.py │ │ │ ├── type_dict_encoder.py │ │ │ └── voc_superpixels_encoder.py │ │ ├── finetuning.py │ │ ├── head │ │ │ ├── __init__.py │ │ │ ├── example.py │ │ │ ├── graphormer_graph.py │ │ │ ├── inductive_edge.py │ │ │ ├── inductive_node.py │ │ │ ├── infer_links.py │ │ │ ├── mlp_graph.py │ │ │ ├── ogb_code_graph.py │ │ │ └── san_graph.py │ │ ├── layer │ │ │ ├── __init__.py │ │ │ ├── bigbird_layer.py │ │ │ ├── example.py │ │ │ ├── gatedgcn_layer.py │ │ │ ├── gcn_conv_layer.py │ │ │ ├── gine_conv_layer.py │ │ │ ├── gps_layer.py │ │ │ ├── graphormer_layer.py │ │ │ ├── performer_layer.py │ │ │ ├── san2_layer.py │ │ │ └── san_layer.py │ │ ├── loader │ │ │ ├── __init__.py │ │ │ ├── dataset │ │ │ │ ├── __init__.py │ │ │ │ ├── __pycache__ │ │ │ │ │ ├── __init__.cpython-39.pyc │ │ │ │ │ ├── aqsol_molecules.cpython-39.pyc │ │ │ │ │ ├── coco_superpixels.cpython-39.pyc │ │ │ │ │ ├── 
malnet_tiny.cpython-39.pyc │ │ │ │ │ ├── peptides_functional.cpython-39.pyc │ │ │ │ │ ├── peptides_structural.cpython-39.pyc │ │ │ │ │ └── voc_superpixels.cpython-39.pyc │ │ │ │ ├── aqsol_molecules.py │ │ │ │ ├── coco_superpixels.py │ │ │ │ ├── malnet_tiny.py │ │ │ │ ├── pcqm4mv2_contact.py │ │ │ │ ├── peptides_functional.py │ │ │ │ ├── peptides_structural.py │ │ │ │ └── voc_superpixels.py │ │ │ ├── master_loader.py │ │ │ ├── ogbg_code2_utils.py │ │ │ └── split_generator.py │ │ ├── logger.py │ │ ├── loss │ │ │ ├── __init__.py │ │ │ ├── l1.py │ │ │ ├── multilabel_classification_loss.py │ │ │ ├── subtoken_prediction_loss.py │ │ │ └── weighted_cross_entropy.py │ │ ├── metric_wrapper.py │ │ ├── metrics_ogb.py │ │ ├── network │ │ │ ├── __init__.py │ │ │ ├── big_bird.py │ │ │ ├── custom_gnn.py │ │ │ ├── example.py │ │ │ ├── gps_model.py │ │ │ ├── graphormer.py │ │ │ ├── performer.py │ │ │ ├── san_transformer.py │ │ │ └── vq.py │ │ ├── optimizer │ │ │ ├── __init__.py │ │ │ └── extra_optimizers.py │ │ ├── pooling │ │ │ ├── __init__.py │ │ │ ├── example.py │ │ │ └── graph_token.py │ │ ├── stage │ │ │ ├── __init__.py │ │ │ └── example.py │ │ ├── train │ │ │ ├── __init__.py │ │ │ ├── custom_train.py │ │ │ ├── custom_train_bechmark.py │ │ │ └── example.py │ │ ├── transform │ │ │ ├── __init__.py │ │ │ ├── posenc_stats.py │ │ │ ├── task_preprocessing.py │ │ │ └── transforms.py │ │ └── utils.py │ └── main.py ├── Link_Prediction │ ├── ID_MLP.py │ ├── ID_pretrain.py │ ├── README.md │ ├── env.yaml │ ├── model.py │ ├── ogbdataset.py │ ├── run.sh │ ├── run_citeseer.sh │ ├── run_cora.sh │ ├── run_pubmed.sh │ ├── utils.py │ └── vq.py └── Node_Classification │ ├── ID_MLP.py │ ├── README.md │ ├── cora_citeseer_pubmed_analysis │ ├── ID_MLP.py │ ├── README.md │ ├── data_utils.py │ ├── dataset.py │ ├── dataset_large.py │ ├── eval.py │ ├── logger.py │ ├── main.py │ ├── models.py │ ├── parse.py │ ├── run.sh │ └── vq.py │ ├── data │ ├── amazon-computer_split.npz │ ├── amazon-photo_split.npz │ 
├── coauthor-cs_split.npz │ └── coauthor-physics_split.npz │ ├── data_utils.py │ ├── dataset.py │ ├── eval.py │ ├── large_graph │ ├── ID_MLP.py │ ├── arxiv.sh │ ├── arxiv_ID_MLP.py │ ├── data │ │ └── pokec │ │ │ └── pokec-splits.npy │ ├── lg_model.py │ ├── lg_parse.py │ ├── logger_ copy.py │ ├── logger_.py │ ├── main-arxiv.py │ ├── main-batch.py │ ├── pokec.sh │ ├── product.sh │ ├── product_ID_MLP.py │ ├── product_pre.py │ ├── protein.sh │ ├── protein_ID_MLP.py │ ├── protein_pre.py │ └── vq.py │ ├── logger.py │ ├── main.py │ ├── model.py │ ├── parse.py │ ├── run.sh │ └── vq.py └── SSL ├── DGCluster ├── README.md ├── env.yml ├── install.py ├── main.py ├── plots.py ├── plots_num_clusters.py ├── print_results.py ├── run.sh ├── utils.py └── vq.py ├── GraphCL ├── transferLearning_MoleculeNet │ ├── README.md │ ├── chem │ │ ├── batch.py │ │ ├── dataloader.py │ │ ├── finetune.py │ │ ├── finetune.sh │ │ ├── finetune_mutag_ptc.py │ │ ├── loader.py │ │ ├── model.py │ │ ├── parse_result.py │ │ ├── pretrain_contextpred.py │ │ ├── pretrain_deepgraphinfomax.py │ │ ├── pretrain_edgepred.py │ │ ├── pretrain_graphcl.py │ │ ├── pretrain_masking.py │ │ ├── pretrain_supervised.py │ │ ├── run.sh │ │ ├── splitters.py │ │ ├── util.py │ │ └── vq.py │ └── environment.yml └── unsupervised_TU │ ├── README.md │ ├── arguments.py │ ├── aug.py │ ├── cortex_DIM │ ├── configs │ │ ├── convnets.py │ │ └── resnets.py │ ├── functions │ │ ├── dim_losses.py │ │ ├── gan_losses.py │ │ └── misc.py │ └── nn_modules │ │ ├── convnet.py │ │ ├── encoder.py │ │ ├── mi_networks.py │ │ ├── misc.py │ │ └── resnet.py │ ├── deepinfomax.py │ ├── deepinfomax_v.py │ ├── evaluate_embedding.py │ ├── gin.py │ ├── go.sh │ ├── gsimclr.py │ ├── losses.py │ ├── model.py │ ├── test.py │ └── vq.py └── GraphMAE ├── README.md ├── configs.yml ├── graphmae ├── __init__.py ├── datasets │ ├── __init__.py │ └── data_util.py ├── evaluation.py ├── models │ ├── __init__.py │ ├── dot_gat.py │ ├── edcoder.py │ ├── gat.py │ ├── gcn.py │ ├── 
gin.py │ ├── loss_func.py │ └── vq.py └── utils.py ├── main_transductive.py └── run_transductive.sh /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Yuankai Luo 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🔍 Research Series on Classic GNNs 2 | 3 | | Benchmarking Series: Reassessing Classic GNNs | Paper | 4 | | - | - | 5 | | [Classic GNNs are Strong Baselines: Reassessing GNNs for Node Classification](https://github.com/LUOyk1999/tunedGNN) (NeurIPS 2024) | [Link](https://openreview.net/forum?id=xkljKdGe4E) | 6 | | [Can Classic GNNs Be Strong Baselines for Graph-level Tasks?](https://github.com/LUOyk1999/GNNPlus) (ICML 2025) | [Link](https://arxiv.org/abs/2502.09263) | 7 | 8 | | Follow-up Studies | Paper | 9 | | - | - | 10 | | [When Dropout Meets Graph Convolutional Networks](https://github.com/LUOyk1999/dropout-theory) (ICLR 2025) | [Link](https://openreview.net/forum?id=PwxYoMvmvy) | 11 | | **_[Node Identifiers: Compact, Discrete Representations for Efficient Graph Learning](https://github.com/LUOyk1999/NodeID) (ICLR 2025)_** | [Link](https://openreview.net/forum?id=t9lS1lX9FQ) | 12 | 13 | # Node Identifiers: Compact, Discrete Representations for Efficient Graph Learning (ICLR 2025) 14 | 15 | [![OpenReview](https://img.shields.io/badge/OpenReview-t9lS1lX9FQ-b31b1b.svg)](https://openreview.net/forum?id=t9lS1lX9FQ) [![arXiv](https://img.shields.io/badge/arXiv-2405.16435-b31b1b.svg)](https://arxiv.org/abs/2405.16435) 16 | 17 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/structure-aware-semantic-node-identifiers-for/node-classification-on-questions)](https://paperswithcode.com/sota/node-classification-on-questions?p=structure-aware-semantic-node-identifiers-for) 18 | 19 | ## Python environment setup with Conda 20 | 21 | Tested with Python 3.7, PyTorch 1.12.1, and PyTorch Geometric 2.3.1, dgl 1.0.2. 
22 | ```bash 23 | pip install pandas 24 | pip install scikit_learn 25 | pip install numpy 26 | pip install scipy 27 | pip install einops 28 | pip install ogb 29 | pip install pyyaml 30 | pip install googledrivedownloader 31 | pip install networkx 32 | pip install vqtorch 33 | pip install gdown 34 | pip install tensorboardX 35 | pip install matplotlib 36 | pip install seaborn 37 | pip install rdkit 38 | pip install tensorboard 39 | ``` 40 | 41 | ## Overview 42 | 43 | * `./SL` Experiment code of supervised Node ID. 44 | 45 | * `./SSL` Experiment code of self-supervised Node ID. 46 | 47 | ## Reference 48 | 49 | If you find our codes useful, please consider citing our work 50 | 51 | ``` 52 | @inproceedings{ 53 | luo2025node, 54 | title={Node Identifiers: Compact, Discrete Representations for Efficient Graph Learning}, 55 | author={Yuankai Luo and Hongkang Li and Qijiong Liu and Lei Shi and Xiao-Ming Wu}, 56 | booktitle={The Thirteenth International Conference on Learning Representations}, 57 | year={2025}, 58 | url={https://openreview.net/forum?id=t9lS1lX9FQ} 59 | } 60 | ``` 61 | 62 | 63 | ## Poster 64 | 65 | ![nodeid.png](https://raw.githubusercontent.com/LUOyk1999/images/refs/heads/main/images/nodeid.png) 66 | 67 | -------------------------------------------------------------------------------- /SL/Graph_Classification/README.md: -------------------------------------------------------------------------------- 1 | ## Python environment setup with Conda 2 | 3 | ```bash 4 | conda create -n graphgps python=3.10 5 | conda activate graphgps 6 | 7 | conda install pytorch torchvision torchaudio pytorch-cuda=11.7 -c pytorch -c nvidia 8 | pip install torch_geometric==2.3.0 9 | pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cu117.html 10 | 11 | conda install openbabel fsspec rdkit -c conda-forge 12 | 13 | pip install pytorch-lightning yacs torchmetrics 14 | pip install performer-pytorch 15 | pip install 
tensorboardX 16 | pip install ogb 17 | pip install wandb 18 | 19 | conda clean --all 20 | ``` 21 | 22 | 23 | ## Running Training 24 | ```bash 25 | conda activate graphgps 26 | python main.py --cfg configs/LRGB-tuned/peptides-struct-GCN.yaml wandb.use False 27 | python ID_MLP_s.py 28 | python main.py --cfg configs/LRGB-tuned/peptides-func-GCN.yaml wandb.use False 29 | python ID_MLP_f.py 30 | ``` 31 | 32 | -------------------------------------------------------------------------------- /SL/Graph_Classification/configs/LRGB-tuned/peptides-func-GCN.yaml: -------------------------------------------------------------------------------- 1 | out_dir: results 2 | metric_best: ap 3 | wandb: 4 | use: True 5 | project: peptides-func 6 | dataset: 7 | format: OGB 8 | name: peptides-functional 9 | task: graph 10 | task_type: classification_multilabel 11 | transductive: False 12 | node_encoder: True 13 | node_encoder_name: Atom+RWSE 14 | node_encoder_bn: False 15 | edge_encoder: True 16 | edge_encoder_name: Bond 17 | edge_encoder_bn: False 18 | posenc_LapPE: 19 | enable: False 20 | eigen: 21 | laplacian_norm: none 22 | eigvec_norm: L2 23 | max_freqs: 10 24 | model: DeepSet 25 | dim_pe: 16 26 | layers: 2 27 | raw_norm_type: none 28 | posenc_RWSE: 29 | enable: True 30 | kernel: 31 | times_func: range(1,21) 32 | model: Linear 33 | dim_pe: 28 34 | raw_norm_type: BatchNorm 35 | train: 36 | mode: custom 37 | batch_size: 200 38 | eval_period: 1 39 | ckpt_period: 100 40 | model: 41 | type: custom_gnn 42 | loss_fun: cross_entropy 43 | graph_pooling: mean 44 | gnn: 45 | head: mlp_graph 46 | layers_pre_mp: 0 47 | layers_mp: 6 48 | layers_post_mp: 3 49 | dim_inner: 235 50 | layer_type: gcnconv 51 | act: gelu 52 | residual: True 53 | dropout: 0.1 54 | optim: 55 | clip_grad_norm: True 56 | optimizer: adamW 57 | weight_decay: 0.0 58 | base_lr: 0.001 59 | max_epoch: 500 60 | scheduler: cosine_with_warmup 61 | num_warmup_epochs: 5 
-------------------------------------------------------------------------------- /SL/Graph_Classification/configs/LRGB-tuned/peptides-struct-GCN.yaml: -------------------------------------------------------------------------------- 1 | out_dir: results 2 | metric_best: mae 3 | metric_agg: argmin 4 | wandb: 5 | use: True 6 | project: peptides-struct 7 | dataset: 8 | format: OGB 9 | name: peptides-structural 10 | task: graph 11 | task_type: regression 12 | transductive: False 13 | node_encoder: True 14 | node_encoder_name: Atom+LapPE 15 | node_encoder_bn: False 16 | edge_encoder: True 17 | edge_encoder_name: Bond 18 | edge_encoder_bn: False 19 | posenc_LapPE: 20 | enable: True 21 | eigen: 22 | laplacian_norm: none 23 | eigvec_norm: L2 24 | max_freqs: 10 25 | model: DeepSet 26 | dim_pe: 16 27 | layers: 2 28 | raw_norm_type: none 29 | posenc_RWSE: 30 | enable: False 31 | kernel: 32 | times_func: range(1,21) 33 | model: Linear 34 | dim_pe: 28 35 | raw_norm_type: BatchNorm 36 | train: 37 | mode: custom 38 | batch_size: 200 39 | eval_period: 1 40 | ckpt_period: 100 41 | model: 42 | type: custom_gnn 43 | loss_fun: l1 44 | graph_pooling: mean 45 | gnn: 46 | head: mlp_graph 47 | layers_pre_mp: 0 48 | layers_mp: 6 49 | layers_post_mp: 3 50 | dim_inner: 235 51 | layer_type: gcnconv 52 | act: gelu 53 | residual: True 54 | dropout: 0.1 55 | optim: 56 | clip_grad_norm: True 57 | optimizer: adamW 58 | weight_decay: 0.0 59 | base_lr: 0.001 60 | max_epoch: 250 61 | scheduler: cosine_with_warmup 62 | num_warmup_epochs: 5 -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/__init__.py: -------------------------------------------------------------------------------- 1 | from .act import * # noqa 2 | from .config import * # noqa 3 | from .encoder import * # noqa 4 | from .head import * # noqa 5 | from .layer import * # noqa 6 | from .loader import * # noqa 7 | from .loss import * # noqa 8 | from .network import * # noqa 9 | 
class SWISH(nn.Module):
    """Swish activation: ``x * sigmoid(x)``.

    Args:
        inplace (bool): When True, the input tensor is multiplied in place
            and that same tensor object is returned; otherwise a new tensor
            is returned and the input is left untouched.
    """

    def __init__(self, inplace=False):
        super().__init__()
        self.inplace = inplace

    def forward(self, x):
        # The gate is always computed from the unmodified input.
        gate = torch.sigmoid(x)
        if not self.inplace:
            return x * gate
        x.mul_(gate)
        return x
@register_config('custom_gnn')
def custom_gnn_cfg(cfg):
    """Add the CustomGNN-specific defaults to GraphGym's ``cfg.gnn`` group.

    The function only sets attributes on the existing ``cfg.gnn`` node and
    returns nothing.
    """
    gnn = cfg.gnn

    # Residual connections between the GNN layers (off by default).
    gnn.residual = False

    # Attention-related defaults.
    gnn.heads = 4
    gnn.attn_dropout = 0.1

    # Virtual-node defaults (presumably "vn" = virtual node; confirm in the
    # network code that consumes these options).
    gnn.use_vn = True
    gnn.vn_pooling = 'add'

    # Normalization flavour used by the layers.
    gnn.norm_type = 'layer'
@register_config('overwrite_defaults')
def overwrite_defaults_cfg(cfg):
    """Override a few defaults that GraphGym sets first in
    ``torch_geometric.graphgym.config.set_cfg``.

    WARNING: At the time of writing, the order in which custom
    config-setting functions like this one are executed is random (see the
    referenced ``set_cfg``). Therefore never reset custom-added config
    options here; only change options that exist in core GraphGym.
    """
    train_cfg = cfg.train
    dataset_cfg = cfg.dataset

    # Training (and validation) pipeline mode;
    # 'standard' uses PyTorch-Lightning since PyG 2.1.
    train_cfg.mode = 'custom'

    # Replace the default dataset name.
    dataset_cfg.name = 'none'

    # Replace the default rounding precision.
    cfg.round = 5
@register_config('example')
def set_cfg_example(cfg):
    """Set default values for the example custom options.

    Adds a top-level ``example_arg`` and an ``example_group`` group holding
    its own ``example_arg``. The config is modified in place; nothing is
    returned.
    """
    # Stand-alone example argument.
    cfg.example_arg = 'example'

    # Example argument group: populate it, then attach it to the config.
    group = CN()
    group.example_arg = 'example'
    cfg.example_group = group
@register_config('cfg_gt')
def set_cfg_gt(cfg):
    """Register defaults for Graph Transformer-style models, e.g.:
    - Spectral Attention Network (SAN) Graph Transformer.
    - "vanilla" Transformer / Performer.
    - General Powerful Scalable (GPS) Model.

    Creates the ``cfg.gt`` group (and its ``bigbird`` sub-group) in place;
    nothing is returned.
    """
    # Graph Transformer argument group.
    cfg.gt = CN()
    gt = cfg.gt

    # Type of Graph Transformer layer to use.
    gt.layer_type = 'SANLayer'
    # Number of Transformer layers in the model.
    gt.layers = 3
    # Number of attention heads in the Graph Transformer.
    gt.n_heads = 8
    # Size of the hidden node and edge representation.
    gt.dim_hidden = 64
    # Full-attention SAN transformer including all possible pairwise edges.
    gt.full_graph = True
    # SAN real vs fake edge attention weighting coefficient.
    gt.gamma = 1e-5
    # Histogram of in-degrees of nodes in the training set used by PNAConv.
    # Used when `gt.layer_type: PNAConv+...`. If empty it is precomputed
    # during the dataset loading process.
    gt.pna_degrees = []
    # Dropout in the feed-forward module.
    gt.dropout = 0.0
    # Dropout in self-attention.
    gt.attn_dropout = 0.0
    # Normalization and residual switches.
    gt.layer_norm = False
    gt.batch_norm = True
    gt.residual = True

    # BigBird model / GPS-BigBird layer options.
    gt.bigbird = CN()
    bigbird = gt.bigbird
    bigbird.attention_type = "block_sparse"
    bigbird.chunk_size_feed_forward = 0
    bigbird.is_decoder = False
    bigbird.add_cross_attention = False
    bigbird.hidden_act = "relu"
    bigbird.max_position_embeddings = 128
    bigbird.use_bias = False
    bigbird.num_random_blocks = 3
    bigbird.block_size = 3
    bigbird.layer_norm_eps = 1e-6

    gt.vn_pooling = 'mean'
@register_config('posenc')
def set_cfg_posenc(cfg):
    """Extend the configuration with positional-encoding (PE) options.

    One ``posenc_*`` group is created per encoding type and filled with
    defaults: options shared by most encoders, Laplacian
    eigen-decomposition options, kernel options, and a few
    encoder-specific settings. The config is modified in place.
    """
    # Groups that receive the full set of common PE arguments.
    common_groups = ('posenc_LapPE', 'posenc_SignNet', 'posenc_RWSE',
                     'posenc_RWSEEdge', 'posenc_HKdiagSE',
                     'posenc_ElstaticSE')
    # Groups whose PE relies on a Laplacian eigen-decomposition.
    eigen_groups = ('posenc_LapPE', 'posenc_SignNet',
                    'posenc_EquivStableLapPE')
    # Groups with kernel-based PE options.
    kernel_groups = ('posenc_RWSE', 'posenc_RWSEEdge', 'posenc_HKdiagSE',
                     'posenc_ElstaticSE')

    # Create an empty argument group for each Positional Encoding class.
    for group_name in common_groups + ('posenc_EquivStableLapPE',):
        setattr(cfg, group_name, CN())

    for group_name in common_groups:
        group = getattr(cfg, group_name)

        # Use extended positional encodings.
        group.enable = False
        # Neural-net model type within the PE encoder:
        # 'DeepSet', 'Transformer', 'Linear', 'none', ...
        group.model = 'none'
        # Size of the Positional Encoding embedding.
        group.dim_pe = 16
        # Number of layers in the PE encoder model.
        group.layers = 3
        # Number of attention heads in the PE encoder when
        # model == 'Transformer'.
        group.n_heads = 4
        # Number of layers to apply in the LapPE encoder after its pooling
        # stage.
        group.post_layers = 0
        # Normalization applied to raw PE stats: 'none', 'BatchNorm'.
        group.raw_norm_type = 'none'
        # In addition to appending PE to the node features, also pass them
        # as a separate variable in the PyG graph batch object.
        group.pass_as_var = False

    # EquivStable LapPE only gets these two common options.
    equiv_stable = cfg.posenc_EquivStableLapPE
    equiv_stable.enable = False
    equiv_stable.raw_norm_type = 'none'

    # Laplacian eigen-decomposition options for the PEs that use it.
    for group_name in eigen_groups:
        group = getattr(cfg, group_name)
        group.eigen = CN()
        eigen = group.eigen

        # Normalization scheme for the graph Laplacian:
        # 'none', 'sym', or 'rw'.
        eigen.laplacian_norm = 'sym'
        # Normalization scheme for the eigenvectors of the Laplacian.
        eigen.eigvec_norm = 'L2'
        # Maximum number of top smallest frequencies & eigenvectors to use.
        eigen.max_freqs = 10

    # SignNet-specific options.
    sign_net = cfg.posenc_SignNet
    sign_net.phi_out_dim = 4
    sign_net.phi_hidden_dim = 64

    # Kernel-based PE specific options.
    for group_name in kernel_groups:
        group = getattr(cfg, group_name)
        group.kernel = CN()
        kernel = group.kernel

        # List of times to compute the heat kernel for (the time is
        # equivalent to the variance of the kernel) / the number of steps
        # for the random walk kernel.
        # Can be overridden by `posenc.kernel.times_func`.
        kernel.times = []
        # Python snippet to generate `posenc.kernel.times`, e.g.
        # 'range(1, 17)'. If set, it will be executed via `eval()` and
        # override posenc.kernel.times.
        kernel.times_func = ''

    # Override default: the electrostatic kernel has a fixed set of 10
    # measures.
    cfg.posenc_ElstaticSE.kernel.times_func = 'range(10)'

    # RWSEEdge-specific options.
    rwse_edge = cfg.posenc_RWSEEdge
    rwse_edge.num_global = 2
    rwse_edge.global_edge_dropout = 0.2
@register_config('split')
def set_cfg_split(cfg):
    """Set the default values for the dataset split options.

    The options are written onto ``cfg`` in place; nothing is returned.
    """
    dataset = cfg.dataset

    # Default to the standard split that ships with the dataset.
    dataset.split_mode = 'standard'

    # Which split to use when multiple splits are available.
    dataset.split_index = 0

    # Directory in which cross-validation splits are cached.
    dataset.split_dir = './splits'

    # Run multiple splits in one program execution; if set, this takes
    # precedence over cfg.dataset.split_index for split selection.
    cfg.run_multiple_splits = []
8 | """ 9 | 10 | # WandB group 11 | cfg.wandb = CN() 12 | 13 | # Use wandb or not 14 | cfg.wandb.use = True 15 | 16 | # Wandb entity name, should exist beforehand 17 | cfg.wandb.entity = "add-your-wandb-here" 18 | 19 | # Wandb project name, will be created in your team if doesn't exist already 20 | cfg.wandb.project = "gtblueprint" 21 | 22 | # Optional run name 23 | cfg.wandb.name = "" 24 | 25 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/encoder/__init__.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, basename, isfile, join 2 | import glob 3 | 4 | modules = glob.glob(join(dirname(__file__), "*.py")) 5 | __all__ = [ 6 | basename(f)[:-3] for f in modules 7 | if isfile(f) and not f.endswith('__init__.py') 8 | ] 9 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/encoder/ast_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_geometric.graphgym.register import (register_node_encoder, 3 | register_edge_encoder) 4 | 5 | """ 6 | === Description of the ogbg-code2 dataset === 7 | 8 | * Node Encoder code based on OGB's: 9 | https://github.com/snap-stanford/ogb/blob/master/examples/graphproppred/code2/utils.py 10 | 11 | Node Encoder config parameters are set based on the OGB example: 12 | https://github.com/snap-stanford/ogb/blob/master/examples/graphproppred/code2/main_pyg.py 13 | where the following three node features are used: 14 | 1. node type 15 | 2. node attribute 16 | 3. 
@register_node_encoder('ASTNode')
class ASTNodeEncoder(torch.nn.Module):
    """The Abstract Syntax Tree (AST) Node Encoder used for ogbg-code2 dataset.

    Input:
        x: Default node feature. The first and second column represents node
            type and node attributes.
        node_depth: The depth of the node in the AST.
    Output:
        emb_dim-dimensional vector

    Args:
        emb_dim (int): Output node embedding dimension.
    """

    def __init__(self, emb_dim):
        super().__init__()
        self.max_depth = max_depth

        self.type_encoder = torch.nn.Embedding(num_nodetypes, emb_dim)
        self.attribute_encoder = torch.nn.Embedding(num_nodeattributes, emb_dim)
        # One extra row so that index `max_depth` itself is a valid lookup.
        self.depth_encoder = torch.nn.Embedding(self.max_depth + 1, emb_dim)

    def forward(self, batch):
        """Replace ``batch.x`` with the sum of type, attribute and depth embeddings.

        Depths larger than ``self.max_depth`` share the embedding of
        ``self.max_depth``.

        Returns:
            The same ``batch`` object with ``batch.x`` overwritten.
        """
        x = batch.x
        # `clamp` returns a new tensor, so `batch.node_depth` is left
        # untouched (masked assignment on the view used to overwrite it).
        depth = torch.clamp(batch.node_depth.view(-1), max=self.max_depth)
        batch.x = (self.type_encoder(x[:, 0])
                   + self.attribute_encoder(x[:, 1])
                   + self.depth_encoder(depth))
        return batch
@register_edge_encoder('DummyEdge')
class DummyEdgeEncoder(torch.nn.Module):
    """Edge encoder that gives every edge the same learned embedding.

    Args:
        emb_dim (int): Output edge embedding dimension.
    """

    def __init__(self, emb_dim):
        super().__init__()
        # A single-row table: all edges look up index 0.
        self.encoder = torch.nn.Embedding(1, emb_dim)

    def forward(self, batch):
        """Overwrite ``batch.edge_attr`` with the shared embedding, one row per edge."""
        edge_count = batch.edge_index.shape[1]
        # `new_zeros` keeps the dtype and device of `edge_index`.
        zero_ids = batch.edge_index.new_zeros(edge_count)
        batch.edge_attr = self.encoder(zero_ids)
        return batch
class EquivStableLapPENodeEncoder(torch.nn.Module): 9 | """Equivariant and Stable Laplace Positional Embedding node encoder. 10 | 11 | This encoder simply transforms the k-dim node LapPE to d-dim to be 12 | later used at the local GNN module as edge weights. 13 | Based on the approach proposed in paper https://openreview.net/pdf?id=e95i1IHcWj 14 | 15 | Args: 16 | dim_emb: Size of final node embedding 17 | """ 18 | 19 | def __init__(self, dim_emb): 20 | super().__init__() 21 | 22 | pecfg = cfg.posenc_EquivStableLapPE 23 | max_freqs = pecfg.eigen.max_freqs # Num. eigenvectors (frequencies) 24 | norm_type = pecfg.raw_norm_type.lower() # Raw PE normalization layer type 25 | 26 | if norm_type == 'batchnorm': 27 | self.raw_norm = nn.BatchNorm1d(max_freqs) 28 | else: 29 | self.raw_norm = None 30 | 31 | self.linear_encoder_eigenvec = nn.Linear(max_freqs, dim_emb) 32 | 33 | def forward(self, batch): 34 | if not (hasattr(batch, 'EigVals') and hasattr(batch, 'EigVecs')): 35 | raise ValueError("Precomputed eigen values and vectors are " 36 | f"required for {self.__class__.__name__}; set " 37 | f"config 'posenc_EquivStableLapPE.enable' to True") 38 | pos_enc = batch.EigVecs 39 | 40 | empty_mask = torch.isnan(pos_enc) # (Num nodes) x (Num Eigenvectors) 41 | pos_enc[empty_mask] = 0. 
@register_edge_encoder('example')
class ExampleEdgeEncoder(torch.nn.Module):
    """Provides an encoder for integer edge (bond) features.

    One embedding table is created per entry returned by
    ``get_bond_feature_dims()``; the encoded edge attribute is the sum of
    the per-column embeddings.

    Parameters:
        emb_dim - the output edge embedding dimension
    """
    def __init__(self, emb_dim):
        super().__init__()

        self.bond_embedding_list = torch.nn.ModuleList()

        for dim in get_bond_feature_dims():
            emb = torch.nn.Embedding(dim, emb_dim)
            torch.nn.init.xavier_uniform_(emb.weight.data)
            self.bond_embedding_list.append(emb)

    def forward(self, batch):
        """Replace ``batch.edge_attr`` with the summed column embeddings."""
        edge_attr = batch.edge_attr
        bond_embedding = 0
        # Take the column count from the tensor that is actually encoded;
        # the loop used to read it from `batch.edge_feature`, an attribute
        # this encoder neither reads values from nor writes to.
        for i in range(edge_attr.shape[1]):
            bond_embedding = bond_embedding + \
                self.bond_embedding_list[i](edge_attr[:, i])

        batch.edge_attr = bond_embedding
        return batch
@register_edge_encoder('RWSEEdge')
class RWSEEdgeEncoder(torch.nn.Module):
    """Edge encoder built from precomputed ``pestat_RWSE*`` statistics.

    The encoder appends one self-loop per node (and, when present, a set of
    "global" edges) to the graph, encodes the corresponding statistics with
    a small MLP, and adds any pre-existing edge attributes onto the rows
    that belong to the original edges.

    Args:
        emb_dim (int): Output edge embedding dimension.
    """

    def __init__(self, emb_dim):
        super().__init__()
        # Input width of the MLP: one entry per configured kernel time, plus one.
        pe_dim = len(cfg.posenc_RWSEEdge.kernel.times) + 1
        self.pe_dim = pe_dim
        self.emb_dim = emb_dim

        # Threshold used in `forward` to randomly drop global edges while training.
        self.global_edge_dropout = cfg.posenc_RWSEEdge.global_edge_dropout

        self.pe_encoder = nn.Sequential(
            nn.BatchNorm1d(pe_dim),
            nn.Linear(pe_dim, emb_dim),
            act_dict[cfg.gnn.act](),
            nn.Linear(emb_dim, emb_dim),
            nn.BatchNorm1d(emb_dim),
        )

    def forward(self, batch):
        """Rebuild ``batch.edge_index`` / ``batch.edge_attr`` with encoded statistics.

        Returns:
            The same ``batch`` object, with ``edge_index`` extended by the
            self-loops (and surviving global edges) and ``edge_attr``
            replaced by the encoded statistics.
        """
        # Row order is: original edges first, then one row per self-loop.
        # NOTE(review): assumes pestat_RWSEEdge has one row per original edge
        # and pestat_RWSESelf one row per node -- confirm against the transform.
        pe_enc = torch.cat([batch.pestat_RWSEEdge, batch.pestat_RWSESelf], dim=0)

        # 2 x num_nodes index tensor [[0..N-1], [0..N-1]], i.e. an edge i -> i per node.
        self_loops = torch.arange(batch.num_nodes, device=pe_enc.device).view(1, -1).tile(2, 1)
        edge_index = torch.cat([batch.edge_index, self_loops], dim=1)

        if 'pestat_RWSEGlobal' in batch:
            global_enc = batch.pestat_RWSEGlobal
            global_edge_index = batch.global_edge_index

            if self.training:
                # Keep a global edge only when its random draw exceeds the
                # dropout threshold; the same mask filters both tensors.
                dropout_mask = torch.rand((global_enc.shape[0],), device=global_enc.device) > self.global_edge_dropout
                global_enc = global_enc[dropout_mask]
                global_edge_index = global_edge_index[:, dropout_mask]

            pe_enc = torch.cat([pe_enc, global_enc], dim=0)
            edge_index = torch.cat([edge_index, global_edge_index], dim=1)

        pe_enc = self.pe_encoder(pe_enc)

        edge_attr = pe_enc
        if batch.edge_attr is not None:
            # `batch.edge_index` has not been replaced yet, so `num_edges`
            # still refers to the original edges, which occupy the leading rows.
            edge_attr[:batch.num_edges] += batch.edge_attr

        batch.edge_index = edge_index
        batch.edge_attr = edge_attr
        return batch
29 | 30 | Node labels: 31 | 'C': 0 32 | 'O': 1 33 | 'N': 2 34 | 'F': 3 35 | 'C H1': 4 36 | 'S': 5 37 | 'Cl': 6 38 | 'O -': 7 39 | 'N H1 +': 8 40 | 'Br': 9 41 | 'N H3 +': 10 42 | 'N H2 +': 11 43 | 'N +': 12 44 | 'N -': 13 45 | 'S -': 14 46 | 'I': 15 47 | 'P': 16 48 | 'O H1 +': 17 49 | 'N H1 -': 18 50 | 'O +': 19 51 | 'S +': 20 52 | 'P H1': 21 53 | 'P H2': 22 54 | 'C H2 -': 23 55 | 'P +': 24 56 | 'S H1 +': 25 57 | 'C H1 -': 26 58 | 'P H1 +': 27 59 | 60 | Edge labels: 61 | 'NONE': 0 62 | 'SINGLE': 1 63 | 'DOUBLE': 2 64 | 'TRIPLE': 3 65 | 66 | 67 | === Description of the AQSOL dataset === 68 | Node labels: 69 | 'Br': 0, 'C': 1, 'N': 2, 'O': 3, 'Cl': 4, 'Zn': 5, 'F': 6, 'P': 7, 'S': 8, 'Na': 9, 'Al': 10, 70 | 'Si': 11, 'Mo': 12, 'Ca': 13, 'W': 14, 'Pb': 15, 'B': 16, 'V': 17, 'Co': 18, 'Mg': 19, 'Bi': 20, 'Fe': 21, 71 | 'Ba': 22, 'K': 23, 'Ti': 24, 'Sn': 25, 'Cd': 26, 'I': 27, 'Re': 28, 'Sr': 29, 'H': 30, 'Cu': 31, 'Ni': 32, 72 | 'Lu': 33, 'Pr': 34, 'Te': 35, 'Ce': 36, 'Nd': 37, 'Gd': 38, 'Zr': 39, 'Mn': 40, 'As': 41, 'Hg': 42, 'Sb': 73 | 43, 'Cr': 44, 'Se': 45, 'La': 46, 'Dy': 47, 'Y': 48, 'Pd': 49, 'Ag': 50, 'In': 51, 'Li': 52, 'Rh': 53, 74 | 'Nb': 54, 'Hf': 55, 'Cs': 56, 'Ru': 57, 'Au': 58, 'Sm': 59, 'Ta': 60, 'Pt': 61, 'Ir': 62, 'Be': 63, 'Ge': 64 75 | 76 | Edge labels: 77 | 'NONE': 0, 'SINGLE': 1, 'DOUBLE': 2, 'AROMATIC': 3, 'TRIPLE': 4 78 | """ 79 | 80 | 81 | @register_node_encoder('TypeDictNode') 82 | class TypeDictNodeEncoder(torch.nn.Module): 83 | def __init__(self, emb_dim): 84 | super().__init__() 85 | 86 | num_types = cfg.dataset.node_encoder_num_types 87 | if num_types < 1: 88 | raise ValueError(f"Invalid 'node_encoder_num_types': {num_types}") 89 | 90 | self.encoder = torch.nn.Embedding(num_embeddings=num_types, 91 | embedding_dim=emb_dim) 92 | # torch.nn.init.xavier_uniform_(self.encoder.weight.data) 93 | 94 | def forward(self, batch): 95 | # Encode just the first dimension if more exist 96 | batch.x = self.encoder(batch.x[:, 0]) 97 | 98 | return batch 99 
@register_edge_encoder('TypeDictEdge')
class TypeDictEdgeEncoder(torch.nn.Module):
    """Single-dictionary edge encoder backed by one embedding table.

    The table size is read from ``cfg.dataset.edge_encoder_num_types``.

    Args:
        emb_dim (int): Output edge embedding dimension.

    Raises:
        ValueError: If the configured number of edge types is below 1.
    """

    def __init__(self, emb_dim):
        super().__init__()

        edge_type_count = cfg.dataset.edge_encoder_num_types
        if edge_type_count < 1:
            raise ValueError(
                f"Invalid 'edge_encoder_num_types': {edge_type_count}")

        self.encoder = torch.nn.Embedding(edge_type_count, emb_dim)

    def forward(self, batch):
        """Look up ``batch.edge_attr`` in the table and store the result back."""
        embedded = self.encoder(batch.edge_attr)
        batch.edge_attr = embedded
        return batch
@register_edge_encoder('VOCEdge')
class VOCEdgeEncoder(torch.nn.Module):
    """Standardise VOCSuperpixels edge features and project them linearly.

    The input width is 2 when ``cfg.dataset.name`` equals
    ``'edge_wt_region_boundary'`` and 1 otherwise.

    Args:
        emb_dim (int): Output edge embedding dimension.
    """

    def __init__(self, emb_dim):
        super().__init__()
        edge_x_mean = torch.tensor([0.07640745, 33.73478])
        edge_x_std = torch.tensor([0.0868775, 20.945076])
        # Buffers keep their original 2-entry shape so existing state dicts
        # still load.
        self.register_buffer('edge_x_mean', edge_x_mean)
        self.register_buffer('edge_x_std', edge_x_std)

        self.in_dim = \
            2 if cfg.dataset.name == 'edge_wt_region_boundary' else 1
        self.encoder = torch.nn.Linear(self.in_dim, emb_dim)

    def forward(self, batch):
        """Replace ``batch.edge_attr`` with its standardised, encoded form."""
        # Use only as many statistics as the encoder has input features.
        # Subtracting the full 2-entry mean from a 1-column input broadcast
        # it to 2 columns, which Linear(1, emb_dim) then rejected.
        # NOTE(review): assumes the single feature of the 1-column variants
        # corresponds to the first statistic -- confirm against the dataset.
        mean = self.edge_x_mean[:self.in_dim].view(1, -1)
        std = self.edge_x_std[:self.in_dim].view(1, -1)
        batch.edge_attr = self.encoder((batch.edge_attr - mean) / std)
        return batch
@register_head('head')
class ExampleNodeHead(nn.Module):
    """Head of GNN, node prediction.

    Args:
        dim_in (int): Input feature dimension.
        dim_out (int): Output dimension.
    """
    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.layer_post_mp = nn.Linear(dim_in, dim_out, bias=True)

    def _apply_index(self, batch):
        """Select predictions and labels at ``batch.node_label_index``."""
        pred = batch.x[batch.node_label_index]
        # When there is one label per indexed node the labels are already
        # aligned with `pred`; otherwise index the labels the same way.
        if batch.node_label_index.shape[0] == batch.node_label.shape[0]:
            return pred, batch.node_label
        return pred, batch.node_label[batch.node_label_index]

    def forward(self, batch):
        """Project node features and return ``(pred, label)``."""
        # nn.Linear works on tensors, so it must be applied to the node
        # features; the batch object has to survive for `_apply_index`.
        batch.x = self.layer_post_mp(batch.x)
        pred, label = self._apply_index(batch)
        return pred, label
@register_head('inductive_node')
class GNNInductiveNodeHead(nn.Module):
    """
    GNN prediction head for inductive node prediction tasks.

    The head is a GraphGym ``MLP`` with ``cfg.gnn.layers_post_mp`` layers,
    configured with a bias and without a final activation.

    Args:
        dim_in (int): Input dimension
        dim_out (int): Output dimension. For binary prediction, dim_out=1.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        post_mp_config = new_layer_config(
            dim_in, dim_out, cfg.gnn.layers_post_mp,
            has_act=False, has_bias=True, cfg=cfg)
        self.layer_post_mp = MLP(post_mp_config)

    def _apply_index(self, batch):
        """Return all node outputs together with ``batch.y``."""
        return batch.x, batch.y

    def forward(self, batch):
        """Run the post-message-passing MLP and return ``(pred, label)``."""
        updated = self.layer_post_mp(batch)
        return self._apply_index(updated)
@register_head('mlp_graph')
class MLPGraphHead(nn.Module):
    """
    MLP prediction head for graph prediction tasks.

    The number of linear layers is ``cfg.gnn.layers_post_mp`` and the
    dropout rate is ``cfg.gnn.dropout``. When ``cfg.model.graph_pooling`` is
    ``'node_ensemble'`` the MLP runs on every node and the node outputs are
    mean-pooled; otherwise node features are pooled first and the MLP runs
    on the pooled result.

    Args:
        dim_in (int): Input dimension.
        dim_out (int): Output dimension. For binary prediction, dim_out=1.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.node_ensemble = cfg.model.graph_pooling == 'node_ensemble'
        pooling_key = 'mean' if self.node_ensemble else cfg.model.graph_pooling
        self.pooling_fun = register.pooling_dict[pooling_key]

        drop_p = cfg.gnn.dropout
        num_linear = cfg.gnn.layers_post_mp

        stack = []
        # Hidden blocks keep the width at `dim_in`; only the final linear
        # layer maps to `dim_out`.
        for _ in range(num_linear - 1):
            stack += [
                nn.Dropout(drop_p),
                nn.Linear(dim_in, dim_in, bias=True),
                register.act_dict[cfg.gnn.act](),
            ]
        stack += [nn.Dropout(drop_p), nn.Linear(dim_in, dim_out, bias=True)]
        self.mlp = nn.Sequential(*stack)

    def _scale_and_shift(self, x):
        """Hook for subclasses to rescale the raw output; identity here."""
        return x

    def _apply_index(self, batch):
        """Return the stored graph-level output together with ``batch.y``."""
        return batch.graph_feature, batch.y

    def forward(self, batch):
        """Compute predictions and return ``(pred, label)``.

        In node-ensemble mode, training returns per-node predictions with
        the graph labels expanded through ``batch.batch``; evaluation
        returns the pooled per-graph predictions.
        """
        if not self.node_ensemble:
            pooled = self.pooling_fun(batch.x, batch.batch)
            batch.graph_feature = self._scale_and_shift(self.mlp(pooled))
            return self._apply_index(batch)

        node_out = self._scale_and_shift(self.mlp(batch.x))
        graph_out = self.pooling_fun(node_out, batch.batch)
        batch.graph_feature = graph_out

        _, label = self._apply_index(batch)
        if self.training:
            return node_out, label[batch.batch]
        return graph_out, label
@register_head('ogb_code_graph')
class OGBCodeGraphHead(nn.Module):
    """
    Sequence prediction head for ogbg-code2 graph-level prediction tasks.

    The pooled graph embedding is fed to ``max_seq_len`` independent linear
    classifiers, one per output position.

    Args:
        dim_in (int): Input dimension.
        dim_out (int): IGNORED, kept for GraphGym framework compatibility
        L (int): Number of layers; only ``L=1`` is accepted.

    Raises:
        ValueError: If ``L`` is not 1.
    """

    def __init__(self, dim_in, dim_out, L=1):
        super().__init__()
        self.pooling_fun = register.pooling_dict[cfg.model.graph_pooling]
        self.L = L
        vocab_size = 5002
        self.max_seq_len = 5

        if self.L != 1:
            raise ValueError("Multilayer prediction heads are not supported.")

        self.graph_pred_linear_list = nn.ModuleList(
            [nn.Linear(dim_in, vocab_size) for _ in range(self.max_seq_len)])

    def _apply_index(self, batch):
        """Return the prediction list and a dict holding both label forms."""
        labels = {'y_arr': batch.y_arr, 'y': batch.y}
        return batch.pred_list, labels

    def forward(self, batch):
        """Pool node features, classify each position, return ``(pred, label)``."""
        pooled = self.pooling_fun(batch.x, batch.batch)
        batch.pred_list = [
            classifier(pooled) for classifier in self.graph_pred_linear_list]
        return self._apply_index(batch)
SANGraphHead(nn.Module): 10 | """ 11 | SAN prediction head for graph prediction tasks. 12 | 13 | Args: 14 | dim_in (int): Input dimension. 15 | dim_out (int): Output dimension. For binary prediction, dim_out=1. 16 | L (int): Number of hidden layers. 17 | """ 18 | 19 | def __init__(self, dim_in, dim_out, L=2): 20 | super().__init__() 21 | self.pooling_fun = register.pooling_dict[cfg.model.graph_pooling] 22 | list_FC_layers = [ 23 | nn.Linear(dim_in // 2 ** l, dim_in // 2 ** (l + 1), bias=True) 24 | for l in range(L)] 25 | list_FC_layers.append( 26 | nn.Linear(dim_in // 2 ** L, dim_out, bias=True)) 27 | self.FC_layers = nn.ModuleList(list_FC_layers) 28 | self.L = L 29 | self.activation = register.act_dict[cfg.gnn.act]() 30 | 31 | def _apply_index(self, batch): 32 | return batch.graph_feature, batch.y 33 | 34 | def forward(self, batch): 35 | graph_emb = self.pooling_fun(batch.x, batch.batch) 36 | for l in range(self.L): 37 | graph_emb = self.FC_layers[l](graph_emb) 38 | graph_emb = self.activation(graph_emb) 39 | graph_emb = self.FC_layers[self.L](graph_emb) 40 | batch.graph_feature = graph_emb 41 | pred, label = self._apply_index(batch) 42 | return pred, label 43 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/layer/__init__.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, basename, isfile, join 2 | import glob 3 | 4 | modules = glob.glob(join(dirname(__file__), "*.py")) 5 | __all__ = [ 6 | basename(f)[:-3] for f in modules 7 | if isfile(f) and not f.endswith('__init__.py') 8 | ] 9 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/layer/example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn import Parameter 4 | 5 | from torch_geometric.graphgym.config import cfg 6 | 
# Example 1: Directly define a GraphGym format Conv
# take 'batch' as input and 'batch' as output
@register_layer('exampleconv1')
class ExampleConv1(MessagePassing):
    r"""Example GNN layer operating directly on a GraphGym ``batch``.

    Node features are multiplied by a learnable weight matrix, propagated
    along ``batch.edge_index`` with the aggregation named by
    ``cfg.gnn.agg``, and an optional bias is added to the aggregated result.

    Args:
        in_channels (int): Size of the input node features.
        out_channels (int): Size of the output node features.
        bias (bool): Whether to learn an additive bias.
        **kwargs: Forwarded to ``MessagePassing.__init__``.
    """
    def __init__(self, in_channels, out_channels, bias=True, **kwargs):
        super().__init__(aggr=cfg.gnn.agg, **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels

        # Allocated uninitialised; filled in by reset_parameters() below.
        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            # Keep the attribute present (as None) so update() can test it.
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the weight (glorot) and the bias (zeros)."""
        glorot(self.weight)
        # NOTE(review): self.bias is None when bias=False; presumably
        # `zeros` tolerates a None argument - confirm.
        zeros(self.bias)

    def forward(self, batch):
        """Transform ``batch.x`` in place and return the same batch."""
        x, edge_index = batch.x, batch.edge_index
        x = torch.matmul(x, self.weight)

        batch.x = self.propagate(edge_index, x=x)

        return batch

    def message(self, x_j):
        # Messages are passed through unchanged.
        # NOTE(review): presumably x_j holds per-edge neighbour features,
        # following the PyG `_j` naming convention - confirm.
        return x_j

    def update(self, aggr_out):
        # Add the bias only when one was created in __init__.
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out
class GCNConvLayer(nn.Module):
    """Graph Convolutional Network (GCN) layer.

    Wraps ``pyg_nn.GCNConv`` and follows it with the activation named by
    ``cfg.gnn.act`` and dropout. Edge attributes are not used; only
    ``batch.x`` and ``batch.edge_index`` are read.

    Args:
        dim_in (int): Size of the input node features.
        dim_out (int): Size of the output node features.
        dropout (float): Dropout probability applied after the activation.
        residual (bool): If true, add the layer input to its output.
    """
    def __init__(self, dim_in, dim_out, dropout, residual):
        super().__init__()
        self.dim_in = dim_in
        self.dim_out = dim_out
        self.dropout = dropout
        self.residual = residual

        # Activation is looked up by name in the GraphGym registry.
        self.act = nn.Sequential(
            register.act_dict[cfg.gnn.act](),
            nn.Dropout(self.dropout),
        )
        self.model = pyg_nn.GCNConv(dim_in, dim_out, bias=True)

    def forward(self, batch):
        """Update ``batch.x`` in place and return the same batch."""
        # Keep the input features for the optional skip connection.
        x_in = batch.x

        batch.x = self.model(batch.x, batch.edge_index)
        batch.x = self.act(batch.x)

        if self.residual:
            # NOTE(review): the addition needs x_in and the conv output to
            # have compatible shapes, i.e. presumably dim_in == dim_out.
            batch.x = x_in + batch.x  # residual connection

        return batch
12 | 13 | Args: 14 | embed_dim: The number of hidden dimensions of the model 15 | num_heads: The number of heads of the Graphormer model 16 | dropout: Dropout applied after the attention and after the MLP 17 | attention_dropout: Dropout applied within the attention 18 | input_dropout: Dropout applied within the MLP 19 | """ 20 | super().__init__() 21 | self.attention = torch.nn.MultiheadAttention(embed_dim, 22 | num_heads, 23 | attention_dropout, 24 | batch_first=True) 25 | self.input_norm = torch.nn.LayerNorm(embed_dim) 26 | self.dropout = torch.nn.Dropout(dropout) 27 | 28 | # We follow the paper in that all hidden dims are 29 | # equal to the embedding dim 30 | self.mlp = torch.nn.Sequential( 31 | torch.nn.LayerNorm(embed_dim), 32 | torch.nn.Linear(embed_dim, embed_dim), 33 | torch.nn.GELU(), 34 | torch.nn.Dropout(mlp_dropout), 35 | torch.nn.Linear(embed_dim, embed_dim), 36 | torch.nn.Dropout(dropout), 37 | ) 38 | 39 | def forward(self, data): 40 | x = self.input_norm(data.x) 41 | x, real_nodes = to_dense_batch(x, data.batch) 42 | 43 | if hasattr(data, "attn_bias"): 44 | x = self.attention(x, x, x, ~real_nodes, attn_mask=data.attn_bias)[0][real_nodes] 45 | else: 46 | x = self.attention(x, x, x, ~real_nodes)[0][real_nodes] 47 | x = self.dropout(x) + data.x 48 | data.x = self.mlp(x) + x 49 | return data 50 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, basename, isfile, join 2 | import glob 3 | 4 | modules = glob.glob(join(dirname(__file__), "*.py")) 5 | __all__ = [ 6 | basename(f)[:-3] for f in modules 7 | if isfile(f) and not f.endswith('__init__.py') 8 | ] 9 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__init__.py -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__pycache__/aqsol_molecules.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/aqsol_molecules.cpython-39.pyc -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__pycache__/coco_superpixels.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/coco_superpixels.cpython-39.pyc -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__pycache__/malnet_tiny.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/malnet_tiny.cpython-39.pyc 
-------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_functional.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_functional.cpython-39.pyc -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_structural.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/peptides_structural.cpython-39.pyc -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loader/dataset/__pycache__/voc_superpixels.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Graph_Classification/graphgps/loader/dataset/__pycache__/voc_superpixels.cpython-39.pyc -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, basename, isfile, join 2 | import glob 3 | 4 | modules = glob.glob(join(dirname(__file__), "*.py")) 5 | __all__ = [ 6 | basename(f)[:-3] for f in modules 7 | if isfile(f) and not f.endswith('__init__.py') 8 | ] 9 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/loss/l1.py: -------------------------------------------------------------------------------- 
@register_loss('l1_losses')
def l1_losses(pred, true):
    """L1 / smooth-L1 regression loss selected by ``cfg.model.loss_fun``.

    Returns:
        A ``(loss, pred)`` tuple when ``cfg.model.loss_fun`` is ``'l1'`` or
        ``'smoothl1'``; ``None`` for any other configured loss.
    """
    criteria = {
        'l1': nn.L1Loss,
        'smoothl1': nn.SmoothL1Loss,
    }
    criterion_cls = criteria.get(cfg.model.loss_fun)
    if criterion_cls is None:
        # Not one of the losses handled here.
        return None
    return criterion_cls()(pred, true), pred
@register_loss('weighted_cross_entropy')
def weighted_cross_entropy(pred, true):
    """Weighted cross-entropy for unbalanced classes.

    Each class present in ``true`` is weighted by the fraction of samples
    that do NOT belong to it; classes absent from ``true`` get weight 0.

    Returns:
        ``None`` unless ``cfg.model.loss_fun`` is ``'weighted_cross_entropy'``.
        Otherwise ``(loss, log_softmax(pred))`` when ``pred`` has more than
        one dimension, or ``(loss, sigmoid(pred))`` for one-dimensional
        (binary) ``pred``.
    """
    if cfg.model.loss_fun != 'weighted_cross_entropy':
        return None

    num_samples = true.size(0)
    is_multiclass = pred.ndim > 1
    n_classes = pred.shape[1] if is_multiclass else 2

    # Count the samples of every class that actually occurs in `true` and
    # scatter those counts into a dense per-class vector.
    counts = torch.bincount(true)
    present_counts = counts[counts.nonzero(as_tuple=True)].squeeze()
    cluster_sizes = torch.zeros(n_classes, device=pred.device).long()
    cluster_sizes[torch.unique(true)] = present_counts

    weight = (num_samples - cluster_sizes).float() / num_samples
    weight *= (cluster_sizes > 0).float()

    if not is_multiclass:
        # binary: pick the per-sample weight from its label
        loss = F.binary_cross_entropy_with_logits(pred, true.float(),
                                                  weight=weight[true])
        return loss, torch.sigmoid(pred)

    # multiclass
    log_probs = F.log_softmax(pred, dim=-1)
    return F.nll_loss(log_probs, true, weight=weight), log_probs
def eval_acc(y_true, y_pred):
    '''
        compute accuracy averaged across tasks

        Args:
            y_true: 2-D array (samples x tasks) of labels; NaN marks a
                missing label.
            y_pred: array of predicted labels, indexed like ``y_true``.

        Returns:
            ``{'acc': value}`` where value is the mean per-task accuracy.

        Raises:
            RuntimeError: if no task has any labeled sample.
    '''
    acc_list = []

    for i in range(y_true.shape[1]):
        # ignore nan values (NaN != NaN)
        is_labeled = y_true[:, i] == y_true[:, i]
        if not is_labeled.any():
            # A task without a single label has no defined accuracy; skip it
            # instead of dividing by zero.
            continue
        correct = y_true[is_labeled, i] == y_pred[is_labeled, i]
        acc_list.append(float(np.sum(correct)) / len(correct))

    if len(acc_list) == 0:
        raise RuntimeError(
            'No labeled data available. Cannot compute accuracy.')

    return {'acc': sum(acc_list) / len(acc_list)}
> 0: 98 | precision = true_positive / (true_positive + false_positive) 99 | else: 100 | precision = 0 101 | 102 | if true_positive + false_negative > 0: 103 | recall = true_positive / (true_positive + false_negative) 104 | else: 105 | recall = 0 106 | if precision + recall > 0: 107 | f1 = 2 * precision * recall / (precision + recall) 108 | else: 109 | f1 = 0 110 | 111 | precision_list.append(precision) 112 | recall_list.append(recall) 113 | f1_list.append(f1) 114 | 115 | return {'precision': np.average(precision_list), 116 | 'recall': np.average(recall_list), 117 | 'F1': np.average(f1_list)} 118 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/network/__init__.py: -------------------------------------------------------------------------------- 1 | from os.path import dirname, basename, isfile, join 2 | import glob 3 | 4 | modules = glob.glob(join(dirname(__file__), "*.py")) 5 | __all__ = [ 6 | basename(f)[:-3] for f in modules 7 | if isfile(f) and not f.endswith('__init__.py') 8 | ] 9 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/network/big_bird.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch_geometric.graphgym.register as register 3 | from torch_geometric.graphgym.config import cfg 4 | from torch_geometric.graphgym.models.gnn import FeatureEncoder, GNNPreMP 5 | from torch_geometric.graphgym.register import register_network 6 | 7 | from graphgps.layer.bigbird_layer import BigBirdModel as BackboneBigBird 8 | 9 | 10 | @register_network('BigBird') 11 | class BigBird(torch.nn.Module): 12 | """BigBird without edge features. 13 | This model disregards edge features and runs a linear transformer over a set of node features only. 
14 | BirBird applies random sparse attention to the input sequence - the longer the sequence the closer it is to O(N) 15 | https://arxiv.org/abs/2007.14062 16 | """ 17 | 18 | def __init__(self, dim_in, dim_out): 19 | super().__init__() 20 | self.encoder = FeatureEncoder(dim_in) 21 | dim_in = self.encoder.dim_in 22 | 23 | if cfg.gnn.layers_pre_mp > 0: 24 | self.pre_mp = GNNPreMP( 25 | dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp) 26 | dim_in = cfg.gnn.dim_inner 27 | 28 | assert cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in, \ 29 | "The inner and hidden dims must match." 30 | 31 | # Copy main Transformer hyperparams to the BigBird config. 32 | cfg.gt.bigbird.layers = cfg.gt.layers 33 | cfg.gt.bigbird.n_heads = cfg.gt.n_heads 34 | cfg.gt.bigbird.dim_hidden = cfg.gt.dim_hidden 35 | cfg.gt.bigbird.dropout = cfg.gt.dropout 36 | self.trf = BackboneBigBird( 37 | config=cfg.gt.bigbird, 38 | ) 39 | 40 | GNNHead = register.head_dict[cfg.gnn.head] 41 | self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out) 42 | 43 | def forward(self, batch): 44 | for module in self.children(): 45 | batch = module(batch) 46 | return batch 47 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/network/custom_gnn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch_geometric.graphgym.models.head # noqa, register module 3 | import torch_geometric.graphgym.register as register 4 | from torch_geometric.graphgym.config import cfg 5 | from torch_geometric.graphgym.models.gnn import FeatureEncoder, GNNPreMP 6 | from torch_geometric.graphgym.register import register_network 7 | 8 | from graphgps.layer.gatedgcn_layer import GatedGCNLayer 9 | from graphgps.layer.gine_conv_layer import GINEConvLayer 10 | from graphgps.layer.gcn_conv_layer import GCNConvLayer 11 | from torch_geometric.nn import global_add_pool 12 | 13 | @register_network('custom_gnn') 14 | 
class CustomGNN(torch.nn.Module):
    """
    GNN model that customizes the torch_geometric.graphgym.models.gnn.GNN
    to support specific handling of new conv layers.

    Every message-passing layer is paired with a residual vector quantizer
    that is applied to the node features produced by that layer. The code
    indices of all layers are concatenated per node and sum-pooled per
    graph to form a graph-level ID.

    Args:
        dim_in (int): Input feature dimension handed to the feature encoder.
        dim_out (int): Output dimension of the prediction head.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in
        self.vqs = torch.nn.ModuleList()
        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        assert cfg.gnn.dim_inner == dim_in, \
            "The inner and hidden dims must match."
        # Truthy: use the bundled quantizer from graphgps.network.vq;
        # falsy: use the one from the optional `vqtorch` package.
        self.kmeans = 1
        conv_model = self.build_conv_model(cfg.gnn.layer_type)
        layers = []
        for _ in range(cfg.gnn.layers_mp):
            # Conv layer and its quantizer are created back to back so the
            # order of parameter initialisation stays as before.
            layers.append(conv_model(dim_in,
                                     dim_in,
                                     dropout=cfg.gnn.dropout,
                                     residual=cfg.gnn.residual))
            self.vqs.append(self._build_quantizer())
        self.gnn_layers = torch.nn.Sequential(*layers)

        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def _build_quantizer(self):
        """Create the residual vector quantizer attached to one conv layer."""
        if self.kmeans:
            from graphgps.network.vq import ResidualVectorQuant
            print("kmeans")
            return ResidualVectorQuant(
                dim=cfg.gnn.dim_inner,
                codebook_size=16,
                decay=0.8,
                commitment_weight=0.25,
                use_cosine_sim=True,
                kmeans_init=False,
            )

        from vqtorch.nn import ResidualVectorQuant
        print("vq")
        return ResidualVectorQuant(
            groups=3,
            feature_size=cfg.gnn.dim_inner,  # feature dimension corresponding to the vectors
            num_codes=16,       # number of codebook vectors
            beta=0.98,          # (default: 0.9) commitment trade-off
            kmeans_init=False,  # (default: False) whether to use kmeans++ init
            norm=None,          # (default: None) normalization for the input vectors
            cb_norm=None,       # (default: None) normalization for codebook vectors
            affine_lr=10.0,     # (default: 0.0) lr scale for affine parameters
            sync_nu=0.2,        # (default: 0.0) codebook synchronization contribution
            replace_freq=20,    # (default: None) frequency to replace dead codes
            dim=-1,             # (default: -1) dimension to be quantized
        )

    def build_conv_model(self, model_type):
        """Map a layer-type name to its conv layer class.

        Raises:
            ValueError: If ``model_type`` is not a supported layer name.
        """
        if model_type == 'gatedgcnconv':
            return GatedGCNLayer
        elif model_type == 'gineconv':
            return GINEConvLayer
        elif model_type == 'gcnconv':
            return GCNConvLayer
        else:
            raise ValueError("Model {} unavailable".format(model_type))

    def forward(self, batch):
        """Run the model on ``batch``.

        Returns:
            tuple: ``(head_output, total_commit_loss, graph_id)`` where
            ``head_output`` is whatever the prediction head returns,
            ``total_commit_loss`` is the quantizer loss summed over layers
            and ``graph_id`` is the per-graph sum of node code indices.
        """
        batch = self.encoder(batch)
        # pre_mp only exists when cfg.gnn.layers_pre_mp > 0. It used to be
        # constructed but never applied, leaving its parameters unused.
        if hasattr(self, 'pre_mp'):
            batch = self.pre_mp(batch)

        id_list = []
        total_commit_loss = 0
        for conv, vq in zip(self.gnn_layers, self.vqs):
            batch = conv(batch)
            if self.kmeans:
                # Only the code indices and the commitment loss are used.
                # NOTE(review): `indices` is presumably a sequence of
                # per-stage index tensors, since it is stacked - confirm.
                _quantized, indices, commit_loss, _dist, _codebook = \
                    vq(batch.x)
                id_list.append(torch.stack(indices, dim=1))
                total_commit_loss += commit_loss
            else:
                _x, vq_out = vq(batch.x)
                total_commit_loss += vq_out['loss'].mean()
                id_list.append(vq_out['q'])

        id_list_concat = torch.cat(id_list, dim=1)
        graph_id = global_add_pool(id_list_concat, batch.batch)
        batch = self.post_mp(batch)
        return batch, total_commit_loss, graph_id
conv_model = self.build_conv_model(model_type) 17 | self.convs = nn.ModuleList() 18 | self.convs.append(conv_model(dim_in, dim_in)) 19 | 20 | for _ in range(num_layers - 1): 21 | self.convs.append(conv_model(dim_in, dim_in)) 22 | 23 | GNNHead = register.head_dict[cfg.dataset.task] 24 | self.post_mp = GNNHead(dim_in=dim_in, dim_out=dim_out) 25 | 26 | def build_conv_model(self, model_type): 27 | if model_type == 'GCN': 28 | return pyg_nn.GCNConv 29 | elif model_type == 'GAT': 30 | return pyg_nn.GATConv 31 | elif model_type == "GraphSage": 32 | return pyg_nn.SAGEConv 33 | else: 34 | raise ValueError(f'Model {model_type} unavailable') 35 | 36 | def forward(self, batch): 37 | x, edge_index = batch.x, batch.edge_index 38 | 39 | for i in range(len(self.convs)): 40 | x = self.convs[i](x, edge_index) 41 | x = F.relu(x) 42 | x = F.dropout(x, p=0.1, training=self.training) 43 | 44 | batch.x = x 45 | batch = self.post_mp(batch) 46 | 47 | return batch 48 | -------------------------------------------------------------------------------- /SL/Graph_Classification/graphgps/network/gps_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch_geometric.graphgym.register as register 3 | from torch_geometric.graphgym.config import cfg 4 | from torch_geometric.graphgym.models.gnn import GNNPreMP 5 | from torch_geometric.graphgym.models.layer import (new_layer_config, 6 | BatchNorm1dNode) 7 | from torch_geometric.graphgym.register import register_network 8 | 9 | from graphgps.layer.gps_layer import GPSLayer 10 | 11 | 12 | class FeatureEncoder(torch.nn.Module): 13 | """ 14 | Encoding node and edge features 15 | 16 | Args: 17 | dim_in (int): Input feature dimension 18 | """ 19 | def __init__(self, dim_in): 20 | super(FeatureEncoder, self).__init__() 21 | self.dim_in = dim_in 22 | if cfg.dataset.node_encoder: 23 | # Encode integer node features via nn.Embeddings 24 | NodeEncoder = register.node_encoder_dict[ 25 | 
@register_network('GPSModel')
class GPSModel(torch.nn.Module):
    """General-Powerful-Scalable graph transformer.
    https://arxiv.org/abs/2205.12454
    Rampasek, L., Galkin, M., Dwivedi, V. P., Luu, A. T., Wolf, G., & Beaini, D.
    Recipe for a general, powerful, scalable graph transformer. (NeurIPS 2022)

    The model is a chain of child modules applied in registration order:
    feature encoder, optional pre-message-passing block, a stack of
    ``GPSLayer`` modules and the prediction head named by ``cfg.gnn.head``.

    Args:
        dim_in (int): Input feature dimension handed to the feature encoder.
        dim_out (int): Output dimension of the prediction head.

    Raises:
        ValueError: If the hidden dimensions disagree, or if
            ``cfg.gt.layer_type`` is not of the form ``"<local>+<global>"``.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        if not cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in:
            raise ValueError(
                f"The inner and hidden dims must match: "
                f"embed_dim={cfg.gt.dim_hidden} dim_inner={cfg.gnn.dim_inner} "
                f"dim_in={dim_in}"
            )

        try:
            local_gnn_type, global_model_type = cfg.gt.layer_type.split('+')
        except (ValueError, AttributeError) as err:
            # ValueError: wrong number of '+'-separated parts;
            # AttributeError: layer_type is not a string. Anything else
            # (e.g. KeyboardInterrupt) is deliberately left to propagate.
            raise ValueError(
                f"Unexpected layer type: {cfg.gt.layer_type}") from err
        layers = []
        for _ in range(cfg.gt.layers):
            layers.append(GPSLayer(
                dim_h=cfg.gt.dim_hidden,
                local_gnn_type=local_gnn_type,
                global_model_type=global_model_type,
                num_heads=cfg.gt.n_heads,
                act=cfg.gnn.act,
                pna_degrees=cfg.gt.pna_degrees,
                equivstable_pe=cfg.posenc_EquivStableLapPE.enable,
                dropout=cfg.gt.dropout,
                attn_dropout=cfg.gt.attn_dropout,
                layer_norm=cfg.gt.layer_norm,
                batch_norm=cfg.gt.batch_norm,
                bigbird_cfg=cfg.gt.bigbird,
                log_attn_weights=cfg.train.mode == 'log-attn-weights',
            ))
        self.layers = torch.nn.Sequential(*layers)

        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Feed ``batch`` through every child module in registration order."""
        for module in self.children():
            batch = module(batch)
        return batch
@register_network('Graphormer')
class GraphormerModel(torch.nn.Module):
    """Graphormer port to GraphGPS.
    https://arxiv.org/abs/2106.05234
    Ying, C., Cai, T., Luo, S., Zheng, S., Ke, G., He, D., ... & Liu, T. Y.
    Do transformers really perform badly for graph representation? (NeurIPS2021)

    Child modules are registered, and later applied, in this order: feature
    encoder, optional pre-message-passing block, the stack of
    ``GraphormerLayer`` modules and the head named by ``cfg.gnn.head``.
    """

    def __init__(self, dim_in, dim_out):
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        # All three widths have to agree before the layer stack is built.
        dims_agree = cfg.graphormer.embed_dim == cfg.gnn.dim_inner == dim_in
        if not dims_agree:
            raise ValueError(
                f"The inner and embed dims must match: "
                f"embed_dim={cfg.graphormer.embed_dim} "
                f"dim_inner={cfg.gnn.dim_inner} dim_in={dim_in}"
            )

        blocks = [
            GraphormerLayer(
                embed_dim=cfg.graphormer.embed_dim,
                num_heads=cfg.graphormer.num_heads,
                dropout=cfg.graphormer.dropout,
                attention_dropout=cfg.graphormer.attention_dropout,
                mlp_dropout=cfg.graphormer.mlp_dropout,
            )
            for _ in range(cfg.graphormer.num_layers)
        ]
        self.layers = torch.nn.Sequential(*blocks)

        head_cls = register.head_dict[cfg.gnn.head]
        self.post_mp = head_cls(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Pass ``batch`` through each child module in turn."""
        out = batch
        for child in self.children():
            out = child(out)
        return out
@register_network('Performer')
class Performer(torch.nn.Module):
    """Performer without edge features.

    This model disregards edge features and runs a linear transformer over a
    set of node features only.
    https://arxiv.org/abs/2009.14794
    """

    def __init__(self, dim_in, dim_out):
        """Register encoder, optional pre-MP stack, Performer backbone, head.

        Sub-modules are registered in execution order because ``forward``
        iterates over ``self.children()``.

        Args:
            dim_in: Input feature dimension handed to ``FeatureEncoder``.
            dim_out: Output dimension handed to the prediction head.

        Raises:
            ValueError: If ``cfg.gt.dim_hidden``, ``cfg.gnn.dim_inner`` and
                the encoder / pre-MP output width are not all equal.
        """
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        # Explicit raise instead of `assert`: asserts vanish under `python -O`
        # and this is validation of user-supplied configuration.
        if not cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in:
            raise ValueError(
                f"The inner and hidden dims must match: "
                f"embed_dim={cfg.gt.dim_hidden} "
                f"dim_inner={cfg.gnn.dim_inner} dim_in={dim_in}"
            )

        self.trf = BackbonePerformer(
            dim=cfg.gt.dim_hidden,
            depth=cfg.gt.layers,
            heads=cfg.gt.n_heads,
            dim_head=cfg.gt.dim_hidden // cfg.gt.n_heads
        )

        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Pass ``batch`` through every child module in registration order."""
        for module in self.children():
            batch = module(batch)
        return batch
@register_network('SANTransformer')
class SANTransformer(torch.nn.Module):
    """Spectral Attention Network (SAN) Graph Transformer.

    https://arxiv.org/abs/2106.03893
    """

    def __init__(self, dim_in, dim_out):
        """Register encoder, optional pre-MP stack, SAN layers and head.

        Sub-modules are registered in execution order because ``forward``
        iterates over ``self.children()``.

        Args:
            dim_in: Input feature dimension handed to ``FeatureEncoder``.
            dim_out: Output dimension handed to the prediction head.

        Raises:
            ValueError: If ``cfg.gt.dim_hidden``, ``cfg.gnn.dim_inner`` and
                the encoder / pre-MP output width are not all equal, or if
                ``cfg.gt.layer_type`` is neither ``'SANLayer'`` nor
                ``'SAN2Layer'``.
        """
        super().__init__()
        self.encoder = FeatureEncoder(dim_in)
        dim_in = self.encoder.dim_in

        if cfg.gnn.layers_pre_mp > 0:
            self.pre_mp = GNNPreMP(
                dim_in, cfg.gnn.dim_inner, cfg.gnn.layers_pre_mp)
            dim_in = cfg.gnn.dim_inner

        # Explicit raise instead of `assert`: asserts vanish under `python -O`
        # and this is validation of user-supplied configuration.
        if not cfg.gt.dim_hidden == cfg.gnn.dim_inner == dim_in:
            raise ValueError(
                f"The inner and hidden dims must match: "
                f"embed_dim={cfg.gt.dim_hidden} "
                f"dim_inner={cfg.gnn.dim_inner} dim_in={dim_in}"
            )

        layer_classes = {
            'SANLayer': SANLayer,
            'SAN2Layer': SAN2Layer,
        }
        try:
            Layer = layer_classes[cfg.gt.layer_type]
        except KeyError:
            # Fail here with a clear message rather than later with
            # "'NoneType' object is not callable".
            raise ValueError(
                f"Unexpected layer type: {cfg.gt.layer_type}") from None

        # A single embedding instance is handed to every layer below, so
        # all layers share it. It keeps torch's default initialisation.
        fake_edge_emb = torch.nn.Embedding(1, cfg.gt.dim_hidden)

        layers = []
        for _ in range(cfg.gt.layers):
            layers.append(Layer(gamma=cfg.gt.gamma,
                                in_dim=cfg.gt.dim_hidden,
                                out_dim=cfg.gt.dim_hidden,
                                num_heads=cfg.gt.n_heads,
                                full_graph=cfg.gt.full_graph,
                                fake_edge_emb=fake_edge_emb,
                                dropout=cfg.gt.dropout,
                                layer_norm=cfg.gt.layer_norm,
                                batch_norm=cfg.gt.batch_norm,
                                residual=cfg.gt.residual))
        self.trf_layers = torch.nn.Sequential(*layers)

        GNNHead = register.head_dict[cfg.gnn.head]
        self.post_mp = GNNHead(dim_in=cfg.gnn.dim_inner, dim_out=dim_out)

    def forward(self, batch):
        """Pass ``batch`` through every child module in registration order."""
        for module in self.children():
            batch = module(batch)
        return batch
@register_pooling('example')
def global_example_pool(x, batch, size=None):
    """Sum the rows of ``x`` that share the same index in ``batch``.

    Args:
        x: Values to pool; reduced along dimension 0.
        batch: Index tensor assigning each row of ``x`` to an output slot.
        size: Number of output slots. When ``None`` it is derived as
            ``batch.max() + 1``.

    Returns:
        The result of ``scatter`` with ``reduce='sum'`` and
        ``dim_size=size``.
    """
    if size is None:
        size = batch.max().item() + 1
    return scatter(x, batch, dim=0, dim_size=size, reduce='sum')
@register_stage('example')
class GNNStackStage(nn.Module):
    """Simple stage that stacks ``num_layers`` GNN layers sequentially."""

    def __init__(self, dim_in, dim_out, num_layers):
        """Register ``layer0 .. layer{num_layers-1}`` as child modules.

        Args:
            dim_in: Input width of the first layer.
            dim_out: Output width of every layer (and input width of every
                layer after the first).
            num_layers: Number of layers to create.
        """
        super().__init__()
        width = dim_in
        for idx in range(num_layers):
            self.add_module(f'layer{idx}', GNNLayer(width, dim_out))
            width = dim_out  # every layer after the first consumes dim_out
        self.dim_out = dim_out

    def forward(self, batch):
        """Run the batch through all layers, then optionally L2-normalise."""
        for stage_layer in self.children():
            batch = stage_layer(batch)
        # NOTE(review): normalisation is applied once, after the whole
        # stack, as in upstream GraphGym -- confirm against the original.
        if cfg.gnn.l2norm:
            batch.x = F.normalize(batch.x, p=2, dim=-1)
        return batch
def eval_epoch(logger, loader, model):
    """Run one evaluation pass over ``loader`` and log per-batch statistics.

    Args:
        logger: Object whose ``update_stats`` receives the targets,
            prediction scores, loss, a learning rate of 0, the elapsed time
            for the batch and ``cfg.params``.
        loader: Iterable of batches; each batch is moved to ``cfg.device``.
        model: Callable returning ``(pred, true)`` for a batch.
    """
    model.eval()
    device = torch.device(cfg.device)
    time_start = time.time()
    # Nothing is back-propagated during evaluation, so skip building the
    # autograd graph; this lowers memory use without changing the values.
    with torch.no_grad():
        for batch in loader:
            batch.to(device)
            pred, true = model(batch)
            loss, pred_score = compute_loss(pred, true)
            logger.update_stats(true=true.detach().cpu(),
                                pred=pred_score.detach().cpu(),
                                loss=loss.item(),
                                lr=0, time_used=time.time() - time_start,
                                params=cfg.params)
            time_start = time.time()
def task_specific_preprocessing(data, cfg):
    """Apply task-specific tweaks to a graph before the dataset is finalized.

    Args:
        data: PyG graph; modified in place.
        cfg: Main configuration node.

    Returns:
        The same ``data`` object, extended / modified.

    Raises:
        ValueError: If the head is ``"infer_links"`` and
            ``cfg.dataset.infer_link_label`` is not ``"edge"``.
    """
    if cfg.gnn.head == "infer_links":
        num_nodes = data.x.size(0)
        node_ids = torch.arange(num_nodes, dtype=torch.long)
        # All N*N ordered node pairs: row 0 repeats each id N times,
        # row 1 tiles the full id range N times.
        complete_index = torch.stack(
            [node_ids.repeat_interleave(num_nodes),
             node_ids.repeat(num_nodes)], 0)

        data.edge_attr = None

        if cfg.dataset.infer_link_label != "edge":
            raise ValueError(f"Infer-link task {cfg.dataset.infer_link_label} not available.")

        pairs = complete_index.T.unsqueeze(1)
        edges = data.edge_index.T
        # Pairs that differ from every existing edge, randomly subsampled
        # down to the number of existing edges.
        negatives = (pairs != edges).any(2).all(1).nonzero()[:, 0]
        negatives = shuffle(negatives)[:data.edge_index.size(1)]
        # Pairs that coincide with some existing edge.
        positives = (pairs == edges).all(2).any(1).nonzero()[:, 0]

        final_index = shuffle(torch.cat([positives, negatives]))
        data.complete_edge_index = complete_index[:, final_index]

        labels = torch.zeros(num_nodes, num_nodes, dtype=torch.long)
        labels[data.edge_index[0], data.edge_index[1]] = 1

        # Sanity check: the sampled pairs are half edges, half non-edges.
        assert labels.flatten()[final_index].mean(dtype=torch.float) == 0.5

        data.y = labels.flatten()[final_index]

    has_posenc = (cfg.posenc_LapPE.enable
                  or cfg.posenc_RWSE.enable
                  or cfg.posenc_GraphormerBias.enable)
    dataset_name = cfg.dataset.name

    if dataset_name == "TRIANGLES":
        # With encodings enabled, data.x is left untouched here.
        if not has_posenc:
            data.x = torch.zeros((data.x.size(0), 1))
        data.y = data.y.sub(1).to(torch.long)

    if dataset_name == "CSL":
        # With encodings enabled they can append to a zero-width data.x;
        # otherwise a single constant zero feature is used.
        feat_width = 0 if has_posenc else 1
        data.x = torch.zeros((data.num_nodes, feat_width))

    return data
def clip_graphs_to_size(data, size_limit=5000):
    """Truncate a graph to its first ``size_limit`` nodes, in place.

    Graphs that already have at most ``size_limit`` nodes are returned
    untouched. Otherwise node-level attributes are sliced to the first
    ``size_limit`` rows and only edges between kept nodes are retained.

    Args:
        data: PyG graph to clip; modified in place.
        size_limit: Maximum number of nodes to keep.

    Returns:
        The same ``data`` object.
    """
    if hasattr(data, 'num_nodes'):
        N = data.num_nodes  # Explicitly given number of nodes, e.g. ogbg-ppa
    else:
        N = data.x.shape[0]  # Number of nodes, including disconnected nodes.
    if N <= size_limit:
        return data

    logging.info(f' ...clip to {size_limit} a graph of size: {N}')
    edge_attr = getattr(data, 'edge_attr', None)
    # Pass the true node count: without it `subgraph` sizes its node mask
    # from edge_index.max() + 1, which can be smaller than `size_limit`
    # when the highest-numbered nodes are isolated.
    edge_index, edge_attr = subgraph(list(range(size_limit)),
                                     data.edge_index, edge_attr,
                                     num_nodes=N)
    # The attribute can exist but hold None (graphs that only carry an
    # explicit num_nodes); slicing None would raise.
    if getattr(data, 'x', None) is not None:
        data.x = data.x[:size_limit]
    data.num_nodes = size_limit
    if hasattr(data, 'node_is_attributed'):  # for ogbg-code2 dataset
        data.node_is_attributed = data.node_is_attributed[:size_limit]
        data.node_dfs_order = data.node_dfs_order[:size_limit]
        data.node_depth = data.node_depth[:size_limit]
    data.edge_index = edge_index
    if hasattr(data, 'edge_attr'):
        data.edge_attr = edge_attr
    return data
--runs 1 --model puregcn --hiddim 256 --mplayers 10 --testbs 4096 \ 30 | --maskinput --codebook 8 --kmeans 1 --tailact --device 0 31 | 32 | python ID_MLP.py --preedp 0.1 --predp 0.1 \ 33 | --prelr 0.01 --batch_size 384 \ 34 | --lnnn --predictor cn1 --dataset Citeseer --epochs 1000 --runs 2 \ 35 | --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact --device 0 36 | ``` 37 | 38 | Pubmed 39 | ``` 40 | python ID_pretrain.py --xdp 0.5 --tdp 0.0 --gnndp 0.1 --gnnedp 0.0 \ 41 | --predp 0.0 --preedp 0.0 --gnnlr 0.01 \ 42 | --prelr 0.002 --batch_size 2048 --ln --lnnn --predictor cn1 --dataset Pubmed --epochs 100 \ 43 | --runs 1 --model gcn --hiddim 256 --mplayers 10 --testbs 8192 \ 44 | --maskinput --jk --use_xlin --codebook 8 --kmeans 1 --tailact --device 0 45 | 46 | python ID_MLP.py --preedp 0.3 --predp 0.3 \ 47 | --prelr 0.001 --batch_size 4000 \ 48 | --lnnn --predictor cn1 --dataset Pubmed --epochs 1000 --runs 2 \ 49 | --hiddim 512 --testbs 8192 --maskinput --num_id 30 --tailact --device 0 50 | ``` 51 | 52 | collab 53 | ``` 54 | python ID_pretrain.py --xdp 0.25 --tdp 0.05 --gnnedp 0.25 --preedp 0.0 --predp 0.3 --gnndp 0.1 --gnnlr 0.001 --prelr 0.001 --batch_size 65536 --ln --lnnn --predictor cn1 --dataset collab --epochs 150 --runs 1 --model gcn --hiddim 256 --mplayers 5 --testbs 131072 --maskinput --use_valedges_as_input --jk --device 4 --tailact 55 | 56 | python ID_MLP.py --preedp 0.0 --predp 0.0 --prelr 0.001 --batch_size 40000 --ln --lnnn --predictor cn1 --dataset collab --epochs 200 --runs 1 --model gcn --hiddim 256 --testbs 131072 --maskinput --use_valedges_as_input --device 4 --num_id 15 --tailact 57 | ``` 58 | 59 | 60 | ppa 61 | ``` 62 | python ID_pretrain.py --xdp 0.0 --tdp 0.0 --gnnedp 0.1 --preedp 0.0 --predp 0.1 --gnndp 0.0 --gnnlr 0.001 --prelr 0.001 --batch_size 16384 --ln --lnnn --predictor cn1 --dataset ppa --epochs 60 --runs 1 --model gcn --hiddim 64 --mplayers 5 --maskinput --tailact --testbs 65536 --device 7 --res 63 | 64 | python ID_MLP.py 
--preedp 0.0 --predp 0.1 \ 65 | --prelr 0.005 --batch_size 56384 --ln --lnnn --predictor cn1 --dataset ppa \ 66 | --epochs 100 --runs 1 --model gcn --hiddim 256 --maskinput --tailact \ 67 | --testbs 65536 --device 7 --num_id 15 68 | ``` 69 | -------------------------------------------------------------------------------- /SL/Link_Prediction/ogbdataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from sklearn.metrics import roc_auc_score, average_precision_score 3 | from ogb.linkproppred import PygLinkPropPredDataset 4 | import torch_geometric.transforms as T 5 | from torch_sparse import SparseTensor 6 | from torch_geometric.datasets import Planetoid 7 | from torch_geometric.utils import train_test_split_edges, negative_sampling, to_undirected 8 | from torch_geometric.transforms import RandomLinkSplit 9 | 10 | # random split dataset 11 | def randomsplit(dataset, val_ratio: float=0.10, test_ratio: float=0.2): 12 | def removerepeated(ei): 13 | ei = to_undirected(ei) 14 | ei = ei[:, ei[0]= 0 and run < len(self.results) 11 | self.results[run].append(result) 12 | 13 | def print_statistics(self, run=None, mode='max_acc'): 14 | if run is not None: 15 | result = 100 * torch.tensor(self.results[run]) 16 | argmax = result[:, 1].argmax().item() 17 | argmin = result[:, 3].argmin().item() 18 | if mode == 'max_acc': 19 | ind = argmax 20 | else: 21 | ind = argmin 22 | 23 | print_str=f'Run {run + 1:02d}:'+\ 24 | f'Highest Train: {result[:, 0].max():.2f} '+\ 25 | f'Highest Valid: {result[:, 1].max():.2f} '+\ 26 | f'Highest Test: {result[:, 2].max():.2f} '+\ 27 | f'Chosen epoch: {ind+1}\n'+\ 28 | f'Final Train: {result[ind, 0]:.2f} '+\ 29 | f'Final Test: {result[ind, 2]:.2f}' 30 | print(print_str) 31 | 32 | else: 33 | best_results = [] 34 | max_val_epoch=0 35 | for r in self.results: 36 | r=100*torch.tensor(r) 37 | train1 = r[:, 0].max().item() 38 | test1 = r[:, 2].max().item() 39 | valid = r[:, 1].max().item() 40 | if mode 
def parser_add_main_args(parser):
    """Register every command-line option of the training scripts.

    Options are declared as ``(flag, keyword-arguments)`` rows and added to
    ``parser`` in the listed order.

    Args:
        parser: ``argparse.ArgumentParser`` (or compatible) to extend.
    """
    flag = dict(action='store_true')
    option_table = (
        # dataset and evaluation
        ('--data_dir', dict(type=str, default='./data/')),
        ('--dataset', dict(type=str, default='cora')),
        ('--device', dict(type=int, default=0,
                          help='which gpu to use if any (default: 0)')),
        ('--seed', dict(type=int, default=42)),
        ('--cpu', dict(flag)),
        ('--epochs', dict(type=int, default=500)),
        ('--runs', dict(type=int, default=1,
                        help='number of distinct runs')),
        ('--train_prop', dict(type=float, default=.6,
                              help='training label proportion')),
        ('--valid_prop', dict(type=float, default=.2,
                              help='validation label proportion')),
        ('--protocol', dict(
            type=str, default='semi',
            help='protocol for cora datasets, semi or supervised')),
        ('--rand_split', dict(flag, help='use random splits')),
        ('--rand_split_class', dict(
            flag,
            help='use random splits with a fixed number of labeled nodes for each class')),
        ('--label_num_per_class', dict(
            type=int, default=20,
            help='labeled nodes per class(randomly selected)')),
        ('--valid_num', dict(type=int, default=500,
                             help='Total number of validation')),
        ('--test_num', dict(type=int, default=500,
                            help='Total number of test')),
        ('--eval_step', dict(type=int, default=1,
                             help='how often to evaluate')),
        ('--batch_size', dict(type=int, default=100000,
                              help='mini batch training for large graphs')),
        ('--metric', dict(type=str, default='acc',
                          choices=['acc', 'rocauc', 'f1'],
                          help='evaluation metric')),
        ('--kmeans', dict(type=int, default=0)),
        ('--k', dict(type=int, default=0)),
        ('--num_id', dict(type=int, default=9)),
        ('--norm_type', dict(type=str, default='none')),
        ('--num_codes', dict(type=int, default=16)),
        # model
        ('--method', dict(type=str, default='gcn')),
        ('--hidden_channels', dict(type=int, default=32)),
        ('--num_layers', dict(type=int, default=2,
                              help='number of layers for deep methods')),
        ('--use_bn', dict(flag, help='use layernorm')),
        ('--use_residual', dict(
            flag, help='use residual link for each GNN layer')),
        ('--use_weight', dict(
            flag, help='use weight for GNN convolution')),
        ('--use_init', dict(
            flag, help='use initial feat for each GNN layer')),
        ('--use_act', dict(
            flag, help='use activation for each GNN layer')),
        ('--patience', dict(type=int, default=200,
                            help='early stopping patience.')),
        # training
        ('--lr', dict(type=float, default=0.01)),
        ('--weight_decay', dict(type=float, default=0)),
        ('--dropout', dict(type=float, default=0.5)),
        # display and utility
        ('--display_step', dict(type=int, default=50,
                                help='how often to print')),
        ('--no_feat_norm', dict(flag,
                                help='Not use feature normalization.')),
    )
    for name, options in option_table:
        parser.add_argument(name, **options)
--device 4 --runs 1 --num_codes 8 --epoch 1000 --kmeans 1 19 | 20 | python ID_MLP.py --dataset citeseer --lr 0.001 --num_layers 5 \ 21 | --hidden_channels 256 --weight_decay 0.01 --dropout 0.5 \ 22 | --rand_split --no_feat_norm \ 23 | --seed 123 --device 4 --runs 5 --num_id 6 --k 0 --epoch 1000 24 | 25 | # Pubmed 26 | python main.py --dataset pubmed --lr 0.005 --num_layers 2 \ 27 | --hidden_channels 256 --weight_decay 5e-4 --dropout 0.5 \ 28 | --method gcn \ 29 | --rand_split --no_feat_norm \ 30 | --seed 123 --device 3 --runs 1 --kmeans 1 --num_codes 16 --epoch 1000 31 | 32 | python ID_MLP.py --dataset pubmed --lr 0.005 --num_layers 5 \ 33 | --hidden_channels 256 --weight_decay 5e-4 --dropout 0.5 \ 34 | --rand_split --no_feat_norm \ 35 | --seed 123 --device 3 --runs 5 --num_id 6 --k 0 --epoch 1000 36 | 37 | -------------------------------------------------------------------------------- /SL/Node_Classification/data/amazon-computer_split.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/data/amazon-computer_split.npz -------------------------------------------------------------------------------- /SL/Node_Classification/data/amazon-photo_split.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/data/amazon-photo_split.npz -------------------------------------------------------------------------------- /SL/Node_Classification/data/coauthor-cs_split.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SL/Node_Classification/data/coauthor-cs_split.npz -------------------------------------------------------------------------------- 
@torch.no_grad()
def evaluate(model, dataset, split_idx, eval_func, criterion, args, result=None):
    """Score predictions on the train/valid/test splits.

    Args:
        model: Callable taking ``(node_feat, edge_index)`` and returning a
            4-tuple whose first item is the output and third item is the
            ID tensor. Ignored when ``result`` is given.
        dataset: Object exposing ``graph['node_feat']``,
            ``graph['edge_index']`` and ``label``.
        split_idx: Mapping with ``'train'``, ``'valid'`` and ``'test'``
            index entries.
        eval_func: Metric called as ``eval_func(labels, outputs)``.
        criterion: Loss used to compute the validation loss.
        args: Namespace; only ``args.dataset`` is read.
        result: Optional precomputed model output. When given, the model is
            not run.

    Returns:
        ``(train_acc, valid_acc, test_acc, valid_loss, out,
        id_list_concat)``. ``out`` is log-softmaxed unless the dataset is
        ``'questions'``. ``id_list_concat`` is a NumPy array, or ``None``
        when ``result`` was supplied.
    """
    # Stays None on the precomputed-result path, where no IDs are produced;
    # previously that path crashed on an unbound local.
    id_list_concat = None
    if result is not None:
        out = result
    else:
        model.eval()
        out, _commit_loss, id_list_concat, _gnn_id = model(
            dataset.graph['node_feat'], dataset.graph['edge_index'])
        id_list_concat = id_list_concat.detach().cpu().numpy()

    train_acc = eval_func(
        dataset.label[split_idx['train']], out[split_idx['train']])
    valid_acc = eval_func(
        dataset.label[split_idx['valid']], out[split_idx['valid']])
    test_acc = eval_func(
        dataset.label[split_idx['test']], out[split_idx['test']])

    # Exact comparison: `in ('questions')` was a substring test on a plain
    # string, so names such as 'quest' or '' matched as well.
    if args.dataset == 'questions':
        if dataset.label.shape[1] == 1:
            true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1)
        else:
            true_label = dataset.label
        valid_loss = criterion(out[split_idx['valid']], true_label.squeeze(1)[
            split_idx['valid']].to(torch.float))
    else:
        out = F.log_softmax(out, dim=1)
        valid_loss = criterion(
            out[split_idx['valid']], dataset.label.squeeze(1)[split_idx['valid']])

    return train_acc, valid_acc, test_acc, valid_loss, out, id_list_concat
dataset.label = dataset.label.to(torch.device("cpu")) 44 | edge_index, x = dataset.graph['edge_index'], dataset.graph['node_feat'] 45 | out, total_commit_loss, id_list_concat, gnn_id = model(x, edge_index) 46 | id_list_concat = id_list_concat.detach().cpu().numpy() 47 | train_acc = eval_func( 48 | dataset.label[split_idx['train']], out[split_idx['train']]) 49 | valid_acc = eval_func( 50 | dataset.label[split_idx['valid']], out[split_idx['valid']]) 51 | test_acc = eval_func( 52 | dataset.label[split_idx['test']], out[split_idx['test']]) 53 | if args.dataset in ('questions'): 54 | if dataset.label.shape[1] == 1: 55 | true_label = F.one_hot(dataset.label, dataset.label.max() + 1).squeeze(1) 56 | else: 57 | true_label = dataset.label 58 | valid_loss = criterion(out[split_idx['valid']], true_label.squeeze(1)[ 59 | split_idx['valid']].to(torch.float)) 60 | else: 61 | out = F.log_softmax(out, dim=1) 62 | valid_loss = criterion( 63 | out[split_idx['valid']], dataset.label.squeeze(1)[split_idx['valid']]) 64 | 65 | return train_acc, valid_acc, test_acc, valid_loss, out, id_list_concat 66 | -------------------------------------------------------------------------------- /SL/Node_Classification/large_graph/arxiv.sh: -------------------------------------------------------------------------------- 1 | python main-arxiv.py --dataset ogbn-arxiv --hidden_channels 256 --epochs 1000 --lr 0.0005 --runs 1 --local_layers 5 --post_bn --device 7 2 | 3 | 4 | python arxiv_ID_MLP.py --lr 0.01 --hidden_channels 256 --num_layers 4 \ 5 | --epochs 2000 --device 7 --dropout 0.5 --num_id 15 --k 0 --norm_type batch --runs 2 6 | -------------------------------------------------------------------------------- /SL/Node_Classification/large_graph/data/pokec/pokec-splits.npy: -------------------------------------------------------------------------------- 
def parse_method(args, n, c, d, device):
    """Instantiate the GAT model configured by ``args`` and move it to ``device``.

    ``d`` and ``c`` are forwarded to ``GAT`` as its first and third positional
    arguments, with ``args.hidden_channels`` between them. ``n`` is accepted
    but not used.
    """
    gat_options = {
        'local_layers': args.local_layers,
        'in_dropout': args.in_dropout,
        'dropout': args.dropout,
        'heads': args.num_heads,
        'pre_ln': args.pre_ln,
        'post_bn': args.post_bn,
        'local_attn': args.local_attn,
        'kmeans': args.kmeans,
        'num_codes': args.num_codes,
    }
    network = GAT(d, args.hidden_channels, c, **gat_options)
    return network.to(device)
class Logger(object):
    """Collect per-epoch ``(train, valid, test)`` scores for several runs.

    ``results[run]`` is the list of score triples recorded for that run;
    :meth:`print_statistics` reports them as percentages.
    """

    def __init__(self, runs, info=None):
        self.info = info
        self.results = []
        for _ in range(runs):
            self.results.append([])

    def add_result(self, run, result):
        """Append one ``(train, valid, test)`` triple to run number ``run``."""
        assert len(result) == 3
        assert 0 <= run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None):
        """Print a summary for one run, or mean/std over all runs if ``run`` is None.

        The "final" scores are those of the epoch with the best validation score.
        """
        if run is None:
            self._print_all_runs()
            return

        scores = 100 * torch.tensor(self.results[run])
        best_epoch = scores[:, 1].argmax().item()
        print(f'Run {run + 1:02d}:')
        print(f'Highest Train: {scores[:, 0].max():.2f}')
        print(f'Highest Valid: {scores[:, 1].max():.2f}')
        print(f' Final Train: {scores[best_epoch, 0]:.2f}')
        print(f' Final Test: {scores[best_epoch, 2]:.2f}')

    def _print_all_runs(self):
        """Print mean and standard deviation of the per-run best scores."""
        all_scores = 100 * torch.tensor(self.results)

        summary_rows = []
        for run_scores in all_scores:
            best_epoch = run_scores[:, 1].argmax()
            summary_rows.append((
                run_scores[:, 0].max().item(),
                run_scores[:, 1].max().item(),
                run_scores[best_epoch, 0].item(),
                run_scores[best_epoch, 2].item(),
            ))
        summary = torch.tensor(summary_rows)

        print('All runs:')
        headings = ('Highest Train:', 'Highest Valid:', ' Final Train:', ' Final Test:')
        for column, heading in enumerate(headings):
            values = summary[:, column]
            print(f'{heading} {values.mean():.2f} +- {values.std():.2f}')
def fix_seed(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators.

    Also switches cuDNN to deterministic mode and disables its benchmark
    auto-tuner.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
torch.device("cpu") 37 | else: 38 | device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() else torch.device("cpu") 39 | 40 | ### Load and preprocess data ### 41 | dataset = load_dataset(args.data_dir, args.dataset) 42 | 43 | if len(dataset.label.shape) == 1: 44 | dataset.label = dataset.label.unsqueeze(1) 45 | dataset.label = dataset.label.to(device) 46 | 47 | split_idx_lst = [dataset.load_fixed_splits() for _ in range(args.runs)] 48 | 49 | ### Basic information of datasets ### 50 | n = dataset.graph['num_nodes'] 51 | e = dataset.graph['edge_index'].shape[1] 52 | c = max(dataset.label.max().item() + 1, dataset.label.shape[1]) 53 | d = dataset.graph['node_feat'].shape[1] 54 | 55 | print(f"dataset {args.dataset} | num nodes {n} | num edge {e} | num node feats {d} | num classes {c}") 56 | 57 | dataset.graph['edge_index'] = to_undirected(dataset.graph['edge_index']) 58 | dataset.graph['edge_index'], _ = remove_self_loops(dataset.graph['edge_index']) 59 | dataset.graph['edge_index'], _ = add_self_loops(dataset.graph['edge_index'], num_nodes=n) 60 | 61 | dataset.graph['edge_index'], dataset.graph['node_feat'] = \ 62 | dataset.graph['edge_index'].to(device), dataset.graph['node_feat'].to(device) 63 | 64 | ### Load method ### 65 | model = parse_method(args, n, c, d, device) 66 | 67 | criterion = nn.NLLLoss() 68 | eval_func = eval_acc 69 | logger = Logger(args.runs, args) 70 | 71 | model.train() 72 | print('MODEL:', model) 73 | 74 | ### Training loop ### 75 | for run in range(args.runs): 76 | split_idx = split_idx_lst[run] 77 | train_idx = split_idx['train'].to(device) 78 | model.reset_parameters() 79 | optimizer = torch.optim.Adam(model.parameters(),weight_decay=args.weight_decay, lr=args.lr) 80 | best_val = float('-inf') 81 | best_test = float('-inf') 82 | if args.save_model: 83 | save_model(args, model, optimizer, run) 84 | 85 | for epoch in range(args.epochs): 86 | model.train() 87 | optimizer.zero_grad() 88 | 89 | out, total_commit_loss, 
id_list_concat, gnn_id = model(dataset.graph['node_feat'], dataset.graph['edge_index']) 90 | out = F.log_softmax(out, dim=1) 91 | loss = criterion( 92 | out[train_idx], dataset.label.squeeze(1)[train_idx]) 93 | (loss + total_commit_loss).backward() 94 | optimizer.step() 95 | 96 | result = evaluate(model, dataset, split_idx, eval_func, criterion, args) 97 | 98 | logger.add_result(run, result[:-2]) 99 | 100 | if result[1] > best_val: 101 | best_val = result[1] 102 | best_test = result[2] 103 | np.savez(f"semantic_ID_{args.dataset}", result[-1]) 104 | if args.save_model: 105 | save_model(args, model, optimizer, run) 106 | 107 | if epoch % args.display_step == 0: 108 | print(f'Epoch: {epoch:02d}, ' 109 | f'Loss: {loss:.4f}, ' 110 | f'Train: {100 * result[0]:.2f}%, ' 111 | f'Valid: {100 * result[1]:.2f}%, ' 112 | f'Test: {100 * result[2]:.2f}%, ' 113 | f'Best Valid: {100 * best_val:.2f}%, ' 114 | f'Best Test: {100 * best_test:.2f}%') 115 | logger.print_statistics(run) 116 | 117 | results = logger.print_statistics() 118 | ### Save results ### 119 | save_result(args, results) 120 | 121 | -------------------------------------------------------------------------------- /SL/Node_Classification/large_graph/pokec.sh: -------------------------------------------------------------------------------- 1 | python main-batch.py --dataset pokec --hidden_channels 256 --epochs 2000 --batch_size 550000 --lr 0.0005 --runs 1 --local_layers 7 --in_drop 0.0 --dropout 0.2 --weight_decay 0.0 --post_bn --eval_step 9 --eval_epoch 1000 --device 0 2 | 3 | python ID_MLP.py --dataset pokec --lr 0.001 --hidden_channels 256 --num_layers 5 \ 4 | --epochs 2000 --device 1 --dropout 0.5 --num_id 21 --k 0 --norm_type batch --runs 2 --eval_step 9 --eval_epoch 100 5 | -------------------------------------------------------------------------------- /SL/Node_Classification/large_graph/product.sh: -------------------------------------------------------------------------------- 1 | # ogbn-products 2 | python 
class Logger(object):
    """Track ``(train, valid, test, valid_loss)`` per epoch for several runs.

    Adapted from https://github.com/snap-stanford/ogb/
    """

    def __init__(self, runs, info=None):
        self.info = info
        self.results = [list() for _ in range(runs)]

    def add_result(self, run, result):
        """Record one 4-element result tuple for run number ``run``."""
        assert len(result) == 4
        assert 0 <= run < len(self.results)
        self.results[run].append(result)

    def print_statistics(self, run=None, mode='max_acc'):
        """Print statistics for one run, or aggregated over all runs.

        With ``mode == 'max_acc'`` the reported epoch is the one with the
        highest validation score (column 1); any other mode picks the epoch
        with the lowest value in column 3. The chosen test score (or its mean
        over runs) is stored in ``self.test``.

        Returns:
            ``None`` for a single run; for all runs, the tensor of each run's
            final test score.
        """
        if run is None:
            return self._summarize_all_runs(mode)

        scores = 100 * torch.tensor(self.results[run])
        best_valid_epoch = scores[:, 1].argmax().item()
        lowest_loss_epoch = scores[:, 3].argmin().item()
        chosen = best_valid_epoch if mode == 'max_acc' else lowest_loss_epoch

        print(f'Run {run + 1:02d}:')
        for heading, column in (('Highest Train:', 0),
                                ('Highest Valid:', 1),
                                ('Highest Test:', 2)):
            print(f'{heading} {scores[:, column].max():.2f}')
        print(f'Chosen epoch: {chosen}')
        print(f'Final Train: {scores[chosen, 0]:.2f}')
        print(f'Final Test: {scores[chosen, 2]:.2f}')
        self.test = scores[chosen, 2]

    def _summarize_all_runs(self, mode):
        """Print mean ± std of each run's best scores and return the final test scores."""
        all_scores = 100 * torch.tensor(self.results)

        rows = []
        for run_scores in all_scores:
            highest_train = run_scores[:, 0].max().item()
            highest_test = run_scores[:, 2].max().item()
            highest_valid = run_scores[:, 1].max().item()
            if mode == 'max_acc':
                final_train = run_scores[run_scores[:, 1].argmax(), 0].item()
                final_test = run_scores[run_scores[:, 1].argmax(), 2].item()
            else:
                final_train = run_scores[run_scores[:, 3].argmin(), 0].item()
                final_test = run_scores[run_scores[:, 3].argmin(), 2].item()
            rows.append((highest_train, highest_test, highest_valid,
                         final_train, final_test))
        summary = torch.tensor(rows)

        print('All runs:')
        headings = ('Highest Train:', 'Highest Test:', 'Highest Valid:',
                    ' Final Train:', ' Final Test:')
        for column, heading in enumerate(headings):
            values = summary[:, column]
            print(f'{heading} {values.mean():.2f} ± {values.std():.2f}')

        final_tests = summary[:, 4]
        self.test = final_tests.mean()
        return final_tests

    def output(self, out_path, info):
        """Append ``info`` and the stored test score to the file ``out_path``."""
        with open(out_path, 'a') as handle:
            handle.write(info)
            handle.write(f'test acc:{self.test}\n')
def parser_add_main_args(parser):
    """Register every command-line option of the training pipeline on ``parser``.

    Options are added in a fixed order (dataset/evaluation, model, training,
    display/utility, then the codebook and ID-MLP settings), so ``--help``
    output keeps the same layout.
    """
    option_specs = [
        # dataset and evaluation
        ('--dataset', dict(type=str, default='roman-empire')),
        ('--data_dir', dict(type=str, default='./data/')),
        ('--device', dict(type=int, default=0,
                          help='which gpu to use if any (default: 0)')),
        ('--seed', dict(type=int, default=42)),
        ('--cpu', dict(action='store_true')),
        ('--epochs', dict(type=int, default=1000)),
        ('--runs', dict(type=int, default=1,
                        help='number of distinct runs')),
        ('--metric', dict(type=str, default='acc', choices=['acc', 'rocauc'],
                          help='evaluation metric')),

        ('--train_prop', dict(type=float, default=.6,
                              help='training label proportion')),
        ('--valid_prop', dict(type=float, default=.2,
                              help='validation label proportion')),
        ('--rand_split', dict(action='store_true',
                              help='use random splits')),
        ('--rand_split_class', dict(
            action='store_true',
            help='use random splits with a fixed number of labeled nodes for each class')),

        ('--label_num_per_class', dict(
            type=int, default=20,
            help='labeled nodes per class(randomly selected)')),
        ('--valid_num', dict(type=int, default=500,
                             help='Total number of validation')),
        ('--test_num', dict(type=int, default=1000,
                            help='Total number of test')),

        # model
        ('--method', dict(type=str, default='gat')),
        ('--hidden_channels', dict(type=int, default=256)),
        ('--local_layers', dict(type=int, default=7,
                                help='number of layers for local attention')),
        ('--num_heads', dict(type=int, default=1,
                             help='number of heads for attention')),
        ('--pre_ln', dict(action='store_true')),

        # training
        ('--lr', dict(type=float, default=0.001)),
        ('--weight_decay', dict(type=float, default=5e-4)),
        ('--in_dropout', dict(type=float, default=0.0)),
        ('--dropout', dict(type=float, default=0.5)),

        # display and utility
        ('--display_step', dict(type=int, default=50,
                                help='how often to print')),
        ('--save_model', dict(action='store_true',
                              help='whether to save model')),
        ('--model_dir', dict(type=str, default='./model/',
                             help='where to save model')),
        ('--save_result', dict(action='store_true',
                               help='whether to save result')),

        # codebook / ID-MLP settings
        ('--kmeans', dict(type=int, default=1)),
        ('--num_codes', dict(type=int, default=16)),
        ('--norm_type', dict(type=str, default='none')),

        ('--num_layers', dict(type=int, default=3)),
        ('--k', dict(type=int, default=0)),
        ('--num_id', dict(type=int, default=15)),
    ]
    for flag, spec in option_specs:
        parser.add_argument(flag, **spec)
def format_cuda_version(version):
    """Return the wheel-index tag for a CUDA version string.

    ``'11.8'`` becomes ``'cu118'``. ``torch.version.cuda`` is ``None`` on
    CPU-only PyTorch builds; that case maps to ``'cpu'`` instead of raising
    ``AttributeError`` on ``None.replace``.
    """
    if version is None:
        return 'cpu'
    return 'cu' + version.replace('.', '')
import torch 7 | 8 | rcParams['pdf.fonttype'] = 42 9 | rcParams['ps.fonttype'] = 42 10 | 11 | dataset_names = ['cora', 'citeseer', 'pubmed', 'computers', 'photo', 'coauthorcs', 'coauthorphysics'] 12 | 13 | dataset_name_map = { 14 | 'cora': "Cora", 15 | 'citeseer': "CiteSeer", 16 | 'pubmed': "PubMed", 17 | 'computers': "Amazon PC", 18 | 'photo': "Amazon Photo", 19 | 'coauthorcs': "Coauthor CS", 20 | 'coauthorphysics': "Coauthor PHY" 21 | } 22 | 23 | markers = { 24 | "cora": "v", 25 | "citeseer": "^", 26 | "pubmed": "<", 27 | "computers": ">", 28 | "photo": "P", 29 | "coauthorcs": "X", 30 | "coauthorphysics": "D", 31 | } 32 | 33 | colors = { 34 | "cora": "r", 35 | "citeseer": "m", 36 | "pubmed": "g", 37 | "computers": "c", 38 | "photo": "b", 39 | "coauthorcs": "y", 40 | "coauthorphysics": "k", 41 | } 42 | 43 | evaluation_keys = ['modularity', 'nmi'] 44 | 45 | results = {dataset: {} for dataset in dataset_names} 46 | for dataset in dataset_names: 47 | for evaluation_key in evaluation_keys: 48 | result_dataset_eval = [] 49 | result_dataset_eval_std = [] 50 | for lam in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]: 51 | results_dataset = [] 52 | for seed in range(10): 53 | path = f"results/results_{dataset}_{lam}_300_gcn_{seed}.pt" 54 | results_dataset.append(torch.load(path)[evaluation_key]) 55 | result_dataset_eval.append(np.mean(results_dataset)) 56 | result_dataset_eval_std.append(np.std(results_dataset)) 57 | results[dataset][evaluation_key] = copy.deepcopy(result_dataset_eval) 58 | results[dataset][evaluation_key + '_std'] = copy.deepcopy(result_dataset_eval_std) 59 | 60 | fig, axes = plt.subplots(nrows=1, ncols=len(evaluation_keys), figsize=(len(evaluation_keys) * 4, 4), sharex=True) 61 | 62 | labelsize = 14 63 | ticksize = 12 64 | markersize = 6 65 | linewidth = 1.5 66 | 67 | xticks = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] 68 | 69 | title_map = { 70 | 'conductance': r'$\mathcal{C}$', 71 | 'modularity': r'$\mathcal{Q}$', 72 | 'nmi': r'NMI', 73 | 
'sample_f1_score': r'F1' 74 | } 75 | 76 | for i, key in enumerate(evaluation_keys): 77 | ax = axes[i] 78 | ls = [None] * len(dataset_names) 79 | for i, method_key in enumerate(dataset_names): 80 | ls[i], = ax.plot(xticks, results[method_key][key], label=method_key, marker=markers[method_key], color=colors[method_key]) 81 | ax.fill_between(xticks, np.array(results[method_key][key]) - np.array(results[method_key][key + '_std']), np.array(results[method_key][key]) + np.array(results[method_key][key + '_std']), alpha=0.2, color=colors[method_key]) 82 | 83 | ax.minorticks_off() 84 | ax.set_xticks(xticks) # , [1, 2, 4, 8, 16, 32]) 85 | ax.set_xticklabels(xticks) 86 | ax.set_title(title_map[key], fontsize=labelsize) 87 | ax.tick_params(axis='x', labelsize=ticksize) 88 | ax.tick_params(axis='y', labelsize=ticksize) 89 | ax.grid(True) 90 | 91 | fig.add_subplot(111, frameon=False) 92 | plt.tick_params(labelcolor='none', top=False, bottom=False, left=False, right=False) 93 | plt.xlabel(r'$\lambda$', fontsize=labelsize) 94 | 95 | fig.tight_layout() 96 | fig.subplots_adjust(left=0.035, bottom=0.16, right=0.99, wspace=0.22) 97 | 98 | axes[0].legend(handles=ls, labels=[dataset_name_map[dataset_name] for dataset_name in dataset_names], 99 | loc='upper center', bbox_to_anchor=(1.1, -0.2), fancybox=False, shadow=False, ncol=math.ceil(len(dataset_names) / 2), fontsize=labelsize) 100 | 101 | fig.savefig(f'plots/results_{"_".join(evaluation_keys)}.pdf', bbox_inches='tight') 102 | plt.show() 103 | -------------------------------------------------------------------------------- /SSL/DGCluster/plots_num_clusters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib import rcParams 4 | import copy 5 | import torch 6 | import math 7 | 8 | rcParams['pdf.fonttype'] = 42 9 | rcParams['ps.fonttype'] = 42 10 | 11 | dataset_names = ['cora', 'citeseer', 'pubmed', 'computers', 'photo', 
'coauthorcs', 'coauthorphysics'] 12 | 13 | dataset_name_map = { 14 | 'cora': "Cora", 15 | 'citeseer': "CiteSeer", 16 | 'pubmed': "PubMed", 17 | 'computers': "Amazon PC", 18 | 'photo': "Amazon Photo", 19 | 'coauthorcs': "Coauthor CS", 20 | 'coauthorphysics': "Coauthor PHY" 21 | } 22 | 23 | markers = { 24 | "cora": "v", 25 | "citeseer": "^", 26 | "pubmed": "<", 27 | "computers": ">", 28 | "photo": "P", 29 | "coauthorcs": "X", 30 | "coauthorphysics": "D", 31 | } 32 | 33 | colors = { 34 | "cora": "r", 35 | "citeseer": "m", 36 | "pubmed": "g", 37 | "computers": "c", 38 | "photo": "b", 39 | "coauthorcs": "y", 40 | "coauthorphysics": "k", 41 | } 42 | 43 | evaluation_keys = ['num_clusters'] 44 | 45 | results = {dataset: {} for dataset in dataset_names} 46 | for dataset in dataset_names: 47 | for evaluation_key in evaluation_keys: 48 | result_dataset_eval = [] 49 | result_dataset_eval_std = [] 50 | for lam in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]: 51 | results_dataset = [] 52 | for seed in range(10): 53 | path = f"results/results_{dataset}_{lam}_300_gcn_{seed}.pt" 54 | results_dataset.append(torch.load(path)[evaluation_key]) 55 | result_dataset_eval.append(np.mean(results_dataset)) 56 | result_dataset_eval_std.append(np.std(results_dataset)) 57 | results[dataset][evaluation_key] = copy.deepcopy(result_dataset_eval) 58 | results[dataset][evaluation_key + '_std'] = copy.deepcopy(result_dataset_eval_std) 59 | 60 | fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(4, 3), sharex=True) 61 | 62 | labelsize = 14 63 | ticksize = 12 64 | markersize = 6 65 | linewidth = 1.5 66 | 67 | xticks = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8] 68 | 69 | ax = axes 70 | ls = [None] * len(dataset_names) 71 | for i, method_key in enumerate(dataset_names): 72 | ls[i], = ax.plot(xticks, results[method_key]['num_clusters'], label=method_key, marker=markers[method_key], color=colors[method_key]) 73 | ax.fill_between(xticks, np.array(results[method_key]['num_clusters']) - 
import torch
import numpy as np

dataset_names = ['cora', 'citeseer', 'pubmed', 'computers', 'photo', 'coauthorcs', 'coauthorphysics']

# Horizontal rule printed above and below every table.
_RULE = '-----------------------------------'


def _load_runs(stem):
    """Load the ten per-seed result dicts saved as ``results/results_<stem>_<seed>.pt``."""
    return [torch.load(f'results/results_{stem}_{seed}.pt') for seed in range(10)]


def _cell(value):
    """Format one score as a percentage followed by a LaTeX column separator."""
    return f"{(value * 100):.1f} & "


# Tables 1 and 3 share one layout: two metrics per dataset, one row per lambda.
for title, first_key, second_key in (
        ('Table 1 Results', 'conductance', 'modularity'),
        ('Table 3 Results', 'nmi', 'sample_f1_score')):
    print(title)
    print(_RULE)
    for lam in [0.0, 0.2, 0.8]:
        performance = "\\model (\\lambda={}) & ".format(lam)
        for dataset_name in dataset_names:
            runs = _load_runs(f'{dataset_name}_{lam}_300_gcn')
            performance += _cell(np.mean([run[first_key] for run in runs]))
            performance += _cell(np.mean([run[second_key] for run in runs]))
        # Drop the trailing "& " and terminate the LaTeX row.
        print(performance[:-2] + '\\\\')
    print(_RULE + '\n')

# Table 4: every metric for every base model at a fixed lambda.
print('Table 4 Results')
print(_RULE)
dataset_name_map = {
    'cora': "Cora",
    'citeseer': "CiteSeer",
    'pubmed': "PubMed",
    'computers': "Amazon PC",
    'photo': "Amazon Photo",
    'coauthorcs': "Coauthor CS",
    'coauthorphysics': "Coauthor PHYSICS"
}
lam = 0.2
all_data = []
for dataset_name in dataset_names:
    row_data = []
    performance = f"\\textsc{{{dataset_name_map[dataset_name]}}} & "
    for metric in ['conductance', 'modularity', 'nmi', 'sample_f1_score']:
        for base_model in ['gcn', 'gat', 'gin', 'sage']:
            runs = _load_runs(f'{dataset_name}_{lam}_300_{base_model}')
            score = np.mean([run[metric] for run in runs])
            performance += _cell(score)
            row_data.append(score)
    print(performance[:-2] + '\\\\')
    all_data.append(row_data)
# Column-wise average over datasets forms the final row.
all_data = np.array(all_data).mean(axis=0)
performance = "\\textsc{AVERAGE} & " + "".join(_cell(value) for value in all_data)
print(performance[:-2] + '\\\\')
print(_RULE + '\n')

# Table 5: effect of alpha; reuses the lambda set for Table 4 above.
dataset_names_2 = ['cora', 'citeseer', 'pubmed']
print('Table 5 Results')
print(_RULE)
for alp in [0.0, 0.5, 1.0]:
    performance = f"$\\alpha={alp}$ & "
    for dataset_name in dataset_names_2:
        if alp == 0.0:
            stem = f'{dataset_name}_{lam}_300_gcn'
        else:
            stem = f'{dataset_name}_{lam}_{alp}_300_gcn'
        runs = _load_runs(stem)
        performance += _cell(np.mean([run['modularity'] for run in runs]))
        performance += _cell(np.mean([run['sample_f1_score'] for run in runs]))
    print(performance[:-2] + '\\\\')
print(_RULE + '\n')
13 | res = 0 14 | for i in range(k): 15 | s = random.sample(range(0, num_nodes), 1000) 16 | 17 | mx = max(clusters) 18 | s_clusters = clusters[s] 19 | 20 | MM = np.zeros((len(s_clusters), mx + 1)) 21 | for i in range(len(s_clusters)): 22 | MM[i][s_clusters[i]] = 1 23 | MM = torch.tensor(MM) 24 | MM = torch.matmul(MM, torch.t(MM)).flatten() 25 | 26 | labels = test_data.y.squeeze() 27 | mx = max(labels) 28 | 29 | s_labels = labels[s] 30 | 31 | CM = np.zeros((len(s_labels), mx + 1)) 32 | for i in range(len(s_labels)): 33 | CM[i][s_labels[i]] = 1 34 | CM = torch.tensor(CM) 35 | CM = torch.matmul(CM, torch.t(CM)).flatten() 36 | 37 | x = f1_score(CM, MM) 38 | res = res + x 39 | 40 | return res / k 41 | 42 | 43 | def compute_fast_modularity(clusters, num_nodes, num_edges, torch_sparse_adj, degree, device): 44 | mx = max(clusters) 45 | MM = np.zeros((num_nodes, mx + 1)) 46 | for i in range(len(clusters)): 47 | MM[i][clusters[i]] = 1 48 | MM = torch.tensor(MM).double().to(device) 49 | 50 | x = torch.matmul(torch.t(MM), torch_sparse_adj.double()) 51 | x = torch.matmul(x, MM) 52 | x = torch.trace(x) 53 | 54 | y = torch.matmul(torch.t(MM), degree.double()) 55 | y = torch.matmul(torch.t(y.unsqueeze(dim=0)), y.unsqueeze(dim=0)) 56 | y = torch.trace(y) 57 | y = y / (2 * num_edges) 58 | return ((x - y) / (2 * num_edges)).item() 59 | 60 | 61 | def compute_nmi(clusters, labels): 62 | return normalized_mutual_info_score(clusters, labels) 63 | 64 | 65 | def compute_conductance(clusters, Graph): 66 | comms = [[] for i in range(max(clusters) + 1)] 67 | for i in range(len(clusters)): 68 | comms[clusters[i]].append(i) 69 | conductance=[] 70 | for com in comms: 71 | try: 72 | conductance.append(nx.conductance(Graph, com, weight='weight')) 73 | except: 74 | continue 75 | 76 | return conductance 77 | -------------------------------------------------------------------------------- /SSL/GraphCL/transferLearning_MoleculeNet/README.md: 
class DataLoaderSubstructContext(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch, collating with
    :meth:`BatchSubstructContext.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # Pass the collate function itself instead of a lambda wrapper: a
        # lambda cannot be pickled, which breaks worker processes that are
        # started with the "spawn" method.
        super().__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchSubstructContext.from_data_list,
            **kwargs)


class DataLoaderMasking(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch, collating with
    :meth:`BatchMasking.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # See DataLoaderSubstructContext: no lambda, so the loader pickles.
        super().__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchMasking.from_data_list,
            **kwargs)


class DataLoaderAE(torch.utils.data.DataLoader):
    r"""Data loader which merges data objects from a
    :class:`torch_geometric.data.dataset` to a mini-batch, collating with
    :meth:`BatchAE.from_data_list`.

    Args:
        dataset (Dataset): The dataset from which to load the data.
        batch_size (int, optional): How many samples per batch to load.
            (default: :obj:`1`)
        shuffle (bool, optional): If set to :obj:`True`, the data will be
            reshuffled at every epoch (default: :obj:`True`)
    """

    def __init__(self, dataset, batch_size=1, shuffle=True, **kwargs):
        # See DataLoaderSubstructContext: no lambda, so the loader pickles.
        super().__init__(
            dataset,
            batch_size,
            shuffle,
            collate_fn=BatchAE.from_data_list,
            **kwargs)
def get_test_acc(event_file, num_epochs=100):
    """Return the test AUC logged at the epoch with the best validation AUC.

    Args:
        event_file: Path of a TensorBoard events file.
        num_epochs: Size of the per-epoch buffers; an event with step ``s``
            is stored at index ``s - 1``.

    Returns:
        The ``data/test_auc`` value at the index where ``data/val_auc`` is
        largest.
    """
    val_auc_list = np.zeros(num_epochs)
    test_auc_list = np.zeros(num_epochs)

    # Stream the log; there is no need to hold every event in memory.
    for event in tf.train.summary_iterator(event_file):
        values = event.summary.value
        if len(values) == 0:
            continue
        first = values[0]
        if first.tag == "data/val_auc":
            val_auc_list[event.step - 1] = first.simple_value
        elif first.tag == "data/test_auc":
            test_auc_list[event.step - 1] = first.simple_value

    best_epoch = np.argmax(val_auc_list)
    return test_auc_list[best_epoch]


if __name__ == "__main__":

    dataset_list = ["muv", "bace", "bbbp", "clintox", "hiv", "sider", "tox21", "toxcast"]
    # 10 random seeds
    seed_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    config_list = [
        "gin_nopretrain",
        "gin_infomax",
        "gin_edgepred",
        "gin_masking",
        "gin_contextpred",
        "gin_supervised",
        "gin_supervised_infomax",
        "gin_supervised_edgepred",
        "gin_supervised_masking",
        "gin_supervised_contextpred",
        "gcn_nopretrain",
        "gcn_supervised_contextpred",
        "graphsage_nopretrain",
        "graphsage_supervised_contextpred",
        "gat_nopretrain",
        "gat_supervised_contextpred",
    ]

    # result_mat[i, j, k]: seed i, config j, dataset k.
    result_mat = np.zeros((len(seed_list), len(config_list), len(dataset_list)))

    for i, seed in enumerate(seed_list):
        for j, config in enumerate(config_list):
            for k, dataset in enumerate(dataset_list):
                dir_name = "runs/finetune_cls_runseed" + str(seed) + "/" + dataset + "/" + config
                print(dir_name)

                # Sorted so the chosen file does not depend on the arbitrary
                # order in which os.listdir returns entries.
                event_file_list = sorted(f for f in os.listdir(dir_name) if "events" in f)
                if not event_file_list:
                    raise FileNotFoundError("no events file found in " + dir_name)

                result_mat[i, j, k] = get_test_acc(os.path.join(dir_name, event_file_list[0]))

    with open("result_summary", "wb") as f:
        pickle.dump({"result_mat": result_mat, "seed_list": seed_list, "config_list": config_list, "dataset_list": dataset_list}, f)
libxml2=2.9.9=hea5a465_1 29 | - mkl=2020.0=166 30 | - mkl-service=2.3.0=py36he904b0f_0 31 | - mkl_fft=1.0.15=py36ha843d7b_0 32 | - mkl_random=1.1.0=py36hd6b4f25_0 33 | - ncurses=6.0=0 34 | - numpy=1.18.1=py36h4f9e942_0 35 | - numpy-base=1.18.1=py36hde5b4d6_1 36 | - olefile=0.46=py_0 37 | - openssl=1.0.2u=h7b6447c_0 38 | - pandas=1.0.3=py36h0573a6f_0 39 | - pcre=8.43=he6710b0_0 40 | - pillow=7.1.2=py36hb39fc2d_0 41 | - pip=20.0.2=py36_1 42 | - pixman=0.38.0=h7b6447c_0 43 | - py-boost=1.67.0=py36h04863e7_4 44 | - python=3.6.5=hc3d631a_2 45 | - python-dateutil=2.8.1=py_0 46 | - pytz=2020.1=py_0 47 | - rdkit=2019.03.1.0=py36hc20afe1_1 48 | - readline=7.0=ha6073c6_4 49 | - setuptools=46.1.3=py36_0 50 | - six=1.14.0=py36_0 51 | - sqlite=3.23.1=he433501_0 52 | - tk=8.6.8=hbc83047_0 53 | - wheel=0.34.2=py36_0 54 | - xz=5.2.5=h7b6447c_0 55 | - zlib=1.2.11=h7b6447c_3 56 | - zstd=1.3.7=h0b5b093_0 57 | - pip: 58 | - decorator==4.4.2 59 | - isodate==0.6.0 60 | - joblib==0.14.1 61 | - networkx==2.4 62 | - plyfile==0.7.2 63 | - protobuf==3.11.3 64 | - pyparsing==2.4.7 65 | - rdflib==5.0.0 66 | - scikit-learn==0.22.2.post1 67 | - scipy==1.4.1 68 | - sklearn==0.0 69 | - tensorboardx==2.0 70 | - torch==1.0.1 71 | - torch-cluster==1.2.4 72 | - torch-geometric==1.0.3 73 | - torch-scatter==1.1.2 74 | - torch-sparse==0.2.4 75 | - torch-spline-conv==1.0.6 76 | - tqdm==4.46.0 77 | 78 | -------------------------------------------------------------------------------- /SSL/GraphCL/unsupervised_TU/README.md: -------------------------------------------------------------------------------- 1 | ## Training & Evaluation 2 | ./go.sh 0 NCI1 random2 3 | ./go.sh 1 PROTEINS random2 4 | ./go.sh 2 DD random2 5 | ./go.sh 3 MUTAG random2 6 | ./go.sh 4 COLLAB random2 7 | ./go.sh 5 IMDB-BINARY random2 8 | ./go.sh 6 REDDIT-BINARY random2 9 | ./go.sh 7 REDDIT-MULTI-5K random2 -------------------------------------------------------------------------------- /SSL/GraphCL/unsupervised_TU/arguments.py: 
def arg_parse(argv=None):
    """Parse the command-line options of the unsupervised training script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            the arguments are taken from ``sys.argv``, as before.

    Returns:
        argparse.Namespace: Parsed options ``DS``, ``local``, ``glob``,
        ``prior``, ``lr``, ``num_gc_layers``, ``hidden_dim``, ``aug``,
        ``seed`` and ``num_code``.
    """
    parser = argparse.ArgumentParser(description='GcnInformax Arguments.')
    parser.add_argument('--DS', dest='DS', help='Dataset')

    # Boolean switches: absent -> False, present -> True.
    for flag in ('local', 'glob', 'prior'):
        parser.add_argument('--' + flag, dest=flag, action='store_true')

    parser.add_argument('--lr', dest='lr', type=float,
                        help='Learning rate.')
    parser.add_argument('--num-gc-layers', dest='num_gc_layers', type=int, default=5,
                        help='Number of graph convolution layers before each pooling')
    parser.add_argument('--hidden-dim', dest='hidden_dim', type=int, default=32,
                        help='')

    parser.add_argument('--aug', type=str, default='dnodes')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--num_code', type=int, default=16)

    return parser.parse_args(argv)
# Basic DCGAN-like encoders

_basic28x28 = {
    'Encoder': ConvnetEncoder,
    'conv_args': [(64, 5, 2, 2, True, False, 'ReLU', None),
                  (128, 5, 2, 2, True, False, 'ReLU', None)],
    # NOTE(review): this fc entry has five fields while every other config
    # here uses four -- confirm against the Convnet implementation.
    'fc_args': [(1024, True, False, 'ReLU', None)],
    'local_idx': 1,
    'fc_idx': 0,
}

_basic32x32 = {
    'Encoder': ConvnetEncoder,
    'conv_args': [(64, 4, 2, 1, True, False, 'ReLU', None),
                  (128, 4, 2, 1, True, False, 'ReLU', None),
                  (256, 4, 2, 1, True, False, 'ReLU', None)],
    'fc_args': [(1024, True, False, 'ReLU')],
    'local_idx': 1,
    'conv_idx': 2,
    'fc_idx': 0,
}

_basic64x64 = {
    'Encoder': ConvnetEncoder,
    'conv_args': [(64, 4, 2, 1, True, False, 'ReLU', None),
                  (128, 4, 2, 1, True, False, 'ReLU', None),
                  (256, 4, 2, 1, True, False, 'ReLU', None),
                  (512, 4, 2, 1, True, False, 'ReLU', None)],
    'fc_args': [(1024, True, False, 'ReLU')],
    'local_idx': 2,
    'conv_idx': 3,
    'fc_idx': 0,
}

# Alexnet-like encoders

_alex64x64 = {
    'Encoder': ConvnetEncoder,
    'conv_args': [(96, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
                  (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2)),
                  (384, 3, 1, 1, True, False, 'ReLU', None),
                  (384, 3, 1, 1, True, False, 'ReLU', None),
                  (192, 3, 1, 1, True, False, 'ReLU', ('MaxPool2d', 3, 2))],
    'fc_args': [(4096, True, False, 'ReLU'),
                (4096, True, False, 'ReLU')],
    'local_idx': 2,
    'conv_idx': 4,
    'fc_idx': 1,
}
def raise_measure_error(measure):
    """Raise ``NotImplementedError`` naming the unsupported `measure`."""
    supported_measures = ['GAN', 'JSD', 'X2', 'KL', 'RKL', 'DV', 'H2', 'W1']
    raise NotImplementedError(
        'Measure `{}` not supported. Supported: {}'.format(measure,
                                                           supported_measures))


def get_positive_expectation(p_samples, measure, average=True):
    """Computes the positive part of a divergence / difference.

    Args:
        p_samples: Positive samples.
        measure: Measure to compute for; one of 'GAN', 'JSD', 'X2', 'KL',
            'RKL', 'DV', 'H2', 'W1'.
        average: Average the result over samples.

    Returns:
        torch.Tensor: The mean if `average`, else the per-sample values.

    Raises:
        NotImplementedError: If `measure` is not supported.
    """
    log_2 = math.log(2.)

    if measure == 'GAN':
        Ep = - F.softplus(-p_samples)
    elif measure == 'JSD':
        Ep = log_2 - F.softplus(- p_samples)
    elif measure == 'X2':
        Ep = p_samples ** 2
    elif measure == 'KL':
        Ep = p_samples + 1.
    elif measure == 'RKL':
        Ep = -torch.exp(-p_samples)
    elif measure == 'DV':
        Ep = p_samples
    elif measure == 'H2':
        Ep = 1. - torch.exp(-p_samples)
    elif measure == 'W1':
        Ep = p_samples
    else:
        raise_measure_error(measure)

    if average:
        return Ep.mean()
    return Ep


def get_negative_expectation(q_samples, measure, average=True):
    """Computes the negative part of a divergence / difference.

    Args:
        q_samples: Negative samples.
        measure: Measure to compute for; one of 'GAN', 'JSD', 'X2', 'KL',
            'RKL', 'DV', 'H2', 'W1'.
        average: Average the result over samples.

    Returns:
        torch.Tensor: The mean if `average`, else the per-sample values.

    Raises:
        NotImplementedError: If `measure` is not supported.
    """
    log_2 = math.log(2.)

    if measure == 'GAN':
        Eq = F.softplus(-q_samples) + q_samples
    elif measure == 'JSD':
        Eq = F.softplus(-q_samples) + q_samples - log_2
    elif measure == 'X2':
        # abs() has the same value as sqrt(q ** 2) but a finite gradient at
        # q == 0; sqrt(q ** 2) back-propagates inf * 0 = NaN there.
        Eq = -0.5 * ((q_samples.abs() + 1.) ** 2)
    elif measure == 'KL':
        Eq = torch.exp(q_samples)
    elif measure == 'RKL':
        Eq = q_samples - 1.
    elif measure == 'DV':
        Eq = log_sum_exp(q_samples, 0) - math.log(q_samples.size(0))
    elif measure == 'H2':
        Eq = torch.exp(q_samples) - 1.
    elif measure == 'W1':
        Eq = q_samples
    else:
        raise_measure_error(measure)

    if average:
        return Eq.mean()
    return Eq
def log_sum_exp(x, axis=None):
    """Log sum exp function

    Args:
        x: Input.
        axis: Axis over which to perform sum. ``None`` reduces over all
            elements.

    Returns:
        torch.Tensor: log sum exp

    """
    # The hand-rolled version subtracted an un-broadcast maximum, so it only
    # worked for axis 0 and crashed for the default ``axis=None``.
    if axis is None:
        return torch.logsumexp(x.reshape(-1), dim=0)
    return torch.logsumexp(x, dim=axis)


def random_permute(X):
    """Randomly permutes a tensor.

    The permutation runs along the first axis (dim 0) and is drawn
    independently for every index of the last axis; all indices of the
    middle axis move together.

    Args:
        X: Input tensor with three axes.

    Returns:
        torch.Tensor: Tensor of the same shape as `X`.

    """
    X = X.transpose(1, 2)
    # Sorting random noise along dim 0 yields one permutation per column.
    # Everything is created on X's device instead of a hard-coded CUDA one.
    noise = torch.rand((X.size(0), X.size(1)), device=X.device)
    idx = noise.sort(0)[1]
    # torch.arange replaces the deprecated, end-inclusive torch.range.
    adx = torch.arange(X.size(1), device=X.device)
    X = X[idx, adx[None, :]].transpose(1, 2)
    return X
27 | ''' 28 | 29 | super().__init__(*args, **kwargs) 30 | 31 | if local_idx is None: 32 | raise ValueError('`local_idx` must be set') 33 | 34 | conv_idx = conv_idx or local_idx 35 | 36 | self.local_idx = local_idx 37 | self.multi_idx = multi_idx 38 | self.conv_idx = conv_idx 39 | self.fc_idx = fc_idx 40 | 41 | def forward(self, x: torch.Tensor): 42 | ''' 43 | 44 | Args: 45 | x: Input tensor. 46 | 47 | Returns: 48 | local_out, multi_out, hidden_out, global_out 49 | 50 | ''' 51 | 52 | outs = super().forward(x, return_full_list=True) 53 | if len(outs) == 2: 54 | conv_out, fc_out = outs 55 | else: 56 | conv_before_out, res_out, conv_after_out, fc_out = outs 57 | conv_out = conv_before_out + res_out + conv_after_out 58 | 59 | local_out = conv_out[self.local_idx] 60 | 61 | if self.multi_idx is not None: 62 | multi_out = conv_out[self.multi_idx] 63 | else: 64 | multi_out = None 65 | 66 | if len(fc_out) > 0: 67 | if self.fc_idx is not None: 68 | hidden_out = fc_out[self.fc_idx] 69 | else: 70 | hidden_out = None 71 | global_out = fc_out[-1] 72 | else: 73 | hidden_out = None 74 | global_out = None 75 | 76 | conv_out = conv_out[self.conv_idx] 77 | 78 | return local_out, conv_out, multi_out, hidden_out, global_out 79 | 80 | return Encoder 81 | 82 | 83 | class ConvnetEncoder(create_encoder(Convnet)): 84 | pass 85 | 86 | 87 | class FoldedConvnetEncoder(create_encoder(FoldedConvnet)): 88 | pass 89 | 90 | 91 | class ResnetEncoder(create_encoder(ResNet)): 92 | pass 93 | 94 | 95 | class FoldedResnetEncoder(create_encoder(FoldedResNet)): 96 | pass 97 | -------------------------------------------------------------------------------- /SSL/GraphCL/unsupervised_TU/cortex_DIM/nn_modules/mi_networks.py: -------------------------------------------------------------------------------- 1 | """Module for networks used for computing MI. 
def _init_noisy_identity(weight, n_input):
    """Fill `weight` with U(-0.01, 0.01) noise and set ``weight[i, i] = 1``.

    Uses indexed assignment rather than ``masked_fill_`` with a uint8 mask,
    which current PyTorch rejects (boolean masks only).
    """
    weight.data.uniform_(-0.01, 0.01)
    diag = torch.arange(n_input, device=weight.device)
    weight.data[diag, diag] = 1.


class MIFCNet(nn.Module):
    """Simple custom network for computing MI.

    The output is the sum of a two-layer nonlinear block and a linear
    shortcut initialised close to an identity mapping.
    """
    def __init__(self, n_input, n_units):
        """

        Args:
            n_input: Number of input units.
            n_units: Number of output units; must be at least `n_input`.
        """
        super().__init__()

        assert(n_units >= n_input)

        self.linear_shortcut = nn.Linear(n_input, n_units)
        self.block_nonlinear = nn.Sequential(
            nn.Linear(n_input, n_units),
            nn.BatchNorm1d(n_units),
            nn.ReLU(),
            nn.Linear(n_units, n_units)
        )

        # initialize the initial projection to a sort of noisy copy
        _init_noisy_identity(self.linear_shortcut.weight, n_input)

    def forward(self, x):
        """

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: network output.

        """
        h = self.block_nonlinear(x) + self.linear_shortcut(x)
        return h


class MI1x1ConvNet(nn.Module):
    """Simple custom 1x1 convnet.

    The sum of a two-layer 1x1-convolution block and a 1x1-convolution
    shortcut is layer-normalised over the channel axis.
    """
    def __init__(self, n_input, n_units):
        """

        Args:
            n_input: Number of input units.
            n_units: Number of output units.
        """

        super().__init__()

        self.block_nonlinear = nn.Sequential(
            nn.Conv1d(n_input, n_units, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm1d(n_units),
            nn.ReLU(),
            nn.Conv1d(n_units, n_units, kernel_size=1, stride=1, padding=0, bias=True),
        )

        # LayerNorm normalises the last axis, so channels are moved there
        # and back again.
        self.block_ln = nn.Sequential(
            Permute(0, 2, 1),
            nn.LayerNorm(n_units),
            Permute(0, 2, 1)
        )

        self.linear_shortcut = nn.Conv1d(n_input, n_units, kernel_size=1,
                                         stride=1, padding=0, bias=False)

        # initialize shortcut to be like identity (if possible)
        if n_units >= n_input:
            _init_noisy_identity(self.linear_shortcut.weight, n_input)

    def forward(self, x):
        """

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: network output.

        """
        h = self.block_ln(self.block_nonlinear(x) + self.linear_shortcut(x))
        return h
class Fold(torch.nn.Module):
    """Module (re)folding tensor.

    Inverse of the strided cropping: gathers the per-crop outputs of one
    image back into an ``n_locs x n_locs`` grid. Works only on 1x1.

    """
    def __init__(self, img_size, fold_size):
        """

        Args:
            img_size: Images size.
            fold_size: Crop size.
        """
        super().__init__()
        crops_per_side = 2 * (img_size // fold_size) - 1
        self.n_locs = crops_per_side

    def forward(self, x):
        """(Re)folds tensor.

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: Refolded tensor.

        """
        dim_c, dim_x, dim_y = x.size()[1:]
        n_crops = self.n_locs * self.n_locs

        # (images * crops, C, X, Y) -> (images, crops, C, X * Y)
        grouped = x.reshape(-1, n_crops, dim_c, dim_x * dim_y)
        # Move the crop axis last, then spread it over the location grid.
        crops_last = grouped.permute(0, 2, 3, 1)
        folded = crops_last.reshape(-1, dim_c * dim_x * dim_y, self.n_locs, self.n_locs)
        return folded.contiguous()
def local_global_loss_(l_enc, g_enc, edge_index, batch, measure):
    '''Local-global contrastive loss between node and graph embeddings.

    A node and the graph it belongs to form a positive pair; the node and
    every other graph in the batch form negative pairs.

    Args:
        l_enc: Local (node) embeddings, one row per node.
        g_enc: Global (graph) embeddings, one row per graph.
        edge_index: Unused.
        batch: Graph index of every node.
        measure: Name of the divergence measure passed on to
            `get_positive_expectation` / `get_negative_expectation`.
    Returns:
        torch.Tensor: Loss.
    '''
    num_graphs = g_enc.shape[0]
    num_nodes = l_enc.shape[0]
    device = l_enc.device

    # Masks live on the embeddings' device and are filled with one indexed
    # write instead of a Python loop over the nodes.
    batch = torch.as_tensor(batch, dtype=torch.long, device=device)
    pos_mask = torch.zeros((num_nodes, num_graphs), device=device)
    pos_mask[torch.arange(batch.numel(), device=device), batch] = 1.
    neg_mask = 1. - pos_mask

    res = torch.mm(l_enc, g_enc.t())

    E_pos = get_positive_expectation(res * pos_mask, measure, average=False).sum()
    E_pos = E_pos / num_nodes
    E_neg = get_negative_expectation(res * neg_mask, measure, average=False).sum()
    # With a single graph there are no negative pairs; avoid dividing by 0.
    E_neg = E_neg / (num_nodes * max(num_graphs - 1, 1))

    return E_neg - E_pos


def adj_loss_(l_enc, g_enc, edge_index, batch):
    '''Binary cross-entropy between predicted and true adjacency.

    The prediction is ``sigmoid(l_enc @ l_enc.T)`` with a zeroed diagonal;
    the target is the symmetrised adjacency matrix built from `edge_index`.

    Args:
        l_enc: Node embeddings, one row per node.
        g_enc: Unused.
        edge_index: Pair of index tensors (sources, targets), one entry per
            edge.
        batch: Unused.
    Returns:
        torch.Tensor: Loss.
    '''
    num_nodes = l_enc.shape[0]
    device = l_enc.device

    src = edge_index[0].to(device)
    dst = edge_index[1].to(device)

    # Symmetric adjacency, written with two indexed assignments.
    adj = torch.zeros((num_nodes, num_nodes), device=device)
    adj[src, dst] = 1.
    adj[dst, src] = 1.

    off_diagonal = 1. - torch.eye(num_nodes, device=device)
    res = off_diagonal * torch.sigmoid(torch.mm(l_enc, l_enc.t()))

    return F.binary_cross_entropy(res, adj)
class PriorDiscriminator(nn.Module):
    """Three-layer MLP that maps each input row to a value in (0, 1).

    Two ``input_dim -> input_dim`` linear layers with ReLU are followed by an
    ``input_dim -> 1`` linear layer and a sigmoid.
    """

    def __init__(self, input_dim):
        super().__init__()
        width = input_dim
        # Attribute names and creation order are kept so checkpoints and
        # seeded initialisation stay the same.
        self.l0 = nn.Linear(width, width)
        self.l1 = nn.Linear(width, width)
        self.l2 = nn.Linear(width, 1)

    def forward(self, x):
        """Return the sigmoid score for every row of ``x``."""
        hidden = self.l0(x).relu()
        hidden = self.l1(hidden).relu()
        logits = self.l2(hidden)
        return logits.sigmoid()


class FF(nn.Module):
    """Feed-forward block with a linear shortcut; input width is preserved.

    The output is a stack of three ``Linear + ReLU`` layers added to a single
    linear projection of the same input.
    """

    def __init__(self, input_dim):
        super().__init__()
        stack = []
        for _ in range(3):
            stack.append(nn.Linear(input_dim, input_dim))
            stack.append(nn.ReLU())
        # Sequential indices 0, 2, 4 hold the linear layers.
        self.block = nn.Sequential(*stack)
        self.linear_shortcut = nn.Linear(input_dim, input_dim)

    def forward(self, x):
        """Return ``block(x) + linear_shortcut(x)``."""
        transformed = self.block(x)
        shortcut = self.linear_shortcut(x)
        return transformed + shortcut
| for line in f: 18 | x = line.strip().split(',', 6) 19 | if x[1] != tpe: 20 | continue 21 | if x[2] != str(gc): 22 | continue 23 | if x[3] != str(epoch): 24 | continue 25 | if x[5] != str(lr): 26 | continue 27 | tmp = json.loads(x[-1]) 28 | 29 | DS = x[0] 30 | res[DS]['logreg'].append(tmp['logreg']) 31 | res[DS]['svc'].append(tmp['svc']) 32 | res[DS]['linearsvc'].append(tmp['linearsvc']) 33 | res[DS]['randomforest'].append(tmp['randomforest']) 34 | 35 | for DS, lst in res.items(): 36 | if DS != sys.argv[2]: 37 | continue 38 | # print('====================') 39 | # print(DS) 40 | for clf, v in lst.items(): 41 | mn = np.mean(np.array(v[:5]), axis=0) 42 | std = np.std(np.array(v[:5]), axis=0) 43 | 44 | idx = np.argmax(mn) 45 | if mn[idx] > real_res[clf][0] and len(v) > 1: 46 | real_res[clf] = [mn[idx], std[idx], epoch, lr, gc, idx, len(v)] 47 | # print(epoch, lr, gc, clf, idx, mn[idx], std[idx], len(v)) 48 | print(real_res) 49 | 50 | -------------------------------------------------------------------------------- /SSL/GraphMAE/README.md: -------------------------------------------------------------------------------- 1 | ## Run the codes 2 | Node Classification: 3 | Please refer to the bash script `run_transductive.sh` for running the training and evaluation pipeline. 
4 | 5 | -------------------------------------------------------------------------------- /SSL/GraphMAE/configs.yml: -------------------------------------------------------------------------------- 1 | cora: 2 | lr: 0.001 3 | lr_f: 0.005 4 | # num_hidden: 512 5 | # num_heads: 2 6 | # num_layers: 3 7 | weight_decay: 2e-4 8 | weight_decay_f: 1e-4 9 | max_epoch: 1500 10 | max_epoch_f: 1000 11 | mask_rate: 0.5 12 | encoder: gat 13 | decoder: gat 14 | activation: prelu 15 | in_drop: 0.2 16 | attn_drop: 0.1 17 | linear_prob: True 18 | loss_fn: sce 19 | drop_edge_rate: 0.0 20 | optimizer: adam 21 | replace_rate: 0.05 22 | alpha_l: 3 23 | scheduler: False 24 | citeseer: 25 | lr: 0.001 26 | lr_f: 0.005 27 | # num_hidden: 512 28 | # num_heads: 4 29 | # num_layers: 2 30 | weight_decay: 2e-5 31 | weight_decay_f: 0.01 32 | max_epoch: 500 33 | max_epoch_f: 500 34 | mask_rate: 0.5 35 | encoder: gat 36 | decoder: gat 37 | activation: prelu 38 | in_drop: 0.2 39 | attn_drop: 0.1 40 | linear_prob: True 41 | loss_fn: sce 42 | drop_edge_rate: 0.0 43 | optimizer: adam 44 | replace_rate: 0.1 45 | alpha_l: 1 # or 3 46 | scheduler: False 47 | pubmed: 48 | lr: 0.0005 49 | lr_f: 0.001 50 | # num_hidden: 1024 51 | # num_heads: 1 52 | # num_layers: 5 53 | weight_decay: 1e-5 54 | weight_decay_f: 1e-4 55 | max_epoch: 500 56 | max_epoch_f: 500 57 | mask_rate: 0.5 58 | encoder: gat 59 | decoder: gat 60 | activation: prelu 61 | in_drop: 0.2 62 | attn_drop: 0.1 63 | linear_prob: True 64 | loss_fn: sce 65 | drop_edge_rate: 0.0 66 | optimizer: adam 67 | replace_rate: 0.0 68 | alpha_l: 3 69 | scheduler: False 70 | -------------------------------------------------------------------------------- /SSL/GraphMAE/graphmae/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LUOyk1999/NodeID/d3d5318e870b34bceeeb0453bd61cdaf7c939ff2/SSL/GraphMAE/graphmae/__init__.py 
def build_model(args):
    """Construct a ``PreModel`` from the attributes of ``args``.

    Args:
        args: Object exposing the run settings as attributes
            (``num_features``, ``num_hidden``, ``num_layers``, ``num_heads``,
            ``num_out_heads``, ``activation``, ``in_drop``, ``attn_drop``,
            ``negative_slope``, ``residual``, ``encoder``, ``decoder``,
            ``mask_rate``, ``norm``, ``loss_fn``, ``drop_edge_rate``,
            ``replace_rate``, ``alpha_l``, ``concat_hidden``, ``num_codes``).

    Returns:
        The ``PreModel`` instance built from those settings.
    """
    # Each setting is forwarded as-is; only a few keyword names differ from
    # the attribute names (e.g. in_drop -> feat_drop, encoder -> encoder_type).
    return PreModel(
        in_dim=args.num_features,
        num_hidden=args.num_hidden,
        num_layers=args.num_layers,
        nhead=args.num_heads,
        nhead_out=args.num_out_heads,
        activation=args.activation,
        feat_drop=args.in_drop,
        attn_drop=args.attn_drop,
        negative_slope=args.negative_slope,
        residual=args.residual,
        encoder_type=args.encoder,
        decoder_type=args.decoder,
        mask_rate=args.mask_rate,
        norm=args.norm,
        loss_fn=args.loss_fn,
        drop_edge_rate=args.drop_edge_rate,
        replace_rate=args.replace_rate,
        alpha_l=args.alpha_l,
        concat_hidden=args.concat_hidden,
        num_codes=args.num_codes,
    )
def sce_loss(x, y, alpha=3):
    """Return the scaled cosine error between ``x`` and ``y``.

    Both inputs are L2-normalised along the last dimension, and
    ``(1 - cosine_similarity) ** alpha`` is averaged over all remaining
    positions.

    Args:
        x: Predicted features; the last dimension is the feature axis.
        y: Target features with the same shape as ``x``.
        alpha: Exponent applied to the per-row cosine error.

    Returns:
        torch.Tensor: Scalar mean loss.
    """
    x = F.normalize(x, p=2, dim=-1)
    y = F.normalize(y, p=2, dim=-1)

    cosine = (x * y).sum(dim=-1)
    # Out-of-place pow gives the same values as the former in-place `pow_`
    # without mutating a tensor that autograd is tracking.
    return (1 - cosine).pow(alpha).mean()


def sig_loss(x, y):
    """Return the mean of ``sigmoid(-cosine_similarity(x, y))``.

    Args:
        x: Predicted features; the last dimension is the feature axis.
        y: Target features with the same shape as ``x``.

    Returns:
        torch.Tensor: Scalar mean loss.
    """
    x = F.normalize(x, p=2, dim=-1)
    y = F.normalize(y, p=2, dim=-1)

    # Reduce over the same axis that was normalised (previously `sum(1)`,
    # which only matched the feature axis for 2-D inputs).
    cosine = (x * y).sum(dim=-1)
    return torch.sigmoid(-cosine).mean()
--------------------------------------------------------------------------------