├── cora.png ├── pubmed.png ├── citeseer.png ├── requirements-cpu.txt ├── requirements-gpu.txt ├── LICENSE ├── .gitignore ├── gcn ├── utils.py ├── trainer.py └── model.py ├── README.md └── notebooks └── gcn_testing.ipynb /cora.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrejmiscic/gcn-pytorch/HEAD/cora.png -------------------------------------------------------------------------------- /pubmed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrejmiscic/gcn-pytorch/HEAD/pubmed.png -------------------------------------------------------------------------------- /citeseer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/andrejmiscic/gcn-pytorch/HEAD/citeseer.png -------------------------------------------------------------------------------- /requirements-cpu.txt: -------------------------------------------------------------------------------- 1 | dataclasses==0.7 2 | numpy==1.18.5 3 | pandas==1.1.3 4 | plotnine==0.6.0 5 | scipy==1.4.1 6 | scikit-learn==0.22.2.post1 7 | torch==1.6.0+cpu 8 | torch-cluster==1.5.7+cpu 9 | -f https://pytorch-geometric.com/whl/torch-1.6.0.html 10 | torch-scatter==2.0.5+cpu 11 | -f https://pytorch-geometric.com/whl/torch-1.6.0.html 12 | torch-sparse==0.6.7+cpu 13 | -f https://pytorch-geometric.com/whl/torch-1.6.0.html 14 | torch-spline-conv==1.2.0+cpu 15 | -f https://pytorch-geometric.com/whl/torch-1.6.0.html 16 | torch-geometric==1.6.1 17 | tqdm==4.41.1 -------------------------------------------------------------------------------- /requirements-gpu.txt: -------------------------------------------------------------------------------- 1 | dataclasses==0.7 2 | numpy==1.18.5 3 | pandas==1.1.3 4 | plotnine==0.6.0 5 | scipy==1.4.1 6 | scikit-learn==0.22.2.post1 7 | torch==1.6.0+cu101 8 | torch-scatter==latest+cu101 9 | -f 
https://pytorch-geometric.com/whl/torch-1.6.0.html 10 | torch-sparse==latest+cu101 11 | -f https://pytorch-geometric.com/whl/torch-1.6.0.html 12 | torch-cluster==latest+cu101 13 | -f https://pytorch-geometric.com/whl/torch-1.6.0.html 14 | torch-spline-conv==latest+cu101 15 | -f https://pytorch-geometric.com/whl/torch-1.6.0.html 16 | torch-geometric==1.6.1 17 | tqdm==4.41.1 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Andrej Miscic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
class Dataset(Enum):
    """Planetoid citation datasets that ``load_data`` can load.

    Only the member *name* is used (it is passed to ``Planetoid(name=...)``).
    """
    Cora = 0
    CiteSeer = 1
    PubMed = 2


def load_data(dataset_name: Dataset, load_dir="planetoid"):
    """Load one Planetoid graph and prepare every tensor the models need.

    Args:
        dataset_name: which dataset to load; its ``name`` is forwarded to ``Planetoid``.
        load_dir: root directory handed to ``Planetoid``.

    Returns:
        An 8-tuple ``(features, labels, train_labels, val_labels, test_labels,
        adjacency_matrix, laplacian_matrix, num_classes)`` where

        * ``features`` are the node features divided by their row sum
          (all-zero rows are left as zeros),
        * ``labels`` is an unmasked copy of ``data.y``,
        * ``*_labels`` are copies of ``data.y`` with every node outside the
          respective split mask set to ``CrossEntropyLoss().ignore_index``,
        * ``adjacency_matrix`` / ``laplacian_matrix`` are torch sparse COO tensors,
        * ``num_classes`` is the number of distinct values in ``data.y``.
    """
    dataset = Planetoid(root=load_dir, name=dataset_name.name)
    data = dataset[0]  # a single graph

    # read & normalize features; rows summing to 0 are divided by 1 to avoid NaNs
    features = data.x.clone()
    features_sum = features.sum(1).unsqueeze(1)
    features_sum[features_sum == 0] = 1.
    features = torch.div(features, features_sum)

    # read train, test, valid labels based on public splits of this data
    ignore_index = nn.CrossEntropyLoss().ignore_index  # = -100, used to ignore not allowed labels in CE loss
    num_classes = int(torch.unique(data.y).numel())
    labels = data.y.clone()
    train_labels = set_labels(data.y.clone(), data.train_mask, ignore_index)
    val_labels = set_labels(data.y.clone(), data.val_mask, ignore_index)
    test_labels = set_labels(data.y.clone(), data.test_mask, ignore_index)

    # read & normalize adjacency matrix; the node count is taken from the feature matrix so that
    # isolated nodes with the highest indices are not dropped from the adjacency matrix
    adjacency_matrix, adj_csr = get_adjacency_matrix(data.edge_index, num_nodes=data.x.size(0))

    # compute rescaled laplacian
    laplacian_matrix = get_laplacian_matrix(adj_csr)

    return features, labels, train_labels, val_labels, test_labels, adjacency_matrix, laplacian_matrix, num_classes


def set_labels(initial_labels, set_mask, ignore_label):
    """Overwrite (in place) every entry outside ``set_mask`` with ``ignore_label``.

    Args:
        initial_labels: label tensor; it is modified in place, so pass a clone
            if the original must be kept.
        set_mask: boolean mask, ``True`` for entries that keep their label.
        ignore_label: value written to all entries where the mask is ``False``.

    Returns:
        The same ``initial_labels`` object after modification.
    """
    initial_labels[~set_mask] = ignore_label
    return initial_labels


def _scipy_to_torch_sparse(matrix):
    """Convert a scipy sparse matrix into a float32 torch sparse COO tensor of the same shape."""
    coo = matrix.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))


def get_adjacency_matrix(edge_index, num_nodes=None):
    """Build the symmetrically normalized adjacency matrix with self loops.

    Computes ``D^(-0.5) * (A + I) * D^(-0.5)`` where ``D`` is the degree matrix
    of ``A + I``.

    Args:
        edge_index: edge list accepted by ``to_scipy_sparse_matrix``.
        num_nodes: number of nodes in the graph. When ``None`` (the default,
            matching the previous behaviour) the size is inferred from
            ``edge_index``, which under-counts if the highest-numbered nodes
            have no edges.

    Returns:
        ``(adjacency_matrix, adj_hat_csr)``: the normalized matrix as a torch
        sparse COO tensor and the same matrix as a scipy sparse matrix.
    """
    # working with scipy sparse since current PyTorch version doesn't support sparse x sparse multiplication
    adj = to_scipy_sparse_matrix(edge_index, num_nodes=num_nodes)
    adj = adj + sparse.eye(adj.shape[0])  # add self loops
    # self loops guarantee every degree is >= 1, so the negative power is always finite
    degree_for_norm = sparse.diags(np.power(np.array(adj.sum(1)), -0.5).flatten())  # D^(-0.5)
    adj_hat_csr = degree_for_norm.dot(adj.dot(degree_for_norm))  # D^(-0.5) * A * D^(-0.5)

    return _scipy_to_torch_sparse(adj_hat_csr), adj_hat_csr


def get_laplacian_matrix(adjacency_matrix_csr: sparse.csr_matrix):
    """Compute the rescaled Laplacian ``2 * L / lambda_max - I`` with ``L = I - adjacency_matrix_csr``.

    Args:
        adjacency_matrix_csr: an already normalized adjacency matrix. ``load_data``
            passes the self-loop-augmented matrix from ``get_adjacency_matrix``.

    Returns:
        The rescaled Laplacian as a float32 torch sparse COO tensor.
    """
    # imported explicitly: `import scipy.sparse` alone does not guarantee that the
    # `scipy.sparse.linalg` submodule has been loaded
    from scipy.sparse.linalg import eigsh

    identity = sparse.eye(adjacency_matrix_csr.shape[0])
    # since adjacency_matrix_csr is already normalized, the laplacian is simply:
    laplacian = identity - adjacency_matrix_csr
    # rescaling laplacian by its largest-magnitude eigenvalue
    max_eigenval = eigsh(laplacian, k=1, which='LM', return_eigenvectors=False)[0]
    laplacian = 2 * laplacian / max_eigenval - identity
    return _scipy_to_torch_sparse(laplacian)
class Trainer:
    """Full-batch training / evaluation loop for the models in this package.

    The wrapped model is called as ``model(features, matrix, labels)`` and must
    return a pair whose first element holds the per-node class scores and whose
    second element is the scalar loss.
    """

    def __init__(self, model):
        self.model = model

    def train(self, features, train_labels, val_labels, additional_matrix, device, run_config, log=True):
        """Train the model, checkpoint it to disk and restore the best weights.

        Args:
            features: node feature tensor.
            train_labels: labels used for the training loss.
            val_labels: labels used to compute validation loss / accuracy after each epoch.
            additional_matrix: adjacency or laplacian matrix depending on the model.
            device: torch device to train on.
            run_config: object providing ``learning_rate``, ``num_epochs``, ``weight_decay``,
                ``num_warmup_steps``, ``save_each_epoch`` and ``output_dir``.
            log: when true, print a short header and the final summary.

        Side effects:
            Writes ``<output_dir>/Epoch_<n>/model.pth`` whenever the validation loss improves,
            on the last epoch, and on every epoch if ``save_each_epoch`` is set. After
            training, the weights with the lowest validation loss are loaded back into
            ``self.model`` (if at least one epoch produced a comparable loss).
        """
        self.model = self.model.to(device)
        features = features.to(device)
        train_labels = train_labels.to(device)
        val_labels = val_labels.to(device)  # moved once so evaluate() does not re-copy it every epoch
        additional_matrix = additional_matrix.to(device)  # adjacency or laplacian matrix depending on the model

        optimizer = Adam(self.model.parameters(), lr=run_config.learning_rate, weight_decay=run_config.weight_decay)

        # linear warmup followed by linear decay to zero, see
        # https://huggingface.co/transformers/_modules/transformers/optimization.html#get_linear_schedule_with_warmup
        def lr_lambda(current_step: int):
            if current_step < run_config.num_warmup_steps:
                return float(current_step) / float(max(1, run_config.num_warmup_steps))
            return max(0.0, float(run_config.num_epochs - current_step) /
                       float(max(1, run_config.num_epochs - run_config.num_warmup_steps)))

        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda)

        if log:
            print("Training started:")
            print(f"\tNum Epochs = {run_config.num_epochs}")

        best_loss, best_model_accuracy = float("inf"), 0
        best_model_state_dict = None
        train_iterator = tqdm(range(0, int(run_config.num_epochs)), desc="Epoch")
        for epoch in train_iterator:
            self.model.train()
            outputs = self.model(features, additional_matrix, train_labels)
            loss = outputs[1]

            self.model.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            val_loss, val_accuracy = self.evaluate(features, val_labels, additional_matrix, device)
            train_iterator.set_description(f"Training loss = {loss.item():.4f}, "
                                           f"val loss = {val_loss:.4f}, val accuracy = {val_accuracy:.2f}")

            save_best_model = val_loss < best_loss
            if save_best_model:
                best_loss = val_loss
                best_model_accuracy = val_accuracy
                # deepcopy: state_dict() returns references that later optimizer steps would overwrite
                best_model_state_dict = copy.deepcopy(self.model.state_dict())
            if save_best_model or run_config.save_each_epoch or epoch + 1 == run_config.num_epochs:
                output_dir = os.path.join(run_config.output_dir, f"Epoch_{epoch + 1}")
                self.save(output_dir)
        if log:
            print(f"Best model val CE loss = {best_loss:.4f}, best model val accuracy = {best_model_accuracy:.2f}")
        # Restore the best weights. The snapshot is None when no epoch ran or the validation
        # loss never compared below infinity (e.g. it was always NaN); keep the current weights then.
        if best_model_state_dict is not None:
            self.model.load_state_dict(best_model_state_dict)

    def evaluate(self, features, test_labels, additional_matrix, device):
        """Compute loss and accuracy of the current model on the given labels.

        The model is switched to eval mode and left there. Entries of ``test_labels``
        equal to ``CrossEntropyLoss().ignore_index`` are excluded from the accuracy.

        Returns:
            ``(ce_loss, accuracy)`` as Python floats.
        """
        features = features.to(device)
        test_labels = test_labels.to(device)
        additional_matrix = additional_matrix.to(device)

        self.model.eval()

        # no gradients are needed for evaluation; skipping the autograd graph saves memory and time
        with torch.no_grad():
            outputs = self.model(features, additional_matrix, test_labels)
        ce_loss = outputs[1].item()

        ignore_label = nn.CrossEntropyLoss().ignore_index
        labeled = test_labels != ignore_label
        predicted_label = torch.max(outputs[0], dim=1).indices[labeled]
        true_label = test_labels[labeled]
        accuracy = torch.mean((true_label == predicted_label).float()).item()

        return ce_loss, accuracy

    def save(self, output_dir):
        """Write the model's ``state_dict`` to ``<output_dir>/model.pth``, creating the directory if needed."""
        os.makedirs(output_dir, exist_ok=True)

        model_path = os.path.join(output_dir, "model.pth")
        torch.save(self.model.state_dict(), model_path)
4 | 5 | The implementation contains two different propagation models, the one from the original GCN as described in the above paper and the Chebyshev filter based one from [Convolutional Neural Networks on Graphs with Fast Localized Spectral Filtering](https://arxiv.org/abs/1606.09375). 6 | 7 | ## Installation & Usage 8 | 9 | To quickly check: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/andrejmiscic/gcn-pytorch/blob/main/notebooks/gcn_testing.ipynb) 10 | 11 | ```bash 12 | git clone https://github.com/andrejmiscic/gcn-pytorch.git 13 | cd gcn-pytorch 14 | ``` 15 | 16 | The requirements are dependent on whether you want to use a GPU or not: 17 | 18 | ```bash 19 | pip install -r requirements-gpu.txt 20 | ``` 21 | or 22 | ```bash 23 | pip install -r requirements-cpu.txt 24 | ``` 25 | 26 | A simple evaluation of the model on the Cora dataset: 27 | 28 | ```python 29 | import torch 30 | 31 | from gcn.model import TwoLayerGCN 32 | from gcn.trainer import Trainer, RunConfig 33 | from gcn.utils import Dataset, load_data 34 | 35 | features, labels, train_labels, val_labels, test_labels, adjacency_matrix, \ 36 | laplacian_matrix, num_classes = load_data(Dataset.Cora) 37 | 38 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 39 | 40 | # training parameters 41 | run_config = RunConfig(learning_rate=0.1, num_epochs=200, weight_decay=5e-4, output_dir="gcn/") 42 | 43 | # constructing a GCN model 44 | model = TwoLayerGCN( 45 | input_size=features.size(1), 46 | hidden_size=16, 47 | output_size=num_classes, 48 | dropout=0.5 49 | ) 50 | 51 | # training 52 | trainer = Trainer(model) 53 | trainer.train(features, train_labels, val_labels, adjacency_matrix, device, run_config, log=False) 54 | 55 | # evaluating 56 | ce_loss, accuracy = trainer.evaluate(features, test_labels, adjacency_matrix, device) 57 | ``` 58 | 59 | You can check out `notebooks/gcn_testing.ipynb` that contains all the code for 
reproducing the results. 60 | 61 | To run the notebook on Google Colab follow the link 62 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/andrejmiscic/gcn-pytorch/blob/main/notebooks/gcn_testing.ipynb) 63 | 64 | ## Results 65 | 66 | Test set accuracy for this implementation in comparison to the original paper. All results are based on public splits of analyzed datasets. 67 | In our results we report standard deviation of accuracy based on 100 repetitions. 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 |
Dataset:CoraCiteSeerPubMed
Original paper
GCN81.570.379.0
Cheb (K=2)81.269.673.8
Cheb (K=3)79.569.874.4
This implementation
GCN82.2 ± 0.571.0 ± 0.679.1 ± 0.5
Cheb (K=2)81.3 ± 0.771.1 ± 0.977.9 ± 0.9
Cheb (K=3)82.5 ± 0.771.2 ± 0.879.0 ± 0.7
123 | 124 | Results of experiments with model depth and residual connections are shown below. Same as in the original paper the whole dataset is used and the mean accuracy of 5-fold cross validation is plotted. 125 | 126 |

127 | 128 | 129 | 130 |

def _with_optional_loss(logits, labels):
    """Return ``logits`` alone when ``labels`` is None, else ``(logits, cross-entropy loss)``."""
    if labels is None:
        return logits
    # same defaults as nn.CrossEntropyLoss(): mean reduction, ignore_index = -100
    return logits, F.cross_entropy(logits, labels)


# LAYERS: GCNConv and ChebNetConv


class GCNConv(nn.Module):
    """Graph convolution: a bias-free linear map followed by multiplication with the adjacency matrix."""

    def __init__(self, in_features, out_features):
        super(GCNConv, self).__init__()
        self.linear = nn.Linear(in_features, out_features, bias=False)

    def forward(self, x: torch.Tensor, adjacency_hat: torch.sparse_coo_tensor):
        """Return ``adjacency_hat @ linear(x)``; ``adjacency_hat`` must be a sparse tensor."""
        x = self.linear(x)
        x = torch.sparse.mm(adjacency_hat, x)
        return x


class ChebNetConv(nn.Module):
    """Chebyshev convolution: a linear layer applied to the first ``k`` Chebyshev terms of the input.

    Args:
        in_features: number of input features per node.
        out_features: number of output features per node.
        k: number of Chebyshev terms (T_0 .. T_{k-1}); must be at least 1.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """

    def __init__(self, in_features, out_features, k):
        super(ChebNetConv, self).__init__()

        # with k < 1 the linear layer would expect 0 inputs while forward() still produces
        # in_features columns, so fail early with a clear message
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")

        self.K = k
        self.linear = nn.Linear(in_features * k, out_features)

    def forward(self, x: torch.Tensor, laplacian: torch.sparse_coo_tensor):
        """Expand ``x`` into its Chebyshev basis and apply the linear layer."""
        x = self.__transform_to_chebyshev(x, laplacian)
        x = self.linear(x)
        return x

    def __transform_to_chebyshev(self, x, laplacian):
        """Return the Chebyshev terms of ``x`` flattened to ``[x.shape[0], features * K]``.

        Terms follow the recurrence ``T_0 = x``, ``T_1 = L x``, ``T_j = 2 L T_{j-1} - T_{j-2}``.
        In the flattened result the K terms of one feature are adjacent.
        """
        terms = [x]

        if self.K > 1:
            x0 = x
            x1 = torch.sparse.mm(laplacian, x0)
            terms.append(x1)
            for _ in range(2, self.K):
                x2 = 2 * torch.sparse.mm(laplacian, x1) - x0
                terms.append(x2)
                x0, x1 = x1, x2

        # stack once along a new last dimension instead of concatenating inside the loop
        cheb_x = torch.stack(terms, dim=2)
        return cheb_x.reshape([x.shape[0], -1])


# MODELS


class TwoLayerGCN(nn.Module):
    """Two ``GCNConv`` layers with ReLU in between and dropout before each layer."""

    def __init__(self, input_size, hidden_size, output_size, dropout=0.1):
        super(TwoLayerGCN, self).__init__()

        self.conv1 = GCNConv(input_size, hidden_size)
        self.conv2 = GCNConv(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, adjacency_hat: torch.sparse_coo_tensor, labels: torch.Tensor = None):
        """Return the class scores, or ``(scores, loss)`` when ``labels`` is given."""
        x = self.dropout(x)
        x = self.conv1(x, adjacency_hat)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.conv2(x, adjacency_hat)

        return _with_optional_loss(x, labels)


class GCN(nn.Module):
    """GCN with a configurable number of hidden layers and optional residual connections.

    Args:
        input_size, hidden_size, output_size: layer widths.
        num_hidden_layers: number of hidden-to-hidden ``GCNConv`` layers between input and output layer.
        dropout: dropout probability, applied only in training mode.
        residual: when true, each hidden layer adds its input to its activated output.
    """

    def __init__(self, input_size, hidden_size, output_size, num_hidden_layers=0, dropout=0.1, residual=False):
        super(GCN, self).__init__()

        self.dropout = dropout
        self.residual = residual

        self.input_conv = GCNConv(input_size, hidden_size)
        self.hidden_convs = nn.ModuleList([GCNConv(hidden_size, hidden_size) for _ in range(num_hidden_layers)])
        self.output_conv = GCNConv(hidden_size, output_size)

    def forward(self, x: torch.Tensor, adjacency_hat: torch.sparse_coo_tensor, labels: torch.Tensor = None):
        """Return the class scores, or ``(scores, loss)`` when ``labels`` is given."""
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.input_conv(x, adjacency_hat))
        for conv in self.hidden_convs:
            if self.residual:
                x = F.relu(conv(x, adjacency_hat)) + x
            else:
                x = F.relu(conv(x, adjacency_hat))
        # NOTE(review): dropout is applied once, after all hidden layers, so that the model with
        # num_hidden_layers=0 matches TwoLayerGCN - confirm against the original indentation
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.output_conv(x, adjacency_hat)

        return _with_optional_loss(x, labels)


class TwoLayerChebNet(nn.Module):
    """Two ``ChebNetConv`` layers with ReLU in between and dropout before each layer."""

    def __init__(self, input_size, hidden_size, output_size, dropout=0.1, k=2):
        super(TwoLayerChebNet, self).__init__()

        self.conv1 = ChebNetConv(input_size, hidden_size, k)
        self.conv2 = ChebNetConv(hidden_size, output_size, k)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor, laplacian: torch.sparse_coo_tensor, labels: torch.Tensor = None):
        """Return the class scores, or ``(scores, loss)`` when ``labels`` is given."""
        x = self.dropout(x)
        x = self.conv1(x, laplacian)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.conv2(x, laplacian)

        return _with_optional_loss(x, labels)


class ChebNetGCN(nn.Module):
    """ChebNet with a configurable number of hidden layers and optional residual connections.

    Takes the same arguments as ``GCN`` plus ``k``, the number of Chebyshev terms used by every layer.
    """

    def __init__(self, input_size, hidden_size, output_size, num_hidden_layers=0, dropout=0.1, residual=False, k=2):
        super(ChebNetGCN, self).__init__()

        self.dropout = dropout
        self.residual = residual

        self.input_conv = ChebNetConv(input_size, hidden_size, k)
        self.hidden_convs = nn.ModuleList([ChebNetConv(hidden_size, hidden_size, k) for _ in range(num_hidden_layers)])
        self.output_conv = ChebNetConv(hidden_size, output_size, k)

    def forward(self, x: torch.Tensor, laplacian: torch.sparse_coo_tensor, labels: torch.Tensor = None):
        """Return the class scores, or ``(scores, loss)`` when ``labels`` is given."""
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = F.relu(self.input_conv(x, laplacian))
        for conv in self.hidden_convs:
            if self.residual:
                x = F.relu(conv(x, laplacian)) + x
            else:
                x = F.relu(conv(x, laplacian))
        # NOTE(review): dropout placed after the hidden-layer loop, mirroring GCN - confirm
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.output_conv(x, laplacian)

        return _with_optional_loss(x, labels)
PyTorch Geometric for loading the data (it's the easiest this way). The following two cells install Pytorch Geometric library." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "metadata": { 70 | "id": "nBXZ7lP9v0Ok" 71 | }, 72 | "source": [ 73 | "import torch\n", 74 | "\n", 75 | "TORCH_version = torch.__version__\n", 76 | "TORCH = TORCH_version.split('+')[0]\n", 77 | "CUDA_version = torch.version.cuda\n", 78 | "CUDA = \"cu\" + CUDA_version.replace('.', '')" 79 | ], 80 | "execution_count": 2, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "metadata": { 86 | "id": "i8Iv4ozAvvcr" 87 | }, 88 | "source": [ 89 | "%%capture\n", 90 | "!pip install torch-scatter==latest+{CUDA} -f https://pytorch-geometric.com/whl/torch-{TORCH}.html\n", 91 | "!pip install torch-sparse==latest+{CUDA} -f https://pytorch-geometric.com/whl/torch-{TORCH}.html\n", 92 | "!pip install torch-cluster==latest+{CUDA} -f https://pytorch-geometric.com/whl/torch-{TORCH}.html\n", 93 | "!pip install torch-spline-conv==latest+{CUDA} -f https://pytorch-geometric.com/whl/torch-{TORCH}.html\n", 94 | "!pip install torch-geometric" 95 | ], 96 | "execution_count": 3, 97 | "outputs": [] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "metadata": { 102 | "id": "9fQki12CYnA_" 103 | }, 104 | "source": [ 105 | "import numpy as np\n", 106 | "import pandas as pd\n", 107 | "from plotnine import ggplot, geom_line, aes, xlab, theme, element_blank, ggtitle\n", 108 | "import scipy.sparse as sparse\n", 109 | "from sklearn.model_selection import KFold\n", 110 | "import torch\n", 111 | "import torch.nn as nn\n", 112 | "\n", 113 | "from gcn.model import TwoLayerGCN, GCN, TwoLayerChebNet\n", 114 | "from gcn.trainer import Trainer, RunConfig\n", 115 | "from gcn.utils import Dataset, load_data, set_labels" 116 | ], 117 | "execution_count": 4, 118 | "outputs": [] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "metadata": { 123 | "id": "yL4Vsp-XZFtd" 124 | }, 125 | "source": [ 126 | "# important for 
reproducibility!\n", 127 | "def set_seed(seed=1):\n", 128 | " np.random.seed(seed)\n", 129 | " torch.manual_seed(seed)\n", 130 | " if torch.cuda.is_available():\n", 131 | " torch.cuda.manual_seed(seed)" 132 | ], 133 | "execution_count": 5, 134 | "outputs": [] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "metadata": { 139 | "id": "ft-KIwU3ZiZB" 140 | }, 141 | "source": [ 142 | "# training parameters, there is no batch size as we use the whole set in each iteration\n", 143 | "run_config = RunConfig(\n", 144 | " learning_rate=0.1,\n", 145 | " num_epochs=200,\n", 146 | " weight_decay=5e-4,\n", 147 | " output_dir=\"/content/gcn-training/\"\n", 148 | ")" 149 | ], 150 | "execution_count": 6, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "id": "xjhGd9Sr0hzK" 157 | }, 158 | "source": [ 159 | "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")" 160 | ], 161 | "execution_count": 7, 162 | "outputs": [] 163 | }, 164 | { 165 | "cell_type": "markdown", 166 | "metadata": { 167 | "id": "XmBZ_tv4Vwbz" 168 | }, 169 | "source": [ 170 | "### Evaluation on Cora, CiteSeer and PubMed datasets\n", 171 | "\n", 172 | "We compare two different propagation models: the graph convolutional layer as introduced by [Kipf and Welling](https://arxiv.org/abs/1609.02907) and the Chebyshev convolutional layer as introduced by [Defferrard, Bresson and Vandergheynst](https://arxiv.org/abs/1606.09375). For the latter we set the order of expansion *k* to 2 and 3." 
173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "metadata": { 178 | "id": "fBPzQJYNZgq-" 179 | }, 180 | "source": [ 181 | "def evaluate_gcn_on_dataset(dataset: Dataset, iter = 1):\n", 182 | " set_seed()\n", 183 | " features, labels, train_labels, val_labels, test_labels, adjacency_matrix, \\\n", 184 | " laplacian_matrix, num_classes = load_data(dataset)\n", 185 | " accuracies = []\n", 186 | "\n", 187 | " for i in range(iter):\n", 188 | " model = TwoLayerGCN(\n", 189 | " input_size=features.size(1),\n", 190 | " hidden_size=16,\n", 191 | " output_size=num_classes,\n", 192 | " dropout=0.5\n", 193 | " )\n", 194 | " trainer = Trainer(model)\n", 195 | " trainer.train(features, train_labels, val_labels, adjacency_matrix, device, run_config, log=False)\n", 196 | "\n", 197 | " _, accuracy = trainer.evaluate(features, test_labels, adjacency_matrix, device)\n", 198 | " accuracies.append(accuracy)\n", 199 | " print(f\"\\nPerformance on {dataset.name}:\\n- test accuracy = {np.mean(accuracies):.3f} +- {np.std(accuracies):.3f}\\n\")\n", 200 | "\n", 201 | "def evaluate_chebnet_on_dataset(dataset: Dataset, k = 2, iter = 1):\n", 202 | " set_seed()\n", 203 | " features, labels, train_labels, val_labels, test_labels, adjacency_matrix, \\\n", 204 | " laplacian_matrix, num_classes = load_data(dataset)\n", 205 | " accuracies = []\n", 206 | "\n", 207 | " for i in range(iter):\n", 208 | " model = TwoLayerChebNet(\n", 209 | " input_size=features.size(1),\n", 210 | " hidden_size=16,\n", 211 | " output_size=num_classes,\n", 212 | " dropout=0.5,\n", 213 | " k=k\n", 214 | " )\n", 215 | "\n", 216 | " trainer = Trainer(model)\n", 217 | " trainer.train(features, train_labels, val_labels, laplacian_matrix, device, run_config, log=False)\n", 218 | "\n", 219 | " _ , accuracy = trainer.evaluate(features, test_labels, laplacian_matrix, device)\n", 220 | " accuracies.append(accuracy)\n", 221 | " print(f\"\\nPerformance on {dataset.name}:\\n- test accuracy = {np.mean(accuracies):.3f} +- 
{np.std(accuracies):.3f}\\n\")" 222 | ], 223 | "execution_count": 8, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "metadata": { 229 | "id": "UABHmdg_1AFU", 230 | "outputId": "8c49bbe3-cf64-451a-940c-2b20e9c88444", 231 | "colab": { 232 | "base_uri": "https://localhost:8080/" 233 | } 234 | }, 235 | "source": [ 236 | "evaluate_gcn_on_dataset(Dataset.Cora) # iter=100 to get uncertainty reported\n", 237 | "evaluate_gcn_on_dataset(Dataset.CiteSeer)\n", 238 | "evaluate_gcn_on_dataset(Dataset.PubMed)" 239 | ], 240 | "execution_count": 10, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "text": [ 245 | "Training loss = 0.3118, val loss = 0.7106, val accuracy = 0.80: 100%|██████████| 200/200 [00:01<00:00, 106.37it/s]\n", 246 | "Training loss = 1.6862, val loss = 1.7310, val accuracy = 0.35: 0%| | 0/200 [00:00" 584 | ] 585 | }, 586 | "metadata": { 587 | "tags": [] 588 | } 589 | }, 590 | { 591 | "output_type": "stream", 592 | "text": [ 593 | "\n" 594 | ], 595 | "name": "stdout" 596 | } 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "metadata": { 602 | "id": "YBWIeG8pqPCi" 603 | }, 604 | "source": [ 605 | "%%capture\n", 606 | "df_citeseer = compute_residual_effect_df(Dataset.CiteSeer)" 607 | ], 608 | "execution_count": 18, 609 | "outputs": [] 610 | }, 611 | { 612 | "cell_type": "code", 613 | "metadata": { 614 | "id": "EjEC1ogiqQpN", 615 | "outputId": "dc55d36c-2aa3-4d98-a63a-f612e3b2f95c", 616 | "colab": { 617 | "base_uri": "https://localhost:8080/", 618 | "height": 548 619 | } 620 | }, 621 | "source": [ 622 | "plot_residual_effect(df_citeseer, Dataset.CiteSeer)" 623 | ], 624 | "execution_count": 19, 625 | "outputs": [ 626 | { 627 | "output_type": "stream", 628 | "text": [ 629 | "/usr/local/lib/python3.6/dist-packages/plotnine/utils.py:1246: FutureWarning: is_categorical is deprecated and will be removed in a future version. 
Use is_categorical_dtype instead\n", 630 | " if pdtypes.is_categorical(arr):\n" 631 | ], 632 | "name": "stderr" 633 | }, 634 | { 635 | "output_type": "display_data", 636 | "data": { 637 | "image/png": "\n", 638 | "text/plain": [ 639 | "
" 640 | ] 641 | }, 642 | "metadata": { 643 | "tags": [] 644 | } 645 | }, 646 | { 647 | "output_type": "stream", 648 | "text": [ 649 | "\n" 650 | ], 651 | "name": "stdout" 652 | } 653 | ] 654 | }, 655 | { 656 | "cell_type": "code", 657 | "metadata": { 658 | "id": "rZJ1YeKqqjMZ" 659 | }, 660 | "source": [ 661 | "%%capture\n", 662 | "df_pubmed = compute_residual_effect_df(Dataset.PubMed)" 663 | ], 664 | "execution_count": 20, 665 | "outputs": [] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "metadata": { 670 | "id": "2RYzDN90qnWp", 671 | "outputId": "40d8b8d5-33b3-4169-b4ab-2e72136eaf31", 672 | "colab": { 673 | "base_uri": "https://localhost:8080/", 674 | "height": 548 675 | } 676 | }, 677 | "source": [ 678 | "plot_residual_effect(df_pubmed, Dataset.PubMed)" 679 | ], 680 | "execution_count": null, 681 | "outputs": [ 682 | { 683 | "output_type": "stream", 684 | "text": [ 685 | "/usr/local/lib/python3.6/dist-packages/plotnine/utils.py:1246: FutureWarning: is_categorical is deprecated and will be removed in a future version. Use is_categorical_dtype instead\n", 686 | " if pdtypes.is_categorical(arr):\n" 687 | ], 688 | "name": "stderr" 689 | }, 690 | { 691 | "output_type": "display_data", 692 | "data": { 693 | "image/png": "\n", 694 | "text/plain": [ 695 | "
" 696 | ] 697 | }, 698 | "metadata": { 699 | "tags": [] 700 | } 701 | }, 702 | { 703 | "output_type": "stream", 704 | "text": [ 705 | "\n" 706 | ], 707 | "name": "stdout" 708 | } 709 | ] 710 | } 711 | ] 712 | } --------------------------------------------------------------------------------