├── scripts ├── run_cgnn_gcn.sh ├── run_lp_gcn.sh ├── run_lp_graphsage.sh ├── run_cgnn_graphsage.sh ├── run_cgnn_sgc.sh ├── run_lp_sgc.sh ├── run_cgnn_gat.sh └── run_lp_gat.sh ├── data └── county │ └── election │ ├── 2012 │ ├── A.npy │ ├── feats.npy │ ├── labels.npy │ ├── test_idx.npy │ ├── val_idx.npy │ └── train_idx.npy │ └── 2016 │ ├── feats.npy │ ├── labels.npy │ ├── test_idx.npy │ ├── val_idx.npy │ └── train_idx.npy ├── __pycache__ └── utils.cpython-37.pyc ├── gat ├── __pycache__ │ ├── gat.cpython-37.pyc │ ├── utils.cpython-37.pyc │ └── gatconv.cpython-37.pyc ├── gat.py ├── utils.py ├── gatconv.py ├── train_lpgnn.py └── train_cgnn.py ├── sgc ├── __pycache__ │ ├── utils.cpython-37.pyc │ └── sgconv.cpython-37.pyc ├── utils.py ├── sgconv.py ├── train_lpgnn.py └── train_cgnn.py ├── gcn ├── __pycache__ │ ├── layers.cpython-37.pyc │ └── models.cpython-37.pyc ├── __init__.py ├── models.py ├── layers.py ├── train_lpgnn.py └── train_cgnn.py ├── README.md ├── utils.py └── graphsage ├── train_lpgnn.py └── train_cgnn.py /scripts/run_cgnn_gcn.sh: -------------------------------------------------------------------------------- 1 | python gcn/train_cgnn.py 2 | -------------------------------------------------------------------------------- /scripts/run_lp_gcn.sh: -------------------------------------------------------------------------------- 1 | python gcn/train_lpgnn.py 2 | -------------------------------------------------------------------------------- /scripts/run_lp_graphsage.sh: -------------------------------------------------------------------------------- 1 | python graphsage/train_lpgnn.py 2 | -------------------------------------------------------------------------------- /scripts/run_cgnn_graphsage.sh: -------------------------------------------------------------------------------- 1 | python graphsage/train_cgnn.py 2 | -------------------------------------------------------------------------------- /scripts/run_cgnn_sgc.sh: -------------------------------------------------------------------------------- 1 | python sgc/train_cgnn.py --dataset election --gpu 0 --n-epochs 1000 --lr 0.1 #--bias 2 | -------------------------------------------------------------------------------- /scripts/run_lp_sgc.sh: -------------------------------------------------------------------------------- 1 | python sgc/train_lpgnn.py --dataset election --gpu 0 --n-epochs 300 --lr 0.1 --bias 2 | -------------------------------------------------------------------------------- /data/county/election/2012/A.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2012/A.npy -------------------------------------------------------------------------------- /__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /data/county/election/2012/feats.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2012/feats.npy -------------------------------------------------------------------------------- /data/county/election/2012/labels.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2012/labels.npy -------------------------------------------------------------------------------- /data/county/election/2016/feats.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2016/feats.npy -------------------------------------------------------------------------------- /data/county/election/2016/labels.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2016/labels.npy -------------------------------------------------------------------------------- /gat/__pycache__/gat.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/gat/__pycache__/gat.cpython-37.pyc -------------------------------------------------------------------------------- /gat/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/gat/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /sgc/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/sgc/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /data/county/election/2012/test_idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2012/test_idx.npy -------------------------------------------------------------------------------- /data/county/election/2012/val_idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2012/val_idx.npy -------------------------------------------------------------------------------- /data/county/election/2016/test_idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2016/test_idx.npy -------------------------------------------------------------------------------- /data/county/election/2016/val_idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2016/val_idx.npy -------------------------------------------------------------------------------- /gat/__pycache__/gatconv.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/gat/__pycache__/gatconv.cpython-37.pyc -------------------------------------------------------------------------------- /gcn/__pycache__/layers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/gcn/__pycache__/layers.cpython-37.pyc -------------------------------------------------------------------------------- 
/gcn/__pycache__/models.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/gcn/__pycache__/models.cpython-37.pyc -------------------------------------------------------------------------------- /sgc/__pycache__/sgconv.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/sgc/__pycache__/sgconv.cpython-37.pyc -------------------------------------------------------------------------------- /data/county/election/2012/train_idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2012/train_idx.npy -------------------------------------------------------------------------------- /data/county/election/2016/train_idx.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JunwenBai/correlation-gnn/HEAD/data/county/election/2016/train_idx.npy -------------------------------------------------------------------------------- /gcn/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from __future__ import division 3 | 4 | from .layers import * 5 | from .models import * 6 | from .utils import * -------------------------------------------------------------------------------- /scripts/run_cgnn_gat.sh: -------------------------------------------------------------------------------- 1 | python gat/train_cgnn.py --dataset election --gpu 0 --num-hidden 32 --num-layers 1 --num-heads 8 --lr 5e-4 --negative-slope 0.2 --epochs 1000 --bias 2 | -------------------------------------------------------------------------------- /scripts/run_lp_gat.sh: -------------------------------------------------------------------------------- 1 | python gat/train_lpgnn.py --dataset election --gpu 0 --num-hidden 32 --num-layers 1 --num-heads 8 --lr 1e-3 --negative-slope 0.2 --epochs 1500 --bias 2 | -------------------------------------------------------------------------------- /sgc/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | from sklearn.metrics import r2_score 5 | from gpytorch import inv_matmul, matmul 6 | from gpytorch.utils import linear_cg 7 | 8 | def get_Gamma(alpha, beta, S): 9 | return beta * torch.eye(S.size(0)) - beta * alpha * S 10 | 11 | def interpolate(idx_train, idx_test, res_pred_train, Gamma): 12 | idx_train = idx_train.cpu().detach().numpy() 13 | idx_test = idx_test.cpu().detach().numpy() 14 | idx = np.arange(Gamma.shape[0]) 15 | idx_val = np.setdiff1d(idx, np.concatenate((idx_train, idx_test))) 16 | idx_test_val = np.concatenate((idx_test, idx_val)) 17 | test_val_Gamma = Gamma[idx_test_val, :][:, idx_test_val] 18 | 19 | res_pred_test = inv_matmul(test_val_Gamma, -matmul(Gamma[idx_test_val, :][:, idx_train], res_pred_train)) 20 | return res_pred_test[:len(idx_test)] 21 | 22 | def lp_refine(idx_test, idx_train, labels, output, S, alpha=1., beta=1.): 23 | Gamma = get_Gamma(alpha, beta, S) 24 | pred_train = output[idx_train] 25 | pred_test = output[idx_test] 26 | res_pred_train = labels[idx_train] - output[idx_train] 27 | refined_test = pred_test + interpolate(idx_train, idx_test, res_pred_train, Gamma) 28 | return 
refined_test 29 | 30 | def normalize(mx): 31 | """Row-normalize sparse matrix""" 32 | rowsum = np.array(mx.sum(1)) 33 | r_inv = np.power(rowsum, -1).flatten() 34 | r_inv[np.isinf(r_inv)] = 0. 35 | r_mat_inv = sp.diags(r_inv) 36 | mx = r_mat_inv.dot(mx) 37 | return mx 38 | 39 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 40 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 41 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 42 | indices = torch.from_numpy( 43 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 44 | values = torch.from_numpy(sparse_mx.data) 45 | shape = torch.Size(sparse_mx.shape) 46 | return torch.sparse.FloatTensor(indices, values, shape) 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyTorch Implementation of Correlated Graph Neural Networks 2 | 3 | [Outcome Correlation in Graph Neural Network Regression](https://arxiv.org/abs/2002.08274)
4 | 5 | Junteng Jia and Austin Benson
6 | 7 | arXiv:2002.08274, 2020.
8 | 9 | ## Overview 10 | Correlated Graph Neural Networks model the correlations among node labels as well as the features of each node: 11 | - C-GNN models the correlation as a multivariate Gaussian and learns the correlation structure in O(m) per optimization step, where m is the number of edges. 12 | - LP-GNN assumes positive correlation among neighboring vertices, and runs label propagation to interpolate GNN residuals on the testing vertices (a short sketch of this refinement step is given at the end of this README). 13 | 14 | ## Requirements 15 | - Python 3.7+ 16 | - PyTorch 1.2.0+ 17 | - [DGL](https://github.com/dmlc/dgl) 18 | - [GPyTorch](https://github.com/cornellius-gp/gpytorch) 19 | 20 | ## Usage 21 | 22 | ### Download this Repository 23 | Use `git clone` to copy this repo to your local machine. 24 | 25 | ### Dataset 26 | The [US Election](https://projects.fivethirtyeight.com/2016-election-forecast/) dataset is used as a running example. The dataset is included in this repo. 27 | 28 | ### Train and Test 29 | 30 | So far we have implemented four graph neural network architectures (GCN, GraphSAGE, SGC, and GAT) for both LP-GNN and C-GNN. 31 | 32 | To train and test GCN-based LP-GNN, use the following script: 33 | ```bash 34 | scripts/run_lp_gcn.sh 35 | ``` 36 | To train and test GraphSAGE-based LP-GNN, use the following script: 37 | ```bash 38 | scripts/run_lp_graphsage.sh 39 | ``` 40 | To train and test GCN-based C-GNN, use the following script: 41 | 42 | ```bash 43 | scripts/run_cgnn_gcn.sh 44 | ``` 45 | 46 | To train and test GraphSAGE-based C-GNN, use the following script: 47 | 48 | ```bash 49 | scripts/run_cgnn_graphsage.sh 50 | ``` 51 | 52 | Analogous scripts for the SGC- and GAT-based models are provided in `scripts/` (e.g. `scripts/run_lp_sgc.sh`, `scripts/run_cgnn_gat.sh`). The default hyper-parameters should give reasonably good results. 53 | 54 | If you have any questions, feel free to open an issue. 55 | 56 | ## References 57 | [C-GNN](https://github.com/000Justin000/gnn-residual-correlation) (original implementation in Julia)
58 | [GCN](https://github.com/tkipf/pygcn)
59 | [GraphSAGE](https://github.com/dmlc/dgl/tree/master/examples/pytorch/graphsage)
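## LP-GNN Refinement at a Glance

The sketch below condenses what `lp_refine` in `utils.py` computes after the base GNN is trained: the prediction residuals on the training nodes are propagated to the remaining nodes through the matrix Gamma = beta * (I - alpha * S), where S is the normalized adjacency matrix. This is a minimal dense NumPy illustration; the helper name `lp_refine_dense` is ours, and the repo's implementation instead uses GPyTorch solvers and never forms a dense inverse. C-GNN uses the same Gamma but learns alpha and beta by maximizing the Gaussian likelihood of the training residuals (see the `loss` functions in the `train_cgnn.py` scripts).

```python
import numpy as np

def lp_refine_dense(idx_test, idx_train, labels, output, S, alpha=1.0, beta=1.0):
    """Illustrative dense version of LP-GNN refinement (not the repo's implementation).

    All arguments are NumPy arrays; S is the row-normalized adjacency matrix.
    """
    n = S.shape[0]
    Gamma = beta * (np.eye(n) - alpha * S)                  # residual precision matrix
    idx_unlabeled = np.setdiff1d(np.arange(n), idx_train)   # test + validation nodes
    res_train = labels[idx_train] - output[idx_train]       # residuals on training nodes
    # Conditional mean of the unlabeled residuals: solve Gamma_UU r_U = -Gamma_UL r_L.
    res_unlabeled = np.linalg.solve(
        Gamma[np.ix_(idx_unlabeled, idx_unlabeled)],
        -Gamma[np.ix_(idx_unlabeled, idx_train)] @ res_train,
    )
    refined = output.astype(float)
    refined[idx_unlabeled] += res_unlabeled                 # add interpolated residuals
    return refined[idx_test]                                # refined test predictions
```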
60 | -------------------------------------------------------------------------------- /gat/gat.py: -------------------------------------------------------------------------------- 1 | """ 2 | Graph Attention Networks in DGL using SPMV optimization. 3 | References 4 | ---------- 5 | Paper: https://arxiv.org/abs/1710.10903 6 | Author's code: https://github.com/PetarV-/GAT 7 | Pytorch implementation: https://github.com/Diego999/pyGAT 8 | """ 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from gatconv import GATConv 14 | 15 | class GAT(nn.Module): 16 | def __init__(self, 17 | g, 18 | num_layers, 19 | in_dim, 20 | num_hidden, 21 | num_classes, 22 | heads, 23 | activation, 24 | feat_drop, 25 | attn_drop, 26 | negative_slope, 27 | residual, 28 | bias): 29 | super(GAT, self).__init__() 30 | self.g = g 31 | self.num_layers = num_layers 32 | self.gat_layers = nn.ModuleList() 33 | self.activation = activation 34 | # input projection (no residual) 35 | self.gat_layers.append(GATConv( 36 | in_dim, num_hidden, heads[0], 37 | feat_drop, attn_drop, negative_slope, False, bias, self.activation)) 38 | # hidden layers 39 | for l in range(1, num_layers): 40 | # due to multi-head, the in_dim = num_hidden * num_heads 41 | self.gat_layers.append(GATConv( 42 | num_hidden * heads[l-1], num_hidden, heads[l], 43 | feat_drop, attn_drop, negative_slope, residual, bias, self.activation)) 44 | # output projection 45 | self.gat_layers.append(GATConv( 46 | num_hidden * heads[-2], num_classes, heads[-1], 47 | feat_drop, attn_drop, negative_slope, residual, bias, None)) 48 | #self.fc1 = nn.Linear(num_hidden, num_hidden) 49 | #self.fc2 = nn.Linear(num_hidden, num_classes) 50 | 51 | def forward(self, inputs): 52 | h = inputs 53 | for l in range(self.num_layers): 54 | h = self.gat_layers[l](self.g, h).flatten(1) 55 | # output projection 56 | logits = self.gat_layers[-1](self.g, h).mean(1) 57 | #logits = F.elu(self.fc1(logits)) 58 | #pred = self.fc2(logits) 59 | return logits 60 | -------------------------------------------------------------------------------- /gcn/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from layers import GraphConvolution, GraphAttentionLayer 5 | 6 | 7 | class GCN(nn.Module): 8 | def __init__(self, nfeat, nhid, nclass, dropout): 9 | super(GCN, self).__init__() 10 | self.gc1 = GraphConvolution(nfeat, nhid) 11 | self.gc2 = GraphConvolution(nhid, nhid) 12 | self.fc = nn.Linear(nhid, nclass) 13 | self.dropout = dropout 14 | 15 | def forward(self, x, adj): 16 | x = F.relu(self.gc1(x, adj)) 17 | x = F.dropout(x, self.dropout, training=self.training) 18 | x = F.relu(self.gc2(x, adj)) 19 | x = F.dropout(x, self.dropout, training=self.training) 20 | x = self.fc(x) 21 | return x 22 | 23 | class GAT(nn.Module): 24 | def __init__(self, in_dim, hid_dim, class_num, alpha, dropout, nheads, use_cuda): 25 | super(GAT, self).__init__() 26 | self.in_dim = in_dim 27 | self.hid_dim = hid_dim 28 | self.class_num = class_num 29 | self.alpha = alpha 30 | self.dropout = dropout 31 | self.nheads = nheads 32 | self.use_cuda = use_cuda 33 | 34 | self.attentions = [GraphAttentionLayer(self.in_dim, self.hid_dim, self.alpha, self.dropout, nonlinear=True, use_cuda=self.use_cuda) for _ in range(self.nheads)] 35 | 36 | for k in range(self.nheads): 37 | self.add_module('attention_' + str(k), self.attentions[k]) 38 | 39 | ## we change the second-layer attention to fc 
layers. 40 | self.classifier = nn.Sequential( 41 | nn.Linear(self.hid_dim, self.class_num), 42 | ) 43 | 44 | def forward(self, global_feature, nodes, neighbors_list): 45 | # global_feature = F.dropout(global_feature, self.dropout, training=self.training) 46 | # new_feature = torch.cat([atten(global_feature, nodes, neighbors_list) for atten in self.attentions], dim=1) 47 | new_feature = torch.mean(torch.cat([atten(global_feature, nodes, neighbors_list).view(1, -1) for atten in self.attentions], dim=0), dim=0).view(len(nodes), -1) 48 | # new_feature = F.dropout(new_feature, self.dropout, training=self.training) 49 | logit = self.classifier(new_feature) 50 | return new_feature, logit 51 | -------------------------------------------------------------------------------- /sgc/sgconv.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from torch import nn 3 | import torch.nn.functional as F 4 | import dgl.function as fn 5 | 6 | class SGConv(nn.Module): 7 | def __init__(self, 8 | in_feats, 9 | out_feats, 10 | k=1, 11 | n_hid=32, 12 | cached=False, 13 | bias=True, 14 | norm=None): 15 | super(SGConv, self).__init__() 16 | self._cached = cached 17 | self._cached_h = None 18 | self._k = k 19 | self.norm = norm 20 | self.n_hid = n_hid 21 | #self.fc1 = nn.Linear(in_feats, n_hid, bias=bias) 22 | self.fc1 = nn.Linear(in_feats, out_feats, bias=bias) 23 | self.fc2 = nn.Linear(n_hid, out_feats, bias=bias) 24 | 25 | #self.reset_parameters() 26 | 27 | def reset_parameters(self): 28 | """Reinitialize learnable parameters.""" 29 | nn.init.xavier_uniform_(self.fc1.weight) 30 | if self.fc1.bias is not None: 31 | nn.init.zeros_(self.fc1.bias) 32 | nn.init.xavier_uniform_(self.fc2.weight) 33 | if self.fc2.bias is not None: 34 | nn.init.zeros_(self.fc2.bias) 35 | 36 | 37 | def forward(self, graph, feat): 38 | graph = graph.local_var() 39 | if self._cached_h is not None: 40 | feat = self._cached_h 41 | else: 42 | # compute normalization 43 | degs = graph.in_degrees().float().clamp(min=1) 44 | norm = th.pow(degs, -0.5) 45 | norm = norm.to(feat.device).unsqueeze(1) 46 | # compute (D^-1 A^k D)^k X 47 | for _ in range(self._k): 48 | feat = feat * norm 49 | graph.ndata['h'] = feat 50 | graph.update_all(fn.copy_u('h', 'm'), 51 | fn.sum('m', 'h')) 52 | feat = graph.ndata.pop('h') 53 | feat = feat * norm 54 | 55 | if self.norm is not None: 56 | feat = self.norm(feat) 57 | 58 | # cache feature 59 | if self._cached: 60 | self._cached_h = feat 61 | x = self.fc1(feat) 62 | #x = self.fc1(F.elu(feat)) 63 | #x = self.fc2(F.elu(x)) 64 | return x 65 | -------------------------------------------------------------------------------- /gat/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import scipy.sparse as sp 4 | from gpytorch import inv_matmul, matmul 5 | from gpytorch.utils import linear_cg 6 | 7 | class EarlyStopping: 8 | def __init__(self, patience=10): 9 | self.patience = patience 10 | self.counter = 0 11 | self.best_score = None 12 | self.early_stop = False 13 | 14 | def step(self, acc, model): 15 | score = acc 16 | if self.best_score is None: 17 | self.best_score = score 18 | self.save_checkpoint(model) 19 | elif score < self.best_score: 20 | self.counter += 1 21 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 22 | if self.counter >= self.patience: 23 | self.early_stop = True 24 | else: 25 | self.best_score = score 26 | self.save_checkpoint(model) 27 | 
self.counter = 0 28 | return self.early_stop 29 | 30 | def save_checkpoint(self, model): 31 | '''Saves model when validation loss decrease.''' 32 | torch.save(model.state_dict(), 'es_checkpoint.pt') 33 | 34 | def get_Gamma(alpha, beta, S): 35 | return beta * torch.eye(S.size(0)) - beta * alpha * S 36 | 37 | def interpolate(idx_train, idx_test, res_pred_train, Gamma): 38 | idx_train = idx_train.cpu().detach().numpy() 39 | idx_test = idx_test.cpu().detach().numpy() 40 | idx = np.arange(Gamma.shape[0]) 41 | idx_val = np.setdiff1d(idx, np.concatenate((idx_train, idx_test))) 42 | idx_test_val = np.concatenate((idx_test, idx_val)) 43 | test_val_Gamma = Gamma[idx_test_val, :][:, idx_test_val] 44 | 45 | res_pred_test = inv_matmul(test_val_Gamma, -matmul(Gamma[idx_test_val, :][:, idx_train], res_pred_train)) 46 | return res_pred_test[:len(idx_test)] 47 | 48 | def lp_refine(idx_test, idx_train, labels, output, S, alpha=1., beta=1.): 49 | Gamma = get_Gamma(alpha, beta, S) 50 | 51 | pred_train = output[idx_train] 52 | pred_test = output[idx_test] 53 | res_pred_train = labels[idx_train] - output[idx_train] 54 | refined_test = pred_test + interpolate(idx_train, idx_test, res_pred_train, Gamma) 55 | 56 | return refined_test 57 | 58 | def normalize(mx): 59 | """Row-normalize sparse matrix""" 60 | rowsum = np.array(mx.sum(1)) 61 | r_inv = np.power(rowsum, -1).flatten() 62 | r_inv[np.isinf(r_inv)] = 0. 63 | r_mat_inv = sp.diags(r_inv) 64 | mx = r_mat_inv.dot(mx) 65 | return mx 66 | 67 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 68 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 69 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 70 | indices = torch.from_numpy( 71 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 72 | values = torch.from_numpy(sparse_mx.data) 73 | shape = torch.Size(sparse_mx.shape) 74 | return torch.sparse.FloatTensor(indices, values, shape) 75 | 76 | -------------------------------------------------------------------------------- /gat/gatconv.py: -------------------------------------------------------------------------------- 1 | """Torch modules for graph attention networks(GAT).""" 2 | # pylint: disable= no-member, arguments-differ, invalid-name 3 | import torch as th 4 | from torch import nn 5 | 6 | import dgl.function as fn 7 | from dgl.nn.pytorch.softmax import edge_softmax 8 | from dgl.nn.pytorch.utils import Identity 9 | from dgl.utils import expand_as_pair 10 | 11 | # pylint: enable=W0235 12 | class GATConv(nn.Module): 13 | def __init__(self, 14 | in_feats, 15 | out_feats, 16 | num_heads, 17 | feat_drop=0., 18 | attn_drop=0., 19 | negative_slope=0.2, 20 | residual=False, 21 | bias=False, 22 | activation=None): 23 | super(GATConv, self).__init__() 24 | self._num_heads = num_heads 25 | self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats) 26 | self._out_feats = out_feats 27 | if isinstance(in_feats, tuple): 28 | self.fc_src = nn.Linear( 29 | self._in_src_feats, out_feats * num_heads, bias=bias) 30 | self.fc_dst = nn.Linear( 31 | self._in_dst_feats, out_feats * num_heads, bias=bias) 32 | else: 33 | self.fc = nn.Linear( 34 | self._in_src_feats, out_feats * num_heads, bias=bias) 35 | self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats))) 36 | self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats))) 37 | self.feat_drop = nn.Dropout(feat_drop) 38 | self.attn_drop = nn.Dropout(attn_drop) 39 | self.leaky_relu = nn.LeakyReLU(negative_slope) 40 | if residual: 41 | if self._in_dst_feats != out_feats: 42 | 
self.res_fc = nn.Linear( 43 | self._in_dst_feats, num_heads * out_feats, bias=bias) 44 | else: 45 | self.res_fc = Identity() 46 | else: 47 | self.register_buffer('res_fc', None) 48 | self.reset_parameters() 49 | self.activation = activation 50 | 51 | def reset_parameters(self): 52 | """Reinitialize learnable parameters.""" 53 | gain = nn.init.calculate_gain('relu') 54 | if hasattr(self, 'fc'): 55 | nn.init.xavier_normal_(self.fc.weight, gain=gain) 56 | else: # bipartite graph neural networks 57 | nn.init.xavier_normal_(self.fc_src.weight, gain=gain) 58 | nn.init.xavier_normal_(self.fc_dst.weight, gain=gain) 59 | nn.init.xavier_normal_(self.attn_l, gain=gain) 60 | nn.init.xavier_normal_(self.attn_r, gain=gain) 61 | if isinstance(self.res_fc, nn.Linear): 62 | nn.init.xavier_normal_(self.res_fc.weight, gain=gain) 63 | 64 | def forward(self, graph, feat): 65 | graph = graph.local_var() 66 | if isinstance(feat, tuple): 67 | h_src = self.feat_drop(feat[0]) 68 | h_dst = self.feat_drop(feat[1]) 69 | feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats) 70 | feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats) 71 | else: 72 | h_src = h_dst = self.feat_drop(feat) 73 | feat_src = feat_dst = self.fc(h_src).view( 74 | -1, self._num_heads, self._out_feats) 75 | el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1) 76 | er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1) 77 | graph.srcdata.update({'ft': feat_src, 'el': el}) 78 | graph.dstdata.update({'er': er}) 79 | # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively. 80 | graph.apply_edges(fn.u_add_v('el', 'er', 'e')) 81 | e = self.leaky_relu(graph.edata.pop('e')) 82 | # compute softmax 83 | graph.edata['a'] = self.attn_drop(edge_softmax(graph, e)) 84 | # message passing 85 | graph.update_all(fn.u_mul_e('ft', 'a', 'm'), 86 | fn.sum('m', 'ft')) 87 | rst = graph.dstdata['ft'] 88 | # residual 89 | if self.res_fc is not None: 90 | resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats) 91 | rst = rst + resval 92 | # activation 93 | if self.activation: 94 | rst = self.activation(rst) 95 | return rst 96 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | from sklearn.metrics import r2_score 5 | from gpytorch import inv_matmul, matmul 6 | from gpytorch.utils import linear_cg 7 | 8 | def encode_onehot(labels): 9 | classes = set(labels) 10 | classes_dict = {c: np.identity(len(classes))[i, :] for i, c in 11 | enumerate(classes)} 12 | labels_onehot = np.array(list(map(classes_dict.get, labels)), 13 | dtype=np.int32) 14 | return labels_onehot 15 | 16 | def load_jj_data(path, load_partial=False): 17 | features = np.load(path+"/feats.npy") 18 | labels = np.load(path+"/labels.npy") 19 | if load_partial: 20 | return None, torch.FloatTensor(features), torch.FloatTensor(labels), None, None, None 21 | adj = np.load(path+"/A.npy").astype(float) 22 | sp_adj = sp.coo_matrix(adj) 23 | sp_adj = normalize(sp_adj) 24 | idx_train = np.load(path+"/train_idx.npy")-1 25 | idx_val = np.load(path+"/val_idx.npy")-1 26 | idx_test = np.load(path+"/test_idx.npy")-1 27 | return sparse_mx_to_torch_sparse_tensor(sp_adj), torch.FloatTensor(features), torch.FloatTensor(labels), torch.LongTensor(idx_train), torch.LongTensor(idx_val), torch.LongTensor(idx_test) 28 | 29 | def load_data(path="../data/cora/", 
dataset="cora"): 30 | """Load citation network dataset (cora only for now)""" 31 | print('Loading {} dataset...'.format(dataset)) 32 | 33 | idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), 34 | dtype=np.dtype(str)) 35 | features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32) 36 | labels = encode_onehot(idx_features_labels[:, -1]) 37 | 38 | # build graph 39 | idx = np.array(idx_features_labels[:, 0], dtype=np.int32) 40 | idx_map = {j: i for i, j in enumerate(idx)} 41 | edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset), 42 | dtype=np.int32) 43 | edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), 44 | dtype=np.int32).reshape(edges_unordered.shape) 45 | adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 46 | shape=(labels.shape[0], labels.shape[0]), 47 | dtype=np.float32) 48 | 49 | # build symmetric adjacency matrix 50 | adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) ### 51 | 52 | features = normalize(features) 53 | adj = normalize(adj + sp.eye(adj.shape[0])) 54 | 55 | idx_train = range(140) 56 | idx_val = range(200, 500) 57 | idx_test = range(500, 1500) 58 | 59 | features = torch.FloatTensor(np.array(features.todense())) 60 | labels = torch.LongTensor(np.where(labels)[1]) 61 | adj = sparse_mx_to_torch_sparse_tensor(adj) 62 | 63 | idx_train = torch.LongTensor(idx_train) 64 | idx_val = torch.LongTensor(idx_val) 65 | idx_test = torch.LongTensor(idx_test) 66 | 67 | return adj, features, labels, idx_train, idx_val, idx_test 68 | 69 | 70 | def R2(outputs, labels): 71 | outputs = outputs.cpu().detach().numpy().reshape(-1) 72 | labels = labels.cpu().detach().numpy().reshape(-1) 73 | return r2_score(labels, outputs) 74 | 75 | def normalize(mx): 76 | """Row-normalize sparse matrix""" 77 | rowsum = np.array(mx.sum(1)) 78 | r_inv = np.power(rowsum, -1).flatten() 79 | r_inv[np.isinf(r_inv)] = 0. 
80 | r_mat_inv = sp.diags(r_inv) 81 | mx = r_mat_inv.dot(mx) 82 | return mx 83 | 84 | 85 | def accuracy(output, labels): 86 | preds = output.max(1)[1].type_as(labels) 87 | correct = preds.eq(labels).double() 88 | correct = correct.sum() 89 | return correct / len(labels) 90 | 91 | 92 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 93 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 94 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 95 | indices = torch.from_numpy( 96 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 97 | values = torch.from_numpy(sparse_mx.data) 98 | shape = torch.Size(sparse_mx.shape) 99 | return torch.sparse.FloatTensor(indices, values, shape) 100 | 101 | def get_Gamma(alpha, beta, S): 102 | return beta * torch.eye(S.size(0)) - beta * alpha * S 103 | 104 | def interpolate(idx_train, idx_test, res_pred_train, Gamma): 105 | idx_train = idx_train.cpu().detach().numpy() 106 | idx_test = idx_test.cpu().detach().numpy() 107 | idx = np.arange(Gamma.shape[0]) 108 | idx_val = np.setdiff1d(idx, np.concatenate((idx_train, idx_test))) 109 | idx_test_val = np.concatenate((idx_test, idx_val)) 110 | test_val_Gamma = Gamma[idx_test_val, :][:, idx_test_val] 111 | 112 | res_pred_test = inv_matmul(test_val_Gamma, -matmul(Gamma[idx_test_val, :][:, idx_train], res_pred_train)) 113 | return res_pred_test[:len(idx_test)] 114 | 115 | def lp_refine(idx_test, idx_train, labels, output, S, alpha=1., beta=1.): 116 | Gamma = get_Gamma(alpha, beta, S) 117 | 118 | pred_train = output[idx_train] 119 | pred_test = output[idx_test] 120 | res_pred_train = labels[idx_train] - output[idx_train] 121 | 122 | refined_test = pred_test + interpolate(idx_train, idx_test, res_pred_train, Gamma) 123 | 124 | return refined_test 125 | -------------------------------------------------------------------------------- /gcn/layers.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.nn.parameter import Parameter 6 | from torch.nn.modules.module import Module 7 | 8 | 9 | class GraphConvolution(Module): 10 | """ 11 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 12 | """ 13 | 14 | def __init__(self, in_features, out_features, bias=True): 15 | super(GraphConvolution, self).__init__() 16 | self.in_features = in_features 17 | self.out_features = out_features 18 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 19 | if bias: 20 | self.bias = Parameter(torch.FloatTensor(out_features)) 21 | else: 22 | self.register_parameter('bias', None) 23 | self.reset_parameters() 24 | 25 | def reset_parameters(self): 26 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 27 | self.weight.data.uniform_(-stdv, stdv) 28 | if self.bias is not None: 29 | self.bias.data.uniform_(-stdv, stdv) 30 | 31 | def forward(self, input, adj): 32 | #print("input:", input.shape) 33 | #print("adj:", adj.shape) 34 | #print("adj[0,0]:", adj) 35 | #print("weight:", self.weight.shape) 36 | support = torch.mm(input, self.weight) 37 | output = torch.spmm(adj, support) 38 | #print("support:", support.shape) 39 | #print("output:", output.shape) 40 | #exit() 41 | if self.bias is not None: 42 | return output + self.bias 43 | else: 44 | return output 45 | 46 | def __repr__(self): 47 | return self.__class__.__name__ + ' (' \ 48 | + str(self.in_features) + ' -> ' \ 49 | + str(self.out_features) + ')' 50 | 51 | 52 | class GraphAttentionLayer(nn.Module): 53 | ''' 54 | simple GAT layer, similar to https://arxiv.org/abs/1710.10903 55 | ''' 56 | def __init__(self, in_dim, out_dim, alpha, dropout, nonlinear=False, use_cuda=True): 57 | super(GraphAttentionLayer, self).__init__() 58 | self.in_dim = in_dim 59 | self.out_dim = out_dim 60 | self.leakyrelu = nn.LeakyReLU(alpha) 61 | self.dropout = dropout 62 | self.nonlinear = nonlinear 63 | self.use_cuda = use_cuda 64 | 65 | self.W = nn.Parameter(torch.zeros(in_dim, out_dim)) 66 | nn.init.xavier_uniform_(self.W, gain=1.414) 67 | self.a = nn.Parameter(torch.zeros(2*out_dim, 1)) 68 | nn.init.xavier_uniform_(self.a, gain=1.414) 69 | 70 | @staticmethod 71 | def getMask(global_feature, nodes, neighbors_list): 72 | neighbors_list = [(neighbors | set([nodes[i]])) for i, neighbors in enumerate(neighbors_list)] 73 | unique_nodes_list = list(set.union(*neighbors_list)) 74 | unique_nodes_dict = {node:n for n, node in enumerate(unique_nodes_list)} 75 | 76 | mask = torch.zeros(len(nodes), len(unique_nodes_list)) 77 | row_indices = [i for i, neighbors in enumerate(neighbors_list) for node in neighbors] 78 | col_indices = [unique_nodes_dict[node] for neighbors in neighbors_list for node in neighbors] 79 | mask[row_indices, col_indices] = 1 80 | 81 | return mask, unique_nodes_list 82 | 83 | def meanAggregate(self, global_feature, nodes, neighbors_list): 84 | mask, unique_nodes_list = self.getMask(global_feature, nodes, neighbors_list) 85 | if self.use_cuda: 86 | mask = mask.cuda() 87 | neighbor_num = mask.sum(1, keepdim=True) 88 | mask = mask.div(neighbor_num) 89 | 90 | neighbors_feature = global_feature[unique_nodes_list] 91 | return torch.matmul(mask, neighbors_feature) 92 | 93 | def forward(self, global_feature, nodes, neighbors_list): 94 | mask, unique_nodes_list = self.getMask(global_feature, nodes, neighbors_list) 95 | if self.use_cuda: 96 | mask = mask.cuda() 97 | 98 | nodes_feature = torch.matmul(global_feature[nodes], self.W) ## B x out_dim 99 | neighbors_feature = torch.matmul(global_feature[unique_nodes_list], self.W) ## N x out_dim 100 | B = nodes_feature.size(0) 101 | N = neighbors_feature.size(0) 102 | 103 | concate_feature = torch.cat((nodes_feature.repeat(1, N).view(B*N, -1), neighbors_feature.repeat(B, 1)), dim = 1) ## BN x 2out_dim 104 | e = torch.matmul(concate_feature, self.a).squeeze(1).view(B, N) 105 | # residual_feature = nodes_feature.repeat(1, N).view(B*N, -1) - neighbors_feature.repeat(B, 1) ## BN x out_dim 106 | # e = torch.matmul(residual_feature, self.a).squeeze(1).view(B, N) 107 | e = self.leakyrelu(e) 108 | neg_inf = -9e15 * torch.ones_like(e) 109 | e = torch.where(mask>0, e, neg_inf) 110 | attention = F.softmax(e, dim=1) ## B x N 111 | 112 | attention = F.dropout(attention, self.dropout, 
training=self.training) 113 | out_feature = torch.matmul(attention, neighbors_feature) 114 | 115 | out_feature = F.normalize(out_feature, p=2, dim=1) 116 | 117 | if self.nonlinear: 118 | out_feature = F.elu(out_feature) 119 | 120 | return out_feature 121 | 122 | def __repr__(self): 123 | return self.__class__.__name__ + '(' + str(self.in_dim) + ' -> ' + str(self.out_dim) + ')' 124 | -------------------------------------------------------------------------------- /gcn/train_lpgnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import sys 5 | sys.path.append(".") 6 | 7 | import time 8 | import argparse 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | from torch.autograd import Variable 15 | 16 | from utils import load_data, R2, load_jj_data, lp_refine 17 | from models import GCN, GAT 18 | 19 | # Training settings 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--no_cuda', action='store_true', default=False, 22 | help='Disables CUDA training.') 23 | parser.add_argument('--fastmode', action='store_true', default=False, 24 | help='Validate during training pass.') 25 | parser.add_argument('--use_gcn', action='store_true', default=False, 26 | help='use chebynet') 27 | parser.add_argument('--seed', type=int, default=19940423, help='Random seed.') 28 | parser.add_argument('--epochs', type=int, default=1500, 29 | help='Number of epochs to train.') 30 | parser.add_argument('--lr', type=float, default=0.001, 31 | help='Initial learning rate.') 32 | parser.add_argument('--weight_decay', type=float, default=5e-4, 33 | help='Weight decay (L2 loss on parameters).') 34 | parser.add_argument('--hidden', type=int, default=32, 35 | help='Number of hidden units.') 36 | parser.add_argument('--dropout', type=float, default=0.5, 37 | help='Dropout rate (1 - keep probability).') 38 | parser.add_argument('--nb_heads', type=int, default=4, 39 | help='Number of head attentions.') 40 | parser.add_argument('--alpha', type=float, default=0.2, 41 | help='Alpha for the leaky_relu.') 42 | 43 | args = parser.parse_args() 44 | args.cuda = not args.no_cuda and torch.cuda.is_available() 45 | print("use gpu:", args.cuda) 46 | 47 | np.random.seed(args.seed) 48 | torch.manual_seed(args.seed) 49 | if args.cuda: 50 | torch.cuda.manual_seed(args.seed) 51 | 52 | # Load data 53 | path = "./data/county/election/2012" 54 | adj, features, labels, idx_train, idx_val, idx_test = load_jj_data(path) 55 | ind_path = "./data/county/election/2016" 56 | _, ind_features, ind_labels, _, _, _ = load_jj_data(ind_path, load_partial=True) 57 | 58 | print("adj:", adj.shape) 59 | print("features:", features.shape) 60 | print("labels:", labels.shape) 61 | print("idx_train:", idx_train.shape) 62 | print("idx_val:", idx_val.shape) 63 | print("idx_test:", idx_test.shape) 64 | print("n_hid:", args.hidden) 65 | 66 | idx_train_lst = [] 67 | batch_size = 128 68 | for i in range(args.epochs): 69 | perm = torch.randperm(len(idx_train)) 70 | sample_idx = perm[:batch_size] 71 | samples = idx_train[sample_idx] 72 | idx_train_lst.append(samples) 73 | 74 | # Model and optimizer 75 | model = GCN(nfeat=features.shape[1], 76 | nhid=args.hidden, 77 | nclass=1, 78 | dropout=args.dropout) 79 | 80 | optimizer = optim.Adam(model.parameters(), 81 | lr=args.lr, weight_decay=args.weight_decay) 82 | 83 | if args.cuda: 84 | model.cuda() 85 | features = features.cuda() 86 
| ind_features = ind_features.cuda() 87 | adj = adj.cuda() 88 | labels = labels.cuda() 89 | ind_labels = ind_labels.cuda() 90 | idx_train = idx_train.cuda() 91 | idx_val = idx_val.cuda() 92 | idx_test = idx_test.cuda() 93 | print("\nstart training!\n\n") 94 | 95 | def loss(output, labels, idx): 96 | output = output.view(-1) 97 | l = F.mse_loss(output[idx], labels[idx]) 98 | #l = F.l1_loss(output[idx], labels[idx]) 99 | #l = - output[idx].dot(labels[idx]) 100 | return l 101 | 102 | def train(epoch): 103 | t = time.time() 104 | model.train() 105 | optimizer.zero_grad() 106 | idx = idx_train_lst[epoch] 107 | output = model(features, adj).view(-1) 108 | loss_train = loss(output, labels, idx) 109 | r2_train = R2(output[idx], labels[idx]) 110 | loss_train.backward() 111 | optimizer.step() 112 | 113 | if not args.fastmode: 114 | # Evaluate validation set performance separately, 115 | # deactivates dropout during validation run. 116 | model.eval() 117 | output = model(features, adj) 118 | 119 | loss_val = loss(output, labels, idx_val) 120 | r2_val = R2(output[idx_val], labels[idx_val]) 121 | print('Epoch: {:04d}'.format(epoch+1), 122 | 'loss_train: {:.4f}'.format(loss_train.item()), 123 | 'r2_train: {:.4f}'.format(r2_train.item()), 124 | 'loss_val: {:.4f}'.format(loss_val.item()), 125 | 'r2_val: {:.4f}'.format(r2_val.item()), 126 | 'time: {:.4f}s'.format(time.time() - t)) 127 | 128 | 129 | def test(adj, features, labels, test_meta): 130 | model.eval() 131 | output = model(features, adj).view(-1) 132 | ''' 133 | np.save("test_lp/output_all.npy", output.cpu().detach().numpy()) 134 | np.save("test_lp/labels_all.npy", labels.cpu().detach().numpy()) 135 | np.save("test_lp/idx_train.npy", idx_train.cpu().detach().numpy()) 136 | np.save("test_lp/idx_val.npy", idx_val.cpu().detach().numpy()) 137 | np.save("test_lp/idx_test.npy", idx_test.cpu().detach().numpy()) 138 | ''' 139 | 140 | loss_test = loss(output, labels, idx_test) 141 | r2_test = R2(output[idx_test], labels[idx_test]) 142 | 143 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 144 | #adj = torch.FloatTensor(np.load("test_lp/raw_S.npy")) 145 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj) 146 | lp_r2_test = R2(lp_output, labels[idx_test]) 147 | print("Test set ({}) results:".format(test_meta), 148 | "loss= {:.4f}".format(loss_test.item()), 149 | "R2= {:.4f}".format(r2_test.item()), 150 | "LP_R2= {:.4f}\n".format(lp_r2_test.item())) 151 | 152 | 153 | # Train model 154 | t_total = time.time() 155 | for epoch in range(args.epochs): 156 | train(epoch) 157 | print("Optimization Finished!") 158 | print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) 159 | 160 | # Testing 161 | test(adj, features, labels, "2012") 162 | test(adj, ind_features, ind_labels, "2016") 163 | -------------------------------------------------------------------------------- /gcn/train_cgnn.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import sys 5 | sys.path.append(".") 6 | 7 | import time 8 | import argparse 9 | import numpy as np 10 | 11 | import torch 12 | import torch.nn.functional as F 13 | import torch.optim as optim 14 | from torch.autograd import Variable 15 | 16 | #from pygcn.utils import load_data, R2 17 | #from pygcn.models import GCN 18 | from utils import load_data, R2, load_jj_data, lp_refine 19 | from models import GCN, GAT 20 | from torch import matmul 21 | from gpytorch import inv_matmul, logdet 
22 | from gpytorch.utils import linear_cg 23 | 24 | # Training settings 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--no_cuda', action='store_true', default=False, 27 | help='Disables CUDA training.') 28 | parser.add_argument('--fastmode', action='store_true', default=False, 29 | help='Validate during training pass.') 30 | parser.add_argument('--use_gcn', action='store_true', default=False, 31 | help='use chebynet') 32 | parser.add_argument('--seed', type=int, default=19940423, help='Random seed.') 33 | parser.add_argument('--epochs', type=int, default=1500, 34 | help='Number of epochs to train.') 35 | parser.add_argument('--lr', type=float, default=0.001, 36 | help='Initial learning rate.') 37 | parser.add_argument('--weight_decay', type=float, default=5e-4, 38 | help='Weight decay (L2 loss on parameters).') 39 | parser.add_argument('--hidden', type=int, default=32, 40 | help='Number of hidden units.') 41 | parser.add_argument('--dropout', type=float, default=0.5, 42 | help='Dropout rate (1 - keep probability).') 43 | parser.add_argument('--nb_heads', type=int, default=4, 44 | help='Number of head attentions.') 45 | parser.add_argument('--alpha', type=float, default=0.2, 46 | help='Alpha for the leaky_relu.') 47 | parser.add_argument('--batch_size', type=int, default=256, 48 | help='batch size.') 49 | 50 | 51 | args = parser.parse_args() 52 | args.cuda = not args.no_cuda and torch.cuda.is_available() 53 | print("use gpu:", args.cuda) 54 | 55 | np.random.seed(args.seed) 56 | torch.manual_seed(args.seed) 57 | if args.cuda: 58 | torch.cuda.manual_seed(args.seed) 59 | 60 | # Load data 61 | path = "./data/county/election/2012" 62 | adj, features, labels, idx_train, idx_val, idx_test = load_jj_data(path) 63 | ind_path = "./data/county/election/2016" 64 | _, ind_features, ind_labels, _, _, _ = load_jj_data(ind_path, load_partial=True) 65 | 66 | print("adj:", adj.shape) 67 | print("features:", features.shape) 68 | print("labels:", labels.shape, torch.max(labels), torch.min(labels)) 69 | print("idx_train:", idx_train.shape, torch.max(idx_train), torch.min(idx_train)) 70 | print("idx_val:", idx_val.shape, torch.max(idx_val), torch.min(idx_val)) 71 | print("idx_test:", idx_test.shape, torch.max(idx_test), torch.min(idx_test)) 72 | print("n_hid:", args.hidden) 73 | print() 74 | 75 | idx_train_lst = [] 76 | for i in range(args.epochs): 77 | perm = torch.randperm(len(idx_train)) 78 | sample_idx = perm[:args.batch_size] 79 | samples = idx_train[sample_idx] 80 | idx_train_lst.append(samples) 81 | I = torch.eye(adj.size(0)) 82 | 83 | # Model and optimizer 84 | model = GCN(nfeat=features.shape[1], 85 | nhid=args.hidden, 86 | nclass=1, 87 | dropout=args.dropout) 88 | optimizer = optim.Adam(model.parameters(), 89 | lr=args.lr, weight_decay=args.weight_decay) 90 | 91 | coeffs = Variable(torch.FloatTensor([1., 3.0]).cuda() if args.cuda else torch.FloatTensor([1., 3.0]) , requires_grad=True) 92 | coeffs_optimizer = optim.SGD([coeffs], lr=1e-1, momentum=0.0) 93 | 94 | if args.cuda: 95 | model.cuda() 96 | features = features.cuda() 97 | ind_features = ind_features.cuda() 98 | adj = adj.cuda() 99 | labels = labels.cuda() 100 | ind_labels = ind_labels.cuda() 101 | idx_train = idx_train.cuda() 102 | idx_val = idx_val.cuda() 103 | idx_test = idx_test.cuda() 104 | I = I.cuda() 105 | print("\nstart training!\n\n") 106 | 107 | def setdiff(n, idx): 108 | idx = idx.cpu().detach().numpy() 109 | cp_idx = np.setdiff1d(np.arange(n), idx) 110 | return cp_idx 111 | 112 | def loss(output, labels, idx, S, 
coeffs, add_logdet): 113 | output = output.view(-1) 114 | rL = labels[idx] - output[idx] 115 | S = S.to_dense() 116 | 117 | Gamma = (I - torch.tanh(coeffs[0])*S)*torch.exp(coeffs[1]) 118 | cp_idx = setdiff(len(S), idx) 119 | loss1 = rL.dot(matmul(Gamma[idx, :][:, idx], rL) - matmul(Gamma[idx, :][:, cp_idx], inv_matmul(Gamma[cp_idx, :][:, cp_idx], matmul(Gamma[cp_idx, :][:, idx], rL)))) 120 | loss2 = torch.Tensor([0.]).cuda() if args.cuda else torch.Tensor([0.]) 121 | if add_logdet: loss2 = logdet(Gamma) - logdet(Gamma[cp_idx, :][:, cp_idx]) 122 | l = loss1 - loss2 123 | 124 | return l/len(idx) 125 | 126 | def train(epoch): 127 | t = time.time() 128 | 129 | # without logdet 130 | model.train() 131 | optimizer.zero_grad() 132 | idx = idx_train_lst[epoch] 133 | output = model(features, adj).view(-1) 134 | r2_train = R2(output[idx], labels[idx]) 135 | loss_train = loss(output, labels, idx, adj, coeffs, False) 136 | loss_train.backward() 137 | optimizer.step() 138 | 139 | # with logdet 140 | if epoch % 10 == 0: 141 | model.train() 142 | coeffs_optimizer.zero_grad() 143 | output = model(features, adj).view(-1) 144 | loss_train = loss(output, labels, idx, adj, coeffs, True) 145 | loss_train.backward() 146 | coeffs_optimizer.step() 147 | 148 | if not args.fastmode: 149 | # Evaluate validation set performance separately, 150 | # deactivates dropout during validation run. 151 | model.eval() 152 | output = model(features, adj) 153 | 154 | loss_val = loss(output, labels, idx_val, adj, coeffs, True) 155 | r2_val = R2(output[idx_val], labels[idx_val]) 156 | print('Epoch: {:04d}'.format(epoch+1), 157 | 'loss_train: {:.4f}'.format(loss_train.item()), 158 | 'r2_train: {:.4f}'.format(r2_train.item()), 159 | 'loss_val: {:.4f}'.format(loss_val.item()), 160 | 'r2_val: {:.4f}'.format(r2_val.item()), 161 | 'alpha: {}'.format(torch.tanh(coeffs[0])), 162 | 'beta: {}'.format(torch.exp(coeffs[1])), 163 | 'time: {:.4f}s'.format(time.time() - t)) 164 | 165 | 166 | def test(adj, features, labels, test_meta): 167 | model.eval() 168 | output = model(features, adj).view(-1) 169 | 170 | loss_test = loss(output, labels, idx_test, adj, coeffs, True) 171 | r2_test = R2(output[idx_test], labels[idx_test]) 172 | 173 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 174 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj, torch.tanh(coeffs[0]).item(), torch.exp(coeffs[1]).item()) 175 | lp_r2_test = R2(lp_output, labels[idx_test]) 176 | lp_output_raw_conv = lp_refine(idx_test, idx_train, labels, output, adj) 177 | lp_r2_test_raw_conv = R2(lp_output_raw_conv, labels[idx_test]) 178 | print("Test set ({}) results:".format(test_meta), 179 | "loss= {:.4f}".format(loss_test.item()), 180 | "R2= {:.4f}".format(r2_test.item()), 181 | "LP_R2= {:.4f}".format(lp_r2_test.item()), 182 | "LP_R2_raw_conv= {:.4f}\n".format(lp_r2_test_raw_conv.item())) 183 | 184 | 185 | # Train model 186 | t_total = time.time() 187 | for epoch in range(args.epochs): 188 | train(epoch) 189 | print("Optimization Finished!") 190 | print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) 191 | 192 | # Testing 193 | test(adj, features, labels, "2012") 194 | test(adj, ind_features, ind_labels, "2016") 195 | -------------------------------------------------------------------------------- /sgc/train_lpgnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code was modified from the GCN implementation in DGL examples. 
3 | Simplifying Graph Convolutional Networks 4 | Paper: https://arxiv.org/abs/1902.07153 5 | Code: https://github.com/Tiiiger/SGC 6 | SGC implementation in DGL. 7 | """ 8 | import argparse, time, math 9 | import numpy as np 10 | from sklearn.metrics import r2_score 11 | import scipy.sparse as sp 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | 17 | import dgl 18 | import dgl.function as fn 19 | from dgl import DGLGraph 20 | from dgl.data import register_data_args, load_data 21 | #from dgl.nn.pytorch.conv import SGConv 22 | from sgconv import SGConv 23 | from utils import normalize, sparse_mx_to_torch_sparse_tensor, lp_refine 24 | 25 | def evaluate(model, g, features, labels, mask): 26 | model.eval() 27 | with torch.no_grad(): 28 | pred = model(g, features)[mask] # only compute the evaluation set 29 | labels = labels[mask] 30 | return compute_r2(pred, labels) 31 | 32 | def evaluate_test(model, g, inputs, labels, test_mask, lp_dict, meta): 33 | model.eval() 34 | with torch.no_grad(): 35 | pred = model(g, inputs).squeeze() 36 | 37 | output = pred.cuda() 38 | labels = labels.cuda() 39 | idx_test = lp_dict['idx_test'] 40 | idx_train = lp_dict['idx_train'] 41 | adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj'])) 42 | #print(adj.to_dense()[np.arange(100), np.arange(100)+1]) 43 | 44 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 45 | loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze()) 46 | r2_test = compute_r2(output[idx_test], labels[idx_test]) 47 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj) 48 | lp_r2_test = compute_r2(lp_output, labels[idx_test]) 49 | 50 | print("------------") 51 | print("election year {}".format(meta)) 52 | print("loss:", loss.item()) 53 | print("raw_r2:", r2_test) 54 | print("refined_r2:", lp_r2_test) 55 | print("------------") 56 | 57 | def load_cls_data(args): 58 | data = load_data(args) 59 | features = torch.FloatTensor(data.features) 60 | labels = torch.LongTensor(data.labels) 61 | n_classes = data.num_labels 62 | 63 | if hasattr(torch, 'BoolTensor'): 64 | train_mask = torch.BoolTensor(data.train_mask) 65 | val_mask = torch.BoolTensor(data.val_mask) 66 | test_mask = torch.BoolTensor(data.test_mask) 67 | else: 68 | train_mask = torch.ByteTensor(data.train_mask) 69 | val_mask = torch.ByteTensor(data.val_mask) 70 | test_mask = torch.ByteTensor(data.test_mask) 71 | 72 | g = DGLGraph(data.graph) 73 | g.add_edges(g.nodes(), g.nodes()) 74 | 75 | return g, features, labels, n_classes, train_mask, val_mask, test_mask 76 | 77 | def load_reg_data(args): 78 | path = './data/county/election/2012' 79 | adj = np.load(path+"/A.npy") 80 | labels = np.load(path+"/labels.npy") 81 | features = np.load(path+"/feats.npy") 82 | idx_train = np.load(path+"/train_idx.npy")-1 83 | idx_val = np.load(path+"/val_idx.npy")-1 84 | idx_test = np.load(path+"/test_idx.npy")-1 85 | n = len(adj) 86 | train_mask = np.zeros(n).astype(bool) 87 | train_mask[idx_train] = True 88 | val_mask = np.zeros(n).astype(bool) 89 | val_mask[idx_val] = True 90 | test_mask = np.zeros(n).astype(bool) 91 | test_mask[idx_test] = True 92 | n_classes = 1 93 | sp_adj = sp.coo_matrix(adj) 94 | g = dgl.graph((torch.LongTensor(sp_adj.row), torch.LongTensor(sp_adj.col))) 95 | lp_dict = {'idx_test': torch.LongTensor(idx_test), 'idx_train': torch.LongTensor(idx_train), 'sp_adj': sp_adj.astype(float)} 96 | 97 | features = torch.FloatTensor(features) 98 | labels = torch.FloatTensor(labels) 99 | train_mask = 
torch.BoolTensor(train_mask) 100 | val_mask = torch.BoolTensor(val_mask) 101 | test_mask = torch.BoolTensor(test_mask) 102 | 103 | path = './data/county/election/2016' 104 | ind_features = torch.FloatTensor(np.load(path+"/feats.npy")) 105 | ind_labels = torch.FloatTensor(np.load(path+"/labels.npy")) 106 | 107 | return g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels 108 | 109 | def loss_fcn(pred, labels): 110 | pred, labels = pred.squeeze(), labels.squeeze() 111 | return F.mse_loss(pred, labels) 112 | 113 | def compute_r2(pred, labels): 114 | pred, labels = pred.squeeze(), labels.squeeze() 115 | return r2_score(labels.cpu().detach().numpy(), pred.cpu().detach().numpy()) 116 | 117 | def main(args): 118 | # load and preprocess dataset 119 | g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels = load_reg_data(args) 120 | n_edges = g.number_of_edges() 121 | in_feats = features.shape[1] 122 | 123 | print("""----Data statistics------' 124 | #Edges %d 125 | #Classes %d 126 | #Train samples %d 127 | #Val samples %d 128 | #Test samples %d""" % 129 | (n_edges, n_classes, 130 | train_mask.int().sum().item(), 131 | val_mask.int().sum().item(), 132 | test_mask.int().sum().item())) 133 | 134 | if args.gpu < 0: 135 | cuda = False 136 | else: 137 | cuda = True 138 | torch.cuda.set_device(args.gpu) 139 | features = features.cuda() 140 | ind_features = ind_features.cuda() 141 | labels = labels.cuda() 142 | ind_labels = ind_labels.cuda() 143 | train_mask = train_mask.cuda() 144 | val_mask = val_mask.cuda() 145 | test_mask = test_mask.cuda() 146 | 147 | # create SGC model 148 | model = SGConv(in_feats, 149 | n_classes, 150 | k=2, 151 | n_hid=32, 152 | cached=True, 153 | bias=args.bias) 154 | 155 | if cuda: model.cuda() 156 | 157 | # use optimizer 158 | optimizer = torch.optim.Adam(model.parameters(), 159 | lr=args.lr, 160 | weight_decay=args.weight_decay) 161 | 162 | # initialize graph 163 | dur = [] 164 | for epoch in range(args.n_epochs): 165 | model.train() 166 | if epoch >= 3: 167 | t0 = time.time() 168 | # forward 169 | pred = model(g, features) # only compute the train set 170 | loss = loss_fcn(pred[train_mask], labels[train_mask]) 171 | train_r2 = compute_r2(pred[train_mask], labels[train_mask]) 172 | 173 | optimizer.zero_grad() 174 | loss.backward() 175 | optimizer.step() 176 | 177 | if epoch >= 3: 178 | dur.append(time.time() - t0) 179 | 180 | r2 = evaluate(model, g, features, labels, val_mask) 181 | print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | R2 {:.4f} | " 182 | "ETputs(KTEPS) {:.2f}". 
format(epoch, np.mean(dur), loss.item(), 183 | r2, n_edges / np.mean(dur) / 1000)) 184 | 185 | print() 186 | evaluate_test(model, g, features, labels, test_mask, lp_dict, "2012") 187 | evaluate_test(model, g, ind_features, ind_labels, test_mask, lp_dict, "2016") 188 | 189 | if __name__ == '__main__': 190 | parser = argparse.ArgumentParser(description='SGC') 191 | register_data_args(parser) 192 | parser.add_argument("--gpu", type=int, default=-1, 193 | help="gpu") 194 | parser.add_argument("--lr", type=float, default=0.2, 195 | help="learning rate") 196 | parser.add_argument("--bias", action='store_true', default=False, 197 | help="flag to use bias") 198 | parser.add_argument("--n-epochs", type=int, default=100, 199 | help="number of training epochs") 200 | parser.add_argument("--weight-decay", type=float, default=5e-6, 201 | help="Weight for L2 loss") 202 | parser.add_argument('--seed', type=int, default=19940423, help='Random seed.') 203 | args = parser.parse_args() 204 | 205 | print(args) 206 | np.random.seed(args.seed) 207 | torch.manual_seed(args.seed) 208 | torch.cuda.manual_seed(args.seed) 209 | 210 | main(args) 211 | -------------------------------------------------------------------------------- /sgc/train_cgnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | This code was modified from the GCN implementation in DGL examples. 3 | Simplifying Graph Convolutional Networks 4 | Paper: https://arxiv.org/abs/1902.07153 5 | Code: https://github.com/Tiiiger/SGC 6 | SGC implementation in DGL. 7 | """ 8 | import argparse, time, math 9 | import numpy as np 10 | from sklearn.metrics import r2_score 11 | import scipy.sparse as sp 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from torch.autograd import Variable 17 | 18 | from gpytorch import inv_matmul, logdet 19 | from gpytorch.utils import linear_cg 20 | from torch import matmul 21 | 22 | import dgl 23 | import dgl.function as fn 24 | from dgl import DGLGraph 25 | from dgl.data import register_data_args, load_data 26 | #from dgl.nn.pytorch.conv import SGConv 27 | from sgconv import SGConv 28 | from utils import normalize, sparse_mx_to_torch_sparse_tensor, lp_refine 29 | 30 | def evaluate(model, g, features, labels, mask): 31 | model.eval() 32 | with torch.no_grad(): 33 | pred = model(g, features)[mask] # only compute the evaluation set 34 | labels = labels[mask] 35 | return compute_r2(pred, labels) 36 | 37 | def evaluate_test(model, g, inputs, labels, test_mask, lp_dict, coeffs, meta): 38 | model.eval() 39 | with torch.no_grad(): 40 | pred = model(g, inputs).squeeze() 41 | 42 | output = pred.cuda() 43 | labels = labels.cuda() 44 | idx_test = lp_dict['idx_test'] 45 | idx_train = lp_dict['idx_train'] 46 | adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj'])) 47 | #print(adj.to_dense()[np.arange(100), np.arange(100)+1]) 48 | 49 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 50 | loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze()) 51 | r2_test = compute_r2(output[idx_test], labels[idx_test]) 52 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj, torch.tanh(coeffs[0]).item(), torch.exp(coeffs[1]).item()) 53 | lp_r2_test = compute_r2(lp_output, labels[idx_test]) 54 | lp_output_raw_cov = lp_refine(idx_test, idx_train, labels, output, adj) 55 | lp_r2_test_raw_cov = compute_r2(lp_output_raw_cov, labels[idx_test]) 56 | 57 | print("------------") 58 | print("election year {}".format(meta)) 59 | 
print("loss:", loss.item()) 60 | print("raw_r2:", r2_test) 61 | print("refined_r2:", lp_r2_test) 62 | print("refined_r2_raw_cov:", lp_r2_test_raw_cov) 63 | print("------------") 64 | 65 | 66 | def load_cls_data(args): 67 | data = load_data(args) 68 | features = torch.FloatTensor(data.features) 69 | labels = torch.LongTensor(data.labels) 70 | n_classes = data.num_labels 71 | 72 | if hasattr(torch, 'BoolTensor'): 73 | train_mask = torch.BoolTensor(data.train_mask) 74 | val_mask = torch.BoolTensor(data.val_mask) 75 | test_mask = torch.BoolTensor(data.test_mask) 76 | else: 77 | train_mask = torch.ByteTensor(data.train_mask) 78 | val_mask = torch.ByteTensor(data.val_mask) 79 | test_mask = torch.ByteTensor(data.test_mask) 80 | 81 | g = DGLGraph(data.graph) 82 | g.add_edges(g.nodes(), g.nodes()) 83 | 84 | return g, features, labels, n_classes, train_mask, val_mask, test_mask 85 | 86 | def load_reg_data(args): 87 | path = './data/county/election/2012' 88 | adj = np.load(path+"/A.npy") 89 | labels = np.load(path+"/labels.npy") 90 | features = np.load(path+"/feats.npy") 91 | idx_train = np.load(path+"/train_idx.npy")-1 92 | idx_val = np.load(path+"/val_idx.npy")-1 93 | idx_test = np.load(path+"/test_idx.npy")-1 94 | n = len(adj) 95 | train_mask = np.zeros(n).astype(bool) 96 | train_mask[idx_train] = True 97 | val_mask = np.zeros(n).astype(bool) 98 | val_mask[idx_val] = True 99 | test_mask = np.zeros(n).astype(bool) 100 | test_mask[idx_test] = True 101 | n_classes = 1 102 | sp_adj = sp.coo_matrix(adj) 103 | g = dgl.graph((torch.LongTensor(sp_adj.row), torch.LongTensor(sp_adj.col))) 104 | lp_dict = {'idx_test': torch.LongTensor(idx_test), 'idx_train': torch.LongTensor(idx_train), 'sp_adj': sp_adj.astype(float), 'adj': sparse_mx_to_torch_sparse_tensor(normalize(sp_adj.astype(float)))} 105 | 106 | features = torch.FloatTensor(features) 107 | labels = torch.FloatTensor(labels) 108 | train_mask = torch.BoolTensor(train_mask) 109 | val_mask = torch.BoolTensor(val_mask) 110 | test_mask = torch.BoolTensor(test_mask) 111 | 112 | path = './data/county/election/2016' 113 | ind_features = torch.FloatTensor(np.load(path+"/feats.npy")) 114 | ind_labels = torch.FloatTensor(np.load(path+"/labels.npy")) 115 | 116 | return g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels 117 | 118 | def loss_fcn(output, labels, idx, S, coeffs, add_logdet): 119 | output, labels = output.squeeze(), labels.squeeze() 120 | rL = labels - output 121 | S = S.to_dense() 122 | Gamma = (torch.eye(S.size(0)).cuda() - torch.tanh(coeffs[0]) * S.cuda()) * torch.exp(coeffs[1]) 123 | cp_idx = setdiff(len(S), idx) 124 | 125 | loss1 = rL.dot(matmul(Gamma[idx, :][:, idx], rL) - matmul(Gamma[idx, :][:, cp_idx], inv_matmul(Gamma[cp_idx, :][:, cp_idx], matmul(Gamma[cp_idx, :][:, idx], rL)))) 126 | 127 | loss2 = torch.Tensor([0.]).cuda() 128 | if add_logdet: loss2 = logdet(Gamma) - logdet(Gamma[cp_idx, :][:, cp_idx]) 129 | l = loss1 - loss2 130 | return l/len(idx) 131 | 132 | def setdiff(n, idx): 133 | idx = idx.cpu().detach().numpy() 134 | cp_idx = np.setdiff1d(np.arange(n), idx) 135 | return cp_idx 136 | 137 | def compute_r2(pred, labels): 138 | pred, labels = pred.squeeze(), labels.squeeze() 139 | return r2_score(labels.cpu().detach().numpy(), pred.cpu().detach().numpy()) 140 | 141 | def main(args): 142 | # load and preprocess dataset 143 | g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels = load_reg_data(args) 144 | n_edges = g.number_of_edges() 145 | 
in_feats = features.shape[1] 146 | 147 | print("""----Data statistics------' 148 | #Edges %d 149 | #Classes %d 150 | #Train samples %d 151 | #Val samples %d 152 | #Test samples %d""" % 153 | (n_edges, n_classes, 154 | train_mask.int().sum().item(), 155 | val_mask.int().sum().item(), 156 | test_mask.int().sum().item())) 157 | 158 | if args.gpu < 0: 159 | cuda = False 160 | else: 161 | cuda = True 162 | torch.cuda.set_device(args.gpu) 163 | features = features.cuda() 164 | ind_features = ind_features.cuda() 165 | labels = labels.cuda() 166 | ind_labels = ind_labels.cuda() 167 | train_mask = train_mask.cuda() 168 | val_mask = val_mask.cuda() 169 | test_mask = test_mask.cuda() 170 | 171 | # create SGC model 172 | model = SGConv(in_feats, 173 | n_classes, 174 | k=2, 175 | n_hid=32, 176 | cached=True, 177 | bias=args.bias) 178 | 179 | if cuda: model.cuda() 180 | 181 | # use optimizer 182 | optimizer = torch.optim.Adam(model.parameters(), 183 | lr=args.lr, 184 | weight_decay=args.weight_decay) 185 | coeffs = Variable(torch.FloatTensor([1., 3.0]).cuda() if cuda else torch.FloatTensor([1., 3.0]) , requires_grad=True) 186 | coeffs_optimizer = torch.optim.SGD([coeffs], lr=1e-1, momentum=0.0) 187 | 188 | # initialize graph 189 | dur = [] 190 | for epoch in range(args.n_epochs): 191 | model.train() 192 | if epoch >= 3: 193 | t0 = time.time() 194 | # forward 195 | pred = model(g, features) # only compute the train set 196 | loss = loss_fcn(pred[train_mask], labels[train_mask], lp_dict['idx_train'], lp_dict['adj'], coeffs, False) 197 | optimizer.zero_grad() 198 | loss.backward() 199 | optimizer.step() 200 | 201 | if epoch % 10 == 0: 202 | model.train() 203 | pred = model(g, features) 204 | loss = loss_fcn(pred[train_mask], labels[train_mask], lp_dict['idx_train'], lp_dict['adj'], coeffs, True) 205 | train_r2 = compute_r2(pred[train_mask], labels[train_mask]) 206 | coeffs_optimizer.zero_grad() 207 | loss.backward() 208 | coeffs_optimizer.step() 209 | 210 | if epoch >= 3: 211 | dur.append(time.time() - t0) 212 | 213 | r2 = evaluate(model, g, features, labels, val_mask) 214 | print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | R2 {:.4f} | " 215 | "ETputs(KTEPS) {:.2f}". 
format(epoch, np.mean(dur), loss.item(), 216 | r2, n_edges / np.mean(dur) / 1000)) 217 | 218 | print() 219 | evaluate_test(model, g, features, labels, test_mask, lp_dict, coeffs, "2012") 220 | evaluate_test(model, g, ind_features, ind_labels, test_mask, lp_dict, coeffs,"2016") 221 | 222 | if __name__ == '__main__': 223 | parser = argparse.ArgumentParser(description='SGC') 224 | register_data_args(parser) 225 | parser.add_argument("--gpu", type=int, default=-1, 226 | help="gpu") 227 | parser.add_argument("--lr", type=float, default=0.2, 228 | help="learning rate") 229 | parser.add_argument("--bias", action='store_true', default=False, 230 | help="flag to use bias") 231 | parser.add_argument("--n-epochs", type=int, default=100, 232 | help="number of training epochs") 233 | parser.add_argument("--weight-decay", type=float, default=5e-6, 234 | help="Weight for L2 loss") 235 | parser.add_argument('--seed', type=int, default=19940423, help='Random seed.') 236 | args = parser.parse_args() 237 | 238 | print(args) 239 | np.random.seed(args.seed) 240 | torch.manual_seed(args.seed) 241 | torch.cuda.manual_seed(args.seed) 242 | 243 | main(args) 244 | -------------------------------------------------------------------------------- /gat/train_lpgnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Graph Attention Networks in DGL using SPMV optimization. 3 | Multiple heads are also batched together for faster training. 4 | References 5 | ---------- 6 | Paper: https://arxiv.org/abs/1710.10903 7 | Author's code: https://github.com/PetarV-/GAT 8 | Pytorch implementation: https://github.com/Diego999/pyGAT 9 | """ 10 | 11 | import argparse 12 | import numpy as np 13 | import networkx as nx 14 | import time 15 | import torch 16 | import torch.nn.functional as F 17 | import dgl 18 | from dgl import DGLGraph 19 | from dgl.data import register_data_args, load_data 20 | from gat import GAT 21 | from utils import EarlyStopping 22 | import scipy.sparse as sp 23 | from sklearn.metrics import r2_score 24 | from utils import sparse_mx_to_torch_sparse_tensor, normalize, lp_refine 25 | 26 | def compute_r2(pred, labels): 27 | return r2_score(labels.cpu().detach().numpy(), pred.cpu().detach().numpy()) 28 | 29 | def evaluate(model, features, labels, mask): 30 | model.eval() 31 | with torch.no_grad(): 32 | pred = model(features) 33 | pred = pred[mask] 34 | labels = labels[mask] 35 | return compute_r2(pred, labels) 36 | 37 | def evaluate_test(model, features, labels, test_mask, lp_dict, meta="2012"): 38 | model.eval() 39 | with torch.no_grad(): 40 | output = model(features).squeeze() 41 | 42 | output = output.cuda() 43 | labels = labels.cuda() 44 | idx_test = lp_dict['idx_test'] 45 | idx_train = lp_dict['idx_train'] 46 | adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj'])) 47 | 48 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 49 | loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze()) 50 | r2_test = compute_r2(output[idx_test], labels[idx_test]) 51 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj) 52 | lp_r2_test = compute_r2(lp_output, labels[idx_test]) 53 | 54 | print("------------") 55 | print("election year {}".format(meta)) 56 | print("loss:", loss.item()) 57 | print("raw_r2:", r2_test) 58 | print("refined_r2:", lp_r2_test) 59 | print("------------") 60 | 61 | 62 | def load_cls_data(args): 63 | data = load_data(args) 64 | features = torch.FloatTensor(data.features) 65 | labels = torch.LongTensor(data.labels) 
66 | n_classes = data.num_labels 67 | 68 | if hasattr(torch, 'BoolTensor'): 69 | train_mask = torch.BoolTensor(data.train_mask) 70 | val_mask = torch.BoolTensor(data.val_mask) 71 | test_mask = torch.BoolTensor(data.test_mask) 72 | else: 73 | train_mask = torch.ByteTensor(data.train_mask) 74 | val_mask = torch.ByteTensor(data.val_mask) 75 | test_mask = torch.ByteTensor(data.test_mask) 76 | 77 | g = data.graph 78 | # add self loop 79 | g.remove_edges_from(nx.selfloop_edges(g)) 80 | g = DGLGraph(g) 81 | g.add_edges(g.nodes(), g.nodes()) 82 | row = g.edges()[0] 83 | col = g.edges()[1] 84 | g = dgl.graph((row, col)) 85 | 86 | return g, features, labels, n_classes, train_mask, val_mask, test_mask 87 | 88 | def load_reg_data(args): 89 | path = './data/county/election/2012' 90 | adj = np.load(path+"/A.npy") 91 | labels = np.load(path+"/labels.npy") 92 | features = np.load(path+"/feats.npy") 93 | idx_train = np.load(path+"/train_idx.npy")-1 94 | idx_val = np.load(path+"/val_idx.npy")-1 95 | idx_test = np.load(path+"/test_idx.npy")-1 96 | n = len(adj) 97 | train_mask = np.zeros(n).astype(bool) 98 | train_mask[idx_train] = True 99 | val_mask = np.zeros(n).astype(bool) 100 | val_mask[idx_val] = True 101 | test_mask = np.zeros(n).astype(bool) 102 | test_mask[idx_test] = True 103 | n_classes = 1 104 | sp_adj = sp.coo_matrix(adj) 105 | g = dgl.graph((torch.LongTensor(sp_adj.row), torch.LongTensor(sp_adj.col))) 106 | lp_dict = {'idx_test': torch.LongTensor(idx_test), 'idx_train': torch.LongTensor(idx_train), 'sp_adj': sp_adj.astype(float)} 107 | 108 | features = torch.FloatTensor(features) 109 | labels = torch.FloatTensor(labels) 110 | train_mask = torch.BoolTensor(train_mask) 111 | val_mask = torch.BoolTensor(val_mask) 112 | test_mask = torch.BoolTensor(test_mask) 113 | 114 | path = './data/county/election/2016' 115 | ind_features = torch.FloatTensor(np.load(path+"/feats.npy")) 116 | ind_labels = torch.FloatTensor(np.load(path+"/labels.npy")) 117 | 118 | return g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels 119 | 120 | def loss_fcn(pred, labels): 121 | pred, labels = pred.squeeze(), labels.squeeze() 122 | return F.mse_loss(pred, labels) 123 | 124 | def main(args): 125 | # load and preprocess dataset 126 | g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels = load_reg_data(args) 127 | num_feats = features.shape[1] 128 | n_edges = g.number_of_edges() 129 | 130 | print("""----Data statistics------' 131 | #use cuda: %d 132 | #Edges %d 133 | #Classes %d 134 | #Train samples %d 135 | #Val samples %d 136 | #Test samples %d""" % 137 | (args.gpu, n_edges, n_classes, 138 | train_mask.int().sum().item(), 139 | val_mask.int().sum().item(), 140 | test_mask.int().sum().item())) 141 | 142 | if args.gpu < 0: 143 | cuda = False 144 | else: 145 | cuda = True 146 | torch.cuda.set_device(args.gpu) 147 | features = features.cuda() 148 | ind_features = ind_features.cuda() 149 | labels = labels.cuda() 150 | ind_labels = ind_labels.cuda() 151 | train_mask = train_mask.cuda() 152 | val_mask = val_mask.cuda() 153 | test_mask = test_mask.cuda() 154 | 155 | # create model 156 | heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads] 157 | model = GAT(g, 158 | args.num_layers, 159 | num_feats, 160 | args.num_hidden, 161 | n_classes, 162 | heads, 163 | F.elu, 164 | args.in_drop, 165 | args.attn_drop, 166 | args.negative_slope, 167 | args.residual, 168 | args.bias) 169 | print(model) 170 | if args.early_stop: 171 | stopper = 
EarlyStopping(patience=100) 172 | if cuda: 173 | model.cuda() 174 | 175 | # use optimizer 176 | optimizer = torch.optim.Adam( 177 | model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 178 | 179 | # initialize graph 180 | dur = [] 181 | for epoch in range(args.epochs): 182 | model.train() 183 | if epoch >= 3: 184 | t0 = time.time() 185 | # forward 186 | pred = model(features) 187 | loss = loss_fcn(pred[train_mask], labels[train_mask]) 188 | 189 | optimizer.zero_grad() 190 | loss.backward() 191 | optimizer.step() 192 | 193 | if epoch >= 3: 194 | dur.append(time.time() - t0) 195 | 196 | train_r2 = compute_r2(pred[train_mask], labels[train_mask]) 197 | 198 | if args.fastmode: 199 | val_r2 = compute_r2(pred[val_mask], labels[val_mask]) 200 | else: 201 | val_r2 = evaluate(model, features, labels, val_mask) 202 | if args.early_stop: 203 | if stopper.step(val_r2, model): 204 | break 205 | 206 | if epoch > 3: 207 | print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainR2 {:.4f} |" 208 | " Val R2 {:.4f} | ETputs(KTEPS) {:.2f}". 209 | format(epoch, np.mean(dur), loss.item(), train_r2, 210 | val_r2, n_edges / np.mean(dur) / 1000)) 211 | 212 | print() 213 | if args.early_stop: 214 | model.load_state_dict(torch.load('es_checkpoint.pt')) 215 | evaluate_test(model, features, labels, test_mask, lp_dict, meta="2012") 216 | evaluate_test(model, ind_features, ind_labels, test_mask, lp_dict, meta="2016") 217 | 218 | if __name__ == '__main__': 219 | 220 | parser = argparse.ArgumentParser(description='GAT') 221 | register_data_args(parser) 222 | parser.add_argument("--gpu", type=int, default=-1, 223 | help="which GPU to use. Set -1 to use CPU.") 224 | parser.add_argument("--epochs", type=int, default=100, 225 | help="number of training epochs") 226 | parser.add_argument("--num-heads", type=int, default=8, 227 | help="number of hidden attention heads") 228 | parser.add_argument("--num-out-heads", type=int, default=1, 229 | help="number of output attention heads") 230 | parser.add_argument("--num-layers", type=int, default=1, 231 | help="number of hidden layers") 232 | parser.add_argument("--num-hidden", type=int, default=32, 233 | help="number of hidden units") 234 | parser.add_argument("--residual", action="store_true", default=False, 235 | help="use residual connection") 236 | parser.add_argument("--in-drop", type=float, default=.6, 237 | help="input feature dropout") 238 | parser.add_argument("--attn-drop", type=float, default=.6, 239 | help="attention dropout") 240 | parser.add_argument("--lr", type=float, default=0.005, 241 | help="learning rate") 242 | parser.add_argument('--weight-decay', type=float, default=5e-4, 243 | help="weight decay") 244 | parser.add_argument('--negative-slope', type=float, default=0.2, 245 | help="the negative slope of leaky relu") 246 | parser.add_argument('--early-stop', action='store_true', default=False, 247 | help="indicates whether to use early stop or not") 248 | parser.add_argument('--bias', action='store_true', default=False, 249 | help="whether to add Dense layer bias") 250 | parser.add_argument('--fastmode', action="store_true", default=False, 251 | help="skip re-evaluate the validation set") 252 | args = parser.parse_args() 253 | print(args) 254 | 255 | main(args) 256 | -------------------------------------------------------------------------------- /gat/train_cgnn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Graph Attention Networks in DGL using SPMV optimization. 
3 | Multiple heads are also batched together for faster training. 4 | References 5 | ---------- 6 | Paper: https://arxiv.org/abs/1710.10903 7 | Author's code: https://github.com/PetarV-/GAT 8 | Pytorch implementation: https://github.com/Diego999/pyGAT 9 | """ 10 | 11 | import argparse 12 | import numpy as np 13 | import networkx as nx 14 | import time 15 | import torch 16 | import torch.nn.functional as F 17 | from torch.autograd import Variable 18 | 19 | from gpytorch import inv_matmul, logdet 20 | from gpytorch.utils import linear_cg 21 | from torch import matmul 22 | 23 | import dgl 24 | from dgl import DGLGraph 25 | from dgl.data import register_data_args, load_data 26 | from gat import GAT 27 | from utils import EarlyStopping 28 | import scipy.sparse as sp 29 | from sklearn.metrics import r2_score 30 | from utils import sparse_mx_to_torch_sparse_tensor, normalize, lp_refine 31 | 32 | def compute_r2(pred, labels): 33 | return r2_score(labels.cpu().detach().numpy(), pred.cpu().detach().numpy()) 34 | 35 | def evaluate(model, features, labels, mask): 36 | model.eval() 37 | with torch.no_grad(): 38 | pred = model(features) 39 | pred = pred[mask] 40 | labels = labels[mask] 41 | return compute_r2(pred, labels) 42 | 43 | def evaluate_test(model, features, labels, test_mask, lp_dict, coeffs, meta="2012"): 44 | model.eval() 45 | with torch.no_grad(): 46 | output = model(features).squeeze() 47 | 48 | output = output.cuda() 49 | labels = labels.cuda() 50 | idx_test = lp_dict['idx_test'] 51 | idx_train = lp_dict['idx_train'] 52 | adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj'])) 53 | 54 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 55 | loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze()) 56 | r2_test = compute_r2(output[idx_test], labels[idx_test]) 57 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj, torch.tanh(coeffs[0]).item(), torch.exp(coeffs[1]).item()) 58 | lp_r2_test = compute_r2(lp_output, labels[idx_test]) 59 | lp_output_raw_conv = lp_refine(idx_test, idx_train, labels, output, adj) 60 | lp_r2_test_raw_conv = compute_r2(lp_output_raw_conv, labels[idx_test]) 61 | 62 | print("------------") 63 | print("election year {}".format(meta)) 64 | print("loss:", loss.item()) 65 | print("raw_r2:", r2_test) 66 | print("refined_r2:", lp_r2_test) 67 | print("refined_r2_raw_conv:", lp_r2_test_raw_conv) 68 | print("------------") 69 | 70 | 71 | def load_cls_data(args): 72 | data = load_data(args) 73 | features = torch.FloatTensor(data.features) 74 | labels = torch.LongTensor(data.labels) 75 | n_classes = data.num_labels 76 | 77 | if hasattr(torch, 'BoolTensor'): 78 | train_mask = torch.BoolTensor(data.train_mask) 79 | val_mask = torch.BoolTensor(data.val_mask) 80 | test_mask = torch.BoolTensor(data.test_mask) 81 | else: 82 | train_mask = torch.ByteTensor(data.train_mask) 83 | val_mask = torch.ByteTensor(data.val_mask) 84 | test_mask = torch.ByteTensor(data.test_mask) 85 | 86 | g = data.graph 87 | # add self loop 88 | g.remove_edges_from(nx.selfloop_edges(g)) 89 | g = DGLGraph(g) 90 | g.add_edges(g.nodes(), g.nodes()) 91 | row = g.edges()[0] 92 | col = g.edges()[1] 93 | g = dgl.graph((row, col)) 94 | 95 | return g, features, labels, n_classes, train_mask, val_mask, test_mask 96 | 97 | def load_reg_data(args): 98 | path = './data/county/election/2012' 99 | adj = np.load(path+"/A.npy") 100 | labels = np.load(path+"/labels.npy") 101 | features = np.load(path+"/feats.npy") 102 | idx_train = np.load(path+"/train_idx.npy")-1 103 | idx_val = 
np.load(path+"/val_idx.npy")-1 104 | idx_test = np.load(path+"/test_idx.npy")-1 105 | n = len(adj) 106 | train_mask = np.zeros(n).astype(bool) 107 | train_mask[idx_train] = True 108 | val_mask = np.zeros(n).astype(bool) 109 | val_mask[idx_val] = True 110 | test_mask = np.zeros(n).astype(bool) 111 | test_mask[idx_test] = True 112 | n_classes = 1 113 | sp_adj = sp.coo_matrix(adj) 114 | g = dgl.graph((torch.LongTensor(sp_adj.row), torch.LongTensor(sp_adj.col))) 115 | lp_dict = {'idx_test': torch.LongTensor(idx_test), 'idx_train': torch.LongTensor(idx_train), 'sp_adj': sp_adj.astype(float), 'adj':sparse_mx_to_torch_sparse_tensor(normalize(sp_adj.astype(float)))} 116 | 117 | features = torch.FloatTensor(features) 118 | labels = torch.FloatTensor(labels) 119 | train_mask = torch.BoolTensor(train_mask) 120 | val_mask = torch.BoolTensor(val_mask) 121 | test_mask = torch.BoolTensor(test_mask) 122 | 123 | path = './data/county/election/2016' 124 | ind_features = torch.FloatTensor(np.load(path+"/feats.npy")) 125 | ind_labels = torch.FloatTensor(np.load(path+"/labels.npy")) 126 | 127 | return g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels 128 | 129 | def loss_fcn(output, labels, idx, S, coeffs, add_logdet): 130 | output, labels = output.squeeze(), labels.squeeze() 131 | rL = labels - output 132 | S = S.to_dense() 133 | Gamma = (torch.eye(S.size(0)).cuda() - torch.tanh(coeffs[0]) * S.cuda()) * torch.exp(coeffs[1]) 134 | cp_idx = setdiff(len(S), idx) 135 | 136 | loss1 = rL.dot(matmul(Gamma[idx, :][:, idx], rL) - matmul(Gamma[idx, :][:, cp_idx], inv_matmul(Gamma[cp_idx, :][:, cp_idx], matmul(Gamma[cp_idx, :][:, idx], rL)))) 137 | 138 | loss2 = torch.Tensor([0.]).cuda() 139 | if add_logdet: loss2 = logdet(Gamma) - logdet(Gamma[cp_idx, :][:, cp_idx]) 140 | l = loss1 - loss2 141 | return l/len(idx) 142 | 143 | def setdiff(n, idx): 144 | idx = idx.cpu().detach().numpy() 145 | cp_idx = np.setdiff1d(np.arange(n), idx) 146 | return cp_idx 147 | 148 | def main(args): 149 | # load and preprocess dataset 150 | g, features, labels, n_classes, train_mask, val_mask, test_mask, lp_dict, ind_features, ind_labels = load_reg_data(args) 151 | num_feats = features.shape[1] 152 | n_edges = g.number_of_edges() 153 | 154 | print("""----Data statistics------' 155 | #use cuda: %d 156 | #Edges %d 157 | #Classes %d 158 | #Train samples %d 159 | #Val samples %d 160 | #Test samples %d""" % 161 | (args.gpu, n_edges, n_classes, 162 | train_mask.int().sum().item(), 163 | val_mask.int().sum().item(), 164 | test_mask.int().sum().item())) 165 | 166 | if args.gpu < 0: 167 | cuda = False 168 | else: 169 | cuda = True 170 | torch.cuda.set_device(args.gpu) 171 | features = features.cuda() 172 | ind_features = ind_features.cuda() 173 | labels = labels.cuda() 174 | ind_labels = ind_labels.cuda() 175 | train_mask = train_mask.cuda() 176 | val_mask = val_mask.cuda() 177 | test_mask = test_mask.cuda() 178 | 179 | # create model 180 | heads = ([args.num_heads] * args.num_layers) + [args.num_out_heads] 181 | model = GAT(g, 182 | args.num_layers, 183 | num_feats, 184 | args.num_hidden, 185 | n_classes, 186 | heads, 187 | F.elu, 188 | args.in_drop, 189 | args.attn_drop, 190 | args.negative_slope, 191 | args.residual, 192 | args.bias) 193 | print(model) 194 | if args.early_stop: 195 | stopper = EarlyStopping(patience=100) 196 | if cuda: 197 | model.cuda() 198 | 199 | # use optimizer 200 | optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 201 | coeffs = 
Variable(torch.FloatTensor([1., 3.0]).cuda() if cuda else torch.FloatTensor([1., 3.0]) , requires_grad=True) 202 | coeffs_optimizer = torch.optim.SGD([coeffs], lr=1e-1, momentum=0.0) 203 | 204 | # initialize graph 205 | dur = [] 206 | for epoch in range(args.epochs): 207 | model.train() 208 | if epoch >= 3: 209 | t0 = time.time() 210 | # forward 211 | pred = model(features) 212 | loss = loss_fcn(pred[train_mask], labels[train_mask], lp_dict['idx_train'], lp_dict['adj'], coeffs, False) 213 | optimizer.zero_grad() 214 | loss.backward() 215 | optimizer.step() 216 | 217 | if epoch >= 3: 218 | dur.append(time.time() - t0) 219 | 220 | if epoch % 10 == 0: 221 | model.train() 222 | pred = model(features) 223 | loss = loss_fcn(pred[train_mask], labels[train_mask], lp_dict['idx_train'], lp_dict['adj'], coeffs, True) 224 | train_r2 = compute_r2(pred[train_mask], labels[train_mask]) 225 | coeffs_optimizer.zero_grad() 226 | loss.backward() 227 | coeffs_optimizer.step() 228 | 229 | if args.fastmode: 230 | val_r2 = compute_r2(pred[val_mask], labels[val_mask]) 231 | else: 232 | val_r2 = evaluate(model, features, labels, val_mask) 233 | if args.early_stop: 234 | if stopper.step(val_r2, model): 235 | break 236 | 237 | if epoch > 3: 238 | print("Epoch {:05d} | Time(s) {:.4f} | Loss {:.4f} | TrainR2 {:.4f} |" 239 | " Val R2 {:.4f} | ETputs(KTEPS) {:.2f}". 240 | format(epoch, np.mean(dur), loss.item(), train_r2, 241 | val_r2, n_edges / np.mean(dur) / 1000)) 242 | 243 | print() 244 | if args.early_stop: 245 | model.load_state_dict(torch.load('es_checkpoint.pt')) 246 | evaluate_test(model, features, labels, test_mask, lp_dict, coeffs, meta="2012") 247 | evaluate_test(model, ind_features, ind_labels, test_mask, lp_dict, coeffs, meta="2016") 248 | 249 | if __name__ == '__main__': 250 | 251 | parser = argparse.ArgumentParser(description='GAT') 252 | register_data_args(parser) 253 | parser.add_argument("--gpu", type=int, default=-1, 254 | help="which GPU to use. 
Set -1 to use CPU.") 255 | parser.add_argument("--epochs", type=int, default=100, 256 | help="number of training epochs") 257 | parser.add_argument("--num-heads", type=int, default=8, 258 | help="number of hidden attention heads") 259 | parser.add_argument("--num-out-heads", type=int, default=1, 260 | help="number of output attention heads") 261 | parser.add_argument("--num-layers", type=int, default=1, 262 | help="number of hidden layers") 263 | parser.add_argument("--num-hidden", type=int, default=32, 264 | help="number of hidden units") 265 | parser.add_argument("--seed", type=int, default=19940423, 266 | help="random seed") 267 | parser.add_argument("--residual", action="store_true", default=False, 268 | help="use residual connection") 269 | parser.add_argument("--in-drop", type=float, default=.6, 270 | help="input feature dropout") 271 | parser.add_argument("--attn-drop", type=float, default=.6, 272 | help="attention dropout") 273 | parser.add_argument("--lr", type=float, default=0.005, 274 | help="learning rate") 275 | parser.add_argument('--weight-decay', type=float, default=5e-4, 276 | help="weight decay") 277 | parser.add_argument('--negative-slope', type=float, default=0.2, 278 | help="the negative slope of leaky relu") 279 | parser.add_argument('--early-stop', action='store_true', default=False, 280 | help="indicates whether to use early stop or not") 281 | parser.add_argument('--bias', action='store_true', default=False, 282 | help="whether to add Dense layer bias") 283 | parser.add_argument('--fastmode', action="store_true", default=False, 284 | help="skip re-evaluate the validation set") 285 | args = parser.parse_args() 286 | print(args) 287 | 288 | np.random.seed(args.seed) 289 | torch.manual_seed(args.seed) 290 | torch.cuda.manual_seed(args.seed) 291 | 292 | main(args) 293 | -------------------------------------------------------------------------------- /graphsage/train_lpgnn.py: -------------------------------------------------------------------------------- 1 | import dgl 2 | import sys 3 | sys.path.append(".") 4 | import numpy as np 5 | import torch 6 | import torch as th 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | import torch.multiprocessing as mp 11 | from torch.utils.data import DataLoader 12 | import dgl.function as fn 13 | import dgl.nn.pytorch as dglnn 14 | import time 15 | import argparse 16 | from _thread import start_new_thread 17 | from functools import wraps 18 | from dgl.data import RedditDataset 19 | import tqdm 20 | import traceback 21 | import scipy.sparse as sp 22 | from sklearn.metrics import r2_score 23 | from utils import lp_refine, R2, sparse_mx_to_torch_sparse_tensor, normalize 24 | 25 | #### Neighbor sampler 26 | 27 | class NeighborSampler(object): 28 | def __init__(self, g, fanouts): 29 | self.g = g 30 | self.fanouts = fanouts 31 | 32 | def sample_blocks(self, seeds): 33 | seeds = th.LongTensor(np.asarray(seeds)) 34 | blocks = [] 35 | for fanout in self.fanouts: 36 | # For each seed node, sample ``fanout`` neighbors. 37 | frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=True) 38 | # Then we compact the frontier into a bipartite graph for message passing. 39 | block = dgl.to_block(frontier, seeds) 40 | # Obtain the seed nodes for next layer. 
41 | seeds = block.srcdata[dgl.NID] 42 | 43 | blocks.insert(0, block) 44 | return blocks 45 | 46 | class SAGE(nn.Module): 47 | def __init__(self, 48 | in_feats, 49 | n_hidden, 50 | n_classes, 51 | n_layers, 52 | activation, 53 | dropout): 54 | super().__init__() 55 | self.n_layers = n_layers 56 | self.n_hidden = n_hidden 57 | self.n_classes = n_classes 58 | self.layers = nn.ModuleList() 59 | self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) 60 | for i in range(1, n_layers - 1): 61 | self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) 62 | self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean')) 63 | self.dropout = nn.Dropout(dropout) 64 | self.activation = activation 65 | 66 | def forward(self, blocks, x): 67 | h = x 68 | for l, (layer, block) in enumerate(zip(self.layers, blocks)): 69 | # We need to first copy the representation of nodes on the RHS from the 70 | # appropriate nodes on the LHS. 71 | # Note that the shape of h is (num_nodes_LHS, D) and the shape of h_dst 72 | # would be (num_nodes_RHS, D) 73 | h_dst = h[:block.number_of_dst_nodes()] 74 | # Then we compute the updated representation on the RHS. 75 | # The shape of h now becomes (num_nodes_RHS, D) 76 | h = layer(block, (h, h_dst)) 77 | if l != len(self.layers) - 1: 78 | h = self.activation(h) 79 | h = self.dropout(h) 80 | return h 81 | 82 | def inference(self, g, x, batch_size, device): 83 | """ 84 | Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). 85 | g : the entire graph. 86 | x : the input of entire node set. 87 | The inference code is written in a fashion that it could handle any number of nodes and 88 | layers. 89 | """ 90 | # During inference with sampling, multi-layer blocks are very inefficient because 91 | # lots of computations in the first few layers are repeated. 92 | # Therefore, we compute the representation of all nodes layer by layer. The nodes 93 | # on each layer are of course splitted in batches. 94 | # TODO: can we standardize this? 95 | nodes = th.arange(g.number_of_nodes()) 96 | for l, layer in enumerate(self.layers): 97 | y = th.zeros(g.number_of_nodes(), self.n_hidden if l != len(self.layers) - 1 else self.n_classes) 98 | 99 | for start in tqdm.trange(0, len(nodes), batch_size): 100 | end = start + batch_size 101 | batch_nodes = nodes[start:end] 102 | block = dgl.to_block(dgl.in_subgraph(g, batch_nodes), batch_nodes) 103 | input_nodes = block.srcdata[dgl.NID] 104 | 105 | h = x[input_nodes].to(device) 106 | h_dst = h[:block.number_of_dst_nodes()] 107 | h = layer(block, (h, h_dst)) 108 | if l != len(self.layers) - 1: 109 | h = self.activation(h) 110 | h = self.dropout(h) 111 | 112 | y[start:end] = h.cpu() 113 | 114 | x = y 115 | return y 116 | 117 | def prepare_mp(g): 118 | """ 119 | Explicitly materialize the CSR, CSC and COO representation of the given graph 120 | so that they could be shared via copy-on-write to sampler workers and GPU 121 | trainers. 122 | This is a workaround before full shared memory support on heterogeneous graphs. 123 | """ 124 | g.in_degree(0) 125 | g.out_degree(0) 126 | g.find_edges([0]) 127 | 128 | def compute_r2(pred, labels): 129 | """ 130 | Compute the R2 of prediction given the labels. 
131 | """ 132 | #return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred) 133 | return r2_score(labels.cpu().detach().numpy(), pred.cpu().detach().numpy()) 134 | 135 | def evaluate(model, g, inputs, labels, val_mask, batch_size, device): 136 | """ 137 | Evaluate the model on the validation set specified by ``val_mask``. 138 | g : The entire graph. 139 | inputs : The features of all the nodes. 140 | labels : The labels of all the nodes. 141 | val_mask : A 0-1 mask indicating which nodes do we actually compute the R2 for. 142 | batch_size : Number of nodes to compute at the same time. 143 | device : The GPU device to evaluate on. 144 | """ 145 | model.eval() 146 | with th.no_grad(): 147 | pred = model.inference(g, inputs, batch_size, device) 148 | model.train() 149 | return compute_r2(pred[val_mask], labels[val_mask]) 150 | 151 | def evaluate_test(model, g, inputs, labels, test_mask, batch_size, device, lp_dict, meta): 152 | model.eval() 153 | with th.no_grad(): 154 | pred = model.inference(g, inputs, batch_size, device).view(-1) 155 | 156 | output = pred.to(device) 157 | labels = labels.to(device) 158 | idx_test = lp_dict['idx_test'] 159 | idx_train = lp_dict['idx_train'] 160 | adj = sparse_mx_to_torch_sparse_tensor(normalize(lp_dict['sp_adj'])) 161 | 162 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 163 | loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze()) 164 | r2_test = compute_r2(output[test_mask], labels[test_mask]) 165 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj) 166 | lp_r2_test = compute_r2(lp_output, labels[idx_test]) 167 | 168 | print("------------") 169 | print("election year {}".format(meta)) 170 | print("loss:", loss.item()) 171 | print("raw_r2:", r2_test) 172 | print("refined_r2:", lp_r2_test) 173 | print("------------") 174 | 175 | model.train() 176 | 177 | def load_subtensor(g, labels, seeds, input_nodes, device): 178 | """ 179 | Copys features and labels of a set of nodes onto GPU. 180 | """ 181 | batch_inputs = g.ndata['features'][input_nodes].to(device) 182 | batch_labels = labels[seeds].to(device) 183 | return batch_inputs, batch_labels 184 | 185 | #### Entry point 186 | def run(args, device, data): 187 | # Unpack data 188 | train_mask, val_mask, test_mask, in_feats, labels, ind_labels, n_classes, g, ind_g, lp_dict = data 189 | 190 | train_nid = th.LongTensor(np.nonzero(train_mask)[0]) 191 | val_nid = th.LongTensor(np.nonzero(val_mask)[0]) 192 | train_mask = th.BoolTensor(train_mask) 193 | val_mask = th.BoolTensor(val_mask) 194 | test_mask = th.BoolTensor(test_mask) 195 | 196 | # Create sampler 197 | sampler = NeighborSampler(g, [int(fanout) for fanout in args.fan_out.split(',')]) 198 | 199 | # Create PyTorch DataLoader for constructing blocks 200 | dataloader = DataLoader( 201 | dataset=train_nid.numpy(), 202 | batch_size=args.batch_size, 203 | collate_fn=sampler.sample_blocks, 204 | shuffle=True, 205 | drop_last=False, 206 | num_workers=args.num_workers) 207 | 208 | # Define model and optimizer 209 | model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout) 210 | model = model.to(device) 211 | loss_fcn = nn.MSELoss() 212 | loss_fcn = loss_fcn.to(device) 213 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 214 | 215 | # Training loop 216 | avg = 0 217 | iter_tput = [] 218 | for epoch in range(args.num_epochs): 219 | tic = time.time() 220 | 221 | # Loop over the dataloader to sample the computation dependency graph as a list of 222 | # blocks. 
223 | for step, blocks in enumerate(dataloader): 224 | tic_step = time.time() 225 | 226 | # The nodes for input lies at the LHS side of the first block. 227 | # The nodes for output lies at the RHS side of the last block. 228 | input_nodes = blocks[0].srcdata[dgl.NID] 229 | seeds = blocks[-1].dstdata[dgl.NID] 230 | 231 | # Load the input features as well as output labels 232 | batch_inputs, batch_labels = load_subtensor(g, labels, seeds, input_nodes, device) 233 | 234 | # Compute loss and prediction 235 | batch_pred = model(blocks, batch_inputs) 236 | loss = loss_fcn(batch_pred.squeeze(), batch_labels.squeeze()) 237 | optimizer.zero_grad() 238 | loss.backward() 239 | optimizer.step() 240 | 241 | iter_tput.append(len(seeds) / (time.time() - tic_step)) 242 | if step % args.log_every == 0: 243 | r2 = compute_r2(batch_pred, batch_labels) 244 | gpu_mem_alloc = th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 245 | print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train R2 {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MiB'.format( 246 | epoch, step, loss.item(), r2.item(), np.mean(iter_tput[3:]), gpu_mem_alloc)) 247 | 248 | toc = time.time() 249 | print('Epoch Time(s): {:.4f}'.format(toc - tic)) 250 | if epoch >= 5: 251 | avg += toc - tic 252 | if epoch % args.eval_every == 0 and epoch != 0: 253 | eval_r2 = evaluate(model, g, g.ndata['features'], labels, val_mask, args.batch_size, device) 254 | print('Eval R2: {:.4f}'.format(eval_r2)) 255 | 256 | evaluate_test(model, g, g.ndata['features'], labels, test_mask, args.batch_size, device, lp_dict, "2012") 257 | evaluate_test(model, ind_g, ind_g.ndata['features'], ind_labels, test_mask, args.batch_size, device, lp_dict, "2016") 258 | 259 | print('Avg epoch time: {}'.format(avg / (epoch - 4))) 260 | 261 | if __name__ == '__main__': 262 | argparser = argparse.ArgumentParser("multi-gpu training") 263 | argparser.add_argument('--gpu', type=int, default=0, 264 | help="GPU device ID. Use -1 for CPU training") 265 | argparser.add_argument('--num-epochs', type=int, default=500) 266 | argparser.add_argument('--num-hidden', type=int, default=16) 267 | argparser.add_argument('--num-layers', type=int, default=2) 268 | argparser.add_argument('--fan-out', type=str, default='25,25') 269 | argparser.add_argument('--batch-size', type=int, default=128) 270 | argparser.add_argument('--log-every', type=int, default=20) 271 | argparser.add_argument('--eval-every', type=int, default=5) 272 | argparser.add_argument('--lr', type=float, default=0.003) 273 | argparser.add_argument('--dropout', type=float, default=0.5) 274 | argparser.add_argument('--num-workers', type=int, default=0, 275 | help="Number of sampling processes. 
Use 0 for no extra process.") 276 | args = argparser.parse_args() 277 | 278 | if args.gpu >= 0: 279 | device = th.device('cuda:%d' % args.gpu) 280 | else: 281 | device = th.device('cpu') 282 | 283 | path = './data/county/election/2012' 284 | adj = np.load(path+"/A.npy") 285 | labels = np.load(path+"/labels.npy") 286 | features = np.load(path+"/feats.npy") 287 | idx_train = np.load(path+"/train_idx.npy")-1 288 | idx_val = np.load(path+"/val_idx.npy")-1 289 | idx_test = np.load(path+"/test_idx.npy")-1 290 | n = len(adj) 291 | train_mask = np.zeros(n).astype(bool) 292 | train_mask[idx_train] = True 293 | val_mask = np.zeros(n).astype(bool) 294 | val_mask[idx_val] = True 295 | test_mask = np.zeros(n).astype(bool) 296 | test_mask[idx_test] = True 297 | in_feats = features.shape[1] 298 | labels = th.FloatTensor(labels) 299 | n_classes = 1 300 | 301 | sp_adj = sp.coo_matrix(adj) 302 | g = dgl.graph((th.LongTensor(sp_adj.row), th.LongTensor(sp_adj.col))) 303 | g.ndata['features'] = th.FloatTensor(features) 304 | prepare_mp(g) 305 | lp_dict = {'idx_test': th.LongTensor(idx_test), 'idx_train': th.LongTensor(idx_train), 'sp_adj': sp_adj.astype(float)} 306 | 307 | ind_path = './data/county/election/2016' 308 | ind_features = np.load(ind_path+"/feats.npy") 309 | ind_labels = np.load(ind_path+"/labels.npy") 310 | ind_labels = th.FloatTensor(ind_labels) 311 | ind_g = dgl.graph((th.LongTensor(sp_adj.row), th.LongTensor(sp_adj.col))) 312 | ind_g.ndata['features'] = th.FloatTensor(ind_features) 313 | prepare_mp(ind_g) 314 | 315 | # Pack data 316 | data = train_mask, val_mask, test_mask, in_feats, labels, ind_labels, n_classes, g, ind_g, lp_dict 317 | 318 | run(args, device, data) 319 | -------------------------------------------------------------------------------- /graphsage/train_cgnn.py: -------------------------------------------------------------------------------- 1 | import dgl 2 | import sys 3 | sys.path.append(".") 4 | import numpy as np 5 | import torch 6 | import torch as th 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import torch.optim as optim 10 | import torch.multiprocessing as mp 11 | from torch.autograd import Variable 12 | from torch.utils.data import DataLoader 13 | import dgl.function as fn 14 | import dgl.nn.pytorch as dglnn 15 | import time 16 | import argparse 17 | from _thread import start_new_thread 18 | from functools import wraps 19 | from dgl.data import RedditDataset 20 | import tqdm 21 | import traceback 22 | import scipy.sparse as sp 23 | from sklearn.metrics import r2_score 24 | from utils import lp_refine, R2, sparse_mx_to_torch_sparse_tensor, normalize 25 | from gpytorch import inv_matmul, logdet 26 | from gpytorch.utils import linear_cg 27 | from torch import matmul 28 | 29 | #### Neighbor sampler 30 | 31 | class NeighborSampler(object): 32 | def __init__(self, g, fanouts): 33 | self.g = g 34 | self.fanouts = fanouts 35 | 36 | def sample_blocks(self, seeds): 37 | seeds = th.LongTensor(np.asarray(seeds)) 38 | blocks = [] 39 | for fanout in self.fanouts: 40 | # For each seed node, sample ``fanout`` neighbors. 41 | frontier = dgl.sampling.sample_neighbors(self.g, seeds, fanout, replace=True) 42 | # Then we compact the frontier into a bipartite graph for message passing. 43 | block = dgl.to_block(frontier, seeds) 44 | # Obtain the seed nodes for next layer. 
45 | seeds = block.srcdata[dgl.NID] 46 | 47 | blocks.insert(0, block) 48 | return blocks 49 | 50 | class SAGE(nn.Module): 51 | def __init__(self, 52 | in_feats, 53 | n_hidden, 54 | n_classes, 55 | n_layers, 56 | activation, 57 | dropout): 58 | super().__init__() 59 | self.n_layers = n_layers 60 | self.n_hidden = n_hidden 61 | self.n_classes = n_classes 62 | self.layers = nn.ModuleList() 63 | self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) 64 | for i in range(1, n_layers - 1): 65 | self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) 66 | self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean')) 67 | self.dropout = nn.Dropout(dropout) 68 | self.activation = activation 69 | 70 | def forward(self, blocks, x): 71 | h = x 72 | for l, (layer, block) in enumerate(zip(self.layers, blocks)): 73 | # We need to first copy the representation of nodes on the RHS from the 74 | # appropriate nodes on the LHS. 75 | # Note that the shape of h is (num_nodes_LHS, D) and the shape of h_dst 76 | # would be (num_nodes_RHS, D) 77 | h_dst = h[:block.number_of_dst_nodes()] 78 | # Then we compute the updated representation on the RHS. 79 | # The shape of h now becomes (num_nodes_RHS, D) 80 | h = layer(block, (h, h_dst)) 81 | if l != len(self.layers) - 1: 82 | h = self.activation(h) 83 | h = self.dropout(h) 84 | return h 85 | 86 | def inference(self, g, x, batch_size, device): 87 | """ 88 | Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). 89 | g : the entire graph. 90 | x : the input of entire node set. 91 | The inference code is written in a fashion that it could handle any number of nodes and 92 | layers. 93 | """ 94 | # During inference with sampling, multi-layer blocks are very inefficient because 95 | # lots of computations in the first few layers are repeated. 96 | # Therefore, we compute the representation of all nodes layer by layer. The nodes 97 | # on each layer are of course splitted in batches. 98 | # TODO: can we standardize this? 99 | nodes = th.arange(g.number_of_nodes()) 100 | for l, layer in enumerate(self.layers): 101 | y = th.zeros(g.number_of_nodes(), self.n_hidden if l != len(self.layers) - 1 else self.n_classes) 102 | 103 | for start in tqdm.trange(0, len(nodes), batch_size): 104 | end = start + batch_size 105 | batch_nodes = nodes[start:end] 106 | block = dgl.to_block(dgl.in_subgraph(g, batch_nodes), batch_nodes) 107 | input_nodes = block.srcdata[dgl.NID] 108 | 109 | h = x[input_nodes].to(device) 110 | h_dst = h[:block.number_of_dst_nodes()] 111 | h = layer(block, (h, h_dst)) 112 | if l != len(self.layers) - 1: 113 | h = self.activation(h) 114 | h = self.dropout(h) 115 | 116 | y[start:end] = h.cpu() 117 | 118 | x = y 119 | return y 120 | 121 | def prepare_mp(g): 122 | """ 123 | Explicitly materialize the CSR, CSC and COO representation of the given graph 124 | so that they could be shared via copy-on-write to sampler workers and GPU 125 | trainers. 126 | This is a workaround before full shared memory support on heterogeneous graphs. 127 | """ 128 | g.in_degree(0) 129 | g.out_degree(0) 130 | g.find_edges([0]) 131 | 132 | def compute_r2(pred, labels): 133 | """ 134 | Compute the R2 of prediction given the labels. 
135 | """ 136 | #return (th.argmax(pred, dim=1) == labels).float().sum() / len(pred) 137 | return r2_score(labels.cpu().detach().numpy(), pred.cpu().detach().numpy()) 138 | 139 | def evaluate(model, g, inputs, labels, val_mask, batch_size, device): 140 | """ 141 | Evaluate the model on the validation set specified by ``val_mask``. 142 | g : The entire graph. 143 | inputs : The features of all the nodes. 144 | labels : The labels of all the nodes. 145 | val_mask : A 0-1 mask indicating which nodes do we actually compute the R2 for. 146 | batch_size : Number of nodes to compute at the same time. 147 | device : The GPU device to evaluate on. 148 | """ 149 | model.eval() 150 | with th.no_grad(): 151 | pred = model.inference(g, inputs, batch_size, device) 152 | model.train() 153 | return compute_r2(pred[val_mask], labels[val_mask]) 154 | 155 | def evaluate_test(model, g, inputs, labels, test_mask, batch_size, device, lp_dict, coeffs, meta): 156 | model.eval() 157 | with th.no_grad(): 158 | pred = model.inference(g, inputs, batch_size, device).view(-1) 159 | 160 | output = pred.cuda() 161 | labels = labels.cuda() 162 | idx_test = lp_dict['idx_test'] 163 | idx_train = lp_dict['idx_train'] 164 | adj = lp_dict['adj'] 165 | 166 | labels, output, adj = labels.cpu(), output.cpu(), adj.cpu() 167 | loss = F.mse_loss(output[idx_test].squeeze(), labels[idx_test].squeeze()) 168 | r2_test = compute_r2(output[test_mask], labels[test_mask]) 169 | lp_output = lp_refine(idx_test, idx_train, labels, output, adj, torch.tanh(coeffs[0]).item(), torch.exp(coeffs[1]).item()) 170 | lp_r2_test = compute_r2(lp_output, labels[idx_test]) 171 | lp_output_raw_conv = lp_refine(idx_test, idx_train, labels, output, adj) 172 | lp_r2_test_raw_conv = R2(lp_output_raw_conv, labels[idx_test]) 173 | 174 | print("------------") 175 | print("election year {}".format(meta)) 176 | print("loss:", loss.item()) 177 | print("raw_r2:", r2_test) 178 | print("refined_r2:", lp_r2_test) 179 | print("refined_r2_raw_conv:", lp_r2_test_raw_conv) 180 | print("------------") 181 | 182 | model.train() 183 | 184 | return lp_r2_test 185 | 186 | def load_subtensor(g, labels, seeds, input_nodes, device): 187 | """ 188 | Copys features and labels of a set of nodes onto GPU. 189 | """ 190 | batch_inputs = g.ndata['features'][input_nodes].to(device) 191 | batch_labels = labels[seeds].to(device) 192 | return batch_inputs, batch_labels 193 | 194 | def setdiff(n, idx): 195 | idx = idx.cpu().detach().numpy() 196 | cp_idx = np.setdiff1d(np.arange(n), idx) 197 | return cp_idx 198 | 199 | def loss_fcn(output, labels, idx, S, coeffs, device, add_logdet): 200 | rL = labels - output 201 | S = S.to_dense() 202 | Gamma = (torch.eye(S.size(0)).to(device) - torch.tanh(coeffs[0]) * S.to(device)) * torch.exp(coeffs[1]) 203 | cp_idx = setdiff(len(S), idx) 204 | 205 | loss1 = rL.dot(matmul(Gamma[idx, :][:, idx], rL) - matmul(Gamma[idx, :][:, cp_idx], inv_matmul(Gamma[cp_idx, :][:, cp_idx], matmul(Gamma[cp_idx, :][:, idx], rL)))) 206 | loss2 = 0. 
207 | if add_logdet: loss2 = logdet(Gamma) - logdet(Gamma[cp_idx, :][:, cp_idx]) 208 | l = loss1 - loss2 209 | return l/len(idx) 210 | 211 | #### Entry point 212 | def run(args, device, data): 213 | # Unpack data 214 | train_mask, val_mask, test_mask, in_feats, labels, ind_labels, n_classes, g, ind_g, lp_dict = data 215 | 216 | train_nid = th.LongTensor(np.nonzero(train_mask)[0]) 217 | val_nid = th.LongTensor(np.nonzero(val_mask)[0]) 218 | train_mask = th.BoolTensor(train_mask) 219 | val_mask = th.BoolTensor(val_mask) 220 | test_mask = th.BoolTensor(test_mask) 221 | 222 | # Create sampler 223 | sampler = NeighborSampler(g, [int(fanout) for fanout in args.fan_out.split(',')]) 224 | 225 | # Create PyTorch DataLoader for constructing blocks 226 | dataloader = DataLoader( 227 | dataset=train_nid.numpy(), 228 | batch_size=args.batch_size, 229 | collate_fn=sampler.sample_blocks, 230 | shuffle=True, 231 | drop_last=False, 232 | num_workers=args.num_workers) 233 | 234 | # Define model and optimizer 235 | model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout) 236 | model = model.to(device) 237 | optimizer = optim.Adam(model.parameters(), lr=args.lr) 238 | 239 | coeffs = Variable(torch.FloatTensor([1., 3.0]).to(device) , requires_grad=True) 240 | coeffs_optimizer = optim.SGD([coeffs], lr=1e-1, momentum=0.0) 241 | 242 | # Training loop 243 | avg = 0 244 | iter_tput = [] 245 | steps_per_epoch = len(dataloader) 246 | for epoch in range(args.num_epochs): 247 | tic = time.time() 248 | 249 | # Loop over the dataloader to sample the computation dependency graph as a list of 250 | # blocks. 251 | for step, blocks in enumerate(dataloader): 252 | tic_step = time.time() 253 | 254 | # The nodes for input lies at the LHS side of the first block. 255 | # The nodes for output lies at the RHS side of the last block. 
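# Note on the two updates in this loop (inferred from the code below, not documented in the
# repo): the SAGE parameters are updated at every step through `optimizer` using the loss
# without the log-determinant term (add_logdet=False), while the correlation coefficients
# `coeffs` are updated through `coeffs_optimizer` only a couple of times per epoch with
# add_logdet=True, presumably because the log-determinant of the full-graph Gamma is far
# more expensive to evaluate than the quadratic term alone.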
256 | input_nodes = blocks[0].srcdata[dgl.NID] 257 | seeds = blocks[-1].dstdata[dgl.NID] 258 | 259 | # Load the input features as well as output labels 260 | batch_inputs, batch_labels = load_subtensor(g, labels, seeds, input_nodes, device) 261 | # Compute loss and prediction 262 | model.train() 263 | batch_pred = model(blocks, batch_inputs) 264 | loss = loss_fcn(batch_pred.squeeze(), batch_labels.squeeze(), seeds, lp_dict['adj'], coeffs, device, False) 265 | optimizer.zero_grad() 266 | loss.backward() 267 | optimizer.step() 268 | 269 | if (step+1) % (steps_per_epoch//2) == 0: 270 | model.train() 271 | batch_pred = model(blocks, batch_inputs) 272 | loss = loss_fcn(batch_pred.squeeze(), batch_labels.squeeze(), seeds, lp_dict['adj'], coeffs, device, True) 273 | coeffs_optimizer.zero_grad() 274 | loss.backward() 275 | coeffs_optimizer.step() 276 | 277 | iter_tput.append(len(seeds) / (time.time() - tic_step)) 278 | if step % args.log_every == 0: 279 | r2 = compute_r2(batch_pred, batch_labels) 280 | gpu_mem_alloc = th.cuda.max_memory_allocated() / 1000000 if th.cuda.is_available() else 0 281 | #print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train R2 {:.4f} | Speed (samples/sec) {:.4f} | GPU {:.1f} MiB'.format(epoch, step, loss.item(), r2.item(), np.mean(iter_tput[3:]), gpu_mem_alloc)) 282 | print('Epoch {:05d} | Step {:05d} | Loss {:.4f} | Train R2 {:.4f} | alpha: {:.4f} | beta: {:.4f}'.format(epoch, step, loss.item(), r2.item(), th.tanh(coeffs[0]).item(), th.exp(coeffs[1]).item())) 283 | 284 | toc = time.time() 285 | print('Epoch Time(s): {:.4f}'.format(toc - tic)) 286 | if epoch >= 5: 287 | avg += toc - tic 288 | if epoch % args.eval_every == 0 and epoch != 0: 289 | eval_r2 = evaluate(model, g, g.ndata['features'], labels, val_mask, args.batch_size, device) 290 | print('Eval R2: {:.4f}'.format(eval_r2)) 291 | 292 | evaluate_test(model, g, g.ndata['features'], labels, test_mask, args.batch_size, device, lp_dict, coeffs, "2012") 293 | evaluate_test(model, ind_g, ind_g.ndata['features'], ind_labels, test_mask, args.batch_size, device, lp_dict, coeffs, "2016") 294 | 295 | print('Avg epoch time: {}'.format(avg / (epoch - 4))) 296 | 297 | if __name__ == '__main__': 298 | argparser = argparse.ArgumentParser("multi-gpu training") 299 | argparser.add_argument('--gpu', type=int, default=0, 300 | help="GPU device ID. Use -1 for CPU training") 301 | argparser.add_argument('--num-epochs', type=int, default=500) 302 | argparser.add_argument('--num-hidden', type=int, default=32) 303 | argparser.add_argument('--num-layers', type=int, default=2) 304 | argparser.add_argument('--seed', type=int, default=19940423) 305 | argparser.add_argument('--fan-out', type=str, default='25,25') 306 | argparser.add_argument('--batch-size', type=int, default=128) 307 | argparser.add_argument('--log-every', type=int, default=20) 308 | argparser.add_argument('--eval-every', type=int, default=5) 309 | argparser.add_argument('--lr', type=float, default=0.003) 310 | argparser.add_argument('--dropout', type=float, default=0.5) 311 | argparser.add_argument('--num-workers', type=int, default=0, 312 | help="Number of sampling processes. 
Use 0 for no extra process.") 313 | args = argparser.parse_args() 314 | 315 | np.random.seed(args.seed) 316 | torch.manual_seed(args.seed) 317 | if args.gpu >= 0: 318 | device = th.device('cuda:%d' % args.gpu) 319 | torch.cuda.manual_seed(args.seed) 320 | else: 321 | device = th.device('cpu') 322 | 323 | path = './data/county/election/2012' 324 | adj = np.load(path+"/A.npy") 325 | labels = np.load(path+"/labels.npy") 326 | features = np.load(path+"/feats.npy") 327 | idx_train = np.load(path+"/train_idx.npy")-1 328 | idx_val = np.load(path+"/val_idx.npy")-1 329 | idx_test = np.load(path+"/test_idx.npy")-1 330 | n = len(adj) 331 | train_mask = np.zeros(n).astype(bool) 332 | train_mask[idx_train] = True 333 | val_mask = np.zeros(n).astype(bool) 334 | val_mask[idx_val] = True 335 | test_mask = np.zeros(n).astype(bool) 336 | test_mask[idx_test] = True 337 | in_feats = features.shape[1] 338 | labels = th.FloatTensor(labels) 339 | n_classes = 1 340 | 341 | sp_adj = sp.coo_matrix(adj) 342 | g = dgl.graph((th.LongTensor(sp_adj.row), th.LongTensor(sp_adj.col))) 343 | g.ndata['features'] = th.FloatTensor(features) 344 | prepare_mp(g) 345 | lp_dict = {'idx_test': th.LongTensor(idx_test), 'idx_train': th.LongTensor(idx_train), 'adj': sparse_mx_to_torch_sparse_tensor(normalize(sp_adj.astype(float)))} 346 | 347 | ind_path = './data/county/election/2016' 348 | ind_features = np.load(ind_path+"/feats.npy") 349 | ind_labels = np.load(ind_path+"/labels.npy") 350 | ind_labels = th.FloatTensor(ind_labels) 351 | ind_g = dgl.graph((th.LongTensor(sp_adj.row), th.LongTensor(sp_adj.col))) 352 | ind_g.ndata['features'] = th.FloatTensor(ind_features) 353 | prepare_mp(ind_g) 354 | 355 | # Pack data 356 | data = train_mask, val_mask, test_mask, in_feats, labels, ind_labels, n_classes, g, ind_g, lp_dict 357 | 358 | run(args, device, data) 359 | --------------------------------------------------------------------------------
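All trainers are meant to be launched from the repository root so that the relative path ./data/county/election/<year> resolves (graphsage/* also does sys.path.append(".") to pick up the top-level utils.py). Each script trains on the 2012 county election data and then reports test R2 on both the 2012 and the 2016 features through evaluate_test. Example invocations, using only flags defined in the argparse blocks above (adjust values to taste):

    python gat/train_cgnn.py --gpu 0 --epochs 300 --num-hidden 32 --early-stop --bias
    python graphsage/train_cgnn.py --gpu 0 --num-epochs 500 --batch-size 128 --fan-out 25,25

Note that the *_cgnn variants call .cuda() unconditionally inside loss_fcn and evaluate_test, so as written they effectively require a CUDA-capable GPU even though --gpu defaults to -1 in some of the parsers.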
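utils.lp_refine is called by every evaluate_test above but its body is not included in this listing. Given the same precision matrix Gamma = beta * (I - alpha * S) that appears in the loss_fcn of the *_cgnn trainers, a refinement step of this kind conditions the Gaussian residual model on the observed training residuals and adds the conditional mean of the unobserved residuals to the raw predictions. The sketch below only illustrates that idea: the alpha/beta defaults are placeholders, and the actual utils.lp_refine (which presumably relies on gpytorch's inv_matmul / linear_cg rather than a dense solve) may differ.

import numpy as np
import torch

def lp_refine_sketch(idx_test, idx_train, labels, output, S, alpha=0.9, beta=1.0):
    # Sketch only: residual model r = labels - output, r ~ N(0, Gamma^{-1}),
    # with Gamma = beta * (I - alpha * S); alpha/beta here are illustrative defaults.
    S = S.to_dense() if S.is_sparse else S
    n = S.size(0)
    Gamma = beta * (torch.eye(n, dtype=S.dtype) - alpha * S)
    Gamma = Gamma.to(output.dtype)
    r_train = (labels - output)[idx_train]
    # Nodes whose residuals are not observed (everything outside the training set).
    unobserved = torch.as_tensor(np.setdiff1d(np.arange(n), idx_train.numpy()))
    # Conditional mean of the unobserved residuals: E[r_U | r_L] = -Gamma_UU^{-1} Gamma_UL r_L.
    Gamma_UU = Gamma[unobserved][:, unobserved]
    Gamma_UL = Gamma[unobserved][:, idx_train]
    r_U = -torch.linalg.solve(Gamma_UU, Gamma_UL @ r_train)
    refined = output.clone()
    refined[unobserved] = output[unobserved] + r_U
    return refined[idx_test]

In the *_cgnn trainers, evaluate_test calls lp_refine twice: once with the learned coefficients (tanh(coeffs[0]), exp(coeffs[1])) and once with no coefficients at all, which is exactly the comparison behind the refined_r2 vs refined_r2_raw_cov / refined_r2_raw_conv printouts.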
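utils.normalize and utils.sparse_mx_to_torch_sparse_tensor are also imported throughout without being shown. The versions below are common implementations and only a guess at what utils.py contains: normalize is sketched as the symmetric normalization D^{-1/2} A D^{-1/2} (a row-stochastic D^{-1} A is the other usual choice), and the converter simply re-packages a scipy COO matrix as a torch sparse tensor.

import numpy as np
import scipy.sparse as sp
import torch

def normalize_sketch(adj):
    # Symmetric normalization D^{-1/2} A D^{-1/2}; the real utils.normalize may differ.
    adj = sp.coo_matrix(adj)
    deg = np.asarray(adj.sum(1)).flatten()
    with np.errstate(divide='ignore'):
        d_inv_sqrt = np.power(deg, -0.5)
    d_inv_sqrt[~np.isfinite(d_inv_sqrt)] = 0.0
    d_mat = sp.diags(d_inv_sqrt)
    return d_mat.dot(adj).dot(d_mat).tocoo()

def sparse_mx_to_torch_sparse_tensor_sketch(sparse_mx):
    # Convert a scipy sparse matrix to a torch sparse COO tensor.
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    return torch.sparse_coo_tensor(indices, values, torch.Size(sparse_mx.shape))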