├── Build_Your_Own_IGL.ipynb
├── Dockerfile
├── IGL_Bench
├── __init__.py
├── algorithm
│ ├── COLDBREW
│ │ ├── GCN.py
│ │ ├── GNN_normalizations.py
│ │ ├── Label_propagation_model
│ │ │ ├── LP_Adj.py
│ │ │ ├── diffusion_feature.py
│ │ │ ├── norm_spec.jl
│ │ │ └── outcome_correlation.py
│ │ ├── __init__.py
│ │ ├── norm_tricks.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── DEMONet
│ │ ├── __init__.py
│ │ ├── models.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── DPGNN
│ │ ├── __init__.py
│ │ ├── learn.py
│ │ ├── model.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── DRGCN
│ │ ├── __init__.py
│ │ ├── load_data.py
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ ├── adversarialNets.py
│ │ │ ├── gmm.py
│ │ │ └── graph.py
│ │ ├── solver.py
│ │ └── sparse.py
│ ├── DataDec
│ │ ├── __init__.py
│ │ ├── contrast.py
│ │ ├── dataloader.py
│ │ ├── model.py
│ │ ├── prune.py
│ │ └── solver.py
│ ├── G2GNN
│ │ ├── __init__.py
│ │ ├── aug.py
│ │ ├── dataloader.py
│ │ ├── kernel.py
│ │ └── solver.py
│ ├── GCN
│ │ ├── __init__.py
│ │ └── solver.py
│ ├── GIN
│ │ ├── __init__.py
│ │ └── solver.py
│ ├── GRAPHPATCHER
│ │ ├── GCN.py
│ │ ├── __init__.py
│ │ ├── data_load.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── HyperIMBA
│ │ ├── GcnHyper.py
│ │ ├── Poincare.py
│ │ ├── __init__.py
│ │ ├── cal.py
│ │ └── solver.py
│ ├── ImGAGN
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ ├── models.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── ImGKB
│ │ ├── __init__.py
│ │ ├── dataloader.py
│ │ ├── inforneck.py
│ │ ├── kernel.py
│ │ ├── layers.py
│ │ ├── loss.py
│ │ ├── model.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── PASTEL
│ │ ├── __init__.py
│ │ ├── cal.py
│ │ ├── eval.py
│ │ ├── graph_clf.py
│ │ ├── graph_learner.py
│ │ ├── model.py
│ │ └── solver.py
│ ├── RAWLSGCN
│ │ ├── RawlsGCN.py
│ │ ├── __init__.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── ReNode
│ │ ├── __init__.py
│ │ ├── reweight.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── SOLTGNN
│ │ ├── PatternMemory.py
│ │ ├── __init__.py
│ │ ├── backbone.py
│ │ ├── sampling.zip
│ │ ├── solver.py
│ │ ├── subgraph_sample.py
│ │ └── utils.py
│ ├── TAILGNN
│ │ ├── TailGNN.py
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── TAM
│ │ ├── __init__.py
│ │ ├── solver.py
│ │ └── tam.py
│ ├── TOPOAUC
│ │ ├── __init__.py
│ │ ├── cal.py
│ │ ├── myloss.py
│ │ ├── solver.py
│ │ └── util.py
│ └── TopoImb
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ ├── model.py
│ │ ├── solver.py
│ │ ├── topo_util.py
│ │ ├── trainer.py
│ │ └── utils.py
├── backbone
│ ├── __init__.py
│ ├── gcn.py
│ └── gin.py
├── config
│ ├── __init__.py
│ └── util.py
├── dataset
│ ├── __init__.py
│ ├── dataset.py
│ ├── graph_topology_imbalance
│ │ ├── COLLAB
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── DD
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── FRANKENSTEIN
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── IMDB-BINARY
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── PROTEINS
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── PTC_MR
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ └── REDDIT-BINARY
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ ├── load_graph.py
│ ├── load_node.py
│ └── split.py
└── manage
│ ├── __init__.py
│ └── runner.py
├── LICENSE
├── MANIFEST.in
├── README.md
├── config
├── graph
│ ├── class
│ │ ├── DataDec.yml
│ │ ├── G2GNN.yml
│ │ ├── GCN.yml
│ │ ├── GIN.yml
│ │ ├── ImGKB.yml
│ │ └── TopoImb.yml
│ └── topology
│ │ ├── GIN.yml
│ │ ├── SOLTGNN.yml
│ │ └── TopoImb.yml
└── node
│ ├── class
│ ├── DPGNN.yml
│ ├── DRGCN.yml
│ └── ImGAGN.yml
│ ├── topo_global
│ ├── GCN.yml
│ ├── HyperIMBA.yml
│ ├── PASTEL.yml
│ ├── ReNode.yml
│ ├── TAM.yml
│ └── TOPOAUC.yml
│ └── topo_local
│ ├── COLDBREW.yml
│ ├── DEMONet.yml
│ ├── GCN.yml
│ ├── GRAPHPATCHER.yml
│ ├── LTE4G.yml
│ ├── RAWLSGCN.yml
│ └── TAILGNN.yml
├── dataset
└── link.txt
├── demo.ipynb
├── figs
├── logo.png
├── package.png
├── scope.png
└── timeline.png
├── requirements.txt
└── setup.py

/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-runtime
2 |
3 | USER root
4 |
5 | RUN apt-get update && apt-get install -y --no-install-recommends \
6 |     sudo \
7 |     git \
8 |     wget \
9 |     vim \
10 |     ffmpeg \
11 |     libgl1-mesa-glx \
12 |     libglib2.0-0 \
13 |     libssl-dev \
14 |     cmake \
15 |     g++ \
16 |     python3-dev \
17 |     libgomp1 && \
18 |     rm -rf /var/lib/apt/lists/*
19 |
20 | RUN conda install -c conda-forge networkit -y && conda clean -afy
21 |
22 | RUN pip install torchdata==0.7.1
23 |
24 | RUN pip install --no-cache-dir pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv \
25 |     -f https://data.pyg.org/whl/torch-2.4.0+cu124.html && \
26 |     pip install --no-cache-dir torch-geometric
27 |
28 | RUN pip install --no-cache-dir pydantic
29 | RUN pip install --no-cache-dir dgl==1.1.2 -f https://data.dgl.ai/wheels/cu124.html
30 |
31 | RUN pip install --no-cache-dir \
32 |     huggingface-hub \
33 |     scipy \
34 |     GPUtil \
35 |     networkx \
36 |     ogb \
37 |     Tree \
38 |     GCL \
39 |     PyGCL \
40 |     PyYAML \
41 |     scikit-learn \
42 |     GraKeL \
43 |     GraphRicciCurvature \
44 |     ipdb \
45 |     dill \
46 |     julia
47 |
48 | ENV DGLBACKEND=pytorch
49 |
50 | RUN echo 'echo "🐳 Welcome to IGL-Bench Dev Container!"' >> ~/.bashrc && \
51 |     echo 'alias ll="ls -alh"' >> ~/.bashrc
52 |
53 | RUN python3 -c "import torch; print('✔️ PyTorch:', torch.__version__)" && \
54 |     python3 -c "import dgl; print('✔️ DGL:', dgl.__version__)" && \
55 |     python3 -c "import torch_geometric; print('✔️ PyG:', torch_geometric.__version__)" && \
56 |     python3 -c "import torchdata; print('✔️ torchdata:', torchdata.__version__)"
57 |
58 | CMD ["/bin/bash"]
59 |
--------------------------------------------------------------------------------
/IGL_Bench/__init__.py:
--------------------------------------------------------------------------------
1 | from . import dataset as dataset
2 | from . import config as config
3 | from . import manage as manage
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/COLDBREW/GNN_normalizations.py:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: Apache-2.0
3 | import torch
4 | from .norm_tricks import *
5 | from .GCN import TricksComb
6 | from torch import nn
7 | from .utils import D
8 |
9 | class TeacherGNN(nn.Module):
10 |     # This class is the teacher GCN model (with structural embedding) for cold brew
11 |     def __init__(self, args, proj2class=None):
12 |         super().__init__()
13 |         proj2class = proj2class or nn.Identity()
14 |         args.num_classes_bkup = args.num_classes
15 |         args.num_classes = args.dim_commonEmb
16 |         self.args = args
17 |
18 |         if self.args.dim_learnable_input>0:
19 |             embs = torch.randn(args.N_nodes, args.dim_learnable_input)*0.001
20 |             self.embs = nn.Parameter(embs, requires_grad=True)
21 |             self.args.num_feats_bkup = self.args.num_feats
22 |             self.args.num_feats = self.args.dim_learnable_input
23 |
24 |         self.model = GNN_norm(args)
25 |
26 |         self.proj2linkp = nn.Identity()
27 |         self.proj2class = proj2class
28 |         self.dglgraph = None
29 |
30 |     def forward(self, x, edge_index):
31 |         if self.args.TeacherGNN.change_to_featureless:
32 |             x = x*0
33 |         if self.args.dim_learnable_input>0:
34 |             x = self.embs
35 |         commonEmb, self.se_reg_all = self.model(x, edge_index)
36 |         self.out = commonEmb
37 |         return commonEmb
38 |
39 |     def get_3_embs(self, x, edge_index, mask=None, want_heads=True):
40 |         commonEmb = self.forward(x, edge_index)
41 |         emb4classi_full = self.proj2class(commonEmb)
42 |         if want_heads:
43 |             if mask is not None:
44 |                 emb4classi = emb4classi_full[mask]
45 |             else:
46 |                 emb4classi = emb4classi_full
47 |
48 |             emb4linkp = self.proj2linkp(commonEmb)
49 |         else:
50 |             emb4linkp = emb4classi = None
51 |         res = D()
52 |         res.commonEmb, res.emb4classi, res.emb4classi_full, res.emb4linkp = commonEmb, emb4classi, emb4classi_full, emb4linkp
53 |
54 |         return res
55 |
56 |     def get_emb4linkp(self, x, edge_index, mask=None):
57 |         # return embeddings for ALL nodes; get_3_embs returns a D() record, not a tuple
58 |         res = self.get_3_embs(x, edge_index, want_heads=True)
59 |         return res.emb4linkp
60 |
61 |     def graph2commonEmb(self, x, edge_index, train_mask):
62 |         commonEmb = self.forward(x, edge_index)
63 |         commonEmb_train = commonEmb[train_mask]
64 |         return commonEmb_train, commonEmb
65 |
66 | class GNN_norm(nn.Module):
67 |     def __init__(self, args):
68 |         super(GNN_norm, self).__init__()
69 |         self.model = TricksComb(args)
70 |
71 |     def forward(self, x, edge_index):
72 |         return self.model.forward(x, edge_index)
73 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/COLDBREW/Label_propagation_model/norm_spec.jl:
--------------------------------------------------------------------------------
1 | using LinearAlgebra
2 | using LinearMaps
3 | using MAT
4 | using SparseArrays
5 | using Arpack
6 |
7 | using PyCall, SparseArrays
8 |
9 | function scipyCSC_to_julia(A)
10 |     m, n = A.shape
11 |     colPtr = Int[i+1 for i in PyArray(A."indptr")]
12 |     rowVal = Int[i+1 for i in PyArray(A."indices")]
13 |     nzVal = Vector{Float64}(PyArray(A."data"))
14 |     B = SparseMatrixCSC{Float64,Int}(m, n, colPtr, rowVal, nzVal)
15 |     return PyCall.pyjlwrap_new(B)
16 | end
17 |
18 | function read_arxiv(file::String)
19 |     I = Int64[]
20 |     J = Int64[]
21 |     open(file) do f
22 |         for line in eachline(f)
23 |             if line[1] == '#'; continue; end
24 |             data = split(line, ",")
25 |             push!(I, parse(Int64, data[1]))
26 |             push!(J, parse(Int64, data[2]))
27 |         end
28 |     end
29 |     I .+= 1
30 |     J .+= 1
31 |     n = max(maximum(I), maximum(J))
32 |     A = sparse(I, J, 1, n, n)
33 |     A = max.(A, A')
34 |     A = min.(A, 1)
35 |     return A
36 | end
37 |
38 |
39 | function main(PyA, k::Int64)
40 |     m, n
= PyA.shape 41 | colPtr = Int[i+1 for i in PyArray(PyA."indptr")] 42 | rowVal = Int[i+1 for i in PyArray(PyA."indices")] 43 | nzVal = Vector{Float64}(PyArray(PyA."data")) 44 | A = SparseMatrixCSC{Float64,Int}(m, n, colPtr, rowVal, nzVal) 45 | d = vec(sum(A, dims=2)) 46 | τ = sum(d) / length(d) 47 | N = size(A)[1] 48 | 49 | # normalized regularized laplacian 50 | D = Diagonal(1.0 ./ sqrt.(d .+ τ)) 51 | Aop = LinearMap{Float64}(X -> A * X .+ (τ / N) * sum(X), N, N, isposdef=true, issymmetric=true) 52 | NRL = I + D * Aop * D 53 | 54 | (Λ, V) = eigs(NRL, nev=k, tol=1e-6, ncv=2*k+1, which=:LM) 55 | 56 | # axis rotation (not necessary, but could be helpful) 57 | piv = qr(V', Val(true)).jpvt[1:k] 58 | piv_svd = svd(V[piv,:]', full=false) 59 | SCDM_V = V * (piv_svd.U * piv_svd.Vt) 60 | 61 | # save 62 | 63 | return SCDM_V 64 | end 65 | 66 | #A = read_arxiv(ARGS[1]) 67 | #embed = main(A, 128) 68 | #matwrite("$(ARGS[2])_spectral_embedding.mat", Dict("V" => embed), compress=true) 69 | 70 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/COLDBREW/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/COLDBREW/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DEMONet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DEMONet/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DPGNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DPGNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DPGNN/learn.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.algorithm.DPGNN.utils import * 2 | import torch 3 | import copy 4 | 5 | def train(encoder, dist_encoder, prototype, data, optimizer, criterion, args): 6 | encoder.train() 7 | 8 | support, query = episodic_generator( 9 | data, args.episodic_samp, args.classes, data.x.size(0)) 10 | 11 | embedding = encoder(data) 12 | 13 | support_embed = [embedding[support[i]] for i in range(len(args.classes))] 14 | 15 | query_embed = [embedding[query[i]] for i in range(len(args.classes))] 16 | query_size = [query_embed[i].size() for i in range(len(query_embed))] 17 | 18 | query_embed = torch.stack(query_embed, dim=0) 19 | 20 | proto_embed = [prototype(support_embed[i]) 21 | for i in range(len(args.classes))] 22 | 23 | proto_embed = torch.stack(proto_embed, dim=0) # C*D 24 | 25 | query_dist_embed = dist_encoder(query_embed, proto_embed, args.classes) 26 | proto_dist_embed = dist_encoder(proto_embed, proto_embed, args.classes) 27 | 28 | logits = torch.log_softmax( 29 | torch.mm(query_dist_embed, proto_dist_embed), dim=1) 30 | 31 | loss1 = criterion(logits, args.classes) 32 | 33 | # topo 34 | if(args.ssl == 'yes'): 35 | dist_embed = dist_encoder(embedding, proto_embed, args.classes) 36 | loss3 = torch.mean((dist_embed[data.edge_index[0]] * args.deg_inv_sqrt[data.edge_index[0]].view(-1, 1) - 37 | 
dist_embed[data.edge_index[1]] * args.deg_inv_sqrt[data.edge_index[1]].view(-1, 1))**2) 38 | 39 | class_sim = cos_sim_pair(proto_embed) 40 | loss2 = (torch.sum(class_sim) - torch.trace(class_sim)) / \ 41 | ((class_sim.size(0)**2 - class_sim.size(0)) / 2) 42 | else: 43 | loss3 = 0 44 | loss2 = 0 45 | 46 | loss = loss1 + args.lamb1 * loss2 + args.lamb2 * loss3 47 | 48 | optimizer.zero_grad() 49 | loss.backward() 50 | optimizer.step() 51 | 52 | 53 | def test(encoder, dist_encoder, prototype, data, args): 54 | encoder.eval() 55 | 56 | with torch.no_grad(): 57 | embedding = encoder(data) 58 | 59 | support, query = episodic_generator( 60 | data, 1, args.classes, data.x.size(0)) # take all samples in that class 61 | support_embed = [embedding[support[i]] 62 | for i in range(len(args.classes))] 63 | 64 | proto_embed = [prototype(support_embed[i]) 65 | for i in range(len(args.classes))] 66 | 67 | proto_embed = torch.stack(proto_embed, dim=0) # C*D 68 | 69 | f1, f1w, acc = [], [], [] 70 | for _, mask in data('train_mask', 'val_mask', 'test_mask'): 71 | y = data.y[mask] 72 | 73 | query_embed = embedding[mask] # N*D 74 | # query_dist = torch.cdist(query_embed, proto_embed, p = 2) #N*D, C*D --> N*C 75 | query_dist_embed = dist_encoder( 76 | query_embed, proto_embed, args.classes) 77 | proto_dist_embed = dist_encoder( 78 | proto_embed, proto_embed, args.classes) 79 | # logits = torch.softmax(-query_dist, dim = 1) #N*C 80 | logits = torch.log_softmax( 81 | torch.mm(query_dist_embed, proto_dist_embed), dim=1) 82 | 83 | pred = logits.max(dim=1)[1] 84 | 85 | acc.append(pred.eq(y).sum().item() / mask.sum().item()) 86 | f1.append(f1_score(y.tolist(), pred.tolist(), labels=np.arange( 87 | 0, len(args.classes)), average=None, zero_division=0)) 88 | f1w.append(f1_score(y.tolist(), pred.tolist(), labels=np.arange( 89 | 0, len(args.classes)), average='weighted', zero_division=0)) 90 | 91 | return f1, f1w, acc 92 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DPGNN/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn import GCNConv, MessagePassing 4 | from torch.nn import Linear 5 | from torch_geometric.utils import add_remaining_self_loops 6 | from torch_scatter import scatter_add 7 | import copy 8 | 9 | class GCN(torch.nn.Module): 10 | def __init__(self, args): 11 | super(GCN, self).__init__() 12 | self.conv1 = GCNConv(args.num_features, args.n_hidden) 13 | self.conv2 = GCNConv(args.n_hidden, args.n_hidden) 14 | 15 | def forward(self, data): 16 | x, edge_index = data.x, data.edge_index 17 | x = F.relu(self.conv1(x, edge_index)) 18 | x = F.dropout(x, training=self.training) 19 | x = self.conv2(x, edge_index) 20 | return x 21 | 22 | 23 | class prototype(torch.nn.Module): 24 | def __init__(self): 25 | super(prototype, self).__init__() 26 | 27 | 28 | def forward(self, x): 29 | return torch.mean(x, dim = 0) 30 | 31 | 32 | class dist_embed(torch.nn.Module): 33 | def __init__(self, args): 34 | super(dist_embed, self).__init__() 35 | self.lin = Linear(args.n_hidden*args.num_classes, args.num_classes) 36 | 37 | def forward(self, query, proto, classes): 38 | d1 = query.size(0) 39 | d2 = proto.size(0) 40 | 41 | query = torch.repeat_interleave(query, d2, dim = 0) 42 | proto = torch.tile(proto, (d1, 1)) 43 | 44 | dist = self.lin((query - proto).view(d1, -1)) 45 | 46 | return dist 47 | 48 | 49 | 50 | def gcn_norm(edge_index, edge_weight=None, 
num_nodes=None, improved=False,
51 |              add_self_loops=True, dtype=None):
52 |
53 |     fill_value = 2. if improved else 1.
54 |     num_nodes = int(edge_index.max()) + 1 if num_nodes is None else num_nodes
55 |     if edge_weight is None:
56 |         edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
57 |                                  device=edge_index.device)
58 |
59 |     if add_self_loops:
60 |         edge_index, tmp_edge_weight = add_remaining_self_loops(
61 |             edge_index, edge_weight, fill_value, num_nodes)
62 |         assert tmp_edge_weight is not None
63 |         edge_weight = tmp_edge_weight
64 |
65 |     row, col = edge_index[0], edge_index[1]
66 |     deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
67 |     deg_inv_sqrt = deg.pow_(-0.5)
68 |     deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0)
69 |     return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
70 |
71 |
72 | class Prop(MessagePassing):
73 |     def __init__(self, num_classes, K=10, bias=True, **kwargs):
74 |         super(Prop, self).__init__(aggr='add', **kwargs)
75 |         self.K = K
76 |
77 |     def forward(self, x, edge_index, edge_weight=None):
78 |         edge_index, norm = gcn_norm(edge_index, edge_weight, x.size(0), dtype=x.dtype)
79 |
80 |
81 |         preds = []
82 |         preds.append(x)
83 |         for k in range(self.K):
84 |             x = self.propagate(edge_index, x=x, norm=norm)
85 |             preds.append(x)
86 |
87 |         pps = torch.stack(preds)
88 |         out = torch.sum(pps, dim = 0)
89 |         return out
90 |
91 |     def message(self, x_j, norm):
92 |         return norm.view(-1, 1) * x_j
93 |
94 |     def __repr__(self):
95 |         return '{}(K={})'.format(self.__class__.__name__, self.K)
96 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/DPGNN/solver.py:
--------------------------------------------------------------------------------
1 | from IGL_Bench.algorithm.DPGNN.utils import *
2 | from IGL_Bench.algorithm.DPGNN.model import *
3 | from IGL_Bench.algorithm.DPGNN.learn import *
4 |
5 | import torch
6 | import numpy as np
7 | import copy
8 | from sklearn.metrics import balanced_accuracy_score, f1_score, roc_auc_score
9 |
10 | class DPGNN_node_solver:
11 |     def __init__(self, config, dataset, device: str = 'cuda'):
12 |         self.config = config
13 |         self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
14 |         self.data = dataset.to(self.device)
15 |         self.num_classes: int = int(self.data.y.max().item() + 1)
16 |         self.num_features: int = self.data.x.size(1)
17 |         self.config.num_classes = self.num_classes
18 |         self.config.num_features = self.num_features
19 |         self.classes = torch.arange(self.num_classes, device=self.device)
20 |         self.config.classes = self.classes
21 |         train_counts = torch.bincount(self.data.y[self.data.train_mask].cpu(),
22 |                                       minlength=self.num_classes)
23 |         self.config.c_train_num = train_counts
24 |         if getattr(self.config, 'ssl', 'no') == 'yes':
25 |             self.config.deg_inv_sqrt = deg(self.data.edge_index, self.data.x).to(self.device)
26 |         if getattr(self.config, 'backbone', 'GCN') == 'GCN':
27 |             self.encoder = GCN(self.config).to(self.device)
28 |         else:
29 |             raise ValueError(f"Unsupported backbone: {getattr(self.config, 'backbone', None)}")
30 |         self.prototype_net = prototype().to(self.device)
31 |         self.dist_encoder = dist_embed(self.config).to(self.device)
32 |         self._build_optimizer()
33 |         self.criterion = torch.nn.NLLLoss()
34 |         self.data.y_aug = self.data.y.clone()
35 |
36 |     def _build_optimizer(self):
37 |         param_groups = [
38 |             {'params': self.encoder.conv1.parameters(), 'lr': 1e-2, 'weight_decay': 5e-4},
39 |             {'params': self.encoder.conv2.parameters(), 'lr': 1e-2,
'weight_decay': 0.0}, 40 | {'params': self.dist_encoder.lin.parameters(), 'lr': 1e-2, 'weight_decay': 0.0}, 41 | ] 42 | self.optimizer = torch.optim.Adam(param_groups) 43 | 44 | def reset_parameters(self): 45 | self.encoder.conv1.reset_parameters() 46 | self.encoder.conv2.reset_parameters() 47 | self.dist_encoder.lin.reset_parameters() 48 | 49 | def _label_prop_augment(self): 50 | if getattr(self.config, 'label_prop', 'no') != 'yes': 51 | return 52 | y_prop = label_prop(self.data.edge_index, 53 | self.data.train_mask, 54 | self.config.c_train_num, 55 | self.data.y, 56 | epochs=20) 57 | y_aug, new_train_mask = sample(self.data.train_mask, 58 | self.config.c_train_num, 59 | y_prop, 60 | self.data.y, 61 | eta=self.config.eta) 62 | self.data.y_aug = y_aug.to(self.device) 63 | self.data.train_mask = new_train_mask.to(self.device) 64 | 65 | def train(self): 66 | self.reset_parameters() 67 | self._label_prop_augment() 68 | best_val_f1 = -1.0 69 | early_stopping = getattr(self.config, 'early_stopping', 10) 70 | history = [] 71 | for epoch in range(getattr(self.config, 'epochs', 500)): 72 | train(self.encoder, self.dist_encoder, self.prototype_net, 73 | self.data, self.optimizer, self.criterion, self.config) 74 | f1_all, _, _ = test(self.encoder, self.dist_encoder, self.prototype_net, 75 | self.data, self.config) 76 | val_f1_mean = np.mean(f1_all[1]) 77 | print('Epoch: {:03d}, val_f1_mean: {:.4f}'.format(epoch, val_f1_mean)) 78 | history.append(val_f1_mean) 79 | if val_f1_mean > best_val_f1: 80 | best_val_f1 = val_f1_mean 81 | self._best_state = { 82 | 'encoder': copy.deepcopy(self.encoder.state_dict()), 83 | 'dist': copy.deepcopy(self.dist_encoder.state_dict()) 84 | } 85 | if early_stopping > 0 and epoch > self.config.epochs // 10: 86 | if len(history) > early_stopping: 87 | recent = np.array(history[-early_stopping:]) 88 | if val_f1_mean < recent.mean(): 89 | break 90 | if hasattr(self, '_best_state'): 91 | self.encoder.load_state_dict(self._best_state['encoder']) 92 | self.dist_encoder.load_state_dict(self._best_state['dist']) 93 | 94 | def test(self): 95 | self.encoder.eval() 96 | with torch.no_grad(): 97 | embedding = self.encoder(self.data) 98 | proto_list = [] 99 | for c in self.classes: 100 | idx = (self.data.y_aug == c) & self.data.train_mask 101 | proto_list.append(self.prototype_net(embedding[idx])) 102 | proto = torch.stack(proto_list, dim=0) 103 | query_emb = embedding[self.data.test_mask] 104 | query_dist = self.dist_encoder(query_emb, proto, self.classes) 105 | proto_dist = self.dist_encoder(proto, proto, self.classes) 106 | logits = torch.log_softmax(torch.mm(query_dist, proto_dist), dim=1) 107 | probs = torch.exp(logits).cpu().numpy() 108 | preds = logits.max(dim=1)[1].cpu() 109 | labels = self.data.y[self.data.test_mask].cpu() 110 | acc = (preds == labels).sum().item() / labels.size(0) 111 | bacc = balanced_accuracy_score(labels, preds) 112 | mf1 = f1_score(labels, preds, average='macro', zero_division=0) 113 | try: 114 | roc = roc_auc_score(labels, probs, multi_class='ovr') 115 | except Exception: 116 | roc = float('nan') 117 | return acc, bacc, mf1, roc 118 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DRGCN/__init__.py -------------------------------------------------------------------------------- 
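The node-level solvers in this package share one pattern: construct with a config and a dataset, then call train() and test(), as in DPGNN_node_solver above. A minimal driver sketch follows; it is illustrative only. The repository ships YAML configs (e.g. config/node/class/DPGNN.yml), and a SimpleNamespace stands in for one here; the field names mirror the getattr defaults the solver reads, while the values for episodic_samp, lamb1, and lamb2 are assumptions, not benchmarked settings.

# Sketch: driving DPGNN_node_solver on a PyG Data object that carries
# train_mask / val_mask / test_mask. Hyperparameter values are assumed.
from types import SimpleNamespace

config = SimpleNamespace(backbone='GCN', n_hidden=64, epochs=500, early_stopping=10,
                         ssl='no', label_prop='no', episodic_samp=0.5,
                         lamb1=0.5, lamb2=0.5)
solver = DPGNN_node_solver(config, data, device='cuda')
solver.train()
acc, bacc, mf1, roc = solver.test()
print(f'acc={acc:.4f} bacc={bacc:.4f} macro-F1={mf1:.4f} auc-roc={roc:.4f}')
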
/IGL_Bench/algorithm/DRGCN/load_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import scipy.sparse as sp
4 | import torch
5 |
6 | def _torch_sparse_to_scipy(tsp, shape=None):
7 |     tsp = tsp.coalesce()
8 |     idx = tsp.indices().cpu().numpy()
9 |     val = tsp.values().cpu().numpy()
10 |     if shape is None:
11 |         shape = tsp.shape
12 |     return sp.coo_matrix((val, (idx[0], idx[1])), shape=shape).tocsr()
13 |
14 | def _dense_to_scipy(mat_like, shape=None):
15 |     """Dense torch / numpy matrix → CSR"""
16 |     if isinstance(mat_like, torch.Tensor):
17 |         mat_like = mat_like.cpu().numpy()
18 |     if shape is None:
19 |         shape = mat_like.shape
20 |     return sp.coo_matrix(mat_like.reshape(shape)).tocsr()
21 |
22 | def _edge_index_to_scipy(edge_index, num_nodes, edge_weight=None):
23 |     row, col = edge_index.cpu().numpy()
24 |     if edge_weight is None:
25 |         edge_weight = np.ones(row.shape[0], dtype=np.float32)
26 |     else:
27 |         edge_weight = edge_weight.cpu().numpy()
28 |     return sp.coo_matrix((edge_weight, (row, col)),
29 |                          shape=(num_nodes, num_nodes)).tocsr()
30 |
31 | def _any_adj_to_scipy(adj_like, num_nodes):
32 |     """
33 |     Convert any adjacency storage format that may appear in PyG to a unified CSR matrix.
34 |     """
35 |     # 1) already a scipy matrix
36 |     if isinstance(adj_like, sp.spmatrix):
37 |         return adj_like.tocsr()
38 |
39 |     # 2) torch sparse tensor
40 |     if isinstance(adj_like, torch.Tensor):
41 |         if adj_like.is_sparse:
42 |             return _torch_sparse_to_scipy(adj_like, (num_nodes, num_nodes))
43 |         else:  # dense torch.Tensor
44 |             return _dense_to_scipy(adj_like, (num_nodes, num_nodes))
45 |
46 |     # 3) torch_sparse.SparseTensor
47 |     try:
48 |         from torch_sparse import SparseTensor
49 |         if isinstance(adj_like, SparseTensor):
50 |             row, col, val = adj_like.coo()
51 |             return sp.coo_matrix(
52 |                 (val.cpu().numpy(),
53 |                  (row.cpu().numpy(), col.cpu().numpy())),
54 |                 shape=(num_nodes, num_nodes)).tocsr()
55 |     except ImportError:
56 |         pass
57 |
58 |     # 4) dense numpy ndarray / list
59 |     if isinstance(adj_like, (np.ndarray, list)):
60 |         return _dense_to_scipy(np.asarray(adj_like, dtype=np.float32),
61 |                                (num_nodes, num_nodes))
62 |
63 |     raise TypeError(f"Unsupported adjacency type: {type(adj_like)}")
64 |
65 | # ------------------------------------------------------------------ #
66 | def data_process(dataset):
67 |
68 |     # ---------- basic data ----------
69 |     x = dataset.x.cpu().numpy().astype(np.float32)
70 |     label = dataset.y.squeeze().cpu().numpy().astype(np.int64)
71 |     num_nodes = x.shape[0]
72 |
73 |     # ---------- adjacency matrix ----------
74 |     if hasattr(dataset, 'adj') and dataset.adj is not None:
75 |         adj = _any_adj_to_scipy(dataset.adj, num_nodes)
76 |     elif hasattr(dataset, 'edge_index'):
77 |         edge_weight = getattr(dataset, 'edge_weight', None)
78 |         adj = _edge_index_to_scipy(dataset.edge_index, num_nodes, edge_weight)
79 |     else:
80 |         raise ValueError("Dataset must provide adj / edge_index")
81 |
82 |     # ---------- normalized adjacency ----------
83 |     if hasattr(dataset, 'adj_norm') and dataset.adj_norm is not None:
84 |         adj_norm = _any_adj_to_scipy(dataset.adj_norm, num_nodes)
85 |     else:
86 |         deg = np.array(adj.sum(1)).flatten()
87 |         deg_inv_sqrt = np.power(deg, -0.5, where=deg > 0)
88 |         D_inv_sqrt = sp.diags(deg_inv_sqrt)
89 |         adj_norm = D_inv_sqrt @ adj @ D_inv_sqrt
90 |
91 |     # ---------- split indices ----------
92 |     train_indexes = np.asarray(dataset.train_index, dtype=np.int64)
93 |     validation_indexes = np.asarray(dataset.val_index, dtype=np.int64)
94 |     test_indexes = np.asarray(dataset.test_index, dtype=np.int64)
95 |
96 |     # ---------- GAN sampling ----------
97 |     label_counts = {}
98 |     for idx in train_indexes:
99 |         lab = int(label[idx])
100 |         label_counts.setdefault(lab, []).append(idx)
101 |     balance_num = max(len(v) for v in label_counts.values())
102 |
103 |     real_gan_nodes, generated_gan_nodes, real_node_sequence = [], [], []
104 |     for lab, nodes in label_counts.items():
105 |         for n in nodes:  # keep every real node
106 |             real_gan_nodes.append([n, lab])
107 |             real_node_sequence.append(n)
108 |         for _ in range(balance_num - len(nodes)):  # randomly oversample to balance
109 |             s = random.choice(nodes)
110 |             real_gan_nodes.append([s, lab])
111 |             real_node_sequence.append(s)
112 |             generated_gan_nodes.append([s, lab])
113 |
114 |     perm = np.random.permutation(len(real_gan_nodes))
115 |     real_gan_nodes = [real_gan_nodes[i] for i in perm]
116 |     real_node_sequence = [real_node_sequence[i] for i in perm]
117 |
118 |     # ---------- bipartite graph ----------
119 |     adj_coo = adj.tocoo()
120 |     neighbor_dict = {}
121 |     for r, c in zip(adj_coo.row, adj_coo.col):
122 |         neighbor_dict.setdefault(r, []).append(c)
123 |
124 |     all_neighbor_nodes = sorted(
125 |         {nbr for v in real_node_sequence for nbr in neighbor_dict.get(v, [])})
126 |     real_num = len(real_node_sequence)
127 |     neigh_num = len(all_neighbor_nodes)
128 |
129 |     adj_neighbor = np.zeros((real_num, neigh_num), dtype=np.float32)
130 |     col_map = {n: j for j, n in enumerate(all_neighbor_nodes)}
131 |     for i, v in enumerate(real_node_sequence):
132 |         for nbr in neighbor_dict.get(v, []):
133 |             j = col_map.get(nbr)
134 |             if j is not None:
135 |                 adj_neighbor[i, j] = 1.0
136 |
137 |     # ---------- return ----------
138 |     return (x, adj, adj_norm, label,
139 |             train_indexes, test_indexes, validation_indexes,
140 |             real_gan_nodes, generated_gan_nodes,
141 |             adj_neighbor, all_neighbor_nodes)
142 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/DRGCN/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DRGCN/models/__init__.py
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/DRGCN/models/adversarialNets.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | import numpy as np
3 |
4 | tf.disable_v2_behavior()
5 |
6 |
7 | def glorot(shape, name):
8 |     init_range = np.sqrt(6.0 / (shape[0] + shape[1]))
9 |     initializer = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32)
10 |     return tf.get_variable(name, initializer=initializer)
11 |
12 | def zeros(shape, name):
13 |     initializer = tf.zeros(shape, dtype=tf.float32)
14 |     return tf.get_variable(name, initializer=initializer)
15 |
16 |
17 | class Generator:
18 |     def __init__(self, x_dim, y_dim, z_dim, h_dim):
19 |         self.x_dim = x_dim
20 |         self.y_dim = y_dim
21 |         self.z_dim = z_dim
22 |         self.h_dim = h_dim
23 |         self._build_model()
24 |
25 |     def _build_model(self):
26 |         with tf.variable_scope("gan/generator"):
27 |             self.G_W1 = glorot([self.z_dim + self.y_dim, self.h_dim], name='G_W1')
28 |             self.G_b1 = zeros([self.h_dim], name='G_b1')
29 |             self.G_W2 = glorot([self.h_dim, self.x_dim], name='G_W2')
30 |             self.G_b2 = zeros([self.x_dim], name='G_b2')
31 |
32 |     def call(self, z, y):
33 |         inputs = tf.concat([z, y], axis=1)
34 |         h1 = tf.nn.relu(tf.matmul(inputs, self.G_W1) + self.G_b1)
35 |         log_prob = tf.matmul(h1, self.G_W2) + self.G_b2
36 |         prob = tf.nn.softmax(tf.nn.tanh(log_prob))
37 |         return prob
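    # This is a conditional generator: noise z concatenated with a class
    # one-hot y passes through one hidden layer, and softmax(tanh(.)) keeps
    # each fake feature vector on a simplex, presumably so generated features
    # resemble the row-normalized real features (cf. preprocess_features in
    # sparse.py).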
38 | 39 | def __call__(self, *args, **kwargs): 40 | return self.call(*args, **kwargs) 41 | 42 | 43 | class Discriminator: 44 | def __init__(self, x_dim, y_dim, h_dim): 45 | self.x_dim = x_dim 46 | self.y_dim = y_dim 47 | self.h_dim = h_dim 48 | self._build_model() 49 | 50 | def _build_model(self): 51 | with tf.variable_scope("gan/discriminator"): 52 | self.D_W1 = glorot([self.x_dim + self.y_dim, self.h_dim], name='D_W1') 53 | self.D_b1 = zeros([self.h_dim], name='D_b1') 54 | self.D_W2 = glorot([self.h_dim, 1], name='D_W2') 55 | self.D_b2 = zeros([1], name='D_b2') 56 | 57 | def call(self, x, y): 58 | inputs = tf.concat([x, y], axis=1) 59 | h1 = tf.nn.relu(tf.matmul(inputs, self.D_W1) + self.D_b1) 60 | logit = tf.matmul(h1, self.D_W2) + self.D_b2 61 | prob = tf.nn.sigmoid(logit) 62 | return prob, logit 63 | 64 | def __call__(self, *args, **kwargs): 65 | return self.call(*args, **kwargs) -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/models/gmm.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import tensorflow_probability as tfp 4 | 5 | tfd = tfp.distributions 6 | 7 | 8 | def _softplus_inverse(x): 9 | """Helper which computes the function inverse of `tf.nn.softplus`.""" 10 | return tf.math.log(tf.math.expm1(x)) 11 | 12 | class gaussianMixtureModel: 13 | 14 | def __init__(self, mixture_components, latent_size): 15 | self.mixture_components = mixture_components 16 | self.latent_size = latent_size 17 | 18 | def make_mixture_posterior(self, feats): 19 | return tfd.MultivariateNormalDiag( 20 | loc=feats, 21 | scale_diag=tf.nn.softplus(feats + _softplus_inverse(1.0)), 22 | name="unlabeled_dist" 23 | ) 24 | 25 | def make_mixture_prior(self): 26 | """Creates the mixture of Gaussians prior distribution. 27 | Returns: 28 | A `tfd.Distribution` instance representing the prior over latent encodings. 
29 | """ 30 | if self.mixture_components == 1: 31 | # Use fixed standard Gaussian 32 | return tfd.MultivariateNormalDiag( 33 | loc=tf.zeros([self.latent_size]), 34 | scale_diag=tf.ones([self.latent_size]), # <- replaced scale_identity_multiplier 35 | name="labeled_dist" 36 | ) 37 | 38 | # Learnable mixture parameters 39 | loc = tf.compat.v1.get_variable( 40 | name="loc", shape=[self.mixture_components, self.latent_size]) 41 | raw_scale_diag = tf.compat.v1.get_variable( 42 | name="raw_scale_diag", shape=[self.mixture_components, self.latent_size]) 43 | mixture_logits = tf.compat.v1.get_variable( 44 | name="mixture_logits", shape=[self.mixture_components]) 45 | 46 | return tfd.MixtureSameFamily( 47 | mixture_distribution=tfd.Categorical(logits=mixture_logits), 48 | components_distribution=tfd.MultivariateNormalDiag( 49 | loc=loc, 50 | scale_diag=tf.nn.softplus(raw_scale_diag) 51 | ), 52 | name="labeled_dist" 53 | ) 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/models/graph.py: -------------------------------------------------------------------------------- 1 | """Spectral Graph Convolutional filter cell.""" 2 | import numpy as np 3 | import tensorflow.compat.v1 as tf 4 | tf.disable_v2_behavior() 5 | import os 6 | 7 | def _dot(x, y, sparse=False): 8 | if sparse: 9 | return tf.sparse_tensor_dense_matmul(x, y) 10 | return tf.matmul(x, y) 11 | 12 | def sparse_dropout(x, keep_prob, noise_shape): 13 | """Dropout for sparse tensors.""" 14 | random_tensor = keep_prob 15 | random_tensor += tf.random_uniform(noise_shape) 16 | dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool) 17 | pre_out = tf.sparse_retain(x, dropout_mask) 18 | return pre_out * (1./keep_prob) 19 | 20 | def glorot(shape, name=None): 21 | """Glorot & Bengio (AISTATS 2010) init.""" 22 | init_range = np.sqrt(6.0/(shape[0]+shape[1])) 23 | initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32) 24 | return tf.Variable(initial, name=name) 25 | 26 | def zeros(shape, name=None): 27 | """All zeros.""" 28 | initial = tf.zeros(shape, dtype=tf.float32) 29 | return tf.Variable(initial, name=name) 30 | 31 | class GraphConvLayer: 32 | def __init__(self, input_dim, output_dim, name, holders, act=tf.nn.relu, 33 | dropout=False, bias=True): 34 | # name, act=tf.nn.relu, bias=False, dropout=): 35 | self.input_dim = input_dim 36 | self.output_dim = output_dim 37 | self.act=act 38 | self.bias = bias 39 | self.dropout = dropout 40 | self.var={} 41 | 42 | with tf.variable_scope(name): 43 | 44 | with tf.name_scope('weights'): 45 | self.var['w']=glorot([input_dim, output_dim], 46 | name='w') 47 | if self.bias: 48 | self.var['b']=zeros([output_dim], 49 | name='b') 50 | if self.dropout: 51 | self.dropout_prob = holders['dropout_prob'] 52 | else: 53 | self.dropout_prob = 0. 
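        # num_features_nonzero supplies the noise shape that sparse_dropout
        # needs when the input x is a tf.SparseTensor (see call below); dense
        # inputs go through ordinary tf.nn.dropout instead.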
54 | self.num_features_nonzero = holders['num_features_nonzero'] 55 | 56 | def call(self, adj_norm, x, sparse=False): 57 | 58 | if sparse: 59 | x = sparse_dropout(x, 1-self.dropout_prob, self.num_features_nonzero) 60 | else: 61 | x = tf.nn.dropout(x, 1-self.dropout_prob) 62 | hw = _dot(x=x, y=self.var['w'], sparse=sparse) 63 | ahw = _dot(x=adj_norm, y=hw, sparse=True) 64 | 65 | embed_out = self.act(ahw) 66 | 67 | if self.bias: 68 | embed_out = self.act(tf.add(ahw, self.var['b'])) 69 | return embed_out 70 | 71 | def __call__(self, *args, **kwargs): 72 | return self.call(*args, **kwargs) -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/sparse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | 4 | 5 | def preprocess_features(features): 6 | """Row-normalize feature matrix and convert to tuple representation""" 7 | rowsum = np.array(features.sum(1)) 8 | r_inv = np.power(rowsum, -1).flatten() 9 | r_inv[np.isinf(r_inv)] = 0. 10 | r_mat_inv = sp.diags(r_inv) 11 | features = r_mat_inv.dot(features) 12 | return sparse_to_tuple(features) 13 | 14 | def sparse_to_tuple(sparse_mx): 15 | """Convert sparse matrix to tuple representation.""" 16 | # The zeroth element of the tuple contains the cell location of each 17 | # non-zero value in the sparse matrix 18 | # The first element of the tuple contains the value at each cell location 19 | # in the sparse matrix 20 | # The second element of the tuple contains the full shape of the sparse 21 | # matrix 22 | def to_tuple(mx): 23 | if not sp.isspmatrix_coo(mx): 24 | mx = mx.tocoo() 25 | coords = np.vstack((mx.row, mx.col)).transpose() 26 | values = mx.data 27 | shape = mx.shape 28 | return coords, values, shape 29 | 30 | if isinstance(sparse_mx, list): 31 | for i in range(len(sparse_mx)): 32 | sparse_mx[i] = to_tuple(sparse_mx[i]) 33 | else: 34 | sparse_mx = to_tuple(sparse_mx) 35 | 36 | return sparse_mx 37 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DataDec/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/contrast.py: -------------------------------------------------------------------------------- 1 | from GCL.models import DualBranchContrast 2 | from GCL.models.contrast_model import add_extra_mask 3 | from GCL.losses import Loss 4 | import torch 5 | import numpy as np 6 | 7 | class DualBranchContrast_diet(DualBranchContrast): 8 | def __init__(self, loss: Loss, mode: str, intraview_negs: bool = False, use_grad_norm: bool = False, ord: int = 1 ): 9 | super(DualBranchContrast_diet, self).__init__(loss=loss, mode=mode, intraview_negs=intraview_negs) 10 | self.use_grad_norm = use_grad_norm 11 | self.ord = ord 12 | 13 | 14 | def forward(self, h1=None, h2=None, g1=None, g2=None, batch=None, h3=None, h4=None, 15 | extra_pos_mask=None, extra_neg_mask=None): 16 | if self.mode == 'L2L': 17 | assert h1 is not None and h2 is not None 18 | anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=h1, sample=h2) 19 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=h2, sample=h1) 20 | elif self.mode == 'G2G': 21 | assert g1 is not None and g2 is not None 22 | 
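            # G2G mode: contrast graph-level embeddings across the two augmented
            # views; each graph's positive is its own embedding in the other view.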
anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=g1, sample=g2) 23 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=g2, sample=g1) 24 | else: 25 | if batch is None or batch.max().item() + 1 <= 1: 26 | assert all(v is not None for v in [h1, h2, g1, g2, h3, h4]) 27 | anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=g1, sample=h2, neg_sample=h4) 28 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=g2, sample=h1, neg_sample=h3) 29 | else: 30 | assert all(v is not None for v in [h1, h2, g1, g2, batch]) 31 | anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=g1, sample=h2, batch=batch) 32 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=g2, sample=h1, batch=batch) 33 | 34 | pos_mask1, neg_mask1 = add_extra_mask(pos_mask1, neg_mask1, extra_pos_mask, extra_neg_mask) 35 | pos_mask2, neg_mask2 = add_extra_mask(pos_mask2, neg_mask2, extra_pos_mask, extra_neg_mask) 36 | l1 = self.loss(anchor=anchor1, sample=sample1, pos_mask=pos_mask1, neg_mask=neg_mask1, **self.kwargs) 37 | l2 = self.loss(anchor=anchor2, sample=sample2, pos_mask=pos_mask2, neg_mask=neg_mask2, **self.kwargs) 38 | 39 | sample1.retain_grad() 40 | sample2.retain_grad() 41 | scores1 = get_lord_error_fn(anchor1, sample1, self.ord) 42 | scores2 = get_lord_error_fn(anchor2, sample2, self.ord) 43 | return (l1 + l2) * 0.5, scores1, scores2, sample1, sample2 44 | 45 | def get_lord_error_fn(logits, Y, ord): 46 | errors = torch.nn.functional.softmax(logits, dim=1) - Y 47 | scores = np.linalg.norm(errors.detach().cpu().numpy(), ord=ord, axis=-1) 48 | return scores 49 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Sampler 3 | from torch_geometric.data import Dataset 4 | 5 | class MyDataset(Dataset): 6 | def __init__(self, full_dataset, sampled_list): 7 | self.full_dataset = full_dataset 8 | self.sampled_list = sampled_list 9 | 10 | def __len__(self): 11 | return len(self.sampled_list) 12 | 13 | def __getitem__(self, idx): 14 | actual_idx = self.sampled_list[idx] 15 | data = self.full_dataset[actual_idx] 16 | return data, actual_idx 17 | 18 | class IndexSampler(Sampler): 19 | def __init__(self, sampled_list): 20 | self.sampled_list = sampled_list 21 | 22 | def __iter__(self): 23 | return iter(self.sampled_list) 24 | 25 | def __len__(self): 26 | return len(self.sampled_list) 27 | 28 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/prune.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | from collections import OrderedDict 5 | 6 | from pdb import set_trace 7 | 8 | 9 | class Mask(object): 10 | def __init__(self, model, no_reset=False): 11 | super(Mask, self).__init__() 12 | self.model = model 13 | if not no_reset: 14 | self.reset() 15 | 16 | @property 17 | def sparsity(self): 18 | """Return the percent of weights that have been pruned as a decimal.""" 19 | prunableTensors = [] 20 | for name, module in self.model.named_modules(): 21 | if hasattr(module, "prune_mask"): 22 | prunableTensors.append(module.prune_mask.detach()) 23 | 24 | unpruned = torch.sum(torch.tensor([torch.sum(v) for v in prunableTensors])) 25 | total = torch.sum(torch.tensor([torch.sum(torch.ones_like(v)) for v in prunableTensors])) 26 | return 1 - 
unpruned.float() / total.float() 27 | 28 | @property 29 | def density(self): 30 | return 1 - self.sparsity 31 | 32 | def magnitudePruning(self, magnitudePruneFraction, randomPruneFraction=0): 33 | weights = [] 34 | for name, module in self.model.named_modules(): 35 | if hasattr(module, "prune_mask"): 36 | weights.append(module.weight.clone().cpu().detach().numpy()) 37 | 38 | 39 | self.reset() 40 | prunableTensors = [] 41 | for name, module in self.model.named_modules(): 42 | if hasattr(module, "prune_mask"): 43 | prunableTensors.append(module.prune_mask.detach()) 44 | 45 | number_of_remaining_weights = torch.sum(torch.tensor([torch.sum(v) for v in prunableTensors])).cpu().numpy() 46 | number_of_weights_to_prune_magnitude = np.ceil(magnitudePruneFraction * number_of_remaining_weights).astype(int) 47 | number_of_weights_to_prune_random = np.ceil(randomPruneFraction * number_of_remaining_weights).astype(int) 48 | random_prune_prob = number_of_weights_to_prune_random / (number_of_remaining_weights - number_of_weights_to_prune_magnitude) 49 | 50 | 51 | weight_vector = np.concatenate([v.flatten() for v in weights]) 52 | threshold = np.sort(np.abs(weight_vector))[min(number_of_weights_to_prune_magnitude, len(weight_vector) - 1)] 53 | 54 | 55 | for name, module in self.model.named_modules(): 56 | if hasattr(module, "prune_mask"): 57 | module.prune_mask = (torch.abs(module.weight) >= threshold).float() 58 | 59 | module.prune_mask[torch.rand_like(module.prune_mask) < random_prune_prob] = 0 60 | 61 | def reset(self): 62 | for name, module in self.model.named_modules(): 63 | if hasattr(module, "prune_mask"): 64 | module.prune_mask = torch.ones_like(module.weight) 65 | 66 | 67 | def save_mask(epoch, model, filename): 68 | pruneMask = OrderedDict() 69 | 70 | for name, module in model.named_modules(): 71 | if hasattr(module, "prune_mask"): 72 | pruneMask[name] = module.prune_mask.cpu().type(torch.bool) 73 | 74 | torch.save({"epoch": epoch, "pruneMask": pruneMask}, filename) 75 | 76 | 77 | def load_mask(model, state_dict, device): 78 | 79 | for name, module in model.named_modules(): 80 | if hasattr(module, "prune_mask"): 81 | module.prune_mask.data = state_dict[name].to(device).float() 82 | 83 | return model 84 | 85 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/G2GNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/G2GNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/G2GNN/aug.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch_geometric.utils.dropout import dropout_adj 3 | import torch 4 | import random 5 | 6 | def remove_edge(edge_index, drop_ratio): 7 | edge_index, _ = dropout_adj(edge_index, p = drop_ratio) 8 | 9 | return edge_index 10 | 11 | 12 | def drop_node(x, drop_ratio): 13 | node_num, _ = x.size() 14 | drop_num = int(node_num * drop_ratio) 15 | 16 | idx_mask = np.random.choice(node_num, drop_num, replace = False).tolist() 17 | 18 | x[idx_mask] = 0 19 | 20 | return x 21 | 22 | def upsample(dataset): 23 | y = torch.tensor([dataset[i].y for i in range(len(dataset))]) 24 | classes = torch.unique(y) 25 | 26 | num_class_graph = [(y == i.item()).sum() for i in classes] 27 | 28 | max_num_class_graph = max(num_class_graph) 29 | 30 | chosen = [] 31 | 
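    # For each class, replicate its training indices (sampling randomly for any
    # remainder) until every class matches the graph count of the largest class.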
for i in range(len(classes)):
32 |         train_idx = torch.where(y == classes[i])[0].tolist()
33 |
34 |         up_sample_ratio = max_num_class_graph / num_class_graph[i]
35 |         up_sample_num = int(
36 |             num_class_graph[i] * up_sample_ratio - num_class_graph[i])
37 |
38 |         if(up_sample_num <= len(train_idx)):
39 |             up_sample = random.sample(train_idx, up_sample_num)
40 |         else:
41 |             tmp = int(up_sample_num / len(train_idx))
42 |             up_sample = train_idx * tmp
43 |             tmp = up_sample_num - len(train_idx) * tmp
44 |
45 |             up_sample.extend(random.sample(train_idx, tmp))
46 |
47 |         chosen.extend(up_sample)
48 |
49 |     if not chosen:
50 |         return list(dataset)
51 |
52 |     chosen = torch.tensor(chosen)
53 |     extend_data = dataset[chosen]
54 |
55 |     data = list(dataset) + list(extend_data)
56 |
57 |     return data
58 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/G2GNN/dataloader.py:
--------------------------------------------------------------------------------
1 | from IGL_Bench.algorithm.G2GNN.aug import *
2 | from torch.utils.data import Dataset as BaseDataset
3 | from torch_geometric.data.collate import collate
4 | import torch
5 | from torch_geometric.utils import subgraph, degree, add_remaining_self_loops
6 | from torch_sparse import SparseTensor
7 |
8 | class Dataset_knn_aug(BaseDataset):
9 |     def __init__(self, dataset, all_dataset, args):
10 |         self.args = args
11 |         self.dataset = dataset
12 |         self.all_dataset = all_dataset
13 |
14 |     def _get_feed_dict(self, index):
15 |         feed_dict = self.dataset[index]
16 |
17 |         return feed_dict
18 |
19 |     def __len__(self):
20 |         return len(self.dataset)
21 |
22 |     def __getitem__(self, index):
23 |         return self._get_feed_dict(index)
24 |
25 |     def collate_batch(self, feed_dicts):
26 |         batch_id = torch.tensor([feed_dict.id for feed_dict in feed_dicts])
27 |         # prevent testing data leakage
28 |         train_idx = torch.arange(batch_id.shape[0])
29 |
30 |         # add_knn_dataset to feed_dicts
31 |         pad_knn_id = find_knn_id(batch_id, self.args.kernel_idx)
32 |         feed_dicts.extend([self.all_dataset[i] for i in pad_knn_id])
33 |
34 |         data, slices, _ = collate(
35 |             feed_dicts[0].__class__,
36 |             data_list=feed_dicts,
37 |             increment=True,
38 |             add_batch=True,
39 |         )
40 |
41 |         knn_edge_index, _ = subgraph(
42 |             data.id, self.args.knn_edge_index, relabel_nodes=True)
43 |
44 |         knn_edge_index, _ = add_remaining_self_loops(knn_edge_index)
45 |         row, col = knn_edge_index
46 |         knn_deg = degree(col, data.id.shape[0])
47 |         deg_inv_sqrt = knn_deg.pow(-0.5)
48 |         edge_weight = deg_inv_sqrt[row] * deg_inv_sqrt[col]  # symmetric normalization, cf. gcn_norm
49 |
50 |         knn_adj_t = torch.sparse.FloatTensor(
51 |             knn_edge_index, edge_weight, (data.id.size(0), data.id.size(0)))
52 |
53 |
54 |         aug_xs, aug_adj_ts = [], []
55 |         for i in range(self.args.aug_num):
56 |             edge_index = torch.stack(data.adj_t.coo()[:2])
57 |             edge_index_aug = remove_edge(edge_index, self.args.drop_edge_ratio)
58 |             aug_adj_ts.append(SparseTensor(
59 |                 row=edge_index_aug[0], col=edge_index_aug[1], value=None, sparse_sizes=(data.x.size(0), data.x.size(0))))
60 |
61 |             aug_xs.append(drop_node(data.x, self.args.mask_node_ratio))
62 |
63 |         batch = {'data': data,
64 |                  'train_idx': train_idx,
65 |                  'aug_adj_ts': aug_adj_ts,
66 |                  'aug_xs': aug_xs,
67 |                  'knn_adj_t': knn_adj_t}
68 |
69 |         return batch
70 |
71 | def find_knn_id(batch_id, kernel_idx):
72 |     knn_id = set(kernel_idx[batch_id].view(-1).tolist())
73 |     pad_knn_id = knn_id.difference(set(batch_id.tolist()))
74 |
75 |     return list(pad_knn_id)
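
collate_batch both pads every mini-batch with its kernel-kNN neighbor graphs (so the graph-of-graphs stays intact inside the batch) and draws aug_num augmented views. A minimal wiring sketch, assuming each stored graph carries an id attribute and a SparseTensor adj_t (e.g. via torch_geometric.transforms.ToSparseTensor); get_kernel_knn comes from kernel.py below, and the dataset name and ratios here are illustrative assumptions:

# Sketch: plugging Dataset_knn_aug into a vanilla DataLoader.
from torch.utils.data import DataLoader

args.kernel_idx, args.knn_edge_index = get_kernel_knn(
    'PROTEINS', 'sp', 3, dataset)   # kernel-kNN over the whole dataset; the
                                    # kernel_type string only names the cache file
args.aug_num, args.drop_edge_ratio, args.mask_node_ratio = 2, 0.2, 0.2

train_set = Dataset_knn_aug(train_dataset, dataset, args)
loader = DataLoader(train_set, batch_size=32, shuffle=True,
                    collate_fn=train_set.collate_batch)
for batch in loader:
    data, train_idx = batch['data'], batch['train_idx']    # kNN-padded PyG batch
    knn_adj_t, aug_xs = batch['knn_adj_t'], batch['aug_xs']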
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/G2GNN/kernel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | from grakel.kernels import ShortestPath 5 | from grakel import Graph 6 | 7 | def construct_knn(kernel_idx): 8 | edge_index = [[], []] 9 | 10 | for i in range(len(kernel_idx)): 11 | for j in range(len(kernel_idx[i])): 12 | edge_index[0].append(kernel_idx[i, j].item()) 13 | edge_index[1].append(i) 14 | 15 | edge_index[1].append(kernel_idx[i, j].item()) 16 | edge_index[0].append(i) 17 | 18 | return torch.tensor(edge_index, dtype=torch.long) 19 | 20 | def pyg_to_grakel(pyg_graph): 21 | edge_index = pyg_graph.edge_index.numpy() 22 | edges = list(zip(edge_index[0], edge_index[1])) 23 | node_labels = {i: str(label) for i, label in enumerate(pyg_graph.x.numpy())} 24 | return Graph(edges, node_labels=node_labels) 25 | 26 | def get_kernel_knn(dataname, kernel_type, knn_nei_num, dataset): 27 | current_dir = os.path.dirname(os.path.abspath(__file__)) 28 | kernel_file = os.path.join(current_dir, '../../../G2GNN_kernel', 29 | f'{dataname}_{kernel_type}_{knn_nei_num}.txt') 30 | 31 | if(os.path.exists(kernel_file)): 32 | kernel_simi = torch.load(kernel_file) 33 | else: 34 | #dataset = fetch_dataset(dataname, verbose=False) 35 | G = [pyg_to_grakel(graph) for graph in dataset] 36 | if(dataname in ['IMDB-BINARY', 'REDDIT-BINARY']): 37 | gk = ShortestPath(normalize=True, with_labels=False) 38 | else: 39 | gk = ShortestPath(normalize=True) 40 | kernel_simi = torch.tensor(gk.fit_transform(G)) 41 | torch.save(kernel_simi, kernel_file) 42 | 43 | kernel_idx = torch.topk(kernel_simi, k=knn_nei_num, 44 | dim=1, largest=True)[1][:, 1:] 45 | 46 | knn_edge_index = construct_knn(kernel_idx) 47 | 48 | return kernel_idx, knn_edge_index -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GCN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/GCN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GIN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/GIN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GRAPHPATCHER/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/GRAPHPATCHER/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GRAPHPATCHER/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import dgl 3 | import torch.nn.functional as F 4 | import tensorflow as tf 5 | 6 | 7 | def inject_nodes(batched_masked_graphs, generated_neighbors, masked_offset, device, mask=None): 8 | assert len(masked_offset) == len(generated_neighbors) 9 | batched_masked_graphs_ = dgl.add_nodes(batched_masked_graphs, len(masked_offset), {'feat':generated_neighbors}) 10 | temp = torch.arange(batched_masked_graphs_.number_of_nodes() - 
len(masked_offset), batched_masked_graphs_.number_of_nodes()).to(device)
11 |     masked_offset = masked_offset.to(device)
12 |     # src = torch.cat([temp, masked_offset])
13 |     # dst = torch.cat([masked_offset, temp])
14 |     src = temp[mask] if mask is not None else temp
15 |     dst = masked_offset[mask] if mask is not None else masked_offset
16 |     batched_masked_graphs_.add_edges(src, dst)
17 |     return batched_masked_graphs_
18 |
19 |
20 | def kl_div(x, y):
21 |     x = F.log_softmax(x, dim=1)
22 |     y = F.softmax(y, dim=1)
23 |     return F.kl_div(x, y, reduction='batchmean')
24 |
25 |
26 | def construct_placeholder(num_nodes, fea_size, num_classes):
27 |     with tf.name_scope('input'):
28 |         placeholders = {
29 |             'labels': tf.compat.v1.placeholder(tf.float32, shape=(None, num_classes), name='labels'),
30 |             'features': tf.compat.v1.placeholder(tf.float32, shape=(num_nodes, fea_size), name='features'),
31 |             'dropout': tf.compat.v1.placeholder_with_default(0., shape=(), name='dropout'),
32 |             'masks': tf.compat.v1.placeholder(dtype=tf.int32, shape=(num_nodes,), name='masks'),
33 |         }
34 |     return placeholders
35 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/HyperIMBA/Poincare.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import tqdm
3 | import numpy as np
4 | from multiprocessing import Lock, Manager, Pool
5 | from numpy.linalg import norm
6 |
7 | def norm(x, axis=None):
8 |     return np.linalg.norm(x, axis=axis)
9 |
10 | def poincare_dist(u, v, eps=1e-5):
11 |     d = 1 + 2 * norm(u-v)**2 / ((1 - norm(u)**2) * (1 - norm(v)**2) + eps)
12 |     return np.arccosh(d)
13 |
14 | class PoincareModel():
15 |
16 |     def __init__(self, relations, node_weights, node_labels, n_components=2, eta=0.01, n_negative=10,
17 |                  eps=1e-5, burn_in=10, burn_in_eta=0.01, init_lower=-0.001,
18 |                  init_upper=0.001, dtype=np.float64, seed=0, name="", device='cuda', batch_size=None):
19 |         self.relations = relations
20 |         self.n_components = n_components
21 |         self.eta = eta  # Learning rate for training
22 |         self.burn_in_eta = burn_in_eta  # Learning rate for burn-in
23 |         self.n_negative = n_negative
24 |         self.eps = eps
25 |         self.burn_in = burn_in
26 |         self.dtype = dtype
27 |         self.init_lower = init_lower
28 |         self.init_upper = init_upper
29 |         self.node_weights = node_weights
30 |         self.node_labels = node_labels
31 |         self.network = nx.Graph()
32 |         self.name = name
33 |         self.device = device
34 |         self.batch_size = batch_size
35 |         self.manager = Manager()
36 |         self.lock = self.manager.Lock()
37 |
38 |     def init_embeddings(self):
39 |         unique_nodes = np.unique([item for sublist in self.relations for item in sublist])
40 |         theta_init = np.random.uniform(self.init_lower, self.init_upper,
41 |                                        size=(len(unique_nodes), self.n_components))
42 |         embedding_dict = dict(zip(unique_nodes, theta_init))
43 |         self.nodes = unique_nodes
44 |         self.embeddings = theta_init
45 |         self.emb_dict = embedding_dict
46 |
47 |
48 |     def negative_sample(self, u):
49 |         positives = [x[1] for x in self.relations if x[0] == u]
50 |         negatives = np.array([x for x in self.nodes if x not in positives])
51 |         random_ix = np.random.permutation(len(negatives))[:self.n_negative]
52 |         neg_samples = [[u, x] for x in negatives[random_ix]]
53 |         neg_samples.append([u,u])
54 |         return neg_samples
55 |
56 |     def partial_d(self, theta, x):  # gradient of the Poincare distance w.r.t. theta
57 |         alpha = 1 - norm(theta)**2
58 |         beta = 1 - norm(x)**2
59 |         gamma = 1 + 2/(alpha*beta + self.eps) * norm(theta-x)**2
60 |         lhs = 4 / (beta*np.sqrt(gamma**2 - 1) +
self.eps) 61 | rhs = 1/(alpha**2 + self.eps) * (norm(x)**2 - 2*np.inner(theta,x) + 1) * theta - x/(alpha + self.eps) 62 | return lhs*rhs 63 | 64 | def proj(self, theta): 65 | if norm(theta) >= 1: 66 | theta = theta/norm(theta) - self.eps 67 | return theta 68 | 69 | def update(self, u, grad): 70 | with self.lock: 71 | theta = self.emb_dict[u] 72 | step = 1/4 * self.eta * (1 - norm(theta)**2)**2 * grad 73 | self.emb_dict[u] = self.proj(theta - step) 74 | 75 | def train(self, num_epochs=10, edge_index=None): 76 | node_rank = np.array([1 / self.node_labels[v] for v in self.node_labels]) 77 | 78 | if edge_index is not None: 79 | self.relations = edge_index 80 | 81 | for _ in range(num_epochs): 82 | losses = 0 83 | 84 | for relation in tqdm.tqdm(self.relations): 85 | u, v = relation[0], relation[1] 86 | if u == v: 87 | continue 88 | 89 | theta, x = self.emb_dict[u], self.emb_dict[v] 90 | neg_relations = [x[1] for x in self.negative_sample(u)] 91 | neg_embed = np.array([self.emb_dict[x] for x in neg_relations]) 92 | 93 | # Vectorized computation for partial derivatives 94 | rank_comparison = node_rank[u] > node_rank[v] 95 | dd_theta = self.partial_d(theta, x) * rank_comparison 96 | dd_x = self.partial_d(x, theta) * (~rank_comparison) 97 | 98 | if np.isnan(dd_theta).any() or np.isinf(dd_theta).any() or np.isnan(dd_x).any() or np.isinf(dd_x).any(): 99 | return 100 | 101 | # Loss gradients 102 | grad_theta = -dd_theta 103 | grad_x = -dd_x 104 | 105 | self.update(u, grad_theta) 106 | self.update(v, grad_x) 107 | 108 | # Vectorized gradient computation for negative samples 109 | neg_dists = np.array([np.exp(-poincare_dist(theta, self.emb_dict[vprime])) for vprime in neg_relations]) 110 | Z = np.sum(neg_dists) 111 | losses_for_neg = neg_dists / Z 112 | 113 | for idx, vprime in enumerate(neg_relations): 114 | if node_rank[u] < node_rank[vprime]: 115 | dd_u = self.partial_d(theta, self.emb_dict[vprime]) 116 | grad_u = dd_u * (-losses_for_neg[idx]) 117 | self.update(u, grad_u) 118 | else: 119 | dd_vprime = self.partial_d(self.emb_dict[vprime], theta) 120 | grad_vprime = dd_vprime * (-losses_for_neg[idx]) 121 | self.update(vprime, grad_vprime) 122 | 123 | losses += losses_for_neg[idx] * poincare_dist(theta, self.emb_dict[vprime]) 124 | 125 | pos_loss = np.exp(-poincare_dist(theta, x)) 126 | losses += pos_loss -------------------------------------------------------------------------------- /IGL_Bench/algorithm/HyperIMBA/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/HyperIMBA/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/HyperIMBA/cal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from IGL_Bench.algorithm.HyperIMBA.Poincare import PoincareModel 5 | from torch_geometric.utils import degree, to_networkx 6 | from GraphRicciCurvature.OllivierRicci import OllivierRicci 7 | 8 | def compute_ricci_and_poincare(dataset): 9 | current_dir = os.path.dirname(os.path.abspath(__file__)) 10 | file_dir = os.path.join(current_dir, '../../..') 11 | 12 | ricci_file = os.path.join(file_dir,f'hyperemb/{dataset.data_name}.edge_list') 13 | keys_file = os.path.join(file_dir,f'hyperemb/{dataset.data_name}_keys.npy') 14 | values_file = os.path.join(file_dir,f'hyperemb/{dataset.data_name}_values.npy') 15 | 16 | 
if os.path.exists(ricci_file) and os.path.exists(keys_file) and os.path.exists(values_file): 17 | print(f"Files for {dataset.data_name} already exist, skipping computation.") 18 | return 19 | 20 | os.makedirs(os.path.dirname(ricci_file), exist_ok=True) 21 | os.makedirs(os.path.dirname(keys_file), exist_ok=True) 22 | os.makedirs(os.path.dirname(values_file), exist_ok=True) 23 | 24 | G = to_networkx(dataset) 25 | orc = OllivierRicci(G, alpha=0.5, verbose="TRACE") 26 | orc.compute_ricci_curvature() 27 | G_orc = orc.G.copy() # save an intermediate result 28 | 29 | curvature = "ricciCurvature" 30 | ricci_results = {} 31 | ricci = {} 32 | for i, (n1, n2) in enumerate(list(G_orc.edges()), 0): 33 | ricci[i] = [int(n1), int(n2), G_orc[n1][n2][curvature]] 34 | 35 | # Save ricci results 36 | weights = [ricci[i] for i in ricci.keys()] 37 | np.savetxt(ricci_file, weights, fmt="%d %d %.16f") 38 | 39 | # Poincare Model computation 40 | degrees = np.array(degree(dataset.edge_index[0], num_nodes=dataset.num_nodes) + degree(dataset.edge_index[1], num_nodes=dataset.num_nodes)) 41 | edges_list = list(dataset.edge_index.t().numpy()) 42 | labels = dict(enumerate(dataset.y.numpy() + 1, 0)) 43 | device = torch.device('cpu') 44 | dim = 2 45 | model = PoincareModel(edges_list, node_weights=degrees * 0.2, node_labels=labels, n_components=dim, 46 | eta=0.01, n_negative=10, name="hierarchy", device=device) 47 | model.init_embeddings() 48 | model.train(num_epochs=1) 49 | 50 | # Save the Poincare model embeddings 51 | weights = model.embeddings 52 | keys = np.array([item for item in model.emb_dict.keys()]) 53 | values = np.array([item for item in model.emb_dict.values()]) 54 | np.save(keys_file, keys) 55 | np.save(values_file, values) 56 | 57 | print(f"Computation for {dataset.data_name} completed and files saved.") 58 | 59 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/HyperIMBA/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.algorithm.HyperIMBA.cal import compute_ricci_and_poincare 2 | import IGL_Bench.algorithm.HyperIMBA.GcnHyper as GcnHyper 3 | import torch 4 | import torch.nn.functional as F 5 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 6 | 7 | class HyperIMBA_node_solver: 8 | def __init__(self, config, dataset, device='cuda'): 9 | self.config = config 10 | self.dataset = dataset 11 | self.device = device 12 | 13 | compute_ricci_and_poincare(self.dataset) 14 | 15 | self.model = {} 16 | self.optimizer = {} 17 | self.initialization() 18 | self.dataset = self.dataset.to(self.device) 19 | 20 | def initialization(self): 21 | if self.config.backbone == 'GCN': 22 | self.model['default'], self.dataset = GcnHyper.set_model(self.dataset, self.config) 23 | 24 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 25 | 26 | def reset_parameters(self): 27 | self.initialization() 28 | 29 | def train(self): 30 | self.reset_parameters() 31 | num_epochs = getattr(self.config, 'epoch', 500) 32 | patience = getattr(self.config, 'patience', 50) 33 | least_epoch = getattr(self.config, 'least_epoch', 40) 34 | best_val_accuracy = 0 35 | 36 | for epoch in range(1, num_epochs + 1): 37 | self.model['default'].train() 38 | self.optimizer['default'].zero_grad() 39 | 40 | output = self.model['default'](self.dataset, self.config.loss_hp) 41 | loss = 
F.cross_entropy(output[self.dataset.train_mask], self.dataset.y[self.dataset.train_mask]) 42 | loss.backward() 43 | self.optimizer['default'].step() 44 | 45 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}") 46 | 47 | val_accuracy = self.eval(metric="accuracy") 48 | 49 | if val_accuracy > best_val_accuracy: 50 | best_val_accuracy = val_accuracy 51 | patience_counter = 0 52 | else: 53 | patience_counter += 1 54 | 55 | if patience_counter >= patience and epoch > least_epoch: 56 | print(f"Early stopping at epoch {epoch+1}.") 57 | break 58 | 59 | print("Training Finished!") 60 | 61 | def eval(self, metric="accuracy"): 62 | self.model['default'].eval() 63 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 64 | 65 | with torch.no_grad(): 66 | out = self.model['default'](self.dataset, self.config.loss_hp) 67 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 68 | 69 | if metric == "accuracy": 70 | return accuracy_score(all_labels, predictions) 71 | elif metric == "bacc": 72 | return balanced_accuracy_score(all_labels, predictions) 73 | elif metric == "macro_f1": 74 | return f1_score(all_labels, predictions, average='macro') 75 | else: 76 | raise ValueError(f"Unknown metric: {metric}") 77 | 78 | def test(self): 79 | self.model['default'].eval() 80 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 81 | 82 | with torch.no_grad(): 83 | out = self.model['default'](self.dataset, self.config.loss_hp) 84 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 85 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 86 | 87 | accuracy = accuracy_score(all_labels, predictions) 88 | macro_f1 = f1_score(all_labels, predictions, average='macro') 89 | bacc = balanced_accuracy_score(all_labels, predictions) 90 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 91 | 92 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/ImGAGN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/layers.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | 5 | from torch.nn.parameter import Parameter 6 | from torch.nn.modules.module import Module 7 | 8 | 9 | class GraphConvolution(Module): 10 | 11 | def __init__(self, in_features, out_features, bias=True): 12 | super(GraphConvolution, self).__init__() 13 | self.in_features = in_features 14 | self.out_features = out_features 15 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 16 | if bias: 17 | self.bias = Parameter(torch.FloatTensor(out_features)) 18 | else: 19 | self.register_parameter('bias', None) 20 | self.reset_parameters() 21 | 22 | def reset_parameters(self): 23 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 24 | self.weight.data.uniform_(-stdv, stdv) 25 | if self.bias is not None: 26 | self.bias.data.uniform_(-stdv, stdv) 27 | 28 | def forward(self, input, adj): 29 | support = torch.mm(input, self.weight) 30 | output = torch.spmm(adj, support) 31 | if self.bias is not None: 32 | return output + self.bias 33 | else: 34 | return output 35 | 36 | def __repr__(self): 37 | return self.__class__.__name__ + ' (' \ 38 | + str(self.in_features) + ' -> ' \ 39 | + str(self.out_features) + ')' 40 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/models.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from IGL_Bench.algorithm.ImGAGN.layers import GraphConvolution 4 | import torch 5 | 6 | class Attention(nn.Module): 7 | def __init__(self, input_dim, output_dim): 8 | super(Attention, self).__init__() 9 | self.mlp = nn.Sequential( 10 | nn.Linear(input_dim, input_dim // 2, bias=True), 11 | nn.ReLU(), 12 | nn.Linear(input_dim // 2, output_dim, bias=True), 13 | ) 14 | 15 | def forward(self, x): 16 | return self.mlp(x) 17 | 18 | class GCN(nn.Module): 19 | def __init__(self, nfeat, nhid, nclass, dropout, generate_node, min_node): 20 | super(GCN, self).__init__() 21 | 22 | self.gc1 = GraphConvolution(nfeat, nhid) 23 | self.gc2 = GraphConvolution(nhid, nclass) 24 | self.gc3 = GraphConvolution(nhid, 2) 25 | self.attention = Attention(nfeat*2, 1) 26 | self.generate_node = generate_node 27 | self.min_node = min_node 28 | self.dropout = dropout 29 | self.eps = 1e-10 30 | 31 | def forward(self, x, adj): 32 | 33 | x = F.relu(self.gc1(x, adj)) 34 | x = F.dropout(x, self.dropout, training=self.training) 35 | x1 = self.gc2(x, adj) 36 | x2 = self.gc3(x, adj) 37 | return F.log_softmax(x1, dim=1), F.log_softmax(x2, dim=1), F.softmax(x1, dim=1)[:,-1] 38 | 39 | def get_embedding(self,x , adj): 40 | x = F.relu(self.gc1(x, adj)) 41 | x = torch.spmm(adj, x) 42 | return x 43 | 44 | def reset_parameters(self): 45 | for m in self.modules(): 46 | if isinstance(m, nn.Linear): 47 | nn.init.xavier_uniform_(m.weight) 48 | if m.bias is not None: 49 | nn.init.zeros_(m.bias) 50 | elif isinstance(m, GraphConvolution): 51 | nn.init.xavier_uniform_(m.weight) 52 | if m.bias is not None: 53 | nn.init.zeros_(m.bias) 54 | elif isinstance(m, Attention): 55 | for layer in m.mlp: 56 | if isinstance(layer, nn.Linear): 57 | nn.init.xavier_uniform_(layer.weight) 58 | if layer.bias is not None: 59 | nn.init.zeros_(layer.bias) 60 | 61 | class Generator(nn.Module): 62 | def __init__(self, dim): 63 | super(Generator, self).__init__( ) 64 | 65 | self.fc1 = nn.Linear(100, 200) 66 | self.fc2 = nn.Linear(200, 200) 67 | self.fc3 = nn.Linear(200, dim) 68 | self.fc4 = nn.Tanh() 69 | 70 | def forward(self, x): 71 | x = F.relu(self.fc1(x)) 72 | x = F.relu(self.fc2(x)) 73 | x = self.fc3(x) 74 | x = self.fc4(x) 75 | x = (x+1)/2 76 | return x 77 | 78 | def reset_parameters(self): 79 | for m in self.modules(): 80 | if isinstance(m, nn.Linear): 81 | nn.init.xavier_uniform_(m.weight) 82 | if m.bias is not None: 83 | nn.init.zeros_(m.bias) 84 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | from sklearn.metrics import classification_report 5 | import sklearn 6 
| 7 | def load_data(ratio_generated, path="../dataset/citeseer/", dataset="citeseer"): 8 | print('Loading {} dataset...'.format(dataset)) 9 | 10 | idx_features_labels = np.genfromtxt("{}features.{}".format(path, dataset), 11 | dtype=np.float32) 12 | features = sp.csr_matrix(idx_features_labels[:, 0:-1], dtype=np.float32) 13 | labels = idx_features_labels[:, -1] 14 | 15 | idx_train = np.genfromtxt("{}train.{}".format(path, dataset), 16 | dtype=np.int32).squeeze() 17 | 18 | idx_test = np.genfromtxt("{}test.{}".format(path, dataset), 19 | dtype=np.int32).squeeze() 20 | 21 | majority = np.array([x for x in idx_train if labels[x] == 0]) 22 | minority = np.array([x for x in idx_train if labels[x] == 1]) 23 | 24 | num_minority = minority.shape[0] 25 | num_majority = majority.shape[0] 26 | print("Number of majority: ", num_majority) 27 | print("Number of minority: ", num_minority) 28 | 29 | generate_node = [] 30 | generate_label=[] 31 | for i in range(labels.shape[0], labels.shape[0]+int(ratio_generated*num_majority)-num_minority): 32 | generate_node.append(i) 33 | generate_label.append(1) 34 | idx_train= np.hstack((idx_train, np.array(generate_node))) 35 | print(idx_train.shape) 36 | 37 | minority_test = np.array([x for x in idx_test if labels[x] == 1]) 38 | minority_all = np.hstack((minority, minority_test)) 39 | 40 | 41 | labels= np.hstack((labels, np.array(generate_label))) 42 | 43 | 44 | edges = np.genfromtxt("{}edges.{}".format(path, dataset), 45 | dtype=np.int32) 46 | 47 | adj_real = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 48 | shape=(labels.shape[0], labels.shape[0]), 49 | dtype=np.float32) 50 | 51 | adj = adj_real + adj_real.T.multiply(adj_real.T > adj_real) - adj_real.multiply(adj_real.T > adj_real) 52 | 53 | features = normalize(features) 54 | adj = normalize(adj + sp.eye(adj.shape[0])) 55 | 56 | features = torch.FloatTensor(np.array(features.todense())) 57 | labels = torch.LongTensor(labels) 58 | adj = sparse_mx_to_torch_sparse_tensor(adj) 59 | 60 | idx_train = torch.LongTensor(idx_train) 61 | idx_test = torch.LongTensor(idx_test) 62 | generate_node=torch.LongTensor(np.array(generate_node)) 63 | minority = torch.LongTensor(minority) 64 | majority = torch.LongTensor(majority) 65 | minority_all = torch.LongTensor(minority_all) 66 | 67 | return adj, adj_real,features, labels, idx_train, idx_test, generate_node, minority, majority, minority_all#, generate_node_test, minority_test 68 | 69 | 70 | 71 | 72 | def normalize(mx): 73 | rowsum = np.array(mx.sum(1)) 74 | r_inv = np.power(rowsum, -1).flatten() 75 | r_inv[np.isinf(r_inv)] = 0. 
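    # 1/deg(v) is inf for isolated nodes; zero those entries so the
    # row-normalization D^-1 A below simply leaves all-zero rows untouched.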
76 |     r_mat_inv = sp.diags(r_inv)
77 |     mx = r_mat_inv.dot(mx)
78 |     return mx
79 | 
80 | 
81 | def accuracy(output, labels, output_AUC):
82 |     preds = output.max(1)[1].type_as(labels)
83 | 
84 |     y_true = labels.cpu().numpy()
85 |     y_pred = preds.cpu().numpy()
86 |     y_score = output_AUC.detach().cpu().numpy()
87 | 
88 |     recall = sklearn.metrics.recall_score(y_true, y_pred, average='macro')
89 |     f1 = sklearn.metrics.f1_score(y_true, y_pred, average='macro')
90 |     acc = sklearn.metrics.accuracy_score(y_true, y_pred)
91 |     precision = sklearn.metrics.precision_score(y_true, y_pred, average='macro')
92 | 
93 |     try:
94 |         auc = sklearn.metrics.roc_auc_score(y_true, y_score, multi_class='ovr', average='macro')
95 |     except ValueError:
96 |         auc = 0.0
97 | 
98 |     return recall, f1, auc, acc, precision
99 | 
100 | 
101 | 
102 | def sparse_mx_to_torch_sparse_tensor(sparse_mx):
103 |     sparse_mx = sparse_mx.tocoo().astype(np.float32)
104 |     indices = torch.from_numpy(
105 |         np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
106 |     values = torch.from_numpy(sparse_mx.data)
107 |     shape = torch.Size(sparse_mx.shape)
108 |     return torch.sparse.FloatTensor(indices, values, shape)
109 | 
110 | def add_edges(adj_real, adj_new):
111 |     adj = adj_real + adj_new
112 |     adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
113 |     adj = normalize(adj + sp.eye(adj.shape[0]))
114 |     adj = sparse_mx_to_torch_sparse_tensor(adj)
115 |     return adj
116 | 
117 | def euclidean_dist(x, y):
118 |     m, n = x.size(0), y.size(0)
119 |     xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
120 |     yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
121 |     dist = xx + yy
122 |     dist.addmm_(x, y.t(), beta=1, alpha=-2)  # dist = xx + yy - 2 * x @ y.T
123 |     dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
124 |     return dist
125 | 
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/ImGKB/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/dataloader.py: --------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from scipy.sparse import csr_matrix, lil_matrix
4 | import math
5 | 
6 | class GraphBatchGenerator:
7 |     def __init__(self, config, adj, features, y, index, device='cuda'):
8 | 
9 |         self.batch_size = config.batch_size
10 |         self.graph_pooling_type = getattr(config,'graph_pooling_type','average')
11 |         self.shuffle = getattr(config,"shuffle", True)
12 |         self.device = device
13 | 
14 |         self.adj = [adj[i] for i in index]
15 |         self.features = [features[i] for i in index]
16 |         self.y = [y[i] for i in index]
17 | 
18 |         self.adj_lst = []
19 |         self.features_lst = []
20 |         self.graph_pool_lst = []
21 |         self.graph_indicator_lst = []
22 |         self.y_lst = []
23 |         self.n_valid_batches = 0
24 | 
25 |         self.generate_batches()
26 | 
27 |     def generate_batches(self):
28 |         N = len(self.y)
29 |         if self.shuffle:
30 |             index = np.random.permutation(N)
31 |         else:
32 |             index = np.arange(N, dtype=np.int32)
33 | 
34 |         n_batches = math.ceil(N / self.batch_size)
35 | 
36 |         adj_lst_tmp = []
37 |         features_lst_tmp = []
38 |         graph_pool_lst_tmp = []
39 |         graph_indicator_lst_tmp = []
40 |         y_lst_tmp = []
41 | 
42 |         nu = 0
43 | 
44 |         for i in range(0, N, self.batch_size):
45 |             n_graphs = min(i + self.batch_size, N) - i
46 |
n_nodes = sum(self.adj[index[j]].shape[0] 47 | for j in range(i, i + n_graphs)) 48 | 49 | adj_batch = lil_matrix((n_nodes, n_nodes)) 50 | d_feat = self.features[0].shape[1] 51 | features_batch = np.zeros((n_nodes, d_feat), dtype=np.float32) 52 | 53 | graph_indicator_batch = np.zeros(n_nodes, dtype=np.int64) 54 | y_batch = np.zeros(n_graphs, dtype=np.int64) 55 | graph_pool_batch = lil_matrix((n_graphs, n_nodes)) 56 | 57 | idx = 0 58 | for j in range(i, i + n_graphs): 59 | n = self.adj[index[j]].shape[0] 60 | 61 | adj_batch[idx: idx + n, idx: idx + n] = self.adj[index[j]] 62 | features_batch[idx: idx + n, :] = self.features[index[j]] 63 | 64 | graph_indicator_batch[idx: idx + n] = j - i 65 | 66 | y_batch[j - i] = self.y[index[j]] 67 | 68 | if self.graph_pooling_type == "average": 69 | graph_pool_batch[j - i, idx: idx + n] = 1.0 / n 70 | else: 71 | graph_pool_batch[j - i, idx: idx + n] = 1 72 | 73 | idx += n 74 | 75 | if sum(y_batch) == 0 or sum(y_batch) == n_graphs: 76 | nu += 1 77 | else: 78 | adj_lst_tmp.append(sparse_mx_to_torch_sparse_tensor(adj_batch).to(self.device)) 79 | features_lst_tmp.append(torch.FloatTensor(features_batch).to(self.device)) 80 | graph_pool_lst_tmp.append(sparse_mx_to_torch_sparse_tensor(graph_pool_batch).to(self.device)) 81 | graph_indicator_lst_tmp.append(torch.LongTensor(graph_indicator_batch).to(self.device)) 82 | y_lst_tmp.append(torch.LongTensor(y_batch).to(self.device)) 83 | 84 | self.adj = adj_lst_tmp 85 | self.features = features_lst_tmp 86 | self.graph_pool = graph_pool_lst_tmp 87 | self.graph_indicator = graph_indicator_lst_tmp 88 | self.y = y_lst_tmp 89 | self.n_batches = n_batches - nu 90 | 91 | 92 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 93 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 94 | indices = torch.from_numpy(np.vstack((sparse_mx.row, 95 | sparse_mx.col))).long() 96 | values = torch.from_numpy(sparse_mx.data) 97 | shape = torch.Size(sparse_mx.shape) 98 | return torch.sparse.FloatTensor(indices, values, shape) 99 | 100 | def my_load_data(dataset): 101 | edges = dataset.data.edge_index.numpy() 102 | graph_indicator = [] 103 | for graph_id, data in enumerate(dataset): 104 | num_nodes = data.num_nodes 105 | graph_indicator.extend([graph_id] * num_nodes) 106 | graph_indicator = np.array(graph_indicator) 107 | 108 | A = csr_matrix( 109 | (np.ones(edges.shape[1]), (edges[0, :], edges[1, :])), 110 | shape=(graph_indicator.size, graph_indicator.size) 111 | ) 112 | 113 | X = dataset.data.x.numpy() 114 | labels = [data.y.item() for data in dataset] # shape: G 115 | 116 | _, graph_size = np.unique(graph_indicator, return_counts=True) 117 | adj = [] 118 | features = [] 119 | start_idx = 0 120 | for i in range(len(dataset)): 121 | end_idx = start_idx + graph_size[i] 122 | sub_adj = A[start_idx:end_idx, start_idx:end_idx] 123 | sub_features = X[start_idx:end_idx, :] 124 | 125 | adj.append(sub_adj) 126 | features.append(sub_features) 127 | 128 | start_idx = end_idx 129 | labels = np.array(labels) 130 | return adj, features, labels -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/inforneck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | 5 | def MI_Est(discriminator, embeddings, positive): 6 | eps = 1e-10 7 | batch_size = embeddings.shape[0] 8 | shuffle_embeddings = positive[torch.randperm(batch_size)] 9 | joint = discriminator(embeddings,positive) 10 | margin = 
discriminator(embeddings, shuffle_embeddings) 11 | joint = joint + eps 12 | margin = margin + eps 13 | swich = 'Donsker' 14 | if swich=='Donsker': 15 | mi_est = torch.mean(joint) + torch.clamp(torch.log(torch.mean(torch.exp(margin))),-10000,10000) 16 | elif swich=='JSD': 17 | mi_est = -torch.mean(F.softplus(-joint)) - torch.mean(F.softplus(-margin)+margin) 18 | elif swich=='x^2': 19 | mi_est = torch.mean(joint**2) - 0.5* torch.mean((torch.sqrt(margin**2)+1.0)**2) 20 | return mi_est 21 | 22 | class InBo(torch.nn.Module): 23 | def __init__(self, hidden_size): 24 | super(InBo, self).__init__() 25 | 26 | self.input_size = hidden_size 27 | self.hidden_size = hidden_size 28 | self.lin1 = torch.nn.Linear(self.input_size,self.hidden_size) 29 | self.lin2 = torch.nn.Linear(self.hidden_size, 1) 30 | self.relu = torch.nn.ReLU() 31 | self.reset_parameters() 32 | 33 | def reset_parameters(self): 34 | self.lin1.reset_parameters() 35 | self.lin2.reset_parameters() 36 | def forward(self, embeddings,positive): 37 | cat_embeddings = torch.cat((embeddings, positive),dim = 1) 38 | pre = self.relu(self.lin1(cat_embeddings)) 39 | pre = self.relu(self.lin2(pre)) 40 | return pre 41 | 42 | class Attention(nn.Module): 43 | def __init__(self, in_size, hidden_size=16): 44 | super(Attention, self).__init__() 45 | 46 | self.project = nn.Sequential( 47 | nn.Linear(in_size, hidden_size), 48 | nn.ReLU(), 49 | nn.Linear(hidden_size, 1, bias=False) 50 | ) 51 | def forward(self, z): 52 | w = self.project(z) 53 | beta = torch.softmax(w, dim=1) 54 | return (beta * z).sum(1), beta -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/kernel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.parameter import Parameter 4 | 5 | class KerRW(nn.Module): 6 | def __init__(self, max_step, hidden_graphs, size_hidden_graphs, hidden_dim, device): 7 | super(KerRW, self).__init__() 8 | self.max_step = max_step 9 | self.hidden_graphs = hidden_graphs 10 | self.size_hidden_graphs = size_hidden_graphs 11 | self.device = device 12 | self.adj_hidden = Parameter( 13 | torch.FloatTensor(hidden_graphs, (size_hidden_graphs * (size_hidden_graphs - 1)) // 2)) 14 | self.features_hidden = Parameter(torch.FloatTensor(hidden_graphs, size_hidden_graphs, hidden_dim)) 15 | self.bn = nn.BatchNorm1d(hidden_graphs * max_step) 16 | self.relu = nn.ReLU() 17 | self.sigmoid = nn.Sigmoid() 18 | self.init_weights() 19 | 20 | def init_weights(self): 21 | nn.init.kaiming_normal_(self.adj_hidden) 22 | nn.init.kaiming_normal_(self.features_hidden) 23 | 24 | def forward(self, features): 25 | 26 | adj_hidden_norm = torch.zeros(self.hidden_graphs, self.size_hidden_graphs, self.size_hidden_graphs).to(self.device) 27 | idx = torch.triu_indices(self.size_hidden_graphs, self.size_hidden_graphs, 1) 28 | adj_hidden_norm[:, idx[0], idx[1]] = self.relu(self.adj_hidden) 29 | adj_hidden_norm = adj_hidden_norm + torch.transpose(adj_hidden_norm, 1, 2) 30 | x_o = features 31 | z = self.features_hidden 32 | zx = torch.einsum("abc,dc->abd", (z, x_o)) 33 | out = list() 34 | for i in range(self.max_step): 35 | z = torch.einsum("abc,acd->abd", (adj_hidden_norm, z)) 36 | t = torch.einsum("abc,dc->abd", (z, x_o)) 37 | t = torch.mul(zx, t) 38 | t = torch.sum(t, dim=1) 39 | t = torch.transpose(t, 0, 1) #N X m 40 | out.append(t) 41 | out_rw = torch.cat(out, dim=1) 42 | out_att = torch.stack(out, dim=1) 43 | return out_rw, out_att 
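
# A minimal smoke test for the KerRW layer above (illustrative only: the
# dimensions below are made up and do not come from any IGL-Bench config).
# It checks the two output shapes of the random-walk kernel module.
if __name__ == "__main__":
    ker = KerRW(max_step=3, hidden_graphs=4, size_hidden_graphs=5,
                hidden_dim=16, device='cpu')
    feats = torch.randn(10, 16)  # e.g. 10 pooled graph embeddings of width hidden_dim
    out_rw, out_att = ker(feats)
    print(out_rw.shape)   # torch.Size([10, 12]) = (N, hidden_graphs * max_step)
    print(out_att.shape)  # torch.Size([10, 3, 4]) = (N, max_step, hidden_graphs)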
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def focal_loss(logits, labels, alpha=None, gamma=2): 6 | """Compute the focal loss between `logits` and the ground truth `labels`. 7 | Focal loss = -alpha_t * (1-pt)^gamma * log(pt) 8 | where pt is the probability of being classified to the true class. 9 | pt = p (if true class), otherwise pt = 1 - p. p = sigmoid(logit). 10 | Args: 11 | logits: A float tensor of size [batch, num_classes]. 12 | labels: A float tensor of size [batch, num_classes]. 13 | alpha: A float tensor of size [batch_size] 14 | specifying per-example weight for balanced cross entropy. 15 | gamma: A float scalar modulating loss from hard and easy examples. 16 | Returns: 17 | focal_loss: A float32 scalar representing normalized total loss. 18 | """ 19 | bc_loss = F.binary_cross_entropy_with_logits(input=logits, target=labels.float(), reduction="none") 20 | logits = torch.clamp(logits, 0.001, 0.999) 21 | 22 | if gamma == 0.0: 23 | modulator = 1.0 24 | else: 25 | modulator = torch.exp(-gamma * labels * logits - gamma * torch.log(1 + torch.exp(-1.0 * logits))) 26 | 27 | loss = modulator * bc_loss 28 | 29 | if alpha is not None: 30 | weighted_loss = alpha * loss 31 | focal_loss = torch.sum(weighted_loss) 32 | else: 33 | focal_loss = torch.sum(loss) 34 | 35 | focal_loss /= torch.sum(labels) 36 | return focal_loss 37 | 38 | 39 | class Loss(torch.nn.Module): 40 | def __init__( 41 | self, 42 | loss_type: str = "cross_entropy", 43 | beta: float = 0.999, 44 | fl_gamma=2, 45 | samples_per_class=None, 46 | class_balanced=True, 47 | ): 48 | """ 49 | Compute the Class Balanced Loss between `logits` and the ground truth `labels`. 50 | Class Balanced Loss: ((1-beta)/(1-beta^n))*Loss(labels, logits) 51 | where Loss is one of the standard losses used for Neural Networks. 52 | reference: https://openaccess.thecvf.com/content_CVPR_2019/papers/Cui_Class-Balanced_Loss_Based_on_Effective_Number_of_Samples_CVPR_2019_paper.pdf 53 | Args: 54 | loss_type: string. One of "focal_loss", "cross_entropy", 55 | "binary_cross_entropy", "softmax_binary_cross_entropy". 56 | beta: float. Hyperparameter for Class balanced loss. 57 | fl_gamma: float. Hyperparameter for Focal loss. 58 | samples_per_class: A python list of size [num_classes]. 59 | Required if class_balance is True. 60 | class_balanced: bool. Whether to use class balanced loss. 61 | Returns: 62 | Loss instance 63 | """ 64 | super(Loss, self).__init__() 65 | 66 | # if class_balanced is True and samples_per_class is None: 67 | # raise ValueError("samples_per_class cannot be None when class_balanced is True") 68 | 69 | self.loss_type = loss_type 70 | self.beta = beta 71 | self.fl_gamma = fl_gamma 72 | self.samples_per_class = samples_per_class 73 | self.class_balanced = class_balanced 74 | 75 | def forward( 76 | self, 77 | logits: torch.tensor, 78 | labels: torch.tensor, 79 | ): 80 | """ 81 | Compute the Class Balanced Loss between `logits` and the ground truth `labels`. 82 | Class Balanced Loss: ((1-beta)/(1-beta^n))*Loss(labels, logits) 83 | where Loss is one of the standard losses used for Neural Networks. 84 | Args: 85 | logits: A float tensor of size [batch, num_classes]. 86 | labels: An int tensor of size [batch]. 
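        Example (illustrative sketch, not from the original code; assumes a
        3-class batch and the default beta/gamma):
            >>> logits = torch.randn(4, 3)
            >>> labels = torch.tensor([0, 1, 2, 0])
            >>> criterion = Loss(loss_type="focal_loss", class_balanced=True)
            >>> loss = criterion(logits, labels)  # scalar class-balanced focal loss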
87 | Returns: 88 | cb_loss: A float tensor representing class balanced loss 89 | """ 90 | 91 | batch_size = logits.size(0) 92 | num_classes = logits.size(1) 93 | # labels_one_hot= labels 94 | labels_one_hot = F.one_hot(labels, num_classes) 95 | 96 | self.samples_per_class = labels_one_hot.sum(axis=0).cpu().numpy().tolist() 97 | 98 | if self.class_balanced: 99 | effective_num = 1.0 - np.power(self.beta, self.samples_per_class) 100 | weights = (1.0 - self.beta) / np.array(effective_num) 101 | weights = weights / np.sum(weights) * num_classes 102 | weights = torch.tensor(weights, device=logits.device).float() 103 | # print('weight= ',weights) 104 | 105 | if self.loss_type != "cross_entropy": 106 | weights = weights.unsqueeze(0) 107 | weights = weights.repeat(batch_size, 1) * labels_one_hot 108 | weights = weights.sum(1) 109 | weights = weights.unsqueeze(1) 110 | weights = weights.repeat(1, num_classes) 111 | else: 112 | weights = None 113 | 114 | if self.loss_type == "focal_loss": 115 | cb_loss = focal_loss(logits, labels_one_hot, alpha=weights, gamma=self.fl_gamma) 116 | elif self.loss_type == "cross_entropy": 117 | cb_loss = F.cross_entropy(input=logits, target=labels, weight=weights) 118 | elif self.loss_type == "binary_cross_entropy": 119 | cb_loss = F.binary_cross_entropy_with_logits(input=logits, target=labels_one_hot, weight=weights) 120 | elif self.loss_type == "softmax_binary_cross_entropy": 121 | pred = logits.softmax(dim=1) 122 | cb_loss = F.binary_cross_entropy(input=pred, target=labels_one_hot.to(torch.float32), weight=weights) 123 | return cb_loss -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.parameter import Parameter 4 | from IGL_Bench.algorithm.ImGKB.layers import graph_convolution, MLP 5 | from torch_geometric.nn import global_mean_pool 6 | from IGL_Bench.algorithm.ImGKB.inforneck import InBo, MI_Est, Attention 7 | from IGL_Bench.algorithm.ImGKB.kernel import KerRW 8 | import torch.nn.functional as F 9 | 10 | class KGIB(nn.Module): 11 | def __init__(self, input_dim, hidden_dim, hidden_graphs, size_hidden_graphs, 12 | nclass, max_step, num_layers, backbone, device='cuda'): 13 | super(KGIB, self).__init__() 14 | self.num_layers = num_layers 15 | self.device = device 16 | self.relu = nn.ReLU() 17 | self.ker_layers = torch.nn.ModuleList() 18 | self.batch_norms = torch.nn.ModuleList() 19 | self.bn = nn.BatchNorm1d(hidden_graphs * max_step) 20 | self.infoneck = InBo(hidden_dim + hidden_graphs) 21 | self.atten = Attention(hidden_graphs) 22 | self.linear_transform_in = nn.Sequential(nn.Linear(input_dim, 32), nn.Dropout(0.5),self.relu, nn.Linear(32, hidden_dim),self.relu) 23 | self.linear_transform_out = nn.Sequential(nn.Linear(hidden_graphs * max_step, hidden_graphs * max_step), 24 | self.relu, nn.Linear(hidden_graphs * max_step, 2)) 25 | self.mlp_1 = nn.Linear(self.num_layers*hidden_graphs * max_step + hidden_dim, hidden_dim) 26 | self.mlp_2 = nn.Linear(hidden_dim, 2) 27 | self.GNN_features = graph_convolution(hidden_dim, hidden_dim, hidden_dim, device, backbone) 28 | self.conv = nn.ModuleList([self.GNN_features for _ in range(self.num_layers)]) 29 | for layer in range(self.num_layers): 30 | self.ker_layers.append(KerRW(max_step, hidden_graphs, size_hidden_graphs, hidden_dim, self.device)) 31 | self.linears_prediction = torch.nn.ModuleList() 32 | num_mlp_layers = 2 33 | 
hidden_dim1 = hidden_graphs * max_step 34 | for layer in range(self.num_layers + 1): 35 | if layer == 0: 36 | self.linears_prediction.append(MLP(num_mlp_layers, hidden_dim, hidden_dim, nclass)) 37 | else: 38 | self.linears_prediction.append(MLP(num_mlp_layers, hidden_dim1, hidden_dim, nclass)) 39 | 40 | def forward(self, adj, features, graph_indicator): 41 | h = self.linear_transform_in(features) 42 | graph_embs = global_mean_pool(h, graph_indicator) 43 | hidden_rep = [graph_embs] 44 | loss_mi = 0 45 | for layer in range(self.num_layers): 46 | h = self.conv[layer](h, adj) 47 | h_g = global_mean_pool(h,graph_indicator) 48 | h1, h_att = self.ker_layers[layer](h_g) 49 | h_a,_ =self.atten(h_att) 50 | loss_mi += MI_Est(self.infoneck, h_g, h_a) # I(H_G, H^_G) 51 | hidden_rep.append(h1) 52 | 53 | # I(Y, H^_G) 54 | score_over_layer = 0 55 | for layer, h in enumerate(hidden_rep): 56 | score_over_layer += self.linears_prediction[layer](h) 57 | 58 | return score_over_layer, loss_mi -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/util.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, balanced_accuracy_score 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | class AverageMeter(object): 6 | """Computes and stores the average and current value""" 7 | def __init__(self): 8 | self.reset() 9 | def reset(self): 10 | self.val = 0 11 | self.avg = 0 12 | self.sum = 0 13 | self.count = 0 14 | def update(self, val, n=1): 15 | self.val = val 16 | self.sum += val * n 17 | self.count += n 18 | self.avg = self.sum / self.count 19 | 20 | def Roc_F(logits, labels, pre='valid'): 21 | if labels.max() > 1:#require set(labels) to be the same as columns of logits 22 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 23 | else: 24 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:,1].detach().cpu(), average='macro') 25 | 26 | macro_F = f1_score(labels.detach().cpu(), torch.argmax(logits, dim=-1).detach().cpu(), average='macro') 27 | 28 | return auc_score, macro_F 29 | 30 | def compute_metrics(logits, labels): 31 | preds = torch.argmax(logits, dim=-1) 32 | 33 | acc = accuracy_score(labels.detach().cpu(), preds.detach().cpu()) 34 | 35 | bacc = balanced_accuracy_score(labels.detach().cpu(), preds.detach().cpu()) 36 | 37 | mf1 = f1_score(labels.detach().cpu(), preds.detach().cpu(), average='macro') 38 | 39 | if labels.max() > 1: 40 | auc_roc = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 41 | else: 42 | auc_roc = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:, 1].detach().cpu(), average='macro') 43 | 44 | return acc, bacc, mf1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/PASTEL/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/cal.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import multiprocessing as mp 4 | import math 5 | 
import torch 6 | 7 | def cal_shortest_path_distance(adj, approximate, n_nodes): 8 | Adj = adj.detach().cpu().numpy() 9 | G = nx.from_numpy_array(Adj) 10 | G.edges(data=True) 11 | dists_array = np.zeros((n_nodes, n_nodes)) 12 | dists_dict = all_pairs_shortest_path_length_parallel(G, cutoff=approximate if approximate > 0 else None) 13 | 14 | cnt_disconnected = 0 15 | 16 | for i, node_i in enumerate(G.nodes()): 17 | shortest_dist = dists_dict[node_i] 18 | for j, node_j in enumerate(G.nodes()): 19 | dist = shortest_dist.get(node_j, -1) 20 | if dist == -1: 21 | cnt_disconnected += 1 22 | if dist != -1: 23 | dists_array[node_i, node_j] = dist 24 | return dists_array 25 | 26 | def all_pairs_shortest_path_length_parallel(graph, cutoff=None, num_workers=4): 27 | nodes = list(graph.nodes) 28 | if len(nodes) < 50: 29 | num_workers = int(num_workers / 4) 30 | elif len(nodes) < 400: 31 | num_workers = int(num_workers / 2) 32 | 33 | pool = mp.Pool(processes=num_workers) 34 | results = [pool.apply_async(single_source_shortest_path_length_range, 35 | args=(graph, nodes[int(len(nodes) / num_workers * i):int(len(nodes) / num_workers * (i + 1))], cutoff)) for i in range(num_workers)] 36 | output = [p.get() for p in results] 37 | dists_dict = merge_dicts(output) 38 | pool.close() 39 | pool.join() 40 | return dists_dict 41 | 42 | def single_source_shortest_path_length_range(graph, node_range, cutoff): 43 | dists_dict = {} 44 | for node in node_range: 45 | dists_dict[node] = nx.single_source_shortest_path_length(graph, node, cutoff) # unweighted 46 | return dists_dict 47 | 48 | 49 | 50 | def merge_dicts(dicts): 51 | result = {} 52 | for dictionary in dicts: 53 | result.update(dictionary) 54 | return result 55 | 56 | def cal_group_pagerank_args(pagerank_before, pagerank_after, num_nodes): 57 | node_pair_group_pagerank_mat = rank_group_pagerank(pagerank_before, pagerank_after, num_nodes) # rank 58 | PI = 3.1415926 59 | for i in range(num_nodes): 60 | for j in range(num_nodes): 61 | node_pair_group_pagerank_mat[i][j] = 2 - (math.cos((node_pair_group_pagerank_mat[i][j] / (num_nodes * num_nodes)) * PI) + 1) 62 | 63 | return node_pair_group_pagerank_mat 64 | 65 | def rank_group_pagerank(pagerank_before, pagerank_after, num_nodes): 66 | pagerank_dist = torch.mm(pagerank_before, pagerank_after.transpose(-1, -2)).detach().cpu() 67 | node_pair_group_pagerank_mat = np.zeros((num_nodes, num_nodes)) 68 | node_pair_group_pagerank_mat_list = [] 69 | for i in range(num_nodes): 70 | for j in range(num_nodes): 71 | node_pair_group_pagerank_mat_list.append(pagerank_dist[i, j]) 72 | node_pair_group_pagerank_mat_list = np.array(node_pair_group_pagerank_mat_list) 73 | index = np.argsort(-node_pair_group_pagerank_mat_list) 74 | rank = np.argsort(index) 75 | rank = rank + 1 76 | iter = 0 77 | for i in range(num_nodes): 78 | for j in range(num_nodes): 79 | node_pair_group_pagerank_mat[i][j] = rank[iter] 80 | iter = iter + 1 81 | 82 | return node_pair_group_pagerank_mat 83 | 84 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/eval.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(object): 2 | def __init__(self): 3 | self.history = [] 4 | self.last = None 5 | self.val = 0 6 | self.sum = 0 7 | self.count = 0 8 | 9 | def reset(self): 10 | self.last = self.mean() 11 | self.history.append(self.last) 12 | self.val = 0 13 | self.sum = 0 14 | self.count = 0 15 | 16 | def update(self, val, n=1): 17 | self.val = val 18 | 
self.sum += val * n 19 | self.count += n 20 | 21 | def mean(self): 22 | if self.count == 0: 23 | return 0. 24 | return self.sum / self.count -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/graph_learner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class GraphLearner(nn.Module): 6 | def __init__(self, input_size, hidden_size, n_nodes, n_class, n_anchors, topk=None, epsilon=None, n_pers=16, device=None): 7 | super(GraphLearner, self).__init__() 8 | self.n_nodes = n_nodes 9 | self.n_class = n_class 10 | self.n_anchors = n_anchors 11 | self.topk = topk 12 | self.epsilon = epsilon 13 | self.device = device 14 | self.input_size=input_size 15 | 16 | self.weight_tensor = torch.Tensor(n_pers, input_size) 17 | self.weight_tensor = nn.Parameter(nn.init.xavier_uniform_(self.weight_tensor)) 18 | 19 | self.weight_tensor_for_pe = torch.Tensor(self.n_anchors, hidden_size) 20 | self.weight_tensor_for_pe = nn.Parameter(nn.init.xavier_uniform_(self.weight_tensor_for_pe)) 21 | 22 | def forward(self, context, position_encoding, gpr_rank, position_flag, ctx_mask=None): 23 | expand_weight_tensor = self.weight_tensor.unsqueeze(1) 24 | if len(context.shape) == 3: 25 | expand_weight_tensor = expand_weight_tensor.unsqueeze(1) 26 | context_fc = context.unsqueeze(0) * expand_weight_tensor 27 | context_norm = F.normalize(context_fc, p=2, dim=-1) 28 | attention = torch.bmm(context_norm, context_norm.transpose(-1, -2)).mean(0) 29 | 30 | if position_flag == 1: 31 | pe_fc = torch.mm(position_encoding, self.weight_tensor_for_pe) 32 | pe_attention = torch.mm(pe_fc, pe_fc.transpose(-1, -2)) 33 | try: 34 | attention = (attention * 0.5 + pe_attention * 0.5) * gpr_rank 35 | except RuntimeError as e: 36 | attention_cpu = attention.to('cpu') 37 | pe_attention_cpu = pe_attention.to('cpu') 38 | gpr_rank = gpr_rank.to('cpu') 39 | attention_cpu = attention_cpu * 0.5 + pe_attention_cpu * 0.5 40 | attention_cpu = attention_cpu * gpr_rank 41 | attention = attention_cpu.to('cuda') 42 | else: 43 | attention = attention * gpr_rank 44 | 45 | markoff_value = 0 46 | 47 | if ctx_mask is not None: 48 | attention = attention.masked_fill_(1 - ctx_mask.byte().unsqueeze(1), markoff_value) 49 | attention = attention.masked_fill_(1 - ctx_mask.byte().unsqueeze(-1), markoff_value) 50 | 51 | if self.epsilon is not None: 52 | if not self.epsilon == 0: 53 | attention = self.build_epsilon_neighbourhood(attention, self.epsilon, markoff_value) 54 | 55 | if self.topk is not None: 56 | attention = self.build_knn_neighbourhood(attention, self.topk, markoff_value) 57 | 58 | return attention 59 | 60 | 61 | def build_knn_neighbourhood(self, attention, topk, markoff_value): 62 | topk = min(topk, attention.size(-1)) 63 | knn_val, knn_ind = torch.topk(attention, topk, dim=-1) 64 | weighted_adjacency_matrix = to_cuda((markoff_value * torch.ones_like(attention)).scatter_(-1, knn_ind, knn_val), self.device) 65 | return weighted_adjacency_matrix 66 | 67 | 68 | def build_epsilon_neighbourhood(self, attention, epsilon, markoff_value): 69 | mask = (attention > epsilon).detach().float() 70 | 71 | try: 72 | weighted_adjacency_matrix = attention * mask + markoff_value * (1 - mask) 73 | except: 74 | attention_np = attention.cpu().detach().numpy() 75 | mask_np = mask.cpu().detach().numpy() 76 | weighted_adjacency_matrix_np = attention_np * mask_np + markoff_value * (1 - mask_np) 77 | 
weighted_adjacency_matrix = torch.from_numpy(weighted_adjacency_matrix_np).to(self.device) 78 | 79 | return weighted_adjacency_matrix 80 | 81 | def to_cuda(x, device=None): 82 | if device: 83 | x = x.to(device) 84 | return x -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/model.py: -------------------------------------------------------------------------------- 1 | from .graph_clf import GraphClf 2 | import torch.nn.functional as F 3 | import torch 4 | from sklearn.metrics import f1_score, accuracy_score,balanced_accuracy_score,roc_auc_score 5 | import numpy as np 6 | import os 7 | 8 | class Model(object): 9 | def __init__(self, config): 10 | self.config = config 11 | self.criterion = F.nll_loss 12 | 13 | self.score_func = accuracy 14 | self.wf1 = wf1 15 | self.mf1 = mf1 16 | self.bacc = bacc 17 | self.auroc = auroc 18 | self.metric_name = 'acc' 19 | 20 | self._init_new_network() 21 | self._init_optimizer() 22 | 23 | def _init_new_network(self): 24 | self.network = GraphClf(self.config) 25 | 26 | def _init_optimizer(self): 27 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 28 | 29 | def save(self, dirname): 30 | params = { 31 | 'state_dict': { 32 | 'network': self.network.state_dict(), 33 | }, 34 | 'config': self.config, 35 | 'dir': dirname, 36 | } 37 | try: 38 | torch.save(params, os.path.join(dirname, "params.saved")) 39 | except BaseException: 40 | print('[ WARN: Saving failed... continuing anyway. ]') 41 | 42 | def init_saved_network(self, saved_dir): 43 | fname = os.path.join(saved_dir, "params.saved") 44 | print('[ Loading saved models %s ]' % fname) 45 | saved_params = torch.load(fname, map_location=lambda storage, loc: storage) 46 | self.state_dict = saved_params['state_dict'] 47 | 48 | self.network = GraphClf(self.config) 49 | 50 | if self.state_dict: 51 | merged_state_dict = self.network.state_dict() 52 | for k, v in self.state_dict['network'].items(): 53 | if k in merged_state_dict: 54 | merged_state_dict[k] = v 55 | self.network.load_state_dict(merged_state_dict) 56 | 57 | def reset_parameters(self): 58 | print("[ Resetting model parameters ]") 59 | # Reinitialize the network 60 | self._init_new_network() 61 | 62 | # Reinitialize optimizer and scheduler 63 | self._init_optimizer() 64 | 65 | def accuracy(labels, output): 66 | preds = output.max(1)[1].type_as(labels) 67 | correct = preds.eq(labels).double() 68 | correct = correct.sum().item() 69 | return correct / len(labels) 70 | 71 | 72 | def wf1(labels, output): 73 | pred = output.cpu().max(1)[1].numpy() 74 | labels = labels.cpu().numpy() 75 | return f1_score(labels, pred, average='weighted') 76 | 77 | 78 | def mf1(labels, output): 79 | pred = output.cpu().max(1)[1].numpy() 80 | labels = labels.cpu().numpy() 81 | return f1_score(labels, pred, average='macro') 82 | 83 | def bacc(labels, output): 84 | pred = output.cpu().max(1)[1].numpy() 85 | labels = labels.cpu().numpy() 86 | return balanced_accuracy_score(labels, pred) 87 | 88 | def auroc(labels, output): 89 | labels = labels.cpu().numpy() 90 | output = output.cpu().detach().numpy() 91 | 92 | n_classes = output.shape[1] 93 | labels_binary = np.eye(n_classes)[labels] 94 | 95 | auroc = roc_auc_score(labels_binary, output, multi_class='ovr', average='macro') 96 | return auroc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/RAWLSGCN/RawlsGCN.py: 
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from IGL_Bench.backbone.gcn import GCNLayer 5 | 6 | 7 | class RawlsGCNGraph(nn.Module): 8 | def __init__(self, nfeat, nhid, nclass, dropout): 9 | super(RawlsGCNGraph, self).__init__() 10 | self.gc1 = GCNLayer(nfeat, nhid, bias=True) 11 | self.gc2 = GCNLayer(nhid, nclass, bias=True) 12 | self.dropout = dropout 13 | 14 | def forward(self, x, adj): 15 | x = F.relu(self.gc1(x, adj)) 16 | x = F.dropout(x, self.dropout, training=self.training) 17 | x = self.gc2(x, adj) 18 | return F.log_softmax(x, dim=1) 19 | 20 | 21 | class RawlsGCNGrad(nn.Module): 22 | def __init__(self, nfeat, nhid, nclass, dropout): 23 | super(RawlsGCNGrad, self).__init__() 24 | self.gc1 = GCNLayer(nfeat, nhid, bias=True) 25 | self.gc2 = GCNLayer(nhid, nclass, bias=True) 26 | self.dropout = dropout 27 | # to fix gradient in trainer 28 | self.layers_info = { 29 | "gc1": 0, 30 | "gc2": 1, 31 | } 32 | 33 | def forward(self, x, adj): 34 | pre_act_embs, embs = [], [x] # adding input node features to make index padding consistent 35 | x = self.gc1(x, adj) 36 | x.retain_grad() 37 | pre_act_embs.append(x) 38 | x = F.relu(x) 39 | x = F.dropout(x, self.dropout, training=self.training) 40 | embs.append(x) 41 | 42 | x = self.gc2(x, adj) 43 | x.retain_grad() 44 | pre_act_embs.append(x) 45 | x = F.log_softmax(x, dim=1) 46 | embs.append(x) 47 | return pre_act_embs, embs 48 | 49 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/RAWLSGCN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/RAWLSGCN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/RAWLSGCN/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | 5 | 6 | def encode_onehot(labels): 7 | """Encode label to a one-hot vector.""" 8 | classes = set(labels) 9 | classes_dict = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)} 10 | labels_onehot = np.array(list(map(classes_dict.get, labels)), dtype=np.int32) 11 | return labels_onehot 12 | 13 | 14 | def row_normalize(mx): 15 | """Row-normalize sparse matrix.""" 16 | rowsum = np.array(mx.sum(1)) 17 | r_inv = np.power(rowsum, -1).flatten() 18 | r_inv[np.isinf(r_inv)] = 0.0 19 | r_mat_inv = sp.diags(r_inv) 20 | mx = r_mat_inv @ mx 21 | return mx 22 | 23 | 24 | def symmetric_normalize(mat): 25 | """Symmetric-normalize sparse matrix.""" 26 | D = np.asarray(mat.sum(axis=0).flatten()) 27 | D = np.divide(1, D, out=np.zeros_like(D), where=D != 0) 28 | D = sp.diags(np.asarray(D)[0, :]) 29 | D.data = np.sqrt(D.data) 30 | return D @ mat @ D 31 | 32 | 33 | def matrix2tensor(mat): 34 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 35 | mat = mat.tocoo().astype(np.float32) 36 | indices = torch.from_numpy(np.vstack((mat.row, mat.col)).astype(np.int64)) 37 | values = torch.from_numpy(mat.data) 38 | shape = torch.Size(mat.shape) 39 | return torch.sparse_coo_tensor(indices, values, shape) 40 | 41 | 42 | def tensor2matrix(t): 43 | """Convert a torch sparse tensor to a scipy sparse matrix.""" 44 | indices = t.indices() 45 | row, col = indices[0, :].cpu().numpy(), indices[1, :].cpu().numpy() 46 | 
values = t.values().cpu().numpy() 47 | mat = sp.coo_matrix((values, (row, col)), shape=(t.shape[0], t.shape[1])) 48 | return mat 49 | 50 | 51 | def sparse_to_tuple(sparse_mx): 52 | """Convert sparse matrix to tuple representation.""" 53 | 54 | def to_tuple(mx): 55 | if not sp.isspmatrix_coo(mx): 56 | mx = mx.tocoo() 57 | coords = np.vstack((mx.row, mx.col)).transpose() 58 | values = mx.data 59 | shape = mx.shape 60 | return coords, values, shape 61 | 62 | if isinstance(sparse_mx, list): 63 | for i in range(len(sparse_mx)): 64 | sparse_mx[i] = to_tuple(sparse_mx[i]) 65 | else: 66 | sparse_mx = to_tuple(sparse_mx) 67 | return sparse_mx 68 | 69 | def get_doubly_stochastic(mat): 70 | sk = SinkhornKnopp(max_iter=1000, epsilon=1e-2) 71 | mat = matrix2tensor( 72 | sk.fit(mat) 73 | ) 74 | return mat 75 | 76 | 77 | class SinkhornKnopp: 78 | """ 79 | Sinkhorn-Knopp algorithm to compute doubly stochastic matrix for a non-negative square matrix with total support. 80 | For reference, see original paper: http://msp.org/pjm/1967/21-2/pjm-v21-n2-p14-s.pdf 81 | """ 82 | 83 | def __init__(self, max_iter=1000, epsilon=1e-3): 84 | """ 85 | Args: 86 | max_iter (int): The maximum number of iterations, default is 1000. 87 | epsilon (float): Error tolerance for row/column sum, should be in the range of [0, 1], default is 1e-3. 88 | """ 89 | 90 | assert isinstance(max_iter, int) or isinstance(max_iter, float), ( 91 | "max_iter is not int or float: %r" % max_iter 92 | ) 93 | assert max_iter > 0, "max_iter must be greater than 0: %r" % max_iter 94 | self.max_iter = int(max_iter) 95 | 96 | assert isinstance(epsilon, int) or isinstance(epsilon, float), ( 97 | "epsilon is not of type float or int: %r" % epsilon 98 | ) 99 | assert 0 <= epsilon < 1, ( 100 | "epsilon must be between 0 and 1 exclusive: %r" % epsilon 101 | ) 102 | self.epsilon = epsilon 103 | 104 | def fit(self, mat): 105 | """ 106 | 107 | Args: 108 | mat (scipy.sparse.matrix): The input non-negative square matrix. The matrix must have total support, i.e., 109 | row/column sum must be non-zero. 110 | Returns: 111 | ds_mat (scipy.sparse.matrix): The doubly stochastic matrix of the input matrix. 
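        Example (illustrative sketch, not a strict doctest):
            >>> import numpy as np
            >>> import scipy.sparse as sp
            >>> mat = sp.csr_matrix(np.array([[1.0, 2.0], [3.0, 4.0]]))
            >>> ds = SinkhornKnopp().fit(mat)
            >>> np.allclose(ds.sum(axis=0), 1.0, atol=2e-3)
            True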
112 |         """
113 |         assert sum(mat.data < 0) == 0  # must be non-negative
114 |         assert mat.ndim == 2  # must be a matrix
115 |         assert mat.shape[0] == mat.shape[1]  # must be square
116 | 
117 |         max_threshold, min_threshold = 1 + self.epsilon, 1 - self.epsilon
118 | 
119 |         right = np.ravel(mat.sum(axis=0).flatten())
120 |         right = np.divide(1, right, out=np.zeros_like(right), where=right != 0)
121 | 
122 |         left = mat @ right
123 |         left = np.divide(1, left, out=np.zeros_like(left), where=left != 0)
124 | 
125 |         for iteration in range(self.max_iter):
126 |             row_sum = np.ravel(mat.sum(axis=1)).flatten()
127 |             col_sum = np.ravel(mat.sum(axis=0)).flatten()
128 |             if (
129 |                 sum(row_sum < min_threshold) == 0
130 |                 and sum(row_sum > max_threshold) == 0
131 |                 and sum(col_sum < min_threshold) == 0
132 |                 and sum(col_sum > max_threshold) == 0
133 |             ):
134 |                 print(
135 |                     "Sinkhorn-Knopp - Converged in {iteration} iterations.".format(iteration=iteration)
136 |                 )
137 |                 return mat
138 | 
139 |             right = left @ mat
140 |             right = np.divide(1, right, out=np.zeros_like(right), where=right != 0)
141 | 
142 |             left = mat @ right
143 |             left = np.divide(1, left, out=np.zeros_like(left), where=left != 0)
144 | 
145 |             right_diag = sp.diags(right)
146 |             left_diag = sp.diags(left)
147 |             mat = left_diag @ mat @ right_diag
148 |         print("Sinkhorn-Knopp - Maximum number of iterations reached.")
149 |         return mat
150 | 
151 | 
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/ReNode/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/reweight.py: --------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.nn.functional as F
4 | import math
5 | import numpy as np
6 | from IGL_Bench.algorithm.ReNode.util import index2sparse, direct_sparse_eye, compute_degree_matrix, index2dense
7 | 
8 | def get_renode_weight(config, data):
9 |     ppr_matrix = data.Pi
10 |     gpr_matrix = torch.tensor(data.gpr).float()
11 | 
12 |     base_w = config.rn_base_weight
13 |     scale_w = config.rn_scale_weight
14 |     nnode = ppr_matrix.size(0)
15 |     unlabel_mask = data.train_mask.int().ne(1)
16 | 
17 |     gpr_sum = torch.sum(gpr_matrix, dim=1)
18 |     gpr_rn = gpr_sum.unsqueeze(1) - gpr_matrix
19 |     rn_matrix = torch.mm(ppr_matrix, gpr_rn)
20 | 
21 |     labels = data.y.squeeze()
22 |     label_matrix = F.one_hot(labels, gpr_matrix.size(1)).float()
23 |     label_matrix[unlabel_mask] = 0
24 | 
25 |     rn_matrix = torch.sum(rn_matrix * label_matrix, dim=1)
26 |     rn_matrix[unlabel_mask] = rn_matrix.max() + 99
27 | 
28 |     train_size = torch.sum(data.train_mask.int()).item()
29 |     totoro_list = rn_matrix.tolist()
30 |     id2totoro = {i: totoro_list[i] for i in range(len(totoro_list))}
31 |     sorted_totoro = sorted(id2totoro.items(), key=lambda x: x[1], reverse=False)
32 |     id2rank = {sorted_totoro[i][0]: i for i in range(nnode)}
33 |     totoro_rank = [id2rank[i] for i in range(nnode)]
34 | 
35 |     rn_weight = [(base_w + 0.5 * scale_w * (1 + math.cos(x * 1.0 * math.pi / (train_size - 1)))) for x in totoro_rank]
36 |     rn_weight = torch.from_numpy(np.array(rn_weight)).type(torch.FloatTensor)
37 |     rn_weight = rn_weight * data.train_mask.float()
38 | 
39 |     return rn_weight
40 | 
41 | def compute_rn_weight(dataset, config):
42 |     target_data = dataset
43 | 
44 |     train_index =
dataset.train_index 45 | num_classes = dataset.y.numpy().max().item() + 1 46 | train_node = [[] for _ in range(num_classes)] 47 | num_classes = torch.max(target_data.y).item() + 1 48 | for class_id in range(num_classes): 49 | class_mask = target_data.y.eq(class_id) 50 | for idx in target_data.train_index: 51 | if class_mask[idx]: 52 | train_node[class_id].append(idx) 53 | 54 | current_dir = os.path.dirname(os.path.abspath(__file__)) 55 | ppr_file = os.path.join(current_dir, '../../../PPR_file', 56 | f"{target_data.data_name}_pagerank.pt") 57 | 58 | if os.path.exists(ppr_file): 59 | target_data.Pi = torch.load(ppr_file) 60 | elif dataset.data_name == 'ogbn-arxiv': 61 | A = index2sparse(target_data.edge_index, target_data.num_nodes) 62 | A = A + direct_sparse_eye(target_data.num_nodes) # Add self-loop 63 | D = compute_degree_matrix(A, target_data.num_nodes) 64 | A_normalized = D @ A @ D 65 | 66 | ppr = torch.ones((target_data.num_nodes, 1)) / target_data.num_nodes 67 | alpha = config.pagerank_prob 68 | 69 | for _ in range(40): # Power iteration 70 | ppr = (1 - alpha) * A_normalized @ ppr + alpha * (torch.ones((target_data.num_nodes, 1)) / target_data.num_nodes) 71 | 72 | target_data.Pi = ppr 73 | else: 74 | pr_prob = 1 - config.pagerank_prob 75 | A = index2dense(target_data.edge_index, target_data.num_nodes) 76 | A_hat = A + torch.eye(A.size(0)) # Add self-loop 77 | D = torch.diag(torch.sum(A_hat, 1)) 78 | D = D.inverse().sqrt() 79 | A_hat = torch.mm(torch.mm(D, A_hat), D) 80 | target_data.Pi = pr_prob * ((torch.eye(A.size(0)) - (1 - pr_prob) * A_hat).inverse()) 81 | target_data.Pi = target_data.Pi.cpu() 82 | 83 | gpr_matrix = [] 84 | for iter_c in range(num_classes): 85 | iter_Pi = target_data.Pi[torch.tensor(train_node[iter_c]).long()] 86 | iter_gpr = torch.mean(iter_Pi, dim=0).squeeze() 87 | gpr_matrix.append(iter_gpr) 88 | 89 | temp_gpr = torch.stack(gpr_matrix, dim=0) 90 | if temp_gpr.dim() == 1: 91 | temp_gpr = temp_gpr.unsqueeze(1) 92 | temp_gpr = temp_gpr.transpose(0, 1) 93 | target_data.gpr = temp_gpr 94 | 95 | rn_weight = get_renode_weight(config, target_data) 96 | 97 | return rn_weight 98 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.backbone.gcn import GCN_node_sparse 2 | from IGL_Bench.algorithm.ReNode.reweight import compute_rn_weight 3 | import torch 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 7 | 8 | 9 | class ReNode_node_solver: 10 | def __init__(self, config, dataset, device='cuda'): 11 | self.config = config 12 | self.dataset = dataset 13 | self.device = device 14 | 15 | self.rn_weight = compute_rn_weight(dataset, config) 16 | self.rn_weight = self.rn_weight.to(self.device) 17 | self.criterion = torch.nn.CrossEntropyLoss() 18 | 19 | self.model = {} 20 | self.optimizer = {} 21 | self.initializtion() 22 | self.dataset = self.dataset.to(self.device) 23 | 24 | def initializtion(self): 25 | num_classes = self.dataset.y.numpy().max().item() + 1 26 | self.model['default'] = GCN_node_sparse(n_feat=self.dataset.num_features, 27 | n_hidden=self.config.hidden_dim, 28 | n_class=num_classes, 29 | n_layer=self.config.n_layer,dropout=self.config.dropout).to(self.device) 30 | 31 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, 
weight_decay=self.config.weight_decay) 32 | 33 | def reset_parameters(self): 34 | for model_name, model in self.model.items(): 35 | if hasattr(model, 'reset_parameters'): 36 | model.reset_parameters() 37 | else: 38 | for layer in model.modules(): 39 | if hasattr(layer, 'reset_parameters'): 40 | layer.reset_parameters() 41 | 42 | self.optimizer = {} 43 | for model_name, model in self.model.items(): 44 | self.optimizer[model_name] = torch.optim.Adam( 45 | model.parameters(), 46 | lr=self.config.lr, 47 | weight_decay=self.config.weight_decay 48 | ) 49 | 50 | def train(self): 51 | self.reset_parameters() 52 | num_epochs = getattr(self.config, 'epoch', 500) 53 | patience = getattr(self.config, 'patience', 20) 54 | least_epoch = getattr(self.config, 'least_epoch', 40) 55 | best_val_accuracy, patience_counter = 0, 0 # initialize the counter up front so a non-improving first epoch cannot raise NameError 56 | 57 | for epoch in range(1, num_epochs + 1): 58 | self.model['default'].train() 59 | self.optimizer['default'].zero_grad() 60 | 61 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 62 | cls_loss = F.cross_entropy(out[self.dataset.train_mask], self.dataset.y[self.dataset.train_mask], weight=None, reduction='none') 63 | cls_loss = torch.sum(cls_loss * self.rn_weight[self.dataset.train_mask]) / cls_loss.size(0) 64 | 65 | cls_loss.backward() 66 | self.optimizer['default'].step() 67 | 68 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {cls_loss.item():.4f}") 69 | 70 | val_accuracy = self.eval(metric="accuracy") 71 | 72 | if val_accuracy > best_val_accuracy: 73 | best_val_accuracy = val_accuracy 74 | patience_counter = 0 75 | else: 76 | patience_counter += 1 77 | 78 | if patience_counter >= patience and epoch > least_epoch: 79 | print(f"Early stopping at epoch {epoch}.") 80 | break 81 | 82 | print("Training Finished!") 83 | 84 | def eval(self, metric="accuracy"): 85 | """ Evaluate the model on the validation set using the selected metric.
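Supported metrics: "accuracy", "bacc" (balanced accuracy), and "macro_f1"; any other name raises ValueError.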
""" 86 | self.model['default'].eval() 87 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 88 | 89 | with torch.no_grad(): 90 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 91 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 92 | 93 | if metric == "accuracy": 94 | return accuracy_score(all_labels, predictions) 95 | elif metric == "bacc": 96 | return balanced_accuracy_score(all_labels, predictions) 97 | elif metric == "macro_f1": 98 | return f1_score(all_labels, predictions, average='macro') 99 | else: 100 | raise ValueError(f"Unknown metric: {metric}") 101 | 102 | def test(self): 103 | self.model['default'].eval() 104 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 105 | 106 | with torch.no_grad(): 107 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 108 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 109 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 110 | 111 | accuracy = accuracy_score(all_labels, predictions) 112 | macro_f1 = f1_score(all_labels, predictions, average='macro') 113 | bacc = balanced_accuracy_score(all_labels, predictions) 114 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 115 | 116 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def index2sparse(edge_index, num_nodes): 5 | # edge_index to sparse format 6 | row, col = edge_index 7 | edge_weight = torch.ones(col.size(0), dtype=torch.float32) # assuming edge weight = 1 8 | adj_sparse = torch.sparse_coo_tensor(torch.stack([row, col]), edge_weight, (num_nodes, num_nodes)) 9 | return adj_sparse 10 | 11 | def direct_sparse_eye(n): 12 | indices = torch.arange(n) 13 | indices = torch.stack([indices, indices]) 14 | values = torch.ones(n) 15 | return torch.sparse_coo_tensor(indices, values, (n, n)) 16 | 17 | def compute_degree_matrix(A, num_nodes): 18 | indices = A._indices() 19 | values = A._values() 20 | row_indices = indices[0] 21 | 22 | degree = torch.zeros(num_nodes, dtype=values.dtype) 23 | 24 | for idx, value in zip(row_indices, values): 25 | degree[idx] += value 26 | 27 | degree = degree.pow(-0.5) 28 | 29 | diag_indices = torch.stack([torch.arange(num_nodes), torch.arange(num_nodes)]) 30 | D = torch.sparse_coo_tensor(diag_indices, degree, (num_nodes, num_nodes)) 31 | 32 | return D 33 | 34 | def index2dense(edge_index,nnode=2708): 35 | indx = edge_index.numpy() 36 | adj = np.zeros((nnode,nnode),dtype = 'int8') 37 | adj[(indx[0],indx[1])]=1 38 | new_adj = torch.from_numpy(adj).float() 39 | return new_adj 40 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/PatternMemory.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class PatternMemory(nn.Module): 8 | def __init__(self, embeddings_dimension, modelsize = 64): 9 | ''' 10 | num_layers: number of layers in the neural networks (EXCLUDING the input layer). If num_layers=1, this reduces to linear model. 
11 | modelsize: size of the pattern memory; the learnable memory matrix know_matrix is modelsize x modelsize 12 | 13 | forward() modulates the memory with a scale (Ugama/Wgama) and a shift (Ubeta/Wbeta) computed 14 | from graph_rep, then reads it out through M and Eta. 15 | ''' 16 | 17 | super(PatternMemory, self).__init__() 18 | 19 | self.know_matrix = nn.Parameter(torch.FloatTensor(modelsize, modelsize)) 20 | self.size = modelsize 21 | self.dim = embeddings_dimension 22 | self.leakyrelu = nn.LeakyReLU(0.2) 23 | 24 | self.Wgama = nn.Parameter(torch.FloatTensor( 25 | 1, modelsize)) 26 | self.Wbeta = nn.Parameter(torch.FloatTensor( 27 | 1, modelsize)) 28 | self.Ugama = nn.Parameter(torch.FloatTensor(embeddings_dimension, modelsize)) 29 | self.Ubeta = nn.Parameter(torch.FloatTensor(embeddings_dimension, modelsize)) 30 | 31 | self.M = nn.Parameter(torch.FloatTensor( 32 | modelsize, embeddings_dimension)) 33 | 34 | self.Eta = nn.Parameter(torch.FloatTensor(modelsize, 1)) 35 | 36 | self.reset_parameters() 37 | 38 | def reset_parameters(self): 39 | def reset(tensor): 40 | stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1))) 41 | tensor.data.uniform_(-stdv, stdv) 42 | 43 | reset(self.know_matrix) 44 | reset(self.Wgama) 45 | reset(self.Wbeta) 46 | reset(self.Ugama) 47 | reset(self.Ubeta) 48 | reset(self.M) 49 | reset(self.Eta) 50 | 51 | def forward(self, graph_rep): 52 | x_g = torch.matmul(graph_rep, self.Ugama) 53 | x_g = self.leakyrelu(torch.matmul(x_g.unsqueeze(2), self.Wgama).permute(0,2,1)) 54 | x_b = torch.matmul(graph_rep, self.Ubeta) 55 | x_b = self.leakyrelu(torch.matmul(x_b.unsqueeze(2), self.Wbeta).permute(0,2,1)) 56 | P_q = torch.mul((x_g + 1), self.know_matrix) + x_b 57 | H_q = self.leakyrelu(torch.matmul(P_q, self.M)) 58 | h_q = H_q.permute(0,2,1).matmul(self.Eta).squeeze(2) 59 | 60 | return h_q 61 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/SOLTGNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/sampling.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/SOLTGNN/sampling.zip -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/subgraph_sample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | 5 | subgraph_default_border = { 6 | "PTC_MR": 19, 7 | "PROTEINS": 54, 8 | "IMDB-BINARY": 25, 9 | "DD": 395, 10 | "FRANKENSTEIN": 22, 11 | "REDDIT": 469, 12 | "COLLAB": 91 13 | } 14 | 15 | 16 | def subgraph_sample(dataset, graph_list, nums=500): 17 | np.random.seed(0) 18 | border = subgraph_default_border.get(dataset, 0) 19 | for i in range(len(graph_list)): 20 | if graph_list[i].g.number_of_nodes() >= border: 21 | graph_list[i].nodegroup += 1 22 | sample_path = os.path.join(os.path.dirname(__file__), f'sampling/{dataset}/sampling.txt') 23 | with open(sample_path, 'w') as f: 24 | f.write(str(len(graph_list)) + '\n') 25 | for graph in graph_list: 26 | if graph.nodegroup == 1: 27 | graph.sample_list = [] 28 | graph.unsample_list = [] 29 | graph.sample_x = [] 30 | n = graph.g.number_of_nodes() 31 | K = int(min(border - 1, n / 2)) 32 | 
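# The loop below draws `nums` connected subgraphs of at most K nodes from each
# head graph (nodegroup == 1) by frontier expansion: pop a node from the wait
# list, add it to the sample set, and push its neighbors. Each record written
# to sampling.txt is the sample size followed by the sampled node ids and then
# the remaining ids; non-head graphs get a single '0' line.
# Two quirks worth noting: the border table above keys REDDIT as "REDDIT"
# (the bundled splits use "REDDIT-BINARY", for which .get(dataset, 0) would
# silently yield border = 0), and the isolated-seed check below tests
# graph.neighbors[j] (node j itself) while the walk is actually seeded with
# sample_idx[j], which only refer to the same node by coincidence.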
f.write(str(K) + '\n') 33 | graph.K = K 34 | for i in range(nums): 35 | sample_idx = np.random.permutation(n) 36 | j = 0 37 | sample_set = set() 38 | wait_set = [] 39 | cnt = 0 40 | if (len(graph.neighbors[j]) == 0): 41 | j += 1 42 | wait_set.append(sample_idx[j]) 43 | while cnt < K: 44 | if len(wait_set) != 0: 45 | x = wait_set.pop() 46 | else: 47 | break 48 | while x in sample_set: 49 | if len(wait_set) != 0: 50 | x = wait_set.pop() 51 | else: 52 | cnt = K 53 | break 54 | sample_set.add(x) 55 | cnt += 1 56 | wait_set.extend(graph.neighbors[x]) 57 | unsample_set = set(range(n)).difference(sample_set) 58 | f.write(str(len(sample_set)) + ' ') 59 | for x in list(sample_set): 60 | f.write(str(x) + ' ') 61 | for x in list(unsample_set): 62 | f.write(str(x) + ' ') 63 | f.write('\n') 64 | else: 65 | f.write('0\n') 66 | 67 | 68 | if __name__ == '__main__': 69 | pass 70 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/TailGNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.sparse as sp 5 | 6 | from IGL_Bench.algorithm.TAILGNN.layers import Relation, Relationv2, Generator 7 | from IGL_Bench.backbone.gcn import GCNLayer 8 | 9 | 10 | class TailGCN_SP(nn.Module): 11 | def __init__(self, nfeat, nclass, params, device, ver=1, ablation=0): 12 | super(TailGCN_SP, self).__init__() 13 | 14 | self.device = device 15 | self.nhid = params.hidden 16 | self.dropout = params.dropout 17 | self.ablation = ablation 18 | 19 | # self.rel1 = TransGCN_SP(nfeat, self.nhid, g_sigma=params.g_sigma, ver=ver) 20 | if ver == 1: 21 | self.r1 = Relation(nfeat, ablation=ablation) 22 | else: 23 | self.r1 = Relationv2(nfeat, self.nhid, ablation=ablation) 24 | self.g1 = Generator(nfeat, params.g_sigma, ablation).to(device) 25 | 26 | self.gc1 = GCNLayer(nfeat, self.nhid).to(device) 27 | self.rel2 = TransGCN_SP(self.nhid, nclass, g_sigma=params.g_sigma, ver=ver, ablation=ablation).to(device) 28 | 29 | def forward(self, x, adj, head, adj_self=None, norm=None): 30 | 31 | # rewrite rel1 32 | neighbor = sp.mm(adj, x) 33 | m1 = self.r1(x, neighbor) 34 | 35 | x = x.to(self.device) 36 | m1 = m1.to(self.device) 37 | adj = adj.to(self.device) 38 | adj_self = adj_self.to(self.device) 39 | norm = norm.to(self.device) 40 | 41 | if head or self.ablation == 2: 42 | x1 = self.gc1(x, adj_self, norm=norm) 43 | else: 44 | if self.ablation == 1: 45 | h_s = self.g1(m1) 46 | else: 47 | h_s = m1 48 | 49 | h_s = torch.mm(h_s, self.gc1.weight) 50 | h_k = self.gc1(x, adj_self) 51 | x1 = (h_k + h_s) / (norm + 1) 52 | 53 | x1 = F.elu(x1) 54 | x1 = F.dropout(x1, self.dropout, training=self.training) 55 | 56 | x2, m2 = self.rel2(x1, adj, adj_self, head, norm) 57 | norm_m1 = torch.norm(m1, dim=1) 58 | norm_m2 = torch.norm(m2, dim=1) 59 | 60 | return x2, norm_m1, norm_m2 # , head_prob, tail_prob 61 | 62 | 63 | class TransGCN_SP(nn.Module): 64 | def __init__(self, nfeat, nhid, g_sigma, ver, ablation=0): 65 | super(TransGCN_SP, self).__init__() 66 | 67 | if ver == 1: 68 | self.r = Relation(nfeat, ablation) 69 | else: 70 | self.r = Relationv2(nfeat, nhid, ablation=ablation) 71 | 72 | self.g = Generator(nfeat, g_sigma, ablation) 73 | self.gc = GCNLayer(nfeat, nhid) 74 | self.ablation = ablation 75 | 76 | def forward(self, x, adj, adj_self, head, norm): 77 | 78 | # norm = sp.sum(adj, dim=1).to_dense().view(-1,1) 79 | neighbor = sp.mm(adj, x) 80 | m = self.r(x, neighbor) 81 | 
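# Head nodes (and ablation mode 2) are convolved over the observed self-looped
# adjacency only. For tail nodes, the relation vector m (translated by the
# generator g when ablation == 1) acts as a predicted missing-neighbor
# embedding: it is projected through the shared GCN weight and folded into the
# aggregation with a norm + 1 denominator, as if one extra neighbor existed.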
82 | if head or self.ablation == 2: 83 | # norm = sp.sum(adj_self, dim=1).to_dense().view(-1,1) 84 | h_k = self.gc(x, adj_self, norm=norm) 85 | else: 86 | if self.ablation == 1: 87 | h_s = self.g(m) 88 | else: 89 | h_s = m 90 | 91 | h_s = torch.mm(h_s, self.gc.weight) 92 | h_k = self.gc(x, adj_self) 93 | h_k = (h_k + h_s) / (norm + 1) 94 | 95 | return h_k, m 96 | 97 | 98 | class Discriminator(nn.Module): 99 | def __init__(self, in_features): 100 | super(Discriminator, self).__init__() 101 | 102 | self.d = nn.Linear(in_features, in_features, bias=True) 103 | self.wd = nn.Linear(in_features, 1, bias=False) 104 | self.sigmoid = nn.Sigmoid() 105 | 106 | ''' 107 | def weight_init(self, m): 108 | if isinstance(m, Parameter): 109 | torch.nn.init.xavier_uniform_(m.weight.data) 110 | 111 | if isinstance(m, nn.Linear): 112 | stdv = 1. / math.sqrt(m.weight.size(1)) 113 | torch.nn.init.xavier_uniform_(m.weight.data) 114 | if m.bias is not None: 115 | m.bias.data.uniform_(-stdv, stdv) 116 | ''' 117 | 118 | def forward(self, ft): 119 | ft = F.elu(ft) 120 | ft = F.dropout(ft, 0.5, training=self.training) 121 | 122 | fc = F.elu(self.d(ft)) 123 | prob = self.wd(fc) 124 | 125 | return self.sigmoid(prob) 126 | 127 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TAILGNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.parameter import Parameter 4 | import torch.nn.functional as F 5 | import math 6 | 7 | 8 | class Relation(nn.Module): 9 | def __init__(self, in_features, ablation): 10 | super(Relation, self).__init__() 11 | 12 | self.gamma_1 = nn.Linear(in_features, in_features, bias=False) 13 | self.gamma_2 = nn.Linear(in_features, in_features, bias=False) 14 | 15 | self.beta_1 = nn.Linear(in_features, in_features, bias=False) 16 | self.beta_2 = nn.Linear(in_features, in_features, bias=False) 17 | 18 | self.r = Parameter(torch.FloatTensor(1, in_features)) 19 | 20 | self.elu = nn.ELU() 21 | self.lrelu = nn.LeakyReLU(0.2) 22 | 23 | self.sigmoid = nn.Sigmoid() 24 | self.reset_parameter() 25 | self.ablation = ablation 26 | 27 | def reset_parameter(self): 28 | stdv = 1. 
/ math.sqrt(self.r.size(1)) 29 | self.r.data.uniform_(-stdv, stdv) 30 | 31 | def forward(self, ft, neighbor): 32 | 33 | if self.ablation == 3: 34 | self.m = ft + self.r - neighbor 35 | else: 36 | gamma = self.gamma_1(ft) + self.gamma_2(neighbor) 37 | gamma = self.lrelu(gamma) + 1.0 38 | 39 | beta = self.beta_1(ft) + self.beta_2(neighbor) 40 | beta = self.lrelu(beta) 41 | 42 | self.r_v = gamma * self.r + beta 43 | 44 | # transE 45 | self.m = ft + self.r_v - neighbor 46 | ''' 47 | #transH 48 | norm = F.normalize(self.r_v) 49 | h_ft = ft - norm * torch.sum((norm * ft), dim=1, keepdim=True) 50 | h_neighbor = neighbor - norm * torch.sum((norm * neighbor), dim=1, keepdim=True) 51 | self.m = h_ft - h_neighbor 52 | ''' 53 | return self.m # F.normalize(self.m) 54 | 55 | 56 | class Relationv2(nn.Module): 57 | def __init__(self, in_features, out_features, ablation=0): 58 | super(Relationv2, self).__init__() 59 | 60 | self.gamma1_1 = nn.Linear(in_features, out_features, bias=False) 61 | self.gamma1_2 = nn.Linear(out_features, in_features, bias=False) 62 | 63 | self.gamma2_1 = nn.Linear(in_features, out_features, bias=False) 64 | self.gamma2_2 = nn.Linear(out_features, in_features, bias=False) 65 | 66 | self.beta1_1 = nn.Linear(in_features, out_features, bias=False) 67 | self.beta1_2 = nn.Linear(out_features, in_features, bias=False) 68 | 69 | self.beta2_1 = nn.Linear(in_features, out_features, bias=False) 70 | self.beta2_2 = nn.Linear(out_features, in_features, bias=False) 71 | 72 | self.r = Parameter(torch.FloatTensor(1, in_features)) 73 | 74 | self.ablation = ablation 75 | self.elu = nn.ELU() 76 | self.lrelu = nn.LeakyReLU(0.2) 77 | self.sigmoid = nn.Sigmoid() 78 | self.reset_parameter() 79 | 80 | def weight_init(self, m): 81 | return 82 | 83 | def reset_parameter(self): 84 | stdv = 1. / math.sqrt(self.r.size(1)) 85 | self.r.data.uniform_(-stdv, stdv) 86 | 87 | def forward(self, ft, neighbor): 88 | 89 | if self.ablation == 3: 90 | self.m = ft + self.r - neighbor 91 | else: 92 | 93 | gamma1 = self.gamma1_2(self.gamma1_1(ft)) 94 | gamma2 = self.gamma2_2(self.gamma2_1(neighbor)) 95 | gamma = self.lrelu(gamma1 + gamma2) + 1.0 96 | 97 | beta1 = self.beta1_2(self.beta1_1(ft)) 98 | beta2 = self.beta2_2(self.beta2_1(neighbor)) 99 | beta = self.lrelu(beta1 + beta2) 100 | 101 | self.r_v = gamma * self.r + beta 102 | self.m = ft + self.r_v - neighbor 103 | 104 | return F.normalize(self.m) 105 | 106 | 107 | class Generator(nn.Module): 108 | def __init__(self, in_features, std, ablation): 109 | super(Generator, self).__init__() 110 | 111 | self.g = nn.Linear(in_features, in_features, bias=True) 112 | self.std = std 113 | self.ablation = ablation 114 | 115 | def forward(self, ft): 116 | # h_s = ft 117 | if self.training: 118 | # if self.ablation == 2: 119 | mean = torch.zeros(ft.shape, device='cuda') 120 | ft = torch.normal(mean, 1.) 
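# During training the generator input is replaced outright with standard-normal
# noise of the same shape (the commented-out variant below instead perturbs ft
# around itself with std self.std); at eval time ft passes through unchanged.
# Note the noise tensor is created on a hardcoded 'cuda' device, so this branch
# assumes GPU execution.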
121 | # else: 122 | # ft = torch.normal(ft, self.std) 123 | h_s = F.elu(self.g(ft)) 124 | 125 | return h_s -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp # diags and the sparse ops below live in scipy.sparse, not the top-level scipy namespace 3 | import torch 4 | 5 | 6 | def link_dropout(adj, idx, k=5): 7 | tail_adj = adj.copy() 8 | num_links = np.random.randint(k, size=idx.shape[0]) 9 | num_links += 1 10 | 11 | for i in range(idx.shape[0]): 12 | index = tail_adj[idx[i]].nonzero()[1] 13 | new_idx = np.random.choice(index, min(len(index), num_links[i]), replace=False) 14 | tail_adj[idx[i]] = 0.0 15 | for j in new_idx: 16 | tail_adj[idx[i], j] = 1.0 17 | return tail_adj 18 | 19 | def normalize(mx): 20 | """Row-normalize sparse matrix""" 21 | rowsum = np.array(mx.sum(1)) 22 | rowsum = np.where(rowsum==0, 1, rowsum) 23 | r_inv = np.power(rowsum, -1).flatten() 24 | r_inv[np.isinf(r_inv)] = 0. 25 | r_mat_inv = sp.diags(r_inv) 26 | mx = r_mat_inv.dot(mx) 27 | 28 | return mx 29 | 30 | def convert_sparse_tensor(sparse_mx):  31 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 32 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 33 | indices = torch.from_numpy( 34 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 35 | values = torch.from_numpy(sparse_mx.data) 36 | shape = torch.Size(sparse_mx.shape) 37 | return torch.sparse_coo_tensor(indices, values, shape) # torch.sparse.FloatTensor is deprecated; the COO constructor is equivalent here 38 | 39 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAM/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TAM/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAM/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.backbone.gcn import GCN_node_sparse 2 | from IGL_Bench.algorithm.TAM.tam import * 3 | import torch 4 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 5 | 6 | class TAM_node_solver: 7 | def __init__(self, config, dataset, device='cuda'): 8 | self.config = config 9 | self.dataset = dataset 10 | self.device = device 11 | 12 | num_classes = self.dataset.y.numpy().max().item() + 1 13 | stats = dataset.y[dataset.train_mask] 14 | n_data = [] 15 | for i in range(num_classes): 16 | data_num = (stats == i).sum() 17 | n_data.append(int(data_num.item())) 18 | self.class_num_list = n_data 19 | 20 | self.criterion = torch.nn.CrossEntropyLoss() 21 | 22 | self.model = {} 23 | self.optimizer = {} 24 | self.initializtion() 25 | self.aggregator = MeanAggregation() 26 | 27 | self.model['default'] = self.model['default'].to(device) 28 | self.dataset = self.dataset.to(self.device) 29 | 30 | def initializtion(self): 31 | num_classes = self.dataset.y.numpy().max().item() + 1 32 | self.model['default'] = GCN_node_sparse(n_feat=self.dataset.num_features, 33 | n_hidden=self.config.hidden_dim, 34 | n_class=num_classes, 35 | n_layer=self.config.n_layer, dropout=self.config.dropout) 36 | 37 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 38 | 39 | def reset_parameters(self): 40 | for model_name, model in self.model.items(): 41 | if hasattr(model, 
'reset_parameters'): 42 | model.reset_parameters() 43 | else: 44 | for layer in model.modules(): 45 | if hasattr(layer, 'reset_parameters'): 46 | layer.reset_parameters() 47 | 48 | self.optimizer = {} 49 | for model_name, model in self.model.items(): 50 | self.optimizer[model_name] = torch.optim.Adam( 51 | model.parameters(), 52 | lr=self.config.lr, 53 | weight_decay=self.config.weight_decay 54 | ) 55 | 56 | def train(self): 57 | self.reset_parameters() 58 | num_epochs = getattr(self.config, 'epoch', 500) 59 | patience = getattr(self.config, 'patience', 10) 60 | least_epoch = getattr(self.config, 'least_epoch', 100) 61 | 62 | best_loss = float('inf') 63 | patience_counter = 0 64 | best_val_accuracy = 0 65 | 66 | for epoch in range(1, num_epochs + 1): 67 | self.model['default'].train() 68 | self.optimizer['default'].zero_grad() 69 | 70 | output = self.model['default'](self.dataset.x, self.dataset.edge_index) 71 | output = adjust_output(self.config, output, self.dataset.edge_index, self.dataset.y, \ 72 | self.dataset.train_mask, self.aggregator, self.class_num_list, epoch) 73 | 74 | loss = self.criterion(output, self.dataset.y[self.dataset.train_mask]) 75 | loss.backward() 76 | self.optimizer['default'].step() 77 | 78 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}") 79 | 80 | val_accuracy = self.eval(metric="accuracy") 81 | 82 | if val_accuracy > best_val_accuracy: 83 | best_val_accuracy = val_accuracy 84 | patience_counter = 0 85 | else: 86 | patience_counter += 1 87 | 88 | if patience_counter >= patience and epoch > least_epoch: 89 | print(f"Early stopping at epoch {epoch}.") 90 | break 91 | 92 | print("Training Finished!") 93 | 94 | def eval(self, metric="accuracy"): 95 | """ Evaluate the model on the validation set using the selected metric.
""" 96 | self.model['default'].eval() 97 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 98 | 99 | with torch.no_grad(): 100 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 101 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 102 | 103 | if metric == "accuracy": 104 | return accuracy_score(all_labels, predictions) 105 | elif metric == "bacc": 106 | return balanced_accuracy_score(all_labels, predictions) 107 | elif metric == "macro_f1": 108 | return f1_score(all_labels, predictions, average='macro') 109 | else: 110 | raise ValueError(f"Unknown metric: {metric}") 111 | 112 | def test(self): 113 | self.model['default'].eval() 114 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 115 | 116 | with torch.no_grad(): 117 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 118 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 119 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 120 | 121 | accuracy = accuracy_score(all_labels, predictions) 122 | macro_f1 = f1_score(all_labels, predictions, average='macro') 123 | bacc = balanced_accuracy_score(all_labels, predictions) 124 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 125 | 126 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAM/tam.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch_scatter import scatter_add 6 | from torch_geometric.utils import to_dense_batch 7 | from torch_geometric.nn import MessagePassing 8 | from torch_geometric.utils import add_self_loops, degree 9 | 10 | 11 | ## Jensen-Shanon Divergence ## 12 | def compute_jsd(dist1, dist2): 13 | dist_mean = (dist1 + dist2) / 2. 14 | jsd = (F.kl_div(dist_mean.log(), dist1, reduction = 'none') + F.kl_div(dist_mean.log(), dist2, reduction = 'none')) / 2. 
15 | return jsd 16 | 17 | 18 | ## TAM ## 19 | @torch.no_grad() 20 | def compute_tam(output, edge_index, label, train_mask, aggregator, class_num_list=None, temp_phi=None, temp_gamma=None): 21 | n_cls = label.max().item() + 1 22 | if label.dim()==2: 23 | label = label.squeeze() 24 | # Apply class-wise temperature 25 | cls_num_list = torch.FloatTensor(class_num_list).to(output.device) 26 | cls_num_ratio = cls_num_list / cls_num_list.sum() 27 | cls_num_ratio = cls_num_ratio * temp_gamma + (1 - temp_gamma) 28 | max_beta = torch.max(cls_num_ratio) 29 | cls_temperature = (temp_phi * (cls_num_ratio + 1 - max_beta)).unsqueeze(0) 30 | temp = 1 / cls_temperature 31 | 32 | # Predict unlabeled nodes 33 | agg_out = F.softmax(output.clone().detach()/temp, dim=1) 34 | agg_out[train_mask] = F.one_hot(label[train_mask].clone(), num_classes=n_cls).float() # only use labeled nodes 35 | neighbor_dist = aggregator(agg_out, edge_index)[train_mask] # (# of labeled nodes, # of classes) 36 | 37 | # Compute class-wise connectivity matrix 38 | connectivity_matrix= [] 39 | for c in range(n_cls): 40 | c_mask = (label[train_mask] == c) 41 | connectivity_matrix.append(neighbor_dist[c_mask].mean(dim=0)) 42 | connectivity_matrix= torch.stack(connectivity_matrix, dim=0) 43 | 44 | # Preprocess class-wise connectivity matrix and NLD for numerical stability 45 | center_mask = F.one_hot(label[train_mask].clone(), num_classes=n_cls).bool() 46 | neighbor_dist[neighbor_dist<1e-6] = 1e-6 47 | connectivity_matrix[connectivity_matrix<1e-6] = 1e-6 48 | 49 | # Compute ACM 50 | acm = (neighbor_dist[center_mask].unsqueeze(dim=1) / torch.diagonal(connectivity_matrix).unsqueeze(dim=1)[label[train_mask]]) \ 51 | * (connectivity_matrix[label[train_mask]] / neighbor_dist) 52 | acm[acm>1] = 1 53 | acm[center_mask] = 1 54 | 55 | # Compute ADM 56 | cls_pair_jsd = compute_jsd(connectivity_matrix.unsqueeze(dim=0), connectivity_matrix.unsqueeze(dim=1)).sum(dim=-1) # distance between classes 57 | cls_pair_jsd[cls_pair_jsd<1e-6] = 1e-6 58 | self_kl = compute_jsd(neighbor_dist, connectivity_matrix[label[train_mask]]).sum(dim=-1,keepdim=True) # deviation from self-class averaged NLD 59 | neighbor_kl = compute_jsd(neighbor_dist.unsqueeze(1),connectivity_matrix.unsqueeze(0)).sum(dim=-1) # distance between node NLD and each class-averaged NLD 60 | adm = (self_kl**2 + (cls_pair_jsd**2)[label[train_mask]] - neighbor_kl**2) / (2*(cls_pair_jsd**2)[label[train_mask]]) 61 | 62 | adm[center_mask] = 0 63 | 64 | return acm, adm 65 | 66 | 67 | def adjust_output(args, output, edge_index, label, train_mask, aggregator, class_num_list, epoch): 68 | """ 69 | Adjust the margin of each labeled node according to its local topology 70 | Input: 71 | args: hyperparameters for TAM 72 | output: model predictions for all nodes (including unlabeled ones); [# of nodes, # of classes] 73 | edge_index: graph connectivity; [2, # of edges] 74 | label: node labels; [# of nodes] 75 | train_mask: mask of labeled training nodes; [# of nodes] 76 | aggregator: neighborhood aggregator (MeanAggregation below) 77 | class_num_list: the number of nodes for each class; [# of classes] 78 | epoch: current epoch; integer 79 | Output: 80 | output: adjusted logits for the labeled (train) nodes 81 | """ 82 | 83 | # Compute ACM and ADM 84 | if args.tam and epoch > args.warmup: 85 | acm, adm = compute_tam(output, edge_index, label, train_mask, aggregator, \ 86 | class_num_list=class_num_list, temp_phi=args.temp_phi, temp_gamma=0.4) 87 | 88 | output = output[train_mask] 89 | # Adjust outputs 90 | if args.tam and epoch > args.warmup: 91 | acm = acm.log() 92 | adm = - adm 93 | output = output + args.tam_alpha*acm + 
args.tam_beta*adm 94 | 95 | return output 96 | 97 | 98 | class MeanAggregation(MessagePassing): 99 | def __init__(self): 100 | super(MeanAggregation, self).__init__(aggr='mean') 101 | 102 | def forward(self, x, edge_index): 103 | # x has shape [N, in_channels] 104 | # edge_index has shape [2, E] 105 | 106 | # Step 1: Add self-loops to the adjacency matrix. 107 | _edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0)) 108 | 109 | # Step 4-5: Start propagating messages. 110 | return self.propagate(_edge_index, x=x) -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TOPOAUC/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/cal.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def compute_ppr_and_gpr(dataset, pr_prob): 4 | def index2dense(edge_index, num_nodes): 5 | A = torch.zeros((num_nodes, num_nodes), device=edge_index.device) 6 | A[edge_index[0], edge_index[1]] = 1 7 | return A 8 | 9 | edge_index = dataset.edge_index 10 | num_nodes = dataset.num_nodes 11 | labels = dataset.y 12 | train_index = torch.tensor(dataset.train_index, device=edge_index.device, dtype=torch.long) 13 | 14 | num_classes = labels.max().item() + 1 15 | train_nodes_per_class = [] 16 | for cls in range(num_classes): 17 | class_train_nodes = train_index[labels[train_index] == cls].tolist() 18 | train_nodes_per_class.append(class_train_nodes) 19 | 20 | A = index2dense(edge_index, num_nodes) 21 | A_hat = A + torch.eye(num_nodes, device=edge_index.device) 22 | D = torch.diag(torch.sum(A_hat, dim=1)) 23 | D = D.inverse().sqrt() 24 | A_hat = torch.mm(torch.mm(D, A_hat), D) 25 | 26 | I = torch.eye(num_nodes, device=edge_index.device) 27 | PPR = pr_prob * ((I - (1 - pr_prob) * A_hat).inverse()) 28 | 29 | gpr_matrix = [] 30 | for class_nodes in train_nodes_per_class: 31 | class_nodes_tensor = torch.tensor(class_nodes, device=edge_index.device, dtype=torch.long) 32 | class_ppr = PPR[class_nodes_tensor] 33 | class_gpr = torch.mean(class_ppr, dim=0).squeeze() 34 | gpr_matrix.append(class_gpr) 35 | 36 | GPR = torch.stack(gpr_matrix, dim=0).transpose(0, 1) 37 | 38 | return PPR.cpu(), GPR.cpu() -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.backbone.gcn import GCN_node_sparse 2 | from IGL_Bench.algorithm.TOPOAUC.myloss import ELossFN 3 | from IGL_Bench.algorithm.TOPOAUC.cal import compute_ppr_and_gpr 4 | from IGL_Bench.algorithm.TOPOAUC.util import * 5 | import torch 6 | import torch.nn.functional as F 7 | import numpy as np 8 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 9 | 10 | class TOPOAUC_node_solver: 11 | def __init__(self, config, dataset, device='cuda'): 12 | self.config = config 13 | self.dataset = dataset 14 | self.device = device 15 | 16 | self.model = {} 17 | self.optimizer = {} 18 | self.ppr, self.gpr = compute_ppr_and_gpr(self.dataset, self.config.pagerank_prob) 19 | self.initializtion() 20 | 21 | self.model['default'] = self.model['default'].to(device) 22 | self.my_loss = self.my_loss.to(device) 23 
| self.dataset = self.dataset.to(device) 24 | 25 | def initializtion(self): 26 | num_classes = self.dataset.y.numpy().max().item() + 1 27 | self.model['default'] = GCN_node_sparse(n_feat=self.dataset.num_features, 28 | n_hidden=self.config.hidden_dim, 29 | n_class=num_classes, 30 | n_layer=self.config.n_layer, dropout=self.config.dropout) 31 | 32 | adj_bool = index2adj_bool(self.dataset.edge_index, self.dataset.num_nodes) 33 | 34 | self.my_loss = ELossFN(num_classes, self.dataset.num_nodes, adj_bool, self.ppr, self.gpr, self.dataset.train_mask, 35 | self.device, weight_sub_dim=self.config.weight_sub_dim, weight_inter_dim=self.config.weight_inter_dim, 36 | weight_global_dim=self.config.weight_global_dim, beta=self.config.beta, gamma=self.config.gamma, 37 | loss_type=self.config.loss) 38 | 39 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 40 | 41 | def reset_parameters(self): 42 | for model_name, model in self.model.items(): 43 | if hasattr(model, 'reset_parameters'): 44 | model.reset_parameters() 45 | else: 46 | for layer in model.modules(): 47 | if hasattr(layer, 'reset_parameters'): 48 | layer.reset_parameters() 49 | 50 | self.optimizer = {} 51 | for model_name, model in self.model.items(): 52 | self.optimizer[model_name] = torch.optim.Adam( 53 | model.parameters(), 54 | lr=self.config.lr, 55 | weight_decay=self.config.weight_decay 56 | ) 57 | 58 | def train(self): 59 | self.reset_parameters() 60 | 61 | num_epochs = getattr(self.config, 'epoch', 500) 62 | patience = getattr(self.config, 'patience', 10) 63 | least_epoch = getattr(self.config, 'least_epoch', 40) 64 | 65 | criterion = torch.nn.CrossEntropyLoss() 66 | 67 | best_loss = float('inf') 68 | patience_counter = 0 69 | best_val_accuracy = 0 70 | 71 | for epoch in range(1, num_epochs + 1): 72 | self.model['default'].train() 73 | self.optimizer['default'].zero_grad() 74 | 75 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 76 | logits = F.softmax(out, dim=-1) 77 | loss = self.my_loss(logits, self.dataset.y, self.dataset.train_mask) 78 | loss = torch.mean(loss) 79 | loss.backward() 80 | self.optimizer['default'].step() 81 | 82 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}") 83 | 84 | val_accuracy = self.eval(metric="accuracy") 85 | 86 | if val_accuracy > best_val_accuracy: 87 | best_val_accuracy = val_accuracy 88 | patience_counter = 0 89 | else: 90 | patience_counter += 1 91 | 92 | if patience_counter >= patience and epoch > least_epoch: 93 | print(f"Early stopping at epoch {epoch}.") 94 | break 95 | 96 | print("Training Finished!") 97 | 98 | def eval(self, metric="accuracy"): 99 | self.model['default'].eval() 100 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 101 | 102 | with torch.no_grad(): 103 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 104 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 105 | 106 | if metric == "accuracy": 107 | return accuracy_score(all_labels, predictions) 108 | elif metric == "bacc": 109 | return balanced_accuracy_score(all_labels, predictions) 110 | elif metric == "macro_f1": 111 | return f1_score(all_labels, predictions, average='macro') 112 | else: 113 | raise ValueError(f"Unknown metric: {metric}") 114 | 115 | def test(self): 116 | self.model['default'].eval() 117 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 118 | 119 | with torch.no_grad(): 120 | out = self.model['default'](self.dataset.x, 
self.dataset.edge_index) 121 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 122 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 123 | 124 | accuracy = accuracy_score(all_labels, predictions) 125 | macro_f1 = f1_score(all_labels, predictions, average='macro') 126 | bacc = balanced_accuracy_score(all_labels, predictions) 127 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 128 | 129 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def index2adj_bool(edge_index,nnode): 5 | 6 | indx = edge_index.numpy() 7 | adj = np.zeros((nnode,nnode),dtype = 'bool') 8 | adj[(indx[0],indx[1])]=1 9 | new_adj = torch.from_numpy(adj) 10 | 11 | return new_adj -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TopoImb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TopoImb/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TopoImb/topo_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch.nn import Linear 8 | from torch_geometric.nn import WLConv 9 | from torch_geometric.loader import DataLoader 10 | 11 | from sklearn.cluster import SpectralClustering 12 | from IGL_Bench.algorithm.TopoImb.trainer import GClsTrainer 13 | 14 | def clust(features, n_clusters=8): 15 | clustering = SpectralClustering( 16 | n_clusters=n_clusters, 17 | assign_labels='discretize', 18 | random_state=0, 19 | affinity='nearest_neighbors' 20 | ).fit(features) 21 | 22 | return clustering.labels_ 23 | 24 | class WLGraph_model(torch.nn.Module): 25 | def __init__(self, args, nfeat, nhid, nclass, dropout, nlayer=2, res=True): 26 | super().__init__() 27 | self.args = args 28 | self.nhid = nhid 29 | self.res = res 30 | 31 | self.convs = torch.nn.ModuleList() 32 | for layer in range(nlayer): 33 | self.convs.append(WLConv()) 34 | 35 | self.emb = torch.nn.Embedding(5000, 32) 36 | 37 | self.color_size = -1 38 | self.graph_map = {} 39 | 40 | self.lin1 = Linear(32, nhid) 41 | self.lin2 = Linear(nhid, nclass) 42 | 43 | self.dropout = dropout 44 | 45 | def forward(self, x, edge_index, edge_weight=None, batch=None): 46 | if batch is None: # No batch given 47 | print('no batch info given') 48 | batch = x.new(x.size(0)).long().fill_(0) 49 | 50 | x = self.embedding(x, edge_index, edge_weight, batch=batch) 51 | x = self.lin2(F.leaky_relu(self.lin1(x))) 52 | return F.log_softmax(x, dim=1) 53 | 54 | def embedding(self, x, edge_index, edge_weight=None, batch=None): 55 | if x.shape[-1] != 1: 56 | x = x.argmax(-1) 57 | 58 | for gconv in self.convs: 59 | x = gconv(x, edge_index) 60 | 61 | out = [] 62 | for b_i in set(batch.cpu().numpy()): 63 | b_idx = (batch == b_i) 64 | g_i = x[b_idx] 65 | idx = hash(tuple(g_i.cpu().numpy().tolist())) 66 | if idx not in self.graph_map: 67 | self.graph_map[idx] = len(self.graph_map) 68 | out.append(self.graph_map[idx]) 69 | g_x = torch.tensor(out, 
device=x.device) 70 | 71 | if self.color_size == -1: 72 | self.color_size = len(self.graph_map) 73 | 74 | g_x = self.emb(g_x) 75 | gx = F.dropout(g_x, self.dropout, training=self.training) 76 | return gx 77 | 78 | def wl(self, x, edge_index, batch=None): 79 | if x.shape[-1] != 1: 80 | x = x.argmax(-1) 81 | 82 | for gconv in self.convs: 83 | x = gconv(x, edge_index) 84 | 85 | out = [] 86 | for b_i in set(batch.cpu().numpy()): 87 | b_idx = batch == b_i 88 | g_i = x[b_idx] 89 | idx = hash(tuple(g_i.cpu().numpy().tolist())) 90 | if idx not in self.graph_map: 91 | self.graph_map[idx] = len(self.graph_map) 92 | out.append(self.graph_map[idx]) 93 | g_x = torch.tensor(out, device=x.device) 94 | return g_x 95 | 96 | def graph_wl_dist(self, x, edge_index, batch=None): 97 | if x.shape[-1] != 1: 98 | x = x.argmax(-1) 99 | 100 | for gconv in self.convs: 101 | x = gconv(x, edge_index) 102 | 103 | out = self.convs[-1].histogram(x, batch, norm=True) # (batch_size, num_colors) 104 | out = out.to(x.device) 105 | return out 106 | 107 | 108 | def generate_topo_labels(dataset, config): 109 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 110 | 111 | allloader = DataLoader(dataset, batch_size=len(dataset), shuffle=False) 112 | steps = math.ceil(len(dataset) / config.batch_size) 113 | 114 | current_dir = os.path.dirname(os.path.abspath(__file__)) 115 | topo_path = os.path.join(current_dir, f"../../../TopoImb_topo_file/{dataset.name}_topo_labels.npy") 116 | print(f"Checking path: {os.path.abspath(topo_path)}") 117 | 118 | if os.path.exists(topo_path): 119 | topo_labels_np = np.load(topo_path) 120 | topo_labels = torch.tensor(topo_labels_np, dtype=torch.long, device=device) 121 | return topo_labels 122 | 123 | wlmodel = WLGraph_model( 124 | args=config, 125 | nfeat=dataset.num_features, 126 | nhid=config.hidden_dim, 127 | nclass=dataset.num_classes, 128 | dropout=0, 129 | nlayer=config.n_layer 130 | ).to(device) 131 | 132 | WLtrainer = GClsTrainer(config, wlmodel, dataset=dataset) 133 | 134 | for epoch in range(5): 135 | for batch, data in enumerate(allloader): 136 | log_info = WLtrainer.train_step(data.to(device), epoch) 137 | # print(f"[Epoch {epoch}] Train log: {log_info}") 138 | 139 | wlmodel.eval() 140 | wl_dists = [] 141 | for batch, data in enumerate(allloader): 142 | graph_wl_tensor = wlmodel.graph_wl_dist( 143 | data.x.float().to(device), 144 | data.edge_index.to(device), 145 | batch=data.batch.to(device) 146 | ).detach() 147 | wl_dists.append(graph_wl_tensor.cpu().numpy()) 148 | 149 | wl_dists = np.concatenate(wl_dists, axis=0) 150 | 151 | graph_clust = clust(wl_dists, n_clusters=8) 152 | topo_labels = torch.tensor(graph_clust, device=device) 153 | 154 | # torch.save(topo_labels, topo_path) 155 | topo_dir = os.path.dirname(topo_path) 156 | os.makedirs(topo_dir, exist_ok=True) 157 | 158 | np.save(topo_path, topo_labels.cpu().numpy()) 159 | 160 | return topo_labels 161 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TopoImb/utils.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score, f1_score 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def accuracy(logits, labels): 6 | preds = logits.max(1)[1].type_as(labels) 7 | correct = preds.eq(labels).double() 8 | correct = correct.sum() 9 | return correct / len(labels) 10 | 11 | def grouped_accuracy(logits, labels, group_labels): 12 | preds = logits.argmax(1) 13 | group_ac_dict={} 14 | for group 
in set(group_labels): 15 | group_idx = group_labels==group 16 | group_ac = (preds[group_idx]==labels[group_idx]).sum()/(group_idx.sum()+0.00000001) 17 | group_ac_dict[group] = group_ac 18 | 19 | return group_ac_dict 20 | 21 | def print_class_acc(logits, labels, pre='valid'): 22 | pre_num = 0 23 | #print class-wise performance 24 | 25 | for i in range(labels.max()+1): 26 | index_pos = labels==i 27 | cur_tpr = accuracy(logits[index_pos], labels[index_pos]) 28 | print(str(pre)+" class {:d} True Positive Rate: {:.3f}".format(i,cur_tpr.item())) 29 | 30 | index_neg = labels != i 31 | labels_neg = labels.new(labels.shape).fill_(i) 32 | 33 | cur_fpr = accuracy(logits[index_neg,:], labels_neg[index_neg]) 34 | print(str(pre)+" class {:d} False Positive Rate: {:.3f}".format(i,cur_fpr.item())) 35 | 36 | 37 | if labels.max() > 1: 38 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 39 | else: 40 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:,1].detach().cpu(), average='macro') 41 | 42 | macro_F = f1_score(labels.detach().cpu(), torch.argmax(logits, dim=-1).detach().cpu(), average='macro') 43 | print(str(pre)+' current auc-roc score: {:f}, current macro_F score: {:f}'.format(auc_score,macro_F)) 44 | 45 | return 46 | 47 | def Roc_F(logits, labels, pre='valid'): 48 | if labels.max() > 1:#require set(labels) to be the same as columns of logits 49 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 50 | else: 51 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:,1].detach().cpu(), average='macro') 52 | 53 | macro_F = f1_score(labels.detach().cpu(), torch.argmax(logits, dim=-1).detach().cpu(), average='macro') 54 | 55 | return auc_score, macro_F 56 | 57 | class meters: 58 | ''' 59 | collects the results at each inference batch, and return the result in total 60 | param orders: the order in updating values 61 | ''' 62 | def __init__(self, orders=1): 63 | self.avg_value = 0 64 | self.tot_weight = 0 65 | self.orders = orders 66 | 67 | def update(self, value, weight=1.0): 68 | value = float(value) 69 | 70 | if self.orders == 1: 71 | update_step = self.tot_weight/(self.tot_weight+weight) 72 | self.avg_value = self.avg_value*update_step + value*(1-update_step) 73 | self.tot_weight += weight 74 | 75 | 76 | def avg(self): 77 | 78 | return self.avg_value -------------------------------------------------------------------------------- /IGL_Bench/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/backbone/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/backbone/gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch_geometric.nn import GCNConv, global_add_pool, global_mean_pool, global_max_pool 5 | from torch_geometric.nn import global_add_pool 6 | 7 | class GCN_graph(torch.nn.Module): 8 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, pooling='sum'): 9 | super(GCN_graph, self).__init__() 10 | 11 | self.n_layer = n_layer 12 | self.dropout = dropout 13 | 14 | if pooling == 'sum': 15 | self.pool = global_add_pool 16 | elif pooling == 'mean': 17 | 
self.pool = global_mean_pool 18 | elif pooling == 'max': 19 | self.pool = global_max_pool 20 | else: 21 | raise ValueError(f"Unsupported pooling method: {pooling}. Choose from 'sum', 'mean', 'max'.") 22 | 23 | self.convs = nn.ModuleList() 24 | for i in range(n_layer): 25 | in_dim = n_feat if i == 0 else n_hidden 26 | self.convs.append( 27 | GCNConv(in_dim, n_hidden) 28 | ) 29 | 30 | self.out_layer = nn.Linear(n_hidden, n_class) 31 | 32 | def forward(self, x, edge_index, batch): 33 | for conv in self.convs: 34 | x = conv(x, edge_index) 35 | x = F.relu(x) 36 | x = F.dropout(x, p=self.dropout, training=self.training) 37 | 38 | x = self.pool(x, batch) 39 | x = self.out_layer(x) 40 | return x 41 | 42 | def encode(self, x, edge_index, batch): 43 | for conv in self.convs: 44 | x = conv(x, edge_index) 45 | x = F.relu(x) 46 | x = F.dropout(x, p=self.dropout, training=self.training) 47 | x = self.pool(x, batch) 48 | return x 49 | 50 | def cls(self, encoded_features): 51 | return self.out_layer(encoded_features) 52 | 53 | class GCNLayer(nn.Module): 54 | def __init__(self, n_feat, n_hidden, bias=False, batch_norm=False): 55 | super(GCNLayer, self).__init__() 56 | self.weight = torch.Tensor(n_feat, n_hidden) 57 | self.weight = nn.Parameter(nn.init.xavier_uniform_(self.weight)) 58 | if bias: 59 | self.bias = torch.zeros(n_hidden) 60 | self.bias = nn.Parameter(self.bias) # xavier init is undefined for 1-D tensors, so the bias is zero-initialized 61 | else: 62 | self.register_parameter('bias', None) 63 | 64 | self.bn = nn.BatchNorm1d(n_hidden) if batch_norm else None 65 | 66 | 67 | def forward(self, input, adj, batch_norm=True): 68 | support = torch.matmul(input, self.weight) 69 | output = torch.matmul(adj, support) 70 | if self.bias is not None: 71 | output = output + self.bias 72 | if self.bn is not None and batch_norm: 73 | output = self.compute_bn(output) 74 | return output 75 | 76 | 77 | def compute_bn(self, x): 78 | if len(x.shape) == 2: 79 | return self.bn(x) 80 | else: 81 | return self.bn(x.view(-1, x.size(-1))).view(x.size()) 82 | 83 | 84 | class GCN_node_dense(nn.Module): 85 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, batch_norm=False): 86 | super(GCN_node_dense, self).__init__() 87 | self.dropout = dropout 88 | 89 | self.graph_encoders = nn.ModuleList() 90 | self.graph_encoders.append(GCNLayer(n_feat, n_hidden, batch_norm=batch_norm)) 91 | 92 | for _ in range(n_layer - 2): 93 | self.graph_encoders.append(GCNLayer(n_hidden, n_hidden, batch_norm=batch_norm)) 94 | 95 | self.graph_encoders.append(GCNLayer(n_hidden, n_class, batch_norm=False)) 96 | 97 | 98 | def forward(self, x, adj): 99 | for i, encoder in enumerate(self.graph_encoders[:-1]): 100 | x = F.relu(encoder(x, adj)) 101 | x = F.dropout(x, self.dropout, training=self.training) 102 | 103 | x = self.graph_encoders[-1](x, adj) 104 | return x 105 | 106 | class GCN_node_sparse(nn.Module): 107 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, batch_norm=False): 108 | super(GCN_node_sparse, self).__init__() 109 | self.dropout = dropout 110 | 111 | self.graph_encoders = nn.ModuleList() 112 | self.graph_encoders.append(GCNConv(n_feat, n_hidden)) 113 | 114 | for _ in range(n_layer - 2): 115 | self.graph_encoders.append(GCNConv(n_hidden, n_hidden)) 116 | 117 | self.graph_encoders.append(GCNConv(n_hidden, n_class)) 118 | 119 | self.bn = nn.ModuleList([nn.BatchNorm1d(n_hidden) for _ in range(n_layer - 1)]) if batch_norm else None 120 | 121 | def forward(self, x, edge_index, edge_weight=None): 122 | for i, encoder in enumerate(self.graph_encoders[:-1]): 123 
| x = encoder(x, edge_index, edge_weight) 124 | if self.bn is not None: 125 | x = self.bn[i](x) 126 | x = F.relu(x) 127 | x = F.dropout(x, self.dropout, training=self.training) 128 | 129 | x = self.graph_encoders[-1](x, edge_index, edge_weight) 130 | return x -------------------------------------------------------------------------------- /IGL_Bench/backbone/gin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear, BatchNorm1d, ReLU, Sequential, ModuleList 4 | from torch_geometric.nn import GINConv, global_add_pool, global_mean_pool, global_max_pool 5 | 6 | class GIN_graph(torch.nn.Module): 7 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, pooling='sum'): 8 | super(GIN_graph, self).__init__() 9 | 10 | self.n_layer = n_layer 11 | self.dropout = dropout 12 | 13 | if pooling == 'sum': 14 | self.pool = global_add_pool 15 | elif pooling == 'mean': 16 | self.pool = global_mean_pool 17 | elif pooling == 'max': 18 | self.pool = global_max_pool 19 | else: 20 | raise ValueError(f"Unsupported pooling method: {pooling}. Choose from 'sum', 'mean', 'max'.") 21 | 22 | self.convs = ModuleList() 23 | for i in range(n_layer): 24 | in_dim = n_feat if i == 0 else n_hidden 25 | self.convs.append( 26 | GINConv( 27 | Sequential( 28 | Linear(in_dim, n_hidden), 29 | BatchNorm1d(n_hidden), 30 | ReLU(), 31 | Linear(n_hidden, n_hidden), 32 | ReLU() 33 | ) 34 | ) 35 | ) 36 | 37 | self.out_layer = Linear(n_hidden, n_class) 38 | 39 | def forward(self, x, edge_index, batch): 40 | for conv in self.convs: 41 | x = conv(x, edge_index) 42 | x = F.dropout(x, p=self.dropout, training=self.training) 43 | 44 | x = self.pool(x, batch) 45 | x = self.out_layer(x) 46 | return x 47 | 48 | def encode(self, x, edge_index, batch): 49 | for conv in self.convs: 50 | x = conv(x, edge_index) 51 | x = F.dropout(x, p=self.dropout, training=self.training) 52 | x = self.pool(x, batch) 53 | return x 54 | 55 | def cls(self, encoded_features): 56 | return self.out_layer(encoded_features) -------------------------------------------------------------------------------- /IGL_Bench/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .util import load_conf -------------------------------------------------------------------------------- /IGL_Bench/config/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import argparse 4 | 5 | def load_conf(task: str, imbtype: str, algorithm: str, to_parser: bool = True): 6 | config_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../config")) 7 | config_path = os.path.join(config_dir, task, imbtype, algorithm + ".yml") 8 | 9 | print(f"Load config file from: {config_path}") 10 | 11 | if not os.path.exists(config_path): 12 | raise FileNotFoundError(f"Config file not found: {config_path}") 13 | 14 | with open(config_path, 'r') as file: 15 | try: 16 | config = yaml.safe_load(file) 17 | except yaml.YAMLError as e: 18 | raise ValueError(f"Error parsing YAML file: {e}") 19 | 20 | if not to_parser: 21 | return config 22 | 23 | parser = argparse.ArgumentParser(description=f"Configuration for {algorithm}") 24 | 25 | for key, value in config.items(): 26 | if isinstance(value, bool): 27 | if value: 28 | parser.add_argument(f"--{key}", action="store_true", default=True, 29 | help=f"Enable {key} (default: {value})") 30 | 
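# Boolean config keys are exposed as paired, mutually overriding flags:
# --{key} (store_true) and --no-{key} (store_false) share the same dest, so a
# YAML default of either polarity can be flipped from the command line without
# editing the config file.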
parser.add_argument(f"--no-{key}", dest=key, action="store_false", 31 | help=f"Disable {key}") 32 | else: 33 | parser.add_argument(f"--{key}", action="store_true", default=False, 34 | help=f"Enable {key} (default: {value})") 35 | parser.add_argument(f"--no-{key}", dest=key, action="store_false", 36 | help=f"Disable {key} (default: {value})") 37 | else: 38 | parser.add_argument(f"--{key}", type=type(value), default=value, 39 | help=f"{key} (default: {value})") 40 | 41 | return parser.parse_args() -------------------------------------------------------------------------------- /IGL_Bench/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Dataset -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/DD/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/DD/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/DD/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/DD/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/DD/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/DD/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_high.pt -------------------------------------------------------------------------------- 
/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_high.pt -------------------------------------------------------------------------------- 
/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/load_node.py: -------------------------------------------------------------------------------- 1 | import scipy.sparse as sp 2 | import numpy as np 3 | import torch 4 | import random 5 | import os 6 | from torch_geometric.datasets import Planetoid 7 | from torch_geometric.datasets import Amazon,Actor,WikipediaNetwork 8 | from ogb.nodeproppred import PygNodePropPredDataset 9 | import networkx as nx 10 | 11 | def load_node_data(data_name, data_path): 12 | if data_name == 'ogbn-arxiv': 13 | path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..',data_path, 'ogb') 14 | else: 15 | path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..',data_path, 'pyG') 16 | 17 | data_path = os.path.join(path, data_name) 18 | data_dict = {'Cora':'planetoid','CiteSeer':'planetoid','PubMed':'planetoid', 19 | 'Photo':'amazon','Computers':'amazon','Actor':'Actor', 20 | 'Chameleon':'WikipediaNetwork','Squirrel':'WikipediaNetwork','ogbn-arxiv':'ogbn'} 21 | target_type = data_dict[data_name] 22 | if target_type == 'amazon': 23 | target_dataset = Amazon(data_path, name=data_name) 24 | elif target_type == 'planetoid': 25 | target_dataset = Planetoid(data_path, name=data_name) 26 | elif target_type == 'WikipediaNetwork': 27 | target_dataset = WikipediaNetwork(root=data_path, name=data_name, geom_gcn_preprocess=True) 28 | elif target_type == 'Actor': 29 | target_dataset = Actor(data_path) 30 | elif data_name == 'ogbn-arxiv': 31 | target_dataset = PygNodePropPredDataset(root=data_path, name='ogbn-arxiv') 32 | 33 | 
target_data = target_dataset[0] 34 | features = target_data.x 35 | 36 | if data_name in ['Cora', 'CiteSeer']: 37 | features = normalize_features(features) 38 | features = torch.FloatTensor(np.array(features)) 39 | 40 | if data_name not in ['ogbn-arxiv', 'PubMed']: 41 | adj = index2dense(target_data.edge_index, target_data.num_nodes) 42 | adj = nx.adjacency_matrix(nx.from_numpy_array(adj)) 43 | adj = adj + sp.eye(adj.shape[0]) 44 | adj_norm = normalize_sparse_adj(adj) 45 | adj_norm = torch.Tensor(adj_norm.todense()) 46 | adj = torch.Tensor(adj.todense()) 47 | target_data.adj = adj 48 | target_data.adj_norm = adj_norm 49 | 50 | target_data.x = features 51 | 52 | if target_data.y.dim() == 2: 53 | if target_data.y.size(1) > 1: 54 | target_data.y = target_data.y.argmax(dim=1) 55 | else: 56 | target_data.y = target_data.y.squeeze(1) 57 | 58 | return target_data 59 | 60 | def normalize_sparse_adj(mx): 61 | rowsum = np.array(mx.sum(1)) 62 | r_inv_sqrt = np.power(rowsum, -0.5).flatten() 63 | r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0. 64 | r_mat_inv_sqrt = sp.diags(r_inv_sqrt) 65 | return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt) 66 | 67 | def normalize_features(mx): 68 | rowsum = np.array(mx.sum(1)) 69 | r_inv = np.power(rowsum, -1).flatten() 70 | r_inv[np.isinf(r_inv)] = 0. 71 | r_mat_inv = sp.diags(r_inv) 72 | mx = r_mat_inv.dot(mx) 73 | return mx 74 | 75 | def index2dense(edge_index, nnode): 76 | idx = edge_index.numpy() 77 | adj = np.zeros((nnode,nnode)) 78 | adj[(idx[0], idx[1])] = 1 79 | # adj is a dense 0/1 adjacency matrix of shape (nnode, nnode) 80 | 81 | return adj -------------------------------------------------------------------------------- /IGL_Bench/manage/__init__.py: -------------------------------------------------------------------------------- 1 | from .runner import Manager -------------------------------------------------------------------------------- /IGL_Bench/manage/runner.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import numpy as np 3 | import random 4 | import torch 5 | 6 | def set_seed(seed): 7 | random.seed(seed) 8 | np.random.seed(seed) 9 | torch.manual_seed(seed) 10 | if torch.cuda.is_available(): 11 | torch.cuda.manual_seed_all(seed) 12 | 13 | class Manager: 14 | def __init__(self, config, dataset): 15 | self.config = config 16 | self.dataset = dataset 17 | self.solver = self.initialize_solver() 18 | 19 | def initialize_solver(self): 20 | solver_name = f"{self.config.algorithm}_{self.config.task}_solver" 21 | module_path = f"IGL_Bench.algorithm.{self.config.algorithm}.solver" 22 | try: 23 | module = importlib.import_module(module_path) 24 | solver_class = getattr(module, solver_name) 25 | return solver_class(self.config, self.dataset) 26 | except (ModuleNotFoundError, AttributeError) as e: 27 | raise ImportError(f"Failed to import solver {solver_name} from {module_path}: {e}") 28 | 29 | def run(self, num_runs=1, random_seed=1): 30 | all_acc = [] 31 | all_bacc = [] 32 | all_mf1 = [] 33 | all_roc = [] 34 | 35 | for run in range(num_runs): 36 | print(f"Run {run + 1}/{num_runs} for algorithm {self.solver.__class__.__name__}") 37 | set_seed(random_seed + run) 38 | self.solver.train() 39 | acc, bacc, mf1, roc = self.solver.test() 40 | 41 | all_acc.append(acc) 42 | all_bacc.append(bacc) 43 | all_mf1.append(mf1) 44 | all_roc.append(roc) 45 | 46 | avg_acc = np.mean(all_acc) * 100 47 | std_acc = np.std(all_acc) * 100 48 | avg_bacc = np.mean(all_bacc) * 100 49 | std_bacc = np.std(all_bacc) * 100 50 | avg_mf1 = np.mean(all_mf1) * 100 51 | std_mf1 =
np.std(all_mf1) * 100 52 | avg_roc = np.mean(all_roc) * 100 53 | std_roc = np.std(all_roc) * 100 54 | 55 | self.print_results(avg_acc, std_acc, avg_bacc, std_bacc, avg_mf1, std_mf1, avg_roc, std_roc) 56 | 57 | def print_results(self, avg_acc, std_acc, avg_bacc, std_bacc, avg_mf1, std_mf1, avg_roc, std_roc): 58 | print(f"\nTest results for {self.config.algorithm} (averaged across runs):") 59 | print("+----------------------+---------------------+")  # two columns, matching the rows below 60 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("Accuracy", avg_acc, std_acc)) 61 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("Balanced Accuracy", avg_bacc, std_bacc)) 62 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("Macro F1", avg_mf1, std_mf1)) 63 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("ROC-AUC", avg_roc, std_roc)) 64 | print("+----------------------+---------------------+") 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 RingBDStack 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include config * 2 | -------------------------------------------------------------------------------- /config/graph/class/DataDec.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DataDec' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 2 5 | lr: 0.005 6 | hidden_dim: 256 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | batch_size: 128 11 | 12 | prune_percent: 0.25 13 | random_prune_percent: 0.25 14 | biggest_prune_percent: 0.75 15 | explore_rate: 0.1 16 | 17 | is_error_rank: False 18 | fine_tune_ratio: 0.3 -------------------------------------------------------------------------------- /config/graph/class/G2GNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'G2GNN' 2 | task: 'graph' 3 | backbone: 'GCN' 4 | setting: 'knn_aug' 5 | aug_num: 2 6 | kernel_type: SP 7 | knn_layer: 3 8 | knn_nei_num: 3 9 | n_layer: 2 10 | lr: 0.005 11 | hidden_dim: 128 12 | dropout: 0.5 13 | weight_decay: 0.01 14 | epoch: 500 15 | use_batch_norm: true 16 | mask_node_ratio: 0.0 17 | drop_edge_ratio: 0.005 18 | temp: 0.5 -------------------------------------------------------------------------------- /config/graph/class/GCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'graph' 3 | n_layer: 2 4 | lr: 0.005 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | use_batch_norm: true -------------------------------------------------------------------------------- /config/graph/class/GIN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GIN' 2 | task: 'graph' 3 | n_layer: 3 4 | lr: 0.005 5 | hidden_dim: 256 6 | dropout: 0.5 7 | weight_decay: 0.000 8 | epoch: 500 9 | use_batch_norm: true -------------------------------------------------------------------------------- /config/graph/class/ImGKB.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'ImGKB' 2 | task: 'graph' 3 | n_layer: 3 4 | backbone: 'GIN' 5 | batch_size: 128 6 | lr: 0.005 7 | hidden_dim: 96 8 | dropout: 0.5 9 | weight_decay: 0.0005 10 | epoch: 500 11 | 12 | hidden_graphs: 6 13 | size_hidden_graphs: 4 14 | max_step: 1 15 | beta: 0.3 -------------------------------------------------------------------------------- /config/graph/class/TopoImb.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TopoImb' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 3 5 | lr: 0.002 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | batch_size: 128 11 | use_key: False 12 | att: 'dp' 13 | reweight_weight: 0.2 14 | reweight_lr: 0.005 15 | adv_step: 1 16 | shared_encoder: False 17 | re_task: 'wlcls' 18 | n_mem: 19 | - 8 20 | - 8 21 | - 8 22 | - 8 23 | - 8 -------------------------------------------------------------------------------- /config/graph/topology/GIN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GIN' 2 | task: 'graph' 3 | n_layer: 3 4 | lr: 0.01 5 | hidden_dim: 32 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | use_batch_norm: true 10 | batch_size: 32
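11 | 12 | # Note: load_conf() in IGL_Bench/config/util.py exposes every key above as a 13 | # command-line flag (booleans get paired --<key>/--no-<key> switches, other 14 | # keys keep their YAML-inferred type), so these defaults can be overridden at 15 | # launch, e.g. with a hypothetical entry script: 16 | #   python run.py --hidden_dim 64 --lr 0.001 --no-use_batch_norm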
-------------------------------------------------------------------------------- /config/graph/topology/SOLTGNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'SOLTGNN' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 2 5 | lr: 0.01 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | use_batch_norm: true 9 | device: 0 10 | batch_size: 32 11 | epochs: 500 12 | hidden_dim: 32 13 | graph_pooling_type: "sum" 14 | degree_as_tag: false 15 | alpha: 0.5 16 | mu1: 1.0 17 | mu2: 1.0 18 | lbd: 0.0001 19 | dm: 64 20 | K: 72 21 | n_n: 1 22 | n_g: 1 23 | patience: 100 24 | -------------------------------------------------------------------------------- /config/graph/topology/TopoImb.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TopoImb' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 3 5 | lr: 0.005 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | use_batch_norm: true 11 | datatype: 'graph' 12 | topo_initial: 'label' 13 | shared_encoder: false 14 | cls_layer: 2 15 | res: false 16 | reweighter: 'struct' 17 | n_mem: [8,8,8,8,8] 18 | use_key: false 19 | att: 'dp' 20 | reweight_weight: 0.2 21 | reweight_task: ['wlcls'] 22 | pretrain_reweighter: false 23 | 24 | # original config 25 | imb_ratio: 0.9 26 | nlayer: 3 27 | nhid: 128 28 | nclass: 2 29 | nfeat: 64 30 | epochs: 500 31 | test_interval: 20 32 | batch_size: 128 33 | batch_nums: 6000 34 | sup_ratio: 0.1 35 | val_ratio: 0.3 36 | test_raion: 0.6 37 | model: 'gin' 38 | explainer: 'gnnexplainer' 39 | directional: false 40 | edge_size: 0.05 41 | edge_ent: 1.0 42 | expl_loss: 'Tgt' 43 | aligner: 'emb' 44 | aligner_combine_weight: 1.0 45 | align_emb: false 46 | align_with_grad: false 47 | split: 0 48 | reweight_lr: 0.01 49 | adv_step: 3 50 | EM: false 51 | intra_im_ratio: 0.1 52 | inter_im_ratio: 0.6 53 | 54 | -------------------------------------------------------------------------------- /config/node/class/DPGNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DPGNN' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_hidden: 256 5 | epochs: 3000 6 | 7 | episodic_samp: 1 8 | eta: 3 9 | ssl: 'yes' 10 | label_prop: 'yes' 11 | lamb1: 10 12 | lamb2: 20 -------------------------------------------------------------------------------- /config/node/class/DRGCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DRGCN' 2 | task: 'node' 3 | n_layer: 2 4 | hidden_dim: 164 5 | dropout_prob: 0 6 | weight_decay: 0.0005 7 | epoch: 500 8 | least_epoch: 40 9 | 10 | noise_dim: 20 11 | learning_rate: 0.005 -------------------------------------------------------------------------------- /config/node/class/ImGAGN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'ImGAGN' 2 | task: 'node' 3 | 4 | ratio_generated: 1.0 5 | hidden: 128 6 | dropout: 0.5 7 | epochs_gen: 10 8 | lr: 0.01 9 | weight_decay: 0.0005 10 | epochs: 100 -------------------------------------------------------------------------------- /config/node/topo_global/GCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 -------------------------------------------------------------------------------- /config/node/topo_global/HyperIMBA.yml: 
-------------------------------------------------------------------------------- 1 | algorithm: 'HyperIMBA' 2 | backbone: 'GCN' 3 | task: 'node' 4 | n_layer: 2 5 | lr: 0.0075 6 | hidden_dim: 64 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | least_epoch: 100 11 | 12 | loss_hp: 1 -------------------------------------------------------------------------------- /config/node/topo_global/PASTEL.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'PASTEL' 2 | task: 'node' 3 | backbone: 'GCN' 4 | 5 | hidden_size: 64 6 | 7 | # Regularization 8 | lr: 0.0075 9 | dropout: 0.5 10 | feat_adj_dropout: 0.5 11 | gl_dropout: 0.5 12 | 13 | # GNN 14 | gnn: 'gcn' 15 | graph_learn: True 16 | graph_skip_conn: 0.8 17 | update_adj_ratio: 0.1 18 | graph_learn_regularization: True 19 | smoothness_ratio: 0.2 20 | degree_ratio: 0 21 | sparsity_ratio: 0 22 | graph_learn_ratio: 0 23 | graph_learn_hidden_size: 70 24 | graph_learn_epsilon: 0 25 | graph_learn_topk: null 26 | graph_learn_num_pers: 4 27 | graph_hops: 2 28 | 29 | # Training 30 | optimizer: 'adam' 31 | weight_decay: 0.0005 32 | lr_patience: 2 33 | lr_reduce_factor: 0.5 34 | grad_clipping: null 35 | grad_accumulated_steps: 1 36 | early_stop_metric: 'acc' 37 | pretrain_epoch: 0 38 | max_iter: 10 39 | eps_adj: 4.0e-5 40 | shuffle: True 41 | epoch: 200 42 | least_epoch: 100 43 | patience: 1000 44 | verbose: 20 45 | print_every_epochs: 50 46 | pe_every_epochs: 50 47 | gpr_every_epochs: 50 48 | num_anchors: 0 49 | 50 | save_params: True -------------------------------------------------------------------------------- /config/node/topo_global/ReNode.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'ReNode' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_layer: 2 5 | lr: 0.01 6 | hidden_dim: 64 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | least_epoch: 40 11 | 12 | pagerank_prob: 0.85 13 | rn_base_weight: 0.5 14 | rn_scale_weight: 1.0 -------------------------------------------------------------------------------- /config/node/topo_global/TAM.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TAM' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_layer: 2 5 | lr: 0.01 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | least_epoch: 40 11 | 12 | warmup: 5 13 | tam: True 14 | tam_alpha: 2.5 15 | tam_beta: 0.5 16 | temp_phi: 1.2 -------------------------------------------------------------------------------- /config/node/topo_global/TOPOAUC.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TOPOAUC' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_layer: 2 5 | lr: 0.005 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0 9 | epoch: 500 10 | least_epoch: 40 11 | 12 | loss: 'ExpGAUC' 13 | weight_global_dim: 64 14 | weight_inter_dim: 64 15 | weight_sub_dim: 64 16 | topo_dim: 64 17 | pagerank_prob: 0.15 18 | beta: 0.5 19 | gamma: 0.5 -------------------------------------------------------------------------------- /config/node/topo_local/COLDBREW.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'COLDBREW' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.2 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 10 | patience: 100 11 | exp_mode: 'coldbrew' 12 | batch_size: 65536 13 | epochs: 1500 14 | samp_size_p: 200 15 | samp_size_n_train:
200 16 | samp_size_n_test_times_p: 20 17 | dim_learnable_input: 0 18 | unify_mlps: 0 19 | force_set_to_best_config: 1 20 | want_headtail: 1 21 | num_layers: 2 22 | studentMLP__skip_conn_T_and_res_blks: '' 23 | StudentMLP__dim_model: -1 24 | studentMLP__opt_lr: '' 25 | LP__which_corr_and_DAD: '' 26 | LP__num_propagations: -1 27 | LP__alpha: -1 28 | SEMLP_topK_2_replace: 2 29 | SEMLP__include_part1out: 1 30 | dropout_MLP: 0.2 31 | SEMLP_part1_arch: '2layer' 32 | has_proj2class: 0 33 | whetherHasSE: '100' 34 | se_reg: 32 35 | graphMLP_reg: 0.0 36 | graphMLP_tau: 2.0 37 | graphMLP_r: 3 38 | change_to_featureless: 0 39 | do_deg_analyze: 1 40 | train_which: 'TeacherGNN' 41 | use_special_split: 1 42 | optfun: 'torch.optim.Adam' 43 | manual_assign_GPU: -9999 44 | random_seed: 100 45 | N_exp: 1 46 | resume: False 47 | cuda: True 48 | cuda_num: 0 49 | records_desc: 'res_connection' 50 | records_path: '.' 51 | compare_model: 0 52 | type_model: 'GCN' 53 | type_trick: 'Initial+BatchNorm' 54 | layer_agg: 'concat' 55 | res_alpha: 0.1 56 | multi_label: False 57 | dim_hidden: 64 58 | transductive: True 59 | float_or_double: 'float' 60 | type_norm: 'None' 61 | adj_dropout: 0.5 62 | edge_dropout: 0.2 63 | node_norm_type: 'n' 64 | skip_weight: None 65 | num_groups: None 66 | prog: '' 67 | rexName: 'res.npy' 68 | graph_dropout: 0.2 69 | layerwise_dropout: False -------------------------------------------------------------------------------- /config/node/topo_local/DEMONet.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DEMONet' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.005 5 | hidden_dim: 64 6 | dropout: 0.1 7 | weight_decay: 0.0005 8 | epoch: 1000 9 | least_epoch: 40 10 | patience: 100 11 | hash_dim: 256 12 | n_hash_kernel: 1 13 | n_layers: 2 -------------------------------------------------------------------------------- /config/node/topo_local/GCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 10 | patience: 100 -------------------------------------------------------------------------------- /config/node/topo_local/GRAPHPATCHER.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GRAPHPATCHER' 2 | task: 'node' 3 | pretrain: 4 | hid_dim: [64] 5 | lr: 1.0e-3 6 | weight_decay: 5.0e-4 7 | norm: 'identity' 8 | mp_norm: 'right' 9 | generator: 10 | target_gnn: '' 11 | backbone: 'gcn' 12 | hid_dim: [ 1024 ] 13 | warmup_steps: 100 14 | lr: 1.0e-4 15 | device: 0 16 | weight_decay: 5.0e-4 17 | degree_train: 1 18 | drop_ratio: [ ] 19 | three_layer: False 20 | k: 3 21 | generation_iteration: -1 22 | total_generation_iteration: 5 23 | norm: 'identity' 24 | training_iteration: 10000 25 | dropout: 0.0 26 | batch_size: 128 27 | accumulate_step: 1 28 | eval_iteration: 100 29 | patience: 30 30 | bar: False 31 | workers: 10 32 | mp_norm: 'right' 33 | seed: 123 34 | -------------------------------------------------------------------------------- /config/node/topo_local/LTE4G.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 10 | patience: 100 11 | im_class_num: 3 12 | im_ratio: 1 13 | layer: 'gcn' 14 | rw: 0.000001 15 | ep_pre: 50 16 | ep:
10000 17 | ep_early: 1000 18 | add_sl: True 19 | adj_norm_1: True 20 | adj_norm_2: False 21 | nhid: 64 22 | nhead: 1 23 | wd: 5e-4 24 | num_seed: 5 25 | is_normalize: False 26 | cls_og: 'GNN' 27 | type: 'mid' 28 | embedder: 'lte4g' 29 | rec: False 30 | lr_expert: 0.01 31 | criterion: 'mean' 32 | sep_class: 'pareto_73' 33 | sep_degree: 5 34 | class_weight: True 35 | gamma: 1 36 | alpha: 0.6 37 | T: 1 38 | expert_ep: 1000 39 | curriculum_ep: 500 40 | pretrained_encoder: False 41 | save_encoder: False -------------------------------------------------------------------------------- /config/node/topo_local/RAWLSGCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'RAWLSGCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.05 5 | hidden_dim: 64 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 100 9 | least_epoch: 40 10 | patience: 100 11 | model: 'rawlsgcn_graph' 12 | loss: 'negative_log_likelihood' -------------------------------------------------------------------------------- /config/node/topo_local/TAILGNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TAILGNN' 2 | task: 'node' 3 | lr: 0.01 4 | hidden_dim: 128 5 | dropout: 0.5 6 | weight_decay: 0.0005 7 | epoch: 1000 8 | least_epoch: 40 9 | patience: 200 10 | hidden: 32 11 | eta: 0.1 12 | mu: 0.001 13 | lamda: 0.0001 14 | k: 5 15 | arch: 1 16 | id: 0 17 | ablation: 0 18 | g_sigma: 0.1 -------------------------------------------------------------------------------- /dataset/link.txt: -------------------------------------------------------------------------------- 1 | Datasets are available on Google Drive due to space limits on GitHub: https://drive.google.com/drive/folders/1GFfu6oXEaaB8-DkgBEsIXMid_i3br7HI?usp=drive_link -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6c3f684e", 6 | "metadata": {}, 7 | "source": [ 8 | "# 🧪 IGL-Bench: Quick Start for Node and Graph Classification" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "df4c4617", 14 | "metadata": {}, 15 | "source": [ 16 | "\n", 17 | "This notebook demonstrates how to use the **IGL-Bench** framework for running node-level and graph-level classification tasks under various imbalance settings. 
\n", 18 | "It shows how to:\n", 19 | "- Initialize datasets with imbalance configurations\n", 20 | "- Load benchmark configurations\n", 21 | "- Run a selected algorithm on the data using the unified interface.\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "6ef10ee1", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Step 1: Import Benchmark Framework\n", 32 | "import IGL_Bench as igl" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "4fd58a73", 38 | "metadata": {}, 39 | "source": [ 40 | "## 🔹 Node-Level Classification Task" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "40b9512f", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# Step 2: Initialize Node-Level Dataset\n", 51 | "node_dataset_builder = igl.dataset.Dataset(\n", 52 | " task=\"node\",\n", 53 | " data_name=\"Cora\", # Choose from: ['Cora', 'CiteSeer', 'PubMed', 'Photo', 'Computers', 'ogbn-arxiv', 'Chameleon', 'Squirrel', 'Actor']\n", 54 | " imb_type=\"topo_global\", # Choose from: ['class', 'topo_local', 'topo_global']\n", 55 | " imb_level=\"high\" # Choose from: ['low', 'mid', 'high']\n", 56 | ")" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "id": "dc9c85ba", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Step 3: Load Node-Level Dataset\n", 67 | "node_dataset = node_dataset_builder.load_dataset()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "id": "74a4cc02", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# Step 4: Load Configuration for Node-Level Algorithm\n", 78 | "node_config = igl.config.load_conf(\n", 79 | " task=\"node\",\n", 80 | " imbtype=\"topo_global\",\n", 81 | " algorithm=\"PASTEL\" # Replace with any implemented algorithm\n", 82 | ")" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "id": "f4e11f62", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# Step 5: Run Node-Level Solver\n", 93 | "node_solver = igl.manage.Manager(node_config, node_dataset)\n", 94 | "node_solver.run(num_runs=5)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "97ff816c", 100 | "metadata": {}, 101 | "source": [ 102 | "## 🔸 Graph-Level Classification Task" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "id": "efb42c56", 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# Step 6: Initialize Graph-Level Dataset\n", 113 | "graph_dataset_builder = igl.dataset.Dataset(\n", 114 | " task=\"graph\",\n", 115 | " data_name=\"D&D\", # Choose from: ['PTC-MR', 'FRANKENSTEIN', 'PROTEINS', 'IMDB-B', 'REDDIT-B', 'ogbg-molhiv', 'COLLAB', 'D&D']\n", 116 | " imb_type=\"class\", # Choose from: ['class', 'topology']\n", 117 | " imb_level=\"low\" # Choose from: ['low', 'mid', 'high']\n", 118 | ")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "eb15f120", 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# Step 7: Load Graph-Level Dataset\n", 129 | "graph_dataset = graph_dataset_builder.load_dataset()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "d91ed1e5", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# Step 8: Load Configuration for Graph-Level Algorithm\n", 140 | "graph_config = igl.config.load_conf(\n", 141 | " task=\"graph\",\n", 142 | " 
imbtype=\"class\",\n", 143 | " algorithm=\"G2GNN\" # Replace with any implemented algorithm\n", 144 | ")" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "d88600d0", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# Step 9: Run Graph-Level Solver\n", 155 | "graph_solver = igl.manage.Manager(graph_config, graph_dataset)\n", 156 | "graph_solver.run(num_runs=10)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "id": "1cdffa79", 162 | "metadata": {}, 163 | "source": [ 164 | "\n", 165 | "### ✅ Summary\n", 166 | "\n", 167 | "This notebook walked through the unified API provided by IGL-Bench for running experiments on imbalanced graph datasets. \n", 168 | "You can now:\n", 169 | "- Switch datasets, imbalance settings, and algorithms easily\n", 170 | "- Modify the number of runs or inspect detailed solver outputs\n", 171 | "- Customize training pipelines by editing the algorithm-specific configuration files (`config/`) for hyperparameter tuning\n", 172 | "- Extend this workflow with visualization, logging, or evaluation as needed\n" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "language_info": { 178 | "name": "python" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 5 183 | } 184 | -------------------------------------------------------------------------------- /figs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/logo.png -------------------------------------------------------------------------------- /figs/package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/package.png -------------------------------------------------------------------------------- /figs/scope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/scope.png -------------------------------------------------------------------------------- /figs/timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/timeline.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core scientific computing stack 2 | numpy==1.24.4 3 | pandas==2.0.3 4 | scikit_learn==1.3.2 5 | matplotlib==3.7.5 6 | h5py==2.10.0 7 | 8 | # PyTorch & GNN frameworks (no version constraints) 9 | torch 10 | dgl 11 | scipy 12 | torch_scatter 13 | torch_sparse 14 | torch_geometric 15 | 16 | # Benchmark-related libraries 17 | ogb==1.3.6 18 | tqdm==4.66.4 19 | PyYAML==6.0.1 20 | GPUtil==1.4.0 21 | networkx==3.1 22 | 23 | # Graph contrastive learning & curvature 24 | PyGCL==0.1.2 25 | GCL==0.6.11 26 | GraphRicciCurvature==0.5.3.2 27 | GraKeL==0.1.10 28 | 29 | # Debugging & dev tools 30 | ipdb==0.13.13 31 | julia==0.6.2 32 | 33 | # TensorFlow branch (used by DRGCN ) 34 | tensorflow 35 | tensorflow_probability 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from 
setuptools import setup, find_packages 2 | from pathlib import Path 3 | 4 | readme_path = Path(__file__).parent / "README.md" 5 | long_description = readme_path.read_text(encoding="utf-8") 6 | 7 | setup( 8 | name="IGL_Bench", 9 | version="0.1.0", 10 | description="Imbalanced Graph Learning Benchmark", 11 | url="https://github.com/RingBDStack/IGL-Bench", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | packages=find_packages(), 15 | include_package_data=True, 16 | install_requires=[ 17 | "torch>=1.13.1", 18 | "torch-geometric>=2.1.0", 19 | "scipy", 20 | "numpy", 21 | "dgl", 22 | "tqdm", 23 | "scikit_learn", 24 | "ogb", 25 | "networkx" 26 | ], 27 | classifiers=[ 28 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 29 | "License :: OSI Approved :: MIT License", 30 | "Programming Language :: Python :: 3.8", 31 | "Programming Language :: Python :: 3.9", 32 | "Programming Language :: Python :: 3.10", 33 | "Operating System :: OS Independent", 34 | ], 35 | python_requires='>=3.8', 36 | keywords=[ 37 | "graph learning", 38 | "GNN", 39 | "imbalanced learning", 40 | "graph neural networks", 41 | "benchmark" 42 | ] 43 | ) 44 | --------------------------------------------------------------------------------
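A note for extenders: Manager.initialize_solver() in IGL_Bench/manage/runner.py resolves solvers purely by naming convention, importing IGL_Bench.algorithm.<algorithm>.solver and instantiating the class <algorithm>_<task>_solver with (config, dataset). A new algorithm therefore only needs a solver module that honors this contract. The skeleton below is an illustrative sketch, not code from the repository: the algorithm name MYALGO and the method bodies are hypothetical; only the class-naming rule and the train()/test() interface come from runner.py.

# File: IGL_Bench/algorithm/MYALGO/solver.py  (hypothetical algorithm name)

class MYALGO_node_solver:
    """Minimal solver matching the dispatch contract in manage/runner.py."""

    def __init__(self, config, dataset):
        # Manager passes the argparse namespace from load_conf and the
        # dataset object produced by IGL_Bench.dataset.Dataset.
        self.config = config
        self.dataset = dataset

    def train(self):
        # Called once per run by Manager.run(); fit the model here.
        pass

    def test(self):
        # Manager.run() collects these per run, then averages and scales
        # them by 100, so return fractions in [0, 1]:
        # (accuracy, balanced_accuracy, macro_f1, roc_auc)
        return 0.0, 0.0, 0.0, 0.0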