├── Build_Your_Own_IGL.ipynb
├── Dockerfile
├── IGL_Bench
├── __init__.py
├── algorithm
│ ├── COLDBREW
│ │ ├── GCN.py
│ │ ├── GNN_normalizations.py
│ │ ├── Label_propagation_model
│ │ │ ├── LP_Adj.py
│ │ │ ├── diffusion_feature.py
│ │ │ ├── norm_spec.jl
│ │ │ └── outcome_correlation.py
│ │ ├── __init__.py
│ │ ├── norm_tricks.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── DEMONet
│ │ ├── __init__.py
│ │ ├── models.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── DPGNN
│ │ ├── __init__.py
│ │ ├── learn.py
│ │ ├── model.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── DRGCN
│ │ ├── __init__.py
│ │ ├── load_data.py
│ │ ├── models
│ │ │ ├── __init__.py
│ │ │ ├── adversarialNets.py
│ │ │ ├── gmm.py
│ │ │ └── graph.py
│ │ ├── solver.py
│ │ └── sparse.py
│ ├── DataDec
│ │ ├── __init__.py
│ │ ├── contrast.py
│ │ ├── dataloader.py
│ │ ├── model.py
│ │ ├── prune.py
│ │ └── solver.py
│ ├── G2GNN
│ │ ├── __init__.py
│ │ ├── aug.py
│ │ ├── dataloader.py
│ │ ├── kernel.py
│ │ └── solver.py
│ ├── GCN
│ │ ├── __init__.py
│ │ └── solver.py
│ ├── GIN
│ │ ├── __init__.py
│ │ └── solver.py
│ ├── GRAPHPATCHER
│ │ ├── GCN.py
│ │ ├── __init__.py
│ │ ├── data_load.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── HyperIMBA
│ │ ├── GcnHyper.py
│ │ ├── Poincare.py
│ │ ├── __init__.py
│ │ ├── cal.py
│ │ └── solver.py
│ ├── ImGAGN
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ ├── models.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── ImGKB
│ │ ├── __init__.py
│ │ ├── dataloader.py
│ │ ├── inforneck.py
│ │ ├── kernel.py
│ │ ├── layers.py
│ │ ├── loss.py
│ │ ├── model.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── PASTEL
│ │ ├── __init__.py
│ │ ├── cal.py
│ │ ├── eval.py
│ │ ├── graph_clf.py
│ │ ├── graph_learner.py
│ │ ├── model.py
│ │ └── solver.py
│ ├── RAWLSGCN
│ │ ├── RawlsGCN.py
│ │ ├── __init__.py
│ │ ├── solver.py
│ │ └── utils.py
│ ├── ReNode
│ │ ├── __init__.py
│ │ ├── reweight.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── SOLTGNN
│ │ ├── PatternMemory.py
│ │ ├── __init__.py
│ │ ├── backbone.py
│ │ ├── sampling.zip
│ │ ├── solver.py
│ │ ├── subgraph_sample.py
│ │ └── utils.py
│ ├── TAILGNN
│ │ ├── TailGNN.py
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ ├── solver.py
│ │ └── util.py
│ ├── TAM
│ │ ├── __init__.py
│ │ ├── solver.py
│ │ └── tam.py
│ ├── TOPOAUC
│ │ ├── __init__.py
│ │ ├── cal.py
│ │ ├── myloss.py
│ │ ├── solver.py
│ │ └── util.py
│ └── TopoImb
│ │ ├── __init__.py
│ │ ├── layers.py
│ │ ├── model.py
│ │ ├── solver.py
│ │ ├── topo_util.py
│ │ ├── trainer.py
│ │ └── utils.py
├── backbone
│ ├── __init__.py
│ ├── gcn.py
│ └── gin.py
├── config
│ ├── __init__.py
│ └── util.py
├── dataset
│ ├── __init__.py
│ ├── dataset.py
│ ├── graph_topology_imbalance
│ │ ├── COLLAB
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── DD
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── FRANKENSTEIN
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── IMDB-BINARY
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── PROTEINS
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ ├── PTC_MR
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ │ └── REDDIT-BINARY
│ │ │ ├── split_high.pt
│ │ │ ├── split_low.pt
│ │ │ └── split_mid.pt
│ ├── load_graph.py
│ ├── load_node.py
│ └── split.py
└── manage
│ ├── __init__.py
│ └── runner.py
├── LICENSE
├── MANIFEST.in
├── README.md
├── config
├── graph
│ ├── class
│ │ ├── DataDec.yml
│ │ ├── G2GNN.yml
│ │ ├── GCN.yml
│ │ ├── GIN.yml
│ │ ├── ImGKB.yml
│ │ └── TopoImb.yml
│ └── topology
│ │ ├── GIN.yml
│ │ ├── SOLTGNN.yml
│ │ └── TopoImb.yml
└── node
│ ├── class
│ ├── DPGNN.yml
│ ├── DRGCN.yml
│ └── ImGAGN.yml
│ ├── topo_global
│ ├── GCN.yml
│ ├── HyperIMBA.yml
│ ├── PASTEL.yml
│ ├── ReNode.yml
│ ├── TAM.yml
│ └── TOPOAUC.yml
│ └── topo_local
│ ├── COLDBREW.yml
│ ├── DEMONet.yml
│ ├── GCN.yml
│ ├── GRAPHPATCHER.yml
│ ├── LTE4G.yml
│ ├── RAWLSGCN.yml
│ └── TAILGNN.yml
├── dataset
└── link.txt
├── demo.ipynb
├── figs
├── logo.png
├── package.png
├── scope.png
└── timeline.png
├── requirements.txt
└── setup.py

/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM pytorch/pytorch:2.4.0-cuda12.4-cudnn9-runtime
2 |
3 | USER root
4 |
5 | RUN apt-get update && apt-get install -y --no-install-recommends \
6 |     sudo \
7 |     git \
8 |     wget \
9 |     vim \
10 |     ffmpeg \
11 |     libgl1-mesa-glx \
12 |     libglib2.0-0 \
13 |     libssl-dev \
14 |     cmake \
15 |     g++ \
16 |     python3-dev \
17 |     libgomp1 && \
18 |     rm -rf /var/lib/apt/lists/*
19 |
20 | RUN conda install -c conda-forge networkit -y && conda clean -afy
21 |
22 | RUN pip install torchdata==0.7.1
23 |
24 | RUN pip install --no-cache-dir pyg-lib torch-scatter torch-sparse torch-cluster torch-spline-conv \
25 |     -f https://data.pyg.org/whl/torch-2.4.0+cu124.html && \
26 |     pip install --no-cache-dir torch-geometric
27 |
28 | RUN pip install --no-cache-dir pydantic
29 | RUN pip install --no-cache-dir dgl==1.1.2 -f https://data.dgl.ai/wheels/cu124.html
30 |
31 | RUN pip install --no-cache-dir \
32 |     huggingface-hub \
33 |     scipy \
34 |     GPUtil \
35 |     networkx \
36 |     ogb \
37 |     Tree \
38 |     GCL \
39 |     PyGCL \
40 |     PyYAML \
41 |     scikit-learn \
42 |     GraKeL \
43 |     GraphRicciCurvature \
44 |     ipdb \
45 |     dill \
46 |     julia
47 |
48 | ENV DGLBACKEND=pytorch
49 |
50 | RUN echo 'echo "🐳 Welcome to IGL-Bench Dev Container!"' >> ~/.bashrc && \
51 |     echo 'alias ll="ls -alh"' >> ~/.bashrc
52 |
53 | RUN python3 -c "import torch; print('✔️ PyTorch:', torch.__version__)" && \
54 |     python3 -c "import dgl; print('✔️ DGL:', dgl.__version__)" && \
55 |     python3 -c "import torch_geometric; print('✔️ PyG:', torch_geometric.__version__)" && \
56 |     python3 -c "import torchdata; print('✔️ torchdata:', torchdata.__version__)"
57 |
58 | CMD ["/bin/bash"]
59 |
--------------------------------------------------------------------------------
/IGL_Bench/__init__.py:
--------------------------------------------------------------------------------
1 | from . import dataset as dataset
2 | from . import config as config
3 | from . import manage as manage
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/COLDBREW/GNN_normalizations.py:
--------------------------------------------------------------------------------
1 | # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 | # SPDX-License-Identifier: Apache-2.0
3 | import torch
4 | from .norm_tricks import *
5 | from .GCN import TricksComb
6 | from torch import nn
7 | from .utils import D
8 |
9 | class TeacherGNN(nn.Module):
10 |     # This class is the teacher GCN model (with structural embedding) for cold brew
11 |     def __init__(self, args, proj2class=None):
12 |         super().__init__()
13 |         proj2class = proj2class or nn.Identity()
14 |         args.num_classes_bkup = args.num_classes
15 |         args.num_classes = args.dim_commonEmb
16 |         self.args = args
17 |
18 |         if self.args.dim_learnable_input>0:
19 |             embs = torch.randn(args.N_nodes, args.dim_learnable_input)*0.001
20 |             self.embs = nn.Parameter(embs, requires_grad=True)
21 |             self.args.num_feats_bkup = self.args.num_feats
22 |             self.args.num_feats = self.args.dim_learnable_input
23 |
24 |         self.model = GNN_norm(args)
25 |
26 |         self.proj2linkp = nn.Identity()
27 |         self.proj2class = proj2class
28 |         self.dglgraph = None
29 |
30 |     def forward(self, x, edge_index):
31 |         if self.args.TeacherGNN.change_to_featureless:
32 |             x = x*0
33 |         if self.args.dim_learnable_input>0:
34 |             x = self.embs
35 |         commonEmb, self.se_reg_all = self.model(x, edge_index)
36 |         self.out = commonEmb
37 |         return commonEmb
38 |
39 |     def get_3_embs(self, x, edge_index, mask=None, want_heads=True):
40 |         commonEmb = self.forward(x, edge_index)
41 |         emb4classi_full = self.proj2class(commonEmb)
42 |         if want_heads:
43 |             if mask is not None:
44 |                 emb4classi = emb4classi_full[mask]
45 |             else:
46 |                 emb4classi = emb4classi_full
47 |
48 |             emb4linkp = self.proj2linkp(commonEmb)
49 |         else:
50 |             emb4linkp = emb4classi = None
51 |         res = D()
52 |         res.commonEmb, res.emb4classi, res.emb4classi_full, res.emb4linkp = commonEmb, emb4classi, emb4classi_full, emb4linkp
53 |
54 |         return res
55 |
56 |     def get_emb4linkp(self, x, edge_index, mask=None):
57 |         # return embeddings for ALL nodes; get_3_embs returns a D() record, not a tuple
58 |         res = self.get_3_embs(x, edge_index, want_heads=True)
59 |         return res.emb4linkp
60 |
61 |     def graph2commonEmb(self, x, edge_index, train_mask):
62 |         commonEmb = self.forward(x, edge_index)
63 |         commonEmb_train = commonEmb[train_mask]
64 |         return commonEmb_train, commonEmb
65 |
66 | class GNN_norm(nn.Module):
67 |     def __init__(self, args):
68 |         super(GNN_norm, self).__init__()
69 |         self.model = TricksComb(args)
70 |
71 |     def forward(self, x, edge_index):
72 |         return self.model.forward(x, edge_index)
73 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/COLDBREW/Label_propagation_model/norm_spec.jl:
--------------------------------------------------------------------------------
1 | using LinearAlgebra
2 | using LinearMaps
3 | using MAT
4 | using SparseArrays
5 | using Arpack
6 |
7 | using PyCall, SparseArrays
8 |
9 | function scipyCSC_to_julia(A)
10 |     m, n = A.shape
11 |     colPtr = Int[i+1 for i in PyArray(A."indptr")]
12 |     rowVal = Int[i+1 for i in PyArray(A."indices")]
13 |     nzVal = Vector{Float64}(PyArray(A."data"))
14 |     B = SparseMatrixCSC{Float64,Int}(m, n, colPtr, rowVal, nzVal)
15 |     return PyCall.pyjlwrap_new(B)
16 | end
17 |
18 | function read_arxiv(file::String)
19 |     I = Int64[]
20 |     J = Int64[]
21 |     open(file) do f
22 |         for line in eachline(f)
23 |             if line[1] == '#'; continue; end
24 |             data = split(line, ",")
25 |             push!(I, parse(Int64, data[1]))
26 |             push!(J, parse(Int64, data[2]))
27 |         end
28 |     end
29 |     I .+= 1
30 |     J .+= 1
31 |     n = max(maximum(I), maximum(J))
32 |     A = sparse(I, J, 1, n, n)
33 |     A = max.(A, A')
34 |     A = min.(A, 1)
35 |     return A
36 | end
37 |
38 |
39 | function main(PyA, k::Int64)
40 |     m, n
= PyA.shape 41 | colPtr = Int[i+1 for i in PyArray(PyA."indptr")] 42 | rowVal = Int[i+1 for i in PyArray(PyA."indices")] 43 | nzVal = Vector{Float64}(PyArray(PyA."data")) 44 | A = SparseMatrixCSC{Float64,Int}(m, n, colPtr, rowVal, nzVal) 45 | d = vec(sum(A, dims=2)) 46 | τ = sum(d) / length(d) 47 | N = size(A)[1] 48 | 49 | # normalized regularized laplacian 50 | D = Diagonal(1.0 ./ sqrt.(d .+ τ)) 51 | Aop = LinearMap{Float64}(X -> A * X .+ (τ / N) * sum(X), N, N, isposdef=true, issymmetric=true) 52 | NRL = I + D * Aop * D 53 | 54 | (Λ, V) = eigs(NRL, nev=k, tol=1e-6, ncv=2*k+1, which=:LM) 55 | 56 | # axis rotation (not necessary, but could be helpful) 57 | piv = qr(V', Val(true)).jpvt[1:k] 58 | piv_svd = svd(V[piv,:]', full=false) 59 | SCDM_V = V * (piv_svd.U * piv_svd.Vt) 60 | 61 | # save 62 | 63 | return SCDM_V 64 | end 65 | 66 | #A = read_arxiv(ARGS[1]) 67 | #embed = main(A, 128) 68 | #matwrite("$(ARGS[2])_spectral_embedding.mat", Dict("V" => embed), compress=true) 69 | 70 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/COLDBREW/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/COLDBREW/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DEMONet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DEMONet/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DPGNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DPGNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DPGNN/learn.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.algorithm.DPGNN.utils import * 2 | import torch 3 | import copy 4 | 5 | def train(encoder, dist_encoder, prototype, data, optimizer, criterion, args): 6 | encoder.train() 7 | 8 | support, query = episodic_generator( 9 | data, args.episodic_samp, args.classes, data.x.size(0)) 10 | 11 | embedding = encoder(data) 12 | 13 | support_embed = [embedding[support[i]] for i in range(len(args.classes))] 14 | 15 | query_embed = [embedding[query[i]] for i in range(len(args.classes))] 16 | query_size = [query_embed[i].size() for i in range(len(query_embed))] 17 | 18 | query_embed = torch.stack(query_embed, dim=0) 19 | 20 | proto_embed = [prototype(support_embed[i]) 21 | for i in range(len(args.classes))] 22 | 23 | proto_embed = torch.stack(proto_embed, dim=0) # C*D 24 | 25 | query_dist_embed = dist_encoder(query_embed, proto_embed, args.classes) 26 | proto_dist_embed = dist_encoder(proto_embed, proto_embed, args.classes) 27 | 28 | logits = torch.log_softmax( 29 | torch.mm(query_dist_embed, proto_dist_embed), dim=1) 30 | 31 | loss1 = criterion(logits, args.classes) 32 | 33 | # topo 34 | if(args.ssl == 'yes'): 35 | dist_embed = dist_encoder(embedding, proto_embed, args.classes) 36 | loss3 = torch.mean((dist_embed[data.edge_index[0]] * args.deg_inv_sqrt[data.edge_index[0]].view(-1, 1) - 37 | 
dist_embed[data.edge_index[1]] * args.deg_inv_sqrt[data.edge_index[1]].view(-1, 1))**2) 38 | 39 | class_sim = cos_sim_pair(proto_embed) 40 | loss2 = (torch.sum(class_sim) - torch.trace(class_sim)) / \ 41 | ((class_sim.size(0)**2 - class_sim.size(0)) / 2) 42 | else: 43 | loss3 = 0 44 | loss2 = 0 45 | 46 | loss = loss1 + args.lamb1 * loss2 + args.lamb2 * loss3 47 | 48 | optimizer.zero_grad() 49 | loss.backward() 50 | optimizer.step() 51 | 52 | 53 | def test(encoder, dist_encoder, prototype, data, args): 54 | encoder.eval() 55 | 56 | with torch.no_grad(): 57 | embedding = encoder(data) 58 | 59 | support, query = episodic_generator( 60 | data, 1, args.classes, data.x.size(0)) # take all samples in that class 61 | support_embed = [embedding[support[i]] 62 | for i in range(len(args.classes))] 63 | 64 | proto_embed = [prototype(support_embed[i]) 65 | for i in range(len(args.classes))] 66 | 67 | proto_embed = torch.stack(proto_embed, dim=0) # C*D 68 | 69 | f1, f1w, acc = [], [], [] 70 | for _, mask in data('train_mask', 'val_mask', 'test_mask'): 71 | y = data.y[mask] 72 | 73 | query_embed = embedding[mask] # N*D 74 | # query_dist = torch.cdist(query_embed, proto_embed, p = 2) #N*D, C*D --> N*C 75 | query_dist_embed = dist_encoder( 76 | query_embed, proto_embed, args.classes) 77 | proto_dist_embed = dist_encoder( 78 | proto_embed, proto_embed, args.classes) 79 | # logits = torch.softmax(-query_dist, dim = 1) #N*C 80 | logits = torch.log_softmax( 81 | torch.mm(query_dist_embed, proto_dist_embed), dim=1) 82 | 83 | pred = logits.max(dim=1)[1] 84 | 85 | acc.append(pred.eq(y).sum().item() / mask.sum().item()) 86 | f1.append(f1_score(y.tolist(), pred.tolist(), labels=np.arange( 87 | 0, len(args.classes)), average=None, zero_division=0)) 88 | f1w.append(f1_score(y.tolist(), pred.tolist(), labels=np.arange( 89 | 0, len(args.classes)), average='weighted', zero_division=0)) 90 | 91 | return f1, f1w, acc 92 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DPGNN/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn import GCNConv, MessagePassing 4 | from torch.nn import Linear 5 | from torch_geometric.utils import add_remaining_self_loops 6 | from torch_scatter import scatter_add 7 | import copy 8 | 9 | class GCN(torch.nn.Module): 10 | def __init__(self, args): 11 | super(GCN, self).__init__() 12 | self.conv1 = GCNConv(args.num_features, args.n_hidden) 13 | self.conv2 = GCNConv(args.n_hidden, args.n_hidden) 14 | 15 | def forward(self, data): 16 | x, edge_index = data.x, data.edge_index 17 | x = F.relu(self.conv1(x, edge_index)) 18 | x = F.dropout(x, training=self.training) 19 | x = self.conv2(x, edge_index) 20 | return x 21 | 22 | 23 | class prototype(torch.nn.Module): 24 | def __init__(self): 25 | super(prototype, self).__init__() 26 | 27 | 28 | def forward(self, x): 29 | return torch.mean(x, dim = 0) 30 | 31 | 32 | class dist_embed(torch.nn.Module): 33 | def __init__(self, args): 34 | super(dist_embed, self).__init__() 35 | self.lin = Linear(args.n_hidden*args.num_classes, args.num_classes) 36 | 37 | def forward(self, query, proto, classes): 38 | d1 = query.size(0) 39 | d2 = proto.size(0) 40 | 41 | query = torch.repeat_interleave(query, d2, dim = 0) 42 | proto = torch.tile(proto, (d1, 1)) 43 | 44 | dist = self.lin((query - proto).view(d1, -1)) 45 | 46 | return dist 47 | 48 | 49 | 50 | def gcn_norm(edge_index, edge_weight=None, 
num_nodes=None, improved=False,
51 |              add_self_loops=True, dtype=None):
52 |
53 |     fill_value = 2. if improved else 1.
54 |     num_nodes = int(edge_index.max()) + 1 if num_nodes is None else num_nodes
55 |     if edge_weight is None:
56 |         edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
57 |                                  device=edge_index.device)
58 |
59 |     if add_self_loops:
60 |         edge_index, tmp_edge_weight = add_remaining_self_loops(
61 |             edge_index, edge_weight, fill_value, num_nodes)
62 |         assert tmp_edge_weight is not None
63 |         edge_weight = tmp_edge_weight
64 |
65 |     row, col = edge_index[0], edge_index[1]
66 |     deg = scatter_add(edge_weight, col, dim=0, dim_size=num_nodes)
67 |     deg_inv_sqrt = deg.pow_(-0.5)
68 |     deg_inv_sqrt.masked_fill_(deg_inv_sqrt == float('inf'), 0)
69 |     return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]
70 |
71 |
72 | class Prop(MessagePassing):
73 |     def __init__(self, num_classes, K=10, bias=True, **kwargs):
74 |         super(Prop, self).__init__(aggr='add', **kwargs)
75 |         self.K = K
76 |
77 |     def forward(self, x, edge_index, edge_weight=None):
78 |         edge_index, norm = gcn_norm(edge_index, edge_weight, x.size(0), dtype=x.dtype)
79 |
80 |
81 |         preds = []
82 |         preds.append(x)
83 |         for k in range(self.K):
84 |             x = self.propagate(edge_index, x=x, norm=norm)
85 |             preds.append(x)
86 |
87 |         pps = torch.stack(preds)
88 |         out = torch.sum(pps, dim = 0)
89 |         return out
90 |
91 |     def message(self, x_j, norm):
92 |         return norm.view(-1, 1) * x_j
93 |
94 |     def __repr__(self):
95 |         return '{}(K={})'.format(self.__class__.__name__, self.K)
96 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/DPGNN/solver.py:
--------------------------------------------------------------------------------
1 | from IGL_Bench.algorithm.DPGNN.utils import *
2 | from IGL_Bench.algorithm.DPGNN.model import *
3 | from IGL_Bench.algorithm.DPGNN.learn import *
4 |
5 | import torch
6 | import numpy as np
7 | import copy
8 | from sklearn.metrics import balanced_accuracy_score, f1_score, roc_auc_score
9 |
10 | class DPGNN_node_solver:
11 |     def __init__(self, config, dataset, device: str = 'cuda'):
12 |         self.config = config
13 |         self.device = torch.device(device if torch.cuda.is_available() else 'cpu')
14 |         self.data = dataset.to(self.device)
15 |         self.num_classes: int = int(self.data.y.max().item() + 1)
16 |         self.num_features: int = self.data.x.size(1)
17 |         self.config.num_classes = self.num_classes
18 |         self.config.num_features = self.num_features
19 |         self.classes = torch.arange(self.num_classes, device=self.device)
20 |         self.config.classes = self.classes
21 |         train_counts = torch.bincount(self.data.y[self.data.train_mask].cpu(),
22 |                                       minlength=self.num_classes)
23 |         self.config.c_train_num = train_counts
24 |         if getattr(self.config, 'ssl', 'no') == 'yes':
25 |             self.config.deg_inv_sqrt = deg(self.data.edge_index, self.data.x).to(self.device)
26 |         if getattr(self.config, 'backbone', 'GCN') == 'GCN':
27 |             self.encoder = GCN(self.config).to(self.device)
28 |         else:
29 |             raise ValueError(f"Unsupported backbone: {getattr(self.config, 'backbone', None)}")
30 |         self.prototype_net = prototype().to(self.device)
31 |         self.dist_encoder = dist_embed(self.config).to(self.device)
32 |         self._build_optimizer()
33 |         self.criterion = torch.nn.NLLLoss()
34 |         self.data.y_aug = self.data.y.clone()
35 |
36 |     def _build_optimizer(self):
37 |         param_groups = [
38 |             {'params': self.encoder.conv1.parameters(), 'lr': 1e-2, 'weight_decay': 5e-4},
39 |             {'params': self.encoder.conv2.parameters(), 'lr': 1e-2,
'weight_decay': 0.0}, 40 | {'params': self.dist_encoder.lin.parameters(), 'lr': 1e-2, 'weight_decay': 0.0}, 41 | ] 42 | self.optimizer = torch.optim.Adam(param_groups) 43 | 44 | def reset_parameters(self): 45 | self.encoder.conv1.reset_parameters() 46 | self.encoder.conv2.reset_parameters() 47 | self.dist_encoder.lin.reset_parameters() 48 | 49 | def _label_prop_augment(self): 50 | if getattr(self.config, 'label_prop', 'no') != 'yes': 51 | return 52 | y_prop = label_prop(self.data.edge_index, 53 | self.data.train_mask, 54 | self.config.c_train_num, 55 | self.data.y, 56 | epochs=20) 57 | y_aug, new_train_mask = sample(self.data.train_mask, 58 | self.config.c_train_num, 59 | y_prop, 60 | self.data.y, 61 | eta=self.config.eta) 62 | self.data.y_aug = y_aug.to(self.device) 63 | self.data.train_mask = new_train_mask.to(self.device) 64 | 65 | def train(self): 66 | self.reset_parameters() 67 | self._label_prop_augment() 68 | best_val_f1 = -1.0 69 | early_stopping = getattr(self.config, 'early_stopping', 10) 70 | history = [] 71 | for epoch in range(getattr(self.config, 'epochs', 500)): 72 | train(self.encoder, self.dist_encoder, self.prototype_net, 73 | self.data, self.optimizer, self.criterion, self.config) 74 | f1_all, _, _ = test(self.encoder, self.dist_encoder, self.prototype_net, 75 | self.data, self.config) 76 | val_f1_mean = np.mean(f1_all[1]) 77 | print('Epoch: {:03d}, val_f1_mean: {:.4f}'.format(epoch, val_f1_mean)) 78 | history.append(val_f1_mean) 79 | if val_f1_mean > best_val_f1: 80 | best_val_f1 = val_f1_mean 81 | self._best_state = { 82 | 'encoder': copy.deepcopy(self.encoder.state_dict()), 83 | 'dist': copy.deepcopy(self.dist_encoder.state_dict()) 84 | } 85 | if early_stopping > 0 and epoch > self.config.epochs // 10: 86 | if len(history) > early_stopping: 87 | recent = np.array(history[-early_stopping:]) 88 | if val_f1_mean < recent.mean(): 89 | break 90 | if hasattr(self, '_best_state'): 91 | self.encoder.load_state_dict(self._best_state['encoder']) 92 | self.dist_encoder.load_state_dict(self._best_state['dist']) 93 | 94 | def test(self): 95 | self.encoder.eval() 96 | with torch.no_grad(): 97 | embedding = self.encoder(self.data) 98 | proto_list = [] 99 | for c in self.classes: 100 | idx = (self.data.y_aug == c) & self.data.train_mask 101 | proto_list.append(self.prototype_net(embedding[idx])) 102 | proto = torch.stack(proto_list, dim=0) 103 | query_emb = embedding[self.data.test_mask] 104 | query_dist = self.dist_encoder(query_emb, proto, self.classes) 105 | proto_dist = self.dist_encoder(proto, proto, self.classes) 106 | logits = torch.log_softmax(torch.mm(query_dist, proto_dist), dim=1) 107 | probs = torch.exp(logits).cpu().numpy() 108 | preds = logits.max(dim=1)[1].cpu() 109 | labels = self.data.y[self.data.test_mask].cpu() 110 | acc = (preds == labels).sum().item() / labels.size(0) 111 | bacc = balanced_accuracy_score(labels, preds) 112 | mf1 = f1_score(labels, preds, average='macro', zero_division=0) 113 | try: 114 | roc = roc_auc_score(labels, probs, multi_class='ovr') 115 | except Exception: 116 | roc = float('nan') 117 | return acc, bacc, mf1, roc 118 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DRGCN/__init__.py -------------------------------------------------------------------------------- 
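The node-level solvers in this package share one pattern: construct with a config and a dataset, then call train() and test(), as in DPGNN_node_solver above. A minimal driver sketch follows; it is illustrative only. The repository ships YAML configs (e.g. config/node/class/DPGNN.yml), and a SimpleNamespace stands in for one here; the field names mirror the getattr defaults the solver reads, while the values for episodic_samp, lamb1, and lamb2 are assumptions, not benchmarked settings.

# Sketch: driving DPGNN_node_solver on a PyG Data object that carries
# train_mask / val_mask / test_mask. Hyperparameter values are assumed.
from types import SimpleNamespace

config = SimpleNamespace(backbone='GCN', n_hidden=64, epochs=500, early_stopping=10,
                         ssl='no', label_prop='no', episodic_samp=0.5,
                         lamb1=0.5, lamb2=0.5)
solver = DPGNN_node_solver(config, data, device='cuda')
solver.train()
acc, bacc, mf1, roc = solver.test()
print(f'acc={acc:.4f} bacc={bacc:.4f} macro-F1={mf1:.4f} auc-roc={roc:.4f}')
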
/IGL_Bench/algorithm/DRGCN/load_data.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | import scipy.sparse as sp
4 | import torch
5 |
6 | def _torch_sparse_to_scipy(tsp, shape=None):
7 |     tsp = tsp.coalesce()
8 |     idx = tsp.indices().cpu().numpy()
9 |     val = tsp.values().cpu().numpy()
10 |     if shape is None:
11 |         shape = tsp.shape
12 |     return sp.coo_matrix((val, (idx[0], idx[1])), shape=shape).tocsr()
13 |
14 | def _dense_to_scipy(mat_like, shape=None):
15 |     """Dense torch / numpy matrix → CSR"""
16 |     if isinstance(mat_like, torch.Tensor):
17 |         mat_like = mat_like.cpu().numpy()
18 |     if shape is None:
19 |         shape = mat_like.shape
20 |     return sp.coo_matrix(mat_like.reshape(shape)).tocsr()
21 |
22 | def _edge_index_to_scipy(edge_index, num_nodes, edge_weight=None):
23 |     row, col = edge_index.cpu().numpy()
24 |     if edge_weight is None:
25 |         edge_weight = np.ones(row.shape[0], dtype=np.float32)
26 |     else:
27 |         edge_weight = edge_weight.cpu().numpy()
28 |     return sp.coo_matrix((edge_weight, (row, col)),
29 |                          shape=(num_nodes, num_nodes)).tocsr()
30 |
31 | def _any_adj_to_scipy(adj_like, num_nodes):
32 |     """
33 |     Convert any adjacency storage format that may appear in PyG to a unified CSR matrix.
34 |     """
35 |     # 1) already a scipy matrix
36 |     if isinstance(adj_like, sp.spmatrix):
37 |         return adj_like.tocsr()
38 |
39 |     # 2) torch sparse tensor
40 |     if isinstance(adj_like, torch.Tensor):
41 |         if adj_like.is_sparse:
42 |             return _torch_sparse_to_scipy(adj_like, (num_nodes, num_nodes))
43 |         else:  # dense torch.Tensor
44 |             return _dense_to_scipy(adj_like, (num_nodes, num_nodes))
45 |
46 |     # 3) torch_sparse.SparseTensor
47 |     try:
48 |         from torch_sparse import SparseTensor
49 |         if isinstance(adj_like, SparseTensor):
50 |             row, col, val = adj_like.coo()
51 |             return sp.coo_matrix(
52 |                 (val.cpu().numpy(),
53 |                  (row.cpu().numpy(), col.cpu().numpy())),
54 |                 shape=(num_nodes, num_nodes)).tocsr()
55 |     except ImportError:
56 |         pass
57 |
58 |     # 4) dense numpy ndarray / list
59 |     if isinstance(adj_like, (np.ndarray, list)):
60 |         return _dense_to_scipy(np.asarray(adj_like, dtype=np.float32),
61 |                                (num_nodes, num_nodes))
62 |
63 |     raise TypeError(f"Unsupported adjacency type: {type(adj_like)}")
64 |
65 | # ------------------------------------------------------------------ #
66 | def data_process(dataset):
67 |
68 |     # ---------- basic data ----------
69 |     x = dataset.x.cpu().numpy().astype(np.float32)
70 |     label = dataset.y.squeeze().cpu().numpy().astype(np.int64)
71 |     num_nodes = x.shape[0]
72 |
73 |     # ---------- adjacency matrix ----------
74 |     if hasattr(dataset, 'adj') and dataset.adj is not None:
75 |         adj = _any_adj_to_scipy(dataset.adj, num_nodes)
76 |     elif hasattr(dataset, 'edge_index'):
77 |         edge_weight = getattr(dataset, 'edge_weight', None)
78 |         adj = _edge_index_to_scipy(dataset.edge_index, num_nodes, edge_weight)
79 |     else:
80 |         raise ValueError("Dataset must provide adj / edge_index")
81 |
82 |     # ---------- normalized adjacency ----------
83 |     if hasattr(dataset, 'adj_norm') and dataset.adj_norm is not None:
84 |         adj_norm = _any_adj_to_scipy(dataset.adj_norm, num_nodes)
85 |     else:
86 |         deg = np.array(adj.sum(1)).flatten()
87 |         deg_inv_sqrt = np.power(deg, -0.5, where=deg > 0)
88 |         D_inv_sqrt = sp.diags(deg_inv_sqrt)
89 |         adj_norm = D_inv_sqrt @ adj @ D_inv_sqrt
90 |
91 |     # ---------- split indices ----------
92 |     train_indexes = np.asarray(dataset.train_index, dtype=np.int64)
93 |     validation_indexes = np.asarray(dataset.val_index, dtype=np.int64)
94 |     test_indexes = np.asarray(dataset.test_index, dtype=np.int64)
95 |
96 |     # ---------- GAN sampling ----------
97 |     label_counts = {}
98 |     for idx in train_indexes:
99 |         lab = int(label[idx])
100 |         label_counts.setdefault(lab, []).append(idx)
101 |     balance_num = max(len(v) for v in label_counts.values())
102 |
103 |     real_gan_nodes, generated_gan_nodes, real_node_sequence = [], [], []
104 |     for lab, nodes in label_counts.items():
105 |         for n in nodes:  # keep every real node
106 |             real_gan_nodes.append([n, lab])
107 |             real_node_sequence.append(n)
108 |         for _ in range(balance_num - len(nodes)):  # randomly oversample to balance
109 |             s = random.choice(nodes)
110 |             real_gan_nodes.append([s, lab])
111 |             real_node_sequence.append(s)
112 |             generated_gan_nodes.append([s, lab])
113 |
114 |     perm = np.random.permutation(len(real_gan_nodes))
115 |     real_gan_nodes = [real_gan_nodes[i] for i in perm]
116 |     real_node_sequence = [real_node_sequence[i] for i in perm]
117 |
118 |     # ---------- bipartite graph ----------
119 |     adj_coo = adj.tocoo()
120 |     neighbor_dict = {}
121 |     for r, c in zip(adj_coo.row, adj_coo.col):
122 |         neighbor_dict.setdefault(r, []).append(c)
123 |
124 |     all_neighbor_nodes = sorted(
125 |         {nbr for v in real_node_sequence for nbr in neighbor_dict.get(v, [])})
126 |     real_num = len(real_node_sequence)
127 |     neigh_num = len(all_neighbor_nodes)
128 |
129 |     adj_neighbor = np.zeros((real_num, neigh_num), dtype=np.float32)
130 |     col_map = {n: j for j, n in enumerate(all_neighbor_nodes)}
131 |     for i, v in enumerate(real_node_sequence):
132 |         for nbr in neighbor_dict.get(v, []):
133 |             j = col_map.get(nbr)
134 |             if j is not None:
135 |                 adj_neighbor[i, j] = 1.0
136 |
137 |     # ---------- return ----------
138 |     return (x, adj, adj_norm, label,
139 |             train_indexes, test_indexes, validation_indexes,
140 |             real_gan_nodes, generated_gan_nodes,
141 |             adj_neighbor, all_neighbor_nodes)
142 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/DRGCN/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DRGCN/models/__init__.py
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/DRGCN/models/adversarialNets.py:
--------------------------------------------------------------------------------
1 | import tensorflow.compat.v1 as tf
2 | import numpy as np
3 |
4 | tf.disable_v2_behavior()
5 |
6 |
7 | def glorot(shape, name):
8 |     init_range = np.sqrt(6.0 / (shape[0] + shape[1]))
9 |     initializer = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32)
10 |     return tf.get_variable(name, initializer=initializer)
11 |
12 | def zeros(shape, name):
13 |     initializer = tf.zeros(shape, dtype=tf.float32)
14 |     return tf.get_variable(name, initializer=initializer)
15 |
16 |
17 | class Generator:
18 |     def __init__(self, x_dim, y_dim, z_dim, h_dim):
19 |         self.x_dim = x_dim
20 |         self.y_dim = y_dim
21 |         self.z_dim = z_dim
22 |         self.h_dim = h_dim
23 |         self._build_model()
24 |
25 |     def _build_model(self):
26 |         with tf.variable_scope("gan/generator"):
27 |             self.G_W1 = glorot([self.z_dim + self.y_dim, self.h_dim], name='G_W1')
28 |             self.G_b1 = zeros([self.h_dim], name='G_b1')
29 |             self.G_W2 = glorot([self.h_dim, self.x_dim], name='G_W2')
30 |             self.G_b2 = zeros([self.x_dim], name='G_b2')
31 |
32 |     def call(self, z, y):
33 |         inputs = tf.concat([z, y], axis=1)
34 |         h1 = tf.nn.relu(tf.matmul(inputs, self.G_W1) + self.G_b1)
35 |         log_prob = tf.matmul(h1, self.G_W2) + self.G_b2
36 |         prob = tf.nn.softmax(tf.nn.tanh(log_prob))
37 |         return prob
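    # This is a conditional generator: noise z concatenated with a class
    # one-hot y passes through one hidden layer, and softmax(tanh(.)) keeps
    # each fake feature vector on a simplex, presumably so generated features
    # resemble the row-normalized real features (cf. preprocess_features in
    # sparse.py).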
38 | 39 | def __call__(self, *args, **kwargs): 40 | return self.call(*args, **kwargs) 41 | 42 | 43 | class Discriminator: 44 | def __init__(self, x_dim, y_dim, h_dim): 45 | self.x_dim = x_dim 46 | self.y_dim = y_dim 47 | self.h_dim = h_dim 48 | self._build_model() 49 | 50 | def _build_model(self): 51 | with tf.variable_scope("gan/discriminator"): 52 | self.D_W1 = glorot([self.x_dim + self.y_dim, self.h_dim], name='D_W1') 53 | self.D_b1 = zeros([self.h_dim], name='D_b1') 54 | self.D_W2 = glorot([self.h_dim, 1], name='D_W2') 55 | self.D_b2 = zeros([1], name='D_b2') 56 | 57 | def call(self, x, y): 58 | inputs = tf.concat([x, y], axis=1) 59 | h1 = tf.nn.relu(tf.matmul(inputs, self.D_W1) + self.D_b1) 60 | logit = tf.matmul(h1, self.D_W2) + self.D_b2 61 | prob = tf.nn.sigmoid(logit) 62 | return prob, logit 63 | 64 | def __call__(self, *args, **kwargs): 65 | return self.call(*args, **kwargs) -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/models/gmm.py: -------------------------------------------------------------------------------- 1 | import tensorflow.compat.v1 as tf 2 | tf.disable_v2_behavior() 3 | import tensorflow_probability as tfp 4 | 5 | tfd = tfp.distributions 6 | 7 | 8 | def _softplus_inverse(x): 9 | """Helper which computes the function inverse of `tf.nn.softplus`.""" 10 | return tf.math.log(tf.math.expm1(x)) 11 | 12 | class gaussianMixtureModel: 13 | 14 | def __init__(self, mixture_components, latent_size): 15 | self.mixture_components = mixture_components 16 | self.latent_size = latent_size 17 | 18 | def make_mixture_posterior(self, feats): 19 | return tfd.MultivariateNormalDiag( 20 | loc=feats, 21 | scale_diag=tf.nn.softplus(feats + _softplus_inverse(1.0)), 22 | name="unlabeled_dist" 23 | ) 24 | 25 | def make_mixture_prior(self): 26 | """Creates the mixture of Gaussians prior distribution. 27 | Returns: 28 | A `tfd.Distribution` instance representing the prior over latent encodings. 
29 | """ 30 | if self.mixture_components == 1: 31 | # Use fixed standard Gaussian 32 | return tfd.MultivariateNormalDiag( 33 | loc=tf.zeros([self.latent_size]), 34 | scale_diag=tf.ones([self.latent_size]), # <- replaced scale_identity_multiplier 35 | name="labeled_dist" 36 | ) 37 | 38 | # Learnable mixture parameters 39 | loc = tf.compat.v1.get_variable( 40 | name="loc", shape=[self.mixture_components, self.latent_size]) 41 | raw_scale_diag = tf.compat.v1.get_variable( 42 | name="raw_scale_diag", shape=[self.mixture_components, self.latent_size]) 43 | mixture_logits = tf.compat.v1.get_variable( 44 | name="mixture_logits", shape=[self.mixture_components]) 45 | 46 | return tfd.MixtureSameFamily( 47 | mixture_distribution=tfd.Categorical(logits=mixture_logits), 48 | components_distribution=tfd.MultivariateNormalDiag( 49 | loc=loc, 50 | scale_diag=tf.nn.softplus(raw_scale_diag) 51 | ), 52 | name="labeled_dist" 53 | ) 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/models/graph.py: -------------------------------------------------------------------------------- 1 | """Spectral Graph Convolutional filter cell.""" 2 | import numpy as np 3 | import tensorflow.compat.v1 as tf 4 | tf.disable_v2_behavior() 5 | import os 6 | 7 | def _dot(x, y, sparse=False): 8 | if sparse: 9 | return tf.sparse_tensor_dense_matmul(x, y) 10 | return tf.matmul(x, y) 11 | 12 | def sparse_dropout(x, keep_prob, noise_shape): 13 | """Dropout for sparse tensors.""" 14 | random_tensor = keep_prob 15 | random_tensor += tf.random_uniform(noise_shape) 16 | dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool) 17 | pre_out = tf.sparse_retain(x, dropout_mask) 18 | return pre_out * (1./keep_prob) 19 | 20 | def glorot(shape, name=None): 21 | """Glorot & Bengio (AISTATS 2010) init.""" 22 | init_range = np.sqrt(6.0/(shape[0]+shape[1])) 23 | initial = tf.random_uniform(shape, minval=-init_range, maxval=init_range, dtype=tf.float32) 24 | return tf.Variable(initial, name=name) 25 | 26 | def zeros(shape, name=None): 27 | """All zeros.""" 28 | initial = tf.zeros(shape, dtype=tf.float32) 29 | return tf.Variable(initial, name=name) 30 | 31 | class GraphConvLayer: 32 | def __init__(self, input_dim, output_dim, name, holders, act=tf.nn.relu, 33 | dropout=False, bias=True): 34 | # name, act=tf.nn.relu, bias=False, dropout=): 35 | self.input_dim = input_dim 36 | self.output_dim = output_dim 37 | self.act=act 38 | self.bias = bias 39 | self.dropout = dropout 40 | self.var={} 41 | 42 | with tf.variable_scope(name): 43 | 44 | with tf.name_scope('weights'): 45 | self.var['w']=glorot([input_dim, output_dim], 46 | name='w') 47 | if self.bias: 48 | self.var['b']=zeros([output_dim], 49 | name='b') 50 | if self.dropout: 51 | self.dropout_prob = holders['dropout_prob'] 52 | else: 53 | self.dropout_prob = 0. 
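        # num_features_nonzero supplies the noise shape that sparse_dropout
        # needs when the input x is a tf.SparseTensor (see call below); dense
        # inputs go through ordinary tf.nn.dropout instead.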
54 | self.num_features_nonzero = holders['num_features_nonzero'] 55 | 56 | def call(self, adj_norm, x, sparse=False): 57 | 58 | if sparse: 59 | x = sparse_dropout(x, 1-self.dropout_prob, self.num_features_nonzero) 60 | else: 61 | x = tf.nn.dropout(x, 1-self.dropout_prob) 62 | hw = _dot(x=x, y=self.var['w'], sparse=sparse) 63 | ahw = _dot(x=adj_norm, y=hw, sparse=True) 64 | 65 | embed_out = self.act(ahw) 66 | 67 | if self.bias: 68 | embed_out = self.act(tf.add(ahw, self.var['b'])) 69 | return embed_out 70 | 71 | def __call__(self, *args, **kwargs): 72 | return self.call(*args, **kwargs) -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DRGCN/sparse.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | 4 | 5 | def preprocess_features(features): 6 | """Row-normalize feature matrix and convert to tuple representation""" 7 | rowsum = np.array(features.sum(1)) 8 | r_inv = np.power(rowsum, -1).flatten() 9 | r_inv[np.isinf(r_inv)] = 0. 10 | r_mat_inv = sp.diags(r_inv) 11 | features = r_mat_inv.dot(features) 12 | return sparse_to_tuple(features) 13 | 14 | def sparse_to_tuple(sparse_mx): 15 | """Convert sparse matrix to tuple representation.""" 16 | # The zeroth element of the tuple contains the cell location of each 17 | # non-zero value in the sparse matrix 18 | # The first element of the tuple contains the value at each cell location 19 | # in the sparse matrix 20 | # The second element of the tuple contains the full shape of the sparse 21 | # matrix 22 | def to_tuple(mx): 23 | if not sp.isspmatrix_coo(mx): 24 | mx = mx.tocoo() 25 | coords = np.vstack((mx.row, mx.col)).transpose() 26 | values = mx.data 27 | shape = mx.shape 28 | return coords, values, shape 29 | 30 | if isinstance(sparse_mx, list): 31 | for i in range(len(sparse_mx)): 32 | sparse_mx[i] = to_tuple(sparse_mx[i]) 33 | else: 34 | sparse_mx = to_tuple(sparse_mx) 35 | 36 | return sparse_mx 37 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/DataDec/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/contrast.py: -------------------------------------------------------------------------------- 1 | from GCL.models import DualBranchContrast 2 | from GCL.models.contrast_model import add_extra_mask 3 | from GCL.losses import Loss 4 | import torch 5 | import numpy as np 6 | 7 | class DualBranchContrast_diet(DualBranchContrast): 8 | def __init__(self, loss: Loss, mode: str, intraview_negs: bool = False, use_grad_norm: bool = False, ord: int = 1 ): 9 | super(DualBranchContrast_diet, self).__init__(loss=loss, mode=mode, intraview_negs=intraview_negs) 10 | self.use_grad_norm = use_grad_norm 11 | self.ord = ord 12 | 13 | 14 | def forward(self, h1=None, h2=None, g1=None, g2=None, batch=None, h3=None, h4=None, 15 | extra_pos_mask=None, extra_neg_mask=None): 16 | if self.mode == 'L2L': 17 | assert h1 is not None and h2 is not None 18 | anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=h1, sample=h2) 19 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=h2, sample=h1) 20 | elif self.mode == 'G2G': 21 | assert g1 is not None and g2 is not None 22 | 
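            # G2G mode: contrast graph-level embeddings across the two augmented
            # views; each graph's positive is its own embedding in the other view.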
anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=g1, sample=g2) 23 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=g2, sample=g1) 24 | else: 25 | if batch is None or batch.max().item() + 1 <= 1: 26 | assert all(v is not None for v in [h1, h2, g1, g2, h3, h4]) 27 | anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=g1, sample=h2, neg_sample=h4) 28 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=g2, sample=h1, neg_sample=h3) 29 | else: 30 | assert all(v is not None for v in [h1, h2, g1, g2, batch]) 31 | anchor1, sample1, pos_mask1, neg_mask1 = self.sampler(anchor=g1, sample=h2, batch=batch) 32 | anchor2, sample2, pos_mask2, neg_mask2 = self.sampler(anchor=g2, sample=h1, batch=batch) 33 | 34 | pos_mask1, neg_mask1 = add_extra_mask(pos_mask1, neg_mask1, extra_pos_mask, extra_neg_mask) 35 | pos_mask2, neg_mask2 = add_extra_mask(pos_mask2, neg_mask2, extra_pos_mask, extra_neg_mask) 36 | l1 = self.loss(anchor=anchor1, sample=sample1, pos_mask=pos_mask1, neg_mask=neg_mask1, **self.kwargs) 37 | l2 = self.loss(anchor=anchor2, sample=sample2, pos_mask=pos_mask2, neg_mask=neg_mask2, **self.kwargs) 38 | 39 | sample1.retain_grad() 40 | sample2.retain_grad() 41 | scores1 = get_lord_error_fn(anchor1, sample1, self.ord) 42 | scores2 = get_lord_error_fn(anchor2, sample2, self.ord) 43 | return (l1 + l2) * 0.5, scores1, scores2, sample1, sample2 44 | 45 | def get_lord_error_fn(logits, Y, ord): 46 | errors = torch.nn.functional.softmax(logits, dim=1) - Y 47 | scores = np.linalg.norm(errors.detach().cpu().numpy(), ord=ord, axis=-1) 48 | return scores 49 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Sampler 3 | from torch_geometric.data import Dataset 4 | 5 | class MyDataset(Dataset): 6 | def __init__(self, full_dataset, sampled_list): 7 | self.full_dataset = full_dataset 8 | self.sampled_list = sampled_list 9 | 10 | def __len__(self): 11 | return len(self.sampled_list) 12 | 13 | def __getitem__(self, idx): 14 | actual_idx = self.sampled_list[idx] 15 | data = self.full_dataset[actual_idx] 16 | return data, actual_idx 17 | 18 | class IndexSampler(Sampler): 19 | def __init__(self, sampled_list): 20 | self.sampled_list = sampled_list 21 | 22 | def __iter__(self): 23 | return iter(self.sampled_list) 24 | 25 | def __len__(self): 26 | return len(self.sampled_list) 27 | 28 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/DataDec/prune.py: -------------------------------------------------------------------------------- 1 | import json 2 | import numpy as np 3 | import torch 4 | from collections import OrderedDict 5 | 6 | from pdb import set_trace 7 | 8 | 9 | class Mask(object): 10 | def __init__(self, model, no_reset=False): 11 | super(Mask, self).__init__() 12 | self.model = model 13 | if not no_reset: 14 | self.reset() 15 | 16 | @property 17 | def sparsity(self): 18 | """Return the percent of weights that have been pruned as a decimal.""" 19 | prunableTensors = [] 20 | for name, module in self.model.named_modules(): 21 | if hasattr(module, "prune_mask"): 22 | prunableTensors.append(module.prune_mask.detach()) 23 | 24 | unpruned = torch.sum(torch.tensor([torch.sum(v) for v in prunableTensors])) 25 | total = torch.sum(torch.tensor([torch.sum(torch.ones_like(v)) for v in prunableTensors])) 26 | return 1 - 
unpruned.float() / total.float() 27 | 28 | @property 29 | def density(self): 30 | return 1 - self.sparsity 31 | 32 | def magnitudePruning(self, magnitudePruneFraction, randomPruneFraction=0): 33 | weights = [] 34 | for name, module in self.model.named_modules(): 35 | if hasattr(module, "prune_mask"): 36 | weights.append(module.weight.clone().cpu().detach().numpy()) 37 | 38 | 39 | self.reset() 40 | prunableTensors = [] 41 | for name, module in self.model.named_modules(): 42 | if hasattr(module, "prune_mask"): 43 | prunableTensors.append(module.prune_mask.detach()) 44 | 45 | number_of_remaining_weights = torch.sum(torch.tensor([torch.sum(v) for v in prunableTensors])).cpu().numpy() 46 | number_of_weights_to_prune_magnitude = np.ceil(magnitudePruneFraction * number_of_remaining_weights).astype(int) 47 | number_of_weights_to_prune_random = np.ceil(randomPruneFraction * number_of_remaining_weights).astype(int) 48 | random_prune_prob = number_of_weights_to_prune_random / (number_of_remaining_weights - number_of_weights_to_prune_magnitude) 49 | 50 | 51 | weight_vector = np.concatenate([v.flatten() for v in weights]) 52 | threshold = np.sort(np.abs(weight_vector))[min(number_of_weights_to_prune_magnitude, len(weight_vector) - 1)] 53 | 54 | 55 | for name, module in self.model.named_modules(): 56 | if hasattr(module, "prune_mask"): 57 | module.prune_mask = (torch.abs(module.weight) >= threshold).float() 58 | 59 | module.prune_mask[torch.rand_like(module.prune_mask) < random_prune_prob] = 0 60 | 61 | def reset(self): 62 | for name, module in self.model.named_modules(): 63 | if hasattr(module, "prune_mask"): 64 | module.prune_mask = torch.ones_like(module.weight) 65 | 66 | 67 | def save_mask(epoch, model, filename): 68 | pruneMask = OrderedDict() 69 | 70 | for name, module in model.named_modules(): 71 | if hasattr(module, "prune_mask"): 72 | pruneMask[name] = module.prune_mask.cpu().type(torch.bool) 73 | 74 | torch.save({"epoch": epoch, "pruneMask": pruneMask}, filename) 75 | 76 | 77 | def load_mask(model, state_dict, device): 78 | 79 | for name, module in model.named_modules(): 80 | if hasattr(module, "prune_mask"): 81 | module.prune_mask.data = state_dict[name].to(device).float() 82 | 83 | return model 84 | 85 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/G2GNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/G2GNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/G2GNN/aug.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch_geometric.utils.dropout import dropout_adj 3 | import torch 4 | import random 5 | 6 | def remove_edge(edge_index, drop_ratio): 7 | edge_index, _ = dropout_adj(edge_index, p = drop_ratio) 8 | 9 | return edge_index 10 | 11 | 12 | def drop_node(x, drop_ratio): 13 | node_num, _ = x.size() 14 | drop_num = int(node_num * drop_ratio) 15 | 16 | idx_mask = np.random.choice(node_num, drop_num, replace = False).tolist() 17 | 18 | x[idx_mask] = 0 19 | 20 | return x 21 | 22 | def upsample(dataset): 23 | y = torch.tensor([dataset[i].y for i in range(len(dataset))]) 24 | classes = torch.unique(y) 25 | 26 | num_class_graph = [(y == i.item()).sum() for i in classes] 27 | 28 | max_num_class_graph = max(num_class_graph) 29 | 30 | chosen = [] 31 | 
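    # For each class, replicate its training indices (sampling randomly for any
    # remainder) until every class matches the graph count of the largest class.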
for i in range(len(classes)):
32 |         train_idx = torch.where(y == classes[i])[0].tolist()
33 |
34 |         up_sample_ratio = max_num_class_graph / num_class_graph[i]
35 |         up_sample_num = int(
36 |             num_class_graph[i] * up_sample_ratio - num_class_graph[i])
37 |
38 |         if(up_sample_num <= len(train_idx)):
39 |             up_sample = random.sample(train_idx, up_sample_num)
40 |         else:
41 |             tmp = int(up_sample_num / len(train_idx))
42 |             up_sample = train_idx * tmp
43 |             tmp = up_sample_num - len(train_idx) * tmp
44 |
45 |             up_sample.extend(random.sample(train_idx, tmp))
46 |
47 |         chosen.extend(up_sample)
48 |
49 |     if not chosen:
50 |         return list(dataset)
51 |
52 |     chosen = torch.tensor(chosen)
53 |     extend_data = dataset[chosen]
54 |
55 |     data = list(dataset) + list(extend_data)
56 |
57 |     return data
58 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/G2GNN/dataloader.py:
--------------------------------------------------------------------------------
1 | from IGL_Bench.algorithm.G2GNN.aug import *
2 | from torch.utils.data import Dataset as BaseDataset
3 | from torch_geometric.data.collate import collate
4 | import torch
5 | from torch_geometric.utils import subgraph, degree, add_remaining_self_loops
6 | from torch_sparse import SparseTensor
7 |
8 | class Dataset_knn_aug(BaseDataset):
9 |     def __init__(self, dataset, all_dataset, args):
10 |         self.args = args
11 |         self.dataset = dataset
12 |         self.all_dataset = all_dataset
13 |
14 |     def _get_feed_dict(self, index):
15 |         feed_dict = self.dataset[index]
16 |
17 |         return feed_dict
18 |
19 |     def __len__(self):
20 |         return len(self.dataset)
21 |
22 |     def __getitem__(self, index):
23 |         return self._get_feed_dict(index)
24 |
25 |     def collate_batch(self, feed_dicts):
26 |         batch_id = torch.tensor([feed_dict.id for feed_dict in feed_dicts])
27 |         # prevent testing data leakage
28 |         train_idx = torch.arange(batch_id.shape[0])
29 |
30 |         # add_knn_dataset to feed_dicts
31 |         pad_knn_id = find_knn_id(batch_id, self.args.kernel_idx)
32 |         feed_dicts.extend([self.all_dataset[i] for i in pad_knn_id])
33 |
34 |         data, slices, _ = collate(
35 |             feed_dicts[0].__class__,
36 |             data_list=feed_dicts,
37 |             increment=True,
38 |             add_batch=True,
39 |         )
40 |
41 |         knn_edge_index, _ = subgraph(
42 |             data.id, self.args.knn_edge_index, relabel_nodes=True)
43 |
44 |         knn_edge_index, _ = add_remaining_self_loops(knn_edge_index)
45 |         row, col = knn_edge_index
46 |         knn_deg = degree(col, data.id.shape[0])
47 |         deg_inv_sqrt = knn_deg.pow(-0.5)
48 |         edge_weight = deg_inv_sqrt[row] * deg_inv_sqrt[col]  # symmetric normalization, cf. gcn_norm
49 |
50 |         knn_adj_t = torch.sparse.FloatTensor(
51 |             knn_edge_index, edge_weight, (data.id.size(0), data.id.size(0)))
52 |
53 |
54 |         aug_xs, aug_adj_ts = [], []
55 |         for i in range(self.args.aug_num):
56 |             edge_index = torch.stack(data.adj_t.coo()[:2])
57 |             edge_index_aug = remove_edge(edge_index, self.args.drop_edge_ratio)
58 |             aug_adj_ts.append(SparseTensor(
59 |                 row=edge_index_aug[0], col=edge_index_aug[1], value=None, sparse_sizes=(data.x.size(0), data.x.size(0))))
60 |
61 |             aug_xs.append(drop_node(data.x, self.args.mask_node_ratio))
62 |
63 |         batch = {'data': data,
64 |                  'train_idx': train_idx,
65 |                  'aug_adj_ts': aug_adj_ts,
66 |                  'aug_xs': aug_xs,
67 |                  'knn_adj_t': knn_adj_t}
68 |
69 |         return batch
70 |
71 | def find_knn_id(batch_id, kernel_idx):
72 |     knn_id = set(kernel_idx[batch_id].view(-1).tolist())
73 |     pad_knn_id = knn_id.difference(set(batch_id.tolist()))
74 |
75 |     return list(pad_knn_id)
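
collate_batch both pads every mini-batch with its kernel-kNN neighbor graphs (so the graph-of-graphs stays intact inside the batch) and draws aug_num augmented views. A minimal wiring sketch, assuming each stored graph carries an id attribute and a SparseTensor adj_t (e.g. via torch_geometric.transforms.ToSparseTensor); get_kernel_knn comes from kernel.py below, and the dataset name and ratios here are illustrative assumptions:

# Sketch: plugging Dataset_knn_aug into a vanilla DataLoader.
from torch.utils.data import DataLoader

args.kernel_idx, args.knn_edge_index = get_kernel_knn(
    'PROTEINS', 'sp', 3, dataset)   # kernel-kNN over the whole dataset; the
                                    # kernel_type string only names the cache file
args.aug_num, args.drop_edge_ratio, args.mask_node_ratio = 2, 0.2, 0.2

train_set = Dataset_knn_aug(train_dataset, dataset, args)
loader = DataLoader(train_set, batch_size=32, shuffle=True,
                    collate_fn=train_set.collate_batch)
for batch in loader:
    data, train_idx = batch['data'], batch['train_idx']    # kNN-padded PyG batch
    knn_adj_t, aug_xs = batch['knn_adj_t'], batch['aug_xs']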
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/G2GNN/kernel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | from grakel.kernels import ShortestPath 5 | from grakel import Graph 6 | 7 | def construct_knn(kernel_idx): 8 | edge_index = [[], []] 9 | 10 | for i in range(len(kernel_idx)): 11 | for j in range(len(kernel_idx[i])): 12 | edge_index[0].append(kernel_idx[i, j].item()) 13 | edge_index[1].append(i) 14 | 15 | edge_index[1].append(kernel_idx[i, j].item()) 16 | edge_index[0].append(i) 17 | 18 | return torch.tensor(edge_index, dtype=torch.long) 19 | 20 | def pyg_to_grakel(pyg_graph): 21 | edge_index = pyg_graph.edge_index.numpy() 22 | edges = list(zip(edge_index[0], edge_index[1])) 23 | node_labels = {i: str(label) for i, label in enumerate(pyg_graph.x.numpy())} 24 | return Graph(edges, node_labels=node_labels) 25 | 26 | def get_kernel_knn(dataname, kernel_type, knn_nei_num, dataset): 27 | current_dir = os.path.dirname(os.path.abspath(__file__)) 28 | kernel_file = os.path.join(current_dir, '../../../G2GNN_kernel', 29 | f'{dataname}_{kernel_type}_{knn_nei_num}.txt') 30 | 31 | if(os.path.exists(kernel_file)): 32 | kernel_simi = torch.load(kernel_file) 33 | else: 34 | #dataset = fetch_dataset(dataname, verbose=False) 35 | G = [pyg_to_grakel(graph) for graph in dataset] 36 | if(dataname in ['IMDB-BINARY', 'REDDIT-BINARY']): 37 | gk = ShortestPath(normalize=True, with_labels=False) 38 | else: 39 | gk = ShortestPath(normalize=True) 40 | kernel_simi = torch.tensor(gk.fit_transform(G)) 41 | torch.save(kernel_simi, kernel_file) 42 | 43 | kernel_idx = torch.topk(kernel_simi, k=knn_nei_num, 44 | dim=1, largest=True)[1][:, 1:] 45 | 46 | knn_edge_index = construct_knn(kernel_idx) 47 | 48 | return kernel_idx, knn_edge_index -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GCN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/GCN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GIN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/GIN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GRAPHPATCHER/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/GRAPHPATCHER/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/GRAPHPATCHER/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import dgl 3 | import torch.nn.functional as F 4 | import tensorflow as tf 5 | 6 | 7 | def inject_nodes(batched_masked_graphs, generated_neighbors, masked_offset, device, mask=None): 8 | assert len(masked_offset) == len(generated_neighbors) 9 | batched_masked_graphs_ = dgl.add_nodes(batched_masked_graphs, len(masked_offset), {'feat':generated_neighbors}) 10 | temp = torch.arange(batched_masked_graphs_.number_of_nodes() - 
len(masked_offset), batched_masked_graphs_.number_of_nodes()).to(device)
11 |     masked_offset = masked_offset.to(device)
12 |     # src = torch.cat([temp, masked_offset])
13 |     # dst = torch.cat([masked_offset, temp])
14 |     src = temp[mask] if mask is not None else temp
15 |     dst = masked_offset[mask] if mask is not None else masked_offset
16 |     batched_masked_graphs_.add_edges(src, dst)
17 |     return batched_masked_graphs_
18 |
19 |
20 | def kl_div(x, y):
21 |     x = F.log_softmax(x, dim=1)
22 |     y = F.softmax(y, dim=1)
23 |     return F.kl_div(x, y, reduction='batchmean')
24 |
25 |
26 | def construct_placeholder(num_nodes, fea_size, num_classes):
27 |     with tf.name_scope('input'):
28 |         placeholders = {
29 |             'labels': tf.compat.v1.placeholder(tf.float32, shape=(None, num_classes), name='labels'),
30 |             'features': tf.compat.v1.placeholder(tf.float32, shape=(num_nodes, fea_size), name='features'),
31 |             'dropout': tf.compat.v1.placeholder_with_default(0., shape=(), name='dropout'),
32 |             'masks': tf.compat.v1.placeholder(dtype=tf.int32, shape=(num_nodes,), name='masks'),
33 |         }
34 |     return placeholders
35 |
--------------------------------------------------------------------------------
/IGL_Bench/algorithm/HyperIMBA/Poincare.py:
--------------------------------------------------------------------------------
1 | import networkx as nx
2 | import tqdm
3 | import numpy as np
4 | from multiprocessing import Lock, Manager, Pool
5 | from numpy.linalg import norm
6 |
7 | def norm(x, axis=None):
8 |     return np.linalg.norm(x, axis=axis)
9 |
10 | def poincare_dist(u, v, eps=1e-5):
11 |     d = 1 + 2 * norm(u-v)**2 / ((1 - norm(u)**2) * (1 - norm(v)**2) + eps)
12 |     return np.arccosh(d)
13 |
14 | class PoincareModel():
15 |
16 |     def __init__(self, relations, node_weights, node_labels, n_components=2, eta=0.01, n_negative=10,
17 |                  eps=1e-5, burn_in=10, burn_in_eta=0.01, init_lower=-0.001,
18 |                  init_upper=0.001, dtype=np.float64, seed=0, name="", device='cuda', batch_size=None):
19 |         self.relations = relations
20 |         self.n_components = n_components
21 |         self.eta = eta  # Learning rate for training
22 |         self.burn_in_eta = burn_in_eta  # Learning rate for burn-in
23 |         self.n_negative = n_negative
24 |         self.eps = eps
25 |         self.burn_in = burn_in
26 |         self.dtype = dtype
27 |         self.init_lower = init_lower
28 |         self.init_upper = init_upper
29 |         self.node_weights = node_weights
30 |         self.node_labels = node_labels
31 |         self.network = nx.Graph()
32 |         self.name = name
33 |         self.device = device
34 |         self.batch_size = batch_size
35 |         self.manager = Manager()
36 |         self.lock = self.manager.Lock()
37 |
38 |     def init_embeddings(self):
39 |         unique_nodes = np.unique([item for sublist in self.relations for item in sublist])
40 |         theta_init = np.random.uniform(self.init_lower, self.init_upper,
41 |                                        size=(len(unique_nodes), self.n_components))
42 |         embedding_dict = dict(zip(unique_nodes, theta_init))
43 |         self.nodes = unique_nodes
44 |         self.embeddings = theta_init
45 |         self.emb_dict = embedding_dict
46 |
47 |
48 |     def negative_sample(self, u):
49 |         positives = [x[1] for x in self.relations if x[0] == u]
50 |         negatives = np.array([x for x in self.nodes if x not in positives])
51 |         random_ix = np.random.permutation(len(negatives))[:self.n_negative]
52 |         neg_samples = [[u, x] for x in negatives[random_ix]]
53 |         neg_samples.append([u,u])
54 |         return neg_samples
55 |
56 |     def partial_d(self, theta, x):  # gradient of the Poincare distance w.r.t. theta
57 |         alpha = 1 - norm(theta)**2
58 |         beta = 1 - norm(x)**2
59 |         gamma = 1 + 2/(alpha*beta + self.eps) * norm(theta-x)**2
60 |         lhs = 4 / (beta*np.sqrt(gamma**2 - 1) +
self.eps) 61 | rhs = 1/(alpha**2 + self.eps) * (norm(x)**2 - 2*np.inner(theta,x) + 1) * theta - x/(alpha + self.eps) 62 | return lhs*rhs 63 | 64 | def proj(self, theta): 65 | if norm(theta) >= 1: 66 | theta = theta/norm(theta) - self.eps 67 | return theta 68 | 69 | def update(self, u, grad): 70 | with self.lock: 71 | theta = self.emb_dict[u] 72 | step = 1/4 * self.eta * (1 - norm(theta)**2)**2 * grad 73 | self.emb_dict[u] = self.proj(theta - step) 74 | 75 | def train(self, num_epochs=10, edge_index=None): 76 | node_rank = np.array([1 / self.node_labels[v] for v in self.node_labels]) 77 | 78 | if edge_index is not None: 79 | self.relations = edge_index 80 | 81 | for _ in range(num_epochs): 82 | losses = 0 83 | 84 | for relation in tqdm.tqdm(self.relations): 85 | u, v = relation[0], relation[1] 86 | if u == v: 87 | continue 88 | 89 | theta, x = self.emb_dict[u], self.emb_dict[v] 90 | neg_relations = [x[1] for x in self.negative_sample(u)] 91 | neg_embed = np.array([self.emb_dict[x] for x in neg_relations]) 92 | 93 | # Vectorized computation for partial derivatives 94 | rank_comparison = node_rank[u] > node_rank[v] 95 | dd_theta = self.partial_d(theta, x) * rank_comparison 96 | dd_x = self.partial_d(x, theta) * (~rank_comparison) 97 | 98 | if np.isnan(dd_theta).any() or np.isinf(dd_theta).any() or np.isnan(dd_x).any() or np.isinf(dd_x).any(): 99 | return 100 | 101 | # Loss gradients 102 | grad_theta = -dd_theta 103 | grad_x = -dd_x 104 | 105 | self.update(u, grad_theta) 106 | self.update(v, grad_x) 107 | 108 | # Vectorized gradient computation for negative samples 109 | neg_dists = np.array([np.exp(-poincare_dist(theta, self.emb_dict[vprime])) for vprime in neg_relations]) 110 | Z = np.sum(neg_dists) 111 | losses_for_neg = neg_dists / Z 112 | 113 | for idx, vprime in enumerate(neg_relations): 114 | if node_rank[u] < node_rank[vprime]: 115 | dd_u = self.partial_d(theta, self.emb_dict[vprime]) 116 | grad_u = dd_u * (-losses_for_neg[idx]) 117 | self.update(u, grad_u) 118 | else: 119 | dd_vprime = self.partial_d(self.emb_dict[vprime], theta) 120 | grad_vprime = dd_vprime * (-losses_for_neg[idx]) 121 | self.update(vprime, grad_vprime) 122 | 123 | losses += losses_for_neg[idx] * poincare_dist(theta, self.emb_dict[vprime]) 124 | 125 | pos_loss = np.exp(-poincare_dist(theta, x)) 126 | losses += pos_loss -------------------------------------------------------------------------------- /IGL_Bench/algorithm/HyperIMBA/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/HyperIMBA/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/HyperIMBA/cal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from IGL_Bench.algorithm.HyperIMBA.Poincare import PoincareModel 5 | from torch_geometric.utils import degree, to_networkx 6 | from GraphRicciCurvature.OllivierRicci import OllivierRicci 7 | 8 | def compute_ricci_and_poincare(dataset): 9 | current_dir = os.path.dirname(os.path.abspath(__file__)) 10 | file_dir = os.path.join(current_dir, '../../..') 11 | 12 | ricci_file = os.path.join(file_dir,f'hyperemb/{dataset.data_name}.edge_list') 13 | keys_file = os.path.join(file_dir,f'hyperemb/{dataset.data_name}_keys.npy') 14 | values_file = os.path.join(file_dir,f'hyperemb/{dataset.data_name}_values.npy') 15 | 16 | 
if os.path.exists(ricci_file) and os.path.exists(keys_file) and os.path.exists(values_file): 17 | print(f"Files for {dataset.data_name} already exist, skipping computation.") 18 | return 19 | 20 | os.makedirs(os.path.dirname(ricci_file), exist_ok=True) 21 | os.makedirs(os.path.dirname(keys_file), exist_ok=True) 22 | os.makedirs(os.path.dirname(values_file), exist_ok=True) 23 | 24 | G = to_networkx(dataset) 25 | orc = OllivierRicci(G, alpha=0.5, verbose="TRACE") 26 | orc.compute_ricci_curvature() 27 | G_orc = orc.G.copy() # save an intermediate result 28 | 29 | curvature = "ricciCurvature" 30 | ricci_results = {} 31 | ricci = {} 32 | for i, (n1, n2) in enumerate(list(G_orc.edges()), 0): 33 | ricci[i] = [int(n1), int(n2), G_orc[n1][n2][curvature]] 34 | 35 | # Save ricci results 36 | weights = [ricci[i] for i in ricci.keys()] 37 | np.savetxt(ricci_file, weights, fmt="%d %d %.16f") 38 | 39 | # Poincare Model computation 40 | degrees = np.array(degree(dataset.edge_index[0], num_nodes=dataset.num_nodes) + degree(dataset.edge_index[1], num_nodes=dataset.num_nodes)) 41 | edges_list = list(dataset.edge_index.t().numpy()) 42 | labels = dict(enumerate(dataset.y.numpy() + 1, 0)) 43 | device = torch.device('cpu') 44 | dim = 2 45 | model = PoincareModel(edges_list, node_weights=degrees * 0.2, node_labels=labels, n_components=dim, 46 | eta=0.01, n_negative=10, name="hierarchy", device=device) 47 | model.init_embeddings() 48 | model.train(num_epochs=1) 49 | 50 | # Save the Poincare model embeddings 51 | weights = model.embeddings 52 | keys = np.array([item for item in model.emb_dict.keys()]) 53 | values = np.array([item for item in model.emb_dict.values()]) 54 | np.save(keys_file, keys) 55 | np.save(values_file, values) 56 | 57 | print(f"Computation for {dataset.data_name} completed and files saved.") 58 | 59 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/HyperIMBA/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.algorithm.HyperIMBA.cal import compute_ricci_and_poincare 2 | import IGL_Bench.algorithm.HyperIMBA.GcnHyper as GcnHyper 3 | import torch 4 | import torch.nn.functional as F 5 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 6 | 7 | class HyperIMBA_node_solver: 8 | def __init__(self, config, dataset, device='cuda'): 9 | self.config = config 10 | self.dataset = dataset 11 | self.device = device 12 | 13 | compute_ricci_and_poincare(self.dataset) 14 | 15 | self.model = {} 16 | self.optimizer = {} 17 | self.initialization() 18 | self.dataset = self.dataset.to(self.device) 19 | 20 | def initialization(self): 21 | if self.config.backbone == 'GCN': 22 | self.model['default'], self.dataset = GcnHyper.set_model(self.dataset, self.config) 23 | 24 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 25 | 26 | def reset_parameters(self): 27 | self.initialization() 28 | 29 | def train(self): 30 | self.reset_parameters() 31 | num_epochs = getattr(self.config, 'epoch', 500) 32 | patience = getattr(self.config, 'patience', 50) 33 | least_epoch = getattr(self.config, 'least_epoch', 40) 34 | best_val_accuracy = 0 35 | 36 | for epoch in range(1, num_epochs + 1): 37 | self.model['default'].train() 38 | self.optimizer['default'].zero_grad() 39 | 40 | output = self.model['default'](self.dataset, self.config.loss_hp) 41 | loss = 
F.cross_entropy(output[self.dataset.train_mask], self.dataset.y[self.dataset.train_mask]) 42 | loss.backward() 43 | self.optimizer['default'].step() 44 | 45 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}") 46 | 47 | val_accuracy = self.eval(metric="accuracy") 48 | 49 | if val_accuracy > best_val_accuracy: 50 | best_val_accuracy = val_accuracy 51 | patience_counter = 0 52 | else: 53 | patience_counter += 1 54 | 55 | if patience_counter >= patience and epoch > least_epoch: 56 | print(f"Early stopping at epoch {epoch+1}.") 57 | break 58 | 59 | print("Training Finished!") 60 | 61 | def eval(self, metric="accuracy"): 62 | self.model['default'].eval() 63 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 64 | 65 | with torch.no_grad(): 66 | out = self.model['default'](self.dataset, self.config.loss_hp) 67 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 68 | 69 | if metric == "accuracy": 70 | return accuracy_score(all_labels, predictions) 71 | elif metric == "bacc": 72 | return balanced_accuracy_score(all_labels, predictions) 73 | elif metric == "macro_f1": 74 | return f1_score(all_labels, predictions, average='macro') 75 | else: 76 | raise ValueError(f"Unknown metric: {metric}") 77 | 78 | def test(self): 79 | self.model['default'].eval() 80 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 81 | 82 | with torch.no_grad(): 83 | out = self.model['default'](self.dataset, self.config.loss_hp) 84 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 85 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 86 | 87 | accuracy = accuracy_score(all_labels, predictions) 88 | macro_f1 = f1_score(all_labels, predictions, average='macro') 89 | bacc = balanced_accuracy_score(all_labels, predictions) 90 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 91 | 92 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/ImGAGN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/layers.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | 5 | from torch.nn.parameter import Parameter 6 | from torch.nn.modules.module import Module 7 | 8 | 9 | class GraphConvolution(Module): 10 | 11 | def __init__(self, in_features, out_features, bias=True): 12 | super(GraphConvolution, self).__init__() 13 | self.in_features = in_features 14 | self.out_features = out_features 15 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 16 | if bias: 17 | self.bias = Parameter(torch.FloatTensor(out_features)) 18 | else: 19 | self.register_parameter('bias', None) 20 | self.reset_parameters() 21 | 22 | def reset_parameters(self): 23 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 24 | self.weight.data.uniform_(-stdv, stdv) 25 | if self.bias is not None: 26 | self.bias.data.uniform_(-stdv, stdv) 27 | 28 | def forward(self, input, adj): 29 | support = torch.mm(input, self.weight) 30 | output = torch.spmm(adj, support) 31 | if self.bias is not None: 32 | return output + self.bias 33 | else: 34 | return output 35 | 36 | def __repr__(self): 37 | return self.__class__.__name__ + ' (' \ 38 | + str(self.in_features) + ' -> ' \ 39 | + str(self.out_features) + ')' 40 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/models.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from IGL_Bench.algorithm.ImGAGN.layers import GraphConvolution 4 | import torch 5 | 6 | class Attention(nn.Module): 7 | def __init__(self, input_dim, output_dim): 8 | super(Attention, self).__init__() 9 | self.mlp = nn.Sequential( 10 | nn.Linear(input_dim, input_dim // 2, bias=True), 11 | nn.ReLU(), 12 | nn.Linear(input_dim // 2, output_dim, bias=True), 13 | ) 14 | 15 | def forward(self, x): 16 | return self.mlp(x) 17 | 18 | class GCN(nn.Module): 19 | def __init__(self, nfeat, nhid, nclass, dropout, generate_node, min_node): 20 | super(GCN, self).__init__() 21 | 22 | self.gc1 = GraphConvolution(nfeat, nhid) 23 | self.gc2 = GraphConvolution(nhid, nclass) 24 | self.gc3 = GraphConvolution(nhid, 2) 25 | self.attention = Attention(nfeat*2, 1) 26 | self.generate_node = generate_node 27 | self.min_node = min_node 28 | self.dropout = dropout 29 | self.eps = 1e-10 30 | 31 | def forward(self, x, adj): 32 | 33 | x = F.relu(self.gc1(x, adj)) 34 | x = F.dropout(x, self.dropout, training=self.training) 35 | x1 = self.gc2(x, adj) 36 | x2 = self.gc3(x, adj) 37 | return F.log_softmax(x1, dim=1), F.log_softmax(x2, dim=1), F.softmax(x1, dim=1)[:,-1] 38 | 39 | def get_embedding(self,x , adj): 40 | x = F.relu(self.gc1(x, adj)) 41 | x = torch.spmm(adj, x) 42 | return x 43 | 44 | def reset_parameters(self): 45 | for m in self.modules(): 46 | if isinstance(m, nn.Linear): 47 | nn.init.xavier_uniform_(m.weight) 48 | if m.bias is not None: 49 | nn.init.zeros_(m.bias) 50 | elif isinstance(m, GraphConvolution): 51 | nn.init.xavier_uniform_(m.weight) 52 | if m.bias is not None: 53 | nn.init.zeros_(m.bias) 54 | elif isinstance(m, Attention): 55 | for layer in m.mlp: 56 | if isinstance(layer, nn.Linear): 57 | nn.init.xavier_uniform_(layer.weight) 58 | if layer.bias is not None: 59 | nn.init.zeros_(layer.bias) 60 | 61 | class Generator(nn.Module): 62 | def __init__(self, dim): 63 | super(Generator, self).__init__( ) 64 | 65 | self.fc1 = nn.Linear(100, 200) 66 | self.fc2 = nn.Linear(200, 200) 67 | self.fc3 = nn.Linear(200, dim) 68 | self.fc4 = nn.Tanh() 69 | 70 | def forward(self, x): 71 | x = F.relu(self.fc1(x)) 72 | x = F.relu(self.fc2(x)) 73 | x = self.fc3(x) 74 | x = self.fc4(x) 75 | x = (x+1)/2 76 | return x 77 | 78 | def reset_parameters(self): 79 | for m in self.modules(): 80 | if isinstance(m, nn.Linear): 81 | nn.init.xavier_uniform_(m.weight) 82 | if m.bias is not None: 83 | nn.init.zeros_(m.bias) 84 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGAGN/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | from sklearn.metrics import classification_report 5 | import sklearn 6 
| 7 | def load_data(ratio_generated, path="../dataset/citeseer/", dataset="citeseer"): 8 | print('Loading {} dataset...'.format(dataset)) 9 | 10 | idx_features_labels = np.genfromtxt("{}features.{}".format(path, dataset), 11 | dtype=np.float32) 12 | features = sp.csr_matrix(idx_features_labels[:, 0:-1], dtype=np.float32) 13 | labels = idx_features_labels[:, -1] 14 | 15 | idx_train = np.genfromtxt("{}train.{}".format(path, dataset), 16 | dtype=np.int32).squeeze() 17 | 18 | idx_test = np.genfromtxt("{}test.{}".format(path, dataset), 19 | dtype=np.int32).squeeze() 20 | 21 | majority = np.array([x for x in idx_train if labels[x] == 0]) 22 | minority = np.array([x for x in idx_train if labels[x] == 1]) 23 | 24 | num_minority = minority.shape[0] 25 | num_majority = majority.shape[0] 26 | print("Number of majority: ", num_majority) 27 | print("Number of minority: ", num_minority) 28 | 29 | generate_node = [] 30 | generate_label=[] 31 | for i in range(labels.shape[0], labels.shape[0]+int(ratio_generated*num_majority)-num_minority): 32 | generate_node.append(i) 33 | generate_label.append(1) 34 | idx_train= np.hstack((idx_train, np.array(generate_node))) 35 | print(idx_train.shape) 36 | 37 | minority_test = np.array([x for x in idx_test if labels[x] == 1]) 38 | minority_all = np.hstack((minority, minority_test)) 39 | 40 | 41 | labels= np.hstack((labels, np.array(generate_label))) 42 | 43 | 44 | edges = np.genfromtxt("{}edges.{}".format(path, dataset), 45 | dtype=np.int32) 46 | 47 | adj_real = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 48 | shape=(labels.shape[0], labels.shape[0]), 49 | dtype=np.float32) 50 | 51 | adj = adj_real + adj_real.T.multiply(adj_real.T > adj_real) - adj_real.multiply(adj_real.T > adj_real) 52 | 53 | features = normalize(features) 54 | adj = normalize(adj + sp.eye(adj.shape[0])) 55 | 56 | features = torch.FloatTensor(np.array(features.todense())) 57 | labels = torch.LongTensor(labels) 58 | adj = sparse_mx_to_torch_sparse_tensor(adj) 59 | 60 | idx_train = torch.LongTensor(idx_train) 61 | idx_test = torch.LongTensor(idx_test) 62 | generate_node=torch.LongTensor(np.array(generate_node)) 63 | minority = torch.LongTensor(minority) 64 | majority = torch.LongTensor(majority) 65 | minority_all = torch.LongTensor(minority_all) 66 | 67 | return adj, adj_real,features, labels, idx_train, idx_test, generate_node, minority, majority, minority_all#, generate_node_test, minority_test 68 | 69 | 70 | 71 | 72 | def normalize(mx): 73 | rowsum = np.array(mx.sum(1)) 74 | r_inv = np.power(rowsum, -1).flatten() 75 | r_inv[np.isinf(r_inv)] = 0. 
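    # 1/deg(v) is inf for isolated nodes; zero those entries so the
    # row-normalization D^-1 A below simply leaves all-zero rows untouched.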
76 |     r_mat_inv = sp.diags(r_inv)
77 |     mx = r_mat_inv.dot(mx)
78 |     return mx
79 | 
80 | 
81 | def accuracy(output, labels, output_AUC):
82 |     preds = output.max(1)[1].type_as(labels)
83 | 
84 |     y_true = labels.cpu().numpy()
85 |     y_pred = preds.cpu().numpy()
86 |     y_score = output_AUC.detach().cpu().numpy()
87 | 
88 |     recall = sklearn.metrics.recall_score(y_true, y_pred, average='macro')
89 |     f1 = sklearn.metrics.f1_score(y_true, y_pred, average='macro')
90 |     acc = sklearn.metrics.accuracy_score(y_true, y_pred)
91 |     precision = sklearn.metrics.precision_score(y_true, y_pred, average='macro')
92 | 
93 |     try:
94 |         auc = sklearn.metrics.roc_auc_score(y_true, y_score, multi_class='ovr', average='macro')
95 |     except ValueError:
96 |         auc = 0.0
97 | 
98 |     return recall, f1, auc, acc, precision
99 | 
100 | 
101 | 
102 | def sparse_mx_to_torch_sparse_tensor(sparse_mx):
103 |     sparse_mx = sparse_mx.tocoo().astype(np.float32)
104 |     indices = torch.from_numpy(
105 |         np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
106 |     values = torch.from_numpy(sparse_mx.data)
107 |     shape = torch.Size(sparse_mx.shape)
108 |     return torch.sparse.FloatTensor(indices, values, shape)
109 | 
110 | def add_edges(adj_real, adj_new):
111 |     adj = adj_real + adj_new
112 |     adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
113 |     adj = normalize(adj + sp.eye(adj.shape[0]))
114 |     adj = sparse_mx_to_torch_sparse_tensor(adj)
115 |     return adj
116 | 
117 | def euclidean_dist(x, y):
118 |     m, n = x.size(0), y.size(0)
119 |     xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n)
120 |     yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t()
121 |     dist = xx + yy
122 |     dist.addmm_(x, y.t(), beta=1, alpha=-2)  # dist = xx + yy - 2 * x @ y.T
123 |     dist = dist.clamp(min=1e-12).sqrt()  # for numerical stability
124 |     return dist
125 | 
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/ImGKB/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/dataloader.py: --------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | from scipy.sparse import csr_matrix, lil_matrix
4 | import math
5 | 
6 | class GraphBatchGenerator:
7 |     def __init__(self, config, adj, features, y, index, device='cuda'):
8 | 
9 |         self.batch_size = config.batch_size
10 |         self.graph_pooling_type = getattr(config,'graph_pooling_type','average')
11 |         self.shuffle = getattr(config,"shuffle", True)
12 |         self.device = device
13 | 
14 |         self.adj = [adj[i] for i in index]
15 |         self.features = [features[i] for i in index]
16 |         self.y = [y[i] for i in index]
17 | 
18 |         self.adj_lst = []
19 |         self.features_lst = []
20 |         self.graph_pool_lst = []
21 |         self.graph_indicator_lst = []
22 |         self.y_lst = []
23 |         self.n_valid_batches = 0
24 | 
25 |         self.generate_batches()
26 | 
27 |     def generate_batches(self):
28 |         N = len(self.y)
29 |         if self.shuffle:
30 |             index = np.random.permutation(N)
31 |         else:
32 |             index = np.arange(N, dtype=np.int32)
33 | 
34 |         n_batches = math.ceil(N / self.batch_size)
35 | 
36 |         adj_lst_tmp = []
37 |         features_lst_tmp = []
38 |         graph_pool_lst_tmp = []
39 |         graph_indicator_lst_tmp = []
40 |         y_lst_tmp = []
41 | 
42 |         nu = 0
43 | 
44 |         for i in range(0, N, self.batch_size):
45 |             n_graphs = min(i + self.batch_size, N) - i
46 |
n_nodes = sum(self.adj[index[j]].shape[0] 47 | for j in range(i, i + n_graphs)) 48 | 49 | adj_batch = lil_matrix((n_nodes, n_nodes)) 50 | d_feat = self.features[0].shape[1] 51 | features_batch = np.zeros((n_nodes, d_feat), dtype=np.float32) 52 | 53 | graph_indicator_batch = np.zeros(n_nodes, dtype=np.int64) 54 | y_batch = np.zeros(n_graphs, dtype=np.int64) 55 | graph_pool_batch = lil_matrix((n_graphs, n_nodes)) 56 | 57 | idx = 0 58 | for j in range(i, i + n_graphs): 59 | n = self.adj[index[j]].shape[0] 60 | 61 | adj_batch[idx: idx + n, idx: idx + n] = self.adj[index[j]] 62 | features_batch[idx: idx + n, :] = self.features[index[j]] 63 | 64 | graph_indicator_batch[idx: idx + n] = j - i 65 | 66 | y_batch[j - i] = self.y[index[j]] 67 | 68 | if self.graph_pooling_type == "average": 69 | graph_pool_batch[j - i, idx: idx + n] = 1.0 / n 70 | else: 71 | graph_pool_batch[j - i, idx: idx + n] = 1 72 | 73 | idx += n 74 | 75 | if sum(y_batch) == 0 or sum(y_batch) == n_graphs: 76 | nu += 1 77 | else: 78 | adj_lst_tmp.append(sparse_mx_to_torch_sparse_tensor(adj_batch).to(self.device)) 79 | features_lst_tmp.append(torch.FloatTensor(features_batch).to(self.device)) 80 | graph_pool_lst_tmp.append(sparse_mx_to_torch_sparse_tensor(graph_pool_batch).to(self.device)) 81 | graph_indicator_lst_tmp.append(torch.LongTensor(graph_indicator_batch).to(self.device)) 82 | y_lst_tmp.append(torch.LongTensor(y_batch).to(self.device)) 83 | 84 | self.adj = adj_lst_tmp 85 | self.features = features_lst_tmp 86 | self.graph_pool = graph_pool_lst_tmp 87 | self.graph_indicator = graph_indicator_lst_tmp 88 | self.y = y_lst_tmp 89 | self.n_batches = n_batches - nu 90 | 91 | 92 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 93 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 94 | indices = torch.from_numpy(np.vstack((sparse_mx.row, 95 | sparse_mx.col))).long() 96 | values = torch.from_numpy(sparse_mx.data) 97 | shape = torch.Size(sparse_mx.shape) 98 | return torch.sparse.FloatTensor(indices, values, shape) 99 | 100 | def my_load_data(dataset): 101 | edges = dataset.data.edge_index.numpy() 102 | graph_indicator = [] 103 | for graph_id, data in enumerate(dataset): 104 | num_nodes = data.num_nodes 105 | graph_indicator.extend([graph_id] * num_nodes) 106 | graph_indicator = np.array(graph_indicator) 107 | 108 | A = csr_matrix( 109 | (np.ones(edges.shape[1]), (edges[0, :], edges[1, :])), 110 | shape=(graph_indicator.size, graph_indicator.size) 111 | ) 112 | 113 | X = dataset.data.x.numpy() 114 | labels = [data.y.item() for data in dataset] # shape: G 115 | 116 | _, graph_size = np.unique(graph_indicator, return_counts=True) 117 | adj = [] 118 | features = [] 119 | start_idx = 0 120 | for i in range(len(dataset)): 121 | end_idx = start_idx + graph_size[i] 122 | sub_adj = A[start_idx:end_idx, start_idx:end_idx] 123 | sub_features = X[start_idx:end_idx, :] 124 | 125 | adj.append(sub_adj) 126 | features.append(sub_features) 127 | 128 | start_idx = end_idx 129 | labels = np.array(labels) 130 | return adj, features, labels -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/inforneck.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.nn as nn 4 | 5 | def MI_Est(discriminator, embeddings, positive): 6 | eps = 1e-10 7 | batch_size = embeddings.shape[0] 8 | shuffle_embeddings = positive[torch.randperm(batch_size)] 9 | joint = discriminator(embeddings,positive) 10 | margin = 
discriminator(embeddings, shuffle_embeddings) 11 | joint = joint + eps 12 | margin = margin + eps 13 | swich = 'Donsker' 14 | if swich=='Donsker': 15 | mi_est = torch.mean(joint) + torch.clamp(torch.log(torch.mean(torch.exp(margin))),-10000,10000) 16 | elif swich=='JSD': 17 | mi_est = -torch.mean(F.softplus(-joint)) - torch.mean(F.softplus(-margin)+margin) 18 | elif swich=='x^2': 19 | mi_est = torch.mean(joint**2) - 0.5* torch.mean((torch.sqrt(margin**2)+1.0)**2) 20 | return mi_est 21 | 22 | class InBo(torch.nn.Module): 23 | def __init__(self, hidden_size): 24 | super(InBo, self).__init__() 25 | 26 | self.input_size = hidden_size 27 | self.hidden_size = hidden_size 28 | self.lin1 = torch.nn.Linear(self.input_size,self.hidden_size) 29 | self.lin2 = torch.nn.Linear(self.hidden_size, 1) 30 | self.relu = torch.nn.ReLU() 31 | self.reset_parameters() 32 | 33 | def reset_parameters(self): 34 | self.lin1.reset_parameters() 35 | self.lin2.reset_parameters() 36 | def forward(self, embeddings,positive): 37 | cat_embeddings = torch.cat((embeddings, positive),dim = 1) 38 | pre = self.relu(self.lin1(cat_embeddings)) 39 | pre = self.relu(self.lin2(pre)) 40 | return pre 41 | 42 | class Attention(nn.Module): 43 | def __init__(self, in_size, hidden_size=16): 44 | super(Attention, self).__init__() 45 | 46 | self.project = nn.Sequential( 47 | nn.Linear(in_size, hidden_size), 48 | nn.ReLU(), 49 | nn.Linear(hidden_size, 1, bias=False) 50 | ) 51 | def forward(self, z): 52 | w = self.project(z) 53 | beta = torch.softmax(w, dim=1) 54 | return (beta * z).sum(1), beta -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/kernel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.parameter import Parameter 4 | 5 | class KerRW(nn.Module): 6 | def __init__(self, max_step, hidden_graphs, size_hidden_graphs, hidden_dim, device): 7 | super(KerRW, self).__init__() 8 | self.max_step = max_step 9 | self.hidden_graphs = hidden_graphs 10 | self.size_hidden_graphs = size_hidden_graphs 11 | self.device = device 12 | self.adj_hidden = Parameter( 13 | torch.FloatTensor(hidden_graphs, (size_hidden_graphs * (size_hidden_graphs - 1)) // 2)) 14 | self.features_hidden = Parameter(torch.FloatTensor(hidden_graphs, size_hidden_graphs, hidden_dim)) 15 | self.bn = nn.BatchNorm1d(hidden_graphs * max_step) 16 | self.relu = nn.ReLU() 17 | self.sigmoid = nn.Sigmoid() 18 | self.init_weights() 19 | 20 | def init_weights(self): 21 | nn.init.kaiming_normal_(self.adj_hidden) 22 | nn.init.kaiming_normal_(self.features_hidden) 23 | 24 | def forward(self, features): 25 | 26 | adj_hidden_norm = torch.zeros(self.hidden_graphs, self.size_hidden_graphs, self.size_hidden_graphs).to(self.device) 27 | idx = torch.triu_indices(self.size_hidden_graphs, self.size_hidden_graphs, 1) 28 | adj_hidden_norm[:, idx[0], idx[1]] = self.relu(self.adj_hidden) 29 | adj_hidden_norm = adj_hidden_norm + torch.transpose(adj_hidden_norm, 1, 2) 30 | x_o = features 31 | z = self.features_hidden 32 | zx = torch.einsum("abc,dc->abd", (z, x_o)) 33 | out = list() 34 | for i in range(self.max_step): 35 | z = torch.einsum("abc,acd->abd", (adj_hidden_norm, z)) 36 | t = torch.einsum("abc,dc->abd", (z, x_o)) 37 | t = torch.mul(zx, t) 38 | t = torch.sum(t, dim=1) 39 | t = torch.transpose(t, 0, 1) #N X m 40 | out.append(t) 41 | out_rw = torch.cat(out, dim=1) 42 | out_att = torch.stack(out, dim=1) 43 | return out_rw, out_att 
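
# A minimal smoke test for the KerRW layer above (illustrative only: the
# dimensions below are made up and do not come from any IGL-Bench config).
# It checks the two output shapes of the random-walk kernel module.
if __name__ == "__main__":
    ker = KerRW(max_step=3, hidden_graphs=4, size_hidden_graphs=5,
                hidden_dim=16, device='cpu')
    feats = torch.randn(10, 16)  # e.g. 10 pooled graph embeddings of width hidden_dim
    out_rw, out_att = ker(feats)
    print(out_rw.shape)   # torch.Size([10, 12]) = (N, hidden_graphs * max_step)
    print(out_att.shape)  # torch.Size([10, 3, 4]) = (N, max_step, hidden_graphs)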
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def focal_loss(logits, labels, alpha=None, gamma=2): 6 | """Compute the focal loss between `logits` and the ground truth `labels`. 7 | Focal loss = -alpha_t * (1-pt)^gamma * log(pt) 8 | where pt is the probability of being classified to the true class. 9 | pt = p (if true class), otherwise pt = 1 - p. p = sigmoid(logit). 10 | Args: 11 | logits: A float tensor of size [batch, num_classes]. 12 | labels: A float tensor of size [batch, num_classes]. 13 | alpha: A float tensor of size [batch_size] 14 | specifying per-example weight for balanced cross entropy. 15 | gamma: A float scalar modulating loss from hard and easy examples. 16 | Returns: 17 | focal_loss: A float32 scalar representing normalized total loss. 18 | """ 19 | bc_loss = F.binary_cross_entropy_with_logits(input=logits, target=labels.float(), reduction="none") 20 | logits = torch.clamp(logits, 0.001, 0.999) 21 | 22 | if gamma == 0.0: 23 | modulator = 1.0 24 | else: 25 | modulator = torch.exp(-gamma * labels * logits - gamma * torch.log(1 + torch.exp(-1.0 * logits))) 26 | 27 | loss = modulator * bc_loss 28 | 29 | if alpha is not None: 30 | weighted_loss = alpha * loss 31 | focal_loss = torch.sum(weighted_loss) 32 | else: 33 | focal_loss = torch.sum(loss) 34 | 35 | focal_loss /= torch.sum(labels) 36 | return focal_loss 37 | 38 | 39 | class Loss(torch.nn.Module): 40 | def __init__( 41 | self, 42 | loss_type: str = "cross_entropy", 43 | beta: float = 0.999, 44 | fl_gamma=2, 45 | samples_per_class=None, 46 | class_balanced=True, 47 | ): 48 | """ 49 | Compute the Class Balanced Loss between `logits` and the ground truth `labels`. 50 | Class Balanced Loss: ((1-beta)/(1-beta^n))*Loss(labels, logits) 51 | where Loss is one of the standard losses used for Neural Networks. 52 | reference: https://openaccess.thecvf.com/content_CVPR_2019/papers/Cui_Class-Balanced_Loss_Based_on_Effective_Number_of_Samples_CVPR_2019_paper.pdf 53 | Args: 54 | loss_type: string. One of "focal_loss", "cross_entropy", 55 | "binary_cross_entropy", "softmax_binary_cross_entropy". 56 | beta: float. Hyperparameter for Class balanced loss. 57 | fl_gamma: float. Hyperparameter for Focal loss. 58 | samples_per_class: A python list of size [num_classes]. 59 | Required if class_balance is True. 60 | class_balanced: bool. Whether to use class balanced loss. 61 | Returns: 62 | Loss instance 63 | """ 64 | super(Loss, self).__init__() 65 | 66 | # if class_balanced is True and samples_per_class is None: 67 | # raise ValueError("samples_per_class cannot be None when class_balanced is True") 68 | 69 | self.loss_type = loss_type 70 | self.beta = beta 71 | self.fl_gamma = fl_gamma 72 | self.samples_per_class = samples_per_class 73 | self.class_balanced = class_balanced 74 | 75 | def forward( 76 | self, 77 | logits: torch.tensor, 78 | labels: torch.tensor, 79 | ): 80 | """ 81 | Compute the Class Balanced Loss between `logits` and the ground truth `labels`. 82 | Class Balanced Loss: ((1-beta)/(1-beta^n))*Loss(labels, logits) 83 | where Loss is one of the standard losses used for Neural Networks. 84 | Args: 85 | logits: A float tensor of size [batch, num_classes]. 86 | labels: An int tensor of size [batch]. 
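        Example (illustrative sketch, not from the original code; assumes a
        3-class batch and the default beta/gamma):
            >>> logits = torch.randn(4, 3)
            >>> labels = torch.tensor([0, 1, 2, 0])
            >>> criterion = Loss(loss_type="focal_loss", class_balanced=True)
            >>> loss = criterion(logits, labels)  # scalar class-balanced focal loss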
87 | Returns: 88 | cb_loss: A float tensor representing class balanced loss 89 | """ 90 | 91 | batch_size = logits.size(0) 92 | num_classes = logits.size(1) 93 | # labels_one_hot= labels 94 | labels_one_hot = F.one_hot(labels, num_classes) 95 | 96 | self.samples_per_class = labels_one_hot.sum(axis=0).cpu().numpy().tolist() 97 | 98 | if self.class_balanced: 99 | effective_num = 1.0 - np.power(self.beta, self.samples_per_class) 100 | weights = (1.0 - self.beta) / np.array(effective_num) 101 | weights = weights / np.sum(weights) * num_classes 102 | weights = torch.tensor(weights, device=logits.device).float() 103 | # print('weight= ',weights) 104 | 105 | if self.loss_type != "cross_entropy": 106 | weights = weights.unsqueeze(0) 107 | weights = weights.repeat(batch_size, 1) * labels_one_hot 108 | weights = weights.sum(1) 109 | weights = weights.unsqueeze(1) 110 | weights = weights.repeat(1, num_classes) 111 | else: 112 | weights = None 113 | 114 | if self.loss_type == "focal_loss": 115 | cb_loss = focal_loss(logits, labels_one_hot, alpha=weights, gamma=self.fl_gamma) 116 | elif self.loss_type == "cross_entropy": 117 | cb_loss = F.cross_entropy(input=logits, target=labels, weight=weights) 118 | elif self.loss_type == "binary_cross_entropy": 119 | cb_loss = F.binary_cross_entropy_with_logits(input=logits, target=labels_one_hot, weight=weights) 120 | elif self.loss_type == "softmax_binary_cross_entropy": 121 | pred = logits.softmax(dim=1) 122 | cb_loss = F.binary_cross_entropy(input=pred, target=labels_one_hot.to(torch.float32), weight=weights) 123 | return cb_loss -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.parameter import Parameter 4 | from IGL_Bench.algorithm.ImGKB.layers import graph_convolution, MLP 5 | from torch_geometric.nn import global_mean_pool 6 | from IGL_Bench.algorithm.ImGKB.inforneck import InBo, MI_Est, Attention 7 | from IGL_Bench.algorithm.ImGKB.kernel import KerRW 8 | import torch.nn.functional as F 9 | 10 | class KGIB(nn.Module): 11 | def __init__(self, input_dim, hidden_dim, hidden_graphs, size_hidden_graphs, 12 | nclass, max_step, num_layers, backbone, device='cuda'): 13 | super(KGIB, self).__init__() 14 | self.num_layers = num_layers 15 | self.device = device 16 | self.relu = nn.ReLU() 17 | self.ker_layers = torch.nn.ModuleList() 18 | self.batch_norms = torch.nn.ModuleList() 19 | self.bn = nn.BatchNorm1d(hidden_graphs * max_step) 20 | self.infoneck = InBo(hidden_dim + hidden_graphs) 21 | self.atten = Attention(hidden_graphs) 22 | self.linear_transform_in = nn.Sequential(nn.Linear(input_dim, 32), nn.Dropout(0.5),self.relu, nn.Linear(32, hidden_dim),self.relu) 23 | self.linear_transform_out = nn.Sequential(nn.Linear(hidden_graphs * max_step, hidden_graphs * max_step), 24 | self.relu, nn.Linear(hidden_graphs * max_step, 2)) 25 | self.mlp_1 = nn.Linear(self.num_layers*hidden_graphs * max_step + hidden_dim, hidden_dim) 26 | self.mlp_2 = nn.Linear(hidden_dim, 2) 27 | self.GNN_features = graph_convolution(hidden_dim, hidden_dim, hidden_dim, device, backbone) 28 | self.conv = nn.ModuleList([self.GNN_features for _ in range(self.num_layers)]) 29 | for layer in range(self.num_layers): 30 | self.ker_layers.append(KerRW(max_step, hidden_graphs, size_hidden_graphs, hidden_dim, self.device)) 31 | self.linears_prediction = torch.nn.ModuleList() 32 | num_mlp_layers = 2 33 | 
hidden_dim1 = hidden_graphs * max_step 34 | for layer in range(self.num_layers + 1): 35 | if layer == 0: 36 | self.linears_prediction.append(MLP(num_mlp_layers, hidden_dim, hidden_dim, nclass)) 37 | else: 38 | self.linears_prediction.append(MLP(num_mlp_layers, hidden_dim1, hidden_dim, nclass)) 39 | 40 | def forward(self, adj, features, graph_indicator): 41 | h = self.linear_transform_in(features) 42 | graph_embs = global_mean_pool(h, graph_indicator) 43 | hidden_rep = [graph_embs] 44 | loss_mi = 0 45 | for layer in range(self.num_layers): 46 | h = self.conv[layer](h, adj) 47 | h_g = global_mean_pool(h,graph_indicator) 48 | h1, h_att = self.ker_layers[layer](h_g) 49 | h_a,_ =self.atten(h_att) 50 | loss_mi += MI_Est(self.infoneck, h_g, h_a) # I(H_G, H^_G) 51 | hidden_rep.append(h1) 52 | 53 | # I(Y, H^_G) 54 | score_over_layer = 0 55 | for layer, h in enumerate(hidden_rep): 56 | score_over_layer += self.linears_prediction[layer](h) 57 | 58 | return score_over_layer, loss_mi -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ImGKB/util.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import f1_score, accuracy_score, roc_auc_score, balanced_accuracy_score 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | class AverageMeter(object): 6 | """Computes and stores the average and current value""" 7 | def __init__(self): 8 | self.reset() 9 | def reset(self): 10 | self.val = 0 11 | self.avg = 0 12 | self.sum = 0 13 | self.count = 0 14 | def update(self, val, n=1): 15 | self.val = val 16 | self.sum += val * n 17 | self.count += n 18 | self.avg = self.sum / self.count 19 | 20 | def Roc_F(logits, labels, pre='valid'): 21 | if labels.max() > 1:#require set(labels) to be the same as columns of logits 22 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 23 | else: 24 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:,1].detach().cpu(), average='macro') 25 | 26 | macro_F = f1_score(labels.detach().cpu(), torch.argmax(logits, dim=-1).detach().cpu(), average='macro') 27 | 28 | return auc_score, macro_F 29 | 30 | def compute_metrics(logits, labels): 31 | preds = torch.argmax(logits, dim=-1) 32 | 33 | acc = accuracy_score(labels.detach().cpu(), preds.detach().cpu()) 34 | 35 | bacc = balanced_accuracy_score(labels.detach().cpu(), preds.detach().cpu()) 36 | 37 | mf1 = f1_score(labels.detach().cpu(), preds.detach().cpu(), average='macro') 38 | 39 | if labels.max() > 1: 40 | auc_roc = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 41 | else: 42 | auc_roc = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:, 1].detach().cpu(), average='macro') 43 | 44 | return acc, bacc, mf1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/PASTEL/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/cal.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import numpy as np 3 | import multiprocessing as mp 4 | import math 5 | 
import torch 6 | 7 | def cal_shortest_path_distance(adj, approximate, n_nodes): 8 | Adj = adj.detach().cpu().numpy() 9 | G = nx.from_numpy_array(Adj) 10 | G.edges(data=True) 11 | dists_array = np.zeros((n_nodes, n_nodes)) 12 | dists_dict = all_pairs_shortest_path_length_parallel(G, cutoff=approximate if approximate > 0 else None) 13 | 14 | cnt_disconnected = 0 15 | 16 | for i, node_i in enumerate(G.nodes()): 17 | shortest_dist = dists_dict[node_i] 18 | for j, node_j in enumerate(G.nodes()): 19 | dist = shortest_dist.get(node_j, -1) 20 | if dist == -1: 21 | cnt_disconnected += 1 22 | if dist != -1: 23 | dists_array[node_i, node_j] = dist 24 | return dists_array 25 | 26 | def all_pairs_shortest_path_length_parallel(graph, cutoff=None, num_workers=4): 27 | nodes = list(graph.nodes) 28 | if len(nodes) < 50: 29 | num_workers = int(num_workers / 4) 30 | elif len(nodes) < 400: 31 | num_workers = int(num_workers / 2) 32 | 33 | pool = mp.Pool(processes=num_workers) 34 | results = [pool.apply_async(single_source_shortest_path_length_range, 35 | args=(graph, nodes[int(len(nodes) / num_workers * i):int(len(nodes) / num_workers * (i + 1))], cutoff)) for i in range(num_workers)] 36 | output = [p.get() for p in results] 37 | dists_dict = merge_dicts(output) 38 | pool.close() 39 | pool.join() 40 | return dists_dict 41 | 42 | def single_source_shortest_path_length_range(graph, node_range, cutoff): 43 | dists_dict = {} 44 | for node in node_range: 45 | dists_dict[node] = nx.single_source_shortest_path_length(graph, node, cutoff) # unweighted 46 | return dists_dict 47 | 48 | 49 | 50 | def merge_dicts(dicts): 51 | result = {} 52 | for dictionary in dicts: 53 | result.update(dictionary) 54 | return result 55 | 56 | def cal_group_pagerank_args(pagerank_before, pagerank_after, num_nodes): 57 | node_pair_group_pagerank_mat = rank_group_pagerank(pagerank_before, pagerank_after, num_nodes) # rank 58 | PI = 3.1415926 59 | for i in range(num_nodes): 60 | for j in range(num_nodes): 61 | node_pair_group_pagerank_mat[i][j] = 2 - (math.cos((node_pair_group_pagerank_mat[i][j] / (num_nodes * num_nodes)) * PI) + 1) 62 | 63 | return node_pair_group_pagerank_mat 64 | 65 | def rank_group_pagerank(pagerank_before, pagerank_after, num_nodes): 66 | pagerank_dist = torch.mm(pagerank_before, pagerank_after.transpose(-1, -2)).detach().cpu() 67 | node_pair_group_pagerank_mat = np.zeros((num_nodes, num_nodes)) 68 | node_pair_group_pagerank_mat_list = [] 69 | for i in range(num_nodes): 70 | for j in range(num_nodes): 71 | node_pair_group_pagerank_mat_list.append(pagerank_dist[i, j]) 72 | node_pair_group_pagerank_mat_list = np.array(node_pair_group_pagerank_mat_list) 73 | index = np.argsort(-node_pair_group_pagerank_mat_list) 74 | rank = np.argsort(index) 75 | rank = rank + 1 76 | iter = 0 77 | for i in range(num_nodes): 78 | for j in range(num_nodes): 79 | node_pair_group_pagerank_mat[i][j] = rank[iter] 80 | iter = iter + 1 81 | 82 | return node_pair_group_pagerank_mat 83 | 84 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/eval.py: -------------------------------------------------------------------------------- 1 | class AverageMeter(object): 2 | def __init__(self): 3 | self.history = [] 4 | self.last = None 5 | self.val = 0 6 | self.sum = 0 7 | self.count = 0 8 | 9 | def reset(self): 10 | self.last = self.mean() 11 | self.history.append(self.last) 12 | self.val = 0 13 | self.sum = 0 14 | self.count = 0 15 | 16 | def update(self, val, n=1): 17 | self.val = val 18 | 
self.sum += val * n 19 | self.count += n 20 | 21 | def mean(self): 22 | if self.count == 0: 23 | return 0. 24 | return self.sum / self.count -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/graph_learner.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class GraphLearner(nn.Module): 6 | def __init__(self, input_size, hidden_size, n_nodes, n_class, n_anchors, topk=None, epsilon=None, n_pers=16, device=None): 7 | super(GraphLearner, self).__init__() 8 | self.n_nodes = n_nodes 9 | self.n_class = n_class 10 | self.n_anchors = n_anchors 11 | self.topk = topk 12 | self.epsilon = epsilon 13 | self.device = device 14 | self.input_size=input_size 15 | 16 | self.weight_tensor = torch.Tensor(n_pers, input_size) 17 | self.weight_tensor = nn.Parameter(nn.init.xavier_uniform_(self.weight_tensor)) 18 | 19 | self.weight_tensor_for_pe = torch.Tensor(self.n_anchors, hidden_size) 20 | self.weight_tensor_for_pe = nn.Parameter(nn.init.xavier_uniform_(self.weight_tensor_for_pe)) 21 | 22 | def forward(self, context, position_encoding, gpr_rank, position_flag, ctx_mask=None): 23 | expand_weight_tensor = self.weight_tensor.unsqueeze(1) 24 | if len(context.shape) == 3: 25 | expand_weight_tensor = expand_weight_tensor.unsqueeze(1) 26 | context_fc = context.unsqueeze(0) * expand_weight_tensor 27 | context_norm = F.normalize(context_fc, p=2, dim=-1) 28 | attention = torch.bmm(context_norm, context_norm.transpose(-1, -2)).mean(0) 29 | 30 | if position_flag == 1: 31 | pe_fc = torch.mm(position_encoding, self.weight_tensor_for_pe) 32 | pe_attention = torch.mm(pe_fc, pe_fc.transpose(-1, -2)) 33 | try: 34 | attention = (attention * 0.5 + pe_attention * 0.5) * gpr_rank 35 | except RuntimeError as e: 36 | attention_cpu = attention.to('cpu') 37 | pe_attention_cpu = pe_attention.to('cpu') 38 | gpr_rank = gpr_rank.to('cpu') 39 | attention_cpu = attention_cpu * 0.5 + pe_attention_cpu * 0.5 40 | attention_cpu = attention_cpu * gpr_rank 41 | attention = attention_cpu.to('cuda') 42 | else: 43 | attention = attention * gpr_rank 44 | 45 | markoff_value = 0 46 | 47 | if ctx_mask is not None: 48 | attention = attention.masked_fill_(1 - ctx_mask.byte().unsqueeze(1), markoff_value) 49 | attention = attention.masked_fill_(1 - ctx_mask.byte().unsqueeze(-1), markoff_value) 50 | 51 | if self.epsilon is not None: 52 | if not self.epsilon == 0: 53 | attention = self.build_epsilon_neighbourhood(attention, self.epsilon, markoff_value) 54 | 55 | if self.topk is not None: 56 | attention = self.build_knn_neighbourhood(attention, self.topk, markoff_value) 57 | 58 | return attention 59 | 60 | 61 | def build_knn_neighbourhood(self, attention, topk, markoff_value): 62 | topk = min(topk, attention.size(-1)) 63 | knn_val, knn_ind = torch.topk(attention, topk, dim=-1) 64 | weighted_adjacency_matrix = to_cuda((markoff_value * torch.ones_like(attention)).scatter_(-1, knn_ind, knn_val), self.device) 65 | return weighted_adjacency_matrix 66 | 67 | 68 | def build_epsilon_neighbourhood(self, attention, epsilon, markoff_value): 69 | mask = (attention > epsilon).detach().float() 70 | 71 | try: 72 | weighted_adjacency_matrix = attention * mask + markoff_value * (1 - mask) 73 | except: 74 | attention_np = attention.cpu().detach().numpy() 75 | mask_np = mask.cpu().detach().numpy() 76 | weighted_adjacency_matrix_np = attention_np * mask_np + markoff_value * (1 - mask_np) 77 | 
weighted_adjacency_matrix = torch.from_numpy(weighted_adjacency_matrix_np).to(self.device) 78 | 79 | return weighted_adjacency_matrix 80 | 81 | def to_cuda(x, device=None): 82 | if device: 83 | x = x.to(device) 84 | return x -------------------------------------------------------------------------------- /IGL_Bench/algorithm/PASTEL/model.py: -------------------------------------------------------------------------------- 1 | from .graph_clf import GraphClf 2 | import torch.nn.functional as F 3 | import torch 4 | from sklearn.metrics import f1_score, accuracy_score,balanced_accuracy_score,roc_auc_score 5 | import numpy as np 6 | import os 7 | 8 | class Model(object): 9 | def __init__(self, config): 10 | self.config = config 11 | self.criterion = F.nll_loss 12 | 13 | self.score_func = accuracy 14 | self.wf1 = wf1 15 | self.mf1 = mf1 16 | self.bacc = bacc 17 | self.auroc = auroc 18 | self.metric_name = 'acc' 19 | 20 | self._init_new_network() 21 | self._init_optimizer() 22 | 23 | def _init_new_network(self): 24 | self.network = GraphClf(self.config) 25 | 26 | def _init_optimizer(self): 27 | self.optimizer = torch.optim.Adam(self.network.parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 28 | 29 | def save(self, dirname): 30 | params = { 31 | 'state_dict': { 32 | 'network': self.network.state_dict(), 33 | }, 34 | 'config': self.config, 35 | 'dir': dirname, 36 | } 37 | try: 38 | torch.save(params, os.path.join(dirname, "params.saved")) 39 | except BaseException: 40 | print('[ WARN: Saving failed... continuing anyway. ]') 41 | 42 | def init_saved_network(self, saved_dir): 43 | fname = os.path.join(saved_dir, "params.saved") 44 | print('[ Loading saved models %s ]' % fname) 45 | saved_params = torch.load(fname, map_location=lambda storage, loc: storage) 46 | self.state_dict = saved_params['state_dict'] 47 | 48 | self.network = GraphClf(self.config) 49 | 50 | if self.state_dict: 51 | merged_state_dict = self.network.state_dict() 52 | for k, v in self.state_dict['network'].items(): 53 | if k in merged_state_dict: 54 | merged_state_dict[k] = v 55 | self.network.load_state_dict(merged_state_dict) 56 | 57 | def reset_parameters(self): 58 | print("[ Resetting model parameters ]") 59 | # Reinitialize the network 60 | self._init_new_network() 61 | 62 | # Reinitialize optimizer and scheduler 63 | self._init_optimizer() 64 | 65 | def accuracy(labels, output): 66 | preds = output.max(1)[1].type_as(labels) 67 | correct = preds.eq(labels).double() 68 | correct = correct.sum().item() 69 | return correct / len(labels) 70 | 71 | 72 | def wf1(labels, output): 73 | pred = output.cpu().max(1)[1].numpy() 74 | labels = labels.cpu().numpy() 75 | return f1_score(labels, pred, average='weighted') 76 | 77 | 78 | def mf1(labels, output): 79 | pred = output.cpu().max(1)[1].numpy() 80 | labels = labels.cpu().numpy() 81 | return f1_score(labels, pred, average='macro') 82 | 83 | def bacc(labels, output): 84 | pred = output.cpu().max(1)[1].numpy() 85 | labels = labels.cpu().numpy() 86 | return balanced_accuracy_score(labels, pred) 87 | 88 | def auroc(labels, output): 89 | labels = labels.cpu().numpy() 90 | output = output.cpu().detach().numpy() 91 | 92 | n_classes = output.shape[1] 93 | labels_binary = np.eye(n_classes)[labels] 94 | 95 | auroc = roc_auc_score(labels_binary, output, multi_class='ovr', average='macro') 96 | return auroc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/RAWLSGCN/RawlsGCN.py: 
-------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from IGL_Bench.backbone.gcn import GCNLayer 5 | 6 | 7 | class RawlsGCNGraph(nn.Module): 8 | def __init__(self, nfeat, nhid, nclass, dropout): 9 | super(RawlsGCNGraph, self).__init__() 10 | self.gc1 = GCNLayer(nfeat, nhid, bias=True) 11 | self.gc2 = GCNLayer(nhid, nclass, bias=True) 12 | self.dropout = dropout 13 | 14 | def forward(self, x, adj): 15 | x = F.relu(self.gc1(x, adj)) 16 | x = F.dropout(x, self.dropout, training=self.training) 17 | x = self.gc2(x, adj) 18 | return F.log_softmax(x, dim=1) 19 | 20 | 21 | class RawlsGCNGrad(nn.Module): 22 | def __init__(self, nfeat, nhid, nclass, dropout): 23 | super(RawlsGCNGrad, self).__init__() 24 | self.gc1 = GCNLayer(nfeat, nhid, bias=True) 25 | self.gc2 = GCNLayer(nhid, nclass, bias=True) 26 | self.dropout = dropout 27 | # to fix gradient in trainer 28 | self.layers_info = { 29 | "gc1": 0, 30 | "gc2": 1, 31 | } 32 | 33 | def forward(self, x, adj): 34 | pre_act_embs, embs = [], [x] # adding input node features to make index padding consistent 35 | x = self.gc1(x, adj) 36 | x.retain_grad() 37 | pre_act_embs.append(x) 38 | x = F.relu(x) 39 | x = F.dropout(x, self.dropout, training=self.training) 40 | embs.append(x) 41 | 42 | x = self.gc2(x, adj) 43 | x.retain_grad() 44 | pre_act_embs.append(x) 45 | x = F.log_softmax(x, dim=1) 46 | embs.append(x) 47 | return pre_act_embs, embs 48 | 49 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/RAWLSGCN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/RAWLSGCN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/RAWLSGCN/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | 5 | 6 | def encode_onehot(labels): 7 | """Encode label to a one-hot vector.""" 8 | classes = set(labels) 9 | classes_dict = {c: np.identity(len(classes))[i, :] for i, c in enumerate(classes)} 10 | labels_onehot = np.array(list(map(classes_dict.get, labels)), dtype=np.int32) 11 | return labels_onehot 12 | 13 | 14 | def row_normalize(mx): 15 | """Row-normalize sparse matrix.""" 16 | rowsum = np.array(mx.sum(1)) 17 | r_inv = np.power(rowsum, -1).flatten() 18 | r_inv[np.isinf(r_inv)] = 0.0 19 | r_mat_inv = sp.diags(r_inv) 20 | mx = r_mat_inv @ mx 21 | return mx 22 | 23 | 24 | def symmetric_normalize(mat): 25 | """Symmetric-normalize sparse matrix.""" 26 | D = np.asarray(mat.sum(axis=0).flatten()) 27 | D = np.divide(1, D, out=np.zeros_like(D), where=D != 0) 28 | D = sp.diags(np.asarray(D)[0, :]) 29 | D.data = np.sqrt(D.data) 30 | return D @ mat @ D 31 | 32 | 33 | def matrix2tensor(mat): 34 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 35 | mat = mat.tocoo().astype(np.float32) 36 | indices = torch.from_numpy(np.vstack((mat.row, mat.col)).astype(np.int64)) 37 | values = torch.from_numpy(mat.data) 38 | shape = torch.Size(mat.shape) 39 | return torch.sparse_coo_tensor(indices, values, shape) 40 | 41 | 42 | def tensor2matrix(t): 43 | """Convert a torch sparse tensor to a scipy sparse matrix.""" 44 | indices = t.indices() 45 | row, col = indices[0, :].cpu().numpy(), indices[1, :].cpu().numpy() 46 | 
values = t.values().cpu().numpy() 47 | mat = sp.coo_matrix((values, (row, col)), shape=(t.shape[0], t.shape[1])) 48 | return mat 49 | 50 | 51 | def sparse_to_tuple(sparse_mx): 52 | """Convert sparse matrix to tuple representation.""" 53 | 54 | def to_tuple(mx): 55 | if not sp.isspmatrix_coo(mx): 56 | mx = mx.tocoo() 57 | coords = np.vstack((mx.row, mx.col)).transpose() 58 | values = mx.data 59 | shape = mx.shape 60 | return coords, values, shape 61 | 62 | if isinstance(sparse_mx, list): 63 | for i in range(len(sparse_mx)): 64 | sparse_mx[i] = to_tuple(sparse_mx[i]) 65 | else: 66 | sparse_mx = to_tuple(sparse_mx) 67 | return sparse_mx 68 | 69 | def get_doubly_stochastic(mat): 70 | sk = SinkhornKnopp(max_iter=1000, epsilon=1e-2) 71 | mat = matrix2tensor( 72 | sk.fit(mat) 73 | ) 74 | return mat 75 | 76 | 77 | class SinkhornKnopp: 78 | """ 79 | Sinkhorn-Knopp algorithm to compute doubly stochastic matrix for a non-negative square matrix with total support. 80 | For reference, see original paper: http://msp.org/pjm/1967/21-2/pjm-v21-n2-p14-s.pdf 81 | """ 82 | 83 | def __init__(self, max_iter=1000, epsilon=1e-3): 84 | """ 85 | Args: 86 | max_iter (int): The maximum number of iterations, default is 1000. 87 | epsilon (float): Error tolerance for row/column sum, should be in the range of [0, 1], default is 1e-3. 88 | """ 89 | 90 | assert isinstance(max_iter, int) or isinstance(max_iter, float), ( 91 | "max_iter is not int or float: %r" % max_iter 92 | ) 93 | assert max_iter > 0, "max_iter must be greater than 0: %r" % max_iter 94 | self.max_iter = int(max_iter) 95 | 96 | assert isinstance(epsilon, int) or isinstance(epsilon, float), ( 97 | "epsilon is not of type float or int: %r" % epsilon 98 | ) 99 | assert 0 <= epsilon < 1, ( 100 | "epsilon must be between 0 and 1 exclusive: %r" % epsilon 101 | ) 102 | self.epsilon = epsilon 103 | 104 | def fit(self, mat): 105 | """ 106 | 107 | Args: 108 | mat (scipy.sparse.matrix): The input non-negative square matrix. The matrix must have total support, i.e., 109 | row/column sum must be non-zero. 110 | Returns: 111 | ds_mat (scipy.sparse.matrix): The doubly stochastic matrix of the input matrix. 
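        Example (illustrative sketch, not a strict doctest):
            >>> import numpy as np
            >>> import scipy.sparse as sp
            >>> mat = sp.csr_matrix(np.array([[1.0, 2.0], [3.0, 4.0]]))
            >>> ds = SinkhornKnopp().fit(mat)
            >>> np.allclose(ds.sum(axis=0), 1.0, atol=2e-3)
            True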
112 |         """
113 |         assert sum(mat.data < 0) == 0  # must be non-negative
114 |         assert mat.ndim == 2  # must be a matrix
115 |         assert mat.shape[0] == mat.shape[1]  # must be square
116 | 
117 |         max_threshold, min_threshold = 1 + self.epsilon, 1 - self.epsilon
118 | 
119 |         right = np.ravel(mat.sum(axis=0).flatten())
120 |         right = np.divide(1, right, out=np.zeros_like(right), where=right != 0)
121 | 
122 |         left = mat @ right
123 |         left = np.divide(1, left, out=np.zeros_like(left), where=left != 0)
124 | 
125 |         for iteration in range(self.max_iter):
126 |             row_sum = np.ravel(mat.sum(axis=1)).flatten()
127 |             col_sum = np.ravel(mat.sum(axis=0)).flatten()
128 |             if (
129 |                 sum(row_sum < min_threshold) == 0
130 |                 and sum(row_sum > max_threshold) == 0
131 |                 and sum(col_sum < min_threshold) == 0
132 |                 and sum(col_sum > max_threshold) == 0
133 |             ):
134 |                 print(
135 |                     "Sinkhorn-Knopp - Converged in {iteration} iterations.".format(iteration=iteration)
136 |                 )
137 |                 return mat
138 | 
139 |             right = left @ mat
140 |             right = np.divide(1, right, out=np.zeros_like(right), where=right != 0)
141 | 
142 |             left = mat @ right
143 |             left = np.divide(1, left, out=np.zeros_like(left), where=left != 0)
144 | 
145 |             right_diag = sp.diags(right)
146 |             left_diag = sp.diags(left)
147 |             mat = left_diag @ mat @ right_diag
148 |         print("Sinkhorn-Knopp - Maximum number of iterations reached.")
149 |         return mat
150 | 
151 | 
-------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/ReNode/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/reweight.py: --------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.nn.functional as F
4 | import math
5 | import numpy as np
6 | from IGL_Bench.algorithm.ReNode.util import index2sparse, direct_sparse_eye, compute_degree_matrix, index2dense
7 | 
8 | def get_renode_weight(config, data):
9 |     ppr_matrix = data.Pi
10 |     gpr_matrix = torch.tensor(data.gpr).float()
11 | 
12 |     base_w = config.rn_base_weight
13 |     scale_w = config.rn_scale_weight
14 |     nnode = ppr_matrix.size(0)
15 |     unlabel_mask = data.train_mask.int().ne(1)
16 | 
17 |     gpr_sum = torch.sum(gpr_matrix, dim=1)
18 |     gpr_rn = gpr_sum.unsqueeze(1) - gpr_matrix
19 |     rn_matrix = torch.mm(ppr_matrix, gpr_rn)
20 | 
21 |     labels = data.y.squeeze()
22 |     label_matrix = F.one_hot(labels, gpr_matrix.size(1)).float()
23 |     label_matrix[unlabel_mask] = 0
24 | 
25 |     rn_matrix = torch.sum(rn_matrix * label_matrix, dim=1)
26 |     rn_matrix[unlabel_mask] = rn_matrix.max() + 99
27 | 
28 |     train_size = torch.sum(data.train_mask.int()).item()
29 |     totoro_list = rn_matrix.tolist()
30 |     id2totoro = {i: totoro_list[i] for i in range(len(totoro_list))}
31 |     sorted_totoro = sorted(id2totoro.items(), key=lambda x: x[1], reverse=False)
32 |     id2rank = {sorted_totoro[i][0]: i for i in range(nnode)}
33 |     totoro_rank = [id2rank[i] for i in range(nnode)]
34 | 
35 |     rn_weight = [(base_w + 0.5 * scale_w * (1 + math.cos(x * 1.0 * math.pi / (train_size - 1)))) for x in totoro_rank]
36 |     rn_weight = torch.from_numpy(np.array(rn_weight)).type(torch.FloatTensor)
37 |     rn_weight = rn_weight * data.train_mask.float()
38 | 
39 |     return rn_weight
40 | 
41 | def compute_rn_weight(dataset, config):
42 |     target_data = dataset
43 | 
44 |     train_index =
dataset.train_index 45 | num_classes = dataset.y.numpy().max().item() + 1 46 | train_node = [[] for _ in range(num_classes)] 47 | num_classes = torch.max(target_data.y).item() + 1 48 | for class_id in range(num_classes): 49 | class_mask = target_data.y.eq(class_id) 50 | for idx in target_data.train_index: 51 | if class_mask[idx]: 52 | train_node[class_id].append(idx) 53 | 54 | current_dir = os.path.dirname(os.path.abspath(__file__)) 55 | ppr_file = os.path.join(current_dir, '../../../PPR_file', 56 | f"{target_data.data_name}_pagerank.pt") 57 | 58 | if os.path.exists(ppr_file): 59 | target_data.Pi = torch.load(ppr_file) 60 | elif dataset.data_name == 'ogbn-arxiv': 61 | A = index2sparse(target_data.edge_index, target_data.num_nodes) 62 | A = A + direct_sparse_eye(target_data.num_nodes) # Add self-loop 63 | D = compute_degree_matrix(A, target_data.num_nodes) 64 | A_normalized = D @ A @ D 65 | 66 | ppr = torch.ones((target_data.num_nodes, 1)) / target_data.num_nodes 67 | alpha = config.pagerank_prob 68 | 69 | for _ in range(40): # Power iteration 70 | ppr = (1 - alpha) * A_normalized @ ppr + alpha * (torch.ones((target_data.num_nodes, 1)) / target_data.num_nodes) 71 | 72 | target_data.Pi = ppr 73 | else: 74 | pr_prob = 1 - config.pagerank_prob 75 | A = index2dense(target_data.edge_index, target_data.num_nodes) 76 | A_hat = A + torch.eye(A.size(0)) # Add self-loop 77 | D = torch.diag(torch.sum(A_hat, 1)) 78 | D = D.inverse().sqrt() 79 | A_hat = torch.mm(torch.mm(D, A_hat), D) 80 | target_data.Pi = pr_prob * ((torch.eye(A.size(0)) - (1 - pr_prob) * A_hat).inverse()) 81 | target_data.Pi = target_data.Pi.cpu() 82 | 83 | gpr_matrix = [] 84 | for iter_c in range(num_classes): 85 | iter_Pi = target_data.Pi[torch.tensor(train_node[iter_c]).long()] 86 | iter_gpr = torch.mean(iter_Pi, dim=0).squeeze() 87 | gpr_matrix.append(iter_gpr) 88 | 89 | temp_gpr = torch.stack(gpr_matrix, dim=0) 90 | if temp_gpr.dim() == 1: 91 | temp_gpr = temp_gpr.unsqueeze(1) 92 | temp_gpr = temp_gpr.transpose(0, 1) 93 | target_data.gpr = temp_gpr 94 | 95 | rn_weight = get_renode_weight(config, target_data) 96 | 97 | return rn_weight 98 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.backbone.gcn import GCN_node_sparse 2 | from IGL_Bench.algorithm.ReNode.reweight import compute_rn_weight 3 | import torch 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 7 | 8 | 9 | class ReNode_node_solver: 10 | def __init__(self, config, dataset, device='cuda'): 11 | self.config = config 12 | self.dataset = dataset 13 | self.device = device 14 | 15 | self.rn_weight = compute_rn_weight(dataset, config) 16 | self.rn_weight = self.rn_weight.to(self.device) 17 | self.criterion = torch.nn.CrossEntropyLoss() 18 | 19 | self.model = {} 20 | self.optimizer = {} 21 | self.initializtion() 22 | self.dataset = self.dataset.to(self.device) 23 | 24 | def initializtion(self): 25 | num_classes = self.dataset.y.numpy().max().item() + 1 26 | self.model['default'] = GCN_node_sparse(n_feat=self.dataset.num_features, 27 | n_hidden=self.config.hidden_dim, 28 | n_class=num_classes, 29 | n_layer=self.config.n_layer,dropout=self.config.dropout).to(self.device) 30 | 31 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, 
weight_decay=self.config.weight_decay) 32 | 33 | def reset_parameters(self): 34 | for model_name, model in self.model.items(): 35 | if hasattr(model, 'reset_parameters'): 36 | model.reset_parameters() 37 | else: 38 | for layer in model.modules(): 39 | if hasattr(layer, 'reset_parameters'): 40 | layer.reset_parameters() 41 | 42 | self.optimizer = {} 43 | for model_name, model in self.model.items(): 44 | self.optimizer[model_name] = torch.optim.Adam( 45 | model.parameters(), 46 | lr=self.config.lr, 47 | weight_decay=self.config.weight_decay 48 | ) 49 | 50 | def train(self): 51 | self.reset_parameters() 52 | num_epochs = getattr(self.config, 'epoch', 500) 53 | patience = getattr(self.config, 'patience', 20) 54 | least_epoch = getattr(self.config, 'least_epoch', 40) 55 | best_val_accuracy, patience_counter = 0, 0 # initialize the counter up front so a non-improving first epoch cannot raise NameError 56 | 57 | for epoch in range(1, num_epochs + 1): 58 | self.model['default'].train() 59 | self.optimizer['default'].zero_grad() 60 | 61 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 62 | cls_loss = F.cross_entropy(out[self.dataset.train_mask], self.dataset.y[self.dataset.train_mask], weight=None, reduction='none') 63 | cls_loss = torch.sum(cls_loss * self.rn_weight[self.dataset.train_mask]) / cls_loss.size(0) 64 | 65 | cls_loss.backward() 66 | self.optimizer['default'].step() 67 | 68 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {cls_loss.item():.4f}") 69 | 70 | val_accuracy = self.eval(metric="accuracy") 71 | 72 | if val_accuracy > best_val_accuracy: 73 | best_val_accuracy = val_accuracy 74 | patience_counter = 0 75 | else: 76 | patience_counter += 1 77 | 78 | if patience_counter >= patience and epoch > least_epoch: 79 | print(f"Early stopping at epoch {epoch}.") 80 | break 81 | 82 | print("Training Finished!") 83 | 84 | def eval(self, metric="accuracy"): 85 | """ Evaluate the model on the validation set using the selected metric.
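Supported metrics: "accuracy", "bacc" (balanced accuracy), and "macro_f1"; any other name raises ValueError.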
""" 86 | self.model['default'].eval() 87 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 88 | 89 | with torch.no_grad(): 90 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 91 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 92 | 93 | if metric == "accuracy": 94 | return accuracy_score(all_labels, predictions) 95 | elif metric == "bacc": 96 | return balanced_accuracy_score(all_labels, predictions) 97 | elif metric == "macro_f1": 98 | return f1_score(all_labels, predictions, average='macro') 99 | else: 100 | raise ValueError(f"Unknown metric: {metric}") 101 | 102 | def test(self): 103 | self.model['default'].eval() 104 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 105 | 106 | with torch.no_grad(): 107 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 108 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 109 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 110 | 111 | accuracy = accuracy_score(all_labels, predictions) 112 | macro_f1 = f1_score(all_labels, predictions, average='macro') 113 | bacc = balanced_accuracy_score(all_labels, predictions) 114 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 115 | 116 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/ReNode/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def index2sparse(edge_index, num_nodes): 5 | # edge_index to sparse format 6 | row, col = edge_index 7 | edge_weight = torch.ones(col.size(0), dtype=torch.float32) # assuming edge weight = 1 8 | adj_sparse = torch.sparse_coo_tensor(torch.stack([row, col]), edge_weight, (num_nodes, num_nodes)) 9 | return adj_sparse 10 | 11 | def direct_sparse_eye(n): 12 | indices = torch.arange(n) 13 | indices = torch.stack([indices, indices]) 14 | values = torch.ones(n) 15 | return torch.sparse_coo_tensor(indices, values, (n, n)) 16 | 17 | def compute_degree_matrix(A, num_nodes): 18 | indices = A._indices() 19 | values = A._values() 20 | row_indices = indices[0] 21 | 22 | degree = torch.zeros(num_nodes, dtype=values.dtype) 23 | 24 | for idx, value in zip(row_indices, values): 25 | degree[idx] += value 26 | 27 | degree = degree.pow(-0.5) 28 | 29 | diag_indices = torch.stack([torch.arange(num_nodes), torch.arange(num_nodes)]) 30 | D = torch.sparse_coo_tensor(diag_indices, degree, (num_nodes, num_nodes)) 31 | 32 | return D 33 | 34 | def index2dense(edge_index,nnode=2708): 35 | indx = edge_index.numpy() 36 | adj = np.zeros((nnode,nnode),dtype = 'int8') 37 | adj[(indx[0],indx[1])]=1 38 | new_adj = torch.from_numpy(adj).float() 39 | return new_adj 40 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/PatternMemory.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class PatternMemory(nn.Module): 8 | def __init__(self, embeddings_dimension, modelsize = 64): 9 | ''' 10 | num_layers: number of layers in the neural networks (EXCLUDING the input layer). If num_layers=1, this reduces to linear model. 
11 | modelsize: size of the pattern memory; the learnable memory matrix know_matrix is modelsize x modelsize 12 | 13 | forward() modulates the memory with a scale (Ugama/Wgama) and a shift (Ubeta/Wbeta) computed 14 | from graph_rep, then reads it out through M and Eta. 15 | ''' 16 | 17 | super(PatternMemory, self).__init__() 18 | 19 | self.know_matrix = nn.Parameter(torch.FloatTensor(modelsize, modelsize)) 20 | self.size = modelsize 21 | self.dim = embeddings_dimension 22 | self.leakyrelu = nn.LeakyReLU(0.2) 23 | 24 | self.Wgama = nn.Parameter(torch.FloatTensor( 25 | 1, modelsize)) 26 | self.Wbeta = nn.Parameter(torch.FloatTensor( 27 | 1, modelsize)) 28 | self.Ugama = nn.Parameter(torch.FloatTensor(embeddings_dimension, modelsize)) 29 | self.Ubeta = nn.Parameter(torch.FloatTensor(embeddings_dimension, modelsize)) 30 | 31 | self.M = nn.Parameter(torch.FloatTensor( 32 | modelsize, embeddings_dimension)) 33 | 34 | self.Eta = nn.Parameter(torch.FloatTensor(modelsize, 1)) 35 | 36 | self.reset_parameters() 37 | 38 | def reset_parameters(self): 39 | def reset(tensor): 40 | stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1))) 41 | tensor.data.uniform_(-stdv, stdv) 42 | 43 | reset(self.know_matrix) 44 | reset(self.Wgama) 45 | reset(self.Wbeta) 46 | reset(self.Ugama) 47 | reset(self.Ubeta) 48 | reset(self.M) 49 | reset(self.Eta) 50 | 51 | def forward(self, graph_rep): 52 | x_g = torch.matmul(graph_rep, self.Ugama) 53 | x_g = self.leakyrelu(torch.matmul(x_g.unsqueeze(2), self.Wgama).permute(0,2,1)) 54 | x_b = torch.matmul(graph_rep, self.Ubeta) 55 | x_b = self.leakyrelu(torch.matmul(x_b.unsqueeze(2), self.Wbeta).permute(0,2,1)) 56 | P_q = torch.mul((x_g + 1), self.know_matrix) + x_b 57 | H_q = self.leakyrelu(torch.matmul(P_q, self.M)) 58 | h_q = H_q.permute(0,2,1).matmul(self.Eta).squeeze(2) 59 | 60 | return h_q 61 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/SOLTGNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/sampling.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/SOLTGNN/sampling.zip -------------------------------------------------------------------------------- /IGL_Bench/algorithm/SOLTGNN/subgraph_sample.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | 5 | subgraph_default_border = { 6 | "PTC_MR": 19, 7 | "PROTEINS": 54, 8 | "IMDB-BINARY": 25, 9 | "DD": 395, 10 | "FRANKENSTEIN": 22, 11 | "REDDIT": 469, 12 | "COLLAB": 91 13 | } 14 | 15 | 16 | def subgraph_sample(dataset, graph_list, nums=500): 17 | np.random.seed(0) 18 | border = subgraph_default_border.get(dataset, 0) 19 | for i in range(len(graph_list)): 20 | if graph_list[i].g.number_of_nodes() >= border: 21 | graph_list[i].nodegroup += 1 22 | sample_path = os.path.join(os.path.dirname(__file__), f'sampling/{dataset}/sampling.txt') 23 | with open(sample_path, 'w') as f: 24 | f.write(str(len(graph_list)) + '\n') 25 | for graph in graph_list: 26 | if graph.nodegroup == 1: 27 | graph.sample_list = [] 28 | graph.unsample_list = [] 29 | graph.sample_x = [] 30 | n = graph.g.number_of_nodes() 31 | K = int(min(border - 1, n / 2)) 32 | 
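# The loop below draws `nums` connected subgraphs of at most K nodes from each
# head graph (nodegroup == 1) by frontier expansion: pop a node from the wait
# list, add it to the sample set, and push its neighbors. Each record written
# to sampling.txt is the sample size followed by the sampled node ids and then
# the remaining ids; non-head graphs get a single '0' line.
# Two quirks worth noting: the border table above keys REDDIT as "REDDIT"
# (the bundled splits use "REDDIT-BINARY", for which .get(dataset, 0) would
# silently yield border = 0), and the isolated-seed check below tests
# graph.neighbors[j] (node j itself) while the walk is actually seeded with
# sample_idx[j], which only refer to the same node by coincidence.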
f.write(str(K) + '\n') 33 | graph.K = K 34 | for i in range(nums): 35 | sample_idx = np.random.permutation(n) 36 | j = 0 37 | sample_set = set() 38 | wait_set = [] 39 | cnt = 0 40 | if (len(graph.neighbors[j]) == 0): 41 | j += 1 42 | wait_set.append(sample_idx[j]) 43 | while cnt < K: 44 | if len(wait_set) != 0: 45 | x = wait_set.pop() 46 | else: 47 | break 48 | while x in sample_set: 49 | if len(wait_set) != 0: 50 | x = wait_set.pop() 51 | else: 52 | cnt = K 53 | break 54 | sample_set.add(x) 55 | cnt += 1 56 | wait_set.extend(graph.neighbors[x]) 57 | unsample_set = set(range(n)).difference(sample_set) 58 | f.write(str(len(sample_set)) + ' ') 59 | for x in list(sample_set): 60 | f.write(str(x) + ' ') 61 | for x in list(unsample_set): 62 | f.write(str(x) + ' ') 63 | f.write('\n') 64 | else: 65 | f.write('0\n') 66 | 67 | 68 | if __name__ == '__main__': 69 | pass 70 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/TailGNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.sparse as sp 5 | 6 | from IGL_Bench.algorithm.TAILGNN.layers import Relation, Relationv2, Generator 7 | from IGL_Bench.backbone.gcn import GCNLayer 8 | 9 | 10 | class TailGCN_SP(nn.Module): 11 | def __init__(self, nfeat, nclass, params, device, ver=1, ablation=0): 12 | super(TailGCN_SP, self).__init__() 13 | 14 | self.device = device 15 | self.nhid = params.hidden 16 | self.dropout = params.dropout 17 | self.ablation = ablation 18 | 19 | # self.rel1 = TransGCN_SP(nfeat, self.nhid, g_sigma=params.g_sigma, ver=ver) 20 | if ver == 1: 21 | self.r1 = Relation(nfeat, ablation=ablation) 22 | else: 23 | self.r1 = Relationv2(nfeat, self.nhid, ablation=ablation) 24 | self.g1 = Generator(nfeat, params.g_sigma, ablation).to(device) 25 | 26 | self.gc1 = GCNLayer(nfeat, self.nhid).to(device) 27 | self.rel2 = TransGCN_SP(self.nhid, nclass, g_sigma=params.g_sigma, ver=ver, ablation=ablation).to(device) 28 | 29 | def forward(self, x, adj, head, adj_self=None, norm=None): 30 | 31 | # rewrite rel1 32 | neighbor = sp.mm(adj, x) 33 | m1 = self.r1(x, neighbor) 34 | 35 | x = x.to(self.device) 36 | m1 = m1.to(self.device) 37 | adj = adj.to(self.device) 38 | adj_self = adj_self.to(self.device) 39 | norm = norm.to(self.device) 40 | 41 | if head or self.ablation == 2: 42 | x1 = self.gc1(x, adj_self, norm=norm) 43 | else: 44 | if self.ablation == 1: 45 | h_s = self.g1(m1) 46 | else: 47 | h_s = m1 48 | 49 | h_s = torch.mm(h_s, self.gc1.weight) 50 | h_k = self.gc1(x, adj_self) 51 | x1 = (h_k + h_s) / (norm + 1) 52 | 53 | x1 = F.elu(x1) 54 | x1 = F.dropout(x1, self.dropout, training=self.training) 55 | 56 | x2, m2 = self.rel2(x1, adj, adj_self, head, norm) 57 | norm_m1 = torch.norm(m1, dim=1) 58 | norm_m2 = torch.norm(m2, dim=1) 59 | 60 | return x2, norm_m1, norm_m2 # , head_prob, tail_prob 61 | 62 | 63 | class TransGCN_SP(nn.Module): 64 | def __init__(self, nfeat, nhid, g_sigma, ver, ablation=0): 65 | super(TransGCN_SP, self).__init__() 66 | 67 | if ver == 1: 68 | self.r = Relation(nfeat, ablation) 69 | else: 70 | self.r = Relationv2(nfeat, nhid, ablation=ablation) 71 | 72 | self.g = Generator(nfeat, g_sigma, ablation) 73 | self.gc = GCNLayer(nfeat, nhid) 74 | self.ablation = ablation 75 | 76 | def forward(self, x, adj, adj_self, head, norm): 77 | 78 | # norm = sp.sum(adj, dim=1).to_dense().view(-1,1) 79 | neighbor = sp.mm(adj, x) 80 | m = self.r(x, neighbor) 81 | 
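# Head nodes (and ablation mode 2) are convolved over the observed self-looped
# adjacency only. For tail nodes, the relation vector m (translated by the
# generator g when ablation == 1) acts as a predicted missing-neighbor
# embedding: it is projected through the shared GCN weight and folded into the
# aggregation with a norm + 1 denominator, as if one extra neighbor existed.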
82 | if head or self.ablation == 2: 83 | # norm = sp.sum(adj_self, dim=1).to_dense().view(-1,1) 84 | h_k = self.gc(x, adj_self, norm=norm) 85 | else: 86 | if self.ablation == 1: 87 | h_s = self.g(m) 88 | else: 89 | h_s = m 90 | 91 | h_s = torch.mm(h_s, self.gc.weight) 92 | h_k = self.gc(x, adj_self) 93 | h_k = (h_k + h_s) / (norm + 1) 94 | 95 | return h_k, m 96 | 97 | 98 | class Discriminator(nn.Module): 99 | def __init__(self, in_features): 100 | super(Discriminator, self).__init__() 101 | 102 | self.d = nn.Linear(in_features, in_features, bias=True) 103 | self.wd = nn.Linear(in_features, 1, bias=False) 104 | self.sigmoid = nn.Sigmoid() 105 | 106 | ''' 107 | def weight_init(self, m): 108 | if isinstance(m, Parameter): 109 | torch.nn.init.xavier_uniform_(m.weight.data) 110 | 111 | if isinstance(m, nn.Linear): 112 | stdv = 1. / math.sqrt(m.weight.size(1)) 113 | torch.nn.init.xavier_uniform_(m.weight.data) 114 | if m.bias is not None: 115 | m.bias.data.uniform_(-stdv, stdv) 116 | ''' 117 | 118 | def forward(self, ft): 119 | ft = F.elu(ft) 120 | ft = F.dropout(ft, 0.5, training=self.training) 121 | 122 | fc = F.elu(self.d(ft)) 123 | prob = self.wd(fc) 124 | 125 | return self.sigmoid(prob) 126 | 127 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TAILGNN/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.nn.parameter import Parameter 4 | import torch.nn.functional as F 5 | import math 6 | 7 | 8 | class Relation(nn.Module): 9 | def __init__(self, in_features, ablation): 10 | super(Relation, self).__init__() 11 | 12 | self.gamma_1 = nn.Linear(in_features, in_features, bias=False) 13 | self.gamma_2 = nn.Linear(in_features, in_features, bias=False) 14 | 15 | self.beta_1 = nn.Linear(in_features, in_features, bias=False) 16 | self.beta_2 = nn.Linear(in_features, in_features, bias=False) 17 | 18 | self.r = Parameter(torch.FloatTensor(1, in_features)) 19 | 20 | self.elu = nn.ELU() 21 | self.lrelu = nn.LeakyReLU(0.2) 22 | 23 | self.sigmoid = nn.Sigmoid() 24 | self.reset_parameter() 25 | self.ablation = ablation 26 | 27 | def reset_parameter(self): 28 | stdv = 1. 
/ math.sqrt(self.r.size(1)) 29 | self.r.data.uniform_(-stdv, stdv) 30 | 31 | def forward(self, ft, neighbor): 32 | 33 | if self.ablation == 3: 34 | self.m = ft + self.r - neighbor 35 | else: 36 | gamma = self.gamma_1(ft) + self.gamma_2(neighbor) 37 | gamma = self.lrelu(gamma) + 1.0 38 | 39 | beta = self.beta_1(ft) + self.beta_2(neighbor) 40 | beta = self.lrelu(beta) 41 | 42 | self.r_v = gamma * self.r + beta 43 | 44 | # transE 45 | self.m = ft + self.r_v - neighbor 46 | ''' 47 | #transH 48 | norm = F.normalize(self.r_v) 49 | h_ft = ft - norm * torch.sum((norm * ft), dim=1, keepdim=True) 50 | h_neighbor = neighbor - norm * torch.sum((norm * neighbor), dim=1, keepdim=True) 51 | self.m = h_ft - h_neighbor 52 | ''' 53 | return self.m # F.normalize(self.m) 54 | 55 | 56 | class Relationv2(nn.Module): 57 | def __init__(self, in_features, out_features, ablation=0): 58 | super(Relationv2, self).__init__() 59 | 60 | self.gamma1_1 = nn.Linear(in_features, out_features, bias=False) 61 | self.gamma1_2 = nn.Linear(out_features, in_features, bias=False) 62 | 63 | self.gamma2_1 = nn.Linear(in_features, out_features, bias=False) 64 | self.gamma2_2 = nn.Linear(out_features, in_features, bias=False) 65 | 66 | self.beta1_1 = nn.Linear(in_features, out_features, bias=False) 67 | self.beta1_2 = nn.Linear(out_features, in_features, bias=False) 68 | 69 | self.beta2_1 = nn.Linear(in_features, out_features, bias=False) 70 | self.beta2_2 = nn.Linear(out_features, in_features, bias=False) 71 | 72 | self.r = Parameter(torch.FloatTensor(1, in_features)) 73 | 74 | self.ablation = ablation 75 | self.elu = nn.ELU() 76 | self.lrelu = nn.LeakyReLU(0.2) 77 | self.sigmoid = nn.Sigmoid() 78 | self.reset_parameter() 79 | 80 | def weight_init(self, m): 81 | return 82 | 83 | def reset_parameter(self): 84 | stdv = 1. / math.sqrt(self.r.size(1)) 85 | self.r.data.uniform_(-stdv, stdv) 86 | 87 | def forward(self, ft, neighbor): 88 | 89 | if self.ablation == 3: 90 | self.m = ft + self.r - neighbor 91 | else: 92 | 93 | gamma1 = self.gamma1_2(self.gamma1_1(ft)) 94 | gamma2 = self.gamma2_2(self.gamma2_1(neighbor)) 95 | gamma = self.lrelu(gamma1 + gamma2) + 1.0 96 | 97 | beta1 = self.beta1_2(self.beta1_1(ft)) 98 | beta2 = self.beta2_2(self.beta2_1(neighbor)) 99 | beta = self.lrelu(beta1 + beta2) 100 | 101 | self.r_v = gamma * self.r + beta 102 | self.m = ft + self.r_v - neighbor 103 | 104 | return F.normalize(self.m) 105 | 106 | 107 | class Generator(nn.Module): 108 | def __init__(self, in_features, std, ablation): 109 | super(Generator, self).__init__() 110 | 111 | self.g = nn.Linear(in_features, in_features, bias=True) 112 | self.std = std 113 | self.ablation = ablation 114 | 115 | def forward(self, ft): 116 | # h_s = ft 117 | if self.training: 118 | # if self.ablation == 2: 119 | mean = torch.zeros(ft.shape, device='cuda') 120 | ft = torch.normal(mean, 1.) 
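# During training the generator input is replaced outright with standard-normal
# noise of the same shape (the commented-out variant below instead perturbs ft
# around itself with std self.std); at eval time ft passes through unchanged.
# Note the noise tensor is created on a hardcoded 'cuda' device, so this branch
# assumes GPU execution.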
121 | # else: 122 | # ft = torch.normal(ft, self.std) 123 | h_s = F.elu(self.g(ft)) 124 | 125 | return h_s -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAILGNN/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp # diags and the sparse ops below live in scipy.sparse, not the top-level scipy namespace 3 | import torch 4 | 5 | 6 | def link_dropout(adj, idx, k=5): 7 | tail_adj = adj.copy() 8 | num_links = np.random.randint(k, size=idx.shape[0]) 9 | num_links += 1 10 | 11 | for i in range(idx.shape[0]): 12 | index = tail_adj[idx[i]].nonzero()[1] 13 | new_idx = np.random.choice(index, min(len(index), num_links[i]), replace=False) 14 | tail_adj[idx[i]] = 0.0 15 | for j in new_idx: 16 | tail_adj[idx[i], j] = 1.0 17 | return tail_adj 18 | 19 | def normalize(mx): 20 | """Row-normalize sparse matrix""" 21 | rowsum = np.array(mx.sum(1)) 22 | rowsum = np.where(rowsum==0, 1, rowsum) 23 | r_inv = np.power(rowsum, -1).flatten() 24 | r_inv[np.isinf(r_inv)] = 0. 25 | r_mat_inv = sp.diags(r_inv) 26 | mx = r_mat_inv.dot(mx) 27 | 28 | return mx 29 | 30 | def convert_sparse_tensor(sparse_mx):  31 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 32 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 33 | indices = torch.from_numpy( 34 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 35 | values = torch.from_numpy(sparse_mx.data) 36 | shape = torch.Size(sparse_mx.shape) 37 | return torch.sparse_coo_tensor(indices, values, shape) # torch.sparse.FloatTensor is deprecated; the COO constructor is equivalent here 38 | 39 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAM/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TAM/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAM/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.backbone.gcn import GCN_node_sparse 2 | from IGL_Bench.algorithm.TAM.tam import * 3 | import torch 4 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 5 | 6 | class TAM_node_solver: 7 | def __init__(self, config, dataset, device='cuda'): 8 | self.config = config 9 | self.dataset = dataset 10 | self.device = device 11 | 12 | num_classes = self.dataset.y.numpy().max().item() + 1 13 | stats = dataset.y[dataset.train_mask] 14 | n_data = [] 15 | for i in range(num_classes): 16 | data_num = (stats == i).sum() 17 | n_data.append(int(data_num.item())) 18 | self.class_num_list = n_data 19 | 20 | self.criterion = torch.nn.CrossEntropyLoss() 21 | 22 | self.model = {} 23 | self.optimizer = {} 24 | self.initializtion() 25 | self.aggregator = MeanAggregation() 26 | 27 | self.model['default'] = self.model['default'].to(device) 28 | self.dataset = self.dataset.to(self.device) 29 | 30 | def initializtion(self): 31 | num_classes = self.dataset.y.numpy().max().item() + 1 32 | self.model['default'] = GCN_node_sparse(n_feat=self.dataset.num_features, 33 | n_hidden=self.config.hidden_dim, 34 | n_class=num_classes, 35 | n_layer=self.config.n_layer, dropout=self.config.dropout) 36 | 37 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 38 | 39 | def reset_parameters(self): 40 | for model_name, model in self.model.items(): 41 | if hasattr(model, 
'reset_parameters'): 42 | model.reset_parameters() 43 | else: 44 | for layer in model.modules(): 45 | if hasattr(layer, 'reset_parameters'): 46 | layer.reset_parameters() 47 | 48 | self.optimizer = {} 49 | for model_name, model in self.model.items(): 50 | self.optimizer[model_name] = torch.optim.Adam( 51 | model.parameters(), 52 | lr=self.config.lr, 53 | weight_decay=self.config.weight_decay 54 | ) 55 | 56 | def train(self): 57 | self.reset_parameters() 58 | num_epochs = getattr(self.config, 'epoch', 500) 59 | patience = getattr(self.config, 'patience', 10) 60 | least_epoch = getattr(self.config, 'least_epoch', 100) 61 | 62 | best_loss = float('inf') 63 | patience_counter = 0 64 | best_val_accuracy = 0 65 | 66 | for epoch in range(1, num_epochs + 1): 67 | self.model['default'].train() 68 | self.optimizer['default'].zero_grad() 69 | 70 | output = self.model['default'](self.dataset.x, self.dataset.edge_index) 71 | output = adjust_output(self.config, output, self.dataset.edge_index, self.dataset.y, \ 72 | self.dataset.train_mask, self.aggregator, self.class_num_list, epoch) 73 | 74 | loss = self.criterion(output, self.dataset.y[self.dataset.train_mask]) 75 | loss.backward() 76 | self.optimizer['default'].step() 77 | 78 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}") 79 | 80 | val_accuracy = self.eval(metric="accuracy") 81 | 82 | if val_accuracy > best_val_accuracy: 83 | best_val_accuracy = val_accuracy 84 | patience_counter = 0 85 | else: 86 | patience_counter += 1 87 | 88 | if patience_counter >= patience and epoch > least_epoch: 89 | print(f"Early stopping at epoch {epoch}.") 90 | break 91 | 92 | print("Training Finished!") 93 | 94 | def eval(self, metric="accuracy"): 95 | """ Evaluate the model on the validation set using the selected metric.
""" 96 | self.model['default'].eval() 97 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 98 | 99 | with torch.no_grad(): 100 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 101 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 102 | 103 | if metric == "accuracy": 104 | return accuracy_score(all_labels, predictions) 105 | elif metric == "bacc": 106 | return balanced_accuracy_score(all_labels, predictions) 107 | elif metric == "macro_f1": 108 | return f1_score(all_labels, predictions, average='macro') 109 | else: 110 | raise ValueError(f"Unknown metric: {metric}") 111 | 112 | def test(self): 113 | self.model['default'].eval() 114 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 115 | 116 | with torch.no_grad(): 117 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 118 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 119 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 120 | 121 | accuracy = accuracy_score(all_labels, predictions) 122 | macro_f1 = f1_score(all_labels, predictions, average='macro') 123 | bacc = balanced_accuracy_score(all_labels, predictions) 124 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 125 | 126 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TAM/tam.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch_scatter import scatter_add 6 | from torch_geometric.utils import to_dense_batch 7 | from torch_geometric.nn import MessagePassing 8 | from torch_geometric.utils import add_self_loops, degree 9 | 10 | 11 | ## Jensen-Shanon Divergence ## 12 | def compute_jsd(dist1, dist2): 13 | dist_mean = (dist1 + dist2) / 2. 14 | jsd = (F.kl_div(dist_mean.log(), dist1, reduction = 'none') + F.kl_div(dist_mean.log(), dist2, reduction = 'none')) / 2. 
15 | return jsd 16 | 17 | 18 | ## TAM ## 19 | @torch.no_grad() 20 | def compute_tam(output, edge_index, label, train_mask, aggregator, class_num_list=None, temp_phi=None, temp_gamma=None): 21 | n_cls = label.max().item() + 1 22 | if label.dim()==2: 23 | label = label.squeeze() 24 | # Apply class-wise temperature 25 | cls_num_list = torch.FloatTensor(class_num_list).to(output.device) 26 | cls_num_ratio = cls_num_list / cls_num_list.sum() 27 | cls_num_ratio = cls_num_ratio * temp_gamma + (1 - temp_gamma) 28 | max_beta = torch.max(cls_num_ratio) 29 | cls_temperature = (temp_phi * (cls_num_ratio + 1 - max_beta)).unsqueeze(0) 30 | temp = 1 / cls_temperature 31 | 32 | # Predict unlabeled nodes 33 | agg_out = F.softmax(output.clone().detach()/temp, dim=1) 34 | agg_out[train_mask] = F.one_hot(label[train_mask].clone(), num_classes=n_cls).float() # only use labeled nodes 35 | neighbor_dist = aggregator(agg_out, edge_index)[train_mask] # (# of labeled nodes, # of classes) 36 | 37 | # Compute class-wise connectivity matrix 38 | connectivity_matrix= [] 39 | for c in range(n_cls): 40 | c_mask = (label[train_mask] == c) 41 | connectivity_matrix.append(neighbor_dist[c_mask].mean(dim=0)) 42 | connectivity_matrix= torch.stack(connectivity_matrix, dim=0) 43 | 44 | # Preprocess class-wise connectivity matrix and NLD for numerical stability 45 | center_mask = F.one_hot(label[train_mask].clone(), num_classes=n_cls).bool() 46 | neighbor_dist[neighbor_dist<1e-6] = 1e-6 47 | connectivity_matrix[connectivity_matrix<1e-6] = 1e-6 48 | 49 | # Compute ACM 50 | acm = (neighbor_dist[center_mask].unsqueeze(dim=1) / torch.diagonal(connectivity_matrix).unsqueeze(dim=1)[label[train_mask]]) \ 51 | * (connectivity_matrix[label[train_mask]] / neighbor_dist) 52 | acm[acm>1] = 1 53 | acm[center_mask] = 1 54 | 55 | # Compute ADM 56 | cls_pair_jsd = compute_jsd(connectivity_matrix.unsqueeze(dim=0), connectivity_matrix.unsqueeze(dim=1)).sum(dim=-1) # distance between classes 57 | cls_pair_jsd[cls_pair_jsd<1e-6] = 1e-6 58 | self_kl = compute_jsd(neighbor_dist, connectivity_matrix[label[train_mask]]).sum(dim=-1,keepdim=True) # deviation from self-class averaged NLD 59 | neighbor_kl = compute_jsd(neighbor_dist.unsqueeze(1),connectivity_matrix.unsqueeze(0)).sum(dim=-1) # distance between node NLD and each class-averaged NLD 60 | adm = (self_kl**2 + (cls_pair_jsd**2)[label[train_mask]] - neighbor_kl**2) / (2*(cls_pair_jsd**2)[label[train_mask]]) 61 | 62 | adm[center_mask] = 0 63 | 64 | return acm, adm 65 | 66 | 67 | def adjust_output(args, output, edge_index, label, train_mask, aggregator, class_num_list, epoch): 68 | """ 69 | Adjust the margin of each labeled node according to its local topology 70 | Input: 71 | args: hyperparameters for TAM 72 | output: model predictions for all nodes (including unlabeled ones); [# of nodes, # of classes] 73 | edge_index: graph connectivity; [2, # of edges] 74 | label: node labels; [# of nodes] 75 | train_mask: mask of labeled training nodes; [# of nodes] 76 | aggregator: neighborhood aggregator (MeanAggregation below) 77 | class_num_list: the number of nodes for each class; [# of classes] 78 | epoch: current epoch; integer 79 | Output: 80 | output: adjusted logits for the labeled (train) nodes 81 | """ 82 | 83 | # Compute ACM and ADM 84 | if args.tam and epoch > args.warmup: 85 | acm, adm = compute_tam(output, edge_index, label, train_mask, aggregator, \ 86 | class_num_list=class_num_list, temp_phi=args.temp_phi, temp_gamma=0.4) 87 | 88 | output = output[train_mask] 89 | # Adjust outputs 90 | if args.tam and epoch > args.warmup: 91 | acm = acm.log() 92 | adm = - adm 93 | output = output + args.tam_alpha*acm + 
args.tam_beta*adm 94 | 95 | return output 96 | 97 | 98 | class MeanAggregation(MessagePassing): 99 | def __init__(self): 100 | super(MeanAggregation, self).__init__(aggr='mean') 101 | 102 | def forward(self, x, edge_index): 103 | # x has shape [N, in_channels] 104 | # edge_index has shape [2, E] 105 | 106 | # Step 1: Add self-loops to the adjacency matrix. 107 | _edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0)) 108 | 109 | # Step 4-5: Start propagating messages. 110 | return self.propagate(_edge_index, x=x) -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TOPOAUC/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/cal.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | def compute_ppr_and_gpr(dataset, pr_prob): 4 | def index2dense(edge_index, num_nodes): 5 | A = torch.zeros((num_nodes, num_nodes), device=edge_index.device) 6 | A[edge_index[0], edge_index[1]] = 1 7 | return A 8 | 9 | edge_index = dataset.edge_index 10 | num_nodes = dataset.num_nodes 11 | labels = dataset.y 12 | train_index = torch.tensor(dataset.train_index, device=edge_index.device, dtype=torch.long) 13 | 14 | num_classes = labels.max().item() + 1 15 | train_nodes_per_class = [] 16 | for cls in range(num_classes): 17 | class_train_nodes = train_index[labels[train_index] == cls].tolist() 18 | train_nodes_per_class.append(class_train_nodes) 19 | 20 | A = index2dense(edge_index, num_nodes) 21 | A_hat = A + torch.eye(num_nodes, device=edge_index.device) 22 | D = torch.diag(torch.sum(A_hat, dim=1)) 23 | D = D.inverse().sqrt() 24 | A_hat = torch.mm(torch.mm(D, A_hat), D) 25 | 26 | I = torch.eye(num_nodes, device=edge_index.device) 27 | PPR = pr_prob * ((I - (1 - pr_prob) * A_hat).inverse()) 28 | 29 | gpr_matrix = [] 30 | for class_nodes in train_nodes_per_class: 31 | class_nodes_tensor = torch.tensor(class_nodes, device=edge_index.device, dtype=torch.long) 32 | class_ppr = PPR[class_nodes_tensor] 33 | class_gpr = torch.mean(class_ppr, dim=0).squeeze() 34 | gpr_matrix.append(class_gpr) 35 | 36 | GPR = torch.stack(gpr_matrix, dim=0).transpose(0, 1) 37 | 38 | return PPR.cpu(), GPR.cpu() -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/solver.py: -------------------------------------------------------------------------------- 1 | from IGL_Bench.backbone.gcn import GCN_node_sparse 2 | from IGL_Bench.algorithm.TOPOAUC.myloss import ELossFN 3 | from IGL_Bench.algorithm.TOPOAUC.cal import compute_ppr_and_gpr 4 | from IGL_Bench.algorithm.TOPOAUC.util import * 5 | import torch 6 | import torch.nn.functional as F 7 | import numpy as np 8 | from sklearn.metrics import accuracy_score, balanced_accuracy_score, f1_score, roc_auc_score 9 | 10 | class TOPOAUC_node_solver: 11 | def __init__(self, config, dataset, device='cuda'): 12 | self.config = config 13 | self.dataset = dataset 14 | self.device = device 15 | 16 | self.model = {} 17 | self.optimizer = {} 18 | self.ppr, self.gpr = compute_ppr_and_gpr(self.dataset, self.config.pagerank_prob) 19 | self.initializtion() 20 | 21 | self.model['default'] = self.model['default'].to(device) 22 | self.my_loss = self.my_loss.to(device) 23 
| self.dataset = self.dataset.to(device) 24 | 25 | def initializtion(self): 26 | num_classes = self.dataset.y.numpy().max().item() + 1 27 | self.model['default'] = GCN_node_sparse(n_feat=self.dataset.num_features, 28 | n_hidden=self.config.hidden_dim, 29 | n_class=num_classes, 30 | n_layer=self.config.n_layer, dropout=self.config.dropout) 31 | 32 | adj_bool = index2adj_bool(self.dataset.edge_index, self.dataset.num_nodes) 33 | 34 | self.my_loss = ELossFN(num_classes, self.dataset.num_nodes, adj_bool, self.ppr, self.gpr, self.dataset.train_mask, 35 | self.device, weight_sub_dim=self.config.weight_sub_dim, weight_inter_dim=self.config.weight_inter_dim, 36 | weight_global_dim=self.config.weight_global_dim, beta=self.config.beta, gamma=self.config.gamma, 37 | loss_type=self.config.loss) 38 | 39 | self.optimizer['default'] = torch.optim.Adam(self.model['default'].parameters(), lr=self.config.lr, weight_decay=self.config.weight_decay) 40 | 41 | def reset_parameters(self): 42 | for model_name, model in self.model.items(): 43 | if hasattr(model, 'reset_parameters'): 44 | model.reset_parameters() 45 | else: 46 | for layer in model.modules(): 47 | if hasattr(layer, 'reset_parameters'): 48 | layer.reset_parameters() 49 | 50 | self.optimizer = {} 51 | for model_name, model in self.model.items(): 52 | self.optimizer[model_name] = torch.optim.Adam( 53 | model.parameters(), 54 | lr=self.config.lr, 55 | weight_decay=self.config.weight_decay 56 | ) 57 | 58 | def train(self): 59 | self.reset_parameters() 60 | 61 | num_epochs = getattr(self.config, 'epoch', 500) 62 | patience = getattr(self.config, 'patience', 10) 63 | least_epoch = getattr(self.config, 'least_epoch', 40) 64 | 65 | criterion = torch.nn.CrossEntropyLoss() 66 | 67 | best_loss = float('inf') 68 | patience_counter = 0 69 | best_val_accuracy = 0 70 | 71 | for epoch in range(1, num_epochs + 1): 72 | self.model['default'].train() 73 | self.optimizer['default'].zero_grad() 74 | 75 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 76 | logits = F.softmax(out, dim=-1) 77 | loss = self.my_loss(logits, self.dataset.y, self.dataset.train_mask) 78 | loss = torch.mean(loss) 79 | loss.backward() 80 | self.optimizer['default'].step() 81 | 82 | print(f"Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}") 83 | 84 | val_accuracy = self.eval(metric="accuracy") 85 | 86 | if val_accuracy > best_val_accuracy: 87 | best_val_accuracy = val_accuracy 88 | patience_counter = 0 89 | else: 90 | patience_counter += 1 91 | 92 | if patience_counter >= patience and epoch > least_epoch: 93 | print(f"Early stopping at epoch {epoch}.") 94 | break 95 | 96 | print("Training Finished!") 97 | 98 | def eval(self, metric="accuracy"): 99 | self.model['default'].eval() 100 | all_labels = self.dataset.y[self.dataset.val_mask].cpu().numpy() 101 | 102 | with torch.no_grad(): 103 | out = self.model['default'](self.dataset.x, self.dataset.edge_index) 104 | predictions = out[self.dataset.val_mask].argmax(dim=1).cpu().numpy() 105 | 106 | if metric == "accuracy": 107 | return accuracy_score(all_labels, predictions) 108 | elif metric == "bacc": 109 | return balanced_accuracy_score(all_labels, predictions) 110 | elif metric == "macro_f1": 111 | return f1_score(all_labels, predictions, average='macro') 112 | else: 113 | raise ValueError(f"Unknown metric: {metric}") 114 | 115 | def test(self): 116 | self.model['default'].eval() 117 | all_labels = self.dataset.y[self.dataset.test_mask].cpu().numpy() 118 | 119 | with torch.no_grad(): 120 | out = self.model['default'](self.dataset.x, 
self.dataset.edge_index) 121 | predictions = out[self.dataset.test_mask].argmax(dim=1).cpu().numpy() 122 | probabilities = torch.nn.functional.softmax(out[self.dataset.test_mask], dim=1).cpu().numpy() 123 | 124 | accuracy = accuracy_score(all_labels, predictions) 125 | macro_f1 = f1_score(all_labels, predictions, average='macro') 126 | bacc = balanced_accuracy_score(all_labels, predictions) 127 | auc_roc = roc_auc_score(all_labels, probabilities, multi_class='ovr', average='macro') 128 | 129 | return accuracy, bacc, macro_f1, auc_roc -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TOPOAUC/util.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def index2adj_bool(edge_index,nnode): 5 | 6 | indx = edge_index.numpy() 7 | adj = np.zeros((nnode,nnode),dtype = 'bool') 8 | adj[(indx[0],indx[1])]=1 9 | new_adj = torch.from_numpy(adj) 10 | 11 | return new_adj -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TopoImb/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/algorithm/TopoImb/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TopoImb/topo_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | 5 | import torch 6 | import torch.nn.functional as F 7 | from torch.nn import Linear 8 | from torch_geometric.nn import WLConv 9 | from torch_geometric.loader import DataLoader 10 | 11 | from sklearn.cluster import SpectralClustering 12 | from IGL_Bench.algorithm.TopoImb.trainer import GClsTrainer 13 | 14 | def clust(features, n_clusters=8): 15 | clustering = SpectralClustering( 16 | n_clusters=n_clusters, 17 | assign_labels='discretize', 18 | random_state=0, 19 | affinity='nearest_neighbors' 20 | ).fit(features) 21 | 22 | return clustering.labels_ 23 | 24 | class WLGraph_model(torch.nn.Module): 25 | def __init__(self, args, nfeat, nhid, nclass, dropout, nlayer=2, res=True): 26 | super().__init__() 27 | self.args = args 28 | self.nhid = nhid 29 | self.res = res 30 | 31 | self.convs = torch.nn.ModuleList() 32 | for layer in range(nlayer): 33 | self.convs.append(WLConv()) 34 | 35 | self.emb = torch.nn.Embedding(5000, 32) 36 | 37 | self.color_size = -1 38 | self.graph_map = {} 39 | 40 | self.lin1 = Linear(32, nhid) 41 | self.lin2 = Linear(nhid, nclass) 42 | 43 | self.dropout = dropout 44 | 45 | def forward(self, x, edge_index, edge_weight=None, batch=None): 46 | if batch is None: # No batch given 47 | print('no batch info given') 48 | batch = x.new(x.size(0)).long().fill_(0) 49 | 50 | x = self.embedding(x, edge_index, edge_weight, batch=batch) 51 | x = self.lin2(F.leaky_relu(self.lin1(x))) 52 | return F.log_softmax(x, dim=1) 53 | 54 | def embedding(self, x, edge_index, edge_weight=None, batch=None): 55 | if x.shape[-1] != 1: 56 | x = x.argmax(-1) 57 | 58 | for gconv in self.convs: 59 | x = gconv(x, edge_index) 60 | 61 | out = [] 62 | for b_i in set(batch.cpu().numpy()): 63 | b_idx = (batch == b_i) 64 | g_i = x[b_idx] 65 | idx = hash(tuple(g_i.cpu().numpy().tolist())) 66 | if idx not in self.graph_map: 67 | self.graph_map[idx] = len(self.graph_map) 68 | out.append(self.graph_map[idx]) 69 | g_x = torch.tensor(out, 
device=x.device) 70 | 71 | if self.color_size == -1: 72 | self.color_size = len(self.graph_map) 73 | 74 | g_x = self.emb(g_x) 75 | gx = F.dropout(g_x, self.dropout, training=self.training) 76 | return gx 77 | 78 | def wl(self, x, edge_index, batch=None): 79 | if x.shape[-1] != 1: 80 | x = x.argmax(-1) 81 | 82 | for gconv in self.convs: 83 | x = gconv(x, edge_index) 84 | 85 | out = [] 86 | for b_i in set(batch.cpu().numpy()): 87 | b_idx = batch == b_i 88 | g_i = x[b_idx] 89 | idx = hash(tuple(g_i.cpu().numpy().tolist())) 90 | if idx not in self.graph_map: 91 | self.graph_map[idx] = len(self.graph_map) 92 | out.append(self.graph_map[idx]) 93 | g_x = torch.tensor(out, device=x.device) 94 | return g_x 95 | 96 | def graph_wl_dist(self, x, edge_index, batch=None): 97 | if x.shape[-1] != 1: 98 | x = x.argmax(-1) 99 | 100 | for gconv in self.convs: 101 | x = gconv(x, edge_index) 102 | 103 | out = self.convs[-1].histogram(x, batch, norm=True) # (batch_size, num_colors) 104 | out = out.to(x.device) 105 | return out 106 | 107 | 108 | def generate_topo_labels(dataset, config): 109 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 110 | 111 | allloader = DataLoader(dataset, batch_size=len(dataset), shuffle=False) 112 | steps = math.ceil(len(dataset) / config.batch_size) 113 | 114 | current_dir = os.path.dirname(os.path.abspath(__file__)) 115 | topo_path = os.path.join(current_dir, f"../../../TopoImb_topo_file/{dataset.name}_topo_labels.npy") 116 | print(f"Checking path: {os.path.abspath(topo_path)}") 117 | 118 | if os.path.exists(topo_path): 119 | topo_labels_np = np.load(topo_path) 120 | topo_labels = torch.tensor(topo_labels_np, dtype=torch.long, device=device) 121 | return topo_labels 122 | 123 | wlmodel = WLGraph_model( 124 | args=config, 125 | nfeat=dataset.num_features, 126 | nhid=config.hidden_dim, 127 | nclass=dataset.num_classes, 128 | dropout=0, 129 | nlayer=config.n_layer 130 | ).to(device) 131 | 132 | WLtrainer = GClsTrainer(config, wlmodel, dataset=dataset) 133 | 134 | for epoch in range(5): 135 | for batch, data in enumerate(allloader): 136 | log_info = WLtrainer.train_step(data.to(device), epoch) 137 | # print(f"[Epoch {epoch}] Train log: {log_info}") 138 | 139 | wlmodel.eval() 140 | wl_dists = [] 141 | for batch, data in enumerate(allloader): 142 | graph_wl_tensor = wlmodel.graph_wl_dist( 143 | data.x.float().to(device), 144 | data.edge_index.to(device), 145 | batch=data.batch.to(device) 146 | ).detach() 147 | wl_dists.append(graph_wl_tensor.cpu().numpy()) 148 | 149 | wl_dists = np.concatenate(wl_dists, axis=0) 150 | 151 | graph_clust = clust(wl_dists, n_clusters=8) 152 | topo_labels = torch.tensor(graph_clust, device=device) 153 | 154 | # torch.save(topo_labels, topo_path) 155 | topo_dir = os.path.dirname(topo_path) 156 | os.makedirs(topo_dir, exist_ok=True) 157 | 158 | np.save(topo_path, topo_labels.cpu().numpy()) 159 | 160 | return topo_labels 161 | -------------------------------------------------------------------------------- /IGL_Bench/algorithm/TopoImb/utils.py: -------------------------------------------------------------------------------- 1 | from sklearn.metrics import roc_auc_score, f1_score 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def accuracy(logits, labels): 6 | preds = logits.max(1)[1].type_as(labels) 7 | correct = preds.eq(labels).double() 8 | correct = correct.sum() 9 | return correct / len(labels) 10 | 11 | def grouped_accuracy(logits, labels, group_labels): 12 | preds = logits.argmax(1) 13 | group_ac_dict={} 14 | for group 
in set(group_labels): 15 | group_idx = group_labels==group 16 | group_ac = (preds[group_idx]==labels[group_idx]).sum()/(group_idx.sum()+0.00000001) 17 | group_ac_dict[group] = group_ac 18 | 19 | return group_ac_dict 20 | 21 | def print_class_acc(logits, labels, pre='valid'): 22 | pre_num = 0 23 | #print class-wise performance 24 | 25 | for i in range(labels.max()+1): 26 | index_pos = labels==i 27 | cur_tpr = accuracy(logits[index_pos], labels[index_pos]) 28 | print(str(pre)+" class {:d} True Positive Rate: {:.3f}".format(i,cur_tpr.item())) 29 | 30 | index_neg = labels != i 31 | labels_neg = labels.new(labels.shape).fill_(i) 32 | 33 | cur_fpr = accuracy(logits[index_neg,:], labels_neg[index_neg]) 34 | print(str(pre)+" class {:d} False Positive Rate: {:.3f}".format(i,cur_fpr.item())) 35 | 36 | 37 | if labels.max() > 1: 38 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 39 | else: 40 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:,1].detach().cpu(), average='macro') 41 | 42 | macro_F = f1_score(labels.detach().cpu(), torch.argmax(logits, dim=-1).detach().cpu(), average='macro') 43 | print(str(pre)+' current auc-roc score: {:f}, current macro_F score: {:f}'.format(auc_score,macro_F)) 44 | 45 | return 46 | 47 | def Roc_F(logits, labels, pre='valid'): 48 | if labels.max() > 1:#require set(labels) to be the same as columns of logits 49 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1).detach().cpu(), average='macro', multi_class='ovr') 50 | else: 51 | auc_score = roc_auc_score(labels.detach().cpu(), F.softmax(logits, dim=-1)[:,1].detach().cpu(), average='macro') 52 | 53 | macro_F = f1_score(labels.detach().cpu(), torch.argmax(logits, dim=-1).detach().cpu(), average='macro') 54 | 55 | return auc_score, macro_F 56 | 57 | class meters: 58 | ''' 59 | collects the results at each inference batch, and return the result in total 60 | param orders: the order in updating values 61 | ''' 62 | def __init__(self, orders=1): 63 | self.avg_value = 0 64 | self.tot_weight = 0 65 | self.orders = orders 66 | 67 | def update(self, value, weight=1.0): 68 | value = float(value) 69 | 70 | if self.orders == 1: 71 | update_step = self.tot_weight/(self.tot_weight+weight) 72 | self.avg_value = self.avg_value*update_step + value*(1-update_step) 73 | self.tot_weight += weight 74 | 75 | 76 | def avg(self): 77 | 78 | return self.avg_value -------------------------------------------------------------------------------- /IGL_Bench/backbone/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/backbone/__init__.py -------------------------------------------------------------------------------- /IGL_Bench/backbone/gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch_geometric.nn import GCNConv, global_add_pool, global_mean_pool, global_max_pool 5 | from torch_geometric.nn import global_add_pool 6 | 7 | class GCN_graph(torch.nn.Module): 8 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, pooling='sum'): 9 | super(GCN_graph, self).__init__() 10 | 11 | self.n_layer = n_layer 12 | self.dropout = dropout 13 | 14 | if pooling == 'sum': 15 | self.pool = global_add_pool 16 | elif pooling == 'mean': 17 | 
self.pool = global_mean_pool 18 | elif pooling == 'max': 19 | self.pool = global_max_pool 20 | else: 21 | raise ValueError(f"Unsupported pooling method: {pooling}. Choose from 'sum', 'mean', 'max'.") 22 | 23 | self.convs = nn.ModuleList() 24 | for i in range(n_layer): 25 | in_dim = n_feat if i == 0 else n_hidden 26 | self.convs.append( 27 | GCNConv(in_dim, n_hidden) 28 | ) 29 | 30 | self.out_layer = nn.Linear(n_hidden, n_class) 31 | 32 | def forward(self, x, edge_index, batch): 33 | for conv in self.convs: 34 | x = conv(x, edge_index) 35 | x = F.relu(x) 36 | x = F.dropout(x, p=self.dropout, training=self.training) 37 | 38 | x = self.pool(x, batch) 39 | x = self.out_layer(x) 40 | return x 41 | 42 | def encode(self, x, edge_index, batch): 43 | for conv in self.convs: 44 | x = conv(x, edge_index) 45 | x = F.relu(x) 46 | x = F.dropout(x, p=self.dropout, training=self.training) 47 | x = self.pool(x, batch) 48 | return x 49 | 50 | def cls(self, encoded_features): 51 | return self.out_layer(encoded_features) 52 | 53 | class GCNLayer(nn.Module): 54 | def __init__(self, n_feat, n_hidden, bias=False, batch_norm=False): 55 | super(GCNLayer, self).__init__() 56 | self.weight = torch.Tensor(n_feat, n_hidden) 57 | self.weight = nn.Parameter(nn.init.xavier_uniform_(self.weight)) 58 | if bias: 59 | self.bias = torch.zeros(n_hidden) 60 | self.bias = nn.Parameter(self.bias) # xavier init is undefined for 1-D tensors, so the bias is zero-initialized 61 | else: 62 | self.register_parameter('bias', None) 63 | 64 | self.bn = nn.BatchNorm1d(n_hidden) if batch_norm else None 65 | 66 | 67 | def forward(self, input, adj, batch_norm=True): 68 | support = torch.matmul(input, self.weight) 69 | output = torch.matmul(adj, support) 70 | if self.bias is not None: 71 | output = output + self.bias 72 | if self.bn is not None and batch_norm: 73 | output = self.compute_bn(output) 74 | return output 75 | 76 | 77 | def compute_bn(self, x): 78 | if len(x.shape) == 2: 79 | return self.bn(x) 80 | else: 81 | return self.bn(x.view(-1, x.size(-1))).view(x.size()) 82 | 83 | 84 | class GCN_node_dense(nn.Module): 85 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, batch_norm=False): 86 | super(GCN_node_dense, self).__init__() 87 | self.dropout = dropout 88 | 89 | self.graph_encoders = nn.ModuleList() 90 | self.graph_encoders.append(GCNLayer(n_feat, n_hidden, batch_norm=batch_norm)) 91 | 92 | for _ in range(n_layer - 2): 93 | self.graph_encoders.append(GCNLayer(n_hidden, n_hidden, batch_norm=batch_norm)) 94 | 95 | self.graph_encoders.append(GCNLayer(n_hidden, n_class, batch_norm=False)) 96 | 97 | 98 | def forward(self, x, adj): 99 | for i, encoder in enumerate(self.graph_encoders[:-1]): 100 | x = F.relu(encoder(x, adj)) 101 | x = F.dropout(x, self.dropout, training=self.training) 102 | 103 | x = self.graph_encoders[-1](x, adj) 104 | return x 105 | 106 | class GCN_node_sparse(nn.Module): 107 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, batch_norm=False): 108 | super(GCN_node_sparse, self).__init__() 109 | self.dropout = dropout 110 | 111 | self.graph_encoders = nn.ModuleList() 112 | self.graph_encoders.append(GCNConv(n_feat, n_hidden)) 113 | 114 | for _ in range(n_layer - 2): 115 | self.graph_encoders.append(GCNConv(n_hidden, n_hidden)) 116 | 117 | self.graph_encoders.append(GCNConv(n_hidden, n_class)) 118 | 119 | self.bn = nn.ModuleList([nn.BatchNorm1d(n_hidden) for _ in range(n_layer - 1)]) if batch_norm else None 120 | 121 | def forward(self, x, edge_index, edge_weight=None): 122 | for i, encoder in enumerate(self.graph_encoders[:-1]): 123 
| x = encoder(x, edge_index, edge_weight) 124 | if self.bn is not None: 125 | x = self.bn[i](x) 126 | x = F.relu(x) 127 | x = F.dropout(x, self.dropout, training=self.training) 128 | 129 | x = self.graph_encoders[-1](x, edge_index, edge_weight) 130 | return x -------------------------------------------------------------------------------- /IGL_Bench/backbone/gin.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.nn import Linear, BatchNorm1d, ReLU, Sequential, ModuleList 4 | from torch_geometric.nn import GINConv, global_add_pool, global_mean_pool, global_max_pool 5 | 6 | class GIN_graph(torch.nn.Module): 7 | def __init__(self, n_feat, n_hidden, n_class, n_layer, dropout=0.5, pooling='sum'): 8 | super(GIN_graph, self).__init__() 9 | 10 | self.n_layer = n_layer 11 | self.dropout = dropout 12 | 13 | if pooling == 'sum': 14 | self.pool = global_add_pool 15 | elif pooling == 'mean': 16 | self.pool = global_mean_pool 17 | elif pooling == 'max': 18 | self.pool = global_max_pool 19 | else: 20 | raise ValueError(f"Unsupported pooling method: {pooling}. Choose from 'sum', 'mean', 'max'.") 21 | 22 | self.convs = ModuleList() 23 | for i in range(n_layer): 24 | in_dim = n_feat if i == 0 else n_hidden 25 | self.convs.append( 26 | GINConv( 27 | Sequential( 28 | Linear(in_dim, n_hidden), 29 | BatchNorm1d(n_hidden), 30 | ReLU(), 31 | Linear(n_hidden, n_hidden), 32 | ReLU() 33 | ) 34 | ) 35 | ) 36 | 37 | self.out_layer = Linear(n_hidden, n_class) 38 | 39 | def forward(self, x, edge_index, batch): 40 | for conv in self.convs: 41 | x = conv(x, edge_index) 42 | x = F.dropout(x, p=self.dropout, training=self.training) 43 | 44 | x = self.pool(x, batch) 45 | x = self.out_layer(x) 46 | return x 47 | 48 | def encode(self, x, edge_index, batch): 49 | for conv in self.convs: 50 | x = conv(x, edge_index) 51 | x = F.dropout(x, p=self.dropout, training=self.training) 52 | x = self.pool(x, batch) 53 | return x 54 | 55 | def cls(self, encoded_features): 56 | return self.out_layer(encoded_features) -------------------------------------------------------------------------------- /IGL_Bench/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .util import load_conf -------------------------------------------------------------------------------- /IGL_Bench/config/util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import argparse 4 | 5 | def load_conf(task: str, imbtype: str, algorithm: str, to_parser: bool = True): 6 | config_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../config")) 7 | config_path = os.path.join(config_dir, task, imbtype, algorithm + ".yml") 8 | 9 | print(f"Load config file from: {config_path}") 10 | 11 | if not os.path.exists(config_path): 12 | raise FileNotFoundError(f"Config file not found: {config_path}") 13 | 14 | with open(config_path, 'r') as file: 15 | try: 16 | config = yaml.safe_load(file) 17 | except yaml.YAMLError as e: 18 | raise ValueError(f"Error parsing YAML file: {e}") 19 | 20 | if not to_parser: 21 | return config 22 | 23 | parser = argparse.ArgumentParser(description=f"Configuration for {algorithm}") 24 | 25 | for key, value in config.items(): 26 | if isinstance(value, bool): 27 | if value: 28 | parser.add_argument(f"--{key}", action="store_true", default=True, 29 | help=f"Enable {key} (default: {value})") 30 | 
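# Boolean config keys are exposed as paired, mutually overriding flags:
# --{key} (store_true) and --no-{key} (store_false) share the same dest, so a
# YAML default of either polarity can be flipped from the command line without
# editing the config file.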
parser.add_argument(f"--no-{key}", dest=key, action="store_false", 31 | help=f"Disable {key}") 32 | else: 33 | parser.add_argument(f"--{key}", action="store_true", default=False, 34 | help=f"Enable {key} (default: {value})") 35 | parser.add_argument(f"--no-{key}", dest=key, action="store_false", 36 | help=f"Disable {key} (default: {value})") 37 | else: 38 | parser.add_argument(f"--{key}", type=type(value), default=value, 39 | help=f"{key} (default: {value})") 40 | 41 | return parser.parse_args() -------------------------------------------------------------------------------- /IGL_Bench/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Dataset -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/COLLAB/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/DD/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/DD/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/DD/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/DD/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/DD/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/DD/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_high.pt -------------------------------------------------------------------------------- 
/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/FRANKENSTEIN/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/IMDB-BINARY/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PROTEINS/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_high.pt -------------------------------------------------------------------------------- 
/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/PTC_MR/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_high.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_high.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_low.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_low.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_mid.pt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/IGL_Bench/dataset/graph_topology_imbalance/REDDIT-BINARY/split_mid.pt -------------------------------------------------------------------------------- /IGL_Bench/dataset/load_node.py: -------------------------------------------------------------------------------- 1 | import scipy.sparse as sp 2 | import numpy as np 3 | import torch 4 | import random 5 | import os 6 | from torch_geometric.datasets import Planetoid 7 | from torch_geometric.datasets import Amazon,Actor,WikipediaNetwork 8 | from ogb.nodeproppred import PygNodePropPredDataset 9 | import networkx as nx 10 | 11 | def load_node_data(data_name, data_path): 12 | if data_name == 'ogbn-arxiv': 13 | path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..',data_path, 'ogb') 14 | else: 15 | path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..',data_path, 'pyG') 16 | 17 | data_path = os.path.join(path, data_name) 18 | data_dict = {'Cora':'planetoid','CiteSeer':'planetoid','PubMed':'planetoid', 19 | 'Photo':'amazon','Computers':'amazon','Actor':'Actor', 20 | 'Chameleon':'WikipediaNetwork','Squirrel':'WikipediaNetwork','ogbn-arxiv':'ogbn'} 21 | target_type = data_dict[data_name] 22 | if target_type == 'amazon': 23 | target_dataset = Amazon(data_path, name=data_name) 24 | elif target_type == 'planetoid': 25 | target_dataset = Planetoid(data_path, name=data_name) 26 | elif target_type == 'WikipediaNetwork': 27 | target_dataset = WikipediaNetwork(root=data_path, name=data_name, geom_gcn_preprocess=True) 28 | elif target_type == 'Actor': 29 | target_dataset = Actor(data_path) 30 | elif data_name == 'ogbn-arxiv': 31 | target_dataset = PygNodePropPredDataset(root=data_path, name='ogbn-arxiv') 32 | 33 | 
target_data = target_dataset[0] 34 | features = target_data.x 35 | 36 | if data_name in ['Cora', 'CiteSeer']: 37 | features = normalize_features(features) 38 | features = torch.FloatTensor(np.array(features)) 39 | 40 | if data_name not in ['ogbn-arxiv', 'PubMed']: 41 | adj = index2dense(target_data.edge_index, target_data.num_nodes) 42 | adj = nx.adjacency_matrix(nx.from_numpy_array(adj)) 43 | adj = adj + sp.eye(adj.shape[0]) 44 | adj_norm = normalize_sparse_adj(adj) 45 | adj_norm = torch.Tensor(adj_norm.todense()) 46 | adj = torch.Tensor(adj.todense()) 47 | target_data.adj = adj 48 | target_data.adj_norm = adj_norm 49 | 50 | target_data.x = features 51 | 52 | if target_data.y.dim() == 2: 53 | if target_data.y.size(1) > 1: 54 | target_data.y = target_data.y.argmax(dim=1) 55 | else: 56 | target_data.y = target_data.y.squeeze(1) 57 | 58 | return target_data 59 | 60 | def normalize_sparse_adj(mx): 61 | rowsum = np.array(mx.sum(1)) 62 | r_inv_sqrt = np.power(rowsum, -0.5).flatten() 63 | r_inv_sqrt[np.isinf(r_inv_sqrt)] = 0. 64 | r_mat_inv_sqrt = sp.diags(r_inv_sqrt) 65 | return mx.dot(r_mat_inv_sqrt).transpose().dot(r_mat_inv_sqrt) 66 | 67 | def normalize_features(mx): 68 | rowsum = np.array(mx.sum(1)) 69 | r_inv = np.power(rowsum, -1).flatten() 70 | r_inv[np.isinf(r_inv)] = 0. 71 | r_mat_inv = sp.diags(r_inv) 72 | mx = r_mat_inv.dot(mx) 73 | return mx 74 | 75 | def index2dense(edge_index, nnode): 76 | idx = edge_index.numpy() 77 | adj = np.zeros((nnode,nnode)) 78 | adj[(idx[0], idx[1])] = 1 79 | # adj is a dense 0/1 adjacency matrix of shape (nnode, nnode) 80 | 81 | return adj -------------------------------------------------------------------------------- /IGL_Bench/manage/__init__.py: -------------------------------------------------------------------------------- 1 | from .runner import Manager -------------------------------------------------------------------------------- /IGL_Bench/manage/runner.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import numpy as np 3 | import random 4 | import torch 5 | 6 | def set_seed(seed): 7 | random.seed(seed) 8 | np.random.seed(seed) 9 | torch.manual_seed(seed) 10 | if torch.cuda.is_available(): 11 | torch.cuda.manual_seed_all(seed) 12 | 13 | class Manager: 14 | def __init__(self, config, dataset): 15 | self.config = config 16 | self.dataset = dataset 17 | self.solver = self.initialize_solver() 18 | 19 | def initialize_solver(self): 20 | solver_name = f"{self.config.algorithm}_{self.config.task}_solver" 21 | module_path = f"IGL_Bench.algorithm.{self.config.algorithm}.solver" 22 | try: 23 | module = importlib.import_module(module_path) 24 | solver_class = getattr(module, solver_name) 25 | return solver_class(self.config, self.dataset) 26 | except (ModuleNotFoundError, AttributeError) as e: 27 | raise ImportError(f"Failed to import solver {solver_name} from {module_path}: {e}") 28 | 29 | def run(self, num_runs=1, random_seed=1): 30 | all_acc = [] 31 | all_bacc = [] 32 | all_mf1 = [] 33 | all_roc = [] 34 | 35 | for run in range(num_runs): 36 | print(f"Run {run + 1}/{num_runs} for algorithm {self.solver.__class__.__name__}") 37 | set_seed(random_seed + run) 38 | self.solver.train() 39 | acc, bacc, mf1, roc = self.solver.test() 40 | 41 | all_acc.append(acc) 42 | all_bacc.append(bacc) 43 | all_mf1.append(mf1) 44 | all_roc.append(roc) 45 | 46 | avg_acc = np.mean(all_acc) * 100 47 | std_acc = np.std(all_acc) * 100 48 | avg_bacc = np.mean(all_bacc) * 100 49 | std_bacc = np.std(all_bacc) * 100 50 | avg_mf1 = np.mean(all_mf1) * 100 51 | std_mf1 =
np.std(all_mf1) * 100 52 | avg_roc = np.mean(all_roc) * 100 53 | std_roc = np.std(all_roc) * 100 54 | 55 | self.print_results(avg_acc, std_acc, avg_bacc, std_bacc, avg_mf1, std_mf1, avg_roc, std_roc) 56 | 57 | def print_results(self, avg_acc, std_acc, avg_bacc, std_bacc, avg_mf1, std_mf1, avg_roc, std_roc): 58 | print(f"\nTest results for {self.config.algorithm} (averaged across runs):") 59 | print("+----------------------+---------------------+")  # two columns, matching the rows below 60 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("Accuracy", avg_acc, std_acc)) 61 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("Balanced Accuracy", avg_bacc, std_bacc)) 62 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("Macro F1", avg_mf1, std_mf1)) 63 | print("| {:<20} | {:>8.2f} ± {:>8.2f} |".format("ROC-AUC", avg_roc, std_roc)) 64 | print("+----------------------+---------------------+") 65 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 RingBDStack 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include config * 2 | -------------------------------------------------------------------------------- /config/graph/class/DataDec.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DataDec' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 2 5 | lr: 0.005 6 | hidden_dim: 256 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | batch_size: 128 11 | 12 | prune_percent: 0.25 13 | random_prune_percent: 0.25 14 | biggest_prune_percent: 0.75 15 | explore_rate: 0.1 16 | 17 | is_error_rank: False 18 | fine_tune_ratio: 0.3 -------------------------------------------------------------------------------- /config/graph/class/G2GNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'G2GNN' 2 | task: 'graph' 3 | backbone: 'GCN' 4 | setting: 'knn_aug' 5 | aug_num: 2 6 | kernel_type: SP 7 | knn_layer: 3 8 | knn_nei_num: 3 9 | n_layer: 2 10 | lr: 0.005 11 | hidden_dim: 128 12 | dropout: 0.5 13 | weight_decay: 0.01 14 | epoch: 500 15 | use_batch_norm: true 16 | mask_node_ratio: 0.0 17 | drop_edge_ratio: 0.005 18 | temp: 0.5 -------------------------------------------------------------------------------- /config/graph/class/GCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'graph' 3 | n_layer: 2 4 | lr: 0.005 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | use_batch_norm: true -------------------------------------------------------------------------------- /config/graph/class/GIN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GIN' 2 | task: 'graph' 3 | n_layer: 3 4 | lr: 0.005 5 | hidden_dim: 256 6 | dropout: 0.5 7 | weight_decay: 0.000 8 | epoch: 500 9 | use_batch_norm: true -------------------------------------------------------------------------------- /config/graph/class/ImGKB.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'ImGKB' 2 | task: 'graph' 3 | n_layer: 3 4 | backbone: 'GIN' 5 | batch_size: 128 6 | lr: 0.005 7 | hidden_dim: 96 8 | dropout: 0.5 9 | weight_decay: 0.0005 10 | epoch: 500 11 | 12 | hidden_graphs: 6 13 | size_hidden_graphs: 4 14 | max_step: 1 15 | beta: 0.3 -------------------------------------------------------------------------------- /config/graph/class/TopoImb.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TopoImb' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 3 5 | lr: 0.002 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | batch_size: 128 11 | use_key: False 12 | att: 'dp' 13 | reweight_weight: 0.2 14 | reweight_lr: 0.005 15 | adv_step: 1 16 | shared_encoder: False 17 | re_task: 'wlcls' 18 | n_mem: 19 | - 8 20 | - 8 21 | - 8 22 | - 8 23 | - 8 -------------------------------------------------------------------------------- /config/graph/topology/GIN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GIN' 2 | task: 'graph' 3 | n_layer: 3 4 | lr: 0.01 5 | hidden_dim: 32 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | use_batch_norm: true 10 | batch_size: 32
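11 | 12 | # Note: load_conf() in IGL_Bench/config/util.py exposes every key above as a 13 | # command-line flag (booleans get paired --<key>/--no-<key> switches, other 14 | # keys keep their YAML-inferred type), so these defaults can be overridden at 15 | # launch, e.g. with a hypothetical entry script: 16 | #   python run.py --hidden_dim 64 --lr 0.001 --no-use_batch_norm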
-------------------------------------------------------------------------------- /config/graph/topology/SOLTGNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'SOLTGNN' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 2 5 | lr: 0.01 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | use_batch_norm: true 9 | device: 0 10 | batch_size: 32 11 | epochs: 500 12 | hidden_dim: 32 13 | graph_pooling_type: "sum" 14 | degree_as_tag: false 15 | alpha: 0.5 16 | mu1: 1.0 17 | mu2: 1.0 18 | lbd: 0.0001 19 | dm: 64 20 | K: 72 21 | n_n: 1 22 | n_g: 1 23 | patience: 100 24 | -------------------------------------------------------------------------------- /config/graph/topology/TopoImb.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TopoImb' 2 | task: 'graph' 3 | backbone: 'GIN' 4 | n_layer: 3 5 | lr: 0.005 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | use_batch_norm: true 11 | datatype: 'graph' 12 | topo_initial: 'label' 13 | shared_encoder: false 14 | cls_layer: 2 15 | res: false 16 | reweighter: 'struct' 17 | n_mem: [8,8,8,8,8] 18 | use_key: false 19 | att: 'dp' 20 | reweight_weight: 0.2 21 | reweight_task: ['wlcls'] 22 | pretrain_reweighter: false 23 | 24 | # original config 25 | imb_ratio: 0.9 26 | nlayer: 3 27 | nhid: 128 28 | nclass: 2 29 | nfeat: 64 30 | epochs: 500 31 | test_interval: 20 32 | batch_size: 128 33 | batch_nums: 6000 34 | sup_ratio: 0.1 35 | val_ratio: 0.3 36 | test_raion: 0.6 37 | model: 'gin' 38 | explainer: 'gnnexplainer' 39 | directional: false 40 | edge_size: 0.05 41 | edge_ent: 1.0 42 | expl_loss: 'Tgt' 43 | aligner: 'emb' 44 | aligner_combine_weight: 1.0 45 | align_emb: false 46 | align_with_grad: false 47 | split: 0 48 | reweight_lr: 0.01 49 | adv_step: 3 50 | EM: false 51 | intra_im_ratio: 0.1 52 | inter_im_ratio: 0.6 53 | 54 | -------------------------------------------------------------------------------- /config/node/class/DPGNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DPGNN' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_hidden: 256 5 | epochs: 3000 6 | 7 | episodic_samp: 1 8 | eta: 3 9 | ssl: 'yes' 10 | label_prop: 'yes' 11 | lamb1: 10 12 | lamb2: 20 -------------------------------------------------------------------------------- /config/node/class/DRGCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DRGCN' 2 | task: 'node' 3 | n_layer: 2 4 | hidden_dim: 164 5 | dropout_prob: 0 6 | weight_decay: 0.0005 7 | epoch: 500 8 | least_epoch: 40 9 | 10 | noise_dim: 20 11 | learning_rate: 0.005 -------------------------------------------------------------------------------- /config/node/class/ImGAGN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'ImGAGN' 2 | task: 'node' 3 | 4 | ratio_generated: 1.0 5 | hidden: 128 6 | dropout: 0.5 7 | epochs_gen: 10 8 | lr: 0.01 9 | weight_decay: 0.0005 10 | epochs: 100 -------------------------------------------------------------------------------- /config/node/topo_global/GCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 -------------------------------------------------------------------------------- /config/node/topo_global/HyperIMBA.yml: 
-------------------------------------------------------------------------------- 1 | algorithm: 'HyperIMBA' 2 | backbone: 'GCN' 3 | task: 'node' 4 | n_layer: 2 5 | lr: 0.0075 6 | hidden_dim: 64 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | least_epoch: 100 11 | 12 | loss_hp: 1 -------------------------------------------------------------------------------- /config/node/topo_global/PASTEL.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'PASTEL' 2 | task: 'node' 3 | backbone: 'GCN' 4 | 5 | hidden_size: 64 6 | 7 | # Regularization 8 | lr: 0.0075 9 | dropout: 0.5 10 | feat_adj_dropout: 0.5 11 | gl_dropout: 0.5 12 | 13 | # GNN 14 | gnn: 'gcn' 15 | graph_learn: True 16 | graph_skip_conn: 0.8 17 | update_adj_ratio: 0.1 18 | graph_learn_regularization: True 19 | smoothness_ratio: 0.2 20 | degree_ratio: 0 21 | sparsity_ratio: 0 22 | graph_learn_ratio: 0 23 | graph_learn_hidden_size: 70 24 | graph_learn_epsilon: 0 25 | graph_learn_topk: null 26 | graph_learn_num_pers: 4 27 | graph_hops: 2 28 | 29 | # Training 30 | optimizer: 'adam' 31 | weight_decay: 0.0005 32 | lr_patience: 2 33 | lr_reduce_factor: 0.5 34 | grad_clipping: null 35 | grad_accumulated_steps: 1 36 | early_stop_metric: 'acc' 37 | pretrain_epoch: 0 38 | max_iter: 10 39 | eps_adj: 4.0e-5 40 | shuffle: True 41 | epoch: 200 42 | least_epoch: 100 43 | patience: 1000 44 | verbose: 20 45 | print_every_epochs: 50 46 | pe_every_epochs: 50 47 | gpr_every_epochs: 50 48 | num_anchors: 0 49 | 50 | save_params: True -------------------------------------------------------------------------------- /config/node/topo_global/ReNode.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'ReNode' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_layer: 2 5 | lr: 0.01 6 | hidden_dim: 64 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | least_epoch: 40 11 | 12 | pagerank_prob: 0.85 13 | rn_base_weight: 0.5 14 | rn_scale_weight: 1.0 -------------------------------------------------------------------------------- /config/node/topo_global/TAM.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TAM' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_layer: 2 5 | lr: 0.01 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0.0005 9 | epoch: 500 10 | least_epoch: 40 11 | 12 | warmup: 5 13 | tam: True 14 | tam_alpha: 2.5 15 | tam_beta: 0.5 16 | temp_phi: 1.2 -------------------------------------------------------------------------------- /config/node/topo_global/TOPOAUC.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TOPOAUC' 2 | task: 'node' 3 | backbone: 'GCN' 4 | n_layer: 2 5 | lr: 0.005 6 | hidden_dim: 128 7 | dropout: 0.5 8 | weight_decay: 0 9 | epoch: 500 10 | least_epoch: 40 11 | 12 | loss: 'ExpGAUC' 13 | weight_global_dim: 64 14 | weight_inter_dim: 64 15 | weight_sub_dim: 64 16 | topo_dim: 64 17 | pagerank_prob: 0.15 18 | beta: 0.5 19 | gamma: 0.5 -------------------------------------------------------------------------------- /config/node/topo_local/COLDBREW.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'COLDBREW' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.2 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 10 | patience: 100 11 | exp_mode: 'coldbrew' 12 | batch_size: 65536 13 | epochs: 1500 14 | samp_size_p: 200 15 | samp_size_n_train:
200 16 | samp_size_n_test_times_p: 20 17 | dim_learnable_input: 0 18 | unify_mlps: 0 19 | force_set_to_best_config: 1 20 | want_headtail: 1 21 | num_layers: 2 22 | studentMLP__skip_conn_T_and_res_blks: '' 23 | StudentMLP__dim_model: -1 24 | studentMLP__opt_lr: '' 25 | LP__which_corr_and_DAD: '' 26 | LP__num_propagations: -1 27 | LP__alpha: -1 28 | SEMLP_topK_2_replace: 2 29 | SEMLP__include_part1out: 1 30 | dropout_MLP: 0.2 31 | SEMLP_part1_arch: '2layer' 32 | has_proj2class: 0 33 | whetherHasSE: '100' 34 | se_reg: 32 35 | graphMLP_reg: 0.0 36 | graphMLP_tau: 2.0 37 | graphMLP_r: 3 38 | change_to_featureless: 0 39 | do_deg_analyze: 1 40 | train_which: 'TeacherGNN' 41 | use_special_split: 1 42 | optfun: 'torch.optim.Adam' 43 | manual_assign_GPU: -9999 44 | random_seed: 100 45 | N_exp: 1 46 | resume: False 47 | cuda: True 48 | cuda_num: 0 49 | records_desc: 'res_connection' 50 | records_path: '.' 51 | compare_model: 0 52 | type_model: 'GCN' 53 | type_trick: 'Initial+BatchNorm' 54 | layer_agg: 'concat' 55 | res_alpha: 0.1 56 | multi_label: False 57 | dim_hidden: 64 58 | transductive: True 59 | float_or_double: 'float' 60 | type_norm: 'None' 61 | adj_dropout: 0.5 62 | edge_dropout: 0.2 63 | node_norm_type: 'n' 64 | skip_weight: None 65 | num_groups: None 66 | prog: '' 67 | rexName: 'res.npy' 68 | graph_dropout: 0.2 69 | layerwise_dropout: False -------------------------------------------------------------------------------- /config/node/topo_local/DEMONet.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'DEMONet' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.005 5 | hidden_dim: 64 6 | dropout: 0.1 7 | weight_decay: 0.0005 8 | epoch: 1000 9 | least_epoch: 40 10 | patience: 100 11 | hash_dim: 256 12 | n_hash_kernel: 1 13 | n_layers: 2 -------------------------------------------------------------------------------- /config/node/topo_local/GCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 10 | patience: 100 -------------------------------------------------------------------------------- /config/node/topo_local/GRAPHPATCHER.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GRAPHPATCHER' 2 | task: 'node' 3 | pretrain: 4 | hid_dim: [64] 5 | lr: 1.0e-3 6 | weight_decay: 5.0e-4 7 | norm: 'identity' 8 | mp_norm: 'right' 9 | generator: 10 | target_gnn: '' 11 | backbone: 'gcn' 12 | hid_dim: [ 1024 ] 13 | warmup_steps: 100 14 | lr: 1.0e-4 15 | device: 0 16 | weight_decay: 5.0e-4 17 | degree_train: 1 18 | drop_ratio: [ ] 19 | three_layer: False 20 | k: 3 21 | generation_iteration: -1 22 | total_generation_iteration: 5 23 | norm: 'identity' 24 | training_iteration: 10000 25 | dropout: 0.0 26 | batch_size: 128 27 | accumulate_step: 1 28 | eval_iteration: 100 29 | patience: 30 30 | bar: False 31 | workers: 10 32 | mp_norm: 'right' 33 | seed: 123 34 | -------------------------------------------------------------------------------- /config/node/topo_local/LTE4G.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'GCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.01 5 | hidden_dim: 128 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 500 9 | least_epoch: 40 10 | patience: 100 11 | im_class_num: 3 12 | im_ratio: 1 13 | layer: 'gcn' 14 | rw: 0.000001 15 | ep_pre: 50 16 | ep:
10000 17 | ep_early: 1000 18 | add_sl: True 19 | adj_norm_1: True 20 | adj_norm_2: False 21 | nhid: 64 22 | nhead: 1 23 | wd: 5e-4 24 | num_seed: 5 25 | is_normalize: False 26 | cls_og: 'GNN' 27 | type: 'mid' 28 | embedder: 'lte4g' 29 | rec: False 30 | lr_expert: 0.01 31 | criterion: 'mean' 32 | sep_class: 'pareto_73' 33 | sep_degree: 5 34 | class_weight: True 35 | gamma: 1 36 | alpha: 0.6 37 | T: 1 38 | expert_ep: 1000 39 | curriculum_ep: 500 40 | pretrained_encoder: False 41 | save_encoder: False -------------------------------------------------------------------------------- /config/node/topo_local/RAWLSGCN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'RAWLSGCN' 2 | task: 'node' 3 | n_layer: 2 4 | lr: 0.05 5 | hidden_dim: 64 6 | dropout: 0.5 7 | weight_decay: 0.0005 8 | epoch: 100 9 | least_epoch: 40 10 | patience: 100 11 | model: 'rawlsgcn_graph' 12 | loss: 'negative_log_likelihood' -------------------------------------------------------------------------------- /config/node/topo_local/TAILGNN.yml: -------------------------------------------------------------------------------- 1 | algorithm: 'TAILGNN' 2 | task: 'node' 3 | lr: 0.01 4 | hidden_dim: 128 5 | dropout: 0.5 6 | weight_decay: 0.0005 7 | epoch: 1000 8 | least_epoch: 40 9 | patience: 200 10 | hidden: 32 11 | eta: 0.1 12 | mu: 0.001 13 | lamda: 0.0001 14 | k: 5 15 | arch: 1 16 | id: 0 17 | ablation: 0 18 | g_sigma: 0.1 -------------------------------------------------------------------------------- /dataset/link.txt: -------------------------------------------------------------------------------- 1 | Datasets are available on Google Drive due to space limits on GitHub: https://drive.google.com/drive/folders/1GFfu6oXEaaB8-DkgBEsIXMid_i3br7HI?usp=drive_link -------------------------------------------------------------------------------- /demo.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "6c3f684e", 6 | "metadata": {}, 7 | "source": [ 8 | "# 🧪 IGL-Bench: Quick Start for Node and Graph Classification" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "df4c4617", 14 | "metadata": {}, 15 | "source": [ 16 | "\n", 17 | "This notebook demonstrates how to use the **IGL-Bench** framework for running node-level and graph-level classification tasks under various imbalance settings. 
\n", 18 | "It shows how to:\n", 19 | "- Initialize datasets with imbalance configurations\n", 20 | "- Load benchmark configurations\n", 21 | "- Run a selected algorithm on the data using the unified interface.\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "id": "6ef10ee1", 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# Step 1: Import Benchmark Framework\n", 32 | "import IGL_Bench as igl" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "id": "4fd58a73", 38 | "metadata": {}, 39 | "source": [ 40 | "## 🔹 Node-Level Classification Task" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "id": "40b9512f", 47 | "metadata": {}, 48 | "outputs": [], 49 | "source": [ 50 | "# Step 2: Initialize Node-Level Dataset\n", 51 | "node_dataset_builder = igl.dataset.Dataset(\n", 52 | " task=\"node\",\n", 53 | " data_name=\"Cora\", # Choose from: ['Cora', 'CiteSeer', 'PubMed', 'Photo', 'Computers', 'ogbn-arxiv', 'Chameleon', 'Squirrel', 'Actor']\n", 54 | " imb_type=\"topo_global\", # Choose from: ['class', 'topo_local', 'topo_global']\n", 55 | " imb_level=\"high\" # Choose from: ['low', 'mid', 'high']\n", 56 | ")" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "id": "dc9c85ba", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# Step 3: Load Node-Level Dataset\n", 67 | "node_dataset = node_dataset_builder.load_dataset()" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "id": "74a4cc02", 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# Step 4: Load Configuration for Node-Level Algorithm\n", 78 | "node_config = igl.config.load_conf(\n", 79 | " task=\"node\",\n", 80 | " imbtype=\"topo_global\",\n", 81 | " algorithm=\"PASTEL\" # Replace with any implemented algorithm\n", 82 | ")" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "id": "f4e11f62", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "# Step 5: Run Node-Level Solver\n", 93 | "node_solver = igl.manage.Manager(node_config, node_dataset)\n", 94 | "node_solver.run(num_runs=5)" 95 | ] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "id": "97ff816c", 100 | "metadata": {}, 101 | "source": [ 102 | "## 🔸 Graph-Level Classification Task" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "id": "efb42c56", 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# Step 6: Initialize Graph-Level Dataset\n", 113 | "graph_dataset_builder = igl.dataset.Dataset(\n", 114 | " task=\"graph\",\n", 115 | " data_name=\"D&D\", # Choose from: ['PTC-MR', 'FRANKENSTEIN', 'PROTEINS', 'IMDB-B', 'REDDIT-B', 'ogbg-molhiv', 'COLLAB', 'D&D']\n", 116 | " imb_type=\"class\", # Choose from: ['class', 'topology']\n", 117 | " imb_level=\"low\" # Choose from: ['low', 'mid', 'high']\n", 118 | ")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "eb15f120", 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# Step 7: Load Graph-Level Dataset\n", 129 | "graph_dataset = graph_dataset_builder.load_dataset()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "id": "d91ed1e5", 136 | "metadata": {}, 137 | "outputs": [], 138 | "source": [ 139 | "# Step 8: Load Configuration for Graph-Level Algorithm\n", 140 | "graph_config = igl.config.load_conf(\n", 141 | " task=\"graph\",\n", 142 | " 
imbtype=\"class\",\n", 143 | " algorithm=\"G2GNN\" # Replace with any implemented algorithm\n", 144 | ")" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "d88600d0", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# Step 9: Run Graph-Level Solver\n", 155 | "graph_solver = igl.manage.Manager(graph_config, graph_dataset)\n", 156 | "graph_solver.run(num_runs=10)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "id": "1cdffa79", 162 | "metadata": {}, 163 | "source": [ 164 | "\n", 165 | "### ✅ Summary\n", 166 | "\n", 167 | "This notebook walked through the unified API provided by IGL-Bench for running experiments on imbalanced graph datasets. \n", 168 | "You can now:\n", 169 | "- Switch datasets, imbalance settings, and algorithms easily\n", 170 | "- Modify the number of runs or inspect detailed solver outputs\n", 171 | "- Customize training pipelines by editing the algorithm-specific configuration files (`config/`) for hyperparameter tuning\n", 172 | "- Extend this workflow with visualization, logging, or evaluation as needed\n" 173 | ] 174 | } 175 | ], 176 | "metadata": { 177 | "language_info": { 178 | "name": "python" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 5 183 | } 184 | -------------------------------------------------------------------------------- /figs/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/logo.png -------------------------------------------------------------------------------- /figs/package.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/package.png -------------------------------------------------------------------------------- /figs/scope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/scope.png -------------------------------------------------------------------------------- /figs/timeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RingBDStack/IGL-Bench/8d2522702c9b0de919c6d96a3613f941b21bdc75/figs/timeline.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Core scientific computing stack 2 | numpy==1.24.4 3 | pandas==2.0.3 4 | scikit_learn==1.3.2 5 | matplotlib==3.7.5 6 | h5py==2.10.0 7 | 8 | # PyTorch & GNN frameworks (no version constraints) 9 | torch 10 | dgl 11 | scipy 12 | torch_scatter 13 | torch_sparse 14 | torch_geometric 15 | 16 | # Benchmark-related libraries 17 | ogb==1.3.6 18 | tqdm==4.66.4 19 | PyYAML==6.0.1 20 | GPUtil==1.4.0 21 | networkx==3.1 22 | 23 | # Graph contrastive learning & curvature 24 | PyGCL==0.1.2 25 | GCL==0.6.11 26 | GraphRicciCurvature==0.5.3.2 27 | GraKeL==0.1.10 28 | 29 | # Debugging & dev tools 30 | ipdb==0.13.13 31 | julia==0.6.2 32 | 33 | # TensorFlow branch (used by DRGCN ) 34 | tensorflow 35 | tensorflow_probability 36 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from 
setuptools import setup, find_packages 2 | from pathlib import Path 3 | 4 | readme_path = Path(__file__).parent / "README.md" 5 | long_description = readme_path.read_text(encoding="utf-8") 6 | 7 | setup( 8 | name="IGL_Bench", 9 | version="0.1.0", 10 | description="Imbalanced Graph Learning Benchmark", 11 | url="https://github.com/RingBDStack/IGL-Bench", 12 | long_description=long_description, 13 | long_description_content_type="text/markdown", 14 | packages=find_packages(), 15 | include_package_data=True, 16 | install_requires=[ 17 | "torch>=1.13.1", 18 | "torch-geometric>=2.1.0", 19 | "scipy", 20 | "numpy", 21 | "dgl", 22 | "tqdm", 23 | "scikit_learn", 24 | "ogb", 25 | "networkx" 26 | ], 27 | classifiers=[ 28 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 29 | "License :: OSI Approved :: MIT License", 30 | "Programming Language :: Python :: 3.8", 31 | "Programming Language :: Python :: 3.9", 32 | "Programming Language :: Python :: 3.10", 33 | "Operating System :: OS Independent", 34 | ], 35 | python_requires='>=3.8', 36 | keywords=[ 37 | "graph learning", 38 | "GNN", 39 | "imbalanced learning", 40 | "graph neural networks", 41 | "benchmark" 42 | ] 43 | ) 44 | --------------------------------------------------------------------------------
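A note for extenders: Manager.initialize_solver() in IGL_Bench/manage/runner.py resolves solvers purely by naming convention, importing IGL_Bench.algorithm.<algorithm>.solver and instantiating the class <algorithm>_<task>_solver with (config, dataset). A new algorithm therefore only needs a solver module that honors this contract. The skeleton below is an illustrative sketch, not code from the repository: the algorithm name MYALGO and the method bodies are hypothetical; only the class-naming rule and the train()/test() interface come from runner.py.

# File: IGL_Bench/algorithm/MYALGO/solver.py  (hypothetical algorithm name)

class MYALGO_node_solver:
    """Minimal solver matching the dispatch contract in manage/runner.py."""

    def __init__(self, config, dataset):
        # Manager passes the argparse namespace from load_conf and the
        # dataset object produced by IGL_Bench.dataset.Dataset.
        self.config = config
        self.dataset = dataset

    def train(self):
        # Called once per run by Manager.run(); fit the model here.
        pass

    def test(self):
        # Manager.run() collects these per run, then averages and scales
        # them by 100, so return fractions in [0, 1]:
        # (accuracy, balanced_accuracy, macro_f1, roc_auc)
        return 0.0, 0.0, 0.0, 0.0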