├── aminer.sh
├── sbm.sh
├── ogb.sh
├── setup.py
├── propagation.pxd
├── propagation.pyx
├── model.py
├── README.md
├── instantAlg.h
├── Graph.h
├── utils.py
├── ogb_exp.py
├── aminer_dense.py
├── sbm.py
├── convert
│   ├── gen_SBM.cpp
│   └── convert_ogb.py
├── instantAlg_arxiv.cpp
└── instantAlg.cpp

/aminer.sh:
--------------------------------------------------------------------------------
1 | python aminer_dense.py --layer 4 --dataset 1984_author_dense --alpha 0.1
2 | 
--------------------------------------------------------------------------------
/sbm.sh:
--------------------------------------------------------------------------------
1 | python sbm.py --alpha 0.001 --epochs 200 --dataset SBM-500000-50-20+1 --lr 0.01 --batch 1024
2 | 
--------------------------------------------------------------------------------
/ogb.sh:
--------------------------------------------------------------------------------
1 | python ogb_exp.py --dataset papers100M --layer 3 --hidden 2048 --alpha 0.2 --dropout 0.3 --rmax 1e-8
2 | python ogb_exp.py --dataset products --layer 4 --hidden 1024 --alpha 0.1 --dropout 0.5
3 | python ogb_exp.py --dataset arxiv --layer 4 --hidden 1024 --alpha 0.1 --dropout 0.3
4 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup, Extension
2 | from Cython.Build import cythonize
3 | import eigency
4 | 
5 | setup(
6 |     name='propagation',
7 |     version='0.0.1',
8 |     author='anonymous',
9 |     ext_modules=cythonize(Extension(
10 |         name='propagation',
11 |         sources=['propagation.pyx'],
12 |         language='c++',
13 |         extra_compile_args=["-std=c++11"],
14 |         # If Eigen is installed elsewhere, replace "./eigen3" with your own path.
15 |         # If the build fails with numpy-related header errors, add numpy's header
16 |         # directory (numpy.get_include()) to include_dirs as well.
17 |         include_dirs=[".", "module-dir-name"] + eigency.get_includes() + ["./eigen3"],
18 |     )),
19 |     install_requires=['Cython>=0.2.15', 'eigency>=1.77'],
20 |     python_requires='>=3'
21 | )
--------------------------------------------------------------------------------
/propagation.pxd:
--------------------------------------------------------------------------------
1 | from eigency.core cimport *
2 | from libcpp.string cimport string
3 | 
4 | ctypedef unsigned int uint
5 | 
6 | cdef extern from "instantAlg.cpp":
7 | #cdef extern from "instantAlg_arxiv.cpp":
8 |     pass
9 | 
10 | cdef extern from "instantAlg.h" namespace "propagation":
11 |     cdef cppclass Instantgnn:
12 |         Instantgnn() except+
13 |         double initial_operation(string,string,uint,uint,double,double,Map[MatrixXd] &) except +
14 |         void snapshot_operation(string, double, double, Map[MatrixXd] &) except +
15 |         void overall_operation(double,double, Map[MatrixXd] &) except +
16 |         void linenum_operation(string, int,int,double,double, Map[MatrixXd] &) except +
17 |         int snapshot_operation_rate_Z(string, int, double, double, double, Map[MatrixXd] &, Map[MatrixXd] &)
18 | 
--------------------------------------------------------------------------------
/propagation.pyx:
--------------------------------------------------------------------------------
1 | from propagation cimport Instantgnn
2 | 
3 | cdef class InstantGNN:
4 |     cdef Instantgnn c_instantgnn
5 | 
6 |     def __cinit__(self):
7 |         self.c_instantgnn=Instantgnn()
8 | 
9 |     def initial_operation(self,path,dataset,unsigned int m,unsigned int n,rmax,alpha,np.ndarray array3):
10 |         return self.c_instantgnn.initial_operation(path.encode(),dataset.encode(),m,n,rmax,alpha,Map[MatrixXd](array3))
11 | 
12 |     def snapshot_operation(self, upfile, rmax,alpha, np.ndarray array3):
13 |         return self.c_instantgnn.snapshot_operation(upfile.encode(), rmax, alpha, Map[MatrixXd](array3))
14 | 
15 |     def overall_operation(self, rmax,alpha, np.ndarray array3):
16 |         return self.c_instantgnn.overall_operation(rmax, alpha, Map[MatrixXd](array3))
17 | 
18 |     def snapshot_operation_rate_Z(self, upfile, begin, rmax,alpha, threshold, np.ndarray array3, np.ndarray array4):
19 |         return self.c_instantgnn.snapshot_operation_rate_Z(upfile.encode(), begin, rmax, alpha, threshold, Map[MatrixXd](array3), Map[MatrixXd](array4))
20 | 
21 |     def linenum_operation(self, upfile, begin, end, rmax,alpha, np.ndarray array3):
22 |         return self.c_instantgnn.linenum_operation(upfile.encode(), begin, end, rmax, alpha, Map[MatrixXd](array3))
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import math
4 | import torch.nn.functional as F
5 | 
6 | class ClassMLP(torch.nn.Module):
7 |     def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout):
8 |         super(ClassMLP, self).__init__()
9 | 
10 |         self.lins = torch.nn.ModuleList()
11 |         self.lins.append(torch.nn.Linear(in_channels, hidden_channels))
12 |         self.bns = torch.nn.ModuleList()
13 |         self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
14 |         for _ in range(num_layers - 2):
15 |             self.lins.append(torch.nn.Linear(hidden_channels, hidden_channels))
16 |             self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
17 |         self.lins.append(torch.nn.Linear(hidden_channels, out_channels))
18 |         self.dropout = dropout
19 | 
20 |     def reset_parameters(self):
21 |         for lin in self.lins:
22 |             lin.reset_parameters()
23 |         for bn in self.bns:
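            # BatchNorm1d.reset_parameters() also clears the running statistics,
            # so every call to prepare_to_train() retrains from a completely
            # fresh model after each graph snapshot.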
24 |             bn.reset_parameters()
25 | 
26 |     def forward(self, x):
27 |         for i, lin in enumerate(self.lins[:-1]):
28 |             x = lin(x)
29 |             x = self.bns[i](x)
30 |             x = F.relu(x)
31 |             x = F.dropout(x, p=self.dropout, training=self.training)
32 |         x = self.lins[-1](x)
33 |         return torch.log_softmax(x, dim=-1)
34 |         #return x
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Instant Graph Neural Networks for Dynamic Graphs
2 | 
3 | ## Requirements
4 | - CUDA 10.1
5 | - python 3.8.5
6 | - pytorch 1.7.1
7 | - GCC 5.4.0
8 | - cython 0.29.21
9 | - eigency 1.77
10 | - numpy 1.18.1
11 | - torch-geometric 1.6.3
12 | - tqdm 4.56.0
13 | - ogb 1.2.4
14 | - [eigen 3.3.9](https://gitlab.com/libeigen/eigen.git)
15 | 
16 | ## Datasets
17 | OGB datasets can be downloaded from [here](https://ogb.stanford.edu). The Open Graph Benchmark provides an automatic way to download and convert the three datasets, so you can simply run `python convert_ogb.py` instead of downloading them manually. We remove a portion of the edges to simulate the evolving nature of the graphs. The folder `./convert/` contains the code that converts the three datasets.
18 | 
19 | We also provide a real dataset with dynamic labels, Aminer, which is processed from the [raw data](https://www.aminer.cn/aminernetwork). The processed version can be downloaded from [here](https://drive.google.com/drive/folders/1bYcVslvdS-cEcQbFAkABFTyqR_RoHw1i).
20 | 
21 | In our paper, we also use synthetic datasets generated by the Stochastic Block Model (SBM). The folder `./convert/` also contains the code to generate and convert these datasets.
22 | For example, you can run the following commands to generate SBM-500K:
23 | ```
24 | g++ -std=c++11 gen_SBM.cpp -o rd_dynamic
25 | ./rd_dynamic -n 500000 -c 50 -ind 20 -outd 1 -snap 10 -change 2500
26 | ```
27 | 
28 | ## Compilation
29 | The Cython extension needs to be compiled before running. Use this command:
30 | ```
31 | python setup.py build_ext --inplace
32 | ```
33 | 
34 | ## Running the code
35 | - On OGB datasets
36 | ```
37 | ./ogb.sh
38 | ```
39 | 
40 | - On the Aminer dataset
41 | ```
42 | ./aminer.sh
43 | ```
44 | 
45 | - On SBM datasets
46 | ```
47 | ./sbm.sh
48 | ```
49 | 
--------------------------------------------------------------------------------
/instantAlg.h:
--------------------------------------------------------------------------------
1 | #ifndef InstantGNN_H
2 | #define InstantGNN_H
3 | #include <iostream>
4 | #include <fstream>
5 | #include <sstream>
6 | #include <string>
7 | #include <cstring>
8 | #include <vector>
9 | #include <queue>
10 | #include <map>
11 | #include <set>
12 | #include <algorithm>
13 | #include <utility>
14 | #include <cmath>
15 | #include <cstdlib>
16 | #include <ctime>
17 | #include <random>
18 | #include <thread>
19 | #include <mutex>
20 | #include <unistd.h>
21 | #include <sys/time.h>
22 | #include <Eigen/Dense>
23 | 
24 | #include "Graph.h"
25 | 
26 | using namespace std;
27 | using namespace Eigen;
28 | typedef unsigned int uint;
29 | 
30 | namespace propagation{
31 |     class Instantgnn{
32 |         Eigen::MatrixXd X;
33 |     public:
34 |         EIGEN_MAKE_ALIGNED_OPERATOR_NEW
35 |         int NUMTHREAD=40;//Number of threads
36 |         uint edges, vert;
37 |         Graph g;
38 |         vector<vector<double>> R;
39 |         double rmax,alpha,t;
40 |         string dataset_name;
41 |         string updateFile;
42 |         vector<double> rowsum_pos;
43 |         vector<double> rowsum_neg;
44 |         vector<int> random_w;
45 |         vector<int> update_w;
46 |         vector<double> Du;
47 |         int dimension;
48 |         double initial_operation(string path, string dataset,uint mm,uint nn,double rmaxx,double alphaa,Eigen::Map<Eigen::MatrixXd> &feat);
49 |         void ppr_push(int dimension, Eigen::Ref<Eigen::MatrixXd> feat, bool init,vector<queue<uint>>& candidate_sets,vector<vector<bool>>& isCandidates, bool log);
50 |         void ppr_residue(Eigen::Ref<Eigen::MatrixXd> feats,int st,int ed, bool init,vector<queue<uint>>& candidate_sets,vector<vector<bool>>& isCandidates);
51 |         void snapshot_operation(string updatefilename, double rmaxx,double alphaa, Eigen::Map<Eigen::MatrixXd> &feat);
52 |         void overall_operation(double rmaxx,double alphaa, Eigen::Map<Eigen::MatrixXd> &feat);
53 |         vector<vector<uint>> update_graph(string updatefilename, vector<uint>&affected_nodelst, vector<vector<uint>>&delete_neighbors);
54 |         int snapshot_operation_rate_Z(string updatefilename, int begin, double rmaxx,double alphaa, double threshold, Eigen::Map<Eigen::MatrixXd> &feat, Eigen::Map<Eigen::MatrixXd> &init_Z);
55 |         void linenum_operation(string updatefilename, int begin, int end, double rmaxx,double alphaa, Eigen::Map<Eigen::MatrixXd> &feat);
56 |     };
57 | }
58 | 
59 | 
60 | #endif // InstantGNN_H
--------------------------------------------------------------------------------
/Graph.h:
--------------------------------------------------------------------------------
1 | #ifndef GRAPH_H
2 | #define GRAPH_H
3 | 
4 | #include <iostream>
5 | #include <fstream>
6 | #include <cstdio>
7 | #include <string>
8 | #include <vector>
9 | #include <ctime>
10 | using namespace std;
11 | 
12 | class Graph
13 | {
14 | public:
15 |     uint n; //number of nodes
16 |     uint m; //number of edges
17 | 
18 |     vector<vector<uint>> inAdj;
19 |     vector<vector<uint>> outAdj;
20 |     uint* indegree;
21 |     uint* outdegree;
22 |     vector<uint> indices;
23 |     vector<uint> indptr;
24 |     Graph()
25 |     {
26 |     }
27 |     ~Graph()
28 |     {
29 |     }
30 | 
31 |     void insertEdge(uint from, uint to) {
32 |         outAdj[from].push_back(to);
33 |         inAdj[to].push_back(from);
34 |         outdegree[from]++;
35 |         indegree[to]++;
36 |     }
37 | 
38 |     void deleteEdge(uint from, uint to) {
39 |         uint j;
40 |         for (j=0; j < indegree[to]; j++) {
41 |             if (inAdj[to][j] == from) {
42 |                 break;
43 |             }
44 |         }
45 |         inAdj[to].erase(inAdj[to].begin()+j);
46 |         indegree[to]--;
47 | 
48 |         for (j=0; j < outdegree[from]; j++) {
49 |             if (outAdj[from][j] == to) {
50 |                 break;
51 |             }
52 |         }
53 | 
54 |         outAdj[from].erase(outAdj[from].begin() + j);
55 |         outdegree[from]--;
56 |     }
57 | 
58 |     int isEdgeExist(uint u, uint v) { // 1 if (u,v) is absent, -1 if it already exists
59 |         for (uint j = 0; j < outdegree[u]; j++) {
60 |             if (outAdj[u][j] == v) {
61 |                 return -1;
62 |             }
63 |         }
64 |         return 1;
65 |     }
66 | 
67 |     void inputGraph(string path, string dataset, uint nodenum, uint edgenum)
68 |     {
69 |         n = nodenum;
70 |         m = edgenum;
71 |         indices=vector<uint>(m);
72 |         indptr=vector<uint>(n+1);
73 |         //string dataset_el="data/"+dataset+"_adj_el.txt";
74 |         string dataset_el=path+dataset+"_adj_el.txt";
75 |         const char *p1=dataset_el.c_str();
76 |         if (FILE *f1 = fopen(p1, "rb"))
77 |         {
78 |             size_t rtn = fread(indices.data(), sizeof indices[0], indices.size(), f1);
79 |             if(rtn!=m)
80 |                 cout<<"Error! 
"< templst(indices.begin() + indptr[i],indices.begin() + indptr[i+1]); 111 | outAdj.push_back(templst); 112 | inAdj.push_back(templst); 113 | } 114 | 115 | clock_t t2=clock(); 116 | cout<<"m="<>from>>to) 138 | { 139 | outdegree[from]++; 140 | indegree[to]++; 141 | } 142 | 143 | cout<<"..."< templst; 148 | inAdj.push_back(templst); 149 | outAdj.push_back(templst); 150 | } 151 | 152 | infile.clear(); 153 | infile.seekg(0); 154 | 155 | clock_t t1=clock(); 156 | 157 | while(infile>>from>>to) 158 | { 159 | outAdj[from].push_back(to); 160 | inAdj[to].push_back(from); 161 | } 162 | infile.close(); 163 | clock_t t2=clock(); 164 | cout<<"m="< getOutAdjs(uint vert){ 181 | return outAdj[vert]; 182 | } 183 | 184 | }; 185 | 186 | 187 | #endif 188 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gc 3 | import numpy as np 4 | from sklearn.metrics import f1_score 5 | from torch.utils.data import Dataset 6 | from propagation import InstantGNN 7 | import pdb 8 | 9 | def load_aminer_init(datastr, rmax, alpha): 10 | if datastr == "1984_author_dense": 11 | m = 3787605; n = 1252095 12 | elif datastr == "2013_author_dense": 13 | m = 9237799; n = 1252095 14 | 15 | print("Load %s!" % datastr) 16 | labels = np.load("./data/aminer/"+ datastr +"_labels.npy") 17 | 18 | py_alg = InstantGNN() 19 | 20 | features = np.load('./data/aminer/aminer_dense_feat.npy') 21 | memory_dataset = py_alg.initial_operation('./data/aminer/',datastr, m, n, rmax, alpha, features) 22 | split = np.load('./data/aminer/aminer_dense_idx_split.npz') 23 | train_idx, val_idx, test_idx = split['train'], split['valid'], split['test'] 24 | 25 | train_idx = torch.LongTensor(train_idx) 26 | val_idx = torch.LongTensor(val_idx) 27 | test_idx = torch.LongTensor(test_idx) 28 | 29 | train_labels = torch.LongTensor(labels[train_idx]) 30 | val_labels = torch.LongTensor(labels[val_idx]) 31 | test_labels = torch.LongTensor(labels[test_idx]) 32 | 33 | train_labels = train_labels.reshape(train_labels.size(0), 1) 34 | val_labels = val_labels.reshape(val_labels.size(0), 1) 35 | test_labels = test_labels.reshape(test_labels.size(0), 1) 36 | 37 | return features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg 38 | 39 | def load_ogb_init(datastr, alpha, rmax): 40 | if(datastr=="papers100M"): 41 | m=3259203018; n=111059956 ##init graph 42 | elif(datastr=="arxiv"): 43 | m=597039; n=169343 44 | elif(datastr=="products"): 45 | m=69634445; n=2449029 46 | print("Load %s!" 
% datastr) 47 | 48 | py_alg = InstantGNN() 49 | features = np.load('./data/'+datastr+'/'+datastr+'_feat.npy') 50 | memory_dataset = py_alg.initial_operation('./data/'+datastr+'/', datastr+'_init', m, n, rmax, alpha, features) 51 | 52 | data = np.load('./data/'+datastr+'/'+datastr+'_labels.npz') 53 | train_idx = torch.LongTensor(data['train_idx']) 54 | val_idx = torch.LongTensor(data['val_idx']) 55 | test_idx =torch.LongTensor(data['test_idx']) 56 | 57 | train_labels = torch.LongTensor(data['train_labels']) 58 | val_labels = torch.LongTensor(data['val_labels']) 59 | test_labels = torch.LongTensor(data['test_labels']) 60 | train_labels=train_labels.reshape(train_labels.size(0),1) 61 | val_labels=val_labels.reshape(val_labels.size(0),1) 62 | test_labels=test_labels.reshape(test_labels.size(0),1) 63 | 64 | return features,train_labels,val_labels,test_labels,train_idx,val_idx,test_idx,memory_dataset, py_alg 65 | 66 | def load_sbm_init(datastr, rmax, alpha): 67 | if datastr == "SBM-50000-50-20+1": 68 | m=1412466; n=50000 69 | elif datastr == "SBM-500000-50-20+1": 70 | m=14141662; n=500000 71 | elif datastr == "SBM-10000000-100-20+1": 72 | m=282938572;n=10000000 73 | elif datastr == "SBM-1000000-50-20+1": 74 | m=28293138;n=1000000 75 | 76 | print("Load %s!" % datastr) 77 | 78 | labels = np.loadtxt('./data/'+datastr+'/'+datastr+'_label.txt') 79 | 80 | py_alg = InstantGNN() 81 | 82 | if datastr == "SBM-1000000-50-20+1" or datastr== "SBM-500000-50-20+1": 83 | encode_len = 256 84 | else: 85 | encode_len = 1024 86 | 87 | split = np.load('./data/'+datastr+'/'+datastr+'_idx_split.npz') 88 | train_idx, val_idx, test_idx = split['train'], split['valid'], split['test'] 89 | train_idx = torch.LongTensor(train_idx) 90 | val_idx = torch.LongTensor(val_idx) 91 | test_idx = torch.LongTensor(test_idx) 92 | 93 | features = np.load('./data/'+datastr+'/'+datastr+'_encode_'+str(encode_len)+'_feat.npy') 94 | memory_dataset = py_alg.initial_operation('./data/'+datastr+'/adjs/', datastr+'_init', m, n, rmax, alpha, features) 95 | 96 | train_labels = torch.LongTensor(labels[train_idx]) 97 | val_labels = torch.LongTensor(labels[val_idx]) 98 | test_labels = torch.LongTensor(labels[test_idx]) 99 | 100 | train_labels = train_labels.reshape(train_labels.size(0), 1) 101 | val_labels = val_labels.reshape(val_labels.size(0), 1) 102 | test_labels = test_labels.reshape(test_labels.size(0), 1) 103 | 104 | return features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg 105 | 106 | def muticlass_f1(output, labels): 107 | preds = output.max(1)[1] 108 | preds = preds.cpu().detach().numpy() 109 | labels = labels.cpu().detach().numpy() 110 | macro = f1_score(labels, preds, average='macro') 111 | return macro 112 | 113 | def com_accuracy(y_pred, y): 114 | pred = y_pred.data.max(1)[1] 115 | pred = pred.reshape(pred.size(0),1) 116 | correct = pred.eq(y.data).cpu().sum() 117 | accuracy = correct.to(dtype=torch.long) * 100. 
/ len(y) 118 | return accuracy 119 | 120 | class SimpleDataset(Dataset): 121 | def __init__(self,x,y): 122 | self.x=x 123 | self.y=y 124 | assert self.x.size(0)==self.y.size(0) 125 | 126 | def __len__(self): 127 | return self.x.size(0) 128 | 129 | def __getitem__(self,idx): 130 | return self.x[idx],self.y[idx] 131 | 132 | -------------------------------------------------------------------------------- /ogb_exp.py: -------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import random 4 | import argparse 5 | import gc 6 | import torch 7 | import resource 8 | import numpy as np 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.nn.functional as F 12 | from torch.utils.data import Dataset, DataLoader 13 | from ogb.nodeproppred import Evaluator 14 | from utils import SimpleDataset 15 | from model import ClassMLP 16 | from utils import * 17 | from glob import glob 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | # Dataset and Algorithom 22 | parser.add_argument('--seed', type=int, default=20159, help='random seed..') 23 | parser.add_argument('--dataset', default='papers100M', help='dateset.') 24 | # Algorithm parameters 25 | parser.add_argument('--alpha', type=float, default=0.2, help='alpha.') 26 | parser.add_argument('--rmax', type=float, default=1e-7, help='threshold.') 27 | # Learining parameters 28 | parser.add_argument('--lr', type=float, default=0.0001, help='learning rate.') 29 | parser.add_argument('--weight_decay', type=float, default=0, help='weight decay.') 30 | parser.add_argument('--layer', type=int, default=3, help='number of layers.') 31 | parser.add_argument('--hidden', type=int, default=2048, help='hidden dimensions.') 32 | parser.add_argument('--dropout', type=float, default=0.3, help='dropout rate.') 33 | parser.add_argument('--bias', default='none', help='bias.') 34 | parser.add_argument('--epochs', type=int, default=1000, help='number of epochs.') 35 | parser.add_argument('--batch', type=int, default=10000, help='batch size.') 36 | parser.add_argument('--patience', type=int, default=50, help='patience.') 37 | parser.add_argument('--dev', type=int, default=1, help='device id.') 38 | args = parser.parse_args() 39 | random.seed(args.seed) 40 | np.random.seed(args.seed) 41 | torch.manual_seed(args.seed) 42 | torch.cuda.manual_seed(args.seed) 43 | print("--------------------------") 44 | print(args) 45 | checkpt_file = 'pretrained/'+uuid.uuid4().hex+'.pt' 46 | 47 | features,train_labels,val_labels,test_labels,train_idx,val_idx,test_idx,memory_dataset, py_alg = load_ogb_init(args.dataset, args.alpha,args.rmax) ## 48 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, args, checkpt_file) 49 | print('------------------ update -------------------') 50 | snapList = [f for f in glob('./data/'+args.dataset+'/*Edgeupdate_snap*.txt')] 51 | print('number of snapshots: ', len(snapList)) 52 | for i in range(len(snapList)): 53 | py_alg.snapshot_operation('data/'+args.dataset+'/'+args.dataset+'_Edgeupdate_snap'+str(i+1)+'.txt', args.rmax, args.alpha, features) 54 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, args, checkpt_file) 55 | 56 | def train(model, device, train_loader, optimizer): 57 | model.train() 58 | 59 | time_epoch=0 60 | loss_list=[] 61 | for step, (x, y) in enumerate(train_loader): 62 | t_st=time.time() 63 | x, y = x.cuda(device), y.cuda(device) 64 | optimizer.zero_grad() 65 | 
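            # The graph propagation was already done ahead of time in the C++
            # extension (initial_operation / snapshot_operation), so this loop
            # trains a plain MLP over the pre-propagated features; the model
            # ends in log_softmax, which is why F.nll_loss is used below.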
out = model(x) 66 | loss = F.nll_loss(out, y.squeeze(1)) 67 | loss.backward() 68 | optimizer.step() 69 | time_epoch+=(time.time()-t_st) 70 | loss_list.append(loss.item()) 71 | return np.mean(loss_list),time_epoch 72 | 73 | 74 | @torch.no_grad() 75 | def validate(model, device, loader, evaluator): 76 | model.eval() 77 | y_pred, y_true = [], [] 78 | for step,(x,y) in enumerate(loader): 79 | x = x.cuda(device) 80 | out = model(x) 81 | y_pred.append(torch.argmax(out, dim=1, keepdim=True).cpu()) 82 | y_true.append(y) 83 | return evaluator.eval({ 84 | "y_true": torch.cat(y_true, dim=0), 85 | "y_pred": torch.cat(y_pred, dim=0), 86 | })['acc'] 87 | 88 | 89 | @torch.no_grad() 90 | def test(model, device, loader, evaluator,checkpt_file): 91 | model.load_state_dict(torch.load(checkpt_file)) 92 | model.eval() 93 | y_pred, y_true = [], [] 94 | for step,(x,y) in enumerate(loader): 95 | x = x.cuda(device) 96 | out = model(x) 97 | y_pred.append(torch.argmax(out, dim=1, keepdim=True).cpu()) 98 | y_true.append(y) 99 | return evaluator.eval({ 100 | "y_true": torch.cat(y_true, dim=0), 101 | "y_pred": torch.cat(y_pred, dim=0), 102 | })['acc'] 103 | 104 | def prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, args, checkpt_file): 105 | features = torch.FloatTensor(features) 106 | features_train = features[train_idx] 107 | features_val = features[val_idx] 108 | features_test = features[test_idx] 109 | del features 110 | gc.collect() 111 | 112 | label_dim = int(max(train_labels.max(),val_labels.max(),test_labels.max()))+1 113 | train_dataset = SimpleDataset(features_train,train_labels) 114 | valid_dataset = SimpleDataset(features_val,val_labels) 115 | test_dataset = SimpleDataset(features_test, test_labels) 116 | 117 | train_loader = DataLoader(train_dataset, batch_size=args.batch,shuffle=True) 118 | valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False) 119 | test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False) 120 | 121 | model = ClassMLP(features_train.size(-1),args.hidden,label_dim,args.layer,args.dropout).cuda(args.dev) 122 | evaluator = Evaluator(name='ogbn-papers100M') 123 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 124 | 125 | bad_counter = 0 126 | best = 0 127 | best_epoch = 0 128 | train_time = 0 129 | model.reset_parameters() 130 | print("--------------------------") 131 | print("Training...") 132 | for epoch in range(args.epochs): 133 | loss_tra,train_ep = train(model,args.dev,train_loader,optimizer) 134 | t_st=time.time() 135 | f1_val = validate(model, args.dev, valid_loader, evaluator) 136 | train_time+=train_ep 137 | if(epoch+1)%20 == 0: 138 | print(f'Epoch:{epoch+1:02d},' 139 | f'Train_loss:{loss_tra:.3f}', 140 | f'Valid_acc:{100*f1_val:.2f}%', 141 | f'Time_cost:{train_ep:.3f}/{train_time:.3f}') 142 | if f1_val > best: 143 | best = f1_val 144 | best_epoch = epoch+1 145 | t_st=time.time() 146 | torch.save(model.state_dict(), checkpt_file) 147 | bad_counter = 0 148 | else: 149 | bad_counter += 1 150 | if bad_counter == args.patience: 151 | break 152 | 153 | test_acc = test(model, args.dev, test_loader, evaluator,checkpt_file) 154 | print(f"Train cost: {train_time:.2f}s") 155 | print('Load {}th epoch'.format(best_epoch)) 156 | print(f"Test accuracy:{100*test_acc:.2f}%") 157 | 158 | if __name__ == '__main__': 159 | main() 160 | -------------------------------------------------------------------------------- /aminer_dense.py: 
-------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import random 4 | import argparse 5 | import gc 6 | import torch 7 | import resource 8 | import numpy as np 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.nn.functional as F 12 | from torch.utils.data import Dataset, DataLoader 13 | from utils import * 14 | from model import ClassMLP 15 | from propagation import InstantGNN 16 | import math 17 | import sklearn.preprocessing 18 | 19 | import os 20 | import pdb 21 | 22 | def train(model, device, train_loader, optimizer, loss_fn, use_pdb=False): 23 | model.train() 24 | 25 | time_epoch = 0 26 | loss_list, acc_list = [], [] 27 | 28 | for i, (x, y) in enumerate(train_loader): 29 | t_st = time.time() 30 | x, y = x.cuda(device), y.cuda(device) 31 | optimizer.zero_grad() 32 | out = model(x) 33 | loss = F.nll_loss(out, y.squeeze(1)) 34 | acc = com_accuracy(out, y) 35 | acc_list.append(acc.item()) 36 | 37 | if use_pdb: 38 | pdb.set_trace(header='train') 39 | 40 | loss.backward() 41 | optimizer.step() 42 | loss_list.append(loss.item()) 43 | time_epoch += (time.time() - t_st) 44 | return np.mean(loss_list), np.mean(acc_list), time_epoch 45 | 46 | @torch.no_grad() 47 | def validate(model, device, loader, loss_fn, use_pdb=False): 48 | model.eval() 49 | loss_list, acc_list = [], [] 50 | for i, (x, y) in enumerate(loader): 51 | x, y = x.cuda(device), y.cuda(device) 52 | out = model(x) 53 | 54 | loss = F.nll_loss(out, y.squeeze(1)) 55 | loss_list.append(loss.item()) 56 | acc = com_accuracy(out, y) 57 | acc_list.append(acc.item()) 58 | if use_pdb: 59 | pdb.set_trace(header='valid') 60 | 61 | return np.mean(loss_list), np.mean(acc_list) 62 | 63 | @torch.no_grad() 64 | def test(model, device, loader, checkpt_file, loss_fn, use_pdb=False): 65 | model.load_state_dict(torch.load(checkpt_file)) 66 | model.eval() 67 | loss_list, acc_list = [], [] 68 | for step, (x, y) in enumerate(loader): 69 | x, y = x.cuda(device), y.cuda(device) 70 | out = model(x) 71 | 72 | loss = F.nll_loss(out, y.squeeze(1)) 73 | loss_list.append(loss.item()) 74 | acc = com_accuracy(out, y) 75 | acc_list.append(acc.item()) 76 | 77 | if use_pdb: 78 | pdb.set_trace(header='test') 79 | 80 | return np.mean(loss_list), np.mean(acc_list) 81 | 82 | ## load feat and generate model 83 | def prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args,fineturn=False): 84 | print(args) 85 | scaler = sklearn.preprocessing.StandardScaler() 86 | scaler.fit(features) 87 | features = scaler.transform(features) 88 | 89 | features = torch.FloatTensor(features) 90 | 91 | features_train = features[train_idx] 92 | features_val = features[val_idx] 93 | features_test = features[test_idx] 94 | del features 95 | gc.collect() 96 | 97 | train_dataset = SimpleDataset(features_train, train_labels) 98 | valid_dataset = SimpleDataset(features_val, val_labels) 99 | test_dataset = SimpleDataset(features_test, test_labels) 100 | 101 | train_loader = DataLoader(train_dataset, batch_size=args.batch, shuffle=True) 102 | valid_loader = DataLoader(valid_dataset, batch_size=len(val_labels), shuffle=False) 103 | test_loader = DataLoader(test_dataset, batch_size=len(test_labels), shuffle=False) 104 | 105 | label_dim = int(max(train_labels.max(),val_labels.max(),test_labels.max()))+1 106 | model = ClassMLP(features_train.size(-1), args.hidden, label_dim, args.layer, args.dropout).cuda(args.dev) 107 | if fineturn: 108 | 
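        # fineturn (i.e., fine-tune): warm-start from the checkpoint saved for
        # the previous snapshot instead of training from scratch.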
model.load_state_dict(torch.load(args.checkpt_file)) 109 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 110 | 111 | #### begin train 112 | bad_counter = 0 113 | best = 0 114 | best_epoch = 0 115 | train_time = 0 116 | best_loss = 1e+8 * 1.0 117 | model.reset_parameters() 118 | print("--------------------------") 119 | print("Training...") 120 | for epoch in range(args.epochs): 121 | loss_tra, acc_tra, train_ep = train(model, args.dev, train_loader, optimizer, loss_fn) 122 | loss_val, acc_val = validate(model, args.dev, valid_loader, loss_fn) 123 | train_time += train_ep 124 | if (epoch + 1) % 2 == 0: 125 | print(f'Epoch:{epoch + 1:02d},' 126 | f'Train_loss:{loss_tra:.8f}', 127 | f'Train_acc:{acc_tra:.5f}', 128 | f'Valid_loss:{loss_val:.8f}', 129 | f'Valid_acc:{acc_val:.5f}', 130 | f'Time_cost:{train_ep:.3f} / {train_time:.3f}') 131 | if acc_val > best: 132 | best = acc_val 133 | best_epoch = epoch + 1 134 | torch.save(model.state_dict(), args.checkpt_file) 135 | bad_counter = 0 136 | else: 137 | bad_counter += 1 138 | if bad_counter == args.patience: 139 | break 140 | 141 | loss_test, acc_test = test(model, args.dev, test_loader, args.checkpt_file, loss_fn) 142 | print('Load {}th epoch'.format(best_epoch)) 143 | print(f"Test loss:{loss_test:.8f}, acc:{acc_test:.5f}") 144 | 145 | def main(): 146 | parser = argparse.ArgumentParser() 147 | # Dataset and Algorithom 148 | parser.add_argument('--seed', type=int, default=20159, help='random seed.') 149 | parser.add_argument('--dataset', default='1984_author_dense', help='dateset.') 150 | # Algorithm parameters 151 | parser.add_argument('--alpha', type=float, default=0.2, help='alpha.') 152 | parser.add_argument('--rmax', type=float, default=1e-7, help='threshold.') 153 | # Learining parameters 154 | parser.add_argument('--lr', type=float, default=0.0001, help='learning rate.') 155 | parser.add_argument('--weight_decay', type=float, default=0, help='weight decay.') 156 | parser.add_argument('--layer', type=int, default=2, help='number of layers.') 157 | parser.add_argument('--hidden', type=int, default=256, help='hidden dimensions.') 158 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout rate.') 159 | parser.add_argument('--bias', default='none', help='bias.') 160 | parser.add_argument('--epochs', type=int, default=3000, help='number of epochs.') 161 | parser.add_argument('--batch', type=int, default=1024, help='batch size.') 162 | parser.add_argument('--patience', type=int, default=20, help='patience.') 163 | parser.add_argument('--dev', type=int, default=1, help='device id.') 164 | args = parser.parse_args() 165 | random.seed(args.seed) 166 | np.random.seed(args.seed) 167 | torch.manual_seed(args.seed) 168 | torch.cuda.manual_seed(args.seed) 169 | print("--------------------------") 170 | print(args) 171 | args.checkpt_file = 'pretrained/' + uuid.uuid4().hex + '.pt' 172 | 173 | features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg= load_aminer_init(args.dataset, args.rmax, args.alpha) # 174 | loss_fn = torch.nn.CrossEntropyLoss() 175 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 176 | 177 | print('--------------------- update ----------------------') 178 | begin = 1985 179 | pdb.set_trace() 180 | for i in range(30): 181 | py_alg.snapshot_operation('./data/aminer/' + str(begin+i) + '_coauthor_dense.txt', args.rmax, args.alpha, features) 182 | continue 183 | data = 
np.load('./data/aminer/' + str(begin+i) + '_author_dense_labels.npy') 184 | train_labels = torch.LongTensor(data[train_idx]) 185 | val_labels = torch.LongTensor(data[val_idx]) 186 | test_labels = torch.LongTensor(data[test_idx]) 187 | train_labels = train_labels.reshape(train_labels.size(0), 1) 188 | val_labels = val_labels.reshape(val_labels.size(0), 1) 189 | test_labels = test_labels.reshape(test_labels.size(0), 1) 190 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 191 | 192 | if __name__ == '__main__': 193 | main() 194 | 195 | -------------------------------------------------------------------------------- /sbm.py: -------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import random 4 | import argparse 5 | import gc 6 | import torch 7 | import resource 8 | import numpy as np 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.nn.functional as F 12 | from torch.utils.data import Dataset, DataLoader 13 | from utils import * 14 | from model import ClassMLP 15 | from propagation import InstantGNN 16 | import math 17 | import sklearn.preprocessing 18 | 19 | import os 20 | 21 | import psutil 22 | import sys 23 | import pdb 24 | 25 | def train(model, device, train_loader, optimizer, loss_fn, use_pdb=False): 26 | model.train() 27 | 28 | time_epoch = 0 29 | loss_list, acc_list = [], [] 30 | 31 | for i, (x, y) in enumerate(train_loader): 32 | t_st = time.time() 33 | x, y = x.cuda(device), y.cuda(device) 34 | optimizer.zero_grad() 35 | out = model(x) 36 | loss = F.nll_loss(out, y.squeeze(1)) 37 | b = 0.1 38 | flood = (loss - b).abs() + b 39 | acc = com_accuracy(out, y) 40 | acc_list.append(acc.item()) 41 | if use_pdb: 42 | pdb.set_trace(header='train') 43 | 44 | flood.backward() 45 | optimizer.step() 46 | #time_epoch += (time.time() - t_st) 47 | loss_list.append(loss.item()) 48 | time_epoch += (time.time() - t_st) 49 | return np.mean(loss_list), np.mean(acc_list), time_epoch 50 | 51 | 52 | @torch.no_grad() 53 | def validate(model, device, loader, loss_fn, use_pdb=False): 54 | model.eval() 55 | loss_list, acc_list = [], [] 56 | for i, (x, y) in enumerate(loader): 57 | x, y = x.cuda(device), y.cuda(device) 58 | out = model(x) 59 | 60 | #loss = loss_fn(out.view(y.shape), y.float()) 61 | loss = F.nll_loss(out, y.squeeze(1)) 62 | loss_list.append(loss.item()) 63 | acc = com_accuracy(out, y) 64 | acc_list.append(acc.item()) 65 | if use_pdb: 66 | pdb.set_trace(header='valid') 67 | 68 | return np.mean(loss_list), np.mean(acc_list) 69 | 70 | @torch.no_grad() 71 | def test(model, device, loader, checkpt_file, loss_fn, use_pdb=False): 72 | model.load_state_dict(torch.load(checkpt_file)) 73 | model.eval() 74 | loss_list, acc_list = [], [] 75 | for step, (x, y) in enumerate(loader): 76 | x, y = x.cuda(device), y.cuda(device) 77 | out = model(x) 78 | 79 | #loss = loss_fn(out.view(y.shape), y.float()) 80 | loss = F.nll_loss(out, y.squeeze(1)) 81 | loss_list.append(loss.item()) 82 | acc = com_accuracy(out, y) 83 | acc_list.append(acc.item()) 84 | 85 | if use_pdb: 86 | pdb.set_trace(header='test') 87 | 88 | return np.mean(loss_list), np.mean(acc_list) 89 | 90 | ## load feat and generate model 91 | def prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args,fineturn=False): 92 | print(args) 93 | scaler = sklearn.preprocessing.StandardScaler() 94 | scaler.fit(features) 95 | features = scaler.transform(features) 
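    # Standardize each dimension of the propagated embeddings; this is redone
    # on every call because snapshot_operation refreshes `features` in place.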
96 | 97 | features = torch.FloatTensor(features) 98 | 99 | features_train = features[train_idx] 100 | features_val = features[val_idx] 101 | features_test = features[test_idx] 102 | del features 103 | gc.collect() 104 | 105 | train_dataset = SimpleDataset(features_train, train_labels) 106 | valid_dataset = SimpleDataset(features_val, val_labels) 107 | test_dataset = SimpleDataset(features_test, test_labels) 108 | 109 | train_loader = DataLoader(train_dataset, batch_size=args.batch, shuffle=True) 110 | valid_loader = DataLoader(valid_dataset, batch_size=len(val_labels), shuffle=False) 111 | test_loader = DataLoader(test_dataset, batch_size=len(test_labels), shuffle=False) 112 | 113 | label_dim = int(max(train_labels.max(),val_labels.max(),test_labels.max()))+1 114 | model = ClassMLP(features_train.size(-1), args.hidden, label_dim, args.layer, args.dropout).cuda(args.dev) 115 | if fineturn: 116 | model.load_state_dict(torch.load(args.checkpt_file)) 117 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 118 | 119 | #### begin train 120 | bad_counter = 0 121 | best = 0 122 | best_epoch = 0 123 | train_time = 0 124 | best_loss = 1e+8 * 1.0 125 | model.reset_parameters() 126 | print("--------------------------") 127 | print("Training...") 128 | for epoch in range(args.epochs): 129 | loss_tra, acc_tra, train_ep = train(model, args.dev, train_loader, optimizer, loss_fn) 130 | loss_val, acc_val = validate(model, args.dev, valid_loader, loss_fn) 131 | train_time += train_ep 132 | if (epoch + 1) % 2 == 0: 133 | print(f'Epoch:{epoch + 1:02d},' 134 | f'Train_loss:{loss_tra:.8f}', 135 | f'Train_acc:{acc_tra:.5f}', 136 | f'Valid_loss:{loss_val:.8f}', 137 | f'Valid_acc:{acc_val:.5f}', 138 | f'Time_cost:{train_ep:.3f} / {train_time:.3f}') 139 | if acc_val > best: 140 | best = acc_val 141 | best_epoch = epoch + 1 142 | torch.save(model.state_dict(), args.checkpt_file) 143 | bad_counter = 0 144 | else: 145 | bad_counter += 1 146 | if bad_counter == args.patience: 147 | break 148 | 149 | loss_test, acc_test = test(model, args.dev, test_loader, args.checkpt_file, loss_fn) 150 | print('Load {}th epoch'.format(best_epoch)) 151 | print(f"Test loss:{loss_test:.8f}, acc:{acc_test:.5f}") 152 | 153 | def main(): 154 | mem = psutil.virtual_memory() 155 | initial_memory = mem.used / 1024 / 1024 / 1024 156 | parser = argparse.ArgumentParser() 157 | # Dataset and Algorithom 158 | parser.add_argument('--seed', type=int, default=20159, help='random seed.') 159 | parser.add_argument('--dataset', default='SBM-50000-50-20+1', help='dateset.') 160 | # Algorithm parameters 161 | parser.add_argument('--alpha', type=float, default=0.2, help='alpha for APPNP_AGP.') 162 | parser.add_argument('--rmax', type=float, default=1e-7, help='threshold.') 163 | # Learining parameters 164 | parser.add_argument('--lr', type=float, default=0.0001, help='learning rate.') 165 | parser.add_argument('--weight_decay', type=float, default=0, help='weight decay.') 166 | parser.add_argument('--layer', type=int, default=2, help='number of layers.') 167 | parser.add_argument('--hidden', type=int, default=256, help='hidden dimensions.') 168 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout rate.') 169 | parser.add_argument('--bias', default='none', help='bias.') 170 | parser.add_argument('--epochs', type=int, default=3000, help='number of epochs.') 171 | parser.add_argument('--batch', type=int, default=1024, help='batch size.') 172 | parser.add_argument('--patience', type=int, default=20, 
help='patience.') 173 | parser.add_argument('--dev', type=int, default=1, help='device id.') 174 | args = parser.parse_args() 175 | random.seed(args.seed) 176 | np.random.seed(args.seed) 177 | torch.manual_seed(args.seed) 178 | torch.cuda.manual_seed(args.seed) 179 | print("--------------------------") 180 | print(args) 181 | args.checkpt_file = 'pretrained/' + uuid.uuid4().hex + '.pt' 182 | 183 | features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg= load_sbm_init(args.dataset, args.rmax, args.alpha) 184 | mem = psutil.virtual_memory() 185 | memory_cost = mem.used / 1024 / 1024 / 1024 - initial_memory 186 | print('load_init cost: ', memory_cost) 187 | 188 | loss_fn = torch.nn.CrossEntropyLoss() 189 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 190 | 191 | print('--------------------- update ----------------------') 192 | for i in range(10): 193 | py_alg.snapshot_operation('./data/'+args.dataset+'/'+args.dataset+'_Edgeupdate_snap'+str(i)+'.txt', args.rmax, args.alpha, features) 194 | mem = psutil.virtual_memory() 195 | memory_cost = mem.used / 1024 / 1024 / 1024 - initial_memory 196 | print('snapshot_operation ' + str(i) + ' cost: ', memory_cost) 197 | data = np.loadtxt('./data/'+args.dataset+'/'+args.dataset+'_label_snap'+str(i)+'.txt') 198 | train_labels = torch.LongTensor(data[train_idx]) 199 | val_labels = torch.LongTensor(data[val_idx]) 200 | test_labels = torch.LongTensor(data[test_idx]) 201 | train_labels = train_labels.reshape(train_labels.size(0), 1) 202 | val_labels = val_labels.reshape(val_labels.size(0), 1) 203 | test_labels = test_labels.reshape(test_labels.size(0), 1) 204 | 205 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 206 | 207 | if __name__ == '__main__': 208 | main() 209 | 210 | 211 | -------------------------------------------------------------------------------- /convert/gen_SBM.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | bool cmp(const int& a,const int&b){ 16 | return a>& Adj, vector>& out_Adj,int* clusterID){ 30 | stringstream dy_out; 31 | dy_out<<"../data/SBM-"< ChangeNodes; 42 | for(uint i=0; i::iterator itr; 66 | itr=find(Adj[tmp_node].begin(),Adj[tmp_node].end(), change_node); 67 | int idx=distance(Adj[tmp_node].begin(), itr); 68 | Adj[tmp_node].erase(itr); 69 | 70 | fdy<::iterator itr; 88 | itr=find(out_Adj[change_node].begin(),out_Adj[change_node].end(), old_out_neibor); 89 | out_Adj[change_node].erase(itr); 90 | itr=find(out_Adj[old_out_neibor].begin(),out_Adj[old_out_neibor].end(), change_node); 91 | out_Adj[old_out_neibor].erase(itr); 92 | } 93 | } 94 | if(dd>0){ 95 | for(int j=0; j0/for" <> Adj; 254 | vector> out_Adj; 255 | vector random_w = vector(vert); 256 | 257 | for (uint i = 0; i < vert; i++) 258 | { 259 | vector templst; 260 | Adj.push_back(templst); 261 | out_Adj.push_back(templst); 262 | random_w[i] = i; 263 | } 264 | random_shuffle(random_w.begin(),random_w.end()); 265 | 266 | for(uint i=0;i> Instantgnn::update_graph(string updatefilename, vector&affected_nodelst, vector>&delete_neighbors) // vector>&add_adjs 10 | { 11 | ifstream infile(updatefilename.c_str()); 12 | //cout<<"updating graph " << updatefilename <> new_neighbors(vert); 17 | 
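        // Record each distinct source node touched by this update batch: only
        // these "affected" nodes change degree, so the later residual adjustment
        // and push phase can be restricted to them.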
vector isAffected(vert, false); 18 | while (infile >> v_from >> v_to) 19 | { 20 | insertFLAG = g.isEdgeExist(v_from, v_to); 21 | 22 | // update graph 23 | if(!isAffected[v_from]){ 24 | affected_nodelst.push_back(v_from); 25 | isAffected[v_from] = true; 26 | } 27 | 28 | if(insertFLAG == 1){ 29 | g.insertEdge(v_from, v_to); 30 | new_neighbors[v_from].push_back(v_to); 31 | } 32 | else if(insertFLAG == -1){ 33 | cout<<"delete......"< &feat) 46 | { 47 | alpha=alphaa; 48 | rmax=rmaxx; 49 | 50 | vector> candidate_sets(dimension); 51 | vector> isCandidates(dimension, vector(vert, false)); 52 | vector isUpdateW(dimension, false); 53 | 54 | clock_t start_t, end_t; 55 | start_t = clock(); 56 | cout<<"updating begin, for snapshot: " << updatefilename < affected_nodelst; 60 | 61 | vector> delete_neighbors(vert); 62 | vector> add_neighbors(vert); 63 | 64 | add_neighbors = update_graph(updatefilename, affected_nodelst, delete_neighbors); 65 | end_t = clock(); 66 | //cout<<"-----update_graph finish-------- time: " << (end_t - start_t)/(1.0*CLOCKS_PER_SEC)<<" s"< oldDu(affected_nodelst.size(), 0); 71 | //double oldDu[affected_nodelst.size()]; 72 | for(uint i=0;irmax_p || R[dim][affected_node]0) 136 | { 137 | cout<<"dims of feats that need push:"< &feat) 143 | { 144 | alpha=alphaa; 145 | rmax=rmaxx; 146 | 147 | int insertFLAG = 0; 148 | ifstream infile(updatefilename.c_str()); 149 | int k = 0; 150 | uint v_from, v_to; 151 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 157 | { 158 | line_num += 1; 159 | if(line_num <= begin) 160 | continue; 161 | else if (line_num > end) 162 | break; 163 | //cout << "line:" << line_num << " from:" << v_from << " to: " << v_to << endl; 164 | insertFLAG = g.isEdgeExist(v_from, v_to); 165 | //cout << "insertFLAG: " << insertFLAG << endl; 166 | // update graph 167 | if(insertFLAG == 1) 168 | g.insertEdge(v_from, v_to); 169 | else if(insertFLAG == -1) 170 | g.deleteEdge(v_from, v_to); 171 | // update Du 172 | double oldDu = Du[v_from]; 173 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 174 | 175 | vector> candidate_sets(dimension); 176 | vector> isCandidates(dimension, vector(vert, false)); 177 | vector isUpdateW(dimension, false); 178 | 179 | for(int i=0; i 0) 192 | { 193 | increment += in_v; 194 | increment /= alpha; 195 | R[i][v_from] += increment; 196 | } 197 | else //delete edge 198 | { 199 | increment -= in_v; 200 | increment /= alpha; 201 | R[i][v_from] += increment; 202 | } 203 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 240 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 241 | } 242 | infile.close(); 243 | } 244 | 245 | int Instantgnn::snapshot_operation_rate_Z(string updatefilename, int begin, double rmaxx,double alphaa, double threshold, Eigen::Map &feat, Eigen::Map &init_Z) 246 | { 247 | alpha=alphaa; 248 | rmax=rmaxx; 249 | 250 | int insertFLAG = 0; 251 | ifstream infile(updatefilename.c_str()); 252 | int k = 0; 253 | uint v_from, v_to; 254 | clock_t start_t, end_t; 255 | start_t = clock(); 256 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 270 | { 271 | line_num += 1; 272 | if(line_num <= begin) 273 | continue; 274 | 275 | //cout << "from:" << v_from << " to: " << v_to << endl; 276 | insertFLAG = g.isEdgeExist(v_from, v_to); 277 | //cout << "insertFLAG: " << insertFLAG << endl; 278 | // update graph 279 | if(insertFLAG == 1) 280 | g.insertEdge(v_from, v_to); 281 | else if(insertFLAG == -1) 282 | g.deleteEdge(v_from, v_to); 283 | // update Du 
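            // Du caches sqrt(out-degree) per node; the edge change above only
            // altered v_from's degree, so oldDu is kept to rescale v_from's
            // residual to the new degree normalization.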
284 | double oldDu = Du[v_from]; 285 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 286 | 287 | vector> candidate_sets(dimension); 288 | vector> isCandidates(dimension, vector(vert, false)); 289 | vector isUpdateW(dimension, false); 290 | 291 | for(int i=0; i 0) 304 | { 305 | increment += in_v; 306 | increment /= alpha; 307 | R[i][v_from] += increment; 308 | } 309 | else //delete edge 310 | { 311 | increment -= in_v; 312 | increment /= alpha; 313 | R[i][v_from] += increment; 314 | } 315 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 353 | { 354 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 355 | diff = feat - feat_last_time; 356 | delta_feat = feat - init_Z; 357 | double diff_F = diff.norm(); 358 | double delta_feat_F = delta_feat.norm(); 359 | rate = diff_F / delta_feat_F; 360 | //double diff_l1 = diff.lpNorm<1>(); 361 | fout<threshold) 366 | break; 367 | } 368 | infile.close(); 369 | fout.close(); 370 | end_t = clock(); 371 | double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 372 | cout<<"snapshot time: " << total_t << endl; 373 | return line_num; 374 | } 375 | 376 | void Instantgnn::overall_operation(double rmaxx,double alphaa, Eigen::Map &feat){ 377 | alpha=alphaa; 378 | rmax=rmaxx; 379 | 380 | vector> candidate_sets(dimension); 381 | vector> isCandidates(dimension, vector(vert, false)); 382 | vector isUpdateW(dimension, false); 383 | for(int i=0; irmax_p || R[i][j]0) 406 | { 407 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,true); 408 | } 409 | } 410 | 411 | int startsWith(string s, string sub){ 412 | return s.find(sub)==0?1:0; 413 | } 414 | double Instantgnn::initial_operation(string path, string dataset,uint mm,uint nn,double rmaxx,double alphaa,Eigen::Map &feat) 415 | { 416 | X = feat; // change in feat not influence X 417 | rmax=rmaxx; 418 | edges=mm; 419 | vert=nn; 420 | alpha=alphaa; 421 | dataset_name=dataset; 422 | cout<(vert,0); 429 | double rrr=0.5; 430 | for(uint i=0; i>(dimension, vector(vert, 0)); 436 | rowsum_pos = vector(dimension,0); 437 | rowsum_neg = vector(dimension,0); 438 | 439 | random_w = vector(dimension); 440 | 441 | for(int i = 0 ; i < dimension ; i++ ) 442 | random_w[i] = i; 443 | random_shuffle(random_w.begin(),random_w.end()); 444 | for(int i=0; i0) 449 | rowsum_pos[i]+=feat(j,i); 450 | else 451 | rowsum_neg[i]+=feat(j,i); 452 | } 453 | } 454 | 455 | vector> candidate_sets(dimension); 456 | vector> isCandidates(dimension, vector(vert, false)); 457 | 458 | Instantgnn::ppr_push(dimension, feat, true,candidate_sets,isCandidates,true); 459 | 460 | double dataset_size=(double)(((long long)edges+vert)*4+(long long)vert*dimension*8)/1024.0/1024.0/1024.0; 461 | return dataset_size; 462 | } 463 | 464 | void Instantgnn::ppr_push(int dimension, Eigen::Reffeat, bool init,vector>& candidate_sets,vector>& isCandidates, bool log) 465 | { 466 | vector threads; 467 | 468 | struct timeval t_start,t_end; 469 | double timeCost; 470 | //clock_t start_t, end_t; 471 | gettimeofday(&t_start,NULL); 472 | if(log) 473 | cout<<"Begin propagation..."<().swap(threads); 500 | update_w.clear(); 501 | 502 | //end_t = clock(); 503 | //double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 504 | gettimeofday(&t_end, NULL); 505 | timeCost = t_end.tv_sec - t_start.tv_sec + (t_end.tv_usec - t_start.tv_usec)/1000000.0; 506 | if(log){ 507 | cout<<"The propagation time: "<>().swap(isCandidates); 511 | vector>().swap(candidate_sets); 512 | } 513 | 514 | void 
Instantgnn::ppr_residue(Eigen::Reffeats,int st,int ed, bool init,vector>& candidate_sets,vector>& isCandidates) 515 | { 516 | int w; 517 | for(int it=st;it candidate_set = candidate_sets[w]; 525 | vector isCandidate = isCandidates[w]; 526 | 527 | double rowsum_p=rowsum_pos[w]; 528 | double rowsum_n=rowsum_neg[w]; 529 | double rmax_p=rowsum_p*rmax; 530 | double rmax_n=rowsum_n*rmax; 531 | if(rmax_n == 0) rmax_n = -rowsum_p; 532 | 533 | if(init) 534 | { 535 | for(uint i=0; irmax_p || R[w][i] 0) 548 | { 549 | uint tempNode = candidate_set.front(); 550 | candidate_set.pop(); 551 | isCandidate[tempNode] = false; 552 | double old = R[w][tempNode]; 553 | R[w][tempNode] = 0; 554 | feats(tempNode,w) += alpha*old; 555 | 556 | uint inSize = g.getInSize(tempNode); 557 | for(uint i=0; i rmax_p || R[w][v] < rmax_n) 564 | { 565 | candidate_set.push(v); 566 | isCandidate[v] = true; 567 | } 568 | } 569 | } 570 | } 571 | vector().swap(isCandidates[w]); 572 | } 573 | } 574 | 575 | } -------------------------------------------------------------------------------- /instantAlg.cpp: -------------------------------------------------------------------------------- 1 | #include "instantAlg.h" 2 | #include "Graph.h" 3 | 4 | using namespace std; 5 | using namespace Eigen; 6 | 7 | namespace propagation 8 | { 9 | vector> Instantgnn::update_graph(string updatefilename, vector&affected_nodelst, vector>&delete_neighbors) // vector>&add_adjs 10 | { 11 | ifstream infile(updatefilename.c_str()); 12 | //cout<<"updating graph " << updatefilename <> new_neighbors(vert); 17 | vector isAffected(vert, false); 18 | while (infile >> v_from >> v_to) 19 | { 20 | insertFLAG = g.isEdgeExist(v_from, v_to); 21 | 22 | // update graph 23 | if(!isAffected[v_from]){ 24 | affected_nodelst.push_back(v_from); 25 | isAffected[v_from] = true; 26 | } 27 | 28 | if(insertFLAG == 1){ 29 | g.insertEdge(v_from, v_to); 30 | new_neighbors[v_from].push_back(v_to); 31 | } 32 | else if(insertFLAG == -1){ 33 | g.deleteEdge(v_from, v_to); 34 | delete_neighbors[v_from].push_back(v_to); 35 | } 36 | } 37 | infile.close(); 38 | cout<<"update graph finish..."<<"affected_nodelst.size():"< &feat) 45 | { 46 | alpha=alphaa; 47 | rmax=rmaxx; 48 | 49 | vector> candidate_sets(dimension); 50 | vector> isCandidates(dimension, vector(vert, false)); 51 | vector isUpdateW(dimension, false); 52 | 53 | clock_t start_t, end_t; 54 | start_t = clock(); 55 | cout<<"updating begin, for snapshot: " << updatefilename < affected_nodelst; 59 | 60 | vector> delete_neighbors(vert); 61 | vector> add_neighbors(vert); 62 | 63 | add_neighbors = update_graph(updatefilename, affected_nodelst, delete_neighbors); 64 | end_t = clock(); 65 | //cout<<"-----update_graph finish-------- time: " << (end_t - start_t)/(1.0*CLOCKS_PER_SEC)<<" s"< oldDu(affected_nodelst.size(), 0); 70 | //double oldDu[affected_nodelst.size()]; 71 | for(uint i=0;irmax_p || R[dim][affected_node]0) 135 | { 136 | cout<<"dims of feats that need push:"< &feat) 142 | { 143 | alpha=alphaa; 144 | rmax=rmaxx; 145 | 146 | int insertFLAG = 0; 147 | ifstream infile(updatefilename.c_str()); 148 | int k = 0; 149 | uint v_from, v_to; 150 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 156 | { 157 | line_num += 1; 158 | if(line_num <= begin) 159 | continue; 160 | else if (line_num > end) 161 | break; 162 | //cout << "line:" << line_num << " from:" << v_from << " to: " << v_to << endl; 163 | insertFLAG = g.isEdgeExist(v_from, v_to); 164 | //cout << "insertFLAG: " << insertFLAG << endl; 165 | // update graph 166 | 
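            // isEdgeExist() returns 1 when (v_from, v_to) is not yet in the graph
            // (this line is an insertion) and -1 when it already exists (a deletion).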
if(insertFLAG == 1) 167 | g.insertEdge(v_from, v_to); 168 | else if(insertFLAG == -1) 169 | g.deleteEdge(v_from, v_to); 170 | // update Du 171 | double oldDu = Du[v_from]; 172 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 173 | 174 | vector> candidate_sets(dimension); 175 | vector> isCandidates(dimension, vector(vert, false)); 176 | vector isUpdateW(dimension, false); 177 | 178 | for(int i=0; i 0) 191 | { 192 | increment += in_v; 193 | increment /= alpha; 194 | R[i][v_from] += increment; 195 | } 196 | else //delete edge 197 | { 198 | increment -= in_v; 199 | increment /= alpha; 200 | R[i][v_from] += increment; 201 | } 202 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 239 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 240 | } 241 | infile.close(); 242 | } 243 | 244 | int Instantgnn::snapshot_operation_rate_Z(string updatefilename, int begin, double rmaxx,double alphaa, double threshold, Eigen::Map &feat, Eigen::Map &init_Z) 245 | { 246 | alpha=alphaa; 247 | rmax=rmaxx; 248 | 249 | int insertFLAG = 0; 250 | ifstream infile(updatefilename.c_str()); 251 | int k = 0; 252 | int v_from, v_to; 253 | clock_t start_t, end_t; 254 | start_t = clock(); 255 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 269 | { 270 | line_num += 1; 271 | if(line_num <= begin) 272 | continue; 273 | 274 | //cout << "from:" << v_from << " to: " << v_to << endl; 275 | insertFLAG = g.isEdgeExist(v_from, v_to); 276 | //cout << "insertFLAG: " << insertFLAG << endl; 277 | // update graph 278 | if(insertFLAG == 1) 279 | g.insertEdge(v_from, v_to); 280 | else if(insertFLAG == -1) 281 | g.deleteEdge(v_from, v_to); 282 | // update Du 283 | double oldDu = Du[v_from]; 284 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 285 | 286 | vector> candidate_sets(dimension); 287 | vector> isCandidates(dimension, vector(vert, false)); 288 | vector isUpdateW(dimension, false); 289 | 290 | for(int i=0; i 0) 303 | { 304 | increment += in_v; 305 | increment /= alpha; 306 | R[i][v_from] += increment; 307 | } 308 | else //delete edge 309 | { 310 | increment -= in_v; 311 | increment /= alpha; 312 | R[i][v_from] += increment; 313 | } 314 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 352 | { 353 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 354 | diff = feat - feat_last_time; 355 | delta_feat = feat - init_Z; 356 | double diff_F = diff.norm(); 357 | double delta_feat_F = delta_feat.norm(); 358 | rate = diff_F / delta_feat_F; 359 | //double diff_l1 = diff.lpNorm<1>(); 360 | fout<threshold) 365 | break; 366 | } 367 | infile.close(); 368 | fout.close(); 369 | end_t = clock(); 370 | double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 371 | cout<<"snapshot time: " << total_t << endl; 372 | return line_num; 373 | } 374 | 375 | void Instantgnn::overall_operation(double rmaxx,double alphaa, Eigen::Map &feat){ 376 | alpha=alphaa; 377 | rmax=rmaxx; 378 | 379 | vector> candidate_sets(dimension); 380 | vector> isCandidates(dimension, vector(vert, false)); 381 | vector isUpdateW(dimension, false); 382 | for(int i=0; irmax_p || R[i][j]0) 405 | { 406 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,true); 407 | } 408 | } 409 | 410 | int startsWith(string s, string sub){ 411 | return s.find(sub)==0?1:0; 412 | } 413 | double Instantgnn::initial_operation(string path, string dataset,uint mm,uint nn,double rmaxx,double alphaa,Eigen::Map &feat) 414 | { 415 | X = feat; // change in 
feat not influence X 416 | /*if(startsWith(dataset, "arxiv")){ 417 | cout<<"X.cols():"<(vert,0); 433 | double rrr=0.5; 434 | for(uint i=0; i>(dimension, vector(vert, 0)); 440 | rowsum_pos = vector(dimension,0); 441 | rowsum_neg = vector(dimension,0); 442 | 443 | random_w = vector(dimension); 444 | 445 | for(int i = 0 ; i < dimension ; i++ ) 446 | random_w[i] = i; 447 | random_shuffle(random_w.begin(),random_w.end()); 448 | for(int i=0; i0) 453 | rowsum_pos[i]+=feat(i,j); 454 | else 455 | rowsum_neg[i]+=feat(i,j); 456 | } 457 | } 458 | 459 | vector> candidate_sets(dimension); 460 | vector> isCandidates(dimension, vector(vert, false)); 461 | 462 | Instantgnn::ppr_push(dimension, feat, true,candidate_sets,isCandidates,true); 463 | 464 | double dataset_size=(double)(((long long)edges+vert)*4+(long long)vert*dimension*8)/1024.0/1024.0/1024.0; 465 | return dataset_size; 466 | } 467 | 468 | void Instantgnn::ppr_push(int dimension, Eigen::Reffeat, bool init,vector>& candidate_sets,vector>& isCandidates, bool log) 469 | { 470 | vector threads; 471 | 472 | struct timeval t_start,t_end; 473 | double timeCost; 474 | //clock_t start_t, end_t; 475 | gettimeofday(&t_start,NULL); 476 | if(log) 477 | cout<<"Begin propagation..."<().swap(threads); 504 | update_w.clear(); 505 | 506 | //end_t = clock(); 507 | //double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 508 | gettimeofday(&t_end, NULL); 509 | timeCost = t_end.tv_sec - t_start.tv_sec + (t_end.tv_usec - t_start.tv_usec)/1000000.0; 510 | if(log){ 511 | cout<<"The propagation time: "<>().swap(isCandidates); 515 | vector>().swap(candidate_sets); 516 | } 517 | 518 | void Instantgnn::ppr_residue(Eigen::Reffeats,int st,int ed, bool init,vector>& candidate_sets,vector>& isCandidates) 519 | { 520 | int w; 521 | for(int it=st;it candidate_set = candidate_sets[w]; 529 | vector isCandidate = isCandidates[w]; 530 | 531 | double rowsum_p=rowsum_pos[w]; 532 | double rowsum_n=rowsum_neg[w]; 533 | double rmax_p=rowsum_p*rmax; 534 | double rmax_n=rowsum_n*rmax; 535 | if(rmax_n == 0) rmax_n = -rowsum_p; 536 | 537 | if(init) 538 | { 539 | for(uint i=0; irmax_p || R[w][i] 0) 552 | { 553 | uint tempNode = candidate_set.front(); 554 | candidate_set.pop(); 555 | isCandidate[tempNode] = false; 556 | double old = R[w][tempNode]; 557 | R[w][tempNode] = 0; 558 | feats(w,tempNode) += alpha*old; 559 | 560 | uint inSize = g.getInSize(tempNode); 561 | for(uint i=0; i rmax_p || R[w][v] < rmax_n) 568 | { 569 | candidate_set.push(v); 570 | isCandidate[v] = true; 571 | } 572 | } 573 | } 574 | } 575 | vector().swap(isCandidates[w]); 576 | } 577 | } 578 | 579 | } --------------------------------------------------------------------------------
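Usage sketch: the pieces above fit together as follows. This is a minimal, illustrative sketch of the Python-side workflow; the paths, node/edge counts, and hyperparameters are taken from `utils.py`, `ogb.sh`, and `ogb_exp.py`, and the snapshot file name follows the `*_Edgeupdate_snap*.txt` pattern used there.

```
import numpy as np
from propagation import InstantGNN  # built via `python setup.py build_ext --inplace`

py_alg = InstantGNN()

# Node features, shape (n_nodes, n_dims); the C++ side overwrites this array
# in place with the propagated embeddings.
features = np.load('./data/arxiv/arxiv_feat.npy')

# Build the initial graph and run the first full propagation
# (m = 597039 edges, n = 169343 nodes, rmax = 1e-7, alpha = 0.1 for arxiv).
py_alg.initial_operation('./data/arxiv/', 'arxiv_init',
                         597039, 169343, 1e-7, 0.1, features)

# ... train the ClassMLP on `features` (see prepare_to_train in ogb_exp.py) ...

# Apply one snapshot of edge updates (a text file of `from to` pairs); only the
# affected residuals are pushed, and `features` is refreshed in place, ready
# for retraining.
py_alg.snapshot_operation('./data/arxiv/arxiv_Edgeupdate_snap1.txt',
                          1e-7, 0.1, features)
```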