├── aminer.sh
├── sbm.sh
├── ogb.sh
├── setup.py
├── propagation.pxd
├── propagation.pyx
├── model.py
├── README.md
├── instantAlg.h
├── Graph.h
├── utils.py
├── ogb_exp.py
├── aminer_dense.py
├── sbm.py
├── convert
│   ├── gen_SBM.cpp
│   └── convert_ogb.py
├── instantAlg_arxiv.cpp
└── instantAlg.cpp

/aminer.sh:
--------------------------------------------------------------------------------
1 | python aminer_dense.py --layer 4 --dataset 1984_author_dense --alpha 0.1
2 | 
--------------------------------------------------------------------------------
/sbm.sh:
--------------------------------------------------------------------------------
1 | python sbm.py --alpha 0.001 --epochs 200 --dataset SBM-500000-50-20+1 --lr 0.01 --batch 1024
2 | 
--------------------------------------------------------------------------------
/ogb.sh:
--------------------------------------------------------------------------------
1 | python ogb_exp.py --dataset papers100M --layer 3 --hidden 2048 --alpha 0.2 --dropout 0.3 --rmax 1e-8
2 | python ogb_exp.py --dataset products --layer 4 --hidden 1024 --alpha 0.1 --dropout 0.5
3 | python ogb_exp.py --dataset arxiv --layer 4 --hidden 1024 --alpha 0.1 --dropout 0.3
4 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup, Extension
2 | from Cython.Build import cythonize
3 | import eigency
4 | 
5 | setup(
6 |     name='propagation',
7 |     version='0.0.1',
8 |     author='anonymous',
9 |     ext_modules=cythonize(Extension(
10 |         name='propagation',
11 |         sources=['propagation.pyx'],
12 |         language='c++',
13 |         extra_compile_args=["-std=c++11"],
14 |         # If Eigen is installed elsewhere, replace "./eigen3" with your own path.
15 |         # If the build fails with numpy-related header errors, add numpy's header
16 |         # directory (numpy.get_include()) to include_dirs as well.
17 |         include_dirs=[".", "module-dir-name"] + eigency.get_includes() + ["./eigen3"],
18 |     )),
19 |     install_requires=['Cython>=0.2.15', 'eigency>=1.77'],
20 |     python_requires='>=3'
21 | )
--------------------------------------------------------------------------------
/propagation.pxd:
--------------------------------------------------------------------------------
1 | from eigency.core cimport *
2 | from libcpp.string cimport string
3 | 
4 | ctypedef unsigned int uint
5 | 
6 | cdef extern from "instantAlg.cpp":
7 | #cdef extern from "instantAlg_arxiv.cpp":
8 |     pass
9 | 
10 | cdef extern from "instantAlg.h" namespace "propagation":
11 |     cdef cppclass Instantgnn:
12 |         Instantgnn() except+
13 |         double initial_operation(string,string,uint,uint,double,double,Map[MatrixXd] &) except +
14 |         void snapshot_operation(string, double, double, Map[MatrixXd] &) except +
15 |         void overall_operation(double,double, Map[MatrixXd] &) except +
16 |         void linenum_operation(string, int,int,double,double, Map[MatrixXd] &) except +
17 |         int snapshot_operation_rate_Z(string, int, double, double, double, Map[MatrixXd] &, Map[MatrixXd] &)
18 | 
--------------------------------------------------------------------------------
/propagation.pyx:
--------------------------------------------------------------------------------
1 | from propagation cimport Instantgnn
2 | 
3 | cdef class InstantGNN:
4 |     cdef Instantgnn c_instantgnn
5 | 
6 |     def __cinit__(self):
7 |         self.c_instantgnn=Instantgnn()
8 | 
9 |     def initial_operation(self,path,dataset,unsigned int m,unsigned int n,rmax,alpha,np.ndarray array3):
10 |         return self.c_instantgnn.initial_operation(path.encode(),dataset.encode(),m,n,rmax,alpha,Map[MatrixXd](array3))
11 | 
12 |     def snapshot_operation(self, upfile, rmax,alpha, np.ndarray array3):
13 |         return self.c_instantgnn.snapshot_operation(upfile.encode(), rmax, alpha, Map[MatrixXd](array3))
14 | 
15 |     def overall_operation(self, rmax,alpha, np.ndarray array3):
16 |         return self.c_instantgnn.overall_operation(rmax, alpha, Map[MatrixXd](array3))
17 | 
18 |     def snapshot_operation_rate_Z(self, upfile, begin, rmax,alpha, threshold, np.ndarray array3, np.ndarray array4):
19 |         return self.c_instantgnn.snapshot_operation_rate_Z(upfile.encode(), begin, rmax, alpha, threshold, Map[MatrixXd](array3), Map[MatrixXd](array4))
20 | 
21 |     def linenum_operation(self, upfile, begin, end, rmax,alpha, np.ndarray array3):
22 |         return self.c_instantgnn.linenum_operation(upfile.encode(), begin, end, rmax, alpha, Map[MatrixXd](array3))
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import math
4 | import torch.nn.functional as F
5 | 
6 | class ClassMLP(torch.nn.Module):
7 |     def __init__(self, in_channels, hidden_channels, out_channels, num_layers, dropout):
8 |         super(ClassMLP, self).__init__()
9 | 
10 |         self.lins = torch.nn.ModuleList()
11 |         self.lins.append(torch.nn.Linear(in_channels, hidden_channels))
12 |         self.bns = torch.nn.ModuleList()
13 |         self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
14 |         for _ in range(num_layers - 2):
15 |             self.lins.append(torch.nn.Linear(hidden_channels, hidden_channels))
16 |             self.bns.append(torch.nn.BatchNorm1d(hidden_channels))
17 |         self.lins.append(torch.nn.Linear(hidden_channels, out_channels))
18 |         self.dropout = dropout
19 | 
20 |     def reset_parameters(self):
21 |         for lin in self.lins:
22 |             lin.reset_parameters()
23 |         for bn in self.bns:
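            # BatchNorm1d.reset_parameters() also clears the running statistics,
            # so every call to prepare_to_train() retrains from a completely
            # fresh model after each graph snapshot.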
24 |             bn.reset_parameters()
25 | 
26 |     def forward(self, x):
27 |         for i, lin in enumerate(self.lins[:-1]):
28 |             x = lin(x)
29 |             x = self.bns[i](x)
30 |             x = F.relu(x)
31 |             x = F.dropout(x, p=self.dropout, training=self.training)
32 |         x = self.lins[-1](x)
33 |         return torch.log_softmax(x, dim=-1)
34 |         #return x
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Instant Graph Neural Networks for Dynamic Graphs
2 | 
3 | ## Requirements
4 | - CUDA 10.1
5 | - python 3.8.5
6 | - pytorch 1.7.1
7 | - GCC 5.4.0
8 | - cython 0.29.21
9 | - eigency 1.77
10 | - numpy 1.18.1
11 | - torch-geometric 1.6.3
12 | - tqdm 4.56.0
13 | - ogb 1.2.4
14 | - [eigen 3.3.9](https://gitlab.com/libeigen/eigen.git)
15 | 
16 | ## Datasets
17 | OGB datasets can be downloaded from [here](https://ogb.stanford.edu). The Open Graph Benchmark provides an automatic way to download and convert the three datasets, so you can simply run `python convert_ogb.py` instead of downloading them manually. We remove a portion of the edges to simulate the evolving nature of the graphs. The folder `./convert/` contains the code that converts the three datasets.
18 | 
19 | We also provide a real dataset with dynamic labels, Aminer, which is processed from the [raw data](https://www.aminer.cn/aminernetwork). The processed version can be downloaded from [here](https://drive.google.com/drive/folders/1bYcVslvdS-cEcQbFAkABFTyqR_RoHw1i).
20 | 
21 | In our paper, we also use synthetic datasets generated by the Stochastic Block Model (SBM). The folder `./convert/` also contains the code to generate and convert these datasets.
22 | For example, you can run the following commands to generate SBM-500K:
23 | ```
24 | g++ -std=c++11 gen_SBM.cpp -o rd_dynamic
25 | ./rd_dynamic -n 500000 -c 50 -ind 20 -outd 1 -snap 10 -change 2500
26 | ```
27 | 
28 | ## Compilation
29 | The Cython extension needs to be compiled before running. Use this command:
30 | ```
31 | python setup.py build_ext --inplace
32 | ```
33 | 
34 | ## Running the code
35 | - On OGB datasets
36 | ```
37 | ./ogb.sh
38 | ```
39 | 
40 | - On the Aminer dataset
41 | ```
42 | ./aminer.sh
43 | ```
44 | 
45 | - On SBM datasets
46 | ```
47 | ./sbm.sh
48 | ```
49 | 
--------------------------------------------------------------------------------
/instantAlg.h:
--------------------------------------------------------------------------------
1 | #ifndef InstantGNN_H
2 | #define InstantGNN_H
3 | #include <iostream>
4 | #include <fstream>
5 | #include <sstream>
6 | #include <string>
7 | #include <cstring>
8 | #include <vector>
9 | #include <queue>
10 | #include <map>
11 | #include <set>
12 | #include <algorithm>
13 | #include <utility>
14 | #include <cmath>
15 | #include <cstdlib>
16 | #include <ctime>
17 | #include <random>
18 | #include <thread>
19 | #include <mutex>
20 | #include <unistd.h>
21 | #include <sys/time.h>
22 | #include <Eigen/Dense>
23 | 
24 | #include "Graph.h"
25 | 
26 | using namespace std;
27 | using namespace Eigen;
28 | typedef unsigned int uint;
29 | 
30 | namespace propagation{
31 |     class Instantgnn{
32 |         Eigen::MatrixXd X;
33 |     public:
34 |         EIGEN_MAKE_ALIGNED_OPERATOR_NEW
35 |         int NUMTHREAD=40;//Number of threads
36 |         uint edges, vert;
37 |         Graph g;
38 |         vector<vector<double>> R;
39 |         double rmax,alpha,t;
40 |         string dataset_name;
41 |         string updateFile;
42 |         vector<double> rowsum_pos;
43 |         vector<double> rowsum_neg;
44 |         vector<int> random_w;
45 |         vector<int> update_w;
46 |         vector<double> Du;
47 |         int dimension;
48 |         double initial_operation(string path, string dataset,uint mm,uint nn,double rmaxx,double alphaa,Eigen::Map<Eigen::MatrixXd> &feat);
49 |         void ppr_push(int dimension, Eigen::Ref<Eigen::MatrixXd> feat, bool init,vector<queue<uint>>& candidate_sets,vector<vector<bool>>& isCandidates, bool log);
50 |         void ppr_residue(Eigen::Ref<Eigen::MatrixXd> feats,int st,int ed, bool init,vector<queue<uint>>& candidate_sets,vector<vector<bool>>& isCandidates);
51 |         void snapshot_operation(string updatefilename, double rmaxx,double alphaa, Eigen::Map<Eigen::MatrixXd> &feat);
52 |         void overall_operation(double rmaxx,double alphaa, Eigen::Map<Eigen::MatrixXd> &feat);
53 |         vector<vector<uint>> update_graph(string updatefilename, vector<uint>&affected_nodelst, vector<vector<uint>>&delete_neighbors);
54 |         int snapshot_operation_rate_Z(string updatefilename, int begin, double rmaxx,double alphaa, double threshold, Eigen::Map<Eigen::MatrixXd> &feat, Eigen::Map<Eigen::MatrixXd> &init_Z);
55 |         void linenum_operation(string updatefilename, int begin, int end, double rmaxx,double alphaa, Eigen::Map<Eigen::MatrixXd> &feat);
56 |     };
57 | }
58 | 
59 | 
60 | #endif // InstantGNN_H
--------------------------------------------------------------------------------
/Graph.h:
--------------------------------------------------------------------------------
1 | #ifndef GRAPH_H
2 | #define GRAPH_H
3 | 
4 | #include <iostream>
5 | #include <fstream>
6 | #include <cstdio>
7 | #include <string>
8 | #include <vector>
9 | #include <ctime>
10 | using namespace std;
11 | 
12 | class Graph
13 | {
14 | public:
15 |     uint n; //number of nodes
16 |     uint m; //number of edges
17 | 
18 |     vector<vector<uint>> inAdj;
19 |     vector<vector<uint>> outAdj;
20 |     uint* indegree;
21 |     uint* outdegree;
22 |     vector<uint> indices;
23 |     vector<uint> indptr;
24 |     Graph()
25 |     {
26 |     }
27 |     ~Graph()
28 |     {
29 |     }
30 | 
31 |     void insertEdge(uint from, uint to) {
32 |         outAdj[from].push_back(to);
33 |         inAdj[to].push_back(from);
34 |         outdegree[from]++;
35 |         indegree[to]++;
36 |     }
37 | 
38 |     void deleteEdge(uint from, uint to) {
39 |         uint j;
40 |         for (j=0; j < indegree[to]; j++) {
41 |             if (inAdj[to][j] == from) {
42 |                 break;
43 |             }
44 |         }
45 |         inAdj[to].erase(inAdj[to].begin()+j);
46 |         indegree[to]--;
47 | 
48 |         for (j=0; j < outdegree[from]; j++) {
49 |             if (outAdj[from][j] == to) {
50 |                 break;
51 |             }
52 |         }
53 | 
54 |         outAdj[from].erase(outAdj[from].begin() + j);
55 |         outdegree[from]--;
56 |     }
57 | 
58 |     int isEdgeExist(uint u, uint v) { // 1 if (u,v) is absent, -1 if it already exists
59 |         for (uint j = 0; j < outdegree[u]; j++) {
60 |             if (outAdj[u][j] == v) {
61 |                 return -1;
62 |             }
63 |         }
64 |         return 1;
65 |     }
66 | 
67 |     void inputGraph(string path, string dataset, uint nodenum, uint edgenum)
68 |     {
69 |         n = nodenum;
70 |         m = edgenum;
71 |         indices=vector<uint>(m);
72 |         indptr=vector<uint>(n+1);
73 |         //string dataset_el="data/"+dataset+"_adj_el.txt";
74 |         string dataset_el=path+dataset+"_adj_el.txt";
75 |         const char *p1=dataset_el.c_str();
76 |         if (FILE *f1 = fopen(p1, "rb"))
77 |         {
78 |             size_t rtn = fread(indices.data(), sizeof indices[0], indices.size(), f1);
79 |             if(rtn!=m)
80 |                 cout<<"Error! 
"< templst(indices.begin() + indptr[i],indices.begin() + indptr[i+1]); 111 | outAdj.push_back(templst); 112 | inAdj.push_back(templst); 113 | } 114 | 115 | clock_t t2=clock(); 116 | cout<<"m="<>from>>to) 138 | { 139 | outdegree[from]++; 140 | indegree[to]++; 141 | } 142 | 143 | cout<<"..."< templst; 148 | inAdj.push_back(templst); 149 | outAdj.push_back(templst); 150 | } 151 | 152 | infile.clear(); 153 | infile.seekg(0); 154 | 155 | clock_t t1=clock(); 156 | 157 | while(infile>>from>>to) 158 | { 159 | outAdj[from].push_back(to); 160 | inAdj[to].push_back(from); 161 | } 162 | infile.close(); 163 | clock_t t2=clock(); 164 | cout<<"m="< getOutAdjs(uint vert){ 181 | return outAdj[vert]; 182 | } 183 | 184 | }; 185 | 186 | 187 | #endif 188 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gc 3 | import numpy as np 4 | from sklearn.metrics import f1_score 5 | from torch.utils.data import Dataset 6 | from propagation import InstantGNN 7 | import pdb 8 | 9 | def load_aminer_init(datastr, rmax, alpha): 10 | if datastr == "1984_author_dense": 11 | m = 3787605; n = 1252095 12 | elif datastr == "2013_author_dense": 13 | m = 9237799; n = 1252095 14 | 15 | print("Load %s!" % datastr) 16 | labels = np.load("./data/aminer/"+ datastr +"_labels.npy") 17 | 18 | py_alg = InstantGNN() 19 | 20 | features = np.load('./data/aminer/aminer_dense_feat.npy') 21 | memory_dataset = py_alg.initial_operation('./data/aminer/',datastr, m, n, rmax, alpha, features) 22 | split = np.load('./data/aminer/aminer_dense_idx_split.npz') 23 | train_idx, val_idx, test_idx = split['train'], split['valid'], split['test'] 24 | 25 | train_idx = torch.LongTensor(train_idx) 26 | val_idx = torch.LongTensor(val_idx) 27 | test_idx = torch.LongTensor(test_idx) 28 | 29 | train_labels = torch.LongTensor(labels[train_idx]) 30 | val_labels = torch.LongTensor(labels[val_idx]) 31 | test_labels = torch.LongTensor(labels[test_idx]) 32 | 33 | train_labels = train_labels.reshape(train_labels.size(0), 1) 34 | val_labels = val_labels.reshape(val_labels.size(0), 1) 35 | test_labels = test_labels.reshape(test_labels.size(0), 1) 36 | 37 | return features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg 38 | 39 | def load_ogb_init(datastr, alpha, rmax): 40 | if(datastr=="papers100M"): 41 | m=3259203018; n=111059956 ##init graph 42 | elif(datastr=="arxiv"): 43 | m=597039; n=169343 44 | elif(datastr=="products"): 45 | m=69634445; n=2449029 46 | print("Load %s!" 
% datastr) 47 | 48 | py_alg = InstantGNN() 49 | features = np.load('./data/'+datastr+'/'+datastr+'_feat.npy') 50 | memory_dataset = py_alg.initial_operation('./data/'+datastr+'/', datastr+'_init', m, n, rmax, alpha, features) 51 | 52 | data = np.load('./data/'+datastr+'/'+datastr+'_labels.npz') 53 | train_idx = torch.LongTensor(data['train_idx']) 54 | val_idx = torch.LongTensor(data['val_idx']) 55 | test_idx =torch.LongTensor(data['test_idx']) 56 | 57 | train_labels = torch.LongTensor(data['train_labels']) 58 | val_labels = torch.LongTensor(data['val_labels']) 59 | test_labels = torch.LongTensor(data['test_labels']) 60 | train_labels=train_labels.reshape(train_labels.size(0),1) 61 | val_labels=val_labels.reshape(val_labels.size(0),1) 62 | test_labels=test_labels.reshape(test_labels.size(0),1) 63 | 64 | return features,train_labels,val_labels,test_labels,train_idx,val_idx,test_idx,memory_dataset, py_alg 65 | 66 | def load_sbm_init(datastr, rmax, alpha): 67 | if datastr == "SBM-50000-50-20+1": 68 | m=1412466; n=50000 69 | elif datastr == "SBM-500000-50-20+1": 70 | m=14141662; n=500000 71 | elif datastr == "SBM-10000000-100-20+1": 72 | m=282938572;n=10000000 73 | elif datastr == "SBM-1000000-50-20+1": 74 | m=28293138;n=1000000 75 | 76 | print("Load %s!" % datastr) 77 | 78 | labels = np.loadtxt('./data/'+datastr+'/'+datastr+'_label.txt') 79 | 80 | py_alg = InstantGNN() 81 | 82 | if datastr == "SBM-1000000-50-20+1" or datastr== "SBM-500000-50-20+1": 83 | encode_len = 256 84 | else: 85 | encode_len = 1024 86 | 87 | split = np.load('./data/'+datastr+'/'+datastr+'_idx_split.npz') 88 | train_idx, val_idx, test_idx = split['train'], split['valid'], split['test'] 89 | train_idx = torch.LongTensor(train_idx) 90 | val_idx = torch.LongTensor(val_idx) 91 | test_idx = torch.LongTensor(test_idx) 92 | 93 | features = np.load('./data/'+datastr+'/'+datastr+'_encode_'+str(encode_len)+'_feat.npy') 94 | memory_dataset = py_alg.initial_operation('./data/'+datastr+'/adjs/', datastr+'_init', m, n, rmax, alpha, features) 95 | 96 | train_labels = torch.LongTensor(labels[train_idx]) 97 | val_labels = torch.LongTensor(labels[val_idx]) 98 | test_labels = torch.LongTensor(labels[test_idx]) 99 | 100 | train_labels = train_labels.reshape(train_labels.size(0), 1) 101 | val_labels = val_labels.reshape(val_labels.size(0), 1) 102 | test_labels = test_labels.reshape(test_labels.size(0), 1) 103 | 104 | return features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg 105 | 106 | def muticlass_f1(output, labels): 107 | preds = output.max(1)[1] 108 | preds = preds.cpu().detach().numpy() 109 | labels = labels.cpu().detach().numpy() 110 | macro = f1_score(labels, preds, average='macro') 111 | return macro 112 | 113 | def com_accuracy(y_pred, y): 114 | pred = y_pred.data.max(1)[1] 115 | pred = pred.reshape(pred.size(0),1) 116 | correct = pred.eq(y.data).cpu().sum() 117 | accuracy = correct.to(dtype=torch.long) * 100. 
/ len(y) 118 | return accuracy 119 | 120 | class SimpleDataset(Dataset): 121 | def __init__(self,x,y): 122 | self.x=x 123 | self.y=y 124 | assert self.x.size(0)==self.y.size(0) 125 | 126 | def __len__(self): 127 | return self.x.size(0) 128 | 129 | def __getitem__(self,idx): 130 | return self.x[idx],self.y[idx] 131 | 132 | -------------------------------------------------------------------------------- /ogb_exp.py: -------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import random 4 | import argparse 5 | import gc 6 | import torch 7 | import resource 8 | import numpy as np 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.nn.functional as F 12 | from torch.utils.data import Dataset, DataLoader 13 | from ogb.nodeproppred import Evaluator 14 | from utils import SimpleDataset 15 | from model import ClassMLP 16 | from utils import * 17 | from glob import glob 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | # Dataset and Algorithom 22 | parser.add_argument('--seed', type=int, default=20159, help='random seed..') 23 | parser.add_argument('--dataset', default='papers100M', help='dateset.') 24 | # Algorithm parameters 25 | parser.add_argument('--alpha', type=float, default=0.2, help='alpha.') 26 | parser.add_argument('--rmax', type=float, default=1e-7, help='threshold.') 27 | # Learining parameters 28 | parser.add_argument('--lr', type=float, default=0.0001, help='learning rate.') 29 | parser.add_argument('--weight_decay', type=float, default=0, help='weight decay.') 30 | parser.add_argument('--layer', type=int, default=3, help='number of layers.') 31 | parser.add_argument('--hidden', type=int, default=2048, help='hidden dimensions.') 32 | parser.add_argument('--dropout', type=float, default=0.3, help='dropout rate.') 33 | parser.add_argument('--bias', default='none', help='bias.') 34 | parser.add_argument('--epochs', type=int, default=1000, help='number of epochs.') 35 | parser.add_argument('--batch', type=int, default=10000, help='batch size.') 36 | parser.add_argument('--patience', type=int, default=50, help='patience.') 37 | parser.add_argument('--dev', type=int, default=1, help='device id.') 38 | args = parser.parse_args() 39 | random.seed(args.seed) 40 | np.random.seed(args.seed) 41 | torch.manual_seed(args.seed) 42 | torch.cuda.manual_seed(args.seed) 43 | print("--------------------------") 44 | print(args) 45 | checkpt_file = 'pretrained/'+uuid.uuid4().hex+'.pt' 46 | 47 | features,train_labels,val_labels,test_labels,train_idx,val_idx,test_idx,memory_dataset, py_alg = load_ogb_init(args.dataset, args.alpha,args.rmax) ## 48 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, args, checkpt_file) 49 | print('------------------ update -------------------') 50 | snapList = [f for f in glob('./data/'+args.dataset+'/*Edgeupdate_snap*.txt')] 51 | print('number of snapshots: ', len(snapList)) 52 | for i in range(len(snapList)): 53 | py_alg.snapshot_operation('data/'+args.dataset+'/'+args.dataset+'_Edgeupdate_snap'+str(i+1)+'.txt', args.rmax, args.alpha, features) 54 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, args, checkpt_file) 55 | 56 | def train(model, device, train_loader, optimizer): 57 | model.train() 58 | 59 | time_epoch=0 60 | loss_list=[] 61 | for step, (x, y) in enumerate(train_loader): 62 | t_st=time.time() 63 | x, y = x.cuda(device), y.cuda(device) 64 | optimizer.zero_grad() 65 | 
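            # The graph propagation was already done ahead of time in the C++
            # extension (initial_operation / snapshot_operation), so this loop
            # trains a plain MLP over the pre-propagated features; the model
            # ends in log_softmax, which is why F.nll_loss is used below.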
out = model(x) 66 | loss = F.nll_loss(out, y.squeeze(1)) 67 | loss.backward() 68 | optimizer.step() 69 | time_epoch+=(time.time()-t_st) 70 | loss_list.append(loss.item()) 71 | return np.mean(loss_list),time_epoch 72 | 73 | 74 | @torch.no_grad() 75 | def validate(model, device, loader, evaluator): 76 | model.eval() 77 | y_pred, y_true = [], [] 78 | for step,(x,y) in enumerate(loader): 79 | x = x.cuda(device) 80 | out = model(x) 81 | y_pred.append(torch.argmax(out, dim=1, keepdim=True).cpu()) 82 | y_true.append(y) 83 | return evaluator.eval({ 84 | "y_true": torch.cat(y_true, dim=0), 85 | "y_pred": torch.cat(y_pred, dim=0), 86 | })['acc'] 87 | 88 | 89 | @torch.no_grad() 90 | def test(model, device, loader, evaluator,checkpt_file): 91 | model.load_state_dict(torch.load(checkpt_file)) 92 | model.eval() 93 | y_pred, y_true = [], [] 94 | for step,(x,y) in enumerate(loader): 95 | x = x.cuda(device) 96 | out = model(x) 97 | y_pred.append(torch.argmax(out, dim=1, keepdim=True).cpu()) 98 | y_true.append(y) 99 | return evaluator.eval({ 100 | "y_true": torch.cat(y_true, dim=0), 101 | "y_pred": torch.cat(y_pred, dim=0), 102 | })['acc'] 103 | 104 | def prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, args, checkpt_file): 105 | features = torch.FloatTensor(features) 106 | features_train = features[train_idx] 107 | features_val = features[val_idx] 108 | features_test = features[test_idx] 109 | del features 110 | gc.collect() 111 | 112 | label_dim = int(max(train_labels.max(),val_labels.max(),test_labels.max()))+1 113 | train_dataset = SimpleDataset(features_train,train_labels) 114 | valid_dataset = SimpleDataset(features_val,val_labels) 115 | test_dataset = SimpleDataset(features_test, test_labels) 116 | 117 | train_loader = DataLoader(train_dataset, batch_size=args.batch,shuffle=True) 118 | valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False) 119 | test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False) 120 | 121 | model = ClassMLP(features_train.size(-1),args.hidden,label_dim,args.layer,args.dropout).cuda(args.dev) 122 | evaluator = Evaluator(name='ogbn-papers100M') 123 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 124 | 125 | bad_counter = 0 126 | best = 0 127 | best_epoch = 0 128 | train_time = 0 129 | model.reset_parameters() 130 | print("--------------------------") 131 | print("Training...") 132 | for epoch in range(args.epochs): 133 | loss_tra,train_ep = train(model,args.dev,train_loader,optimizer) 134 | t_st=time.time() 135 | f1_val = validate(model, args.dev, valid_loader, evaluator) 136 | train_time+=train_ep 137 | if(epoch+1)%20 == 0: 138 | print(f'Epoch:{epoch+1:02d},' 139 | f'Train_loss:{loss_tra:.3f}', 140 | f'Valid_acc:{100*f1_val:.2f}%', 141 | f'Time_cost:{train_ep:.3f}/{train_time:.3f}') 142 | if f1_val > best: 143 | best = f1_val 144 | best_epoch = epoch+1 145 | t_st=time.time() 146 | torch.save(model.state_dict(), checkpt_file) 147 | bad_counter = 0 148 | else: 149 | bad_counter += 1 150 | if bad_counter == args.patience: 151 | break 152 | 153 | test_acc = test(model, args.dev, test_loader, evaluator,checkpt_file) 154 | print(f"Train cost: {train_time:.2f}s") 155 | print('Load {}th epoch'.format(best_epoch)) 156 | print(f"Test accuracy:{100*test_acc:.2f}%") 157 | 158 | if __name__ == '__main__': 159 | main() 160 | -------------------------------------------------------------------------------- /aminer_dense.py: 
-------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import random 4 | import argparse 5 | import gc 6 | import torch 7 | import resource 8 | import numpy as np 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.nn.functional as F 12 | from torch.utils.data import Dataset, DataLoader 13 | from utils import * 14 | from model import ClassMLP 15 | from propagation import InstantGNN 16 | import math 17 | import sklearn.preprocessing 18 | 19 | import os 20 | import pdb 21 | 22 | def train(model, device, train_loader, optimizer, loss_fn, use_pdb=False): 23 | model.train() 24 | 25 | time_epoch = 0 26 | loss_list, acc_list = [], [] 27 | 28 | for i, (x, y) in enumerate(train_loader): 29 | t_st = time.time() 30 | x, y = x.cuda(device), y.cuda(device) 31 | optimizer.zero_grad() 32 | out = model(x) 33 | loss = F.nll_loss(out, y.squeeze(1)) 34 | acc = com_accuracy(out, y) 35 | acc_list.append(acc.item()) 36 | 37 | if use_pdb: 38 | pdb.set_trace(header='train') 39 | 40 | loss.backward() 41 | optimizer.step() 42 | loss_list.append(loss.item()) 43 | time_epoch += (time.time() - t_st) 44 | return np.mean(loss_list), np.mean(acc_list), time_epoch 45 | 46 | @torch.no_grad() 47 | def validate(model, device, loader, loss_fn, use_pdb=False): 48 | model.eval() 49 | loss_list, acc_list = [], [] 50 | for i, (x, y) in enumerate(loader): 51 | x, y = x.cuda(device), y.cuda(device) 52 | out = model(x) 53 | 54 | loss = F.nll_loss(out, y.squeeze(1)) 55 | loss_list.append(loss.item()) 56 | acc = com_accuracy(out, y) 57 | acc_list.append(acc.item()) 58 | if use_pdb: 59 | pdb.set_trace(header='valid') 60 | 61 | return np.mean(loss_list), np.mean(acc_list) 62 | 63 | @torch.no_grad() 64 | def test(model, device, loader, checkpt_file, loss_fn, use_pdb=False): 65 | model.load_state_dict(torch.load(checkpt_file)) 66 | model.eval() 67 | loss_list, acc_list = [], [] 68 | for step, (x, y) in enumerate(loader): 69 | x, y = x.cuda(device), y.cuda(device) 70 | out = model(x) 71 | 72 | loss = F.nll_loss(out, y.squeeze(1)) 73 | loss_list.append(loss.item()) 74 | acc = com_accuracy(out, y) 75 | acc_list.append(acc.item()) 76 | 77 | if use_pdb: 78 | pdb.set_trace(header='test') 79 | 80 | return np.mean(loss_list), np.mean(acc_list) 81 | 82 | ## load feat and generate model 83 | def prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args,fineturn=False): 84 | print(args) 85 | scaler = sklearn.preprocessing.StandardScaler() 86 | scaler.fit(features) 87 | features = scaler.transform(features) 88 | 89 | features = torch.FloatTensor(features) 90 | 91 | features_train = features[train_idx] 92 | features_val = features[val_idx] 93 | features_test = features[test_idx] 94 | del features 95 | gc.collect() 96 | 97 | train_dataset = SimpleDataset(features_train, train_labels) 98 | valid_dataset = SimpleDataset(features_val, val_labels) 99 | test_dataset = SimpleDataset(features_test, test_labels) 100 | 101 | train_loader = DataLoader(train_dataset, batch_size=args.batch, shuffle=True) 102 | valid_loader = DataLoader(valid_dataset, batch_size=len(val_labels), shuffle=False) 103 | test_loader = DataLoader(test_dataset, batch_size=len(test_labels), shuffle=False) 104 | 105 | label_dim = int(max(train_labels.max(),val_labels.max(),test_labels.max()))+1 106 | model = ClassMLP(features_train.size(-1), args.hidden, label_dim, args.layer, args.dropout).cuda(args.dev) 107 | if fineturn: 108 | 
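        # fineturn (i.e., fine-tune): warm-start from the checkpoint saved for
        # the previous snapshot instead of training from scratch.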
model.load_state_dict(torch.load(args.checkpt_file)) 109 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 110 | 111 | #### begin train 112 | bad_counter = 0 113 | best = 0 114 | best_epoch = 0 115 | train_time = 0 116 | best_loss = 1e+8 * 1.0 117 | model.reset_parameters() 118 | print("--------------------------") 119 | print("Training...") 120 | for epoch in range(args.epochs): 121 | loss_tra, acc_tra, train_ep = train(model, args.dev, train_loader, optimizer, loss_fn) 122 | loss_val, acc_val = validate(model, args.dev, valid_loader, loss_fn) 123 | train_time += train_ep 124 | if (epoch + 1) % 2 == 0: 125 | print(f'Epoch:{epoch + 1:02d},' 126 | f'Train_loss:{loss_tra:.8f}', 127 | f'Train_acc:{acc_tra:.5f}', 128 | f'Valid_loss:{loss_val:.8f}', 129 | f'Valid_acc:{acc_val:.5f}', 130 | f'Time_cost:{train_ep:.3f} / {train_time:.3f}') 131 | if acc_val > best: 132 | best = acc_val 133 | best_epoch = epoch + 1 134 | torch.save(model.state_dict(), args.checkpt_file) 135 | bad_counter = 0 136 | else: 137 | bad_counter += 1 138 | if bad_counter == args.patience: 139 | break 140 | 141 | loss_test, acc_test = test(model, args.dev, test_loader, args.checkpt_file, loss_fn) 142 | print('Load {}th epoch'.format(best_epoch)) 143 | print(f"Test loss:{loss_test:.8f}, acc:{acc_test:.5f}") 144 | 145 | def main(): 146 | parser = argparse.ArgumentParser() 147 | # Dataset and Algorithom 148 | parser.add_argument('--seed', type=int, default=20159, help='random seed.') 149 | parser.add_argument('--dataset', default='1984_author_dense', help='dateset.') 150 | # Algorithm parameters 151 | parser.add_argument('--alpha', type=float, default=0.2, help='alpha.') 152 | parser.add_argument('--rmax', type=float, default=1e-7, help='threshold.') 153 | # Learining parameters 154 | parser.add_argument('--lr', type=float, default=0.0001, help='learning rate.') 155 | parser.add_argument('--weight_decay', type=float, default=0, help='weight decay.') 156 | parser.add_argument('--layer', type=int, default=2, help='number of layers.') 157 | parser.add_argument('--hidden', type=int, default=256, help='hidden dimensions.') 158 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout rate.') 159 | parser.add_argument('--bias', default='none', help='bias.') 160 | parser.add_argument('--epochs', type=int, default=3000, help='number of epochs.') 161 | parser.add_argument('--batch', type=int, default=1024, help='batch size.') 162 | parser.add_argument('--patience', type=int, default=20, help='patience.') 163 | parser.add_argument('--dev', type=int, default=1, help='device id.') 164 | args = parser.parse_args() 165 | random.seed(args.seed) 166 | np.random.seed(args.seed) 167 | torch.manual_seed(args.seed) 168 | torch.cuda.manual_seed(args.seed) 169 | print("--------------------------") 170 | print(args) 171 | args.checkpt_file = 'pretrained/' + uuid.uuid4().hex + '.pt' 172 | 173 | features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg= load_aminer_init(args.dataset, args.rmax, args.alpha) # 174 | loss_fn = torch.nn.CrossEntropyLoss() 175 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 176 | 177 | print('--------------------- update ----------------------') 178 | begin = 1985 179 | pdb.set_trace() 180 | for i in range(30): 181 | py_alg.snapshot_operation('./data/aminer/' + str(begin+i) + '_coauthor_dense.txt', args.rmax, args.alpha, features) 182 | continue 183 | data = 
np.load('./data/aminer/' + str(begin+i) + '_author_dense_labels.npy') 184 | train_labels = torch.LongTensor(data[train_idx]) 185 | val_labels = torch.LongTensor(data[val_idx]) 186 | test_labels = torch.LongTensor(data[test_idx]) 187 | train_labels = train_labels.reshape(train_labels.size(0), 1) 188 | val_labels = val_labels.reshape(val_labels.size(0), 1) 189 | test_labels = test_labels.reshape(test_labels.size(0), 1) 190 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 191 | 192 | if __name__ == '__main__': 193 | main() 194 | 195 | -------------------------------------------------------------------------------- /sbm.py: -------------------------------------------------------------------------------- 1 | import time 2 | import uuid 3 | import random 4 | import argparse 5 | import gc 6 | import torch 7 | import resource 8 | import numpy as np 9 | import torch.nn as nn 10 | import torch.optim as optim 11 | import torch.nn.functional as F 12 | from torch.utils.data import Dataset, DataLoader 13 | from utils import * 14 | from model import ClassMLP 15 | from propagation import InstantGNN 16 | import math 17 | import sklearn.preprocessing 18 | 19 | import os 20 | 21 | import psutil 22 | import sys 23 | import pdb 24 | 25 | def train(model, device, train_loader, optimizer, loss_fn, use_pdb=False): 26 | model.train() 27 | 28 | time_epoch = 0 29 | loss_list, acc_list = [], [] 30 | 31 | for i, (x, y) in enumerate(train_loader): 32 | t_st = time.time() 33 | x, y = x.cuda(device), y.cuda(device) 34 | optimizer.zero_grad() 35 | out = model(x) 36 | loss = F.nll_loss(out, y.squeeze(1)) 37 | b = 0.1 38 | flood = (loss - b).abs() + b 39 | acc = com_accuracy(out, y) 40 | acc_list.append(acc.item()) 41 | if use_pdb: 42 | pdb.set_trace(header='train') 43 | 44 | flood.backward() 45 | optimizer.step() 46 | #time_epoch += (time.time() - t_st) 47 | loss_list.append(loss.item()) 48 | time_epoch += (time.time() - t_st) 49 | return np.mean(loss_list), np.mean(acc_list), time_epoch 50 | 51 | 52 | @torch.no_grad() 53 | def validate(model, device, loader, loss_fn, use_pdb=False): 54 | model.eval() 55 | loss_list, acc_list = [], [] 56 | for i, (x, y) in enumerate(loader): 57 | x, y = x.cuda(device), y.cuda(device) 58 | out = model(x) 59 | 60 | #loss = loss_fn(out.view(y.shape), y.float()) 61 | loss = F.nll_loss(out, y.squeeze(1)) 62 | loss_list.append(loss.item()) 63 | acc = com_accuracy(out, y) 64 | acc_list.append(acc.item()) 65 | if use_pdb: 66 | pdb.set_trace(header='valid') 67 | 68 | return np.mean(loss_list), np.mean(acc_list) 69 | 70 | @torch.no_grad() 71 | def test(model, device, loader, checkpt_file, loss_fn, use_pdb=False): 72 | model.load_state_dict(torch.load(checkpt_file)) 73 | model.eval() 74 | loss_list, acc_list = [], [] 75 | for step, (x, y) in enumerate(loader): 76 | x, y = x.cuda(device), y.cuda(device) 77 | out = model(x) 78 | 79 | #loss = loss_fn(out.view(y.shape), y.float()) 80 | loss = F.nll_loss(out, y.squeeze(1)) 81 | loss_list.append(loss.item()) 82 | acc = com_accuracy(out, y) 83 | acc_list.append(acc.item()) 84 | 85 | if use_pdb: 86 | pdb.set_trace(header='test') 87 | 88 | return np.mean(loss_list), np.mean(acc_list) 89 | 90 | ## load feat and generate model 91 | def prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args,fineturn=False): 92 | print(args) 93 | scaler = sklearn.preprocessing.StandardScaler() 94 | scaler.fit(features) 95 | features = scaler.transform(features) 
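    # Standardize each dimension of the propagated embeddings; this is redone
    # on every call because snapshot_operation refreshes `features` in place.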
96 | 97 | features = torch.FloatTensor(features) 98 | 99 | features_train = features[train_idx] 100 | features_val = features[val_idx] 101 | features_test = features[test_idx] 102 | del features 103 | gc.collect() 104 | 105 | train_dataset = SimpleDataset(features_train, train_labels) 106 | valid_dataset = SimpleDataset(features_val, val_labels) 107 | test_dataset = SimpleDataset(features_test, test_labels) 108 | 109 | train_loader = DataLoader(train_dataset, batch_size=args.batch, shuffle=True) 110 | valid_loader = DataLoader(valid_dataset, batch_size=len(val_labels), shuffle=False) 111 | test_loader = DataLoader(test_dataset, batch_size=len(test_labels), shuffle=False) 112 | 113 | label_dim = int(max(train_labels.max(),val_labels.max(),test_labels.max()))+1 114 | model = ClassMLP(features_train.size(-1), args.hidden, label_dim, args.layer, args.dropout).cuda(args.dev) 115 | if fineturn: 116 | model.load_state_dict(torch.load(args.checkpt_file)) 117 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 118 | 119 | #### begin train 120 | bad_counter = 0 121 | best = 0 122 | best_epoch = 0 123 | train_time = 0 124 | best_loss = 1e+8 * 1.0 125 | model.reset_parameters() 126 | print("--------------------------") 127 | print("Training...") 128 | for epoch in range(args.epochs): 129 | loss_tra, acc_tra, train_ep = train(model, args.dev, train_loader, optimizer, loss_fn) 130 | loss_val, acc_val = validate(model, args.dev, valid_loader, loss_fn) 131 | train_time += train_ep 132 | if (epoch + 1) % 2 == 0: 133 | print(f'Epoch:{epoch + 1:02d},' 134 | f'Train_loss:{loss_tra:.8f}', 135 | f'Train_acc:{acc_tra:.5f}', 136 | f'Valid_loss:{loss_val:.8f}', 137 | f'Valid_acc:{acc_val:.5f}', 138 | f'Time_cost:{train_ep:.3f} / {train_time:.3f}') 139 | if acc_val > best: 140 | best = acc_val 141 | best_epoch = epoch + 1 142 | torch.save(model.state_dict(), args.checkpt_file) 143 | bad_counter = 0 144 | else: 145 | bad_counter += 1 146 | if bad_counter == args.patience: 147 | break 148 | 149 | loss_test, acc_test = test(model, args.dev, test_loader, args.checkpt_file, loss_fn) 150 | print('Load {}th epoch'.format(best_epoch)) 151 | print(f"Test loss:{loss_test:.8f}, acc:{acc_test:.5f}") 152 | 153 | def main(): 154 | mem = psutil.virtual_memory() 155 | initial_memory = mem.used / 1024 / 1024 / 1024 156 | parser = argparse.ArgumentParser() 157 | # Dataset and Algorithom 158 | parser.add_argument('--seed', type=int, default=20159, help='random seed.') 159 | parser.add_argument('--dataset', default='SBM-50000-50-20+1', help='dateset.') 160 | # Algorithm parameters 161 | parser.add_argument('--alpha', type=float, default=0.2, help='alpha for APPNP_AGP.') 162 | parser.add_argument('--rmax', type=float, default=1e-7, help='threshold.') 163 | # Learining parameters 164 | parser.add_argument('--lr', type=float, default=0.0001, help='learning rate.') 165 | parser.add_argument('--weight_decay', type=float, default=0, help='weight decay.') 166 | parser.add_argument('--layer', type=int, default=2, help='number of layers.') 167 | parser.add_argument('--hidden', type=int, default=256, help='hidden dimensions.') 168 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout rate.') 169 | parser.add_argument('--bias', default='none', help='bias.') 170 | parser.add_argument('--epochs', type=int, default=3000, help='number of epochs.') 171 | parser.add_argument('--batch', type=int, default=1024, help='batch size.') 172 | parser.add_argument('--patience', type=int, default=20, 
help='patience.') 173 | parser.add_argument('--dev', type=int, default=1, help='device id.') 174 | args = parser.parse_args() 175 | random.seed(args.seed) 176 | np.random.seed(args.seed) 177 | torch.manual_seed(args.seed) 178 | torch.cuda.manual_seed(args.seed) 179 | print("--------------------------") 180 | print(args) 181 | args.checkpt_file = 'pretrained/' + uuid.uuid4().hex + '.pt' 182 | 183 | features, train_labels, val_labels, test_labels, train_idx, val_idx, test_idx, memory_dataset, py_alg= load_sbm_init(args.dataset, args.rmax, args.alpha) 184 | mem = psutil.virtual_memory() 185 | memory_cost = mem.used / 1024 / 1024 / 1024 - initial_memory 186 | print('load_init cost: ', memory_cost) 187 | 188 | loss_fn = torch.nn.CrossEntropyLoss() 189 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 190 | 191 | print('--------------------- update ----------------------') 192 | for i in range(10): 193 | py_alg.snapshot_operation('./data/'+args.dataset+'/'+args.dataset+'_Edgeupdate_snap'+str(i)+'.txt', args.rmax, args.alpha, features) 194 | mem = psutil.virtual_memory() 195 | memory_cost = mem.used / 1024 / 1024 / 1024 - initial_memory 196 | print('snapshot_operation ' + str(i) + ' cost: ', memory_cost) 197 | data = np.loadtxt('./data/'+args.dataset+'/'+args.dataset+'_label_snap'+str(i)+'.txt') 198 | train_labels = torch.LongTensor(data[train_idx]) 199 | val_labels = torch.LongTensor(data[val_idx]) 200 | test_labels = torch.LongTensor(data[test_idx]) 201 | train_labels = train_labels.reshape(train_labels.size(0), 1) 202 | val_labels = val_labels.reshape(val_labels.size(0), 1) 203 | test_labels = test_labels.reshape(test_labels.size(0), 1) 204 | 205 | prepare_to_train(features, train_idx, val_idx, test_idx, train_labels, val_labels, test_labels, loss_fn, args) 206 | 207 | if __name__ == '__main__': 208 | main() 209 | 210 | 211 | -------------------------------------------------------------------------------- /convert/gen_SBM.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | bool cmp(const int& a,const int&b){ 16 | return a>& Adj, vector>& out_Adj,int* clusterID){ 30 | stringstream dy_out; 31 | dy_out<<"../data/SBM-"< ChangeNodes; 42 | for(uint i=0; i::iterator itr; 66 | itr=find(Adj[tmp_node].begin(),Adj[tmp_node].end(), change_node); 67 | int idx=distance(Adj[tmp_node].begin(), itr); 68 | Adj[tmp_node].erase(itr); 69 | 70 | fdy<::iterator itr; 88 | itr=find(out_Adj[change_node].begin(),out_Adj[change_node].end(), old_out_neibor); 89 | out_Adj[change_node].erase(itr); 90 | itr=find(out_Adj[old_out_neibor].begin(),out_Adj[old_out_neibor].end(), change_node); 91 | out_Adj[old_out_neibor].erase(itr); 92 | } 93 | } 94 | if(dd>0){ 95 | for(int j=0; j0/for" <> Adj; 254 | vector> out_Adj; 255 | vector random_w = vector(vert); 256 | 257 | for (uint i = 0; i < vert; i++) 258 | { 259 | vector templst; 260 | Adj.push_back(templst); 261 | out_Adj.push_back(templst); 262 | random_w[i] = i; 263 | } 264 | random_shuffle(random_w.begin(),random_w.end()); 265 | 266 | for(uint i=0;i> Instantgnn::update_graph(string updatefilename, vector&affected_nodelst, vector>&delete_neighbors) // vector>&add_adjs 10 | { 11 | ifstream infile(updatefilename.c_str()); 12 | //cout<<"updating graph " << updatefilename <> new_neighbors(vert); 17 | 
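        // Record each distinct source node touched by this update batch: only
        // these "affected" nodes change degree, so the later residual adjustment
        // and push phase can be restricted to them.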
vector isAffected(vert, false); 18 | while (infile >> v_from >> v_to) 19 | { 20 | insertFLAG = g.isEdgeExist(v_from, v_to); 21 | 22 | // update graph 23 | if(!isAffected[v_from]){ 24 | affected_nodelst.push_back(v_from); 25 | isAffected[v_from] = true; 26 | } 27 | 28 | if(insertFLAG == 1){ 29 | g.insertEdge(v_from, v_to); 30 | new_neighbors[v_from].push_back(v_to); 31 | } 32 | else if(insertFLAG == -1){ 33 | cout<<"delete......"< &feat) 46 | { 47 | alpha=alphaa; 48 | rmax=rmaxx; 49 | 50 | vector> candidate_sets(dimension); 51 | vector> isCandidates(dimension, vector(vert, false)); 52 | vector isUpdateW(dimension, false); 53 | 54 | clock_t start_t, end_t; 55 | start_t = clock(); 56 | cout<<"updating begin, for snapshot: " << updatefilename < affected_nodelst; 60 | 61 | vector> delete_neighbors(vert); 62 | vector> add_neighbors(vert); 63 | 64 | add_neighbors = update_graph(updatefilename, affected_nodelst, delete_neighbors); 65 | end_t = clock(); 66 | //cout<<"-----update_graph finish-------- time: " << (end_t - start_t)/(1.0*CLOCKS_PER_SEC)<<" s"< oldDu(affected_nodelst.size(), 0); 71 | //double oldDu[affected_nodelst.size()]; 72 | for(uint i=0;irmax_p || R[dim][affected_node]0) 136 | { 137 | cout<<"dims of feats that need push:"< &feat) 143 | { 144 | alpha=alphaa; 145 | rmax=rmaxx; 146 | 147 | int insertFLAG = 0; 148 | ifstream infile(updatefilename.c_str()); 149 | int k = 0; 150 | uint v_from, v_to; 151 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 157 | { 158 | line_num += 1; 159 | if(line_num <= begin) 160 | continue; 161 | else if (line_num > end) 162 | break; 163 | //cout << "line:" << line_num << " from:" << v_from << " to: " << v_to << endl; 164 | insertFLAG = g.isEdgeExist(v_from, v_to); 165 | //cout << "insertFLAG: " << insertFLAG << endl; 166 | // update graph 167 | if(insertFLAG == 1) 168 | g.insertEdge(v_from, v_to); 169 | else if(insertFLAG == -1) 170 | g.deleteEdge(v_from, v_to); 171 | // update Du 172 | double oldDu = Du[v_from]; 173 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 174 | 175 | vector> candidate_sets(dimension); 176 | vector> isCandidates(dimension, vector(vert, false)); 177 | vector isUpdateW(dimension, false); 178 | 179 | for(int i=0; i 0) 192 | { 193 | increment += in_v; 194 | increment /= alpha; 195 | R[i][v_from] += increment; 196 | } 197 | else //delete edge 198 | { 199 | increment -= in_v; 200 | increment /= alpha; 201 | R[i][v_from] += increment; 202 | } 203 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 240 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 241 | } 242 | infile.close(); 243 | } 244 | 245 | int Instantgnn::snapshot_operation_rate_Z(string updatefilename, int begin, double rmaxx,double alphaa, double threshold, Eigen::Map &feat, Eigen::Map &init_Z) 246 | { 247 | alpha=alphaa; 248 | rmax=rmaxx; 249 | 250 | int insertFLAG = 0; 251 | ifstream infile(updatefilename.c_str()); 252 | int k = 0; 253 | uint v_from, v_to; 254 | clock_t start_t, end_t; 255 | start_t = clock(); 256 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 270 | { 271 | line_num += 1; 272 | if(line_num <= begin) 273 | continue; 274 | 275 | //cout << "from:" << v_from << " to: " << v_to << endl; 276 | insertFLAG = g.isEdgeExist(v_from, v_to); 277 | //cout << "insertFLAG: " << insertFLAG << endl; 278 | // update graph 279 | if(insertFLAG == 1) 280 | g.insertEdge(v_from, v_to); 281 | else if(insertFLAG == -1) 282 | g.deleteEdge(v_from, v_to); 283 | // update Du 
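            // Du caches sqrt(out-degree) per node; the edge change above only
            // altered v_from's degree, so oldDu is kept to rescale v_from's
            // residual to the new degree normalization.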
284 | double oldDu = Du[v_from]; 285 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 286 | 287 | vector> candidate_sets(dimension); 288 | vector> isCandidates(dimension, vector(vert, false)); 289 | vector isUpdateW(dimension, false); 290 | 291 | for(int i=0; i 0) 304 | { 305 | increment += in_v; 306 | increment /= alpha; 307 | R[i][v_from] += increment; 308 | } 309 | else //delete edge 310 | { 311 | increment -= in_v; 312 | increment /= alpha; 313 | R[i][v_from] += increment; 314 | } 315 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 353 | { 354 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 355 | diff = feat - feat_last_time; 356 | delta_feat = feat - init_Z; 357 | double diff_F = diff.norm(); 358 | double delta_feat_F = delta_feat.norm(); 359 | rate = diff_F / delta_feat_F; 360 | //double diff_l1 = diff.lpNorm<1>(); 361 | fout<threshold) 366 | break; 367 | } 368 | infile.close(); 369 | fout.close(); 370 | end_t = clock(); 371 | double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 372 | cout<<"snapshot time: " << total_t << endl; 373 | return line_num; 374 | } 375 | 376 | void Instantgnn::overall_operation(double rmaxx,double alphaa, Eigen::Map &feat){ 377 | alpha=alphaa; 378 | rmax=rmaxx; 379 | 380 | vector> candidate_sets(dimension); 381 | vector> isCandidates(dimension, vector(vert, false)); 382 | vector isUpdateW(dimension, false); 383 | for(int i=0; irmax_p || R[i][j]0) 406 | { 407 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,true); 408 | } 409 | } 410 | 411 | int startsWith(string s, string sub){ 412 | return s.find(sub)==0?1:0; 413 | } 414 | double Instantgnn::initial_operation(string path, string dataset,uint mm,uint nn,double rmaxx,double alphaa,Eigen::Map &feat) 415 | { 416 | X = feat; // change in feat not influence X 417 | rmax=rmaxx; 418 | edges=mm; 419 | vert=nn; 420 | alpha=alphaa; 421 | dataset_name=dataset; 422 | cout<(vert,0); 429 | double rrr=0.5; 430 | for(uint i=0; i>(dimension, vector(vert, 0)); 436 | rowsum_pos = vector(dimension,0); 437 | rowsum_neg = vector(dimension,0); 438 | 439 | random_w = vector(dimension); 440 | 441 | for(int i = 0 ; i < dimension ; i++ ) 442 | random_w[i] = i; 443 | random_shuffle(random_w.begin(),random_w.end()); 444 | for(int i=0; i0) 449 | rowsum_pos[i]+=feat(j,i); 450 | else 451 | rowsum_neg[i]+=feat(j,i); 452 | } 453 | } 454 | 455 | vector> candidate_sets(dimension); 456 | vector> isCandidates(dimension, vector(vert, false)); 457 | 458 | Instantgnn::ppr_push(dimension, feat, true,candidate_sets,isCandidates,true); 459 | 460 | double dataset_size=(double)(((long long)edges+vert)*4+(long long)vert*dimension*8)/1024.0/1024.0/1024.0; 461 | return dataset_size; 462 | } 463 | 464 | void Instantgnn::ppr_push(int dimension, Eigen::Reffeat, bool init,vector>& candidate_sets,vector>& isCandidates, bool log) 465 | { 466 | vector threads; 467 | 468 | struct timeval t_start,t_end; 469 | double timeCost; 470 | //clock_t start_t, end_t; 471 | gettimeofday(&t_start,NULL); 472 | if(log) 473 | cout<<"Begin propagation..."<().swap(threads); 500 | update_w.clear(); 501 | 502 | //end_t = clock(); 503 | //double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 504 | gettimeofday(&t_end, NULL); 505 | timeCost = t_end.tv_sec - t_start.tv_sec + (t_end.tv_usec - t_start.tv_usec)/1000000.0; 506 | if(log){ 507 | cout<<"The propagation time: "<>().swap(isCandidates); 511 | vector>().swap(candidate_sets); 512 | } 513 | 514 | void 
Instantgnn::ppr_residue(Eigen::Reffeats,int st,int ed, bool init,vector>& candidate_sets,vector>& isCandidates) 515 | { 516 | int w; 517 | for(int it=st;it candidate_set = candidate_sets[w]; 525 | vector isCandidate = isCandidates[w]; 526 | 527 | double rowsum_p=rowsum_pos[w]; 528 | double rowsum_n=rowsum_neg[w]; 529 | double rmax_p=rowsum_p*rmax; 530 | double rmax_n=rowsum_n*rmax; 531 | if(rmax_n == 0) rmax_n = -rowsum_p; 532 | 533 | if(init) 534 | { 535 | for(uint i=0; irmax_p || R[w][i] 0) 548 | { 549 | uint tempNode = candidate_set.front(); 550 | candidate_set.pop(); 551 | isCandidate[tempNode] = false; 552 | double old = R[w][tempNode]; 553 | R[w][tempNode] = 0; 554 | feats(tempNode,w) += alpha*old; 555 | 556 | uint inSize = g.getInSize(tempNode); 557 | for(uint i=0; i rmax_p || R[w][v] < rmax_n) 564 | { 565 | candidate_set.push(v); 566 | isCandidate[v] = true; 567 | } 568 | } 569 | } 570 | } 571 | vector().swap(isCandidates[w]); 572 | } 573 | } 574 | 575 | } -------------------------------------------------------------------------------- /instantAlg.cpp: -------------------------------------------------------------------------------- 1 | #include "instantAlg.h" 2 | #include "Graph.h" 3 | 4 | using namespace std; 5 | using namespace Eigen; 6 | 7 | namespace propagation 8 | { 9 | vector> Instantgnn::update_graph(string updatefilename, vector&affected_nodelst, vector>&delete_neighbors) // vector>&add_adjs 10 | { 11 | ifstream infile(updatefilename.c_str()); 12 | //cout<<"updating graph " << updatefilename <> new_neighbors(vert); 17 | vector isAffected(vert, false); 18 | while (infile >> v_from >> v_to) 19 | { 20 | insertFLAG = g.isEdgeExist(v_from, v_to); 21 | 22 | // update graph 23 | if(!isAffected[v_from]){ 24 | affected_nodelst.push_back(v_from); 25 | isAffected[v_from] = true; 26 | } 27 | 28 | if(insertFLAG == 1){ 29 | g.insertEdge(v_from, v_to); 30 | new_neighbors[v_from].push_back(v_to); 31 | } 32 | else if(insertFLAG == -1){ 33 | g.deleteEdge(v_from, v_to); 34 | delete_neighbors[v_from].push_back(v_to); 35 | } 36 | } 37 | infile.close(); 38 | cout<<"update graph finish..."<<"affected_nodelst.size():"< &feat) 45 | { 46 | alpha=alphaa; 47 | rmax=rmaxx; 48 | 49 | vector> candidate_sets(dimension); 50 | vector> isCandidates(dimension, vector(vert, false)); 51 | vector isUpdateW(dimension, false); 52 | 53 | clock_t start_t, end_t; 54 | start_t = clock(); 55 | cout<<"updating begin, for snapshot: " << updatefilename < affected_nodelst; 59 | 60 | vector> delete_neighbors(vert); 61 | vector> add_neighbors(vert); 62 | 63 | add_neighbors = update_graph(updatefilename, affected_nodelst, delete_neighbors); 64 | end_t = clock(); 65 | //cout<<"-----update_graph finish-------- time: " << (end_t - start_t)/(1.0*CLOCKS_PER_SEC)<<" s"< oldDu(affected_nodelst.size(), 0); 70 | //double oldDu[affected_nodelst.size()]; 71 | for(uint i=0;irmax_p || R[dim][affected_node]0) 135 | { 136 | cout<<"dims of feats that need push:"< &feat) 142 | { 143 | alpha=alphaa; 144 | rmax=rmaxx; 145 | 146 | int insertFLAG = 0; 147 | ifstream infile(updatefilename.c_str()); 148 | int k = 0; 149 | uint v_from, v_to; 150 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 156 | { 157 | line_num += 1; 158 | if(line_num <= begin) 159 | continue; 160 | else if (line_num > end) 161 | break; 162 | //cout << "line:" << line_num << " from:" << v_from << " to: " << v_to << endl; 163 | insertFLAG = g.isEdgeExist(v_from, v_to); 164 | //cout << "insertFLAG: " << insertFLAG << endl; 165 | // update graph 166 | 
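            // isEdgeExist() returns 1 when (v_from, v_to) is not yet in the graph
            // (this line is an insertion) and -1 when it already exists (a deletion).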
if(insertFLAG == 1) 167 | g.insertEdge(v_from, v_to); 168 | else if(insertFLAG == -1) 169 | g.deleteEdge(v_from, v_to); 170 | // update Du 171 | double oldDu = Du[v_from]; 172 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 173 | 174 | vector> candidate_sets(dimension); 175 | vector> isCandidates(dimension, vector(vert, false)); 176 | vector isUpdateW(dimension, false); 177 | 178 | for(int i=0; i 0) 191 | { 192 | increment += in_v; 193 | increment /= alpha; 194 | R[i][v_from] += increment; 195 | } 196 | else //delete edge 197 | { 198 | increment -= in_v; 199 | increment /= alpha; 200 | R[i][v_from] += increment; 201 | } 202 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 239 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 240 | } 241 | infile.close(); 242 | } 243 | 244 | int Instantgnn::snapshot_operation_rate_Z(string updatefilename, int begin, double rmaxx,double alphaa, double threshold, Eigen::Map &feat, Eigen::Map &init_Z) 245 | { 246 | alpha=alphaa; 247 | rmax=rmaxx; 248 | 249 | int insertFLAG = 0; 250 | ifstream infile(updatefilename.c_str()); 251 | int k = 0; 252 | int v_from, v_to; 253 | clock_t start_t, end_t; 254 | start_t = clock(); 255 | cout<<"updating begin, for snapshot: " << updatefilename <> v_from >> v_to) 269 | { 270 | line_num += 1; 271 | if(line_num <= begin) 272 | continue; 273 | 274 | //cout << "from:" << v_from << " to: " << v_to << endl; 275 | insertFLAG = g.isEdgeExist(v_from, v_to); 276 | //cout << "insertFLAG: " << insertFLAG << endl; 277 | // update graph 278 | if(insertFLAG == 1) 279 | g.insertEdge(v_from, v_to); 280 | else if(insertFLAG == -1) 281 | g.deleteEdge(v_from, v_to); 282 | // update Du 283 | double oldDu = Du[v_from]; 284 | Du[v_from] = pow(g.getOutSize(v_from), 0.5); 285 | 286 | vector> candidate_sets(dimension); 287 | vector> isCandidates(dimension, vector(vert, false)); 288 | vector isUpdateW(dimension, false); 289 | 290 | for(int i=0; i 0) 303 | { 304 | increment += in_v; 305 | increment /= alpha; 306 | R[i][v_from] += increment; 307 | } 308 | else //delete edge 309 | { 310 | increment -= in_v; 311 | increment /= alpha; 312 | R[i][v_from] += increment; 313 | } 314 | if( R[i][v_from]>rmax_p || R[i][v_from]rmax_p || R[i][node_w]0) 352 | { 353 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,false); 354 | diff = feat - feat_last_time; 355 | delta_feat = feat - init_Z; 356 | double diff_F = diff.norm(); 357 | double delta_feat_F = delta_feat.norm(); 358 | rate = diff_F / delta_feat_F; 359 | //double diff_l1 = diff.lpNorm<1>(); 360 | fout<threshold) 365 | break; 366 | } 367 | infile.close(); 368 | fout.close(); 369 | end_t = clock(); 370 | double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 371 | cout<<"snapshot time: " << total_t << endl; 372 | return line_num; 373 | } 374 | 375 | void Instantgnn::overall_operation(double rmaxx,double alphaa, Eigen::Map &feat){ 376 | alpha=alphaa; 377 | rmax=rmaxx; 378 | 379 | vector> candidate_sets(dimension); 380 | vector> isCandidates(dimension, vector(vert, false)); 381 | vector isUpdateW(dimension, false); 382 | for(int i=0; irmax_p || R[i][j]0) 405 | { 406 | Instantgnn::ppr_push(update_w.size(), feat, false,candidate_sets,isCandidates,true); 407 | } 408 | } 409 | 410 | int startsWith(string s, string sub){ 411 | return s.find(sub)==0?1:0; 412 | } 413 | double Instantgnn::initial_operation(string path, string dataset,uint mm,uint nn,double rmaxx,double alphaa,Eigen::Map &feat) 414 | { 415 | X = feat; // change in 
feat not influence X 416 | /*if(startsWith(dataset, "arxiv")){ 417 | cout<<"X.cols():"<(vert,0); 433 | double rrr=0.5; 434 | for(uint i=0; i>(dimension, vector(vert, 0)); 440 | rowsum_pos = vector(dimension,0); 441 | rowsum_neg = vector(dimension,0); 442 | 443 | random_w = vector(dimension); 444 | 445 | for(int i = 0 ; i < dimension ; i++ ) 446 | random_w[i] = i; 447 | random_shuffle(random_w.begin(),random_w.end()); 448 | for(int i=0; i0) 453 | rowsum_pos[i]+=feat(i,j); 454 | else 455 | rowsum_neg[i]+=feat(i,j); 456 | } 457 | } 458 | 459 | vector> candidate_sets(dimension); 460 | vector> isCandidates(dimension, vector(vert, false)); 461 | 462 | Instantgnn::ppr_push(dimension, feat, true,candidate_sets,isCandidates,true); 463 | 464 | double dataset_size=(double)(((long long)edges+vert)*4+(long long)vert*dimension*8)/1024.0/1024.0/1024.0; 465 | return dataset_size; 466 | } 467 | 468 | void Instantgnn::ppr_push(int dimension, Eigen::Reffeat, bool init,vector>& candidate_sets,vector>& isCandidates, bool log) 469 | { 470 | vector threads; 471 | 472 | struct timeval t_start,t_end; 473 | double timeCost; 474 | //clock_t start_t, end_t; 475 | gettimeofday(&t_start,NULL); 476 | if(log) 477 | cout<<"Begin propagation..."<().swap(threads); 504 | update_w.clear(); 505 | 506 | //end_t = clock(); 507 | //double total_t = (double)(end_t - start_t) / CLOCKS_PER_SEC; 508 | gettimeofday(&t_end, NULL); 509 | timeCost = t_end.tv_sec - t_start.tv_sec + (t_end.tv_usec - t_start.tv_usec)/1000000.0; 510 | if(log){ 511 | cout<<"The propagation time: "<>().swap(isCandidates); 515 | vector>().swap(candidate_sets); 516 | } 517 | 518 | void Instantgnn::ppr_residue(Eigen::Reffeats,int st,int ed, bool init,vector>& candidate_sets,vector>& isCandidates) 519 | { 520 | int w; 521 | for(int it=st;it candidate_set = candidate_sets[w]; 529 | vector isCandidate = isCandidates[w]; 530 | 531 | double rowsum_p=rowsum_pos[w]; 532 | double rowsum_n=rowsum_neg[w]; 533 | double rmax_p=rowsum_p*rmax; 534 | double rmax_n=rowsum_n*rmax; 535 | if(rmax_n == 0) rmax_n = -rowsum_p; 536 | 537 | if(init) 538 | { 539 | for(uint i=0; irmax_p || R[w][i] 0) 552 | { 553 | uint tempNode = candidate_set.front(); 554 | candidate_set.pop(); 555 | isCandidate[tempNode] = false; 556 | double old = R[w][tempNode]; 557 | R[w][tempNode] = 0; 558 | feats(w,tempNode) += alpha*old; 559 | 560 | uint inSize = g.getInSize(tempNode); 561 | for(uint i=0; i rmax_p || R[w][v] < rmax_n) 568 | { 569 | candidate_set.push(v); 570 | isCandidate[v] = true; 571 | } 572 | } 573 | } 574 | } 575 | vector().swap(isCandidates[w]); 576 | } 577 | } 578 | 579 | } --------------------------------------------------------------------------------
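Usage sketch: the pieces above fit together as follows. This is a minimal, illustrative sketch of the Python-side workflow; the paths, node/edge counts, and hyperparameters are taken from `utils.py`, `ogb.sh`, and `ogb_exp.py`, and the snapshot file name follows the `*_Edgeupdate_snap*.txt` pattern used there.

```
import numpy as np
from propagation import InstantGNN  # built via `python setup.py build_ext --inplace`

py_alg = InstantGNN()

# Node features, shape (n_nodes, n_dims); the C++ side overwrites this array
# in place with the propagated embeddings.
features = np.load('./data/arxiv/arxiv_feat.npy')

# Build the initial graph and run the first full propagation
# (m = 597039 edges, n = 169343 nodes, rmax = 1e-7, alpha = 0.1 for arxiv).
py_alg.initial_operation('./data/arxiv/', 'arxiv_init',
                         597039, 169343, 1e-7, 0.1, features)

# ... train the ClassMLP on `features` (see prepare_to_train in ogb_exp.py) ...

# Apply one snapshot of edge updates (a text file of `from to` pairs); only the
# affected residuals are pushed, and `features` is refreshed in place, ready
# for retraining.
py_alg.snapshot_operation('./data/arxiv/arxiv_Edgeupdate_snap1.txt',
                          1e-7, 0.1, features)
```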