├── KDD18AROPE.pdf ├── MATLAB ├── SampleRun.m ├── AROPE.m ├── Shift_Embedding.m ├── Eigen_TopL.m ├── Eigen_Reweighting.m └── Precision_Np.m ├── python ├── Sample_Run.py ├── eval.py └── utils.py └── README.md /KDD18AROPE.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZW-ZHANG/AROPE/HEAD/KDD18AROPE.pdf -------------------------------------------------------------------------------- /MATLAB/SampleRun.m: -------------------------------------------------------------------------------- 1 | % Refer to AROPE.m for details 2 | 3 | % A sample run is as follows: 4 | 5 | edge_list = load('BlogCatalog.csv'); 6 | A = sparse(edge_list(:,1),edge_list(:,2),1,max(max(edge_list)),max(max(edge_list))); 7 | A = A + A'; 8 | order = [1,2,3,-1]; 9 | weights = cell(4,1); 10 | weights{1} = 1; 11 | weights{2} = [1,0.1]; 12 | weights{3} = [1,0.1,0.01]; 13 | weights{4} = 0.001; 14 | [U_cell,V_cell] = AROPE(A,128,order,weights); 15 | % Network Reconstruction 16 | for i = 1:4 17 | results = Precision_Np(A,sparse(max(max(edge_list)),max(max(edge_list))),U_cell{i},V_cell{i},1e6); 18 | figure(i); 19 | semilogx(1:1e6,results); 20 | end -------------------------------------------------------------------------------- /MATLAB/AROPE.m: -------------------------------------------------------------------------------- 1 | function [U_output, V_output] = AROPE(A,d,order,weights) 2 | % AROPE Algorithm 3 | % Inputs: 4 | % A: adjacency matrix A or its variations 5 | % d: dimensionality 6 | % r different high-order proximity: 7 | % order: 1 x r vector, order of the proximity 8 | % weights: 1 x r cell, each containing the weights for one high-order proximity 9 | % Outputs: 1 x r cell, each containing the embedding vectors 10 | [lambda,X] = Eigen_TopL(A,d); 11 | r = length(order); 12 | U_output = cell(r,1); 13 | V_output = cell(r,1); 14 | for i = 1:r 15 | [U_output{i},V_output{i}] = Shift_Embedding(lambda,X,order(i),weights{i},d); 16 | end 
17 | 18 | end -------------------------------------------------------------------------------- /MATLAB/Shift_Embedding.m: -------------------------------------------------------------------------------- 1 | function [U,V] = Shift_Embedding(lambda,X,order,coef,d) 2 | % lambda,X: top-L eigen-decomposition 3 | % order: a number indicating the order 4 | % coef: a vector of length order, indicating the weights for each order 5 | % d: preset embedding dimension 6 | % return: content/context embedding vectors 7 | lambda_H = Eigen_Reweighting(lambda,order,coef); % High-order transform 8 | [~,temp_index] = sort(abs(lambda_H),'descend'); % select top-d 9 | temp_index = temp_index(1:d); 10 | lambda_H = lambda_H(temp_index); 11 | U = X(:,temp_index) * diag(sqrt(abs(lambda_H))); % Calculate embedding 12 | V = X(:,temp_index) * diag(sqrt(abs(lambda_H)) .* sign(lambda_H)); 13 | 14 | end -------------------------------------------------------------------------------- /MATLAB/Eigen_TopL.m: -------------------------------------------------------------------------------- 1 | function [lambda,X] = Eigen_TopL(A,d) 2 | % A: N x N symmetric sparse adjacency matrix 3 | % d: preset dimension 4 | % return top-L eigen-decomposition of A containing at least d positive eigenvalues 5 | if ~ issymmetric(A) 6 | error('The matrix is not symmetric!'); 7 | end 8 | L = d + 10; 9 | while 1 % can be improved to reduce redundant calculation if L <= 2d not hold 10 | L = L + d; 11 | [X,lambda] = eigs(A,L); 12 | lambda = diag(lambda); 13 | if (sum(lambda > 0) >= d) 14 | break; 15 | end 16 | end 17 | % only select top-L 18 | [~,temp_index] = sort(abs(lambda),'descend'); 19 | lambda = lambda(temp_index); 20 | temp_max = find(cumsum(lambda > 0) >= d); 21 | lambda = lambda(1:temp_max(1)); 22 | temp_index = temp_index(1:temp_max(1)); 23 | X = X(:,temp_index); 24 | end -------------------------------------------------------------------------------- /python/Sample_Run.py: 
-------------------------------------------------------------------------------- 1 | # Sample run on BlogCatalog 2 | import numpy as np 3 | import pandas as pd 4 | from scipy.sparse import csr_matrix 5 | 6 | import utils 7 | from eval import Precision_Np 8 | 9 | if __name__ == '__main__': 10 | 11 | data = pd.read_csv('BlogCatalog.csv') 12 | data = np.array(data) - 1 # change index from 0 13 | N = np.max(np.max(data)) + 1 14 | A = csr_matrix((np.ones(data.shape[0]), (data[:,0],data[:,1])), shape = (N,N)) 15 | A += A.T 16 | 17 | order = [1,2,3,-1] 18 | weights = [] 19 | weights.append([1]) 20 | weights.append([1,0.1]) 21 | weights.append([1,0.1,0.01]) 22 | weights.append([0.001]) 23 | U_list,V_list = utils.AROPE(A,128,order,weights) 24 | # Network Reconstruction 25 | results = [Precision_Np(A,csr_matrix((N,N)),U_list[i],V_list[i],1e6) for i in range(4)] 26 | 27 | -------------------------------------------------------------------------------- /MATLAB/Eigen_Reweighting.m: -------------------------------------------------------------------------------- 1 | function X_H = Eigen_Reweighting(X,order,coef) 2 | % X: original eigenvalues 3 | % order: order, -1 stands for infinity 4 | % coef: weights, decaying constant if order = -1 5 | if (order == -1) % infinity 6 | if (length(coef) == 1) 7 | if (max(abs(X)) * coef < 1) 8 | X_H = X ./ (1 - coef * X); 9 | else 10 | error('Decaying constant too large.'); 11 | end 12 | else 13 | error('Eigen_Reweighting wrong.'); 14 | end 15 | else 16 | if (length(coef) == order) 17 | X_H = coef(1) * X; 18 | X_temp = X; 19 | for i = 2:order 20 | X_temp = X_temp .* X; 21 | X_H = X_H + coef(i) * X_temp; 22 | end 23 | else 24 | error('Eigen_Reweighting wrong.'); 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /python/eval.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def 
Precision_Np(Matrix_test,Matrix_train,U,V,Np): 5 | # Matrix_test is n x n testing matrix, may overlap with Matrix_train 6 | # Matrix_train is n x n training matrix 7 | # U/V are content/context embedding vectors 8 | # Np: returns Precision@Np for pairwise similarity 9 | N, _ = U.shape 10 | assert N < 30000, 'Network too large. Sample suggested.' 11 | Sim = U.dot(V.T) 12 | temp_row, temp_col = np.nonzero(Sim) 13 | temp_value = Sim[temp_row,temp_col] 14 | temp_choose = np.logical_and(np.array(Matrix_train[temp_row,temp_col])[0] == 0, temp_row != temp_col) 15 | temp_row, temp_col, temp_value = temp_row[temp_choose], temp_col[temp_choose], temp_value[temp_choose] 16 | temp_index = np.argsort(temp_value)[::-1] 17 | assert len(temp_index) >= Np, 'Np too large' 18 | temp_index = temp_index[: int(Np)+1] 19 | temp_row, temp_col = temp_row[temp_index], temp_col[temp_index] 20 | result = np.array(Matrix_test[temp_row,temp_col])[0] > 0 21 | result = np.divide(np.cumsum(result > 0), np.array(range(len(result))) + 1) 22 | return result -------------------------------------------------------------------------------- /MATLAB/Precision_Np.m: -------------------------------------------------------------------------------- 1 | function result = Precision_Np(Matrix_test,Matrix_train,U,V,Np) 2 | % Matrix_test is n x n testing matrix, may overlap with Matrix_train 3 | % Matrix_train is n x n training matrix 4 | % U/V are content/context embedding vectors 5 | % Np: returns Precision@Np for pairwise similarity 6 | [N,~] = size(U); 7 | if (N > 30000) 8 | error('Network too large. 
Sample suggested.'); 9 | else 10 | Sim = U * V'; 11 | [temp_row,temp_col,temp_value] = find(Sim); 12 | clear Sim; 13 | end 14 | temp_choose = (Matrix_train(sub2ind([N,N],temp_row,temp_col)) == 0) & (temp_row ~= temp_col); 15 | temp_row = temp_row(temp_choose); 16 | temp_col = temp_col(temp_choose); 17 | temp_value = temp_value(temp_choose); 18 | clear temp_choose; 19 | [~,temp_index] = sort(temp_value,'descend'); 20 | if length(temp_index) < Np 21 | error('Np too large'); 22 | end 23 | temp_index = temp_index(1:Np); 24 | clear temp_value; 25 | temp_row = temp_row(temp_index); 26 | temp_col = temp_col(temp_index); 27 | clear temp_index; 28 | result = Matrix_test(sub2ind([N,N],temp_row,temp_col)) > 0; 29 | result = cumsum(result > 0) ./ (1:length(result))'; 30 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AROPE 2 | This is the official implementation of "[Arbitrary-Order Proximity Preserved Network Embedding](http://cuip.thumedialab.com/papers/NE-ArbitraryProximity.pdf)" (KDD 2018). 3 | 4 | We provide two implementations: MATLAB and Python. Note that the MATLAB version is faster in our testing and was used to produce the original results in the paper. 
5 | 6 | ### Requirements 7 | ``` 8 | MATLAB R2017a 9 | or 10 | Python >= 3.5.2 11 | numpy >= 1.14.2 12 | scipy >= 1.0.0 13 | pandas >= 0.22.0 14 | ``` 15 | 16 | ### Usage 17 | #### Main Function 18 | ``` 19 | [U_output, V_output] = AROPE(A,d,order,weights) 20 | ``` 21 | ``` 22 | Input: 23 | A: sparse adjacency matrix or its variations, must be symmetric 24 | d: dimensionality 25 | order: 1 x r vector, order of the proximity 26 | weights: 1 x r cell/list, each containing the weights for one high-order proximity 27 | Output: 28 | U_output/V_output: 1 x r cell/list, each containing the content/context embedding vectors for one high-order proximity 29 | ``` 30 | #### Example Usage 31 | See MATLAB/SampleRun.m or python/Sample_Run.py for a sample run of network reconstruction on the BlogCatalog dataset. 32 | 33 | ### Cite 34 | If you find this code useful, please cite our paper: 35 | ``` 36 | @inproceedings{zhang2018arbitrary, 37 | title={Arbitrary-Order Proximity Preserved Network Embedding}, 38 | author={Zhang, Ziwei and Cui, Peng and Wang, Xiao and Pei, Jian and Yao, Xuanrong and Zhu, Wenwu}, 39 | booktitle={Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining}, 40 | pages={2778--2786}, 41 | year={2018}, 42 | organization={ACM} 43 | } 44 | ``` -------------------------------------------------------------------------------- /python/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from scipy.sparse.linalg import eigs 4 | 5 | def Eigen_Reweighting(X,order,coef): 6 | # X: original eigenvalues 7 | # order: order, -1 stands for infinity 8 | # coef: weights, decaying constant if order = -1 9 | # return: reweighted eigenvalues 10 | if order == -1: # infinity 11 | assert len(coef) == 1, 'Eigen_Reweighting wrong.' 12 | coef = coef[0] 13 | assert np.max(np.absolute(X)) * coef < 1, 'Decaying constant too large.' 
14 | X_H = np.divide(X, 1 - coef * X) 15 | else: 16 | assert len(coef) == order, 'Eigen_Reweighting wrong.' 17 | X_H = coef[0] * X 18 | X_temp = X 19 | for i in range(1,order): 20 | X_temp = np.multiply(X_temp,X) 21 | X_H += coef[i] * X_temp 22 | return X_H 23 | 24 | 25 | def Eigen_TopL(A, d): 26 | # A: N x N symmetric sparse adjacency matrix 27 | # d: preset dimension 28 | # return: top-L eigen-decomposition of A containing at least d positive eigenvalues 29 | # assert np.all(A.T == A), 'The matrix is not symmetric!' 30 | L = d + 10 31 | lambd = np.array([0]) 32 | while sum(lambd > 0) < d: # can be improved to reduce redundant calculation if L <= 2d + 10 not hold 33 | L = L + d 34 | lambd, X = eigs(A, L) 35 | lambd, X = lambd.real, X.real 36 | # only select top-L 37 | temp_index = np.absolute(lambd).argsort()[::-1] 38 | lambd = lambd[temp_index] 39 | temp_max, = np.where(np.cumsum(lambd > 0) >= d) 40 | lambd, temp_index = lambd[:temp_max[0]+1], temp_index[:temp_max[0]+1] 41 | X = X[:,temp_index] 42 | return lambd, X 43 | 44 | 45 | def Shift_Embedding(lambd, X, order, coef, d): 46 | # lambd, X: top-L eigen-decomposition 47 | # order: a number indicating the order 48 | # coef: a vector of length order, indicating the weights for each order 49 | # d: preset embedding dimension 50 | # return: content/context embedding vectors 51 | lambd_H = Eigen_Reweighting(lambd,order,coef) # High-order transform 52 | temp_index = np.absolute(lambd_H).argsort()[::-1] # select top-d 53 | temp_index = temp_index[:d+1] 54 | lambd_H = lambd_H[temp_index] 55 | lambd_H_temp = np.sqrt(np.absolute(lambd_H)) 56 | U = np.dot(X[:,temp_index], np.diag(lambd_H_temp)) # Calculate embedding 57 | V = np.dot(X[:,temp_index], np.diag(np.multiply(lambd_H_temp, np.sign(lambd_H)))) 58 | return U, V 59 | 60 | 61 | def AROPE(A, d, order, weights): 62 | # A: adjacency matrix A or its variations, sparse scipy matrix 63 | # d: dimensionality 64 | # r different high-order proximity: 65 | # order: 1 x r 
vector, order of the proximity 66 | # weights: 1 x r list, each containing the weights for one high-order proximity 67 | # return: 1 x r list, each containing the embedding vectors 68 | A = A.asfptype() 69 | lambd, X = Eigen_TopL(A, d) 70 | r = len(order) 71 | U_output, V_output = [], [] 72 | for i in range(r): 73 | U_temp, V_temp = Shift_Embedding(lambd, X, order[i], weights[i], d) 74 | U_output.append(U_temp) 75 | V_output.append(V_temp) 76 | return U_output, V_output 77 | 78 | --------------------------------------------------------------------------------