├── KDD18AROPE.pdf
├── MATLAB
    ├── SampleRun.m
    ├── AROPE.m
    ├── Shift_Embedding.m
    ├── Eigen_TopL.m
    ├── Eigen_Reweighting.m
    └── Precision_Np.m
├── python
    ├── Sample_Run.py
    ├── eval.py
    └── utils.py
└── README.md


/KDD18AROPE.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ZW-ZHANG/AROPE/HEAD/KDD18AROPE.pdf


--------------------------------------------------------------------------------
/MATLAB/SampleRun.m:
--------------------------------------------------------------------------------
 1 | % Refer to AROPE.m for details
 2 | 
 3 | % A sample run is as follows:
 4 | 
 5 | edge_list = load('BlogCatalog.csv');
 6 | A = sparse(edge_list(:,1),edge_list(:,2),1,max(max(edge_list)),max(max(edge_list)));
 7 | A = A + A';
 8 | order = [1,2,3,-1];
 9 | weights = cell(4,1);
10 | weights{1} = 1;
11 | weights{2} = [1,0.1];
12 | weights{3} = [1,0.1,0.01];
13 | weights{4} = 0.001;
14 | [U_cell,V_cell] = AROPE(A,128,order,weights);
15 | % Network Reconstruction
16 | for i = 1:4
17 |     results = Precision_Np(A,sparse(max(max(edge_list)),max(max(edge_list))),U_cell{i},V_cell{i},1e6);
18 |     figure(i);
19 |     semilogx(1:1e6,results);
20 | end


--------------------------------------------------------------------------------
/MATLAB/AROPE.m:
--------------------------------------------------------------------------------
 1 | function [U_output, V_output] = AROPE(A,d,order,weights)
 2 | % AROPE Algortihm
 3 | % Inputs: 
 4 | % A: adjacency matrix A or its variations
 5 | % d: dimensionality 
 6 | % r different high-order proximity:
 7 |     % order: 1 x r vector, order of the proximity
 8 |     % weights: 1 x r cell, each containing the weights for one high-order proximity
 9 | % Outputs: 1 x r cell, each containing the embedding vectors 
10 | [lambda,X] = Eigen_TopL(A,d);
11 | r = length(order);
12 | U_output = cell(r,1);
13 | V_output = cell(r,1);
14 | for i = 1:r
15 |     [U_output{i},V_output{i}] = Shift_Embedding(lambda,X,order(i),weights{i},d);
16 | end
17 | 
18 | end


--------------------------------------------------------------------------------
/MATLAB/Shift_Embedding.m:
--------------------------------------------------------------------------------
 1 | function [U,V] = Shift_Embedding(lambda,X,order,coef,d)
 2 | % lambda,X: top-L eigen-decomposition 
 3 | % order: a number indicating the order
 4 | % coef: a vector of length order, indicating the weights for each order
 5 | % d: preset embedding dimension
 6 | % return: content/context embedding vectors
 7 | lambda_H = Eigen_Reweighting(lambda,order,coef);      % High-order transform
 8 | [~,temp_index] = sort(abs(lambda_H),'descend');       % select top-d
 9 | temp_index = temp_index(1:d);
10 | lambda_H = lambda_H(temp_index);
11 | U = X(:,temp_index) * diag(sqrt(abs(lambda_H)));      % Calculate embedding
12 | V = X(:,temp_index) * diag(sqrt(abs(lambda_H)) .* sign(lambda_H));
13 | 
14 | end


--------------------------------------------------------------------------------
/MATLAB/Eigen_TopL.m:
--------------------------------------------------------------------------------
 1 | function [lambda,X] = Eigen_TopL(A,d)
 2 | % A: N x N symmetric sparse adjacency matrix
 3 | % d: preset dimension
 4 | % return top-L eigen-decomposition of A containing at least d positive eigenvalues
 5 | if ~ issymmetric(A)
 6 |     error('The matrix is not symmetric!');
 7 | end
 8 | L = d + 10;          
 9 | while 1         % can be improved to reduce redundant calculation if L <= 2d not hold
10 |     L = L + d;
11 |     [X,lambda] = eigs(A,L);
12 |     lambda = diag(lambda);
13 |     if (sum(lambda > 0) >= d)
14 |         break;
15 |     end
16 | end
17 | % only select top-L
18 | [~,temp_index] = sort(abs(lambda),'descend');  
19 | lambda = lambda(temp_index);
20 | temp_max = find(cumsum(lambda > 0) >= d);
21 | lambda = lambda(1:temp_max(1));
22 | temp_index = temp_index(1:temp_max(1));
23 | X = X(:,temp_index);
24 | end


--------------------------------------------------------------------------------
/python/Sample_Run.py:
--------------------------------------------------------------------------------
 1 | # Sample run on BlogCatalog
 2 | import numpy as np
 3 | import pandas as pd
 4 | from scipy.sparse import csr_matrix
 5 | 
 6 | import utils
 7 | from eval import Precision_Np
 8 | 
 9 | if __name__ == '__main__':
10 | 
11 |     data = pd.read_csv('BlogCatalog.csv')      
12 |     data = np.array(data) - 1                       # change index from 0
13 |     N = np.max(np.max(data)) + 1
14 |     A = csr_matrix((np.ones(data.shape[0]), (data[:,0],data[:,1])), shape = (N,N))
15 |     A += A.T
16 | 
17 |     order = [1,2,3,-1]
18 |     weights = []
19 |     weights.append([1])
20 |     weights.append([1,0.1])
21 |     weights.append([1,0.1,0.01])
22 |     weights.append([0.001])
23 |     U_list,V_list = utils.AROPE(A,128,order,weights)
24 |     # Network Reconstruction
25 |     results = [Precision_Np(A,csr_matrix((N,N)),U_list[i],V_list[i],1e6) for i in range(4)]
26 |     
27 |     


--------------------------------------------------------------------------------
/MATLAB/Eigen_Reweighting.m:
--------------------------------------------------------------------------------
 1 | function X_H = Eigen_Reweighting(X,order,coef)
 2 | % X: original eigenvalues
 3 | % order: order, -1 stands for infinity
 4 | % coef: weights, decaying constant if order = -1
 5 |     if (order == -1)  % infinity
 6 |         if (length(coef) == 1)
 7 |             if (max(abs(X)) * coef < 1)
 8 |               X_H = X ./ (1 - coef * X);
 9 |             else
10 |                 error('Decaying constant too large.');
11 |             end
12 |         else
13 |             error('Eigen_Reweighting wrong.');
14 |         end
15 |     else
16 |         if (length(coef) == order)
17 |             X_H = coef(1) * X;
18 |             X_temp = X;
19 |             for i = 2:order
20 |                 X_temp = X_temp .* X;
21 |                 X_H = X_H  + coef(i) * X_temp;
22 |             end
23 |         else
24 |             error('Eigen_Reweighting wrong.');
25 |         end
26 |     end
27 | end
28 | 


--------------------------------------------------------------------------------
/python/eval.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | 
 4 | def Precision_Np(Matrix_test,Matrix_train,U,V,Np):
 5 | # Matrix_test is n x n testing matrix, may overlap with Matrix_train
 6 | # Matrix_train is n x n training matrix
 7 | # U/V are content/context embedding vectors
 8 | # Np: returns Precision@Np for pairwise similarity 
 9 |     N, _ = U.shape
10 |     assert N < 30000, 'Network too large. Sample suggested.'
11 |     Sim = U.dot(V.T)
12 |     temp_row, temp_col = np.nonzero(Sim)
13 |     temp_value = Sim[temp_row,temp_col]
14 |     temp_choose = np.logical_and(np.array(Matrix_train[temp_row,temp_col])[0] == 0, temp_row != temp_col)
15 |     temp_row, temp_col, temp_value = temp_row[temp_choose], temp_col[temp_choose], temp_value[temp_choose]
16 |     temp_index = np.argsort(temp_value)[::-1]
17 |     assert len(temp_index) >= Np, 'Np too large'  
18 |     temp_index = temp_index[: int(Np)+1]
19 |     temp_row, temp_col = temp_row[temp_index], temp_col[temp_index]
20 |     result = np.array(Matrix_test[temp_row,temp_col])[0] > 0
21 |     result = np.divide(np.cumsum(result > 0), np.array(range(len(result))) + 1)
22 |     return result


--------------------------------------------------------------------------------
/MATLAB/Precision_Np.m:
--------------------------------------------------------------------------------
 1 | function result = Precision_Np(Matrix_test,Matrix_train,U,V,Np)
 2 | % Matrix_test is n x n testing matrix, may overlap with Matrix_train
 3 | % Matrix_train is n x n training matrix
 4 | % U/V are content/context embedding vectors
 5 | % Np: returns Precision@Np for pairwise similarity 
 6 | [N,~] = size(U);
 7 | if (N > 30000)
 8 |     error('Network too large. Sample suggested.');
 9 | else
10 |     Sim = U * V';
11 |     [temp_row,temp_col,temp_value] = find(Sim);
12 |     clear Sim;
13 | end
14 | temp_choose = (Matrix_train(sub2ind([N,N],temp_row,temp_col)) == 0) & (temp_row ~= temp_col);
15 | temp_row = temp_row(temp_choose);
16 | temp_col = temp_col(temp_choose);
17 | temp_value = temp_value(temp_choose);
18 | clear temp_choose;
19 | [~,temp_index] = sort(temp_value,'descend');
20 | if length(temp_index) < Np
21 |     error('Np too large');
22 | end
23 | temp_index = temp_index(1:Np);
24 | clear temp_value;
25 | temp_row = temp_row(temp_index);
26 | temp_col = temp_col(temp_index);
27 | clear temp_index;
28 | result = Matrix_test(sub2ind([N,N],temp_row,temp_col)) > 0;
29 | result = cumsum(result > 0) ./ (1:length(result))';
30 | end


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # AROPE
 2 | This is the official implementation of "[Arbitrary-Order Proximity Preserved Network Embedding](http://cuip.thumedialab.com/papers/NE-ArbitraryProximity.pdf)"(KDD 2018).
 3 | 
 4 | We provide two implementations: MATLAB and Python. Note that the MATLAB version is faster in our testing and is used in producing original results in the paper.
 5 | 
 6 | ### Requirements
 7 | ```
 8 | MATLAB R2017a
 9 | or 
10 | Python >= 3.5.2
11 | numpy >= 1.14.2
12 | scipy >= 1.0.0
13 | pandas >= 0.22.0
14 | ``` 
15 | 
16 | ### Usage
17 | #### Main Function
18 | ```
19 | [U_output, V_output] = AROPE(A,d,order,weights)
20 | ```
21 | ```
22 | Input:
23 |     A: sparse adjacency matrix or its variations, must be symmetric
24 |     d: dimensionality 
25 |     order: 1 x r vector, order of the proximity
26 |     weights: 1 x r cell/list, each containing the weights for one high-order proximity
27 | Output:
28 |     U_output/V_output: 1 x r cell/list, each entry containing the content/context embedding vectors for one proximity setting
29 | ```
30 | #### Example Usage
31 | See MATLAB/SampleRun.m or python/Sample_Run.py for a sample run of network reconstruction on the BlogCatalog dataset.
32 | 
33 | ### Cite
34 | If you find this code useful, please cite our paper:
35 | ```
36 | @inproceedings{zhang2018arbitrary,
37 |   title={Arbitrary-Order Proximity Preserved Network Embedding},
38 |   author={Zhang, Ziwei and Cui, Peng and Wang, Xiao and Pei, Jian and Yao, Xuanrong and Zhu, Wenwu},
39 |   booktitle={Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery \& Data Mining},
40 |   pages={2778--2786},
41 |   year={2018},
42 |   organization={ACM}
43 | }
44 | ```


--------------------------------------------------------------------------------
/python/utils.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import numpy as np
 3 | from scipy.sparse.linalg import eigs
 4 | 
 5 | def Eigen_Reweighting(X,order,coef):
 6 | # X: original eigenvalues
 7 | # order: order, -1 stands for infinity
 8 | # coef: weights, decaying constant if order = -1
 9 | # return: reweighted eigenvalues
10 |     if order == -1:     # infinity
11 |         assert len(coef) == 1, 'Eigen_Reweighting wrong.'
12 |         coef = coef[0]
13 |         assert np.max(np.absolute(X)) * coef < 1, 'Decaying constant too large.'
14 |         X_H = np.divide(X, 1 - coef * X)
15 |     else:
16 |         assert len(coef) == order, 'Eigen_Reweighting wrong.'
17 |         X_H = coef[0] * X
18 |         X_temp = X
19 |         for i in range(1,order):
20 |             X_temp = np.multiply(X_temp,X)
21 |             X_H += coef[i] * X_temp
22 |     return X_H
23 | 
24 | 
def Eigen_TopL(A, d):
    """Top-L eigen-decomposition of A containing at least d positive eigenvalues.

    A: N x N symmetric sparse adjacency matrix (symmetry is not checked here)
    d: preset dimension

    Returns (lambd, X): the eigenvalues sorted by decreasing magnitude and
    truncated right after the d-th positive one, and the matching eigenvectors
    as columns of X.
    Raises ValueError if d positive eigenvalues cannot be obtained.
    """
    # assert np.all(A.T == A), 'The matrix is not symmetric!'
    N = A.shape[0]
    # eigs rejects k >= N - 1 for sparse input, so N - 2 is the most we can ask for.
    max_L = N - 2
    if max_L < d:
        raise ValueError('Matrix too small: cannot extract %d eigenvalues from a %d x %d matrix.' % (d, N, N))
    L = d + 10
    while True:         # can be improved to reduce redundant calculation if L <= 2d + 10 not hold
        # Grow the request by d each round, but never past what eigs accepts.
        L = min(L + d, max_L)
        lambd, X = eigs(A, L)
        lambd, X = lambd.real, X.real
        if np.count_nonzero(lambd > 0) >= d:
            break
        if L >= max_L:
            # Already at the largest admissible request; growing L cannot help.
            raise ValueError('Fewer than %d positive eigenvalues found.' % d)
    # only select top-L: order by magnitude, cut after the d-th positive eigenvalue
    temp_index = np.absolute(lambd).argsort()[::-1]
    lambd = lambd[temp_index]
    temp_max, = np.where(np.cumsum(lambd > 0) >= d)
    lambd, temp_index = lambd[:temp_max[0]+1], temp_index[:temp_max[0]+1]
    X = X[:,temp_index]
    return lambd, X
43 | 
44 | 
def Shift_Embedding(lambd, X, order, coef, d):
    """Turn a top-L eigen-decomposition into d-dimensional embedding vectors.

    lambd, X: eigenvalues and eigenvectors (columns) of the top-L decomposition
    order:    a number indicating the order
    coef:     weights for that order (see Eigen_Reweighting)
    d:        preset embedding dimension

    Returns (U, V), the content/context embedding vectors; each has one column
    per kept eigenvalue, i.e. at most d columns.
    """
    lambd_H = Eigen_Reweighting(lambd,order,coef)             # High-order transform
    temp_index = np.absolute(lambd_H).argsort()[::-1]         # rank by magnitude
    temp_index = temp_index[:d]                               # select top-d (exactly d, as in the MATLAB version)
    lambd_H = lambd_H[temp_index]
    lambd_H_temp = np.sqrt(np.absolute(lambd_H))
    # Both embeddings scale the kept eigenvectors by sqrt(|lambda|);
    # V additionally carries the sign of each eigenvalue.
    U = np.dot(X[:,temp_index], np.diag(lambd_H_temp))        # Calculate embedding
    V = np.dot(X[:,temp_index], np.diag(np.multiply(lambd_H_temp, np.sign(lambd_H))))
    return U, V
59 | 
60 |     
def AROPE(A, d, order, weights):
    """Embeddings for several high-order proximities from one eigen-decomposition.

    A:       adjacency matrix or one of its variations (scipy sparse), symmetric
    d:       dimensionality
    order:   sequence of r proximity orders
    weights: sequence of r weight lists, weights[i] belongs to order[i]

    Returns (U_output, V_output): two lists of length r; entry i holds the
    embedding matrices produced by Shift_Embedding for order[i] / weights[i].
    """
    # The eigen-solver needs floating-point data. astype is used instead of
    # asfptype() because the latter is, as far as we know, only provided by the
    # legacy sparse-matrix classes and not by sparse arrays - TODO confirm
    # against the SciPy version in use.
    if not np.issubdtype(A.dtype, np.inexact):
        A = A.astype(np.float64)
    # The decomposition is computed once and reused for every setting.
    lambd, X = Eigen_TopL(A, d)
    r = len(order)
    U_output, V_output = [], []
    for i in range(r):
        U_temp, V_temp = Shift_Embedding(lambd, X, order[i], weights[i], d)
        U_output.append(U_temp)
        V_output.append(V_temp)
    return U_output, V_output
77 | 
78 | 


--------------------------------------------------------------------------------