├── code
│   ├── yelpWWWCLDynamic.m
│   ├── largeMatrixProduct.m
│   ├── utils
│   │   ├── hungarian.m
│   │   ├── bestMap.m
│   │   ├── micro_macro_PR.m
│   │   ├── precision_recall.m
│   │   ├── MutualInfo.m
│   │   ├── EuDist2.m
│   │   ├── NormalizeFea.m
│   │   ├── NormalizeAdj.m
│   │   ├── AccMeasure.m
│   │   ├── litekmeans.m
│   │   └── constructW.m
│   ├── DHINOnline.m
│   ├── yelpCL.m
│   ├── aminerLP.m
│   ├── DHINOffline.m
│   ├── yelpWWWLP.m
│   ├── yelpWWWCL.m
│   ├── DynamciHIN4NR.m
│   ├── aminerCL.m
│   ├── dblpCLDynamic.m
│   ├── DHINUpdate.m~
│   ├── DHINUpdate.m
│   ├── dblpCL.m
│   ├── dblpLP.m
│   ├── DynamciHIN.m
│   ├── DHIN.m
│   ├── DataHelper.py
│   ├── evaluateYelpWWW.py
│   ├── evaluateAminer.py
│   └── evaluateDBLP.py
├── data
│   └── yelp
│       ├── bsb_csr.mat
│       ├── brurb_csr.mat
│       └── business_category.txt
└── README.md

--------------------------------------------------------------------------------
/code/yelpWWWCLDynamic.m:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/code/largeMatrixProduct.m:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/code/utils/hungarian.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rootlu/DyHNE/HEAD/code/utils/hungarian.m
--------------------------------------------------------------------------------
/data/yelp/bsb_csr.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rootlu/DyHNE/HEAD/data/yelp/bsb_csr.mat
--------------------------------------------------------------------------------
/data/yelp/brurb_csr.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rootlu/DyHNE/HEAD/data/yelp/brurb_csr.mat
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# DyHNE

Code and dataset for the IEEE TKDE paper "Dynamic Heterogeneous Information Network Embedding with Meta-path based Proximity".

The DBLP and AMiner datasets can be downloaded from [BaiduYun](https://pan.baidu.com/s/1kRoEk4XxYXQbJ_Ef7vkFSw) (extraction code: 77pr).

A more detailed description of the code and data will be available as soon as possible.
--------------------------------------------------------------------------------
/code/DHINOnline.m:
--------------------------------------------------------------------------------
function [embedding, new_eigenvector, new_eigenvalue, new_A] = DHINOnline(U, Lambda, Delta_L, Delta_D, Delta_M, A, B, k, gamma)

old_eigenvalue = diag(Lambda)';   % 1-by-(k+1) row vector of the old eigenvalues
old_eigenvector = U;
[Delta_eigenvalue,Delta_eigenvector,new_A] = DHINUpdate(old_eigenvalue, old_eigenvector, Delta_L, Delta_D, Delta_M, A, B, gamma);
new_eigenvector = old_eigenvector + Delta_eigenvector;
new_eigenvalue = old_eigenvalue + Delta_eigenvalue;

embedding = NormalizeFea(new_eigenvector(:,2:k+1));
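A minimal end-to-end sketch of how DHINOffline and DHINOnline fit together. This is not part of the repository; the toy graph, its size, and the single-edge perturbation are made up for illustration, and the cached terms A and B follow the convention used in dblpCLDynamic.m below:

n = 50; k = 5; gamma = 1;
W = sprand(n,n,0.2); W = spones(W + W' + speye(n));   % toy symmetric adjacency; self-loops avoid zero-degree rows
D = diag(sum(W,2)); L = D - W;
W_norm = NormalizeAdj(W,0,2);                          % row-normalized adjacency
M = (speye(n)-W_norm)'*(speye(n)-W_norm);              % second-order proximity term
[emb, U, Lambda] = DHINOffline(L+gamma*M, D, k);       % static embedding

W_new = W; W_new(1,2) = 1; W_new(2,1) = 1;             % perturb the graph: add one edge
D_new = diag(sum(W_new,2)); L_new = D_new - W_new;
W_new_norm = NormalizeAdj(W_new,0,2);
A = (W_norm - speye(n))*U;                             % cached term, as in dblpCLDynamic.m
B = (W_new_norm - W_norm)*U;
[emb_new, U_new, Lambda_new, A_new] = ...
    DHINOnline(U, Lambda, L_new-L, D_new-D, 0, A, B, k, gamma);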
--------------------------------------------------------------------------------
/code/yelpCL.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

load ./data/yelp/bsb_csr.mat;
load ./data/yelp/bcb_csr.mat;
load ./data/yelp/brurb_csr.mat;

W_bsb = bsb_csr;
W_bcb = bcb_csr;
W_brurb = brurb_csr;

k = 100;
gamma = 1;

% obtain the diagonal and Laplacian matrices
t1=clock;
W_unify = W_bsb+W_bcb+W_brurb;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify = NormalizeAdj(W_unify,0,2);
M_unify = (speye(size(W_unify,1)) - W_unify)' * (speye(size(W_unify,1)) - W_unify);

[unify_embedding, U_unify, Lambda_unify] = DHINOffline(L_unify + M_unify, D_unify,k);
save ./data/yelp/result/unify_bsb+bcb+brurb_embedding.mat unify_embedding;

t2=clock;
fprintf('Time for static model: %f s \n', etime(t2,t1))
--------------------------------------------------------------------------------
/code/utils/bestMap.m:
--------------------------------------------------------------------------------
function [newL2] = bestMap(L1,L2)
%bestmap: permute labels of L2 to match L1 as well as possible
%   [newL2] = bestMap(L1,L2);
%
%   version 2.0 --May/2007
%   version 1.0 --November/2003
%
%   Written by Deng Cai (dengcai AT gmail.com)


%===========

L1 = L1(:);
L2 = L2(:);
if size(L1) ~= size(L2)
    error('size(L1) must == size(L2)');
end

Label1 = unique(L1);
nClass1 = length(Label1);
Label2 = unique(L2);
nClass2 = length(Label2);

nClass = max(nClass1,nClass2);
G = zeros(nClass);
for i=1:nClass1
    for j=1:nClass2
        G(i,j) = length(find(L1 == Label1(i) & L2 == Label2(j)));
    end
end

[c,t] = hungarian(-G);
newL2 = zeros(size(L2));
for i=1:nClass2
    newL2(L2 == Label2(i)) = Label1(c(i));
end
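A small usage sketch for bestMap (the labels here are made up; this is not repository code):

truth = [1 1 1 2 2 3 3 3]';
pred  = [2 2 2 3 3 1 1 1]';          % the same partition under a permuted labeling
aligned = bestMap(truth, pred);      % maps cluster ids onto the ground-truth labels
acc = mean(truth == aligned);        % 1.0 here, since the two partitions agree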
--------------------------------------------------------------------------------
/code/aminerLP.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

load ./data/aminer_lp/apa_csr_lp.mat;
load ./data/aminer_lp/apcpa_csr_lp.mat;
load ./data/aminer_lp/aptpa_csr_lp.mat;

W_apa = apa_csr_lp;
W_apcpa = apcpa_csr_lp;
W_aptpa = aptpa_csr_lp;

k = 100;
gamma = 1;

t1=clock;
W_unify = W_apa;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify= NormalizeAdj(W_unify,0,2);
clear W_apa W_apcpa W_aptpa apa_csr apcpa_csr aptpa_csr;
H = speye(size(W_unify,1)) - W_unify;   % speye avoids materializing a dense identity at this scale
clear W_unify
M_unify = H'*H;
X = L_unify+M_unify;
clear M_unify H;
[unify_embedding, U_unify, Lambda_unify] = DHINOffline(X, D_unify,k);

t2=clock;
fprintf('Time for static model: %f s \n', etime(t2,t1))

save ./data/aminer_lp/result/unify_apa_embedding_lp.mat unify_embedding;
--------------------------------------------------------------------------------
/code/DHINOffline.m:
--------------------------------------------------------------------------------
function [embedding, U, Lambda] = DHINOffline(L, D, k)
%% Input
% L -- matrix to decompose (the Laplacian, or the Laplacian plus the
%      second-order proximity term gamma*M)
% D -- diagonal degree matrix of the generalized eigenproblem
% k -- embedding dimension


n = size(D,1);

epsilon = 1e-6;

opts.v0 = rand(n,1);
% opts.tol = 1e-3;
% t1=clock;
[U,Lambda] = eigs(L+epsilon*eye(n),D+epsilon*eye(n),k+1,'smallestreal');
% [U,Lambda] = eigs(L,D,k+1,'smallestreal',opts);

% [U,Lambda] = eig(L+epsilon*eye(n),D+epsilon*eye(n));

% L = single(L+epsilon*eye(n));
% D = single(D+epsilon*eye(n));
% L = gpuArray(L);
% D = gpuArray(D);
% U = gather(U);
% Lambda = gather(Lambda);

% t2=clock
% fprintf('Time for eigs: %f s \n', etime(t2,t1))

embedding = NormalizeFea(U(:,2:k+1));
U = U(:,1:k+1);
Lambda = Lambda(:,1:k+1);

end
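What DHINOffline computes, checked on a toy graph (illustration only, not repository code): the k+1 smallest generalized eigenpairs of (L, D), with the trivial first eigenvector dropped from the embedding:

W = sparse([0 1 0 0; 1 0 1 0; 0 1 0 1; 0 0 1 0]);   % path graph on 4 nodes
D = diag(sum(W,2));
L = D - W;
[emb, U, Lambda] = DHINOffline(L, D, 2);
% Each column u of U satisfies L*u = lambda*D*u (up to the 1e-6 regularizer);
% the first, near-constant eigenvector is excluded from `emb`.
disp(diag(Lambda)');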
--------------------------------------------------------------------------------
/code/utils/micro_macro_PR.m:
--------------------------------------------------------------------------------
function [micro, macro] = micro_macro_PR(pred_label,orig_label)
%compute micro- and macro-averaged precision, recall and F-score
mat=confusionmat(orig_label, pred_label);
len=size(mat,1);
macroTP=zeros(len,1);
macroFP=zeros(len,1);
macroFN=zeros(len,1);
macroP=zeros(len,1);
macroR=zeros(len,1);
macroF=zeros(len,1);
for i=1:len
    macroTP(i)=mat(i,i);
    macroFP(i)=sum(mat(:, i))-mat(i,i);
    macroFN(i)=sum(mat(i,:))-mat(i,i);
    macroP(i)=macroTP(i)/(macroTP(i)+macroFP(i));
    macroR(i)=macroTP(i)/(macroTP(i)+macroFN(i));
    macroF(i)=2*macroP(i)*macroR(i)/(macroP(i)+macroR(i));
    if isnan(macroF(i))
        macroF(i) = 0;
    end
end
macro.precision=mean(macroP);
macro.recall=mean(macroR);
macro.fscore=mean(macroF);

micro.precision=sum(macroTP)/(sum(macroTP)+sum(macroFP));
micro.recall=sum(macroTP)/(sum(macroTP)+sum(macroFN));
micro.fscore=2*micro.precision*micro.recall/(micro.precision+micro.recall);
end
--------------------------------------------------------------------------------
/code/utils/precision_recall.m:
--------------------------------------------------------------------------------
function eval = precision_recall(ACTUAL,PREDICTED)
% This function evaluates the performance of a classification model by
% calculating the common performance measures: Accuracy, Sensitivity,
% Specificity, Precision, Recall, F-Measure, G-mean.
% Input: ACTUAL = Column matrix with actual class labels of the training
%                 examples
%        PREDICTED = Column matrix with predicted class labels by the
%                    classification model
% Output: EVAL = Row matrix with all the performance measures


idx = (ACTUAL==1);

p = length(ACTUAL(idx));
n = length(ACTUAL(~idx));
N = p+n;

tp = sum(ACTUAL(idx)==PREDICTED(idx));
tn = sum(ACTUAL(~idx)==PREDICTED(~idx));
fp = n-tn;
fn = p-tp;

tp_rate = tp/p;
tn_rate = tn/n;

accuracy = (tp+tn)/N;
sensitivity = tp_rate;
specificity = tn_rate;
precision = tp/(tp+fp);
recall = sensitivity;
f_measure = 2*((precision*recall)/(precision + recall));
gmean = sqrt(tp_rate*tn_rate);

eval = [accuracy sensitivity specificity precision recall f_measure gmean];
end
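A usage sketch for the multi-class metric helper above (made-up labels, illustration only):

orig_label = [1 1 2 2 3 3]';
pred_label = [1 2 2 2 3 1]';
[micro, macro] = micro_macro_PR(pred_label, orig_label);
fprintf('micro-F1 = %.3f, macro-F1 = %.3f\n', micro.fscore, macro.fscore);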
--------------------------------------------------------------------------------
/code/yelpWWWLP.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd))

load ./data/yelpWWW_lp/bsb_csr.mat;
load ./data/yelpWWW_lp/brurb_csr.mat;

W_bsb = bsb_csr;
W_brurb = brurb_csr;

k = 100;
gamma = 1;

t1=clock;
W_unify = W_bsb+W_brurb;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify = NormalizeAdj(W_unify,0,2);
M_unify = (speye(size(W_unify,1)) - W_unify)' * (speye(size(W_unify,1)) - W_unify);
% first-order variant (hence the "_1st" suffix below): only L_unify is
% decomposed; M_unify computed above is left unused here
[unify_embedding, U_unify, Lambda_unify] = DHINOffline(L_unify, D_unify, k);
save ./data/yelpWWW_lp/result/unify_bsb+brurb_embedding_1st.mat unify_embedding;
t2=clock;
fprintf('Time for static model: %f s \n', etime(t2,t1))

% for i = 1:10
%     t1=clock;
%     W_unify = (i/10)*W_bsb+(1-i/10)*W_brurb;
%     dunify = sum(W_unify,2);
%     D_unify = diag(dunify);
%     L_unify = D_unify- W_unify;
%     W_unify = NormalizeAdj(W_unify,0,2);
%     M_unify = (speye(size(W_unify,1)) - W_unify)' * (speye(size(W_unify,1)) - W_unify);
%     [unify_embedding, U_unify, Lambda_unify] = DHINOffline(L_unify + M_unify, D_unify,k);
%     data_name = ['./data/yelpWWW_lp/result/unify_',num2str(i/10),'bsb+',num2str(1-i/10),'brurb_embedding.mat'];
%     save (data_name,'unify_embedding');
%
%     t2=clock;
%     fprintf('Time for static model: %f s \n', etime(t2,t1))
% end
--------------------------------------------------------------------------------
/code/utils/MutualInfo.m:
--------------------------------------------------------------------------------
function MIhat = MutualInfo(L1,L2)
% mutual information
%
%   version 2.0 --May/2007
%   version 1.0 --November/2003
%
%   Written by Deng Cai (dengcai AT gmail.com)
%===========
L1 = L1(:);
L2 = L2(:);
if size(L1) ~= size(L2)
    error('size(L1) must == size(L2)');
end

Label = unique(L1);
nClass = length(Label);

Label2 = unique(L2);
nClass2 = length(Label2);
if nClass2 < nClass
    % smooth
    L1 = [L1; Label];
    L2 = [L2; Label];
elseif nClass2 > nClass
    % smooth
    L1 = [L1; Label2];
    L2 = [L2; Label2];
end


G = zeros(nClass);
for i=1:nClass
    for j=1:nClass
        G(i,j) = sum(L1 == Label(i) & L2 == Label(j));
    end
end
sumG = sum(G(:));

P1 = sum(G,2);  P1 = P1/sumG;
P2 = sum(G,1);  P2 = P2/sumG;
if sum(P1==0) > 0 || sum(P2==0) > 0
    % smooth
    error('Smooth fail!');
else
    H1 = sum(-P1.*log2(P1));
    H2 = sum(-P2.*log2(P2));
    P12 = G/sumG;
    PPP = P12./repmat(P2,nClass,1)./repmat(P1,1,nClass);
    PPP(abs(PPP) < 1e-12) = 1;
    MI = sum(P12(:) .* log2(PPP(:)));
    MIhat = MI / max(H1,H2);
    %%%%%%%%%%%%% why complex ? %%%%%%%%
    MIhat = real(MIhat);
end
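And a matching sketch for MutualInfo, the normalized mutual information used for clustering evaluation (made-up labels, illustration only):

truth = [1 1 1 2 2 2]';
pred  = [2 2 1 1 1 1]';
nmi = MutualInfo(truth, pred);   % in [0,1]; equals 1 only when the partitions coincide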
--------------------------------------------------------------------------------
/code/yelpWWWCL.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd))

load ./data/yelpWWW/bsb_csr.mat;
load ./data/yelpWWW/brkrb_csr.mat;
load ./data/yelpWWW/brurb_csr.mat;

W_bsb = bsb_csr;
W_brkrb = brkrb_csr;   % loaded but not used below
W_brurb = brurb_csr;

k = 100;
gamma = 1;

t1=clock;
W_unify = 0.4*W_bsb+0.6*W_brurb;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify = NormalizeAdj(W_unify,0,2);
M_unify = (speye(size(W_unify,1)) - W_unify)' * (speye(size(W_unify,1)) - W_unify);
% second-order variant (hence the "_2nd" suffix below): only M_unify is
% decomposed; L_unify is not passed in
[unify_embedding, U_unify, Lambda_unify] = DHINOffline(M_unify, D_unify,k);
save ./data/yelpWWW/result/unify_0.4bsb+0.6brurb_embedding_2nd.mat unify_embedding;
t2=clock;
fprintf('Time for static model: %f s \n', etime(t2,t1))

% for i = 1:10
%     t1=clock;
%     W_unify = (i/10)*W_bsb+(1-i/10)*W_brurb;
%     dunify = sum(W_unify,2);
%     D_unify = diag(dunify);
%     L_unify = D_unify- W_unify;
%     W_unify = NormalizeAdj(W_unify,0,2);
%     M_unify = (speye(size(W_unify,1)) - W_unify)' * (speye(size(W_unify,1)) - W_unify);
%     [unify_embedding, U_unify, Lambda_unify] = DHINOffline(L_unify + M_unify, D_unify,k);
%     data_name = ['./data/yelpWWW/result/unify_',num2str(i/10),'bsb+',num2str(1-i/10),'brurb_embedding.mat'];
%     save (data_name,'unify_embedding');
%
%     t2=clock;
%     fprintf('Time for static model: %f s \n', etime(t2,t1))
% end
--------------------------------------------------------------------------------
/code/utils/EuDist2.m:
--------------------------------------------------------------------------------
function D = EuDist2(fea_a,fea_b,bSqrt)
%EUDIST2 Efficiently compute the Euclidean distance matrix by exploiting
%MATLAB matrix operations.
%
%   D = EuDist(fea_a,fea_b)
%   fea_a:    nSample_a * nFeature
%   fea_b:    nSample_b * nFeature
%   D:      nSample_a * nSample_a
%       or  nSample_a * nSample_b
%
%    Examples:
%
%       a = rand(500,10);
%       b = rand(1000,10);
%
%       A = EuDist2(a);     % A: 500*500
%       D = EuDist2(a,b);   % D: 500*1000
%
%   version 2.1 --November/2011
%   version 2.0 --May/2009
%   version 1.0 --November/2005
%
%   Written by Deng Cai (dengcai AT gmail.com)


if ~exist('bSqrt','var')
    bSqrt = 1;
end

if (~exist('fea_b','var')) || isempty(fea_b)
    aa = sum(fea_a.*fea_a,2);
    ab = fea_a*fea_a';

    if issparse(aa)
        aa = full(aa);
    end

    D = bsxfun(@plus,aa,aa') - 2*ab;
    D(D<0) = 0;
    if bSqrt
        D = sqrt(D);
    end
    D = max(D,D');
else
    aa = sum(fea_a.*fea_a,2);
    bb = sum(fea_b.*fea_b,2);
    ab = fea_a*fea_b';

    if issparse(aa)
        aa = full(aa);
        bb = full(bb);
    end

    D = bsxfun(@plus,aa,bb') - 2*ab;
    D(D<0) = 0;
    if bSqrt
        D = sqrt(D);
    end
end
--------------------------------------------------------------------------------
/code/utils/NormalizeFea.m:
--------------------------------------------------------------------------------
function fea = NormalizeFea(fea,row)
% if row == 1, normalize each row of fea to have unit norm;
% if row == 0, normalize each column of fea to have unit norm;
%
%   version 3.0 --Jan/2012
%   version 2.0 --Jan/2012
%   version 1.0 --Oct/2003
%
%   Written by Deng Cai (dengcai AT gmail.com)
%

if ~exist('row','var')
    row = 1;
end

if row
    nSmp = size(fea,1);
    feaNorm = max(1e-14,full(sum(fea.^2,2)));
    fea = spdiags(feaNorm.^-.5,0,nSmp,nSmp)*fea;
else
    nSmp = size(fea,2);
    feaNorm = max(1e-14,full(sum(fea.^2,1))');
    fea = fea*spdiags(feaNorm.^-.5,0,nSmp,nSmp);
end

return;
% unreachable legacy implementation kept below for reference

if row
    [nSmp, mFea] = size(fea);
    if issparse(fea)
        fea2 = fea';
        feaNorm = mynorm(fea2,1);
        for i = 1:nSmp
            fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i));
        end
        fea = fea2';
    else
        feaNorm = sum(fea.^2,2).^.5;
        fea = fea./feaNorm(:,ones(1,mFea));
    end
else
    [mFea, nSmp] = size(fea);
    if issparse(fea)
        feaNorm = mynorm(fea,1);
        for i = 1:nSmp
            fea(:,i) = fea(:,i) ./ max(1e-10,feaNorm(i));
        end
    else
        feaNorm = sum(fea.^2,1).^.5;
        fea = fea./feaNorm(ones(1,mFea),:);
    end
end
--------------------------------------------------------------------------------
/code/utils/NormalizeAdj.m:
--------------------------------------------------------------------------------
% function A = NormalizeAdj(A,row)
% % if row == 1, normalize each row of A to have unit norm;
% % if row == 0, normalize each column of A to have unit norm;
%
%
% if ~exist('row','var')
%     row = 1;
% end
%
% if row
%     % A = A./repmat(sqrt(sum(A.^2,2)),1,size(A,2));
%     A = A./repmat(sum(A,2),1,size(A,2));
%
% else
%     % A = A./repmat(sqrt(sum(A.^2,1)),size(A,1),1);
%     A = A./repmat(sum(A,1),size(A,1),1);
% end
%
% return;

function [normMat] = NormalizeAdj(oriMat, isSqrt, type)
%normalize the matrix by row or by column
%oriMat is the original matrix
%isSqrt indicates whether to apply square-root normalization
%type is the normalization type: 1 normalizes columns; 2 normalizes rows
%normMat is the normalized matrix

if(type == 1)
    %normalization according to column
    sumVect = sum(oriMat,1);
    if(isSqrt)
        sumVect = power(sumVect,0.5);
    end
    sumVect = 1./sumVect;
    len = length(sumVect);
    l = linspace(1,len,len);
    diagMat = spconvert([l',l',sumVect']);
    normMat = oriMat * diagMat;
else
    %normalization according to row
    sumVect = sum(oriMat,2);
    if(isSqrt)
        sumVect = power(sumVect,0.5);
    end
    sumVect = 1./sumVect;
    len = length(sumVect);
    l = linspace(1,len,len);
    diagMat = spconvert([l',l',sumVect]);
    normMat = diagMat * oriMat;
end

end
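A quick sanity check for NormalizeAdj (illustration only, not repository code): with isSqrt = 0 and type = 2, every row of the output sums to one, which is the row-stochastic form used by all the driver scripts:

W = sparse([0 2 1; 2 0 0; 1 0 0]);
W_rownorm = NormalizeAdj(W, 0, 2);    % type 2: divide each row by its row sum
disp(full(sum(W_rownorm, 2)));        % -> [1; 1; 1]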
--------------------------------------------------------------------------------
/code/DynamciHIN4NR.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

load ./data/dblp_nr/pc_csr_nr.mat;
load ./data/dblp_nr/apc_csr_nr.mat;
load ./data/dblp_nr/apa_csr_nr.mat;
load ./data/dblp_nr/apcpa_csr_nr.mat;
load ./data/dblp_nr/aptpa_csr_nr.mat;

W_pc = pc_csr_nr;
W_apc = apc_csr_nr;
W_apa = apa_csr_nr;
W_apcpa = apcpa_csr_nr;
W_aptpa = aptpa_csr_nr;


k = 100;
gamma = 1;

% obtain the diagonal and Laplacian matrices

W_unify = W_apcpa;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify = NormalizeAdj(W_unify,0,2);

t1=clock;
M_unify = (eye(size(W_unify,1)) - W_unify)' * (eye(size(W_unify,1)) - W_unify);
t2=clock;
fprintf('Time for M_unify: %f s \n', etime(t2,t1));

[a_unify_embedding, a_U_unify, a_Lambda_unify] = DHINOffline(L_unify+gamma*M_unify, D_unify,k);
save ./data/dblp_nr/result/a_unify_embedding_nr.mat a_unify_embedding;

dapc = sum(W_apc,2);
D_apc = diag(dapc);
L_apc = D_apc- W_apc;
W_apc = NormalizeAdj(W_apc,0,2);
M_apc = (eye(size(W_apc,1)) - W_apc)' * (eye(size(W_apc,1)) - W_apc);

[apc_embedding, U_apc, Lambda_apc] = DHINOffline(L_apc+gamma*M_apc, D_apc,k);
save ./data/dblp_nr/result/apc_embedding_nr.mat apc_embedding;

dpc = sum(W_pc,2);
D_pc = diag(dpc);
L_pc = D_pc - W_pc;
W_pc = NormalizeAdj(W_pc,0,2);
M_pc = (eye(size(W_pc,1)) - W_pc)' * (eye(size(W_pc,1)) - W_pc);

[pc_embedding, U_pc, Lambda_pc] = DHINOffline(L_pc+gamma*M_pc, D_pc,k);
save ./data/dblp_nr/result/pc_embedding_nr.mat pc_embedding;
--------------------------------------------------------------------------------
/code/utils/AccMeasure.m:
--------------------------------------------------------------------------------
function [Acc,rand_index,match]=AccMeasure(T,idx)
%Measure the percentage accuracy and the Rand index of clustering results
% The number of classes must equal the number of clusters

%Output
%    Acc = Accuracy of clustering results
%    rand_index = Rand's Index, a measure of agreement between the clusterings
%    match = 2xk matrix giving the best match between the target and the clustering results

%Input
%    T = 1xn target index
%    idx = 1xn matrix of the clustering results

% EX:
% X=[randn(200,2);randn(200,2)+6;[randn(200,1)+12,randn(200,1)]]; T=[ones(200,1);ones(200,1).*2;ones(200,1).*3];
% idx=kmeans(X,3,'emptyaction','singleton','Replicates',5);
% [Acc,rand_index,match]=AccMeasure(T,idx)

k=max(T);
n=length(T);
for i=1:k
    temp=find(T==i);
    a{i}=temp; %#ok
end

b1=[];
t1=zeros(1,k);
for i=1:k
    tt1=find(idx==i);
    for j=1:k
        t1(j)=sum(ismember(tt1,a{j}));
    end
    b1=[b1;t1]; %#ok
end
Members=zeros(1,k);

P = perms((1:k));
Acc1=0;
for pi=1:size(P,1)
    for ki=1:k
        Members(ki)=b1(P(pi,ki),ki);
    end
    if sum(Members)>Acc1
        match=P(pi,:);
        Acc1=sum(Members);
    end
end

rand_ss1=0;
rand_dd1=0;
for xi=1:n-1
    for xj=xi+1:n
        rand_ss1=rand_ss1+((idx(xi)==idx(xj))&&(T(xi)==T(xj)));
        rand_dd1=rand_dd1+((idx(xi)~=idx(xj))&&(T(xi)~=T(xj)));
    end
end
rand_index=200*(rand_ss1+rand_dd1)/(n*(n-1));
Acc=Acc1/n*100;
match=[1:k;match];
--------------------------------------------------------------------------------
/code/aminerCL.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

% load ./data/aminer/ap_csr.mat;
% load ./data/aminer/apc_csr.mat;
load ./data/aminer/apa_csr.mat;
load ./data/aminer/apcpa_csr.mat;
load ./data/aminer/aptpa_csr.mat;

% W_ap = ap;
% W_apc = apc_csr;
W_apa = apa_csr;
W_apcpa = apcpa_csr;
W_aptpa = aptpa_csr;

k = 100;
gamma = 1;

% obtain the diagonal and Laplacian matrices

% dap = sum(W_ap,2);
% D_ap = diag(dap);
% L_ap = D_ap - W_ap;
% W_ap = NormalizeAdj(W_ap);
% M_ap = (eye(size(W_ap,1)) - W_ap)' * (eye(size(W_ap,1)) - W_ap);
%
% [ap_embedding, U_ap, Lambda_ap] = DHINOffline(L_ap+gamma*M_ap, D_ap,k);
% save ./data/aminer/result/ap_embedding.mat ap_embedding;

% dapc = sum(W_apc,2);
% D_apc = diag(dapc);
% L_apc = D_apc- W_apc;
% W_apc = NormalizeAdj(W_apc,0,2);
% M_apc = (eye(size(W_apc,1)) - W_apc)' * (eye(size(W_apc,1)) - W_apc);
%
% [apc_embedding, U_apc, Lambda_apc] = DHINOffline(L_apc+gamma*M_apc, D_apc,k);
% save ./data/aminer/result/apc_embedding.mat apc_embedding;

% dapa = sum(W_apa,2);
% D_apa = diag(dapa);
% L_apa = D_apa- W_apa;
% W_apa = NormalizeAdj(W_apa,0,2);
% M_apa = (eye(size(W_apa,1)) - W_apa)' * (eye(size(W_apa,1)) - W_apa);
%
% [apa_embedding, U_apa, Lambda_apa] = DHINOffline(L_apa+gamma*M_apa, D_apa,k);
% save ./data/aminer/result/apa_embedding.mat apa_embedding;
%
% dapcpa = sum(W_apcpa,2);
% D_apcpa = diag(dapcpa);
% L_apcpa = D_apcpa- W_apcpa;
% W_apcpa = NormalizeAdj(W_apcpa,0,2);
% M_apcpa = (eye(size(W_apcpa,1)) - W_apcpa)' * (eye(size(W_apcpa,1)) - W_apcpa);
%
% [apcpa_embedding, U_apcpa, Lambda_apcpa] = DHINOffline(L_apcpa+gamma*M_apcpa, D_apcpa,k);
% save ./data/aminer/result/apcpa_embedding.mat apcpa_embedding;

% split_embedding = 0.1*apa_embedding(1:22942,:)+apcpa_embedding(1:22942,:);
% save ./data/aminer/result/split_apcpa+0.1apa_embedding.mat split_embedding;

t1=clock;
W_unify = 0.1*W_apa+W_apcpa+0.9*W_aptpa;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify - W_unify;
W_unify = NormalizeAdj(W_unify,0,2);
H = speye(size(W_unify,1)) - W_unify;
clear W_apa W_apcpa W_aptpa W_unify
M_unify = H'*H;
X = L_unify+gamma * M_unify;
% save ./data/aminer/X.mat X;
[unify_embedding, U_unify, Lambda_unify] = DHINOffline(X, D_unify,k);

t2=clock;
fprintf('Time for static model: %f s \n', etime(t2,t1))

save ./data/aminer/result/unify_0.1apa+apcpa+0.9aptpa_embedding.mat unify_embedding;
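For orientation, every static driver above assembles the same quantity (a LaTeX restatement of the code, not extra repository material): with $W$ the weighted fused meta-path adjacency, $D$ its degree matrix, $L = D - W$ the first-order term, and $M = (I - \tilde{W})^\top (I - \tilde{W})$ the second-order term built from the row-normalized $\tilde{W}$, DHINOffline solves the generalized eigenproblem

(L + \gamma M)\, u = \lambda\, D\, u,

keeping the $k+1$ smallest eigenpairs and discarding the trivial one.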
--------------------------------------------------------------------------------
/code/dblpCLDynamic.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

t1=clock;
load ./data/dblpDynamic/apa_csr_0.mat;
load ./data/dblpDynamic/apcpa_csr_0.mat;
load ./data/dblpDynamic/aptpa_csr_0.mat;

W_apa = apa_csr_0;
W_apcpa = apcpa_csr_0;
W_aptpa = aptpa_csr_0;

k = 100;
gamma = 1;

W= W_apcpa+0.1*W_apa+0.9*W_aptpa;
d = sum(W,2);
D = diag(d);
L = D- W;
W_norm = NormalizeAdj(W,0,2);
M = (eye(size(W_norm,1)) - W_norm)' * (eye(size(W_norm,1)) - W_norm);

[embedding, U, Lambda] = DHINOffline(L+gamma*M , D,k);

t2=clock;
fprintf('Time for static model: %f s \n', etime(t2,t1))

save ./data/dblpDynamic/result/static_0.1apa+1apcpa+0.9aptpa_embedding.mat embedding;


%% perturb the data and obtain the new diagonal and Laplacian matrices
A = (W_norm-eye(size(W_norm,1)))*U;
% B = zeros(size(W_norm,1),size(U,2));
for i = 1:9
    fprintf('Time step: %d \n', i)
    apa_data = ['./data/dblpDynamic/apa_csr_',int2str(i),'.mat'];
    apcpa_data = ['./data/dblpDynamic/apcpa_csr_',int2str(i),'.mat'];
    aptpa_data = ['./data/dblpDynamic/aptpa_csr_',int2str(i),'.mat'];
    W_apa = cell2mat(struct2cell(load(apa_data)));
    W_apcpa = cell2mat(struct2cell(load(apcpa_data)));
    W_aptpa = cell2mat(struct2cell(load(aptpa_data)));

    W_new = W_apcpa+0.1*W_apa+0.9*W_aptpa;
    d_new = sum(W_new,2);
    D_new = diag(d_new);
    L_new = D_new - W_new;
    W_new_norm = NormalizeAdj(W_new,0,2);
    % M_new = (speye(size(W_new_norm,1)) - W_new_norm)' * (speye(size(W_new_norm,1)) - W_new_norm);

    t1=clock;
    Delta_L = L_new - L;
    Delta_D = D_new - D;
    Delta_W = W_new_norm - W_norm;
    Delta_M = 0;
    % Delta_M = M_new - M;
    % Delta_M = (W_norm-eye(size(W_norm,1)))'*Delta_W+Delta_W'*(W_norm-eye(size(W_norm,1)));

    B = Delta_W*U;

    %% learn embedding at time step t+1
    [embedding, U_new, Lambda_new, A_new]= DHINOnline(U, Lambda, Delta_L, Delta_D, Delta_M, A, B, k, gamma);

    t2=clock;
    fprintf('Time for one dynamic update: %f s \n', etime(t2,t1))

    current_time_step_embs = ['./data/dblpDynamic/result/',int2str(i),'_0.1apa+1apcpa+0.9aptpa_embedding.mat'];
    save (current_time_step_embs, 'embedding');

    % L = L_new;
    % D= D_new;
    % M= M_new;
    % W = W_new;
    % U = U_new;
    % Lambda = diag(Lambda_new);
    % A = A_new;
    % B = B_new;
end

save ./data/dblpDynamic/result/final_0.1apa+1apcpa+0.9aptpa_embedding.mat embedding;
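The online update implemented in DHINUpdate.m below is first-order matrix perturbation theory for the generalized eigenproblem; the commented-out loops inside that file compute exactly these quantities (a LaTeX restatement, not extra repository material):

\Delta\lambda_i \approx u_i^\top \Delta L\, u_i - \lambda_i\, u_i^\top \Delta D\, u_i,
\qquad
\Delta u_i \approx \sum_{j \neq i} \frac{u_j^\top \Delta L\, u_i - \lambda_i\, u_j^\top \Delta D\, u_i}{\lambda_i - \lambda_j}\, u_j,

with an additional gamma-weighted contribution from the change of the second-order term, which the code accumulates through G and H = (A+B)'*B + B'*(A+B).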
--------------------------------------------------------------------------------
/code/DHINUpdate.m~:
--------------------------------------------------------------------------------
function [Delta_eigenvalue,Delta_eigenvector,new_A] = DHINUpdate(old_eigenvalue, old_eigenvector, Delta_L, Delta_D, Delta_M, A, B, gamma)
% Input old_eigenvalue is stored as a 1-by-d row vector
% Input old_eigenvector is stored as an n-by-d matrix
% Input Delta_L denotes the change of the Laplacian matrix
% Input Delta_D denotes the change of the diagonal matrix

[n, d] = size(old_eigenvector);
Delta_eigenvalue = zeros(1,d);
Delta_eigenvector = zeros(n,d);

alpha = zeros(d,d);
new_A = zeros(n,d);

H = (A+B)'*B+B'*(A+B); % d x d
G = old_eigenvector'*Delta_L*old_eigenvector - diag(old_eigenvalue)*old_eigenvector'*Delta_D*old_eigenvector;

% for i = 2:d
%     old_eigenvector_i = old_eigenvector(:,i);
%     old_eigenvalue_i = old_eigenvalue(1,i);
%
%     %% compute the change of eigenvalue
%     Delta_eigenvalue(1,i) = old_eigenvector_i'*Delta_L*old_eigenvector_i - old_eigenvalue_i*old_eigenvector_i'*Delta_D*old_eigenvector_i;
%
%     %% compute the change of eigenvector
%     for j = 2:d
%         old_eigenvector_j = old_eigenvector(:,j);
%         old_eigenvalue_j = old_eigenvalue(1,j);
%         if j ~= i
%             Delta_eigenvector(:,i) = Delta_eigenvector(:,i) + (old_eigenvector_j'*Delta_L*old_eigenvector_i-old_eigenvalue_i*old_eigenvector_j'*Delta_D*old_eigenvector_i)*old_eigenvector_j/(old_eigenvalue_i-old_eigenvalue_j);
%         end
%     end
% end

for i = 2:d
    old_eigenvector_i = old_eigenvector(:,i);   % n x 1
    old_eigenvalue_i = old_eigenvalue(1,i);     % scalar

    %% compute the change of eigenvalue
    Delta_eigenvalue(1,i) = G(i,i) + gamma*H(i,i);
    % Delta_eigenvalue(1,i) = (old_eigenvector_i'*Delta_L*old_eigenvector_i - old_eigenvalue_i*old_eigenvector_i'*Delta_D*old_eigenvector_i + gamma*old_eigenvector_i'*Delta_M*old_eigenvector_i);

    %% compute the change of eigenvector
    for j = 2:d
        old_eigenvector_j = old_eigenvector(:,j);
        old_eigenvalue_j = old_eigenvalue(1,j);
        if j ~= i
            alpha(i,j) = (G(j,i)+ gamma*H(j,i)) / (old_eigenvalue_i-old_eigenvalue_j);
            % Delta_eigenvector(:,i) = Delta_eigenvector(:,i) + (old_eigenvector_j'*Delta_L*old_eigenvector_i - old_eigenvalue_i*old_eigenvector_j'*Delta_D*old_eigenvector_i + gamma*old_eigenvector_j'*Delta_M*old_eigenvector_i)*old_eigenvector_j / (old_eigenvalue_i-old_eigenvalue_j);
        else
            alpha(i,j) = -0.5*old_eigenvector_i'*Delta_D*old_eigenvector_i;
        end
        Delta_eigenvector(:,i) = Delta_eigenvector(:,i) + alpha(i,j) * old_eigenvector_j ;

    end
end

for i = 2:d
    for z = 2:d
        if z ~=i
            new_A(:,i) = new_A(:,i) + alpha(i,z) * (A(:,z) + B(:,z));
        end
    end
end

end
--------------------------------------------------------------------------------
/code/DHINUpdate.m:
--------------------------------------------------------------------------------
function [Delta_eigenvalue,Delta_eigenvector,new_A] = DHINUpdate(old_eigenvalue, old_eigenvector, Delta_L, Delta_D, Delta_M, A, B, gamma)
% Input old_eigenvalue is stored as a 1-by-d row vector
% Input old_eigenvector is stored as an n-by-d matrix
% Input Delta_L denotes the change of the Laplacian matrix
% Input Delta_D denotes the change of the diagonal matrix

[n, d] = size(old_eigenvector);
Delta_eigenvalue = zeros(1,d);
Delta_eigenvector = zeros(n,d);

alpha = zeros(d,d);
new_A = zeros(n,d);

H = (A+B)'*B+B'*(A+B); % d x d
G = old_eigenvector'*Delta_L*old_eigenvector - diag(old_eigenvalue)*old_eigenvector'*Delta_D*old_eigenvector;

% for i = 2:d
%     old_eigenvector_i = old_eigenvector(:,i);
%     old_eigenvalue_i = old_eigenvalue(1,i);
%
%     %% compute the change of eigenvalue
%     Delta_eigenvalue(1,i) = old_eigenvector_i'*Delta_L*old_eigenvector_i - old_eigenvalue_i*old_eigenvector_i'*Delta_D*old_eigenvector_i;
%
%     %% compute the change of eigenvector
%     for j = 2:d
%         old_eigenvector_j = old_eigenvector(:,j);
%         old_eigenvalue_j = old_eigenvalue(1,j);
%         if j ~= i
%             Delta_eigenvector(:,i) = Delta_eigenvector(:,i) + (old_eigenvector_j'*Delta_L*old_eigenvector_i-old_eigenvalue_i*old_eigenvector_j'*Delta_D*old_eigenvector_i)*old_eigenvector_j/(old_eigenvalue_i-old_eigenvalue_j);
%         end
%     end
% end

for i = 2:d
    old_eigenvector_i = old_eigenvector(:,i);   % n x 1
    old_eigenvalue_i = old_eigenvalue(1,i);     % scalar

    %% compute the change of eigenvalue
    Delta_eigenvalue(1,i) = G(i,i) + gamma*H(i,i);
    % Delta_eigenvalue(1,i) = (old_eigenvector_i'*Delta_L*old_eigenvector_i - old_eigenvalue_i*old_eigenvector_i'*Delta_D*old_eigenvector_i + gamma*old_eigenvector_i'*Delta_M*old_eigenvector_i);

    %% compute the change of eigenvector
    for j = 2:d
        old_eigenvector_j = old_eigenvector(:,j);
        old_eigenvalue_j = old_eigenvalue(1,j);
        if j ~= i
            alpha(i,j) = (G(j,i)+ gamma*H(j,i)) / (old_eigenvalue_i-old_eigenvalue_j);
            % Delta_eigenvector(:,i) = Delta_eigenvector(:,i) + (old_eigenvector_j'*Delta_L*old_eigenvector_i - old_eigenvalue_i*old_eigenvector_j'*Delta_D*old_eigenvector_i + gamma*old_eigenvector_j'*Delta_M*old_eigenvector_i)*old_eigenvector_j / (old_eigenvalue_i-old_eigenvalue_j);
        % else
        %     alpha(i,j) = -0.5*old_eigenvector_i'*Delta_D*old_eigenvector_i;
        end
        Delta_eigenvector(:,i) = Delta_eigenvector(:,i) + alpha(i,j) * old_eigenvector_j ;

    end
end

% for i = 2:d
%     for z = 2:d
%         if z ~=i
%             new_A(:,i) = new_A(:,i) + alpha(i,z) * (A(:,z) + B(:,z));
%         end
%     end
% end

end
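A numerical spot-check of that approximation (illustration only, not repository code; the second-order terms A and B are zeroed out, so only the Laplacian perturbation is exercised, and sign or ordering differences from eigs may perturb the comparison slightly):

n = 30; d = 6;
W = sprand(n,n,0.3); W = spones(W + W' + speye(n));
D = diag(sum(W,2)); L = D - W;
[U, Lambda] = eigs(L + 1e-6*eye(n), D + 1e-6*eye(n), d, 'smallestreal');
W2 = W; W2(1,3) = 1; W2(3,1) = 1;                     % small perturbation: one edge
D2 = diag(sum(W2,2)); L2 = D2 - W2;
A = zeros(n,d); B = zeros(n,d);                       % no second-order contribution
[dLam, dU] = DHINUpdate(diag(Lambda)', U, L2-L, D2-D, 0, A, B, 1);
[U2, Lambda2] = eigs(L2 + 1e-6*eye(n), D2 + 1e-6*eye(n), d, 'smallestreal');
disp([diag(Lambda2)' - diag(Lambda)'; dLam]);         % the two rows should be close for i >= 2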
--------------------------------------------------------------------------------
/code/dblpCL.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

% load ./data/dblp/ap_csr.mat;
% load ./data/dblp/apc_csr.mat;
% load ./data/dblp/apt_csr.mat;
load ./data/dblp/apa_csr.mat;
load ./data/dblp/apcpa_csr.mat;
load ./data/dblp/aptpa_csr.mat;

% W_ap = ap;
% W_apc = apc;
% W_apt = apt;
W_apa = apa;
W_apcpa = apcpa;
W_aptpa = aptpa;
[y_id,y_label] = textread('./data/dblp/oriData/author_label.txt','%d%d');
label_size = length(y_id);

k = 100;
gamma = 1;

% obtain the diagonal and Laplacian matrices

% dap = sum(W_ap,2);
% D_ap = diag(dap);
% L_ap = D_ap - W_ap;
% W_ap = NormalizeAdj(W_ap);
% M_ap = (eye(size(W_ap,1)) - W_ap)' * (eye(size(W_ap,1)) - W_ap);
% [ap_embedding, U_ap, Lambda_ap] = DHINOffline(L_ap+gamma*M_ap, D_ap,k);
% save ./data/dblp/result/ap_embedding.mat ap_embedding;
%
% dapc = sum(W_apc,2);
% D_apc = diag(dapc);
% L_apc = D_apc- W_apc;
% W_apc = NormalizeAdj(W_apc);
% M_apc = (eye(size(W_apc,1)) - W_apc)' * (eye(size(W_apc,1)) - W_apc);
% [apc_embedding, U_apc, Lambda_apc] = DHINOffline(L_apc+gamma*M_apc, D_apc,k);
% save ./data/dblp/result/apc_embedding.mat apc_embedding;
%
% dapt = sum(W_apt,2);
% D_apt = diag(dapt);
% L_apt = D_apt - W_apt;
% W_apt = NormalizeAdj(W_apt);
% M_apt = (eye(size(W_apt,1)) - W_apt)' * (eye(size(W_apt,1)) - W_apt);
% [apt_embedding, U_apt, Lambda_apt] = DHINOffline(L_apt+gamma*M_apt, D_apt,k);
% save ./data/dblp/result/apt_embedding.mat apt_embedding;
%
% dapa = sum(W_apa,2);
% D_apa = diag(dapa);
% L_apa = D_apa- W_apa;
% W_apa = NormalizeAdj(W_apa,0,2);
% M_apa = (speye(size(W_apa,1)) - W_apa)' * (speye(size(W_apa,1)) - W_apa);
% [apa_embedding, U_apa, Lambda_apa] = DHINOffline(L_apa+gamma*M_apa, D_apa,k);
% save ./data/dblp/result/apa_embedding.mat apa_embedding;
%
% dapcpa = sum(W_apcpa,2);
% D_apcpa = diag(dapcpa);
% L_apcpa = D_apcpa- W_apcpa;
% W_apcpa = NormalizeAdj(W_apcpa,0,2);
% M_apcpa = (speye(size(W_apcpa,1)) - W_apcpa)' * (speye(size(W_apcpa,1)) - W_apcpa);
% [apcpa_embedding, U_apcpa, Lambda_apcpa] = DHINOffline(L_apcpa+gamma*M_apcpa, D_apcpa,k);
% save ./data/dblp/result/apcpa_embedding.mat apcpa_embedding;

% daptpa = sum(W_aptpa,2);
% D_aptpa = diag(daptpa);
% L_aptpa = D_aptpa- W_aptpa;
% W_aptpa = NormalizeAdj(W_aptpa,0,2);
% M_aptpa = (speye(size(W_aptpa,1)) - W_aptpa)' * (speye(size(W_aptpa,1)) - W_aptpa);
% [aptpa_embedding, U_aptpa, Lambda_aptpa] = DHINOffline(L_aptpa+gamma*M_aptpa, D_aptpa,k);
% save ./data/dblp/result/aptpa_embedding.mat aptpa_embedding;
%
% split_embedding = 0.1*apa_embedding(1:14475,:)+0.9*aptpa_embedding(1:14475,:)+apcpa_embedding(1:14475,:);
% save ./data/dblp/result/split_0.1apa+1apcpa+0.9aptpa_embedding.mat split_embedding;

t1=clock;
W_unify = W_apcpa+0.1*W_apa+0.9*W_aptpa;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify = NormalizeAdj(W_unify,0,2);
M_unify = (eye(size(W_unify,1)) - W_unify)' * (eye(size(W_unify,1)) - W_unify);

[unify_embedding, U_unify, Lambda_unify] = DHINOffline(L_unify+gamma*M_unify, D_unify,k);
save ./data/dblp/result/unify_0.1apa+1apcpa+0.9aptpa_embedding.mat unify_embedding;

t2=clock;
fprintf('Time for static model: %f s \n', etime(t2,t1))
--------------------------------------------------------------------------------
/code/dblpLP.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

load ./data/dblp_lp/ap_csr_lp.mat;
% load ./data/dblp_lp/apc_csr_lp.mat;
% load ./data/dblp_lp/apt_csr_lp.mat;
load ./data/dblp_lp/apa_csr_lp.mat;
load ./data/dblp_lp/apcpa_csr_lp.mat;
load ./data/dblp_lp/aptpa_csr_lp.mat;

W_ap = ap_csr_lp;
% W_apc = apc_csr_lp;
% W_apt = apt_csr_lp;
W_apa = apa_csr_lp;
W_apcpa = apcpa_csr_lp;
W_aptpa = aptpa_csr_lp;


k = 100;
gamma = 1;

% obtain the diagonal and Laplacian matrices

% dap = sum(W_ap,2);
% D_ap = diag(dap);
% L_ap = D_ap - W_ap;
% W_ap = NormalizeAdj(W_ap,0,2);
% M_ap = (eye(size(W_ap,1)) - W_ap)' * (eye(size(W_ap,1)) - W_ap);
%
% [ap_embedding, U_ap, Lambda_ap] = DHINOffline(L_ap+gamma*M_ap, D_ap,k);
% save ./data/dblp/result/ap_embedding_lp.mat ap_embedding;
%
% dapc = sum(W_apc,2);
% D_apc = diag(dapc);
% L_apc = D_apc- W_apc;
% W_apc = NormalizeAdj(W_apc,0,2);
% M_apc = (eye(size(W_apc,1)) - W_apc)' * (eye(size(W_apc,1)) - W_apc);
%
% [apc_embedding, U_apc, Lambda_apc] = DHINOffline(L_apc+gamma*M_apc, D_apc,k);
% save ./data/dblp/result/apc_embedding_lp.mat apc_embedding;
%
% dapt = sum(W_apt,2);
% D_apt = diag(dapt);
% L_apt = D_apt - W_apt;
% W_apt = NormalizeAdj(W_apt,0,2);
% M_apt = (eye(size(W_apt,1)) - W_apt)' * (eye(size(W_apt,1)) - W_apt);
%
% [apt_embedding, U_apt, Lambda_apt] = DHINOffline(L_apt+gamma*M_apt, D_apt,k);
% save ./data/dblp/result/apt_embedding_lp.mat apt_embedding;

% dapa = sum(W_apa,2);
% D_apa = diag(dapa);
% L_apa = D_apa- W_apa;
% W_apa = NormalizeAdj(W_apa,0,2);
% M_apa = (eye(size(W_apa,1)) - W_apa)' * (eye(size(W_apa,1)) - W_apa);
%
% [apa_embedding, U_apa, Lambda_apa] = DHINOffline(L_apa+gamma*M_apa, D_apa,k);
% save ./data/dblp_lp/result/apa_embedding_lp.mat apa_embedding;
%
% dapcpa = sum(W_apcpa,2);
% D_apcpa = diag(dapcpa);
% L_apcpa = D_apcpa- W_apcpa;
% W_apcpa = NormalizeAdj(W_apcpa,0,2);
% M_apcpa = (eye(size(W_apcpa,1)) - W_apcpa)' * (eye(size(W_apcpa,1)) - W_apcpa);
%
% [apcpa_embedding, U_apcpa, Lambda_apcpa] = DHINOffline(L_apcpa+gamma*M_apcpa, D_apcpa,k);
% save ./data/dblp_lp/result/apcpa_embedding_lp.mat apcpa_embedding;
%
% daptpa = sum(W_aptpa,2);
% D_aptpa = diag(daptpa);
% L_aptpa = D_aptpa- W_aptpa;
% W_aptpa = NormalizeAdj(W_aptpa,0,2);
% M_aptpa = (eye(size(W_aptpa,1)) - W_aptpa)' * (eye(size(W_aptpa,1)) - W_aptpa);
%
% [aptpa_embedding, U_aptpa, Lambda_aptpa] = DHINOffline(L_aptpa+gamma*M_aptpa, D_aptpa,k);
% save ./data/dblp_lp/result/aptpa_embedding_lp.mat aptpa_embedding;
%
% split_embedding = apa_embedding(1:14475,:)+aptpa_embedding(1:14475,:)+apcpa_embedding(1:14475,:);
% save ./data/dblp_lp/result/split_embedding_lp.mat split_embedding;

W_unify = W_apcpa+W_apa+W_aptpa;
%W_unify = W_ap;
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify = NormalizeAdj(W_unify,0,2);

t1=clock;
M_unify = (eye(size(W_unify,1)) - W_unify)' * (eye(size(W_unify,1)) - W_unify);
t2=clock;
fprintf('Time for M_unify: %f s \n', etime(t2,t1));

[unify_embedding, U_unify, Lambda_unify] = DHINOffline(L_unify+gamma*M_unify, D_unify,k);
save ./data/dblp_lp/result/unify_embedding_lp.mat unify_embedding;
--------------------------------------------------------------------------------
/code/DynamciHIN.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

% load ./data/dblp/ap_csr.mat;
% load ./data/dblp/apc_csr.mat;
% load ./data/dblp/apt_csr.mat;
% load ./data/dblp/apa_csr.mat;
% load ./data/dblp/apcpa_csr.mat;
% load ./data/dblp/aptpa_csr.mat;

% load ./data/aminer/ap_csr.mat;
% load ./data/aminer/apc_csr.mat;
% load ./data/aminer/apt_csr.mat;
load ./data/aminer/apa_csr.mat;
load ./data/aminer/apcpa_csr.mat;
load ./data/aminer/apypa_csr.mat;

% W_ap = ap;
% W_apc = apc;
% W_apt = apt;
W_apa = apa_csr;
W_apcpa = apcpa_csr;
W_apypa = apypa_csr;


k = 100;
gamma = 1;

% obtain the diagonal and Laplacian matrices

% dap = sum(W_ap,2);
% D_ap = diag(dap);
% L_ap = D_ap - W_ap;
% W_ap = NormalizeAdj(W_ap);
% M_ap = (eye(size(W_ap,1)) - W_ap)' * (eye(size(W_ap,1)) - W_ap);
%
% [ap_embedding, U_ap, Lambda_ap] = DHINOffline(L_ap+gamma*M_ap, D_ap,k);
% save ./data/dblp/result/ap_embedding.mat ap_embedding;
%
% dapc = sum(W_apc,2);
% D_apc = diag(dapc);
% L_apc = D_apc- W_apc;
% W_apc = NormalizeAdj(W_apc);
% M_apc = (eye(size(W_apc,1)) - W_apc)' * (eye(size(W_apc,1)) - W_apc);
%
% [apc_embedding, U_apc, Lambda_apc] = DHINOffline(L_apc+gamma*M_apc, D_apc,k);
% save ./data/dblp/result/apc_embedding.mat apc_embedding;
%
% dapt = sum(W_apt,2);
% D_apt = diag(dapt);
% L_apt = D_apt - W_apt;
% W_apt = NormalizeAdj(W_apt);
% M_apt = (eye(size(W_apt,1)) - W_apt)' * (eye(size(W_apt,1)) - W_apt);
%
% [apt_embedding, U_apt, Lambda_apt] = DHINOffline(L_apt+gamma*M_apt, D_apt,k);
% save ./data/dblp/result/apt_embedding.mat apt_embedding;
%
% dapa = sum(W_apa,2);
% D_apa = diag(dapa);
% L_apa = D_apa- W_apa;
% W_apa = NormalizeAdj(W_apa);
% M_apa = (eye(size(W_apa,1)) - W_apa)' * (eye(size(W_apa,1)) - W_apa);
%
% [apa_embedding, U_apa, Lambda_apa] = DHINOffline(L_apa+gamma*M_apa, D_apa,k);
% save ./data/dblp/result/apa_embedding.mat apa_embedding;
%
dapcpa = sum(W_apcpa,2);
D_apcpa = diag(dapcpa);
L_apcpa = D_apcpa- W_apcpa;
W_apcpa = NormalizeAdj(W_apcpa,0,2);
M_apcpa = (eye(size(W_apcpa,1)) - W_apcpa)' * (eye(size(W_apcpa,1)) - W_apcpa);

[apcpa_embedding, U_apcpa, Lambda_apcpa] = DHINOffline(L_apcpa+gamma*M_apcpa, D_apcpa,k);
save ./data/aminer/result/apcpa_embedding.mat apcpa_embedding;

% daptpa = sum(W_aptpa,2);
% D_aptpa = diag(daptpa);
% L_aptpa = D_aptpa- W_aptpa;
% W_aptpa = NormalizeAdj(W_aptpa);
% M_aptpa = (eye(size(W_aptpa,1)) - W_aptpa)' * (eye(size(W_aptpa,1)) - W_aptpa);
%
% [aptpa_embedding, U_aptpa, Lambda_aptpa] = DHINOffline(L_aptpa+gamma*M_aptpa, D_aptpa,k);
% save ./data/dblp/result/aptpa_embedding.mat aptpa_embedding;
%
% split_embedding = 0.1*apa_embedding(1:14475,:)+0.9*aptpa_embedding(1:14475,:)+apcpa_embedding(1:14475,:);
% save ./data/dblp/result/split_embedding.mat split_embedding;

W_unify = W_apcpa+W_apa;   % note: W_apcpa was row-normalized above, so this mixes normalized and raw weights
dunify = sum(W_unify,2);
D_unify = diag(dunify);
L_unify = D_unify- W_unify;
W_unify = NormalizeAdj(W_unify,0,2);
t1=clock;
M_unify = (eye(size(W_unify,1)) - W_unify)' * (eye(size(W_unify,1)) - W_unify);
t2=clock;
fprintf('Time for M_unify: %f s \n', etime(t2,t1));

[unify_embedding, U_unify, Lambda_unify] = DHINOffline(L_unify+gamma*M_unify, D_unify,k);
save ./data/aminer/result/unify_embedding.mat unify_embedding;
--------------------------------------------------------------------------------
/code/DHIN.m:
--------------------------------------------------------------------------------
clc;clear;
addpath(genpath(pwd));

load ./data/dblp/aca.mat;
load ./data/dblp/apa.mat;
load ./data/dblp/ata.mat;
load ./data/dblp/pcp.mat;
load ./data/dblp/pap.mat;
load ./data/dblp/ptp.mat;

A_aca = aca;
A_apa = apa;
A_ata = ata;
A_pcp = pcp;
A_pap = pap;
A_ptp = ptp;

k = 100;

%% obtain the diagonal and Laplacian matrices

daca = sum(A_aca,2);
D_aca = diag(daca);
L_aca = D_aca - A_aca;
norm_A_aca = NormalizeAdj(A_aca,0,2);   % row-normalized adjacency for the second-order term
M_aca = (eye(size(A_aca,1)) - norm_A_aca)' * (eye(size(A_aca,1)) - norm_A_aca);
% [V,D_eig] = eig(L_aca+M_aca);
% embedding = NormalizeFea(V(:,2:k+1));
% save ./data/dblp/result/12order_embedding.mat embedding;

[aca_embedding, V_aca, D_aca_eig] = DHINOffline(L_aca+M_aca,D_aca, k);
save ./data/dblp/result/aca_embedding.mat aca_embedding;

dapa = sum(A_apa,2);
D_apa = diag(dapa);
L_apa = D_apa - A_apa;
[apa_embedding, V_apa, D_apa_eig] = DHINOffline(L_apa, D_apa,k);
save ./data/dblp/result/apa_embedding.mat apa_embedding;

data = sum(A_ata,2);
D_ata = diag(data);
L_ata = D_ata - A_ata;
[ata_embedding, V_ata, D_ata_eig] = DHINOffline(L_ata, D_ata,k);
save ./data/dblp/result/ata_embedding.mat ata_embedding;

dpcp = sum(A_pcp,2);
D_pcp = diag(dpcp);
L_pcp = D_pcp - A_pcp;
[pcp_embedding, V_pcp, D_pcp_eig] = DHINOffline(L_pcp, D_pcp,k);
save ./data/dblp/result/pcp_embedding.mat pcp_embedding;

dpap = sum(A_pap,2);
D_pap = diag(dpap);
L_pap = D_pap - A_pap;
[pap_embedding, V_pap, D_pap_eig] = DHINOffline(L_pap, D_pap,k);
save ./data/dblp/result/pap_embedding.mat pap_embedding;

dptp = sum(A_ptp,2);
D_ptp = diag(dptp);
L_ptp = D_ptp - A_ptp;
[ptp_embedding, V_ptp, D_ptp_eig] = DHINOffline(L_ptp, D_ptp,k);
save ./data/dblp/result/ptp_embedding.mat ptp_embedding;

% %% evaluation w.r.t. node classification at time step t
% indices = crossvalind('Kfold',n,10);
% Accuracytmp = 0; F1macrotmp = 0; F1microtmp = 0;
% for m = 1:10
%     testidx = (indices == m);
%     trainidx = ~testidx;
%     Xtrain = embedding(trainidx,:);
%     ytrain = y(trainidx,:);
%     Xtest = embedding(testidx,:);
%     ytest = y(testidx,:);
%
%     model = train(ytrain, sparse(Xtrain), '-s 0 -q');
%     [predict_label, accuracy, decision_values] = predict(ytest, sparse(Xtest), model, '-q');
%     [micro, macro] = micro_macro_PR(predict_label,ytest);
%     Accuracytmp = Accuracytmp + accuracy(1);
%     F1macrotmp = F1macrotmp + macro.fscore;
%     F1microtmp = F1microtmp + micro.fscore;
% end
% Accuracy = Accuracytmp/10;
% F1macro = F1macrotmp/10;
% F1micro = F1microtmp/10;
%
% fprintf('Joint Accuracy: %f\n', Accuracy);
% fprintf('Joint F1macro: %f\n', F1macro);
% fprintf('Joint F1micro: %f\n', F1micro);

%% perturb the data and obtain the new diagonal and Laplacian matrices
% addratio = 0.001;
% removeratio = 0.001;
%
% Aremove = removeedge(removeratio, A);
% Anew = Aremove;
% danew = sum(Anew,2);
% Danew = diag(danew);
% Lanew = Danew - Anew;
% DeltaLa = Lanew - La;
% DeltaDa = Danew - Da;
%
% Xadd = addcontent(addratio, X);
% Xnew = Xadd;
% Sxnew = constructW(Xnew,options);
% Sxnew = full(Sxnew);
% dxnew = sum(Sxnew,2);
% Dxnew = diag(dxnew);
% Lxnew = Dxnew - Sxnew;
% DeltaLx = Lxnew - Lx;
% DeltaDx = Dxnew - Dx;

% %% learn embedding at time step t+1
% embedding = DANE_Online(Va, Daeig, Vx, Dxeig, Vjoint, Djoint, DeltaLx, DeltaDx, DeltaLa, DeltaDa, l, k);
%
% %% evaluation w.r.t. node classification at time step t+1
% indices = crossvalind('Kfold',n,10);
% Accuracytmp = 0; F1macrotmp = 0; F1microtmp = 0;
% for m = 1:10
%     testidx = (indices == m);
%     trainidx = ~testidx;
%     Xtrain = embedding(trainidx,:);
%     ytrain = y(trainidx,:);
%     Xtest = embedding(testidx,:);
%     ytest = y(testidx,:);
%
%     model = train(ytrain, sparse(Xtrain), '-s 0 -q');
%     [predict_label, accuracy, decision_values] = predict(ytest, sparse(Xtest), model, '-q');
%     [micro, macro] = micro_macro_PR(predict_label,ytest);
%     Accuracytmp = Accuracytmp + accuracy(1);
%     F1macrotmp = F1macrotmp + macro.fscore;
%     F1microtmp = F1microtmp + micro.fscore;
% end
% Accuracy = Accuracytmp/10;
% F1macro = F1macrotmp/10;
% F1micro = F1microtmp/10;
%
% fprintf('Joint Accuracy: %f\n', Accuracy);
% fprintf('Joint F1macro: %f\n', F1macro);
% fprintf('Joint F1micro: %f\n', F1micro);
--------------------------------------------------------------------------------
/code/DataHelper.py:
--------------------------------------------------------------------------------
# coding:utf-8
# author: lu yf
# create date: 2018/6/25

import os
import numpy as np
import scipy.io


class DataHelper:
    def __init__(self, data_dir):
        self.dblp_data_fold = data_dir
        self.paper_list = []
        self.author_list = []
        self.conf_list = []
        self.term_list = []

    def load_data(self):
        """
        map ids and build the adjacency matrices
        :return:
        """
        print('loading data...')
        with open(os.path.join(self.dblp_data_fold, 'paper_author.txt')) as pa_file:
            pa_lines = pa_file.readlines()
            for line in pa_lines:
                token = line.strip('\n').split('\t')
                self.paper_list.append(token[0])
                self.author_list.append(token[1])
        with open(os.path.join(self.dblp_data_fold, 'paper_conf.txt')) as pc_file:
            pc_lines = pc_file.readlines()
            for line in pc_lines:
                token = line.strip('\n').split('\t')
                self.paper_list.append(token[0])
                self.conf_list.append(token[1])
        with open(os.path.join(self.dblp_data_fold, 'paper_term.txt')) as pt_file:
            pt_lines = pt_file.readlines()
            for line in pt_lines:
                token = line.strip('\n').split('\t')
                self.paper_list.append(token[0])
                self.term_list.append(token[1])
        self.paper_list = list(set(self.paper_list))
        self.author_list = list(set(self.author_list))
        self.conf_list = list(set(self.conf_list))
        self.term_list = list(set(self.term_list))
        print('#paper:{}, #author:{}, #conf:{}, #term:{}'.format(len(self.paper_list), len(self.author_list),
                                                                 len(self.conf_list), len(self.term_list)))

        print('build adj_matrix...')
        pa_adj_matrix = np.zeros([len(self.paper_list), len(self.author_list)], dtype=float)
        for line in pa_lines:
            token = line.strip('\n').split('\t')
            row = int(token[0])
            col = int(token[1])
            pa_adj_matrix[row][col] = 1

        pc_adj_matrix = np.zeros([len(self.paper_list), len(self.conf_list)], dtype=float)
        for line in pc_lines:
            token = line.strip('\n').split('\t')
            row = int(token[0])
            col = int(token[1])
            pc_adj_matrix[row][col] = 1

        pt_adj_matrix = np.zeros([len(self.paper_list), len(self.term_list)], dtype=float)
        for line in pt_lines:
            token = line.strip('\n').split('\t')
            row = int(token[0])
            col = int(token[1])
            pt_adj_matrix[row][col] = 1

        ap_adj_matrix = np.transpose(pa_adj_matrix)
        ac_adj_matrix = np.matmul(np.transpose(pa_adj_matrix), pc_adj_matrix)
        at_adj_matrix = np.matmul(ap_adj_matrix, pt_adj_matrix)
        apa_adj_matrix = np.matmul(ap_adj_matrix, ap_adj_matrix.transpose())
        aca_adj_matrix = np.matmul(ac_adj_matrix, ac_adj_matrix.transpose())
        ata_adj_matrix = np.matmul(at_adj_matrix, at_adj_matrix.transpose())
        pcp_adj_matrix = np.matmul(pc_adj_matrix, pc_adj_matrix.transpose())
        ptp_adj_matrix = np.matmul(pt_adj_matrix, pt_adj_matrix.transpose())
        pap_adj_matrix = np.matmul(pa_adj_matrix, pa_adj_matrix.transpose())

        print('save matrix...')
        # self.save_mat(apa_adj_matrix,'apa')
        # self.save_mat(aca_adj_matrix,'aca')
        # self.save_mat(ata_adj_matrix,'ata')
        # self.save_mat(pcp_adj_matrix, 'pcp')
        # self.save_mat(ptp_adj_matrix, 'ptp')
        # self.save_mat(pap_adj_matrix, 'pap')
        # self.save_mat(pa_adj_matrix,'pa')
        # self.save_mat(pc_adj_matrix,'pc')
        # self.save_mat(pt_adj_matrix,'pt')

        self.save_adj(ac_adj_matrix, 'apc')
        self.save_adj(at_adj_matrix, 'apt')
        self.save_adj(apa_adj_matrix, 'apa')
        self.save_adj(aca_adj_matrix, 'apcpa')
        self.save_adj(ata_adj_matrix, 'aptpa')

    def save_mat(self, matrix, relation_name):
        scipy.io.savemat(os.path.join(self.dblp_data_fold, relation_name),
                         {relation_name: matrix})

    def save_adj(self, matrix, relation_name):
        row, col = np.nonzero(matrix)
        with open(os.path.join(self.dblp_data_fold, relation_name+'.txt'), 'w') as adj_file:
            for i in range(len(row)):
                adj_file.write(str(row[i])+'\t'+str(col[i])+'\t'+str(matrix[row[i]][col[i]])+'\n')


if __name__ == '__main__':
    dh = DataHelper('../data/dblp/OriData/')
    dh.load_data()
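If the commented-out save_mat calls in DataHelper.py are enabled, the resulting .mat files can be pulled into the MATLAB drivers roughly like this (a sketch only; the path and variable name mirror the save_mat convention but are hypothetical):

load ../data/dblp/OriData/apa.mat;   % provides a dense matrix `apa`
W_apa = sparse(apa);                 % the driver scripts expect (sparse) adjacency weights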
46 |                     y.append(int(tokens[1]))
47 |         print(len(x))
48 |         km = KMeans(n_clusters=cluster_k)
49 |         km.fit(x, y)
50 |         y_pre = km.predict(x)
51 |         # y_pre = km.fit_predict(x,y)
52 |         nmi = normalized_mutual_info_score(y, y_pre)
53 |         print('Kmean, k={}, nmi={}'.format(cluster_k, nmi))
54 |         return nmi
55 | 
56 |     def classification(self,train_size):
57 |         x = []
58 |         y = []
59 |         with open('../data/yelpWWW/oriData/business_category.txt', 'r') as author_name_label_file:
60 |             author_name_label_lines = author_name_label_file.readlines()
61 |             for line in author_name_label_lines:
62 |                 tokens = line.strip().split('\t')
63 |                 if self.name_emb_dict.has_key(int(tokens[0])):
64 |                     x.append(list(self.name_emb_dict[int(tokens[0])]))
65 |                     y.append(int(tokens[1]))
66 | 
67 |         print(len(x))
68 |         x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=1-train_size,random_state=9)
69 |         # print ('train_size: {}'.format(train_size))
70 |         lr = LogisticRegression()
71 | 
72 |         lr.fit(x_train, y_train)
73 |         y_valid_pred = lr.predict(x_valid)
74 | 
75 |         micro_f1 = f1_score(y_valid, y_valid_pred,average='micro')
76 |         macro_f1 = f1_score(y_valid, y_valid_pred,average='macro')
77 |         print ('Macro_F1_score:{}'.format(macro_f1))
78 |         print ('Micro_F1_score:{}'.format(micro_f1))
79 |         return macro_f1,micro_f1
80 | 
81 |     def calculate_sim(self,u,v,sum_flag):
82 |         if sum_flag:
83 |             return sum(np.abs(np.array(u)-np.array(v)))
84 |             # return sum(np.abs(np.array(u)*np.array(v)))
85 |         else:
86 |             return np.abs(np.array(u)-np.array(v))
87 | 
88 |     def binary_classification_aa(self, x_train, y_train, x_test, y_test):
89 |         classifier = LogisticRegression()
90 |         classifier.fit(x_train, y_train)
91 |         y_pred = classifier.predict_proba(x_test)[:, 1]
92 |         auc_score = roc_auc_score(y_test, y_pred)
93 |         f1 = f1_score(y_test, classifier.predict(x_test))
94 |         acc = accuracy_score(y_test,classifier.predict(x_test))
95 |         print('auc: {}'.format(auc_score))
96 |         print('f1: {}'.format(f1))
97 |         print('acc: {}'.format(acc))
98 | 
99 |     def pre_4_link_prediction(self,data_type):
100 |         print('prepare {} data for link prediction...'.format(data_type))
101 |         x = []
102 |         y = []
103 |         with open('../data/yelpWWW_lp/pos_brurb_'+data_type+'.txt', 'r') as p_co_file:
104 |             for line in p_co_file:
105 |                 tokens = line.strip().split('\t')
106 |                 a1_name = int(tokens[0])
107 |                 a2_name = int(tokens[1])
108 |                 if self.name_emb_dict.has_key(a1_name) and self.name_emb_dict.has_key(a2_name):
109 |                     a1_emb = self.name_emb_dict[a1_name]
110 |                     a2_emb = self.name_emb_dict[a2_name]
111 |                     sim_a1_a2 = self.calculate_sim(a1_emb, a2_emb, sum_flag=False)
112 |                     x.append(sim_a1_a2)
113 |                     y.append(1)
114 |         with open('../data/yelpWWW_lp/neg_brurb_' + data_type+'.txt', 'r') as p_co_file:
115 |             for line in p_co_file:
116 |                 tokens = line.strip().split('\t')
117 |                 a1_name = int(tokens[0])
118 |                 a2_name = int(tokens[1])
119 |                 if self.name_emb_dict.has_key(a1_name) and self.name_emb_dict.has_key(a2_name):
120 |                     a1_emb = self.name_emb_dict[a1_name]
121 |                     a2_emb = self.name_emb_dict[a2_name]
122 |                     sim_a1_a2 = self.calculate_sim(a1_emb, a2_emb, sum_flag=False)
123 |                     x.append(sim_a1_a2)
124 |                     y.append(0)
125 |         return x,y
126 | 
127 |     def link_prediction_with_auc(self):
128 |         train_x, train_y = self.pre_4_link_prediction('train')
129 |         test_x, test_y = self.pre_4_link_prediction('test')
130 |         print(len(train_x), len(test_x))
131 |         self.binary_classification_aa(train_x, train_y, test_x, test_y)
132 |         # x_train, x_valid, y_train, y_valid = train_test_split(test_x, test_y, test_size=1 - 0.8, random_state=9)
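All three evaluate*.py scripts share this link-prediction recipe: each candidate pair (u, v) becomes the feature vector |e_u - e_v| via calculate_sim(..., sum_flag=False), and a binary logistic-regression classifier is scored with AUC/F1 on held-out pairs. A minimal, self-contained Python 3 sketch of the same recipe on synthetic embeddings (the pair lists and dimensions below are toy placeholders, not the repo's data files):

```python
# Edge-feature link prediction, as in pre_4_link_prediction / binary_classification_aa.
# Everything here is a synthetic stand-in for the repo's embeddings and pair files.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, f1_score

rng = np.random.RandomState(9)
emb = {i: rng.rand(16) for i in range(200)}           # node id -> embedding vector

def edge_features(pairs):
    # same featurization as calculate_sim(..., sum_flag=False): |e_u - e_v|
    return np.array([np.abs(emb[u] - emb[v]) for u, v in pairs])

pos = [(i, i + 1) for i in range(0, 100, 2)]          # placeholder positive pairs
neg = [(i, 199 - i) for i in range(0, 100, 2)]        # placeholder negative pairs
X = np.vstack([edge_features(pos), edge_features(neg)])
y = np.array([1] * len(pos) + [0] * len(neg))

clf = LogisticRegression().fit(X, y)
prob = clf.predict_proba(X)[:, 1]
print('auc: {:.3f}  f1: {:.3f}'.format(roc_auc_score(y, prob),
                                       f1_score(y, clf.predict(X))))
```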
133 | # self.binary_classification_aa(x_train, y_train, x_valid, y_valid) 134 | 135 | def new_cl(self): 136 | x_train, x_valid, y_train, y_valid = [],[],[],[] 137 | new_nodes = [] 138 | with open('../yelp_delta_nodes.txt', 'r') as new_node_file: 139 | for f in new_node_file: 140 | new_nodes.append(int(f.strip())) 141 | 142 | with open('../data/yelpWWW/oriData/business_category.txt', 'r') as author_name_label_file: 143 | author_name_label_lines = author_name_label_file.readlines() 144 | 145 | for line in author_name_label_lines: 146 | tokens = line.strip().split('\t') 147 | if self.name_emb_dict.has_key(int(tokens[0])): 148 | if int(tokens[0]) in new_nodes: 149 | x_valid.append(list(self.name_emb_dict[int(tokens[0])])) 150 | y_valid.append(int(tokens[1])) 151 | else: 152 | x_train.append(list(self.name_emb_dict[int(tokens[0])])) 153 | y_train.append(int(tokens[1])) 154 | 155 | # print (len(x_train),len(x_valid)) 156 | lr = LogisticRegression() 157 | 158 | lr.fit(x_train, y_train) 159 | y_valid_pred = lr.predict(x_valid) 160 | 161 | micro_f1 = f1_score(y_valid, y_valid_pred, average='micro') 162 | macro_f1 = f1_score(y_valid, y_valid_pred, average='macro') 163 | print ('Macro_F1_score:{}'.format(macro_f1)) 164 | print ('Micro_F1_score:{}'.format(micro_f1)) 165 | 166 | 167 | if __name__ == '__main__': 168 | 169 | # print('===== classification =====') 170 | # train_ratio = [0.2,0.4,0.6,0.8] 171 | # embeddings_data = '../data/yelpWWW/result/unify_brurb_embedding.mat' 172 | # print(embeddings_data) 173 | # exp = Evaluation(embeddings_data) 174 | # exp.load_embeddings() 175 | # for t_r in train_ratio: 176 | # print(t_r) 177 | # exp.classification(train_size=t_r) 178 | 179 | # print('===== link prediction =====') 180 | # embeddings_data = '../data/yelpWWW_lp/result/unify_0.4bsb+0.6brurb_embedding.mat' 181 | # print(embeddings_data) 182 | # exp = Evaluation(embeddings_data) 183 | # exp.load_embeddings() 184 | # exp.link_prediction_with_auc() 185 | 186 | # for i in xrange(1,10): 187 | # embeddings_data = '../data/yelpWWW_lp/result/unify_'+str(i/10)+'bsb+'+str(1-i/10)+'brurb_embedding.mat' 188 | # print(embeddings_data) 189 | # exp = Evaluation(embeddings_data) 190 | # exp.load_embeddings() 191 | # exp.link_prediction_with_auc() 192 | 193 | # print('===== dynamic classification =====') 194 | # train_ratio = [0.2,0.4,0.6,0.8] 195 | # embeddings_data = '../data/yelpWWWDynamic/result/0_0.4bsb+0.6brurb_embedding.mat' 196 | # print(embeddings_data) 197 | # exp = Evaluation(embeddings_data) 198 | # exp.load_embeddings() 199 | # for t_r in train_ratio: 200 | # print(t_r) 201 | # exp.classification(train_size=t_r) 202 | # 203 | # for t_r in train_ratio: 204 | # print(t_r) 205 | # ma_f1 = [] 206 | # mi_f1 = [] 207 | # for i in xrange(0,10): 208 | # embeddings_data = '../data/yelpWWWDynamic/result/'+str(i)+'_0.4bsb+0.6brurb_embedding.mat' 209 | # print(embeddings_data) 210 | # exp = Evaluation(embeddings_data) 211 | # exp.load_embeddings() 212 | # ma_f1_tmp, mi_f1_tmp = exp.classification(train_size=t_r) 213 | # ma_f1.append(ma_f1_tmp) 214 | # mi_f1.append(mi_f1_tmp) 215 | # print('ave. ma_f1: {}'.format(sum(ma_f1) / 10)) 216 | # print('ave. 
mi_f1: {}'.format(sum(mi_f1) / 10)) 217 | 218 | # print('===== dynamic link prediction =====') 219 | # embeddings_data = '../data/yelpWWWDynamic_lp/result/0_0.4bsb+0.6brurb_embedding.mat' 220 | # print(embeddings_data) 221 | # exp = Evaluation(embeddings_data) 222 | # exp.load_embeddings() 223 | # exp.link_prediction_with_auc() 224 | # for i in xrange(10): 225 | # embeddings_data = '../data/yelpWWWDynamic_lp/result/'+str(i)+'_0.4bsb+0.6brurb_embedding.mat' 226 | # print(embeddings_data) 227 | # exp = Evaluation(embeddings_data) 228 | # exp.load_embeddings() 229 | # exp.link_prediction_with_auc() 230 | 231 | # retrain 232 | print ('YELP') 233 | print ('retrain...') 234 | embeddings_data = '../data/yelpWWW/result/unify_0.4bsb+0.6brurb_embedding.mat' 235 | exp = Evaluation(embeddings_data) 236 | exp.load_embeddings() 237 | exp.new_cl() 238 | 239 | # dynamic 240 | print ('dynamic...') 241 | embeddings_data = '../data/yelpWWWDynamic/result/10_0.4bsb+0.6brurb_embedding.mat' 242 | exp = Evaluation(embeddings_data) 243 | exp.load_embeddings() 244 | exp.new_cl() -------------------------------------------------------------------------------- /code/evaluateAminer.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | # author: lu yf 3 | # create date: 2018/7/21 4 | 5 | # coding:utf-8 6 | # author: lu yf 7 | # create date: 2018/6/25 8 | 9 | from __future__ import division 10 | import os 11 | import random 12 | from sklearn.manifold import TSNE 13 | from sklearn.model_selection import train_test_split 14 | from sklearn.linear_model import LogisticRegression 15 | from sklearn.metrics import normalized_mutual_info_score, f1_score, roc_auc_score, accuracy_score 16 | from sklearn.cluster import KMeans 17 | import warnings 18 | import matplotlib.pyplot as plt 19 | import scipy.io 20 | from sklearn.svm import SVC 21 | from tqdm import tqdm 22 | import numpy as np 23 | 24 | warnings.filterwarnings('ignore') 25 | random.seed(1) 26 | 27 | 28 | class Evaluation: 29 | 30 | def __init__(self, embeddings_data): 31 | self.embeddings_data = embeddings_data 32 | self.name_emb_dict = {} 33 | 34 | def load_embeddings(self): 35 | embeddings_mat = scipy.io.loadmat(self.embeddings_data) 36 | key = filter(lambda k:k.startswith('_') is False,embeddings_mat.keys())[0] 37 | 38 | embeddings = embeddings_mat[key] 39 | for i in range(len(embeddings)): 40 | self.name_emb_dict[i] = embeddings[i] 41 | 42 | def kmeans_nmi(self,cluster_k): 43 | x = [] 44 | y = [] 45 | with open('../data/aminer/oriData/author_label.txt', 'r') as author_name_label_file: 46 | author_name_label_lines = author_name_label_file.readlines() 47 | for line in author_name_label_lines: 48 | tokens = line.strip().split('\t') 49 | if self.name_emb_dict.has_key(int(tokens[0])): 50 | x.append(list(self.name_emb_dict[int(tokens[0])])) 51 | y.append(int(tokens[1])) 52 | 53 | km = KMeans(n_clusters=cluster_k) 54 | km.fit(x, y) 55 | y_pre = km.predict(x) 56 | # y_pre = km.fit_predict(x,y) 57 | nmi = normalized_mutual_info_score(y, y_pre) 58 | print(len(x)) 59 | print('Kmean, k={}, nmi={}'.format(cluster_k, nmi)) 60 | return nmi 61 | 62 | def classification(self,train_size): 63 | x = [] 64 | y = [] 65 | with open('../data/aminer/oriData/author_label.txt', 'r') as author_name_label_file: 66 | author_name_label_lines = author_name_label_file.readlines() 67 | for line in author_name_label_lines: 68 | tokens = line.strip().split('\t') 69 | if self.name_emb_dict.has_key(int(tokens[0])): 70 | 
x.append(list(self.name_emb_dict[int(tokens[0])]))
71 |                     y.append(int(tokens[1]))
72 |         x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=1-train_size,random_state=9)
73 |         # print ('train_size: {}'.format(train_size))
74 |         lr = LogisticRegression()
75 |         print(len(x))
76 |         lr.fit(x_train, y_train)
77 |         y_valid_pred = lr.predict(x_valid)
78 | 
79 |         micro_f1 = f1_score(y_valid, y_valid_pred,average='micro')
80 |         macro_f1 = f1_score(y_valid, y_valid_pred,average='macro')
81 |         print ('Macro_F1_score:{}'.format(macro_f1))
82 |         print ('Micro_F1_score:{}'.format(micro_f1))
83 |         return macro_f1,micro_f1
84 | 
85 |     def clustering_visual(self,x,y,title):
86 |         if not os.path.exists('./figures'):
87 |             os.mkdir('./figures')
88 |         tsne = TSNE(n_components=2)
89 |         Y = tsne.fit_transform(x)
90 |         # with open(os.path.join('./figures',title+'.pickle'), 'wb') as f:
91 |         #     pickle.dump([x,Y,y],f)
92 |         # plt.title(title)
93 |         plt.scatter(Y[:, 0],Y[:, 1],c=y, marker='.')
94 |         # plt.xticks(fontsize=16)
95 |         # plt.yticks(fontsize=16)
96 |         # plt.legend()
97 |         plt.axis('off')  # hide the x/y axes
98 |         plt.savefig(os.path.join('./figures',title + '.eps'),format='eps')
99 |         plt.savefig(os.path.join('./figures',title + '.png'))
100 |         plt.clf()
101 | 
102 |     def link_prediction_with_hit(self,hist_k):
103 |         print('link prediction...')
104 |         with open('../data/aminer_lp/test_aa_pos_neg.txt','r') as ld_f:
105 |             apa_lines = ld_f.readlines()
106 |         hist_or_not = 0
107 |         for aa in tqdm(list(set(apa_lines))):
108 |             a_list = aa.strip().split('\t')
109 |             pos_a_1 = a_list[0]
110 |             pos_a_2 = a_list[1]
111 |             neg_a_list = a_list[2].split(' ')
112 |             pos_a_1_embs = self.name_emb_dict[int(pos_a_1)]
113 |             pos_a_2_embs = self.name_emb_dict[int(pos_a_2)]
114 |             pos_sim = self.calculate_sim(pos_a_1_embs,pos_a_2_embs,'lp')
115 |             neg_sim_list = map(lambda x:
116 |                                self.calculate_sim(pos_a_1_embs,self.name_emb_dict[int(x)],'lp'),
117 |                                neg_a_list)
118 |             neg_a_list.append(pos_a_2)
119 |             neg_sim_list.append(pos_sim)
120 |             sim_dict = dict(zip(neg_a_list,neg_sim_list))
121 |             sorted_sim_triple = sorted(sim_dict.iteritems(),key=lambda d:d[1])
122 |             if (pos_a_2,pos_sim) in sorted_sim_triple[:hist_k]:
123 |                 hist_or_not += 1
124 |             else:
125 |                 hist_or_not += 0
126 |         print('hit@{}: {}'.format(hist_k,hist_or_not/len(set(apa_lines))))
127 | 
128 |     def calculate_sim(self,u,v,sum_flag):
129 |         if sum_flag:
130 |             return sum(np.abs(np.array(u)-np.array(v)))
131 |             # return sum(np.abs(np.array(u)*np.array(v)))
132 |         else:
133 |             return np.abs(np.array(u)-np.array(v))
134 |             # return np.abs(np.array(u)*np.array(v))
135 | 
136 |     def binary_classification_aa(self, x_train, y_train, x_test, y_test):
137 |         classifier = LogisticRegression()
138 |         classifier.fit(x_train, y_train)
139 |         y_pred = classifier.predict_proba(x_test)[:, 1]
140 |         auc_score = roc_auc_score(y_test, y_pred)
141 |         f1 = f1_score(y_test, classifier.predict(x_test))
142 |         acc = accuracy_score(y_test,classifier.predict(x_test))
143 |         print('auc: {}'.format(auc_score))
144 |         print('f1: {}'.format(f1))
145 |         print('acc: {}'.format(acc))
146 | 
147 |     def get_good_author(self):
148 |         self.good_author_list = []
149 |         with open('../data/aminer_lp_no_iso/good_author_4_test.txt') as ga_file:
150 |             for line in ga_file:
151 |                 self.good_author_list.append(line.strip())
152 | 
153 |     def pre_4_link_prediction(self,data_type):
154 |         # print('prepare {} data for link prediction...'.format(data_type))
155 |         x = []
156 |         y = []
157 |         with open('../data/aminerDynamic_lp/'+data_type+'_aa_pos.txt', 'r') as aa_pos_f:
158 |         # 
with open('../data/aminerDynamic_lp/'+data_type+'_aa_pos.txt', 'r') as aa_pos_f: 159 | for line in aa_pos_f: 160 | tokens = line.strip().split('\t') 161 | if self.name_emb_dict.has_key(int(tokens[0])) and self.name_emb_dict.has_key(int(tokens[1])): 162 | # if tokens[0] in self.good_author_list and tokens[1] in self.good_author_list: 163 | pos_1_emb = self.name_emb_dict[int(tokens[0])] 164 | pos_2_emb = self.name_emb_dict[int(tokens[1])] 165 | sim_pos = self.calculate_sim(pos_1_emb, pos_2_emb, sum_flag=False) 166 | x.append(sim_pos) 167 | y.append(1) 168 | # print('#pos {}: {}'.format(data_type,len(x))) 169 | with open('../data/aminerDynamic_lp/'+data_type+'_aa_neg.txt', 'r') as aa_neg_f: 170 | # with open('../data/aminerDynamic_lp/'+data_type+'_aa_neg.txt', 'r') as aa_neg_f: 171 | for line in aa_neg_f: 172 | tokens = line.strip().split('\t') 173 | if self.name_emb_dict.has_key(int(tokens[0])) and self.name_emb_dict.has_key(int(tokens[1])): 174 | # if tokens[0] in self.good_author_list and tokens[1] in self.good_author_list: 175 | neg_1_emb = self.name_emb_dict[int(tokens[0])] 176 | neg_2_emb = self.name_emb_dict[int(tokens[1])] 177 | sim_neg = self.calculate_sim(neg_1_emb, neg_2_emb, sum_flag=False) 178 | x.append(sim_neg) 179 | y.append(0) 180 | # print('#pos+neg {}: {}'.format(data_type,len(x))) 181 | # return x[0:2500]+x[-2500:],y[0:2500]+y[-2500:] 182 | return x,y 183 | 184 | def link_prediction_with_auc(self): 185 | train_x, train_y = self.pre_4_link_prediction('train') 186 | test_x, test_y = self.pre_4_link_prediction('test') 187 | print('link prediction with auc...') 188 | print(len(train_x), len(test_x)) 189 | self.binary_classification_aa(train_x, train_y, test_x, test_y) 190 | 191 | # x_train, x_valid, y_train, y_valid = train_test_split(test_x, test_y, test_size=1 - 0.8, random_state=9) 192 | # self.binary_classification_aa(x_train, y_train, x_valid, y_valid) 193 | 194 | def pre_4_node_recommendation(self,author_embeddings,conf_embeddings,author_start_end_id,conf_start_end_id): 195 | a_embeddings_mat = scipy.io.loadmat(author_embeddings) 196 | key = filter(lambda k: k.startswith('_') is False, a_embeddings_mat.keys())[0] 197 | author_embs = a_embeddings_mat[key] 198 | for i in range(author_start_end_id[0],author_start_end_id[1]): 199 | self.name_emb_dict['a'+str(i)] = author_embs[i] 200 | c_embeddings_mat = scipy.io.loadmat(conf_embeddings) 201 | key = filter(lambda k: k.startswith('_') is False, c_embeddings_mat.keys())[0] 202 | conf_embs = c_embeddings_mat[key] 203 | for i in range(conf_start_end_id[0],conf_start_end_id[1]): 204 | self.name_emb_dict['c'+str(i-conf_start_end_id[0])] = conf_embs[i] 205 | 206 | def node_recommendation(self,hit_k): 207 | with open('../data/aminer_nr/test_ac_pos_neg.txt', 'r') as test_ac_p_n_f: 208 | ac_lines = test_ac_p_n_f.readlines() 209 | 210 | hit_or_not = [] 211 | for aa in list(set(ac_lines)): 212 | ac_list = aa.strip().split('\t') 213 | pos_a = ac_list[0] 214 | pos_c = ac_list[1] 215 | if self.name_emb_dict.has_key('a'+pos_a) and self.name_emb_dict.has_key('c'+pos_c): 216 | neg_c_list = ac_list[2].split(' ') 217 | pos_a_embs = self.name_emb_dict['a' + pos_a] 218 | pos_c_embs = self.name_emb_dict['c' + pos_c] 219 | pos_sim = self.calculate_sim(pos_a_embs, pos_c_embs, sum_flag=True) 220 | neg_sim_list = map(lambda x: 221 | self.calculate_sim(pos_a_embs, self.name_emb_dict['c' + x], sum_flag=True), 222 | neg_c_list) 223 | neg_c_list.append(pos_c) 224 | neg_sim_list.append(pos_sim) 225 | sim_dict = dict(zip(neg_c_list, neg_sim_list)) 226 | 
sorted_sim_triple = sorted(sim_dict.iteritems(), key=lambda d: d[1]) 227 | if (pos_c, pos_sim) in sorted_sim_triple[:hit_k]: 228 | hit_or_not.append(1) 229 | else: 230 | hit_or_not.append(0) 231 | print('#test: {}'.format(len(hit_or_not))) 232 | print('hit@{}: {}'.format(hit_k, sum(hit_or_not) / len(hit_or_not))) 233 | 234 | def get_static_author(self): 235 | self.static_authors = [] 236 | with open('../baseline/aminerDynamic/dw.node2id_2000','r') as d_f: 237 | for line in d_f: 238 | tokens = line.strip().split('\t')[0] 239 | if tokens[0] == 'a': 240 | self.static_authors.append(tokens[1:]) 241 | 242 | def new_cl(self): 243 | x_train, x_valid, y_train, y_valid = [],[],[],[] 244 | new_nodes = [] 245 | with open('../aminer_delta_nodes.txt', 'r') as new_node_file: 246 | for f in new_node_file: 247 | new_nodes.append(int(f.strip())) 248 | 249 | with open('../data/aminer/oriData/author_label.txt', 'r') as author_name_label_file: 250 | author_name_label_lines = author_name_label_file.readlines() 251 | 252 | for line in author_name_label_lines: 253 | tokens = line.strip().split('\t') 254 | if self.name_emb_dict.has_key(int(tokens[0])): 255 | if int(tokens[0]) in new_nodes: 256 | x_valid.append(list(self.name_emb_dict[int(tokens[0])])) 257 | y_valid.append(int(tokens[1])) 258 | else: 259 | x_train.append(list(self.name_emb_dict[int(tokens[0])])) 260 | y_train.append(int(tokens[1])) 261 | 262 | # print (len(x_train),len(x_valid)) 263 | lr = LogisticRegression() 264 | 265 | lr.fit(x_train, y_train) 266 | y_valid_pred = lr.predict(x_valid) 267 | 268 | micro_f1 = f1_score(y_valid, y_valid_pred, average='micro') 269 | macro_f1 = f1_score(y_valid, y_valid_pred, average='macro') 270 | print ('Macro_F1_score:{}'.format(macro_f1)) 271 | print ('Micro_F1_score:{}'.format(micro_f1)) 272 | 273 | 274 | if __name__ == '__main__': 275 | 276 | # print('===== classification =====') 277 | # train_ratio = [0.8] 278 | # embeddings_data = '../data/aminer/result/unify_0.25apa+0.5apcpa+0.25aptpa_embedding_d200.mat' 279 | # print(embeddings_data) 280 | # exp = Evaluation(embeddings_data) 281 | # exp.load_embeddings() 282 | # # exp.kmeans_nmi(cluster_k=5) 283 | # for t_r in train_ratio: 284 | # print(t_r) 285 | # exp.classification(train_size=t_r) 286 | 287 | # print('===== link prediction =====') 288 | # embeddings_data = '../data/aminer_lp_no_iso/result/unify_0.25apa+0.5apcpa+0.25aptpa_embedding_lp.mat' 289 | # print(embeddings_data) 290 | # exp = Evaluation(embeddings_data) 291 | # exp.get_good_author() 292 | # exp.load_embeddings() 293 | # exp.link_prediction_with_auc() 294 | # 295 | # print('===== dynamic classification =====') 296 | # embeddings_data = '../data/aminerDynamic/result/2004_0.25apa+0.5apcpa+0.25aptpa_embedding.mat' 297 | # print(embeddings_data) 298 | # exp = Evaluation(embeddings_data) 299 | # exp.load_embeddings() 300 | # for t_r in train_ratio: 301 | # print(t_r) 302 | # exp.classification(train_size=t_r) 303 | # 304 | # ma_f1 = [] 305 | # mi_f1 = [] 306 | # nmi = [] 307 | # for t_r in train_ratio: 308 | # for i in xrange(10): 309 | # embeddings_data = '../data/aminerDynamic/result/2005_' + str(i+1) + '_0.25apa+0.5apcpa+0.25aptpa_embedding.mat' 310 | # print(embeddings_data) 311 | # exp = Evaluation(embeddings_data) 312 | # exp.load_embeddings() 313 | # ma_f1_tmp, mi_f1_tmp = exp.classification(train_size=t_r) 314 | # ma_f1.append(ma_f1_tmp) 315 | # mi_f1.append(mi_f1_tmp) 316 | # print('ave. ma_f1: {}'.format(sum(ma_f1) / 10)) 317 | # print('ave. 
mi_f1: {}'.format(sum(mi_f1) / 10))
318 | 
319 |     # print('======= dynamic link prediction========')
320 |     # # TODO: change the positive/negative sample file names in link_prediction_with_auc
321 |     # embeddings_data = '../data/aminerDynamic_lp/result/full_apa+apcpa+aptpa_embedding_lp.mat'
322 |     # print(embeddings_data)
323 |     # exp = Evaluation(embeddings_data)
324 |     # # exp.get_good_author()
325 |     # exp.load_embeddings()
326 |     # exp.link_prediction_with_auc()
327 |     #
328 |     # for i in xrange(10):
329 |     #     embeddings_data = '../data/aminerDynamic_lp/result/2004_' + str(i+1) + '_apa+apcpa+aptpa_embedding_lp.mat'
330 |     #     print(embeddings_data)
331 |     #     exp = Evaluation(embeddings_data)
332 |     #     exp.load_embeddings()
333 |     #     exp.link_prediction_with_auc()
334 |     #
335 |     # embeddings_data = '../data/aminerDynamic_lp/result/retrain_apa+apcpa+aptpa_embedding.mat'
336 |     # print(embeddings_data)
337 |     # exp = Evaluation(embeddings_data)
338 |     # # exp.get_good_author()
339 |     # exp.load_embeddings()
340 |     # exp.link_prediction_with_auc()
341 | 
342 |     # retrain
343 |     print ('AMiner')
344 |     print ('retrain...')
345 |     embeddings_data = '../data/aminer/result/unify_apa+apcpa+aptpa_embedding.mat'
346 |     exp = Evaluation(embeddings_data)
347 |     exp.load_embeddings()
348 |     exp.new_cl()
349 | 
350 |     # dynamic
351 |     print ('dynamic...')
352 |     # embeddings_data = '../data/aminer/result/unify_apcpa+0.5apa_embedding_g0.1.mat'
353 |     # exp = Evaluation(embeddings_data)
354 |     # exp.load_embeddings()
355 |     # exp.new_cl()
356 |     print ('Macro_F1_score:{}'.format(0.903466239273))
357 |     print ('Micro_F1_score:{}'.format(0.902034134512))
358 | 
--------------------------------------------------------------------------------
/code/evaluateDBLP.py:
--------------------------------------------------------------------------------
1 | # coding:utf-8
2 | # author: lu yf
3 | # create date: 2018/6/25
4 | 
5 | from __future__ import division
6 | import os
7 | import random
8 | from sklearn.manifold import TSNE
9 | from sklearn.model_selection import train_test_split
10 | from sklearn.linear_model import LogisticRegression
11 | from sklearn.metrics import normalized_mutual_info_score, f1_score, roc_auc_score, accuracy_score
12 | from sklearn.cluster import KMeans
13 | import warnings
14 | import matplotlib.pyplot as plt
15 | import scipy.io
16 | from tqdm import tqdm
17 | import numpy as np
18 | 
19 | warnings.filterwarnings('ignore')
20 | random.seed(1)
21 | 
22 | 
23 | class Evaluation:
24 | 
25 |     def __init__(self, embeddings_data):
26 |         self.embeddings_data = embeddings_data
27 |         self.name_emb_dict = {}
28 | 
29 |     def load_embeddings(self):
30 |         embeddings_mat = scipy.io.loadmat(self.embeddings_data)
31 |         key = filter(lambda k:k.startswith('_') is False,embeddings_mat.keys())[0]
32 | 
33 |         embeddings = embeddings_mat[key]
34 |         for i in range(len(embeddings)):
35 |             self.name_emb_dict[i] = embeddings[i]
36 | 
37 |     def kmeans_nmi(self,cluster_k):
38 |         x = []
39 |         y = []
40 |         with open('../data/dblp/oriData/author_label.txt', 'r') as author_name_label_file:
41 |             author_name_label_lines = author_name_label_file.readlines()
42 |             for line in author_name_label_lines:
43 |                 tokens = line.strip().split('\t')
44 |                 if self.name_emb_dict.has_key(int(tokens[0])):
45 |                     x.append(list(self.name_emb_dict[int(tokens[0])]))
46 |                     y.append(int(tokens[1]))
47 | 
48 |         km = KMeans(n_clusters=cluster_k)
49 |         km.fit(x, y)
50 |         y_pre = km.predict(x)
51 |         # y_pre = km.fit_predict(x,y)
52 |         nmi = normalized_mutual_info_score(y, y_pre)
53 |         print('Kmean, k={}, nmi={}'.format(cluster_k, nmi))
54 |         return nmi
55 | 
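The kmeans_nmi method above follows the standard unsupervised-clustering evaluation: cluster the embeddings with K-means, then compare the predicted cluster assignments against the ground-truth labels with NMI. Note that the y passed to km.fit(x, y) is ignored by scikit-learn's KMeans; the labels only enter through the NMI score. A self-contained Python 3 sketch on synthetic blobs (stand-ins for the learned embeddings):

```python
# KMeans + NMI evaluation, as in kmeans_nmi above; make_blobs supplies toy "embeddings".
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import normalized_mutual_info_score

x, y = make_blobs(n_samples=300, centers=4, n_features=16, random_state=1)
km = KMeans(n_clusters=4, n_init=10, random_state=1)
y_pre = km.fit_predict(x)                  # equivalent to fit(x) followed by predict(x)
print('nmi = {:.3f}'.format(normalized_mutual_info_score(y, y_pre)))
```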
56 |     def classification(self,train_size):
57 |         x = []
58 |         y = []
59 |         with open('../data/dblp/oriData/author_label.txt', 'r') as author_name_label_file:
60 |             author_name_label_lines = author_name_label_file.readlines()
61 |             for line in author_name_label_lines:
62 |                 tokens = line.strip().split('\t')
63 |                 if self.name_emb_dict.has_key(int(tokens[0])):
64 |                     x.append(list(self.name_emb_dict[int(tokens[0])]))
65 |                     y.append(int(tokens[1]))
66 | 
67 |         x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=1-train_size,random_state=9)
68 |         # print ('train_size: {}'.format(train_size))
69 |         lr = LogisticRegression()
70 | 
71 |         lr.fit(x_train, y_train)
72 |         y_valid_pred = lr.predict(x_valid)
73 | 
74 |         micro_f1 = f1_score(y_valid, y_valid_pred,average='micro')
75 |         macro_f1 = f1_score(y_valid, y_valid_pred,average='macro')
76 |         print ('Macro_F1_score:{}'.format(macro_f1))
77 |         print ('Micro_F1_score:{}'.format(micro_f1))
78 |         return macro_f1,micro_f1
79 | 
80 |     def clustering_visual(self,x,y,title):
81 |         if not os.path.exists('./figures'):
82 |             os.mkdir('./figures')
83 |         tsne = TSNE(n_components=2)
84 |         Y = tsne.fit_transform(x)
85 |         # with open(os.path.join('./figures',title+'.pickle'), 'wb') as f:
86 |         #     pickle.dump([x,Y,y],f)
87 |         # plt.title(title)
88 |         plt.scatter(Y[:, 0],Y[:, 1],c=y, marker='.')
89 |         # plt.xticks(fontsize=16)
90 |         # plt.yticks(fontsize=16)
91 |         # plt.legend()
92 |         plt.axis('off')  # hide the x/y axes
93 |         plt.savefig(os.path.join('./figures',title + '.eps'),format='eps')
94 |         plt.savefig(os.path.join('./figures',title + '.png'))
95 |         plt.clf()
96 | 
97 |     def link_prediction_with_hit(self,hist_k):
98 |         print('link prediction...')
99 |         with open('../data/dblp_lp/test_aa_pos_neg.txt','r') as ld_f:
100 |             apa_lines = ld_f.readlines()
101 |         hist_or_not = 0
102 |         for aa in tqdm(list(set(apa_lines))):
103 |             a_list = aa.strip().split('\t')
104 |             pos_a_1 = a_list[0]
105 |             pos_a_2 = a_list[1]
106 |             neg_a_list = a_list[2].split(' ')
107 |             pos_a_1_embs = self.name_emb_dict[int(pos_a_1)]
108 |             pos_a_2_embs = self.name_emb_dict[int(pos_a_2)]
109 |             pos_sim = self.calculate_sim(pos_a_1_embs,pos_a_2_embs,'lp')
110 |             neg_sim_list = map(lambda x:
111 |                                self.calculate_sim(pos_a_1_embs,self.name_emb_dict[int(x)],'lp'),
112 |                                neg_a_list)
113 |             neg_a_list.append(pos_a_2)
114 |             neg_sim_list.append(pos_sim)
115 |             sim_dict = dict(zip(neg_a_list,neg_sim_list))
116 |             sorted_sim_triple = sorted(sim_dict.iteritems(),key=lambda d:d[1])
117 |             if (pos_a_2,pos_sim) in sorted_sim_triple[:hist_k]:
118 |                 hist_or_not += 1
119 |             else:
120 |                 hist_or_not += 0
121 |         print('hit@{}: {}'.format(hist_k,hist_or_not/len(set(apa_lines))))
122 | 
123 |     def calculate_sim(self,u,v,sum_flag):
124 |         if sum_flag:
125 |             return sum(np.abs(np.array(u)-np.array(v)))
126 |             # return sum(np.abs(np.array(u)*np.array(v)))
127 |         else:
128 |             return np.abs(np.array(u)-np.array(v))
129 | 
130 |     def binary_classification_aa(self, x_train, y_train, x_test, y_test):
131 |         classifier = LogisticRegression()
132 |         classifier.fit(x_train, y_train)
133 |         y_pred = classifier.predict_proba(x_test)[:, 1]
134 |         auc_score = roc_auc_score(y_test, y_pred)
135 |         f1 = f1_score(y_test, classifier.predict(x_test))
136 |         acc = accuracy_score(y_test,classifier.predict(x_test))
137 |         print('auc: {}'.format(auc_score))
138 |         print('f1: {}'.format(f1))
139 |         print('acc: {}'.format(acc))
140 | 
141 |     def pre_4_link_prediction(self,data_type):
142 |         # print('prepare {} data for link prediction...'.format(data_type))
143 |         x = []
144 |         y = []
145 |         with 
open('../data/dblp_lp_cikm/pos_coauthor_'+data_type, 'r') as p_co_file: 146 | for line in p_co_file: 147 | tokens = line.strip().split('\t') 148 | a1_name = int(tokens[0]) 149 | a2_name = int(tokens[1]) 150 | if self.name_emb_dict.has_key(a1_name) and self.name_emb_dict.has_key(a2_name): 151 | if tokens[0] in self.good_author and tokens[1] in self.good_author: 152 | a1_emb = self.name_emb_dict[a1_name] 153 | a2_emb = self.name_emb_dict[a2_name] 154 | sim_a1_a2 = self.calculate_sim(a1_emb, a2_emb, sum_flag=False) 155 | x.append(sim_a1_a2) 156 | y.append(1) 157 | with open('../data/dblp_lp_cikm/neg_coauthor_' + data_type, 'r') as p_co_file: 158 | for line in p_co_file: 159 | tokens = line.strip().split('\t') 160 | a1_name = int(tokens[0]) 161 | a2_name = int(tokens[1]) 162 | if self.name_emb_dict.has_key(a1_name) and self.name_emb_dict.has_key(a2_name): 163 | if tokens[0] in self.good_author and tokens[1] in self.good_author: 164 | a1_emb = self.name_emb_dict[a1_name] 165 | a2_emb = self.name_emb_dict[a2_name] 166 | sim_a1_a2 = self.calculate_sim(a1_emb, a2_emb, sum_flag=False) 167 | x.append(sim_a1_a2) 168 | y.append(0) 169 | 170 | # with open('../data/dblp_lp_del_edges/'+data_type+'_aa_pos.txt', 'r') as aa_pos_f: 171 | # for line in aa_pos_f: 172 | # tokens = line.strip().split('\t') 173 | # if not self.name_emb_dict.has_key(int(tokens[0])) or not self.name_emb_dict.has_key(int(tokens[1])): 174 | # continue 175 | # if tokens[0] in self.good_author and tokens[1] in self.good_author: 176 | # pos_1_emb = self.name_emb_dict[int(tokens[0])] 177 | # pos_2_emb = self.name_emb_dict[int(tokens[1])] 178 | # sim_pos = self.calculate_sim(pos_1_emb, pos_2_emb, sum_flag=False) 179 | # x.append(sim_pos) 180 | # y.append(1) 181 | # # print('#pos {}: {}'.format(data_type,len(x))) 182 | # with open('../data/dblp_lp_del_edges/'+data_type+'_aa_neg.txt', 'r') as aa_neg_f: 183 | # for line in aa_neg_f: 184 | # tokens = line.strip().split('\t') 185 | # if not self.name_emb_dict.has_key(int(tokens[0])) or not self.name_emb_dict.has_key(int(tokens[1])): 186 | # continue 187 | # if tokens[0] in self.good_author and tokens[1] in self.good_author: 188 | # neg_1_emb = self.name_emb_dict[int(tokens[0])] 189 | # neg_2_emb = self.name_emb_dict[int(tokens[1])] 190 | # sim_neg = self.calculate_sim(neg_1_emb, neg_2_emb, sum_flag=False) 191 | # x.append(sim_neg) 192 | # y.append(0) 193 | # # print('#pos+neg {}: {}'.format(data_type,len(x))) 194 | return x,y 195 | 196 | def link_prediction_with_auc(self): 197 | train_x, train_y = self.pre_4_link_prediction('train') 198 | test_x, test_y = self.pre_4_link_prediction('test') 199 | print('link prediction with auc...') 200 | print(len(train_x), len(test_x)) 201 | self.binary_classification_aa(train_x, train_y, test_x, test_y) 202 | 203 | x_train, x_valid, y_train, y_valid = train_test_split(test_x, test_y, test_size=1 - 0.8, random_state=9) 204 | self.binary_classification_aa(x_train, y_train, x_valid, y_valid) 205 | 206 | def pre_4_node_recommendation(self,author_embeddings,conf_embeddings,author_start_end_id,conf_start_end_id): 207 | a_embeddings_mat = scipy.io.loadmat(author_embeddings) 208 | key = filter(lambda k: k.startswith('_') == False, a_embeddings_mat.keys())[0] 209 | author_embs = a_embeddings_mat[key] 210 | for i in range(author_start_end_id[0],author_start_end_id[1]): 211 | self.name_emb_dict['a'+str(i)] = author_embs[i] 212 | c_embeddings_mat = scipy.io.loadmat(conf_embeddings) 213 | key = filter(lambda k: k.startswith('_') is False, c_embeddings_mat.keys())[0] 214 
| conf_embs = c_embeddings_mat[key] 215 | for i in range(conf_start_end_id[0],conf_start_end_id[1]): 216 | self.name_emb_dict['c'+str(i-conf_start_end_id[0])] = conf_embs[i] 217 | 218 | def node_recommendation(self,hit_k): 219 | with open('../data/dblp_nr/test_ac_pos_neg.txt', 'r') as test_ac_p_n_f: 220 | ac_lines = test_ac_p_n_f.readlines() 221 | 222 | hit_or_not = [] 223 | for aa in list(set(ac_lines)): 224 | ac_list = aa.strip().split('\t') 225 | pos_a = ac_list[0] 226 | pos_c = ac_list[1] 227 | if self.name_emb_dict.has_key('a'+pos_a) and self.name_emb_dict.has_key('c'+pos_c): 228 | neg_c_list = ac_list[2].split(' ') 229 | pos_a_embs = self.name_emb_dict['a' + pos_a] 230 | pos_c_embs = self.name_emb_dict['c' + pos_c] 231 | pos_sim = self.calculate_sim(pos_a_embs, pos_c_embs, sum_flag=True) 232 | neg_sim_list = map(lambda x: 233 | self.calculate_sim(pos_a_embs, self.name_emb_dict['c' + x], sum_flag=True), 234 | neg_c_list) 235 | neg_c_list.append(pos_c) 236 | neg_sim_list.append(pos_sim) 237 | sim_dict = dict(zip(neg_c_list, neg_sim_list)) 238 | sorted_sim_triple = sorted(sim_dict.iteritems(), key=lambda d: d[1]) 239 | if (pos_c, pos_sim) in sorted_sim_triple[:hit_k]: 240 | hit_or_not.append(1) 241 | else: 242 | hit_or_not.append(0) 243 | print('#test: {}'.format(len(hit_or_not))) 244 | print('hit@{}: {}'.format(hit_k, sum(hit_or_not) / len(hit_or_not))) 245 | 246 | def get_good_author(self): 247 | id2node = {} 248 | self.good_author = [] 249 | with open('../baseline/dblpDynamic_lp_cikm/line.node2id_0','r') as d_f: 250 | for line in d_f: 251 | tokens = line.strip().split('\t') 252 | id2node[tokens[1]] = tokens[0] 253 | with open('../baseline/dblpDynamic_lp_cikm/line.edgelist_0','r') as f: 254 | for line in f: 255 | tokens = line.strip().split(' ') 256 | if id2node[tokens[0]].startswith('a'): 257 | self.good_author.append(id2node[tokens[0]][1:]) 258 | if id2node[tokens[1]].startswith('a'): 259 | self.good_author.append(id2node[tokens[1]][1:]) 260 | self.good_author = list(set(self.good_author)) 261 | print(len(self.good_author)) 262 | 263 | def new_cl(self): 264 | x_train, x_valid, y_train, y_valid = [],[],[],[] 265 | new_nodes = [] 266 | with open('../dblp_delta_nodes.txt', 'r') as new_node_file: 267 | for f in new_node_file: 268 | new_nodes.append(int(f.strip())) 269 | 270 | with open('../data/dblp/oriData/author_label.txt', 'r') as author_name_label_file: 271 | author_name_label_lines = author_name_label_file.readlines() 272 | 273 | for line in author_name_label_lines: 274 | tokens = line.strip().split('\t') 275 | if self.name_emb_dict.has_key(int(tokens[0])): 276 | if int(tokens[0]) in new_nodes: 277 | x_valid.append(list(self.name_emb_dict[int(tokens[0])])) 278 | y_valid.append(int(tokens[1])) 279 | else: 280 | x_train.append(list(self.name_emb_dict[int(tokens[0])])) 281 | y_train.append(int(tokens[1])) 282 | 283 | # print (len(x_train),len(x_valid)) 284 | lr = LogisticRegression() 285 | 286 | lr.fit(x_train, y_train) 287 | y_valid_pred = lr.predict(x_valid) 288 | 289 | micro_f1 = f1_score(y_valid, y_valid_pred, average='micro') 290 | macro_f1 = f1_score(y_valid, y_valid_pred, average='macro') 291 | print ('Macro_F1_score:{}'.format(macro_f1)) 292 | print ('Micro_F1_score:{}'.format(micro_f1)) 293 | 294 | 295 | if __name__ == '__main__': 296 | 297 | # # print('===== classification =====') 298 | # train_ratio = [0.2,0.4,0.6,0.8] 299 | # # classification 300 | # embeddings_data = '../data/dblp/result/apcpa_embedding.mat' 301 | # print(embeddings_data) 302 | # exp = 
Evaluation(embeddings_data) 303 | # exp.load_embeddings() 304 | # exp.kmeans_nmi(cluster_k=4) 305 | # for t_r in train_ratio: 306 | # print(t_r) 307 | # exp.classification(train_size=t_r) 308 | 309 | # # link prediction 310 | # print('===== link prediction =====') 311 | # embeddings_data = '../data/dblp_lp_cikm/result/unify_apa_embedding_lp.mat' 312 | # print(embeddings_data) 313 | # exp = Evaluation(embeddings_data) 314 | # exp.get_good_author() 315 | # exp.load_embeddings() 316 | # exp.link_prediction_with_auc() 317 | # 318 | # print('===== dynamic classification =====') 319 | # # for t_r in train_ratio: 320 | # print(t_r) 321 | # ma_f1 = [] 322 | # mi_f1 = [] 323 | # nmi = [] 324 | # for i in xrange(10): 325 | # embeddings_data = '../data/dblpDynamic/result/'+str(i)+'_0.1apa+1apcpa+0.9aptpa_embedding.mat' 326 | # print(embeddings_data) 327 | # exp = Evaluation(embeddings_data) 328 | # exp.load_embeddings() 329 | # nmi_tmp = exp.kmeans_nmi(cluster_k=4) 330 | # ma_f1_tmp, mi_f1_tmp = exp.classification(train_size=t_r) 331 | # nmi.append(nmi_tmp) 332 | # ma_f1.append(ma_f1_tmp) 333 | # mi_f1.append(mi_f1_tmp) 334 | # print('ave. nim: {}'.format(sum(nmi) / 10)) 335 | # print('ave. ma_f1: {}'.format(sum(ma_f1) / 10)) 336 | # print('ave. mi_f1: {}'.format(sum(mi_f1) / 10)) 337 | 338 | # print('===== dynamic link prediction =====') 339 | # embeddings_data = '../data/dblpDynamic_lp_cikm/result/10_apa+apcpa+aptpa_embedding.mat' 340 | # print(embeddings_data) 341 | # exp = Evaluation(embeddings_data) 342 | # exp.get_good_author() 343 | # exp.load_embeddings() 344 | # exp.link_prediction_with_auc() 345 | 346 | # retrain 347 | print ('DBLP') 348 | print ('retrain...') 349 | embeddings_data = '../data/dblp/result/unify_0.1apa+1apcpa+0.9aptpa_embedding_d100.mat' 350 | exp = Evaluation(embeddings_data) 351 | exp.load_embeddings() 352 | exp.new_cl() 353 | 354 | # dynamic 355 | print ('dynamic...') 356 | embeddings_data = '../data/dblpDynamic/result/10_0.1apa+1apcpa+0.9aptpa_embedding.mat' 357 | exp = Evaluation(embeddings_data) 358 | exp.load_embeddings() 359 | exp.new_cl() -------------------------------------------------------------------------------- /code/utils/litekmeans.m: -------------------------------------------------------------------------------- 1 | function [label, center, bCon, sumD, D] = litekmeans(X, k, varargin) 2 | %LITEKMEANS K-means clustering, accelerated by matlab matrix operations. 3 | % 4 | % label = LITEKMEANS(X, K) partitions the points in the N-by-P data matrix 5 | % X into K clusters. This partition minimizes the sum, over all 6 | % clusters, of the within-cluster sums of point-to-cluster-centroid 7 | % distances. Rows of X correspond to points, columns correspond to 8 | % variables. KMEANS returns an N-by-1 vector label containing the 9 | % cluster indices of each point. 10 | % 11 | % [label, center] = LITEKMEANS(X, K) returns the K cluster centroid 12 | % locations in the K-by-P matrix center. 13 | % 14 | % [label, center, bCon] = LITEKMEANS(X, K) returns the bool value bCon to 15 | % indicate whether the iteration is converged. 16 | % 17 | % [label, center, bCon, SUMD] = LITEKMEANS(X, K) returns the 18 | % within-cluster sums of point-to-centroid distances in the 1-by-K vector 19 | % sumD. 20 | % 21 | % [label, center, bCon, SUMD, D] = LITEKMEANS(X, K) returns 22 | % distances from each point to every centroid in the N-by-K matrix D. 23 | % 24 | % [ ... ] = LITEKMEANS(..., 'PARAM1',val1, 'PARAM2',val2, ...) 
specifies 25 | % optional parameter name/value pairs to control the iterative algorithm 26 | % used by KMEANS. Parameters are: 27 | % 28 | % 'Distance' - Distance measure, in P-dimensional space, that KMEANS 29 | % should minimize with respect to. Choices are: 30 | % {'sqEuclidean'} - Squared Euclidean distance (the default) 31 | % 'cosine' - One minus the cosine of the included angle 32 | % between points (treated as vectors). Each 33 | % row of X SHOULD be normalized to unit. If 34 | % the intial center matrix is provided, it 35 | % SHOULD also be normalized. 36 | % 37 | % 'Start' - Method used to choose initial cluster centroid positions, 38 | % sometimes known as "seeds". Choices are: 39 | % {'sample'} - Select K observations from X at random (the default) 40 | % 'cluster' - Perform preliminary clustering phase on random 10% 41 | % subsample of X. This preliminary phase is itself 42 | % initialized using 'sample'. An additional parameter 43 | % clusterMaxIter can be used to control the maximum 44 | % number of iterations in each preliminary clustering 45 | % problem. 46 | % matrix - A K-by-P matrix of starting locations; or a K-by-1 47 | % indicate vector indicating which K points in X 48 | % should be used as the initial center. In this case, 49 | % you can pass in [] for K, and KMEANS infers K from 50 | % the first dimension of the matrix. 51 | % 52 | % 'MaxIter' - Maximum number of iterations allowed. Default is 100. 53 | % 54 | % 'Replicates' - Number of times to repeat the clustering, each with a 55 | % new set of initial centroids. Default is 1. If the 56 | % initial centroids are provided, the replicate will be 57 | % automatically set to be 1. 58 | % 59 | % 'clusterMaxIter' - Only useful when 'Start' is 'cluster'. Maximum number 60 | % of iterations of the preliminary clustering phase. 61 | % Default is 10. 62 | % 63 | % 64 | % Examples: 65 | % 66 | % fea = rand(500,10); 67 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50); 68 | % 69 | % fea = rand(500,10); 70 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Replicates', 10); 71 | % 72 | % fea = rand(500,10); 73 | % [label, center, bCon, sumD, D] = litekmeans(fea, 5, 'MaxIter', 50); 74 | % TSD = sum(sumD); 75 | % 76 | % fea = rand(500,10); 77 | % initcenter = rand(5,10); 78 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Start', initcenter); 79 | % 80 | % fea = rand(500,10); 81 | % idx=randperm(500); 82 | % [label, center] = litekmeans(fea, 5, 'MaxIter', 50, 'Start', idx(1:5)); 83 | % 84 | % 85 | % See also KMEANS 86 | % 87 | % [Cite] Deng Cai, "Litekmeans: the fastest matlab implementation of 88 | % kmeans," Available at: 89 | % http://www.zjucadcg.cn/dengcai/Data/Clustering.html, 2011. 
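The main loop that follows computes all point-to-center distances at once by expanding the squared Euclidean distance, ||x - c||^2 = ||x||^2 + ||c||^2 - 2*x'*c, and then clamps tiny negative values caused by floating-point round-off. A NumPy rendering of the same trick on toy data (the sizes here are arbitrary, nothing repo-specific):

```python
# The vectorized pairwise-distance trick litekmeans relies on:
# ||x - c||^2 = ||x||^2 + ||c||^2 - 2*x.c, computed without explicit loops.
import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(500, 10)                              # n x p data points
C = X[rng.choice(500, 5, replace=False)]           # 5 initial centers sampled from X

aa = (X * X).sum(axis=1)[:, None]                  # ||x||^2 as a column vector
bb = (C * C).sum(axis=1)[None, :]                  # ||c||^2 as a row vector
D2 = aa + bb - 2 * X @ C.T                         # n x k squared distances
D2[D2 < 0] = 0                                     # clamp round-off negatives, like D(D<0) = 0
label = D2.argmin(axis=1)                          # nearest-center assignment
print(label[:10])
```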
90 | % 91 | % version 2.0 --December/2011 92 | % version 1.0 --November/2011 93 | % 94 | % Written by Deng Cai (dengcai AT gmail.com) 95 | 96 | 97 | if nargin < 2 98 | error('litekmeans:TooFewInputs','At least two input arguments required.'); 99 | end 100 | 101 | [n, p] = size(X); 102 | 103 | 104 | pnames = { 'distance' 'start' 'maxiter' 'replicates' 'onlinephase' 'clustermaxiter'}; 105 | dflts = {'sqeuclidean' 'sample' [] [] 'off' [] }; 106 | [eid,errmsg,distance,start,maxit,reps,online,clustermaxit] = getargs(pnames, dflts, varargin{:}); 107 | if ~isempty(eid) 108 | error(sprintf('litekmeans:%s',eid),errmsg); 109 | end 110 | 111 | if ischar(distance) 112 | distNames = {'sqeuclidean','cosine'}; 113 | j = strcmpi(distance, distNames); 114 | j = find(j); 115 | if length(j) > 1 116 | error('litekmeans:AmbiguousDistance', ... 117 | 'Ambiguous ''Distance'' parameter value: %s.', distance); 118 | elseif isempty(j) 119 | error('litekmeans:UnknownDistance', ... 120 | 'Unknown ''Distance'' parameter value: %s.', distance); 121 | end 122 | distance = distNames{j}; 123 | else 124 | error('litekmeans:InvalidDistance', ... 125 | 'The ''Distance'' parameter value must be a string.'); 126 | end 127 | 128 | 129 | center = []; 130 | if ischar(start) 131 | startNames = {'sample','cluster'}; 132 | j = find(strncmpi(start,startNames,length(start))); 133 | if length(j) > 1 134 | error(message('litekmeans:AmbiguousStart', start)); 135 | elseif isempty(j) 136 | error(message('litekmeans:UnknownStart', start)); 137 | elseif isempty(k) 138 | error('litekmeans:MissingK', ... 139 | 'You must specify the number of clusters, K.'); 140 | end 141 | if j == 2 142 | if floor(.1*n) < 5*k 143 | j = 1; 144 | end 145 | end 146 | start = startNames{j}; 147 | elseif isnumeric(start) 148 | if size(start,2) == p 149 | center = start; 150 | elseif (size(start,2) == 1 || size(start,1) == 1) 151 | center = X(start,:); 152 | else 153 | error('litekmeans:MisshapedStart', ... 154 | 'The ''Start'' matrix must have the same number of columns as X.'); 155 | end 156 | if isempty(k) 157 | k = size(center,1); 158 | elseif (k ~= size(center,1)) 159 | error('litekmeans:MisshapedStart', ... 160 | 'The ''Start'' matrix must have K rows.'); 161 | end 162 | start = 'numeric'; 163 | else 164 | error('litekmeans:InvalidStart', ... 165 | 'The ''Start'' parameter value must be a string or a numeric matrix or array.'); 166 | end 167 | 168 | % The maximum iteration number is default 100 169 | if isempty(maxit) 170 | maxit = 100; 171 | end 172 | 173 | % The maximum iteration number for preliminary clustering phase on random 174 | % 10% subsamples is default 10 175 | if isempty(clustermaxit) 176 | clustermaxit = 10; 177 | end 178 | 179 | 180 | % Assume one replicate 181 | if isempty(reps) || ~isempty(center) 182 | reps = 1; 183 | end 184 | 185 | if ~(isscalar(k) && isnumeric(k) && isreal(k) && k > 0 && (round(k)==k)) 186 | error('litekmeans:InvalidK', ... 187 | 'X must be a positive integer value.'); 188 | elseif n < k 189 | error('litekmeans:TooManyClusters', ... 
190 |         'X must have more rows than the number of clusters.');
191 | end
192 | 
193 | 
194 | bestlabel = [];
195 | sumD = zeros(1,k);
196 | bCon = false;
197 | 
198 | for t=1:reps
199 |     switch start
200 |         case 'sample'
201 |             center = X(randsample(n,k),:);
202 |         case 'cluster'
203 |             Xsubset = X(randsample(n,floor(.1*n)),:);
204 |             [dump, center] = litekmeans(Xsubset, k, varargin{:}, 'start','sample', 'replicates',1 ,'MaxIter',clustermaxit);
205 |         case 'numeric'
206 |     end
207 | 
208 |     last = 0;label=1;
209 |     it=0;
210 | 
211 |     switch distance
212 |         case 'sqeuclidean'
213 |             while any(label ~= last) && it<maxit
214 |                 last = label;
215 | 
216 |                 bb = full(sum(center.*center,2)');
217 |                 ab = full(X*center');
218 |                 D = bb(ones(1,n),:) - 2*ab;
219 | 
220 |                 [val,label] = min(D,[],2); % assign samples to the nearest centers
221 |                 ll = unique(label);
222 |                 if length(ll) < k
223 |                     % if some clusters are empty, reassign the farthest points to them
224 |                     missCluster = 1:k;
225 |                     missCluster(ll) = [];
226 |                     missNum = length(missCluster);
227 | 
228 |                     aa = sum(X.*X,2);
229 |                     val = aa + val;
230 |                     [dump,idx] = sort(val,1,'descend');
231 |                     label(idx(1:missNum)) = missCluster;
232 |                 end
233 |                 E = sparse(1:n,label,1,n,k,n);  % transform label into indicator matrix
234 |                 center = full((E*spdiags(1./sum(E,1)',0,k,k))'*X);  % compute center of each cluster
235 |                 it = it+1;
236 |             end
237 |             if it < maxit
238 |                 bCon = true;
239 |             end
240 |             if isempty(bestlabel)
241 |                 bestlabel = label;
242 |                 bestcenter = center;
243 |                 if reps>1
244 |                     if it>=maxit
245 |                         aa = full(sum(X.*X,2));
246 |                         bb = full(sum(center.*center,2));
247 |                         ab = full(X*center');
248 |                         D = bsxfun(@plus,aa,bb') - 2*ab;
249 |                         D(D<0) = 0;
250 |                     else
251 |                         aa = full(sum(X.*X,2));
252 |                         D = aa(:,ones(1,k)) + D;
253 |                         D(D<0) = 0;
254 |                     end
255 |                     D = sqrt(D);
256 |                     for j = 1:k
257 |                         sumD(j) = sum(D(label==j,j));
258 |                     end
259 |                     bestsumD = sumD;
260 |                     bestD = D;
261 |                 end
262 |             else
263 |                 if it>=maxit
264 |                     aa = full(sum(X.*X,2));
265 |                     bb = full(sum(center.*center,2));
266 |                     ab = full(X*center');
267 |                     D = bsxfun(@plus,aa,bb') - 2*ab;
268 |                     D(D<0) = 0;
269 |                 else
270 |                     aa = full(sum(X.*X,2));
271 |                     D = aa(:,ones(1,k)) + D;
272 |                     D(D<0) = 0;
273 |                 end
274 |                 D = sqrt(D);
275 |                 for j = 1:k
276 |                     sumD(j) = sum(D(label==j,j));
277 |                 end
278 |                 if sum(sumD) < sum(bestsumD)
279 |                     bestlabel = label;
280 |                     bestcenter = center;
281 |                     bestsumD = sumD;
282 |                     bestD = D;
283 |                 end
284 |             end
285 |         case 'cosine'
286 |             while any(label ~= last) && it<maxit
287 |                 last = label;
288 |                 W = full(X*center');
289 |                 [val,label] = max(W,[],2); % assign samples to the nearest centers
290 |                 ll = unique(label);
291 |                 if length(ll) < k
292 |                     missCluster = 1:k;
293 |                     missCluster(ll) = [];
294 |                     missNum = length(missCluster);
295 |                     [dump,idx] = sort(val);
296 |                     label(idx(1:missNum)) = missCluster;
297 |                 end
298 |                 E = sparse(1:n,label,1,n,k,n);  % transform label into indicator matrix
299 |                 center = full((E*spdiags(1./sum(E,1)',0,k,k))'*X);  % compute center of each cluster
300 |                 centernorm = sqrt(sum(center.^2, 2));
301 |                 center = center ./ centernorm(:,ones(1,p));
302 |                 it = it+1;
303 |             end
304 |             if it < maxit
305 |                 bCon = true;
306 |             end
307 |             if isempty(bestlabel)
308 |                 bestlabel = label;
309 |                 bestcenter = center;
310 |                 if reps>1
311 |                     if any(label ~= last)
312 |                         W=full(X*center');
313 |                     end
314 |                     D = 1-W;
315 |                     for j = 1:k
316 |                         sumD(j) = sum(D(label==j,j));
317 |                     end
318 |                     bestsumD = sumD;
319 |                     bestD = D;
320 |                 end
321 |             else
322 |                 if any(label ~= last)
323 |                     W=full(X*center');
324 |                 end
325 |                 D = 1-W;
326 |                 for j = 1:k
327 |                     sumD(j) = sum(D(label==j,j));
328 |                 end
329 |                 if sum(sumD) < sum(bestsumD)
330 |                     bestlabel = label;
331 |                     bestcenter = center;
332 |                     bestsumD = sumD;
333 |                     bestD = D;
334 |                 end
335 |             end
336 |     end
337 | end
338 | 
339 | label = bestlabel;
340 | center = bestcenter;
341 | if reps>1
342 |     sumD = bestsumD;
343 |     D = bestD;
344 | elseif nargout > 3
345 |     switch distance
346 |         case 'sqeuclidean'
347 |             if it>=maxit
348 |                 aa = full(sum(X.*X,2));
349 |                 bb = full(sum(center.*center,2));
350 |                 ab = full(X*center');
351 |                 D = bsxfun(@plus,aa,bb') - 2*ab;
352 |                 D(D<0) = 0;
353 |             else
354 |                 aa = full(sum(X.*X,2));
355 |                 D = aa(:,ones(1,k)) + D;
356 |                 D(D<0) = 0;
357 |             end
358 |             D = sqrt(D);
359 |         case 'cosine'
360 |             if it>=maxit
361 |                 W=full(X*center');
362 |             end
363 |             D = 1-W;
364 |     end
365 |     for j = 1:k
366 |         sumD(j) = sum(D(label==j,j));
367 |     end
368 | end
369 | 
370 | 
371 | 
372 | 
373 | function [eid,emsg,varargout]=getargs(pnames,dflts,varargin)
374 | %GETARGS Process parameter name/value pairs
375 | %   [EID,EMSG,A,B,...]=GETARGS(PNAMES,DFLTS,'NAME1',VAL1,'NAME2',VAL2,...)
376 | %   accepts a cell array PNAMES of valid parameter names, a cell array
377 | %   DFLTS of default values for the parameters named in PNAMES, and
378 | %   additional parameter name/value pairs. Returns parameter values A,B,...
379 | %   in the same order as the names in PNAMES. Outputs corresponding to
380 | %   entries in PNAMES that are not specified in the name/value pairs are
381 | %   set to the corresponding value from DFLTS. If nargout is equal to
382 | %   length(PNAMES)+1, then unrecognized name/value pairs are an error.
If 383 | % nargout is equal to length(PNAMES)+2, then all unrecognized name/value 384 | % pairs are returned in a single cell array following any other outputs. 385 | % 386 | % EID and EMSG are empty if the arguments are valid. If an error occurs, 387 | % EMSG is the text of an error message and EID is the final component 388 | % of an error message id. GETARGS does not actually throw any errors, 389 | % but rather returns EID and EMSG so that the caller may throw the error. 390 | % Outputs will be partially processed after an error occurs. 391 | % 392 | % This utility can be used for processing name/value pair arguments. 393 | % 394 | % Example: 395 | % pnames = {'color' 'linestyle', 'linewidth'} 396 | % dflts = { 'r' '_' '1'} 397 | % varargin = {{'linew' 2 'nonesuch' [1 2 3] 'linestyle' ':'} 398 | % [eid,emsg,c,ls,lw] = statgetargs(pnames,dflts,varargin{:}) % error 399 | % [eid,emsg,c,ls,lw,ur] = statgetargs(pnames,dflts,varargin{:}) % ok 400 | 401 | % We always create (nparams+2) outputs: 402 | % one each for emsg and eid 403 | % nparams varargs for values corresponding to names in pnames 404 | % If they ask for one more (nargout == nparams+3), it's for unrecognized 405 | % names/values 406 | 407 | % Original Copyright 1993-2008 The MathWorks, Inc. 408 | % Modified by Deng Cai (dengcai@gmail.com) 2011.11.27 409 | 410 | 411 | 412 | 413 | % Initialize some variables 414 | emsg = ''; 415 | eid = ''; 416 | nparams = length(pnames); 417 | varargout = dflts; 418 | unrecog = {}; 419 | nargs = length(varargin); 420 | 421 | % Must have name/value pairs 422 | if mod(nargs,2)~=0 423 | eid = 'WrongNumberArgs'; 424 | emsg = 'Wrong number of arguments.'; 425 | else 426 | % Process name/value pairs 427 | for j=1:2:nargs 428 | pname = varargin{j}; 429 | if ~ischar(pname) 430 | eid = 'BadParamName'; 431 | emsg = 'Parameter name must be text.'; 432 | break; 433 | end 434 | i = strcmpi(pname,pnames); 435 | i = find(i); 436 | if isempty(i) 437 | % if they've asked to get back unrecognized names/values, add this 438 | % one to the list 439 | if nargout > nparams+2 440 | unrecog((end+1):(end+2)) = {varargin{j} varargin{j+1}}; 441 | % otherwise, it's an error 442 | else 443 | eid = 'BadParamName'; 444 | emsg = sprintf('Invalid parameter name: %s.',pname); 445 | break; 446 | end 447 | elseif length(i)>1 448 | eid = 'BadParamName'; 449 | emsg = sprintf('Ambiguous parameter name: %s.',pname); 450 | break; 451 | else 452 | varargout{i} = varargin{j+1}; 453 | end 454 | end 455 | end 456 | 457 | varargout{nparams+1} = unrecog; -------------------------------------------------------------------------------- /code/utils/constructW.m: -------------------------------------------------------------------------------- 1 | function [W, elapse, M] = constructW(fea,options) 2 | % Usage: 3 | % W = constructW(fea,options) 4 | % 5 | % fea: Rows of vectors of data points. Each row is x_i 6 | % options: Struct value in Matlab. The fields in options that can be set: 7 | % Metric - Choices are: 8 | % 'Euclidean' - Will use the Euclidean distance of two data 9 | % points to evaluate the "closeness" between 10 | % them. [Default One] 11 | % 'Cosine' - Will use the cosine value of two vectors 12 | % to evaluate the "closeness" between them. 13 | % A popular similarity measure used in 14 | % Information Retrieval. 15 | % 16 | % NeighborMode - Indicates how to construct the graph. 
Choices
17 | %                       are: [Default 'KNN']
18 | %             'KNN'        -  k = 0
19 | %                                Complete graph
20 | %                             k > 0
21 | %                                Put an edge between two nodes if and
22 | %                                only if they are among the k nearest
23 | %                                neighbors of each other. You are
24 | %                                required to provide the parameter k in
25 | %                                the options. Default k=5.
26 | %             'Supervised' -  k = 0
27 | %                                Put an edge between two nodes if and
28 | %                                only if they belong to same class.
29 | %                             k > 0
30 | %                                Put an edge between two nodes if
31 | %                                they belong to same class and they
32 | %                                are among the k nearest neighbors of
33 | %                                each other.
34 | %                             Default: k=0
35 | %                             You are required to provide the label
36 | %                             information gnd in the options.
37 | %
38 | %     WeightMode   -  Indicates how to assign weights for each edge
39 | %                     in the graph. Choices are:
40 | %             'Binary'     - 0-1 weighting. Every edge receives a weight
41 | %                            of 1. [Default One]
42 | %             'HeatKernel' - If nodes i and j are connected, put weight
43 | %                            W_ij = exp(-norm(x_i - x_j)^2/(2t^2)). This
44 | %                            weight mode can only be used under
45 | %                            'Euclidean' metric and you are required to
46 | %                            provide the parameter t.
47 | %             'Cosine'     - If nodes i and j are connected, put weight
48 | %                            cosine(x_i,x_j). Can only be used under
49 | %                            'Cosine' metric.
50 | %
51 | %     k            -  The parameter needed under 'KNN' NeighborMode.
52 | %                     Default will be 5.
53 | %     gnd          -  The parameter needed under 'Supervised'
54 | %                     NeighborMode. Column vector of the label
55 | %                     information for each data point.
56 | %     bLDA         -  0 or 1. Only effective under 'Supervised'
57 | %                     NeighborMode. If 1, the graph will be constructed
58 | %                     to make LPP exactly same as LDA. Default will be
59 | %                     0.
60 | %     t            -  The parameter needed under 'HeatKernel'
61 | %                     WeightMode. Default will be 1
62 | %     bNormalized  -  0 or 1. Only effective under 'Cosine' metric.
63 | %                     Indicates whether the fea are already
64 | %                     normalized to 1. Default will be 0
65 | %     bSelfConnected - 0 or 1. Indicates whether W(i,i) == 1. Default 1
66 | %                     if 'Supervised' NeighborMode & bLDA == 1,
67 | %                     bSelfConnected will always be 1. Default 1.
68 | %
69 | %
70 | %    Examples:
71 | %
72 | %       fea = rand(50,15);
73 | %       options = [];
74 | %       options.Metric = 'Euclidean';
75 | %       options.NeighborMode = 'KNN';
76 | %       options.k = 5;
77 | %       options.WeightMode = 'HeatKernel';
78 | %       options.t = 1;
79 | %       W = constructW(fea,options);
80 | %
81 | %
82 | %       fea = rand(50,15);
83 | %       gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4];
84 | %       options = [];
85 | %       options.Metric = 'Euclidean';
86 | %       options.NeighborMode = 'Supervised';
87 | %       options.gnd = gnd;
88 | %       options.WeightMode = 'HeatKernel';
89 | %       options.t = 1;
90 | %       W = constructW(fea,options);
91 | %
92 | %
93 | %       fea = rand(50,15);
94 | %       gnd = [ones(10,1);ones(15,1)*2;ones(10,1)*3;ones(15,1)*4];
95 | %       options = [];
96 | %       options.Metric = 'Euclidean';
97 | %       options.NeighborMode = 'Supervised';
98 | %       options.gnd = gnd;
99 | %       options.bLDA = 1;
100 | %       W = constructW(fea,options);
101 | %
102 | %
103 | %    For more details about the different ways to construct the W, please
104 | %    refer:
105 | %       Deng Cai, Xiaofei He and Jiawei Han, "Document Clustering Using
106 | %       Locality Preserving Indexing" IEEE TKDE, Dec. 2005.
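For reference from the Python side of this repository, the 'KNN' + 'HeatKernel' mode documented above amounts to a symmetrized k-nearest-neighbor graph with Gaussian edge weights on the squared distances. A small scikit-learn sketch (the 50x15 random matrix mirrors the help example above; nothing here is repo-specific):

```python
# Python sketch of constructW's 'KNN' + 'HeatKernel' mode:
# connect k-nearest neighbors and weight edges by exp(-d^2 / (2 t^2)).
import numpy as np
from sklearn.neighbors import kneighbors_graph

fea = np.random.RandomState(0).rand(50, 15)
t = 1.0
# mode='distance' stores Euclidean distances to the k nearest neighbors
G = kneighbors_graph(fea, n_neighbors=5, mode='distance', include_self=False)
G.data = np.exp(-G.data ** 2 / (2 * t ** 2))       # heat-kernel weights
W = G.maximum(G.T)                                 # symmetrize, like max(G, G') in MATLAB
print(W.shape, W.nnz)
```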
107 | % 108 | % 109 | % Written by Deng Cai (dengcai2 AT cs.uiuc.edu), April/2004, Feb/2006, 110 | % May/2007 111 | % 112 | 113 | if (~exist('options','var')) 114 | options = []; 115 | else 116 | if ~isstruct(options) 117 | error('parameter error!'); 118 | end 119 | end 120 | 121 | %===================add for LLE=========================== 122 | if isfield(options,'LLE') && options.LLE 123 | tmp_T = cputime; 124 | [W M]=LLE_Matrix(fea',options.k,options.regLLE); 125 | elapse=cputime-tmp_T; 126 | return 127 | end 128 | 129 | %================================================= 130 | if ~isfield(options,'Metric') 131 | options.Metric = 'Cosine'; 132 | end 133 | 134 | switch lower(options.Metric) 135 | case {lower('Euclidean')} %Euclidean distance 136 | case {lower('Cosine')} 137 | if ~isfield(options,'bNormalized') 138 | options.bNormalized = 0; 139 | end 140 | otherwise 141 | error('Metric does not exist!'); 142 | end 143 | 144 | %================================================= 145 | if ~isfield(options,'NeighborMode') 146 | options.NeighborMode = 'KNN'; 147 | end 148 | 149 | switch lower(options.NeighborMode) 150 | case {lower('KNN')} %For simplicity, we include the data point itself in the kNN 151 | if ~isfield(options,'k') 152 | options.k = 5; 153 | end 154 | case {lower('Supervised')} 155 | if ~isfield(options,'bLDA') 156 | options.bLDA = 0; 157 | end 158 | if options.bLDA 159 | options.bSelfConnected = 1; 160 | end 161 | if ~isfield(options,'k') 162 | options.k = 0; 163 | end 164 | if ~isfield(options,'gnd') 165 | error('Label(gnd) should be provided under ''Supervised'' NeighborMode!'); 166 | end 167 | if ~isempty(fea) && length(options.gnd) ~= size(fea,1) 168 | error('gnd doesn''t match with fea!'); 169 | end 170 | otherwise 171 | error('NeighborMode does not exist!'); 172 | end 173 | 174 | %================================================= 175 | 176 | if ~isfield(options,'WeightMode') 177 | options.WeightMode = 'Binary'; 178 | end 179 | 180 | bBinary = 0; 181 | switch lower(options.WeightMode) 182 | case {lower('Binary')} 183 | bBinary = 1; 184 | case {lower('HeatKernel')} 185 | if ~strcmpi(options.Metric,'Euclidean') 186 | warning('''HeatKernel'' WeightMode should be used under ''Euclidean'' Metric!'); 187 | options.Metric = 'Euclidean'; 188 | end 189 | if ~isfield(options,'t') 190 | options.t = 1; 191 | end 192 | case {lower('Cosine')} 193 | if ~strcmpi(options.Metric,'Cosine') 194 | warning('''Cosine'' WeightMode should be used under ''Cosine'' Metric!'); 195 | options.Metric = 'Cosine'; 196 | end 197 | if ~isfield(options,'bNormalized') 198 | options.bNormalized = 0; 199 | end 200 | otherwise 201 | error('WeightMode does not exist!'); 202 | end 203 | 204 | %================================================= 205 | 206 | if ~isfield(options,'bSelfConnected') 207 | options.bSelfConnected = 1; 208 | end 209 | 210 | %================================================= 211 | tmp_T = cputime; 212 | 213 | if isfield(options,'gnd') 214 | nSmp = length(options.gnd); 215 | else 216 | nSmp = size(fea,1); 217 | end 218 | maxM = 62500000; %500M 219 | BlockSize = floor(maxM/(nSmp*3)); 220 | 221 | 222 | if strcmpi(options.NeighborMode,'Supervised') 223 | Label = unique(options.gnd); 224 | nLabel = length(Label); 225 | if options.bLDA 226 | G = zeros(nSmp,nSmp); 227 | for idx=1:nLabel 228 | classIdx = options.gnd==Label(idx); 229 | G(classIdx,classIdx) = 1/sum(classIdx); 230 | end 231 | W = sparse(G); 232 | elapse = cputime - tmp_T; 233 | return; 234 | end 235 | 236 | switch 
lower(options.WeightMode) 237 | case {lower('Binary')} 238 | if options.k > 0 239 | G = zeros(nSmp*(options.k+1),3); 240 | idNow = 0; 241 | for i=1:nLabel 242 | classIdx = find(options.gnd==Label(i)); 243 | D = EuDist2(fea(classIdx,:),[],0); 244 | [dump idx] = sort(D,2); % sort each row 245 | clear D dump; 246 | 247 | idx = idx(:,1:options.k+1); 248 | 249 | 250 | nSmpClass = length(classIdx)*(options.k+1); 251 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 252 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 253 | G(idNow+1:nSmpClass+idNow,3) = 1; 254 | idNow = idNow+nSmpClass; 255 | clear idx 256 | end 257 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 258 | G = max(G,G'); 259 | else 260 | G = zeros(nSmp,nSmp); 261 | for i=1:nLabel 262 | classIdx = find(options.gnd==Label(i)); 263 | G(classIdx,classIdx) = 1; 264 | end 265 | end 266 | 267 | if ~options.bSelfConnected 268 | for i=1:size(G,1) 269 | G(i,i) = 0; 270 | end 271 | end 272 | 273 | W = sparse(G); 274 | case {lower('HeatKernel')} 275 | if options.k > 0 276 | G = zeros(nSmp*(options.k+1),3); 277 | idNow = 0; 278 | for i=1:nLabel 279 | classIdx = find(options.gnd==Label(i)); 280 | D = EuDist2(fea(classIdx,:),[],0); 281 | [dump idx] = sort(D,2); % sort each row 282 | clear D; 283 | idx = idx(:,1:options.k+1); 284 | dump = dump(:,1:options.k+1); 285 | dump = exp(-dump/(2*options.t^2)); 286 | 287 | nSmpClass = length(classIdx)*(options.k+1); 288 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 289 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 290 | G(idNow+1:nSmpClass+idNow,3) = dump(:); 291 | idNow = idNow+nSmpClass; 292 | clear dump idx 293 | end 294 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 295 | else 296 | G = zeros(nSmp,nSmp); 297 | for i=1:nLabel 298 | classIdx = find(options.gnd==Label(i)); 299 | D = EuDist2(fea(classIdx,:),[],0); 300 | D = exp(-D/(2*options.t^2)); 301 | G(classIdx,classIdx) = D; 302 | end 303 | end 304 | 305 | if ~options.bSelfConnected 306 | for i=1:size(G,1) 307 | G(i,i) = 0; 308 | end 309 | end 310 | 311 | W = sparse(max(G,G')); 312 | case {lower('Cosine')} 313 | if ~options.bNormalized 314 | [nSmp, nFea] = size(fea); 315 | if issparse(fea) 316 | fea2 = fea'; 317 | feaNorm = sum(fea2.^2,1).^.5; 318 | for i = 1:nSmp 319 | fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); 320 | end 321 | fea = fea2'; 322 | clear fea2; 323 | else 324 | feaNorm = sum(fea.^2,2).^.5; 325 | for i = 1:nSmp 326 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 327 | end 328 | end 329 | 330 | end 331 | 332 | if options.k > 0 333 | G = zeros(nSmp*(options.k+1),3); 334 | idNow = 0; 335 | for i=1:nLabel 336 | classIdx = find(options.gnd==Label(i)); 337 | D = fea(classIdx,:)*fea(classIdx,:)'; 338 | [dump idx] = sort(-D,2); % sort each row 339 | clear D; 340 | idx = idx(:,1:options.k+1); 341 | dump = -dump(:,1:options.k+1); 342 | 343 | nSmpClass = length(classIdx)*(options.k+1); 344 | G(idNow+1:nSmpClass+idNow,1) = repmat(classIdx,[options.k+1,1]); 345 | G(idNow+1:nSmpClass+idNow,2) = classIdx(idx(:)); 346 | G(idNow+1:nSmpClass+idNow,3) = dump(:); 347 | idNow = idNow+nSmpClass; 348 | clear dump idx 349 | end 350 | G = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 351 | else 352 | G = zeros(nSmp,nSmp); 353 | for i=1:nLabel 354 | classIdx = find(options.gnd==Label(i)); 355 | G(classIdx,classIdx) = fea(classIdx,:)*fea(classIdx,:)'; 356 | end 357 | end 358 | 359 | if ~options.bSelfConnected 360 | for i=1:size(G,1) 361 | G(i,i) = 0; 362 | end 363 | end 364 | 365 | W = sparse(max(G,G')); 366 | otherwise 367 | 
error('WeightMode does not exist!'); 368 | end 369 | elapse = cputime - tmp_T; 370 | return; 371 | end 372 | 373 | 374 | if strcmpi(options.NeighborMode,'KNN') && (options.k > 0) 375 | if strcmpi(options.Metric,'Euclidean') 376 | G = zeros(nSmp*(options.k+1),3); 377 | for i = 1:ceil(nSmp/BlockSize) 378 | if i == ceil(nSmp/BlockSize) 379 | smpIdx = (i-1)*BlockSize+1:nSmp; 380 | dist = EuDist2(fea(smpIdx,:),fea,0); 381 | dist = full(dist); 382 | [dump idx] = sort(dist,2); % sort each row 383 | idx = idx(:,1:options.k+1); 384 | dump = dump(:,1:options.k+1); 385 | if ~bBinary 386 | dump = exp(-dump/(2*options.t^2)); 387 | end 388 | 389 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 390 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); 391 | if ~bBinary 392 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); 393 | else 394 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = 1; 395 | end 396 | else 397 | smpIdx = (i-1)*BlockSize+1:i*BlockSize; 398 | dist = EuDist2(fea(smpIdx,:),fea,0); 399 | dist = full(dist); 400 | [dump idx] = sort(dist,2); % sort each row 401 | idx = idx(:,1:options.k+1); 402 | dump = dump(:,1:options.k+1); 403 | if ~bBinary 404 | dump = exp(-dump/(2*options.t^2)); 405 | end 406 | 407 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 408 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),2) = idx(:); 409 | if ~bBinary 410 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = dump(:); 411 | else 412 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = 1; 413 | end 414 | end 415 | end 416 | 417 | W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 418 | else 419 | if ~options.bNormalized 420 | [nSmp, nFea] = size(fea); 421 | if issparse(fea) 422 | fea2 = fea'; 423 | clear fea; 424 | for i = 1:nSmp 425 | fea2(:,i) = fea2(:,i) ./ max(1e-10,sum(fea2(:,i).^2,1).^.5); 426 | end 427 | fea = fea2'; 428 | clear fea2; 429 | else 430 | feaNorm = sum(fea.^2,2).^.5; 431 | for i = 1:nSmp 432 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 433 | end 434 | end 435 | end 436 | 437 | G = zeros(nSmp*(options.k+1),3); 438 | for i = 1:ceil(nSmp/BlockSize) 439 | if i == ceil(nSmp/BlockSize) 440 | smpIdx = (i-1)*BlockSize+1:nSmp; 441 | dist = fea(smpIdx,:)*fea'; 442 | dist = full(dist); 443 | [dump idx] = sort(-dist,2); % sort each row 444 | idx = idx(:,1:options.k+1); 445 | dump = -dump(:,1:options.k+1); 446 | 447 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 448 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),2) = idx(:); 449 | G((i-1)*BlockSize*(options.k+1)+1:nSmp*(options.k+1),3) = dump(:); 450 | else 451 | smpIdx = (i-1)*BlockSize+1:i*BlockSize; 452 | dist = fea(smpIdx,:)*fea'; 453 | dist = full(dist); 454 | [dump idx] = sort(-dist,2); % sort each row 455 | idx = idx(:,1:options.k+1); 456 | dump = -dump(:,1:options.k+1); 457 | 458 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),1) = repmat(smpIdx',[options.k+1,1]); 459 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),2) = idx(:); 460 | G((i-1)*BlockSize*(options.k+1)+1:i*BlockSize*(options.k+1),3) = dump(:); 461 | end 462 | end 463 | 464 | W = sparse(G(:,1),G(:,2),G(:,3),nSmp,nSmp); 465 | end 466 | 467 | if strcmpi(options.WeightMode,'Binary') 468 | W(find(W)) = 1; 469 | end 470 | 471 | if ~options.bSelfConnected 472 | for i=1:size(W,1) 473 | W(i,i) = 0; 474 | end 475 | end 476 | W = max(W,W'); 477 
| 478 | elapse = cputime - tmp_T; 479 | return; 480 | end 481 | 482 | 483 | % strcmpi(options.NeighborMode,'KNN') & (options.k == 0) 484 | % Complete Graph 485 | 486 | if strcmpi(options.Metric,'Euclidean') 487 | W = EuDist2(fea,[],0); 488 | W = exp(-W/(2*options.t^2)); 489 | else 490 | if ~options.bNormalized 491 | % feaNorm = sum(fea.^2,2).^.5; 492 | % fea = fea ./ repmat(max(1e-10,feaNorm),1,size(fea,2)); 493 | [nSmp, nFea] = size(fea); 494 | if issparse(fea) 495 | fea2 = fea'; 496 | feaNorm = sum(fea2.^2,1).^.5; 497 | for i = 1:nSmp 498 | fea2(:,i) = fea2(:,i) ./ max(1e-10,feaNorm(i)); 499 | end 500 | fea = fea2'; 501 | clear fea2; 502 | else 503 | feaNorm = sum(fea.^2,2).^.5; 504 | for i = 1:nSmp 505 | fea(i,:) = fea(i,:) ./ max(1e-12,feaNorm(i)); 506 | end 507 | end 508 | end 509 | 510 | W = full(fea*fea'); 511 | end 512 | 513 | if ~options.bSelfConnected 514 | for i=1:size(W,1) 515 | W(i,i) = 0; 516 | end 517 | end 518 | 519 | W = max(W,W'); 520 | 521 | 522 | 523 | elapse = cputime - tmp_T; 524 | 525 | 526 | -------------------------------------------------------------------------------- /data/yelp/business_category.txt: -------------------------------------------------------------------------------- 1 | 267 1 2 | 1328 1 3 | 698 1 4 | 2211 2 5 | 274 2 6 | 515 1 7 | 1287 1 8 | 1883 2 9 | 2188 2 10 | 1742 1 11 | 1751 1 12 | 848 2 13 | 1581 2 14 | 830 2 15 | 707 2 16 | 1821 2 17 | 1671 2 18 | 410 1 19 | 1184 0 20 | 696 1 21 | 1445 1 22 | 456 1 23 | 1221 0 24 | 601 2 25 | 1986 2 26 | 1861 1 27 | 2253 1 28 | 243 1 29 | 761 2 30 | 2129 1 31 | 2365 2 32 | 1991 2 33 | 527 1 34 | 1485 2 35 | 1370 1 36 | 894 1 37 | 2026 1 38 | 2421 2 39 | 224 0 40 | 1240 1 41 | 2113 2 42 | 783 1 43 | 2319 1 44 | 2342 2 45 | 2012 2 46 | 1443 0 47 | 2108 2 48 | 2057 2 49 | 426 0 50 | 1783 2 51 | 1619 0 52 | 1775 2 53 | 1189 0 54 | 1156 2 55 | 713 0 56 | 2602 1 57 | 520 1 58 | 1082 1 59 | 153 1 60 | 1714 2 61 | 261 2 62 | 999 2 63 | 2264 2 64 | 2079 2 65 | 2096 2 66 | 433 1 67 | 2009 1 68 | 423 2 69 | 558 0 70 | 43 1 71 | 1232 2 72 | 2447 2 73 | 2027 2 74 | 1435 0 75 | 2302 1 76 | 879 1 77 | 2390 2 78 | 2035 2 79 | 1404 1 80 | 682 0 81 | 956 0 82 | 1597 2 83 | 953 1 84 | 1229 1 85 | 1219 0 86 | 2468 2 87 | 1962 2 88 | 1534 1 89 | 780 2 90 | 2050 2 91 | 2551 1 92 | 1850 2 93 | 151 1 94 | 391 1 95 | 2338 2 96 | 1165 1 97 | 1047 2 98 | 668 1 99 | 2557 2 100 | 1564 1 101 | 1342 1 102 | 1941 1 103 | 2357 2 104 | 1137 0 105 | 775 1 106 | 2118 1 107 | 1683 1 108 | 964 2 109 | 2177 1 110 | 2219 2 111 | 564 0 112 | 2327 2 113 | 1006 0 114 | 2064 1 115 | 611 1 116 | 695 1 117 | 296 0 118 | 1041 1 119 | 1508 1 120 | 290 1 121 | 65 1 122 | 1025 1 123 | 1781 2 124 | 581 2 125 | 1230 1 126 | 1085 2 127 | 2037 0 128 | 2470 2 129 | 2493 2 130 | 709 0 131 | 1699 1 132 | 1590 1 133 | 185 2 134 | 1314 1 135 | 1690 2 136 | 2448 2 137 | 298 1 138 | 680 1 139 | 394 0 140 | 317 1 141 | 1462 1 142 | 233 2 143 | 1662 2 144 | 198 1 145 | 2383 0 146 | 1510 2 147 | 640 0 148 | 2431 2 149 | 2028 1 150 | 1014 1 151 | 1175 1 152 | 1909 2 153 | 1574 1 154 | 247 1 155 | 870 1 156 | 1953 0 157 | 591 1 158 | 2450 2 159 | 1934 2 160 | 2098 2 161 | 1488 2 162 | 1170 1 163 | 324 0 164 | 1168 1 165 | 1659 2 166 | 2164 2 167 | 2085 0 168 | 1586 0 169 | 2094 1 170 | 1114 2 171 | 821 1 172 | 111 0 173 | 2527 1 174 | 2165 2 175 | 154 0 176 | 1460 2 177 | 1254 0 178 | 1527 2 179 | 2457 1 180 | 2530 1 181 | 1020 2 182 | 347 1 183 | 166 1 184 | 553 0 185 | 1453 2 186 | 2372 2 187 | 1792 2 188 | 164 1 189 | 1249 1 190 | 2020 2 191 | 1894 2 192 | 2243 2 193 | 
1689 1 194 | 646 1 195 | 860 1 196 | 649 1 197 | 1558 0 198 | 1833 1 199 | 167 0 200 | 2206 2 201 | 1281 2 202 | 169 1 203 | 1437 0 204 | 799 1 205 | 1623 0 206 | 1932 2 207 | 1206 1 208 | 202 1 209 | 2232 1 210 | 811 1 211 | 235 1 212 | 798 1 213 | 1864 2 214 | 1440 1 215 | 1118 0 216 | 1294 1 217 | 883 1 218 | 1381 1 219 | 173 1 220 | 1308 2 221 | 177 1 222 | 803 0 223 | 968 0 224 | 1797 1 225 | 493 2 226 | 929 1 227 | 1172 2 228 | 446 1 229 | 1886 2 230 | 2373 2 231 | 1357 0 232 | 1367 2 233 | 1966 2 234 | 2304 1 235 | 2442 2 236 | 1948 1 237 | 592 2 238 | 168 1 239 | 454 2 240 | 1655 0 241 | 444 0 242 | 1946 1 243 | 2071 2 244 | 2261 2 245 | 1090 1 246 | 74 0 247 | 1115 1 248 | 745 1 249 | 912 0 250 | 1844 2 251 | 2589 1 252 | 2476 2 253 | 2359 1 254 | 687 2 255 | 1666 1 256 | 828 1 257 | 2417 2 258 | 1267 2 259 | 1330 1 260 | 2231 0 261 | 2048 2 262 | 671 1 263 | 1355 0 264 | 2465 2 265 | 1716 2 266 | 762 1 267 | 200 1 268 | 1791 0 269 | 791 1 270 | 741 1 271 | 1158 2 272 | 997 1 273 | 11 1 274 | 1097 2 275 | 1525 2 276 | 1806 2 277 | 1679 2 278 | 1152 2 279 | 718 1 280 | 538 1 281 | 677 0 282 | 1853 2 283 | 932 2 284 | 1808 1 285 | 186 1 286 | 1081 2 287 | 1027 1 288 | 1074 1 289 | 1054 0 290 | 1838 1 291 | 316 0 292 | 1804 2 293 | 1417 1 294 | 373 1 295 | 2584 2 296 | 31 1 297 | 2492 2 298 | 2226 2 299 | 2297 2 300 | 240 0 301 | 1856 0 302 | 2103 1 303 | 1236 0 304 | 2004 2 305 | 51 0 306 | 1876 1 307 | 1124 1 308 | 647 0 309 | 1860 0 310 | 379 0 311 | 1859 1 312 | 180 2 313 | 342 2 314 | 355 0 315 | 1413 1 316 | 2107 2 317 | 33 0 318 | 2581 1 319 | 1513 1 320 | 1647 2 321 | 1258 0 322 | 1617 1 323 | 789 2 324 | 207 0 325 | 691 1 326 | 1937 1 327 | 1599 0 328 | 2021 2 329 | 60 1 330 | 726 2 331 | 1211 1 332 | 217 1 333 | 227 1 334 | 644 2 335 | 2387 2 336 | 1004 1 337 | 1670 2 338 | 1098 1 339 | 1099 1 340 | 1075 2 341 | 283 1 342 | 1482 1 343 | 531 1 344 | 985 1 345 | 1556 2 346 | 2554 1 347 | 1255 0 348 | 599 0 349 | 1339 2 350 | 2031 2 351 | 779 1 352 | 1318 1 353 | 1119 1 354 | 1711 0 355 | 2153 2 356 | 877 1 357 | 1088 1 358 | 788 2 359 | 2348 2 360 | 388 1 361 | 1146 1 362 | 2128 2 363 | 1346 0 364 | 552 2 365 | 915 1 366 | 2346 0 367 | 450 1 368 | 364 1 369 | 2277 2 370 | 121 0 371 | 1185 2 372 | 39 0 373 | 128 0 374 | 1335 1 375 | 567 1 376 | 2222 1 377 | 1003 2 378 | 134 1 379 | 1755 2 380 | 1 0 381 | 2599 2 382 | 2556 2 383 | 796 0 384 | 1474 2 385 | 2360 1 386 | 965 1 387 | 555 1 388 | 1734 2 389 | 133 2 390 | 50 0 391 | 1650 1 392 | 733 1 393 | 810 1 394 | 858 0 395 | 2578 2 396 | 872 2 397 | 2428 0 398 | 115 0 399 | 2520 2 400 | 1200 0 401 | 2378 1 402 | 1707 2 403 | 1918 2 404 | 2393 2 405 | 2566 0 406 | 1160 2 407 | 2196 1 408 | 2419 2 409 | 415 1 410 | 1022 2 411 | 2115 1 412 | 170 1 413 | 706 0 414 | 336 1 415 | 2162 1 416 | 623 2 417 | 1321 1 418 | 475 1 419 | 199 1 420 | 808 2 421 | 1385 1 422 | 25 1 423 | 805 1 424 | 1648 0 425 | 1663 2 426 | 2104 2 427 | 2186 2 428 | 2283 2 429 | 54 2 430 | 1371 2 431 | 2218 2 432 | 2452 2 433 | 1827 2 434 | 2077 2 435 | 1187 0 436 | 2137 2 437 | 0 1 438 | 1487 1 439 | 1668 2 440 | 82 1 441 | 2462 1 442 | 2601 2 443 | 818 1 444 | 2560 2 445 | 554 1 446 | 498 2 447 | 1629 1 448 | 119 2 449 | 1292 0 450 | 2067 2 451 | 1802 2 452 | 2478 2 453 | 2080 2 454 | 266 1 455 | 2086 2 456 | 86 1 457 | 864 2 458 | 279 2 459 | 2201 1 460 | 1268 1 461 | 1396 2 462 | 343 2 463 | 29 1 464 | 2420 1 465 | 2537 2 466 | 2163 1 467 | 1302 0 468 | 226 0 469 | 2479 0 470 | 725 0 471 | 2454 2 472 | 1810 2 473 | 2065 1 474 | 91 1 475 | 459 0 476 | 184 1 
477 | 1439 1 478 | 2191 2 479 | 1392 1 480 | 1073 2 481 | 2445 1 482 | 179 1 483 | 1799 2 484 | 1366 0 485 | 1486 0 486 | 886 1 487 | 1795 2 488 | 2088 1 489 | 2154 0 490 | 1785 2 491 | 2099 2 492 | 161 1 493 | 464 0 494 | 1382 2 495 | 2488 1 496 | 2344 0 497 | 1001 1 498 | 2034 1 499 | 1532 2 500 | 596 0 501 | 2535 2 502 | 56 0 503 | 598 1 504 | 1395 0 505 | 943 0 506 | 1752 2 507 | 1447 2 508 | 2213 2 509 | 143 1 510 | 206 2 511 | 1299 2 512 | 2256 1 513 | 1544 0 514 | 2515 2 515 | 249 1 516 | 209 0 517 | 1212 1 518 | 1911 2 519 | 1881 1 520 | 1540 1 521 | 884 1 522 | 218 1 523 | 10 1 524 | 139 0 525 | 1983 2 526 | 1504 0 527 | 500 1 528 | 852 2 529 | 2120 2 530 | 2364 2 531 | 1064 1 532 | 27 1 533 | 368 1 534 | 740 1 535 | 756 1 536 | 112 1 537 | 232 1 538 | 2073 2 539 | 2192 1 540 | 1133 0 541 | 281 0 542 | 2221 2 543 | 19 1 544 | 496 1 545 | 1051 1 546 | 1239 1 547 | 3 1 548 | 690 0 549 | 1127 2 550 | 2374 2 551 | 1275 1 552 | 861 1 553 | 685 2 554 | 1578 0 555 | 1469 2 556 | 1067 2 557 | 2299 1 558 | 467 0 559 | 681 1 560 | 2563 2 561 | 1248 1 562 | 577 0 563 | 194 0 564 | 776 1 565 | 1522 1 566 | 305 2 567 | 1594 1 568 | 254 2 569 | 1754 1 570 | 399 1 571 | 824 1 572 | 1140 2 573 | 287 1 574 | 187 2 575 | 892 0 576 | 1108 1 577 | 2544 2 578 | 34 0 579 | 657 2 580 | 6 1 581 | 2430 2 582 | 2198 1 583 | 1045 1 584 | 69 1 585 | 1793 2 586 | 490 0 587 | 1529 2 588 | 302 1 589 | 967 0 590 | 922 0 591 | 131 1 592 | 2318 2 593 | 1637 1 594 | 2314 2 595 | 1241 2 596 | 2514 2 597 | 1340 1 598 | 1607 2 599 | 944 0 600 | 107 1 601 | 1131 0 602 | 2239 2 603 | 890 0 604 | 562 1 605 | 1017 2 606 | 2126 1 607 | 1470 1 608 | 2150 0 609 | 2495 2 610 | 2438 1 611 | 1961 0 612 | 2140 2 613 | 1879 1 614 | 2398 2 615 | 1506 2 616 | 744 1 617 | 142 0 618 | 211 0 619 | 888 2 620 | 1457 1 621 | 1101 2 622 | 2533 1 623 | 838 2 624 | 2114 1 625 | 931 1 626 | 1050 1 627 | 2036 1 628 | 288 1 629 | 1034 0 630 | 1944 1 631 | 1348 2 632 | 1096 1 633 | 466 1 634 | 1949 1 635 | 2179 1 636 | 827 1 637 | 49 1 638 | 299 1 639 | 1512 0 640 | 429 1 641 | 1183 0 642 | 2029 1 643 | 370 2 644 | 2143 2 645 | 660 2 646 | 1263 2 647 | 584 2 648 | 972 1 649 | 2423 1 650 | 2545 1 651 | 607 1 652 | 1583 2 653 | 126 0 654 | 1038 0 655 | 1092 2 656 | 1166 2 657 | 2380 1 658 | 919 2 659 | 2155 2 660 | 871 2 661 | 417 1 662 | 338 1 663 | 995 1 664 | 2415 1 665 | 2081 1 666 | 375 1 667 | 755 2 668 | 265 1 669 | 2161 2 670 | 509 1 671 | 412 0 672 | 820 1 673 | 806 1 674 | 986 0 675 | 1819 1 676 | 2214 0 677 | 739 1 678 | 1293 0 679 | 812 1 680 | 1433 0 681 | 132 1 682 | 2225 2 683 | 913 0 684 | 1419 2 685 | 1455 1 686 | 1560 2 687 | 2180 0 688 | 650 1 689 | 2054 2 690 | 2294 2 691 | 1638 1 692 | 1780 1 693 | 1770 0 694 | 1162 2 695 | 2106 2 696 | 563 1 697 | 1667 2 698 | 2435 2 699 | 1747 1 700 | 1596 1 701 | 2172 1 702 | 2321 2 703 | 1077 0 704 | 1965 0 705 | 1585 2 706 | 1727 2 707 | 2516 2 708 | 1153 0 709 | 378 1 710 | 723 0 711 | 911 2 712 | 72 2 713 | 1603 2 714 | 710 2 715 | 341 2 716 | 2229 1 717 | 276 1 718 | 1972 2 719 | 1782 1 720 | 442 1 721 | 1830 1 722 | 2500 0 723 | 678 0 724 | 248 1 725 | 1467 2 726 | 1337 2 727 | 2416 2 728 | 1896 2 729 | 1537 1 730 | 1171 2 731 | 1015 2 732 | 286 0 733 | 1990 1 734 | 1425 1 735 | 2058 0 736 | 1543 2 737 | 2595 2 738 | 2576 2 739 | 1640 1 740 | 2053 2 741 | 1824 1 742 | 1313 2 743 | 2305 2 744 | 1491 2 745 | 59 0 746 | 123 2 747 | 2149 1 748 | 30 1 749 | 2068 1 750 | 1700 2 751 | 2010 1 752 | 1280 2 753 | 2461 1 754 | 2613 1 755 | 271 1 756 | 2208 2 757 | 1814 2 758 | 2355 2 759 | 120 
0 760 | 2405 0 761 | 371 1 762 | 1803 1 763 | 176 0 764 | 1350 2 765 | 2267 2 766 | 413 2 767 | 988 0 768 | 626 0 769 | 1476 1 770 | 2434 1 771 | 1359 1 772 | 2363 2 773 | 1033 1 774 | 529 0 775 | 1356 0 776 | 1625 1 777 | 1454 0 778 | 42 2 779 | 315 0 780 | 1237 2 781 | 630 1 782 | 1403 2 783 | 136 0 784 | 362 2 785 | 2242 2 786 | 1222 1 787 | 1103 0 788 | 1296 0 789 | 1960 0 790 | 1643 1 791 | 990 2 792 | 2536 0 793 | 85 0 794 | 2467 0 795 | 2334 2 796 | 1703 1 797 | 1739 2 798 | 1936 2 799 | 730 2 800 | 419 2 801 | 2569 2 802 | 1895 2 803 | 1251 1 804 | 1624 1 805 | 1855 0 806 | 699 2 807 | 2241 2 808 | 1829 2 809 | 675 1 810 | 75 1 811 | 2487 1 812 | 2394 2 813 | 1197 0 814 | 1514 2 815 | 81 2 816 | 825 1 817 | 2193 2 818 | 836 1 819 | 2528 1 820 | 150 0 821 | 1923 1 822 | 135 1 823 | 2326 1 824 | 1484 2 825 | 2429 2 826 | 149 1 827 | 2532 2 828 | 2178 1 829 | 2152 1 830 | 1005 2 831 | 2608 2 832 | 2354 1 833 | 541 2 834 | 458 1 835 | 2562 1 836 | 1199 1 837 | 1950 1 838 | 971 1 839 | 2209 1 840 | 2497 2 841 | 1701 1 842 | 1102 2 843 | 2101 2 844 | 2481 2 845 | 1551 1 846 | 2262 2 847 | 1406 1 848 | 2060 1 849 | 241 1 850 | 2443 2 851 | 403 2 852 | 452 1 853 | 1303 0 854 | 447 1 855 | 1686 2 856 | 1305 1 857 | 1208 2 858 | 1872 0 859 | 1753 2 860 | 1773 2 861 | 712 0 862 | 851 1 863 | 816 2 864 | 2189 1 865 | 521 2 866 | 1569 1 867 | 1693 1 868 | 594 1 869 | 282 0 870 | 620 1 871 | 1013 2 872 | 809 2 873 | 411 2 874 | 407 1 875 | 2182 1 876 | 2333 1 877 | 1952 2 878 | 544 1 879 | 1235 1 880 | 734 1 881 | 2040 0 882 | 9 1 883 | 1477 1 884 | 358 1 885 | 1245 1 886 | 954 1 887 | 797 2 888 | 2399 2 889 | 101 2 890 | 939 0 891 | 573 0 892 | 1349 0 893 | 1202 2 894 | 205 2 895 | 213 1 896 | 1130 1 897 | 1519 2 898 | 175 0 899 | 356 1 900 | 1327 1 901 | 1058 1 902 | 1095 1 903 | 1479 1 904 | 785 1 905 | 2227 2 906 | 1405 2 907 | 525 1 908 | 401 1 909 | 731 0 910 | 1475 2 911 | 448 1 912 | 137 0 913 | 1009 1 914 | 817 2 915 | 674 1 916 | 1383 1 917 | 1056 2 918 | 1704 2 919 | 2018 1 920 | 2441 1 921 | 2315 1 922 | 2003 2 923 | 89 1 924 | 764 1 925 | 961 1 926 | 624 1 927 | 1893 2 928 | 1772 1 929 | 1870 2 930 | 1369 1 931 | 1645 1 932 | 124 1 933 | 1907 2 934 | 1035 1 935 | 1259 2 936 | 987 2 937 | 800 2 938 | 1606 1 939 | 1089 0 940 | 1250 2 941 | 340 1 942 | 2156 0 943 | 1887 2 944 | 2439 2 945 | 2119 2 946 | 486 1 947 | 645 2 948 | 504 1 949 | 855 1 950 | 1111 1 951 | 386 1 952 | 1565 1 953 | 2455 1 954 | 1939 1 955 | 2011 0 956 | 183 0 957 | 477 0 958 | 1498 1 959 | 1480 2 960 | 2317 2 961 | 430 2 962 | 2548 1 963 | 1190 2 964 | 2353 1 965 | 1332 1 966 | 2483 1 967 | 1134 2 968 | 845 0 969 | 1091 2 970 | 2160 0 971 | 641 1 972 | 95 2 973 | 1863 1 974 | 1372 2 975 | 1710 2 976 | 1973 2 977 | 2369 1 978 | 2328 2 979 | 2596 2 980 | 2274 1 981 | 977 2 982 | 1043 2 983 | 439 1 984 | 717 1 985 | 807 0 986 | 1297 1 987 | 2310 1 988 | 214 2 989 | 228 2 990 | 1930 2 991 | 511 1 992 | 455 1 993 | 2451 2 994 | 759 1 995 | 574 1 996 | 400 1 997 | 801 1 998 | 1201 2 999 | 1708 2 1000 | 473 0 1001 | 345 1 1002 | 719 1 1003 | 1741 1 1004 | 104 1 1005 | 118 1 1006 | 402 2 1007 | 1401 1 1008 | 2507 1 1009 | 1011 2 1010 | 928 1 1011 | 1057 0 1012 | 1899 2 1013 | 465 0 1014 | 631 1 1015 | 746 1 1016 | 1809 0 1017 | 2597 2 1018 | 23 1 1019 | 1326 1 1020 | 711 2 1021 | 2546 2 1022 | 2293 2 1023 | 83 1 1024 | 1651 2 1025 | 40 1 1026 | 1849 2 1027 | 1632 2 1028 | 777 1 1029 | 2519 2 1030 | 1788 2 1031 | 1198 1 1032 | 223 1 1033 | 2084 2 1034 | 568 2 1035 | 162 1 1036 | 2610 2 1037 | 1800 1 1038 | 438 1 1039 | 
140 1 1040 | 1573 2 1041 | 962 2 1042 | 382 1 1043 | 1549 1 1044 | 1630 0 1045 | 2335 1 1046 | 2331 1 1047 | 2141 2 1048 | 1945 1 1049 | 1213 2 1050 | 2286 2 1051 | 1790 2 1052 | 1858 1 1053 | 1749 2 1054 | 293 1 1055 | 70 0 1056 | 1552 2 1057 | 672 1 1058 | 2482 2 1059 | 1611 1 1060 | 2205 2 1061 | 285 1 1062 | 1621 1 1063 | 1207 0 1064 | 156 0 1065 | 617 1 1066 | 395 2 1067 | 1373 1 1068 | 2197 2 1069 | 2200 1 1070 | 2075 2 1071 | 357 1 1072 | 478 1 1073 | 2312 2 1074 | 1497 2 1075 | 414 2 1076 | 396 1 1077 | 1954 2 1078 | 277 2 1079 | 482 0 1080 | 79 0 1081 | 608 1 1082 | 2216 2 1083 | 595 0 1084 | 1203 0 1085 | 1926 2 1086 | 767 0 1087 | 457 2 1088 | 2349 2 1089 | 2379 2 1090 | 2167 2 1091 | 2582 1 1092 | 2606 2 1093 | 2345 2 1094 | 537 1 1095 | 197 1 1096 | 2509 2 1097 | 1694 1 1098 | 1436 2 1099 | 1363 2 1100 | 582 2 1101 | 950 1 1102 | 1269 0 1103 | 76 1 1104 | 1427 0 1105 | 1798 2 1106 | 2281 2 1107 | 1148 1 1108 | 100 1 1109 | 1902 1 1110 | 1225 2 1111 | 856 2 1112 | 1242 1 1113 | 786 1 1114 | 2142 2 1115 | 770 1 1116 | 1376 1 1117 | 2122 2 1118 | 1888 1 1119 | 1288 0 1120 | 2397 2 1121 | 374 2 1122 | 1324 1 1123 | 2228 2 1124 | 716 1 1125 | 2366 2 1126 | 99 1 1127 | 784 2 1128 | 404 2 1129 | 652 1 1130 | 2070 2 1131 | 1472 1 1132 | 1616 2 1133 | 536 1 1134 | 1048 0 1135 | 1311 2 1136 | 1995 2 1137 | 505 0 1138 | 2356 2 1139 | 2502 1 1140 | 1007 2 1141 | 431 0 1142 | 1922 2 1143 | 1977 1 1144 | 1787 2 1145 | 1979 2 1146 | 1726 2 1147 | 2426 2 1148 | 790 0 1149 | 2061 2 1150 | 960 0 1151 | 2083 2 1152 | 1848 1 1153 | 1323 2 1154 | 642 1 1155 | 1807 2 1156 | 2592 2 1157 | 1635 2 1158 | 1446 2 1159 | 2306 2 1160 | 2586 2 1161 | 1641 0 1162 | 1653 1 1163 | 665 2 1164 | 2298 2 1165 | 837 2 1166 | 1273 2 1167 | 1179 0 1168 | 1955 2 1169 | 2303 2 1170 | 1021 2 1171 | 704 2 1172 | 942 0 1173 | 2015 1 1174 | 145 1 1175 | 1502 2 1176 | 2518 2 1177 | 1786 1 1178 | 1988 0 1179 | 195 2 1180 | 1618 1 1181 | 560 1 1182 | 1093 1 1183 | 2464 2 1184 | 1319 1 1185 | 2525 2 1186 | 2287 2 1187 | 2510 0 1188 | 323 1 1189 | 1438 2 1190 | 846 2 1191 | 2235 2 1192 | 2558 2 1193 | 1964 1 1194 | 1107 0 1195 | 768 2 1196 | 627 2 1197 | 2244 2 1198 | 20 0 1199 | 1362 0 1200 | 673 0 1201 | 80 1 1202 | 506 1 1203 | 1178 0 1204 | 2069 2 1205 | 1526 2 1206 | 2039 2 1207 | 933 1 1208 | 1789 2 1209 | 2377 2 1210 | 2580 2 1211 | 578 2 1212 | 998 2 1213 | 1928 0 1214 | 2078 0 1215 | 408 2 1216 | 2289 2 1217 | 113 1 1218 | 842 0 1219 | 160 1 1220 | 2407 2 1221 | 576 1 1222 | 1316 2 1223 | 2308 1 1224 | 1820 2 1225 | 1524 0 1226 | 463 1 1227 | 441 1 1228 | 983 2 1229 | 1851 1 1230 | 1374 1 1231 | 1718 2 1232 | 289 1 1233 | 472 0 1234 | 2382 2 1235 | 1218 1 1236 | 732 2 1237 | 1557 2 1238 | 2133 2 1239 | 212 1 1240 | 481 1 1241 | 491 1 1242 | 73 1 1243 | 1422 1 1244 | 2087 1 1245 | 2055 2 1246 | 1150 1 1247 | 2598 2 1248 | 753 1 1249 | 612 1 1250 | 258 1 1251 | 1024 2 1252 | 2400 2 1253 | 2063 1 1254 | 141 1 1255 | 1418 2 1256 | 970 1 1257 | 1511 2 1258 | 906 1 1259 | 53 1 1260 | 1580 1 1261 | 1032 0 1262 | 92 0 1263 | 1257 2 1264 | 794 2 1265 | 2565 2 1266 | 600 1 1267 | 471 2 1268 | 188 1 1269 | 352 0 1270 | 1209 1 1271 | 144 1 1272 | 882 1 1273 | 1875 0 1274 | 893 1 1275 | 1968 2 1276 | 1924 1 1277 | 980 0 1278 | 236 1 1279 | 908 2 1280 | 2307 2 1281 | 321 2 1282 | 2587 1 1283 | 528 1 1284 | 2185 2 1285 | 1542 2 1286 | 2257 1 1287 | 1729 0 1288 | 993 2 1289 | 1811 2 1290 | 12 2 1291 | 1866 0 1292 | 1412 1 1293 | 231 0 1294 | 190 0 1295 | 2174 1 1296 | 1548 2 1297 | 714 0 1298 | 1456 1 1299 | 376 1 1300 | 1409 2 1301 
| 694 2 1302 | 1947 2 1303 | 940 1 1304 | 1523 1 1305 | 1652 2 1306 | 2247 2 1307 | 1572 1 1308 | 1496 2 1309 | 1737 2 1310 | 445 2 1311 | 1677 1 1312 | 1721 1 1313 | 1495 2 1314 | 2158 2 1315 | 2324 1 1316 | 899 2 1317 | 2046 2 1318 | 1735 0 1319 | 2097 1 1320 | 749 1 1321 | 1812 0 1322 | 1873 2 1323 | 1277 0 1324 | 84 1 1325 | 130 1 1326 | 1763 2 1327 | 260 1 1328 | 1563 0 1329 | 781 0 1330 | 1862 1 1331 | 2033 2 1332 | 1304 1 1333 | 2151 1 1334 | 406 1 1335 | 1159 1 1336 | 1012 0 1337 | 1360 1 1338 | 216 1 1339 | 1100 1 1340 | 2002 1 1341 | 1329 1 1342 | 839 2 1343 | 1432 0 1344 | 1857 1 1345 | 1889 1 1346 | 436 0 1347 | 1499 2 1348 | 2588 2 1349 | 2542 2 1350 | 2351 2 1351 | 61 1 1352 | 895 1 1353 | 1234 1 1354 | 2414 2 1355 | 823 1 1356 | 750 1 1357 | 902 0 1358 | 898 0 1359 | 916 2 1360 | 1822 1 1361 | 1361 2 1362 | 2134 1 1363 | 1244 2 1364 | 2159 0 1365 | 628 1 1366 | 1310 2 1367 | 2418 1 1368 | 405 1 1369 | 702 2 1370 | 590 1 1371 | 2008 1 1372 | 946 0 1373 | 501 2 1374 | 2278 2 1375 | 1843 1 1376 | 2496 2 1377 | 2038 2 1378 | 435 1 1379 | 1622 1 1380 | 826 0 1381 | 1147 1 1382 | 666 2 1383 | 18 1 1384 | 502 2 1385 | 559 2 1386 | 1685 2 1387 | 339 0 1388 | 602 1 1389 | 700 0 1390 | 1998 1 1391 | 1109 1 1392 | 1610 2 1393 | 2212 2 1394 | 2233 1 1395 | 1891 2 1396 | 2413 2 1397 | 108 1 1398 | 2375 2 1399 | 332 0 1400 | 2490 2 1401 | 1656 2 1402 | 1087 2 1403 | 2440 0 1404 | 1906 1 1405 | 1062 2 1406 | 2603 2 1407 | 1466 0 1408 | 52 0 1409 | 158 0 1410 | 881 1 1411 | 889 2 1412 | 758 2 1413 | 862 2 1414 | 2376 1 1415 | 476 2 1416 | 546 0 1417 | 2042 0 1418 | 618 1 1419 | 1161 2 1420 | 795 2 1421 | 815 1 1422 | 1931 1 1423 | 2056 1 1424 | 1702 2 1425 | 2265 1 1426 | 885 2 1427 | 2254 0 1428 | 1070 2 1429 | 1828 2 1430 | 1982 1 1431 | 2471 1 1432 | 1826 2 1433 | 1757 1 1434 | 664 0 1435 | 1247 0 1436 | 1307 1 1437 | 2329 2 1438 | 569 2 1439 | 1210 2 1440 | 1680 2 1441 | 2203 2 1442 | 2270 1 1443 | 359 1 1444 | 918 0 1445 | 1976 2 1446 | 55 1 1447 | 102 2 1448 | 1724 1 1449 | 2541 0 1450 | 1272 2 1451 | 1890 1 1452 | 1483 1 1453 | 2271 2 1454 | 2498 2 1455 | 2032 0 1456 | 2199 2 1457 | 1028 2 1458 | 1149 1 1459 | 2093 1 1460 | 1579 1 1461 | 526 0 1462 | 1746 1 1463 | 1805 2 1464 | 1626 2 1465 | 1942 1 1466 | 550 1 1467 | 1762 1 1468 | 1756 2 1469 | 1106 0 1470 | 1266 1 1471 | 244 0 1472 | 1759 1 1473 | 125 1 1474 | 1517 0 1475 | 1351 1 1476 | 2347 2 1477 | 1978 2 1478 | 2506 1 1479 | 566 2 1480 | 2341 2 1481 | 519 0 1482 | 1681 2 1483 | 938 1 1484 | 497 2 1485 | 1365 0 1486 | 1139 1 1487 | 518 2 1488 | 2269 2 1489 | 1765 1 1490 | 1018 2 1491 | 1547 2 1492 | 2350 2 1493 | 2607 1 1494 | 1490 1 1495 | 1660 2 1496 | 969 0 1497 | 1233 1 1498 | 1424 1 1499 | 524 2 1500 | 1649 0 1501 | 834 1 1502 | 2024 2 1503 | 310 2 1504 | 1044 0 1505 | 485 2 1506 | 1407 1 1507 | 561 1 1508 | 1154 0 1509 | 2105 2 1510 | 326 1 1511 | 1738 2 1512 | 670 1 1513 | 1776 1 1514 | 270 1 1515 | 2284 2 1516 | 603 2 1517 | 1151 1 1518 | 2480 2 1519 | 295 2 1520 | 605 1 1521 | 1177 1 1522 | 833 2 1523 | 975 0 1524 | 1224 2 1525 | 1228 0 1526 | 1343 1 1527 | 721 0 1528 | 1733 2 1529 | 2396 2 1530 | 1421 0 1531 | 1869 2 1532 | 2089 2 1533 | 989 1 1534 | 2132 2 1535 | 35 1 1536 | 2531 2 1537 | 1008 2 1538 | 251 2 1539 | 2590 2 1540 | 1084 1 1541 | 873 0 1542 | 2263 2 1543 | 36 1 1544 | 1969 2 1545 | 2007 1 1546 | 37 0 1547 | 857 1 1548 | 1173 0 1549 | 1030 1 1550 | 1430 0 1551 | 348 1 1552 | 234 2 1553 | 1692 1 1554 | 2552 2 1555 | 802 1 1556 | 1113 1 1557 | 551 1 1558 | 1080 1 1559 | 1157 0 1560 | 1758 2 1561 | 2336 1 
1562 | 2573 2 1563 | 868 1 1564 | 2017 2 1565 | 294 1 1566 | 2109 1 1567 | 661 0 1568 | 67 0 1569 | 629 1 1570 | 765 2 1571 | 1414 0 1572 | 2567 2 1573 | 8 2 1574 | 1587 2 1575 | 1609 1 1576 | 2136 2 1577 | 2166 2 1578 | 1452 0 1579 | 831 1 1580 | 1078 0 1581 | 903 1 1582 | 1072 0 1583 | 304 1 1584 | 1388 0 1585 | 2391 1 1586 | 1389 2 1587 | 1289 1 1588 | 1600 2 1589 | 2449 2 1590 | 909 1 1591 | 2571 2 1592 | 1871 1 1593 | 1276 1 1594 | 854 0 1595 | 365 2 1596 | 15 0 1597 | 1627 1 1598 | 253 1 1599 | 2534 2 1600 | 138 2 1601 | 1295 2 1602 | 610 1 1603 | 1877 1 1604 | 116 1 1605 | 686 1 1606 | 440 1 1607 | 692 1 1608 | 2395 2 1609 | 1391 2 1610 | 256 1 1611 | 589 1 1612 | 2025 2 1613 | 300 0 1614 | 1712 0 1615 | 1615 1 1616 | 2145 2 1617 | 1384 2 1618 | 1231 2 1619 | 583 0 1620 | 2411 1 1621 | 78 2 1622 | 1533 0 1623 | 705 1 1624 | 2130 2 1625 | 548 0 1626 | 1678 1 1627 | 2246 0 1628 | 1777 2 1629 | 615 0 1630 | 2176 2 1631 | 530 1 1632 | 2044 2 1633 | 1831 1 1634 | 1539 2 1635 | 397 0 1636 | 1880 0 1637 | 263 0 1638 | 2444 2 1639 | 155 0 1640 | 259 0 1641 | 2575 0 1642 | 2111 2 1643 | 613 1 1644 | 1397 1 1645 | 813 1 1646 | 449 0 1647 | 1055 1 1648 | 225 1 1649 | 1068 0 1650 | 1898 2 1651 | 1471 1 1652 | 468 1 1653 | 1036 2 1654 | 1608 1 1655 | 1501 1 1656 | 2432 1 1657 | 1125 1 1658 | 648 2 1659 | 949 1 1660 | 1252 1 1661 | 1984 0 1662 | 350 1 1663 | 229 0 1664 | 503 0 1665 | 2147 2 1666 | 64 1 1667 | 2300 2 1668 | 917 2 1669 | 1155 0 1670 | 1132 2 1671 | 1642 0 1672 | 1919 2 1673 | 1509 2 1674 | 1164 2 1675 | 930 0 1676 | 1900 1 1677 | 1416 1 1678 | 380 1 1679 | 896 0 1680 | 771 0 1681 | 2469 1 1682 | 292 1 1683 | 451 2 1684 | 1205 1 1685 | 2517 2 1686 | 381 1 1687 | 1567 2 1688 | 2000 2 1689 | 2320 1 1690 | 936 0 1691 | 1575 1 1692 | 1917 2 1693 | 2499 2 1694 | 1801 2 1695 | 1882 1 1696 | 383 2 1697 | 850 0 1698 | 966 0 1699 | 1538 2 1700 | 1420 1 1701 | 329 1 1702 | 2403 0 1703 | 2424 2 1704 | 2463 2 1705 | 1378 2 1706 | 637 2 1707 | 1334 1 1708 | 351 1 1709 | 1345 1 1710 | 1817 1 1711 | 193 1 1712 | 1913 2 1713 | 428 1 1714 | 275 1 1715 | 604 2 1716 | 1393 1 1717 | 754 1 1718 | 1908 2 1719 | 1672 2 1720 | 757 2 1721 | 935 2 1722 | 1921 2 1723 | 875 1 1724 | 656 0 1725 | 910 2 1726 | 516 1 1727 | 26 1 1728 | 1410 2 1729 | 619 0 1730 | 514 0 1731 | 1069 1 1732 | 1141 2 1733 | 2408 1 1734 | 1720 1 1735 | 171 1 1736 | 1193 2 1737 | 2494 2 1738 | 736 2 1739 | 1364 1 1740 | 1813 0 1741 | 210 2 1742 | 1687 2 1743 | 1589 1 1744 | 328 1 1745 | 1377 0 1746 | 2555 2 1747 | 981 2 1748 | 937 1 1749 | 272 1 1750 | 178 2 1751 | 1967 1 1752 | 1481 2 1753 | 2223 2 1754 | 1604 2 1755 | 2484 2 1756 | 1559 1 1757 | 250 1 1758 | 2388 2 1759 | 1444 1 1760 | 1071 0 1761 | 330 2 1762 | 280 1 1763 | 1076 1 1764 | 1278 1 1765 | 565 2 1766 | 1682 2 1767 | 683 1 1768 | 2574 0 1769 | 1750 2 1770 | 840 1 1771 | 1461 1 1772 | 580 0 1773 | 2339 2 1774 | 766 1 1775 | 1061 1 1776 | 1312 2 1777 | 534 2 1778 | 976 2 1779 | 312 0 1780 | 327 2 1781 | 982 1 1782 | 1933 1 1783 | 354 1 1784 | 542 1 1785 | 1825 2 1786 | 1291 1 1787 | 2138 1 1788 | 1769 1 1789 | 1963 2 1790 | 2508 1 1791 | 2110 2 1792 | 1317 1 1793 | 1602 1 1794 | 1216 2 1795 | 2184 1 1796 | 2047 2 1797 | 774 1 1798 | 728 0 1799 | 1796 2 1800 | 2568 2 1801 | 614 2 1802 | 2473 2 1803 | 655 1 1804 | 1938 2 1805 | 87 2 1806 | 98 1 1807 | 2236 1 1808 | 221 2 1809 | 1306 2 1810 | 684 1 1811 | 1901 2 1812 | 318 0 1813 | 2522 1 1814 | 1697 0 1815 | 1958 2 1816 | 2605 2 1817 | 1874 2 1818 | 724 2 1819 | 1300 0 1820 | 390 0 1821 | 1142 1 1822 | 437 2 1823 | 1740 2 1824 
| 2611 1 1825 | 1993 1 1826 | 570 1 1827 | 907 2 1828 | 165 1 1829 | 1411 0 1830 | 389 1 1831 | 1143 1 1832 | 952 1 1833 | 152 2 1834 | 335 1 1835 | 1584 1 1836 | 46 1 1837 | 593 0 1838 | 2322 2 1839 | 2291 1 1840 | 273 1 1841 | 2547 2 1842 | 1186 2 1843 | 1845 2 1844 | 556 1 1845 | 1865 0 1846 | 904 1 1847 | 14 0 1848 | 1636 2 1849 | 2051 2 1850 | 2361 2 1851 | 2564 2 1852 | 2249 2 1853 | 1852 0 1854 | 1971 2 1855 | 1040 1 1856 | 333 1 1857 | 689 1 1858 | 622 1 1859 | 1987 2 1860 | 284 1 1861 | 2389 2 1862 | 2572 2 1863 | 609 0 1864 | 191 0 1865 | 484 2 1866 | 2561 2 1867 | 2112 1 1868 | 96 1 1869 | 2591 2 1870 | 2187 2 1871 | 1695 1 1872 | 1614 1 1873 | 1577 2 1874 | 1778 1 1875 | 264 2 1876 | 874 0 1877 | 2139 1 1878 | 891 1 1879 | 2475 2 1880 | 1837 2 1881 | 1768 1 1882 | 804 1 1883 | 1515 2 1884 | 2386 2 1885 | 2433 2 1886 | 772 2 1887 | 269 2 1888 | 2043 0 1889 | 122 1 1890 | 792 0 1891 | 325 0 1892 | 1079 2 1893 | 1657 2 1894 | 1593 2 1895 | 1531 0 1896 | 344 0 1897 | 585 2 1898 | 1458 2 1899 | 658 1 1900 | 667 2 1901 | 958 1 1902 | 2076 1 1903 | 62 1 1904 | 2282 2 1905 | 2204 2 1906 | 1503 1 1907 | 1553 1 1908 | 58 0 1909 | 2157 2 1910 | 1568 2 1911 | 2102 1 1912 | 278 1 1913 | 2013 1 1914 | 1698 2 1915 | 1309 2 1916 | 1562 2 1917 | 245 2 1918 | 301 1 1919 | 2409 2 1920 | 17 1 1921 | 1956 1 1922 | 1935 1 1923 | 2343 1 1924 | 1715 2 1925 | 291 2 1926 | 1301 1 1927 | 13 2 1928 | 1386 0 1929 | 1598 1 1930 | 1840 2 1931 | 203 0 1932 | 2593 1 1933 | 24 2 1934 | 1925 0 1935 | 1270 0 1936 | 460 2 1937 | 1398 0 1938 | 1571 2 1939 | 2512 1 1940 | 44 0 1941 | 2014 2 1942 | 769 0 1943 | 2135 2 1944 | 1434 1 1945 | 539 2 1946 | 793 2 1947 | 268 2 1948 | 1505 1 1949 | 1112 1 1950 | 959 2 1951 | 994 0 1952 | 1760 2 1953 | 2072 2 1954 | 1592 2 1955 | 369 2 1956 | 16 0 1957 | 2316 0 1958 | 1122 1 1959 | 2543 2 1960 | 377 2 1961 | 1554 0 1962 | 2330 2 1963 | 1473 1 1964 | 853 0 1965 | 398 0 1966 | 697 0 1967 | 1217 2 1968 | 2100 2 1969 | 1731 2 1970 | 2503 2 1971 | 1854 1 1972 | 1264 2 1973 | 819 0 1974 | 337 2 1975 | 2049 2 1976 | 1083 1 1977 | 727 1 1978 | 1333 1 1979 | 2275 2 1980 | 2600 2 1981 | 512 1 1982 | 1841 0 1983 | 393 0 1984 | 1634 2 1985 | 1284 2 1986 | 729 1 1987 | 1996 2 1988 | 353 1 1989 | 1713 2 1990 | 557 1 1991 | 434 0 1992 | 2539 2 1993 | 923 1 1994 | 2066 2 1995 | 114 1 1996 | 308 0 1997 | 2579 1 1998 | 2023 1 1999 | 991 1 2000 | 992 1 2001 | 2422 0 2002 | 996 0 2003 | 688 1 2004 | 1516 2 2005 | 94 0 2006 | 1180 2 2007 | 1688 1 2008 | 309 1 2009 | 1675 1 2010 | 237 1 2011 | 208 1 2012 | 587 2 2013 | 366 2 2014 | 1347 0 2015 | 1684 2 2016 | 2250 2 2017 | 934 0 2018 | 427 1 2019 | 1037 2 2020 | 1016 0 2021 | 2337 2 2022 | 1644 0 2023 | 787 1 2024 | 416 0 2025 | 737 1 2026 | 1970 0 2027 | 1441 2 2028 | 2524 2 2029 | 2131 2 2030 | 1448 1 2031 | 1774 1 2032 | 1274 1 2033 | 129 0 2034 | 2019 2 2035 | 157 2 2036 | 1605 0 2037 | 822 2 2038 | 2559 0 2039 | 1128 0 2040 | 1794 1 2041 | 1669 0 2042 | 1065 1 2043 | 945 0 2044 | 1892 2 2045 | 372 1 2046 | 844 1 2047 | 2238 2 2048 | 77 0 2049 | 782 1 2050 | 1761 0 2051 | 1260 2 2052 | 2092 1 2053 | 1031 0 2054 | 1940 2 2055 | 738 2 2056 | 1196 2 2057 | 2181 1 2058 | 1722 2 2059 | 849 0 2060 | 1639 0 2061 | 679 2 2062 | 489 1 2063 | 1394 0 2064 | 941 2 2065 | 701 1 2066 | 480 1 2067 | 2526 2 2068 | 2295 2 2069 | 469 0 2070 | 2491 2 2071 | 1390 2 2072 | 109 1 2073 | 1717 1 2074 | 2406 2 2075 | 2090 2 2076 | 1279 1 2077 | 2466 2 2078 | 2504 1 2079 | 663 0 2080 | 1059 1 2081 | 453 1 2082 | 1063 1 2083 | 880 1 2084 | 215 1 2085 | 2215 2 2086 | 
1066 0 2087 | 1325 2 2088 | 513 0 2089 | 2358 0 2090 | 1823 2 2091 | 1117 1 2092 | 1545 0 2093 | 1352 2 2094 | 1784 2 2095 | 1815 2 2096 | 103 2 2097 | 1253 1 2098 | 2549 1 2099 | 2 0 2100 | 1500 1 2101 | 2168 2 2102 | 1135 1 2103 | 2505 1 2104 | 540 1 2105 | 230 1 2106 | 1129 1 2107 | 963 1 2108 | 1010 1 2109 | 606 2 2110 | 597 1 2111 | 2325 2 2112 | 1442 2 2113 | 1226 2 2114 | 1243 2 2115 | 2224 2 2116 | 1613 2 2117 | 307 1 2118 | 510 0 2119 | 2251 2 2120 | 924 2 2121 | 635 2 2122 | 360 1 2123 | 2477 0 2124 | 474 1 2125 | 2460 2 2126 | 1535 1 2127 | 494 2 2128 | 2392 1 2129 | 869 2 2130 | 117 1 2131 | 1353 1 2132 | 97 2 2133 | 257 1 2134 | 633 1 2135 | 1105 0 2136 | 2362 2 2137 | 1868 0 2138 | 320 2 2139 | 662 1 2140 | 2404 2 2141 | 720 2 2142 | 1620 2 2143 | 1449 1 2144 | 832 1 2145 | 2121 1 2146 | 322 0 2147 | 2288 1 2148 | 1194 1 2149 | 2412 0 2150 | 1246 0 2151 | 639 2 2152 | 1916 1 2153 | 1959 2 2154 | 432 1 2155 | 1767 2 2156 | 1315 1 2157 | 523 2 2158 | 2436 1 2159 | 2323 2 2160 | 420 0 2161 | 1664 2 2162 | 1576 1 2163 | 1719 0 2164 | 735 1 2165 | 1331 2 2166 | 1816 1 2167 | 2171 0 2168 | 533 1 2169 | 2220 2 2170 | 1665 1 2171 | 1507 0 2172 | 2381 1 2173 | 2210 1 2174 | 2402 0 2175 | 1834 2 2176 | 93 2 2177 | 925 0 2178 | 2458 2 2179 | 1358 2 2180 | 2016 0 2181 | 1459 1 2182 | 146 1 2183 | 2585 2 2184 | 547 1 2185 | 1994 1 2186 | 1223 0 2187 | 1897 2 2188 | 1550 1 2189 | 1478 1 2190 | 2022 1 2191 | 220 0 2192 | 588 2 2193 | 1182 1 2194 | 2472 2 2195 | 201 1 2196 | 189 0 2197 | 196 0 2198 | 1261 2 2199 | 1104 2 2200 | 1494 2 2201 | 1779 2 2202 | 575 0 2203 | 1974 0 2204 | 483 0 2205 | 2332 2 2206 | 1463 1 2207 | 2041 1 2208 | 2052 2 2209 | 1646 1 2210 | 159 1 2211 | 2273 2 2212 | 2285 1 2213 | 2059 2 2214 | 499 1 2215 | 349 1 2216 | 2074 1 2217 | 147 2 2218 | 2367 2 2219 | 110 1 2220 | 508 2 2221 | 708 1 2222 | 1238 2 2223 | 921 0 2224 | 1878 2 2225 | 747 2 2226 | 1426 2 2227 | 1818 2 2228 | 1174 1 2229 | 1489 2 2230 | 1451 2 2231 | 1195 2 2232 | 1344 1 2233 | 127 1 2234 | 1847 2 2235 | 1400 2 2236 | 1336 1 2237 | 1138 2 2238 | 1354 1 2239 | 306 1 2240 | 163 2 2241 | 2245 2 2242 | 715 0 2243 | 1375 1 2244 | 535 1 2245 | 900 1 2246 | 1867 1 2247 | 174 1 2248 | 1145 1 2249 | 479 1 2250 | 2292 2 2251 | 1744 2 2252 | 319 0 2253 | 2296 2 2254 | 2301 2 2255 | 1706 0 2256 | 1528 1 2257 | 2095 2 2258 | 418 1 2259 | 878 1 2260 | 1060 2 2261 | 1957 1 2262 | 2268 2 2263 | 90 0 2264 | 1676 1 2265 | 1673 2 2266 | 1423 2 2267 | 238 2 2268 | 835 0 2269 | 1997 2 2270 | 219 2 2271 | 2169 2 2272 | 1566 2 2273 | 914 2 2274 | 314 1 2275 | 1989 2 2276 | 948 1 2277 | 1116 2 2278 | 1282 0 2279 | 760 2 2280 | 1661 2 2281 | 722 1 2282 | 422 0 2283 | 957 0 2284 | 5 1 2285 | 495 2 2286 | 148 1 2287 | 653 2 2288 | 1674 2 2289 | 1846 2 2290 | 7 2 2291 | 1120 0 2292 | 334 2 2293 | 21 0 2294 | 2170 2 2295 | 105 1 2296 | 1518 1 2297 | 1766 1 2298 | 978 0 2299 | 2123 2 2300 | 172 2 2301 | 751 1 2302 | 2195 2 2303 | 2230 1 2304 | 1110 1 2305 | 1215 1 2306 | 1387 2 2307 | 68 0 2308 | 88 1 2309 | 57 1 2310 | 1053 1 2311 | 1262 0 2312 | 1951 2 2313 | 252 0 2314 | 1521 2 2315 | 2001 2 2316 | 182 1 2317 | 517 1 2318 | 2550 2 2319 | 1591 2 2320 | 387 0 2321 | 545 1 2322 | 1049 1 2323 | 1929 2 2324 | 1541 1 2325 | 1320 2 2326 | 2370 2 2327 | 905 1 2328 | 1338 1 2329 | 659 0 2330 | 71 0 2331 | 2045 0 2332 | 384 0 2333 | 1181 2 2334 | 1927 2 2335 | 1832 2 2336 | 638 0 2337 | 1415 1 2338 | 2513 2 2339 | 1188 1 2340 | 616 1 2341 | 2570 2 2342 | 742 2 2343 | 2173 2 2344 | 470 0 2345 | 1094 0 2346 | 2604 2 2347 | 1654 0 2348 | 
45 0 2349 | 1283 2 2350 | 1705 1 2351 | 651 1 2352 | 488 1 2353 | 2255 1 2354 | 2006 2 2355 | 1428 2 2356 | 1026 0 2357 | 1204 1 2358 | 703 1 2359 | 1696 1 2360 | 1023 2 2361 | 643 1 2362 | 392 1 2363 | 1905 2 2364 | 586 1 2365 | 1492 1 2366 | 920 2 2367 | 2553 2 2368 | 487 2 2369 | 66 0 2370 | 1582 1 2371 | 2183 2 2372 | 1086 0 2373 | 2474 2 2374 | 829 2 2375 | 421 1 2376 | 4 1 2377 | 955 1 2378 | 1431 0 2379 | 47 1 2380 | 41 1 2381 | 222 0 2382 | 1121 0 2383 | 2540 0 2384 | 1595 2 2385 | 2259 2 2386 | 1380 2 2387 | 984 1 2388 | 571 2 2389 | 1904 2 2390 | 262 1 2391 | 693 2 2392 | 2446 2 2393 | 2248 2 2394 | 2144 1 2395 | 1368 2 2396 | 239 0 2397 | 2489 2 2398 | 1915 2 2399 | 1429 2 2400 | 1835 2 2401 | 2258 2 2402 | 346 2 2403 | 1536 0 2404 | 38 1 2405 | 363 1 2406 | 549 2 2407 | 979 1 2408 | 1046 0 2409 | 2371 2 2410 | 1633 1 2411 | 2401 2 2412 | 361 1 2413 | 1126 2 2414 | 1975 1 2415 | 1884 2 2416 | 897 2 2417 | 443 1 2418 | 1290 2 2419 | 2313 0 2420 | 2062 0 2421 | 1914 2 2422 | 2217 0 2423 | 2529 1 2424 | 773 0 2425 | 106 0 2426 | 204 1 2427 | 2175 0 2428 | 543 2 2429 | 2234 0 2430 | 1723 0 2431 | 2266 1 2432 | 1980 2 2433 | 926 2 2434 | 1002 1 2435 | 2309 1 2436 | 297 1 2437 | 2237 2 2438 | 1612 1 2439 | 1555 1 2440 | 1167 1 2441 | 32 1 2442 | 1052 2 2443 | 876 1 2444 | 2005 1 2445 | 2521 2 2446 | 2091 2 2447 | 1169 1 2448 | 1341 1 2449 | 1561 1 2450 | 1732 1 2451 | 1227 1 2452 | 2456 2 2453 | 1839 2 2454 | 1658 2 2455 | 313 0 2456 | 1256 2 2457 | 1192 1 2458 | 2583 1 2459 | 2279 2 2460 | 1298 1 2461 | 1885 2 2462 | 1691 2 2463 | 1000 1 2464 | 1728 1 2465 | 2202 2 2466 | 2459 2 2467 | 2290 2 2468 | 2207 1 2469 | 748 0 2470 | 242 1 2471 | 2425 0 2472 | 2117 1 2473 | 1985 1 2474 | 1743 1 2475 | 778 2 2476 | 2124 2 2477 | 2523 2 2478 | 1912 1 2479 | 2385 0 2480 | 1163 0 2481 | 2410 2 2482 | 1468 0 2483 | 1191 1 2484 | 492 2 2485 | 863 2 2486 | 1123 2 2487 | 1981 2 2488 | 1910 2 2489 | 763 1 2490 | 2252 2 2491 | 865 2 2492 | 1771 2 2493 | 425 0 2494 | 1836 2 2495 | 1530 0 2496 | 2538 1 2497 | 1285 2 2498 | 1709 0 2499 | 2577 1 2500 | 1402 2 2501 | 2368 2 2502 | 424 1 2503 | 2276 1 2504 | 1029 1 2505 | 367 1 2506 | 461 0 2507 | 947 0 2508 | 255 1 2509 | 2194 2 2510 | 1220 1 2511 | 1039 2 2512 | 2501 2 2513 | 385 0 2514 | 532 1 2515 | 1764 1 2516 | 1214 0 2517 | 625 1 2518 | 331 0 2519 | 1464 2 2520 | 1019 2 2521 | 48 1 2522 | 462 0 2523 | 621 2 2524 | 1520 2 2525 | 1286 1 2526 | 181 1 2527 | 1601 2 2528 | 311 0 2529 | 2340 2 2530 | 1730 1 2531 | 1570 1 2532 | 1450 1 2533 | 1745 0 2534 | 2240 1 2535 | 2384 0 2536 | 1408 2 2537 | 2190 0 2538 | 1842 2 2539 | 901 2 2540 | 632 2 2541 | 1144 1 2542 | 1903 1 2543 | 1271 1 2544 | 2609 2 2545 | 2486 1 2546 | 1042 1 2547 | 1736 1 2548 | 2352 2 2549 | 522 1 2550 | 951 1 2551 | 1920 0 2552 | 743 1 2553 | 2612 1 2554 | 1631 2 2555 | 2127 1 2556 | 2453 2 2557 | 2485 2 2558 | 2260 1 2559 | 867 1 2560 | 1588 2 2561 | 2511 2 2562 | 1322 2 2563 | 1176 2 2564 | 409 2 2565 | 63 0 2566 | 1399 2 2567 | 859 2 2568 | 843 1 2569 | 579 1 2570 | 2437 2 2571 | 1379 1 2572 | 2594 2 2573 | 1546 1 2574 | 1725 2 2575 | 887 1 2576 | 1992 0 2577 | 28 2 2578 | 1748 0 2579 | 192 1 2580 | 2148 1 2581 | 572 1 2582 | 2272 1 2583 | 927 1 2584 | 636 1 2585 | 752 1 2586 | 246 2 2587 | 507 1 2588 | 1493 1 2589 | 634 2 2590 | 2311 2 2591 | 814 1 2592 | 2427 2 2593 | 1265 2 2594 | 2116 2 2595 | 654 1 2596 | 1465 1 2597 | 847 2 2598 | 2030 2 2599 | 676 2 2600 | 973 0 2601 | 2125 1 2602 | 841 1 2603 | 2280 2 2604 | 1943 1 2605 | 1628 1 2606 | 2082 1 2607 | 974 2 2608 | 1999 2 2609 | 
866 2 2610 | 2146 2 2611 | 22 0 2612 | 1136 1 2613 | 303 1 2614 | 669 2 2615 | --------------------------------------------------------------------------------
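The file above lists one "business_id category" pair per line, with 0-based ids and category labels in {0,1,2}. As a minimal sketch only, the snippet below pairs these labels with a saved Yelp embedding for clustering evaluation using the utilities bundled under code/utils (litekmeans, bestMap). The embedding path and the assumption that business rows come first in the unified embedding are illustrative guesses, not confirmed by the repository.

% Minimal evaluation sketch; assumptions are flagged in the comments.
pairs = dlmread('data/yelp/business_category.txt'); % columns: 0-based id, category
ids = pairs(:,1) + 1;                   % shift to MATLAB's 1-based row indices
gnd = pairs(:,2) + 1;                   % labels as 1..3 for the mapping utilities
load('data/yelp/result/unify_bsb+bcb+brurb_embedding.mat'); % assumed yelpCL.m output
X = unify_embedding(ids,:);             % ASSUMPTION: business rows come first
res = litekmeans(X, numel(unique(gnd)), 'Replicates', 10);
res = bestMap(gnd, res);                % permute cluster labels to best match gnd
acc = sum(gnd == res) / numel(gnd);     % clustering accuracy
fprintf('clustering accuracy: %f\n', acc);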