├── DNESBP_public └── code │ ├── NNutil │ ├── sigm.m │ ├── tanh_opt.m │ ├── saesetup.m │ ├── nntest.m │ ├── nnpredict.m │ ├── softmax.m │ ├── nnapplygrads.m │ ├── softmaxPredict.m │ ├── nneval.m │ ├── softmaxCost.m │ ├── nnsetup.m │ ├── nnbp.m │ ├── nnupdatefigures.m │ ├── softmaxTrain.m │ ├── nnff.m │ ├── nntrain.m │ ├── saeff.m │ ├── saebp.m │ └── saenntrain.m │ └── DNESBP │ ├── data │ ├── wiki_UD.mat │ ├── epinions_UD.mat │ └── slashdot_UD.mat │ ├── GenRep.m │ ├── ComputeAP.m │ ├── DNESBP_CD.m │ ├── DNESBP_LP.m │ ├── DNESBP_CD_epinions.m │ ├── DNESBP_CD_slashdot.m │ ├── DNESBP_CD_wiki.m │ ├── DNESBP_LP_wiki.m │ ├── DNESBP_LP_slashdot.m │ └── DNESBP_LP_epinions.m └── README.md /DNESBP_public/code/NNutil/sigm.m: -------------------------------------------------------------------------------- 1 | function X = sigm(P) 2 | X = 1./(1+exp(-P)); 3 | end -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/tanh_opt.m: -------------------------------------------------------------------------------- 1 | function f=tanh_opt(A) 2 | f=1.7159*tanh(2/3.*A); 3 | end -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/saesetup.m: -------------------------------------------------------------------------------- 1 | function sae = saesetup(size) 2 | for u = 2 : numel(size) 3 | sae.ae{u-1} = nnsetup([size(u-1) size(u) size(u-1)]); 4 | end 5 | end 6 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/data/wiki_UD.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenxiaocam/Deep-network-embedding-for-graph-representation-learning-in-signed-networks/HEAD/DNESBP_public/code/DNESBP/data/wiki_UD.mat -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/data/epinions_UD.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenxiaocam/Deep-network-embedding-for-graph-representation-learning-in-signed-networks/HEAD/DNESBP_public/code/DNESBP/data/epinions_UD.mat -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/data/slashdot_UD.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shenxiaocam/Deep-network-embedding-for-graph-representation-learning-in-signed-networks/HEAD/DNESBP_public/code/DNESBP/data/slashdot_UD.mat -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nntest.m: -------------------------------------------------------------------------------- 1 | function [er, bad] = nntest(nn, x, y) 2 | labels = nnpredict(nn, x); 3 | [dummy, expected] = max(y,[],2); 4 | bad = find(labels ~= expected); 5 | er = numel(bad) / size(x, 1); 6 | end 7 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nnpredict.m: -------------------------------------------------------------------------------- 1 | function labels = nnpredict(nn, x) 2 | nn.testing = 1; 3 | nn = nnff(nn, x, zeros(size(x,1), nn.size(end))); 4 | nn.testing = 0; 5 | 6 | [dummy, i] = max(nn.a{end},[],2); 7 | labels = i; 8 | end 9 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/softmax.m: 
-------------------------------------------------------------------------------- 1 | function mu = softmax(eta) 2 | % Softmax function 3 | % mu(i,c) = exp(eta(i,c))/sum_c' exp(eta(i,c')) 4 | 5 | % This file is from matlabtools.googlecode.com 6 | c = 3; 7 | 8 | tmp = exp(c*eta); 9 | denom = sum(tmp, 2); 10 | mu = bsxfun(@rdivide, tmp, denom); 11 | 12 | end -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/GenRep.m: -------------------------------------------------------------------------------- 1 | %% Genereate Graph Representations %% 2 | 3 | function rep = GenRep(input_data, sae, nnsize) 4 | 5 | len = length(nnsize); 6 | 7 | nnFF = nnsetup(nnsize); 8 | nnFF.activation_function = 'tanh' ; 9 | nnFF.output = 'tanh' ; 10 | 11 | 12 | num_layers = len - 1; 13 | for i = 1:num_layers 14 | nnFF.W{i} = sae.ae{i}.W{1}; 15 | end 16 | 17 | % do FFNN 18 | nnFF.testing = 1; 19 | nnFF = nnff(nnFF, input_data, zeros(size(input_data,1), nnFF.size(end))); 20 | 21 | 22 | for i=1:size(nnFF.a,2)-1 23 | nnFF.a{i}(:,1)=[]; %remove the fistr temr, bias 24 | end 25 | 26 | rep = nnFF.a(2:end); %return the hidden representation at each layer 27 | 28 | end -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nnapplygrads.m: -------------------------------------------------------------------------------- 1 | function nn = nnapplygrads(nn) 2 | %NNAPPLYGRADS updates weights and biases with calculated gradients 3 | % nn = nnapplygrads(nn) returns an neural network structure with updated 4 | % weights and biases 5 | 6 | for i = 1 : (nn.n - 1) 7 | 8 | if(nn.weightPenaltyL2>0) 9 | dW = nn.dW{i} + nn.weightPenaltyL2 * [zeros(size(nn.W{i},1),1) nn.W{i}(:,2:end)]; 10 | else 11 | dW = nn.dW{i}; 12 | end 13 | 14 | 15 | dW = nn.learningRate * dW; 16 | 17 | 18 | if(nn.momentum>0) 19 | nn.vW{i} = nn.momentum*nn.vW{i} + dW; 20 | dW = nn.vW{i}; 21 | end 22 | 23 | nn.W{i} = nn.W{i} - dW; 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/softmaxPredict.m: -------------------------------------------------------------------------------- 1 | function [pred] = softmaxPredict(softmaxModel, data) 2 | 3 | % softmaxModel - model trained using softmaxTrain 4 | % data - the N x M input matrix, where each column data(:, i) corresponds to 5 | % a single test set 6 | % 7 | % Your code should produce the prediction matrix 8 | % pred, where pred(i) is argmax_c P(y(c) | x(i)). 9 | 10 | % Unroll the parameters from theta 11 | theta = softmaxModel.optTheta; % this provides a numClasses x inputSize matrix 12 | pred = zeros(1, size(data, 2)); 13 | 14 | %% ---------- YOUR CODE HERE -------------------------------------- 15 | % Instructions: Compute pred using theta assuming that the labels start 16 | % from 1. 
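% Because exp(.) is monotonic and the softmax denominator is shared across
% classes for a fixed example, the argmax of the raw scores theta*data equals
% the argmax of the softmax probabilities, so no normalisation is needed for
% prediction. A minimal sketch of this step (assuming theta is
% numClasses x inputSize and data is inputSize x M, as documented above):
%   scores = theta * data;            % numClasses x M score matrix
%   [~, pred] = max(scores, [], 1);   % per-column argmax = predicted class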
17 | 18 | p = theta*data; 19 | [~, idx] = max(p,[],1); 20 | 21 | pred = idx; 22 | 23 | 24 | 25 | 26 | % --------------------------------------------------------------------- 27 | 28 | end 29 | 30 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/ComputeAP.m: -------------------------------------------------------------------------------- 1 | % pl=load('data.csv');% a table with predicted propabilities and ground-truth labels (1/0) 2 | % % pl=1-pl; %negative class 3 | 4 | %% compute average precision 5 | function AP=ComputeAP(pl) 6 | rpl=sortrows(pl,1,'descend'); %rank pl by predicted probabilities in descending order 7 | precision=zeros(size(rpl,1),1); 8 | recall=zeros(size(rpl,1),1); 9 | 10 | %% calculate precision and recall at each index from 1 to N (number of testing examples) 11 | for i=1:size(rpl,1) 12 | precision(i)=sum(rpl(1:i,2))/i; 13 | recall(i)=sum(rpl(1:i,2))/sum(rpl(:,2)); 14 | end 15 | 16 | rpl=[rpl precision recall]; 17 | 18 | %% calculate average precision 19 | [Urecall,~,~] = unique(rpl(:,end)); 20 | AP=0; %average precision 21 | for i=1:length(Urecall) 22 | indexR=find(rpl(:,end)==Urecall(i)); 23 | AP=AP+max(rpl(indexR,3)); %find the max precision with the fixed recall 24 | end 25 | AP=AP/size(Urecall,1); 26 | 27 | end -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nneval.m: -------------------------------------------------------------------------------- 1 | function [loss] = nneval(nn, loss, train_x, train_y, val_x, val_y) 2 | %NNEVAL evaluates performance of neural network 3 | % Returns a updated loss struct 4 | assert(nargin == 4 || nargin == 6, 'Wrong number of arguments'); 5 | 6 | nn.testing = 1; 7 | % training performance 8 | nn = nnff(nn, train_x, train_y); 9 | loss.train.e(end + 1) = nn.L; 10 | 11 | % validation performance 12 | if nargin == 6 13 | nn = nnff(nn, val_x, val_y); 14 | loss.val.e(end + 1) = nn.L; 15 | end 16 | nn.testing = 0; 17 | %calc misclassification rate if softmax 18 | if strcmp(nn.output,'softmax') 19 | [er_train, dummy] = nntest(nn, train_x, train_y); 20 | loss.train.e_frac(end+1) = er_train; 21 | 22 | if nargin == 6 23 | [er_val, dummy] = nntest(nn, val_x, val_y); 24 | loss.val.e_frac(end+1) = er_val; 25 | end 26 | end 27 | 28 | end 29 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/softmaxCost.m: -------------------------------------------------------------------------------- 1 | function [cost, grad] = softmaxCost(theta, numClasses, inputSize, lambda, data, labels) 2 | 3 | % numClasses - the number of classes 4 | % inputSize - the size N of the input vector 5 | % lambda - weight decay parameter 6 | % data - the N x M input matrix, where each column data(:, i) corresponds to 7 | % a single test set 8 | % labels - an M x 1 matrix containing the labels corresponding for the input data 9 | % 10 | 11 | % Unroll the parameters from theta 12 | theta = reshape(theta, numClasses, inputSize); 13 | 14 | numCases = size(data, 2); 15 | 16 | groundTruth = full(sparse(labels, 1:numCases, 1)); 17 | cost = 0; 18 | 19 | thetagrad = zeros(numClasses, inputSize); 20 | 21 | %% ---------- YOUR CODE HERE -------------------------------------- 22 | % Instructions: Compute the cost and gradient for softmax regression. 23 | % You need to compute thetagrad and cost. 24 | % The groundTruth matrix might come in handy. 
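% The quantity computed below is the regularised softmax (multinomial logistic)
% loss; in the notation of this file it is, roughly,
%   cost = -(1/numCases) * sum_i log( exp(theta(y_i,:)*data(:,i))
%                                     / sum_c exp(theta(c,:)*data(:,i)) )
%          + (lambda/2) * sum(theta(:).^2)
% Subtracting the per-column maximum before exponentiating (as done below) only
% improves numerical stability and leaves the value of the cost unchanged.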
25 | 26 | M = theta*data; 27 | M = bsxfun(@minus, M, max(M, [], 1)); 28 | M = bsxfun(@rdivide, exp(M), sum(exp(M))); 29 | M = groundTruth .* M; 30 | 31 | M = log(M (M~=0) ); 32 | 33 | % Decay terms 34 | theta_flat = theta(:); 35 | theta_flat = theta_flat .^ 2; 36 | term2 = (lambda/2)*sum(theta_flat); 37 | 38 | cost = -mean(M) + term2; 39 | 40 | % Gradient 41 | % TODO below vars are calculated twice save it earlier 42 | sub_max = bsxfun(@minus, theta*data, max(theta*data, [], 1)); % these are calculated twice 43 | temp = bsxfun(@rdivide, exp(sub_max), sum(exp(sub_max))); 44 | 45 | gt_term = groundTruth - temp; 46 | decay_term = lambda.*theta; 47 | 48 | thetagrad = ((-1/size(data,2))*data*gt_term')' + decay_term; 49 | 50 | % ------------------------------------------------------------------ 51 | % Unroll the gradient matrices into a vector for minFunc 52 | grad = [thetagrad(:)]; 53 | end 54 | 55 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nnsetup.m: -------------------------------------------------------------------------------- 1 | function nn = nnsetup(architecture) 2 | %NNSETUP creates a Feedforward Backpropagate Neural Network 3 | % nn = nnsetup(architecture) returns an neural network structure with n=numel(architecture) 4 | % layers, architecture being a n x 1 vector of layer sizes e.g. [784 100 10] 5 | 6 | nn.size = architecture; 7 | nn.n = numel(nn.size); 8 | 9 | nn.activation_function = 'sigm'; % Activation functions of hidden layers: 'sigm' (sigmoid) or 'tanh_opt' (optimal tanh). 10 | nn.learningRate = 2; % learning rate Note: typically needs to be lower when using 'sigm' activation function and non-normalized inputs. 11 | nn.momentum = 0.5; % Momentum 12 | nn.scaling_learningRate = 1; % Scaling factor for the learning rate (each epoch) 13 | nn.weightPenaltyL2 = 0; % L2 regularization 14 | nn.nonSparsityPenalty = 0; % Non sparsity penalty 15 | nn.sparsityTarget = 0.05; % Sparsity target 16 | nn.inputZeroMaskedFraction = 0; % Used for Denoising AutoEncoders 17 | nn.dropoutFraction = 0; % Dropout level (http://www.cs.toronto.edu/~hinton/absps/dropout.pdf) 18 | nn.testing = 0; % Internal variable. nntest sets this to one. 
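% These configuration options are plain struct fields, so callers simply
% override them after construction; a minimal sketch (values are illustrative
% only, mirroring how the DNESBP_* example scripts configure each autoencoder):
%   nn = nnsetup([784 100 10]);
%   nn.activation_function = 'tanh';
%   nn.learningRate        = 0.025;
%   nn.weightPenaltyL2     = 0.05;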
19 | nn.output = 'sigm'; % output unit 'sigm' (=logistic), 'softmax' and 'linear' 20 | 21 | for i = 2 : nn.n 22 | % weights and weight momentum 23 | nn.W{i - 1} = (rand(nn.size(i), nn.size(i - 1)+1) - 0.5) * 2 * 4 * sqrt(6 / (nn.size(i) + nn.size(i - 1))); 24 | nn.vW{i - 1} = zeros(size(nn.W{i - 1})); 25 | 26 | % average activations (for use with sparsity) 27 | nn.p{i} = zeros(1, nn.size(i)); 28 | end 29 | end 30 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/DNESBP_CD.m: -------------------------------------------------------------------------------- 1 | %% Learn node vector representations by DNE-SBP for signed network community detection %% 2 | 3 | %% hyperparameters 4 | %beta: ratio of penalty on reconstruction errors of observed connections over that of unobserved connections 5 | % r = #positive edges/ #negative edges 6 | % r is the ratio of penalty for reconstruction errors of negative links over that of positive links 7 | % r is also the ratio of weight of pairwise constraints for negatively connected nodes over that for positively connected nodes 8 | %alfa1: weight of pairwise constraints for 1-st layer of SAE 9 | %alfa2: weight of pairwise constraints for deep layers of SAE 10 | %nnsize: Dimensionality of each layer of SAE 11 | 12 | %% Inputs 13 | %sae: configuration of stacked autoencoder 14 | %adj: adjacency matrix 15 | 16 | %% Output 17 | %rep: node vector representation learned by DNE-SBP for signed network community detection 18 | 19 | function rep = DNESBP_CD(sae, nnsize,adj, beta,r, alfa1,alfa2) 20 | 21 | sae = saetrain_CD(sae, adj, beta,r,adj, alfa1,alfa2); 22 | rep = GenRep(adj, sae, nnsize); % node vector representation learned by DNE-SBP 23 | 24 | end 25 | 26 | 27 | function sae = saetrain_CD(sae, x, beta,r,network, alfa1,alfa2) 28 | 29 | for i = 1 : numel(sae.ae) 30 | disp(['Training SAE ' num2str(i) '/' num2str(numel(sae.ae))]); 31 | opts.batchsize = 1000; %process how many instances in each batch , community detection 32 | 33 | if i==1 34 | opts.numepochs = 100; 35 | alfa=alfa1; 36 | else 37 | opts.numepochs =50; 38 | 39 | beta=1; % not beta penalty on second layer autoencoder 40 | r=1; % not extra penalty on second layer autoencoder 41 | alfa=alfa2; 42 | end 43 | 44 | 45 | sae.ae{i} = saenntrain(sae.ae{i}, x, x, opts,beta,r,network, alfa); 46 | t = nnff(sae.ae{i}, x, x); 47 | 48 | x = t.a{2}; 49 | %remove bias term 50 | x = x(:,2:end); 51 | end 52 | 53 | end 54 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/DNESBP_LP.m: -------------------------------------------------------------------------------- 1 | %% Learn node vector representations by DNE-SBP for link sign prediction %% 2 | 3 | %% hyperparameters 4 | %beta: ratio of penalty on reconstruction errors of observed connections over that of unobserved connections 5 | % r = #positive edges/ #negative edges 6 | % r is the ratio of penalty for reconstruction errors of negative links over that of positive links 7 | % r is also the ratio of weight of pairwise constraints for negatively connected nodes over that for positively connected nodes 8 | %alfa1: weight of pairwise constraints for 1-st layer of SAE 9 | %alfa2: weight of pairwise constraints for deep layers of SAE 10 | %nnsize: Dimensionality of each layer of SAE 11 | 12 | %% Inputs 13 | %sae: configuration of stacked autoencoder 14 | %adj: training adjacency matrix 15 | 16 | %% Output 17 | %rep: node vector representation learned by 
DNE-SBP for link sign prediction 18 | 19 | function rep = DNESBP_LP(sae, nnsize,adj, beta,r, alfa1,alfa2) 20 | 21 | sae = saetrain_LP(sae, adj, beta,r,adj, alfa1,alfa2); 22 | rep = GenRep(adj, sae, nnsize); % node vector representation learned by DNE-SBP 23 | 24 | end 25 | 26 | 27 | function sae = saetrain_LP(sae, x, beta,r,network, alfa1,alfa2) 28 | for i = 1 : numel(sae.ae) 29 | disp(['Training SAE ' num2str(i) '/' num2str(numel(sae.ae))]); 30 | 31 | if i==1 32 | opts.batchsize = 500; %process how many instances in each batch 33 | opts.numepochs = 100; 34 | alfa=alfa1; 35 | else 36 | opts.batchsize = 100; %process how many instances in each batch 37 | opts.numepochs =50; 38 | 39 | beta=1; % not beta penalty on deep layer autoencoder 40 | r=1; % not extra penalty on deep layer autoencoder 41 | alfa=alfa2; 42 | end 43 | 44 | sae.ae{i} = saenntrain(sae.ae{i}, x, x, opts,beta,r,network, alfa); 45 | t = nnff(sae.ae{i}, x, x); 46 | 47 | x = t.a{2}; 48 | %remove bias term 49 | x = x(:,2:end); 50 | end 51 | 52 | end 53 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nnbp.m: -------------------------------------------------------------------------------- 1 | function nn = nnbp(nn) 2 | %NNBP performs backpropagation 3 | % nn = nnbp(nn) returns an neural network structure with updated weights 4 | 5 | n = nn.n; 6 | sparsityError = 0; 7 | switch nn.output 8 | case 'sigm' 9 | d{n} = - nn.e .* (nn.a{n} .* (1 - nn.a{n})); 10 | case {'softmax','linear'} 11 | d{n} = - nn.e; 12 | case 'tanh_opt' 13 | d{n} = - nn.e.*(1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{n}.^2)); 14 | case 'tanh' 15 | d{n} = - nn.e.*(1-nn.a{n}.^2); 16 | end 17 | for i = (n - 1) : -1 : 2 18 | % Derivative of the activation function 19 | switch nn.activation_function 20 | case 'sigm' 21 | d_act = nn.a{i} .* (1 - nn.a{i}); 22 | case 'tanh_opt' 23 | d_act = 1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{i}.^2); 24 | case 'tanh' 25 | d_act =1-nn.a{i}.^2; 26 | end 27 | 28 | if(nn.nonSparsityPenalty>0) 29 | pi = repmat(nn.p{i}, size(nn.a{i}, 1), 1); 30 | sparsityError = [zeros(size(nn.a{i},1),1) nn.nonSparsityPenalty * (-nn.sparsityTarget ./ pi + (1 - nn.sparsityTarget) ./ (1 - pi))]; 31 | end 32 | 33 | % Backpropagate first derivatives 34 | if i+1==n % in this case in d{n} there is not the bias term to be removed 35 | d{i} = (d{i + 1} * nn.W{i} + sparsityError) .* d_act; % Bishop (5.56) 36 | else % in this case in d{i} the bias term has to be removed 37 | d{i} = (d{i + 1}(:,2:end) * nn.W{i} + sparsityError) .* d_act; 38 | end 39 | 40 | if(nn.dropoutFraction>0) 41 | d{i} = d{i} .* [ones(size(d{i},1),1) nn.dropOutMask{i}]; 42 | end 43 | 44 | end 45 | 46 | for i = 1 : (n - 1) 47 | if i+1==n 48 | nn.dW{i} = (d{i + 1}' * nn.a{i}) / size(d{i + 1}, 1); 49 | else 50 | nn.dW{i} = (d{i + 1}(:,2:end)' * nn.a{i}) / size(d{i + 1}, 1); 51 | end 52 | end 53 | end 54 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nnupdatefigures.m: -------------------------------------------------------------------------------- 1 | function nnupdatefigures(nn,fhandle,L,opts,i) 2 | %NNUPDATEFIGURES updates figures during training 3 | if i > 1 %dont plot first point, its only a point 4 | x_ax = 1:i; 5 | % create legend 6 | if opts.validation == 1 7 | M = {'Training','Validation'}; 8 | else 9 | M = {'Training'}; 10 | end 11 | 12 | %create data for plots 13 | if strcmp(nn.output,'softmax') 14 | plot_x = x_ax'; 15 | plot_ye = L.train.e'; 16 | plot_yfrac = 
L.train.e_frac'; 17 | 18 | else 19 | plot_x = x_ax'; 20 | plot_ye = L.train.e'; 21 | end 22 | 23 | %add error on validation data if present 24 | if opts.validation == 1 25 | plot_x = [plot_x, x_ax']; 26 | plot_ye = [plot_ye,L.val.e']; 27 | end 28 | 29 | 30 | %add classification error on validation data if present 31 | if opts.validation == 1 && strcmp(nn.output,'softmax') 32 | plot_yfrac = [plot_yfrac, L.val.e_frac']; 33 | end 34 | 35 | % plotting 36 | figure(fhandle); 37 | if strcmp(nn.output,'softmax') %also plot classification error 38 | 39 | p1 = subplot(1,2,1); 40 | plot(plot_x,plot_ye); 41 | xlabel('Number of epochs'); ylabel('Error');title('Error'); 42 | title('Error') 43 | legend(p1, M,'Location','NorthEast'); 44 | set(p1, 'Xlim',[0,opts.numepochs + 1]) 45 | 46 | p2 = subplot(1,2,2); 47 | plot(plot_x,plot_yfrac); 48 | xlabel('Number of epochs'); ylabel('Misclassification rate'); 49 | title('Misclassification rate') 50 | legend(p2, M,'Location','NorthEast'); 51 | set(p2, 'Xlim',[0,opts.numepochs + 1]) 52 | 53 | else 54 | 55 | p = plot(plot_x,plot_ye); 56 | xlabel('Number of epochs'); ylabel('Error');title('Error'); 57 | legend(p, M,'Location','NorthEast'); 58 | set(gca, 'Xlim',[0,opts.numepochs + 1]) 59 | 60 | end 61 | drawnow; 62 | end 63 | end 64 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/softmaxTrain.m: -------------------------------------------------------------------------------- 1 | function [softmaxModel] = softmaxTrain(inputSize, numClasses, lambda, inputData, labels, options) 2 | %softmaxTrain Train a softmax model with the given parameters on the given 3 | % data. Returns softmaxOptTheta, a vector containing the trained parameters 4 | % for the model. 5 | % 6 | % inputSize: the size of an input vector x^(i) 7 | % numClasses: the number of classes 8 | % lambda: weight decay parameter 9 | % inputData: an N by M matrix containing the input data, such that 10 | % inputData(:, c) is the cth input 11 | % labels: M by 1 matrix containing the class labels for the 12 | % corresponding inputs. labels(c) is the class label for 13 | % the cth input 14 | % options (optional): options 15 | % options.maxIter: number of iterations to train for 16 | 17 | if ~exist('options', 'var') 18 | options = struct; 19 | end 20 | 21 | if ~isfield(options, 'maxIter') 22 | options.maxIter = 400; 23 | end 24 | 25 | % initialize parameters 26 | theta = 0.005 * randn(numClasses * inputSize, 1); 27 | 28 | % Use minFunc to minimize the function 29 | % addpath(genpath('C:\Users\hkpuadmin\Desktop\UFLDL_Tutorial-master\UFLDL_Tutorial-master\PS1\ex3\minFunc')); 30 | 31 | 32 | options.Method = 'lbfgs'; % Here, we use L-BFGS to optimize our cost 33 | % function. Generally, for minFunc to work, you 34 | % need a function pointer with two outputs: the 35 | % function value and the gradient. In our problem, 36 | % softmaxCost.m satisfies this. 37 | minFuncOptions.display = 'on'; 38 | 39 | [softmaxOptTheta, cost] = minFunc( @(p) softmaxCost(p, ... 40 | numClasses, inputSize, lambda, ... 41 | inputData, labels), ... 
42 | theta, options); 43 | 44 | % Fold softmaxOptTheta into a nicer format 45 | softmaxModel.optTheta = reshape(softmaxOptTheta, numClasses, inputSize); 46 | softmaxModel.inputSize = inputSize; 47 | softmaxModel.numClasses = numClasses; 48 | 49 | end 50 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nnff.m: -------------------------------------------------------------------------------- 1 | function nn = nnff(nn, x, y) 2 | %NNFF performs a feedforward pass 3 | % nn = nnff(nn, x, y) returns an neural network structure with updated 4 | % layer activations, error and loss (nn.a, nn.e and nn.L) 5 | 6 | n = nn.n; 7 | m = size(x, 1); 8 | 9 | x = [ones(m,1) x]; 10 | nn.a{1} = x; 11 | 12 | %feedforward pass 13 | for i = 2 : n-1 14 | switch nn.activation_function 15 | case 'sigm' 16 | % Calculate the unit's outputs (including the bias term) 17 | nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}'); 18 | case 'tanh_opt' 19 | nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}'); 20 | case 'tanh' 21 | nn.a{i} = tanh(nn.a{i - 1} * nn.W{i - 1}'); 22 | end 23 | 24 | %dropout 25 | if(nn.dropoutFraction > 0) 26 | if(nn.testing) 27 | nn.a{i} = nn.a{i}.*(1 - nn.dropoutFraction); 28 | else 29 | nn.dropOutMask{i} = (rand(size(nn.a{i}))>nn.dropoutFraction); 30 | nn.a{i} = nn.a{i}.*nn.dropOutMask{i}; 31 | end 32 | end 33 | 34 | %calculate running exponential activations for use with sparsity 35 | if(nn.nonSparsityPenalty>0) 36 | nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean(nn.a{i}, 1); 37 | end 38 | 39 | %Add the bias term 40 | nn.a{i} = [ones(m,1) nn.a{i}]; 41 | end 42 | switch nn.output 43 | case 'sigm' 44 | nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}'); 45 | case 'linear' 46 | nn.a{n} = nn.a{n - 1} * nn.W{n - 1}'; 47 | case 'softmax' 48 | nn.a{n} = nn.a{n - 1} * nn.W{n - 1}'; 49 | nn.a{n} = exp(bsxfun(@minus, nn.a{n}, max(nn.a{n},[],2))); 50 | nn.a{n} = bsxfun(@rdivide, nn.a{n}, sum(nn.a{n}, 2)); 51 | case 'tanh_opt' 52 | nn.a{n} = tanh_opt(nn.a{n - 1} * nn.W{n - 1}'); 53 | case 'tanh' 54 | nn.a{n} = tanh(nn.a{n - 1} * nn.W{n - 1}'); 55 | end 56 | 57 | %error and loss 58 | nn.e = y - nn.a{n}; 59 | 60 | switch nn.output 61 | case {'sigm', 'linear','tanh_opt','tanh'} 62 | nn.L = 1/2 * sum(sum(nn.e .^ 2)) / m; 63 | case 'softmax' 64 | nn.L = -sum(sum(y .* log(nn.a{n}))) / m; 65 | end 66 | end 67 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/nntrain.m: -------------------------------------------------------------------------------- 1 | function [nn, L] = nntrain(nn, train_x, train_y, opts, val_x, val_y) 2 | %NNTRAIN trains a neural net 3 | % [nn, L] = nnff(nn, x, y, opts) trains the neural network nn with input x and 4 | % output y for opts.numepochs epochs, with minibatches of size 5 | % opts.batchsize. Returns a neural network nn with updated activations, 6 | % errors, weights and biases, (nn.a, nn.e, nn.W, nn.b) and L, the sum 7 | % squared error for each training minibatch. 
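% A minimal usage sketch (architecture and option values are illustrative only;
% the field names follow the checks performed in this function):
%   nn = nnsetup([size(train_x,2) 100 size(train_y,2)]);
%   opts.numepochs = 10;    % passes over the training set
%   opts.batchsize = 100;   % examples per minibatch (a trailing partial batch is dropped)
%   [nn, L] = nntrain(nn, train_x, train_y, opts);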
8 | 9 | assert(isfloat(train_x), 'train_x must be a float'); 10 | assert(nargin == 4 || nargin == 6,'number ofinput arguments must be 4 or 6') 11 | 12 | loss.train.e = []; 13 | loss.train.e_frac = []; 14 | loss.val.e = []; 15 | loss.val.e_frac = []; 16 | opts.validation = 0; 17 | if nargin == 6 18 | opts.validation = 1; 19 | end 20 | 21 | fhandle = []; 22 | if isfield(opts,'plot') && opts.plot == 1 23 | fhandle = figure(); 24 | end 25 | 26 | m = size(train_x, 1); 27 | 28 | batchsize = opts.batchsize; 29 | numepochs = opts.numepochs; 30 | 31 | numbatches = m / batchsize; 32 | 33 | %---update here by Shaosheng---% 34 | numbatches = floor(numbatches); 35 | %------------------------------% 36 | 37 | assert(rem(numbatches, 1) == 0, 'numbatches must be a integer'); 38 | 39 | L = zeros(numepochs*numbatches,1); 40 | n = 1; 41 | for i = 1 : numepochs 42 | tic; 43 | 44 | kk = randperm(m); 45 | for l = 1 : numbatches 46 | batch_x = train_x(kk((l - 1) * batchsize + 1 : l * batchsize), :); 47 | 48 | %Add noise to input (for use in denoising autoencoder) 49 | if(nn.inputZeroMaskedFraction ~= 0) 50 | batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction); 51 | end 52 | 53 | batch_y = train_y(kk((l - 1) * batchsize + 1 : l * batchsize), :); 54 | 55 | nn = nnff(nn, batch_x, batch_y); 56 | nn = nnbp(nn); 57 | nn = nnapplygrads(nn); 58 | 59 | L(n) = nn.L; 60 | 61 | n = n + 1; 62 | end 63 | 64 | t = toc; 65 | 66 | if opts.validation == 1 67 | loss = nneval(nn, loss, train_x, train_y, val_x, val_y); 68 | str_perf = sprintf('; Full-batch train mse = %f, val mse = %f', loss.train.e(end), loss.val.e(end)); 69 | else 70 | loss = nneval(nn, loss, train_x, train_y); 71 | str_perf = sprintf('; Full-batch train err = %f', loss.train.e(end)); 72 | end 73 | if ishandle(fhandle) 74 | nnupdatefigures(nn, fhandle, loss, opts, i); 75 | end 76 | 77 | disp(['epoch ' num2str(i) '/' num2str(opts.numepochs) '. Took ' num2str(t) ' seconds' '. 
Mini-batch mean squared error on training set is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]); 78 | 79 | nn.learningRate = nn.learningRate * nn.scaling_learningRate; 80 | %nn.learningRate = 1 - (i/numepochs)*nn.learningRate; 81 | end 82 | end 83 | 84 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/saeff.m: -------------------------------------------------------------------------------- 1 | function nn = saeff(nn, x, y,beta,r,laplace, alfa) 2 | %NNFF performs a feedforward pass 3 | % nn = nnff(nn, x, y) returns an neural network structure with updated 4 | % layer activations, error and loss (nn.a, nn.e and nn.L) 5 | %% add more penalty to non-zero elements for autoencoder %% 6 | 7 | n = nn.n; 8 | m = size(x, 1); 9 | 10 | x = [ones(m,1) x]; 11 | nn.a{1} = x; 12 | 13 | %feedforward pass 14 | for i = 2 : n-1 15 | switch nn.activation_function 16 | case 'sigm' 17 | % Calculate the unit's outputs (including the bias term) 18 | nn.a{i} = sigm(nn.a{i - 1} * nn.W{i - 1}'); 19 | case 'tanh_opt' 20 | nn.a{i} = tanh_opt(nn.a{i - 1} * nn.W{i - 1}'); 21 | case 'tanh' 22 | nn.a{i} = tanh(nn.a{i - 1} * nn.W{i - 1}'); 23 | end 24 | 25 | %dropout 26 | if(nn.dropoutFraction > 0) 27 | if(nn.testing) 28 | nn.a{i} = nn.a{i}.*(1 - nn.dropoutFraction); 29 | else 30 | nn.dropOutMask{i} = (rand(size(nn.a{i}))>nn.dropoutFraction); 31 | nn.a{i} = nn.a{i}.*nn.dropOutMask{i}; 32 | end 33 | end 34 | 35 | %calculate running exponential activations for use with sparsity 36 | if(nn.nonSparsityPenalty>0) 37 | nn.p{i} = 0.99 * nn.p{i} + 0.01 * mean(nn.a{i}, 1); 38 | end 39 | 40 | %Add the bias term 41 | nn.a{i} = [ones(m,1) nn.a{i}]; 42 | end 43 | switch nn.output 44 | case 'sigm' 45 | nn.a{n} = sigm(nn.a{n - 1} * nn.W{n - 1}'); 46 | case 'tanh_opt' 47 | nn.a{n} = tanh_opt(nn.a{n - 1} * nn.W{n - 1}'); 48 | case 'tanh' 49 | nn.a{n} = tanh(nn.a{n - 1} * nn.W{n - 1}'); 50 | case 'linear' 51 | nn.a{n} = nn.a{n - 1} * nn.W{n - 1}'; 52 | case 'softmax' 53 | nn.a{n} = nn.a{n - 1} * nn.W{n - 1}'; 54 | nn.a{n} = exp(bsxfun(@minus, nn.a{n}, max(nn.a{n},[],2))); 55 | nn.a{n} = bsxfun(@rdivide, nn.a{n}, sum(nn.a{n}, 2)); 56 | end 57 | 58 | %error and loss 59 | nn.e = y - nn.a{n}; 60 | 61 | %% add more penalty to non-zero elements for autoencoder %% 62 | if(beta~=1) 63 | pos_index=find(y>0); 64 | nn.e(pos_index)= nn.e(pos_index)*beta; % penalty for non-zero input elements (observed links) 65 | neg_index=find(y<0); 66 | nn.e(neg_index)= nn.e(neg_index)*(beta*r); % larger penalty for negative links 67 | end 68 | 69 | 70 | 71 | switch nn.output 72 | case {'sigm', 'linear','tanh_opt','tanh'} 73 | nn.L = 1/2 * sum(sum((nn.e).^ 2)) / m; 74 | case 'softmax' 75 | nn.L = -sum(sum(y .* log(nn.a{n}))) / m; 76 | end 77 | 78 | 79 | %% add pairwise constraints %% 80 | Y=nn.a{2}(:,2:end); %hidden representation learned from autoencoder 81 | nn.L= nn.L+ (alfa/m)*trace(Y'*laplace*Y); 82 | end 83 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/DNESBP_CD_epinions.m: -------------------------------------------------------------------------------- 1 | %% An Example Case %% 2 | clear all; 3 | addpath(genpath('../../code')); 4 | 5 | load('epinions_UD.mat'); %dataset 6 | G=graph(Gwl_ud); 7 | edgeNum=numedges(G); 8 | 9 | %% this is the configuration of stacked autoencoder %% 10 | num_nodes = size(Gwl_ud,1); %number of vertex 11 | nnsize = [num_nodes 512 256 128 64]; %layer-wised setting 12 | len = length(nnsize); % number of layers 
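% With this setting, saesetup(nnsize) (called below) creates one symmetric
% autoencoder per adjacent pair of layer sizes, i.e. roughly:
%   sae.ae{1}: [num_nodes 512 num_nodes]
%   sae.ae{2}: [512 256 512]
%   sae.ae{3}: [256 128 256]
%   sae.ae{4}: [128  64 128]
% so rep{end} returned by DNESBP_CD below is the 64-dimensional embedding that
% k-means clusters at the end of this script.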
13 | 14 | rand('state',0) 15 | sae = saesetup(nnsize); 16 | 17 | for i = 1: len - 1 18 | sae.ae{i}.activation_function = 'tanh'; %tanh, tanh_opt ,sigm 19 | sae.ae{i}.output = 'tanh'; 20 | sae.ae{i}.dropoutFraction = 0; % Dropout fraction, only used for fine tuning 21 | sae.ae{i}.momentum = 0; % Momentum 22 | sae.ae{i}.scaling_learningRate = 0.95; % Scaling factor for the learning rate (each epoch) 23 | sae.ae{i}.nonSparsityPenalty = 0; % 0 indicates Non sparsity penalty 24 | sae.ae{i}.sparsityTarget = 0.01; % Sparsity target 25 | sae.ae{i}.inputZeroMaskedFraction = 0.0; % Used for Denoising AutoEncoders 26 | 27 | if i==1 28 | sae.ae{i}.learningRate = 0.025; 29 | sae.ae{i}.weightPenaltyL2 = 0.05; % L2 regularization 30 | else 31 | sae.ae{i}.learningRate = 0.015; 32 | sae.ae{i}.weightPenaltyL2 = 0.1; % L2 regularization 33 | end 34 | end 35 | 36 | %% hyperparameter settings 37 | beta=10; %ratio of penalty on reconstruction errors of observed connections over that of unobserved connections 38 | 39 | r=floor(length(find(G.Edges{:,2}==1))/length(find(G.Edges{:,2}==-1))); 40 | % #positive edges/ #negative edges 41 | % r is the ratio of penalty for reconstruction errors of negative links over that of positive links 42 | % r is also the ratio of weight of pairwise constraints for negatively connected nodes over that for positively connected nodes 43 | 44 | alfa1=10; %weight of pairwise constraints for 1-st layer of SAE 45 | alfa2=1.5; %weight of pairwise constraints for deeper layers of SAE 46 | 47 | 48 | %% node vector representation learned by DNE-SBP 49 | rep = DNESBP_CD(sae, nnsize,Gwl_ud, beta,r, alfa1,alfa2); 50 | 51 | 52 | %% k-means clustering on embedding features 53 | errorAllK=[]; %error rates for different number of clusters 54 | for numCluster=2:10 %number of communities 55 | idx = kmeans(rep{end},numCluster,'Replicates',20,'MaxIter',1000); 56 | table=tabulate(idx); 57 | 58 | %% compute errors 59 | error=0; %sum of number of positive links between different clusters and negative links within the same cluster 60 | for i=1:edgeNum 61 | [s,t]=findedge(G,i); 62 | if((Gwl_ud(s,t)<0)&&(idx(s)==idx(t))) 63 | %negative links within a cluster 64 | error=error+1; 65 | else if((Gwl_ud(s,t)>0)&&(idx(s)~=idx(t))) 66 | % positive links between clusters 67 | error=error+1; 68 | end 69 | end 70 | end 71 | error=error/edgeNum; 72 | errorAllK=[errorAllK error]; 73 | end 74 | 75 | 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | Deep network embedding for graph representation learning in signed networks 4 | ==== 5 | This repository contains the author's implementation in Matlab for the paper "Deep network embedding for graph representation learning in signed networks". 6 | 7 | 8 | DNE-SBP Model Descriptions 9 | ==== 10 | 11 | **Input** 12 | ------- 13 | Load ".mat" file and get an input matrix **"Gwl_ud"**, i.e., the signed adjacency matrix of a network. 
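A minimal loading sketch (the dataset name and the call pattern below follow the example scripts under `DNESBP_public/code/DNESBP/`; the inspection lines are only illustrative):

```matlab
load('wiki_UD.mat');           % provides Gwl_ud, the signed adjacency matrix (entries +1, -1, or 0)
G = graph(Gwl_ud);             % undirected signed graph built from the adjacency matrix
num_nodes = size(Gwl_ud, 1);   % number of vertices
num_edges = numedges(G);       % number of observed (signed) links
```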
14 | 15 | **Hyperparameters** 16 | ------- 17 | 1) beta: ratio of penalty on reconstruction errors of observed connections over that of unobserved connections 18 | 19 | 2) r= #positive edges / #negative edges; 20 | 21 | ratio of penalty for reconstruction errors of negative links over that of positive links; 22 | 23 | ratio of weight of pairwise constraints on negatively connected nodes over that of positively connected nodes 24 | 25 | 3) alfa1: weight of pairwise constraints at 1-st layer of SAE 26 | 27 | alfa2: weight of pairwise constraints at deep layers of SAE 28 | 29 | **Output** 30 | ------- 31 | Low-dimensional node vector representations learned by DNE-SBP are stored in the variable: **"rep"** 32 | 33 | 34 | 35 | 36 | **Application 1: Link sign prediction** 37 | ------- 38 | The function **DNESBP_LP()** in file “DNESBP_LP.m" can generate low-dimensional node vector representations for link sign prediction 39 | Test examples: 40 | 41 | 1) In MATLAB, run “DNESBP_LP_wiki.m”, “DNESBP_LP_slashdot.m”, “DNESBP_LP_epinions.m” for example link sign prediction results on Wiki, Slashdot and Epinions datasets, respectively. 42 | 43 | 2) Use variable “trp” to assign different training percentages. 44 | For example, 45 | "trp=0.2" indicates training percentage fixed as 20%. 46 | "trp=[0.2,0.4,0.6,0.8]" indicates training percentage can be varied among 20%, 40%, 60% and 80%. 47 | 48 | 3) The AUC and AP averaged over 5 random splits are stored in the variables: “avgAUC” and “avgAPN”, where 49 | each row corresponds to a type of edge feature, i.e., "L1", "L2", "Had", and "Avg"; 50 | each column corresponds to a specific training percentage, e.g., " 20%", "40%", "60%" or "80%". 51 | 52 | 53 | 54 | 55 | **Application 2: Signed network community detection** 56 | ------- 57 | The function **DNESBP_CD()** in file “DNESBP_CD.m" can generate low-dimensional node vector representations for signed network community detection 58 | Test examples: 59 | 60 | 1) In MATLAB, run files “DNESBP_CD_wiki.m”, “DNESBP_CD_slashdot.m”, “DNESBP_CD_epinions.m” for example community detection results on Wiki, Slashdot and Epinions datasets, respectively. 61 | 62 | 2) Use variable “numCluster” to assign different numbers of clusters. 63 | For example, "numCluster=2:10" indicates the number of clusters can be varied between 2 and 10. 64 | 65 | 3) The error rates of signed network clustering are stored in the variable: “errorAllK”, where 66 | each k-th column corresponds to the error rate given a specific number of k clusters. 67 | 68 | Please cite our paper as 69 | ==== 70 | X. Shen and F.-L. Chung, "Deep network embedding for graph representation learning in signed networks," IEEE Transactions on Cybernetics, vol. 50, no. 4, pp. 1556-1568, 2020. 
71 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/DNESBP_CD_slashdot.m: -------------------------------------------------------------------------------- 1 | %% An Example Case %% 2 | clear all; 3 | addpath(genpath('../../code')); 4 | 5 | load('slashdot_UD.mat'); %dataset 6 | G=graph(Gwl_ud); 7 | edgeNum=numedges(G); 8 | 9 | 10 | %% this is the configuration of stacked autoencoder %% 11 | num_nodes = size(Gwl_ud,1); %number of vertex 12 | nnsize = [num_nodes 512 256 128 64]; %layer-wised setting 13 | len = length(nnsize); % number of layers 14 | 15 | rand('state',0) 16 | sae = saesetup(nnsize); 17 | 18 | for i = 1: len - 1 19 | sae.ae{i}.activation_function = 'tanh'; %tanh, tanh_opt ,sigm 20 | sae.ae{i}.output = 'tanh'; 21 | sae.ae{i}.dropoutFraction = 0; % Dropout fraction, only used for fine tuning 22 | sae.ae{i}.momentum = 0; % Momentum 23 | sae.ae{i}.scaling_learningRate = 0.95; % Scaling factor for the learning rate (each epoch) 24 | sae.ae{i}.nonSparsityPenalty = 0; % 0 indicates Non sparsity penalty 25 | sae.ae{i}.sparsityTarget = 0.01; % Sparsity target 26 | sae.ae{i}.inputZeroMaskedFraction = 0.0; % Used for Denoising AutoEncoders 27 | 28 | if i==1 29 | sae.ae{i}.learningRate = 0.025; 30 | sae.ae{i}.weightPenaltyL2 = 0.05; % L2 regularization 31 | else 32 | sae.ae{i}.learningRate = 0.015; 33 | sae.ae{i}.weightPenaltyL2 = 0.1; % L2 regularization 34 | end 35 | end 36 | 37 | %% hyperparameter settings 38 | beta=25; %ratio of penalty on reconstruction errors of observed connections over that of unobserved connections 39 | 40 | r=floor(length(find(G.Edges{:,2}==1))/length(find(G.Edges{:,2}==-1))); 41 | % #positive edges/ #negative edges 42 | % r is the ratio of penalty for reconstruction errors of negative links over that of positive links 43 | % r is also the ratio of weight of pairwise constraints for negatively connected nodes over that for positively connected nodes 44 | 45 | alfa1=14; %weight of pairwise constraints for 1-st layer of SAE 46 | alfa2=1.5; %weight of pairwise constraints for deeper layers of SAE 47 | 48 | %% node vector representation learned by DNE-SBP 49 | rep = DNESBP_CD(sae, nnsize,Gwl_ud, beta,r, alfa1,alfa2); 50 | 51 | 52 | %% k-means clustering on embedding features 53 | errorAllK=[]; %error rates for different number of clusters (k) 54 | for numCluster=2:10 %number of communities 55 | idx = kmeans(rep{end},numCluster,'Replicates',20,'MaxIter',1000); 56 | table=tabulate(idx); 57 | 58 | %% compute errors 59 | error=0; %sum of number of positive links between different clusters and negative links within the same cluster 60 | for i=1:edgeNum 61 | [s,t]=findedge(G,i); 62 | if((Gwl_ud(s,t)<0)&&(idx(s)==idx(t))) 63 | %negative links within a cluster 64 | error=error+1; 65 | else if((Gwl_ud(s,t)>0)&&(idx(s)~=idx(t))) 66 | % positive links between clusters 67 | error=error+1; 68 | end 69 | end 70 | end 71 | error=error/edgeNum; 72 | errorAllK=[errorAllK error]; 73 | end 74 | 75 | 76 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/DNESBP_CD_wiki.m: -------------------------------------------------------------------------------- 1 | %% An Example Case %% 2 | clear all; 3 | addpath(genpath('../../code')); 4 | 5 | load('wiki_UD.mat'); %dataset 6 | G=graph(Gwl_ud); 7 | edgeNum=numedges(G); 8 | 9 | 10 | %% this is the configuration of stacked autoencoder %% 11 | num_nodes = size(Gwl_ud,1); %number of vertex 12 | nnsize = [num_nodes 512 256 128 64]; 
%layer-wised setting 13 | len = length(nnsize); % number of layers 14 | 15 | rand('state',0) 16 | sae = saesetup(nnsize); 17 | 18 | for i = 1: len - 1 19 | sae.ae{i}.activation_function = 'tanh'; %tanh, tanh_opt ,sigm 20 | sae.ae{i}.output = 'tanh'; 21 | sae.ae{i}.dropoutFraction = 0; % Dropout fraction, only used for fine tuning 22 | sae.ae{i}.momentum = 0; % Momentum 23 | sae.ae{i}.scaling_learningRate = 0.95; % Scaling factor for the learning rate (each epoch) 24 | sae.ae{i}.nonSparsityPenalty = 0; % 0 indicates Non sparsity penalty 25 | sae.ae{i}.sparsityTarget = 0.01; % Sparsity target 26 | sae.ae{i}.inputZeroMaskedFraction = 0.0; % Used for Denoising AutoEncoders 27 | 28 | if i==1 29 | sae.ae{i}.learningRate = 0.025; 30 | sae.ae{i}.weightPenaltyL2 = 0.05; % L2 regularization 31 | else 32 | sae.ae{i}.learningRate = 0.015; 33 | sae.ae{i}.weightPenaltyL2 = 0.25; % L2 regularization 34 | end 35 | end 36 | 37 | %% hyperparameter settings 38 | beta=25; %ratio of penalty on reconstruction errors of observed connections over that of unobserved connections 39 | 40 | r=floor(length(find(G.Edges{:,2}==1))/length(find(G.Edges{:,2}==-1))); 41 | % #positive edges/ #negative edges 42 | % r is the ratio of penalty for reconstruction errors of negative links over that of positive links 43 | % r is also the ratio of weight of pairwise constraints for negatively connected nodes over that for positively connected nodes 44 | 45 | alfa1=16; %weight of pairwise constraints for 1-st layer of SAE 46 | alfa2=1.5; %weight of pairwise constraints for deeper layers of SAE 47 | 48 | %% node vector representation learned by DNE-SBP 49 | rep = DNESBP_CD(sae, nnsize,Gwl_ud, beta,r, alfa1,alfa2); 50 | 51 | 52 | %% k-means clustering on embedding features 53 | errorAllK=[]; %error rates for different number of clusters (k) 54 | for numCluster=2:10 %number of communities 55 | idx = kmeans(rep{end},numCluster,'Replicates',20,'MaxIter',1000); 56 | table=tabulate(idx); 57 | 58 | %% compute errors 59 | error=0; %sum of number of positive links between different clusters and negative links within the same cluster 60 | for i=1:edgeNum 61 | [s,t]=findedge(G,i); 62 | if((Gwl_ud(s,t)<0)&&(idx(s)==idx(t))) 63 | %negative links within a cluster 64 | error=error+1; 65 | else if((Gwl_ud(s,t)>0)&&(idx(s)~=idx(t))) 66 | % positive links between clusters 67 | error=error+1; 68 | end 69 | end 70 | end 71 | error=error/edgeNum; 72 | errorAllK=[errorAllK error]; 73 | end 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/saebp.m: -------------------------------------------------------------------------------- 1 | function nn = saebp(nn,x,beta,r,laplace, alfa) 2 | %NNBP performs backpropagation 3 | % nn = nnbp(nn) returns an neural network structure with updated weights 4 | 5 | n = nn.n; 6 | m = size(x, 1); 7 | 8 | sparsityError = 0; 9 | switch nn.output 10 | case 'sigm' 11 | d{n} = - nn.e.* (nn.a{n} .* (1 - nn.a{n})); 12 | case {'softmax','linear'} 13 | d{n} = - nn.e; 14 | case 'tanh_opt' 15 | d{n} = - nn.e.*(1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{n}.^2)); 16 | case 'tanh' 17 | d{n} = - nn.e.*(1-nn.a{n}.^2); 18 | end 19 | 20 | %% add more penalty to non-zero elements for autoencoder %% 21 | if(beta~=1) 22 | pos_index=find(x>0); 23 | d{n}(pos_index)= d{n}(pos_index)*beta; % penalty for non-zero input elements (observed links) 24 | neg_index=find(x<0); 25 | d{n}(neg_index)= d{n}(neg_index)*(beta*r); % larger penalty for negative links 26 | end 27 | 28 | 29 | 
for i = (n - 1) : -1 : 2 30 | % Derivative of the activation function 31 | switch nn.activation_function 32 | case 'sigm' 33 | d_act = nn.a{i} .* (1 - nn.a{i}); 34 | case 'tanh_opt' 35 | d_act = 1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{i}.^2); 36 | case 'tanh' 37 | d_act =1-nn.a{i}.^2; 38 | 39 | end 40 | 41 | if(nn.nonSparsityPenalty>0) 42 | pi = repmat(nn.p{i}, size(nn.a{i}, 1), 1); 43 | sparsityError = [zeros(size(nn.a{i},1),1) nn.nonSparsityPenalty * (-nn.sparsityTarget ./ pi + (1 - nn.sparsityTarget) ./ (1 - pi))]; 44 | end 45 | 46 | % Backpropagate first derivatives 47 | if i+1==n % in this case in d{n} there is not the bias term to be removed 48 | d{i} = (d{i + 1} * nn.W{i} + sparsityError) .* d_act; % Bishop (5.56) 49 | else % in this case in d{i} the bias term has to be removed 50 | d{i} = (d{i + 1}(:,2:end) * nn.W{i} + sparsityError) .* d_act; 51 | end 52 | 53 | if(nn.dropoutFraction>0) 54 | d{i} = d{i} .* [ones(size(d{i},1),1) nn.dropOutMask{i}]; 55 | end 56 | 57 | 58 | 59 | %pairewise constraints devirations 60 | if i==2 61 | switch nn.activation_function 62 | case 'sigm' 63 | d_act_1 = nn.a{i}(:,2:end) .* (1 - nn.a{i}(:,2:end)); 64 | case 'tanh_opt' 65 | d_act_1 = 1.7159 * 2/3 * (1 - 1/(1.7159)^2 * nn.a{i}(:,2:end).^2); 66 | case 'tanh' 67 | d_act_1 =1-nn.a{i}(:,2:end).^2; 68 | end 69 | 70 | Y=nn.a{i}(:,2:end); 71 | d{i}=d{i}+[zeros(size(d{i},1),1) (alfa*(laplace+laplace')*Y).*d_act_1]; %pairewise constraints devirations 72 | end 73 | 74 | end 75 | 76 | 77 | 78 | for i = 1 : (n - 1) 79 | if i+1==n 80 | nn.dW{i} = (d{i + 1}' * nn.a{i}) / size(d{i + 1}, 1); 81 | else 82 | nn.dW{i} = (d{i + 1}(:,2:end)' * nn.a{i}) / size(d{i + 1}, 1); 83 | end 84 | end 85 | end 86 | -------------------------------------------------------------------------------- /DNESBP_public/code/NNutil/saenntrain.m: -------------------------------------------------------------------------------- 1 | function [nn, L] = saenntrain(nn, train_x, train_y, opts, beta,r,network, alfa,val_x, val_y) 2 | %NNTRAIN trains a neural net 3 | % [nn, L] = nnff(nn, x, y, opts) trains the neural network nn with input x and 4 | % output y for opts.numepochs epochs, with minibatches of size 5 | % opts.batchsize. Returns a neural network nn with updated activations, 6 | % errors, weights and biases, (nn.a, nn.e, nn.W, nn.b) and L, the sum 7 | % squared error for each training minibatch. 
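% A minimal usage sketch (values are illustrative; the argument order follows
% this function's signature, with 'network' the signed adjacency matrix from
% which the per-batch signed Laplacian of the pairwise constraints is built):
%   opts.batchsize = 500; opts.numepochs = 100;
%   nn = saenntrain(nn, adj, adj, opts, beta, r, adj, alfa1);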
8 | 9 | assert(isfloat(train_x), 'train_x must be a float'); 10 | assert(nargin == 8 || nargin == 10,'number ofinput arguments must be 8 or 10') 11 | 12 | loss.train.e = []; 13 | loss.train.e_frac = []; 14 | loss.val.e = []; 15 | loss.val.e_frac = []; 16 | opts.validation = 0; 17 | if nargin == 10 18 | opts.validation = 1; 19 | end 20 | 21 | fhandle = []; 22 | if isfield(opts,'plot') && opts.plot == 1 23 | fhandle = figure(); 24 | end 25 | 26 | m = size(train_x, 1); 27 | 28 | batchsize = opts.batchsize; 29 | numepochs = opts.numepochs; 30 | 31 | numbatches = m / batchsize; 32 | numbatches = floor(numbatches); 33 | 34 | 35 | assert(rem(numbatches, 1) == 0, 'numbatches must be a integer'); 36 | 37 | L = zeros(numepochs*numbatches,1); 38 | n = 1; 39 | LossEpoches=[]; 40 | for epoch = 1 : numepochs 41 | 42 | 43 | kk = randperm(m); 44 | for l = 1 : numbatches 45 | batch_x = train_x(kk((l - 1) * batchsize + 1 : l * batchsize), :); 46 | 47 | % compute the similarity matrix between the samples in each batch 48 | batch_x_index=kk((l - 1) * batchsize+1: l * batchsize); 49 | 50 | simlarMatrix=zeros(batchsize,batchsize); 51 | for a=1:batchsize 52 | for b=1:batchsize 53 | simlarMatrix(a,b)=network(batch_x_index(a),batch_x_index(b)) ; 54 | end 55 | end 56 | S = simlarMatrix; 57 | 58 | 59 | S_P=max(S,0); 60 | S_N=-min(S,0); 61 | D_P = diag(sum(S_P,2)); % the degree matrix of S_P 62 | D_N = diag(sum(S_N,2)); % the degree matrix of S_N 63 | L_P= D_P- S_P; %laplace matrix of S_P 64 | L_N= D_N- S_N; %laplace matrix of S_N 65 | 66 | laplace = L_P - r*L_N; 67 | 68 | 69 | %Add noise to input (for use in denoising autoencoder) 70 | if(nn.inputZeroMaskedFraction ~= 0) 71 | batch_x = batch_x.*(rand(size(batch_x))>nn.inputZeroMaskedFraction); 72 | end 73 | 74 | batch_y = train_y(kk((l - 1) * batchsize + 1 : l * batchsize), :); 75 | 76 | nn = saeff(nn, batch_x, batch_y,beta,r,laplace, alfa); 77 | nn = saebp(nn,batch_x,beta,r,laplace, alfa); 78 | nn = nnapplygrads(nn); 79 | 80 | L(n) = nn.L; 81 | 82 | n = n + 1; 83 | end 84 | 85 | 86 | % if opts.validation == 1 87 | % loss = nneval(nn, loss, train_x, train_y, val_x, val_y); 88 | % str_perf = sprintf('; Full-batch train mse = %f, val mse = %f', loss.train.e(end), loss.val.e(end)); 89 | % else 90 | % loss = nneval(nn, loss, train_x, train_y); 91 | % str_perf = sprintf('; Full-batch train err = %f', loss.train.e(end)); 92 | % end 93 | 94 | % if (mod(epoch, 10) == 0) % update figure after each 10 epoches 95 | % if ishandle(fhandle) 96 | % nnupdatefigures(nn, fhandle, loss, opts, epoch); 97 | % end 98 | % end 99 | 100 | % % LossEpoches=[LossEpoches mean(L((n-numbatches):(n-1)))]; 101 | % disp(['epoch ' num2str(epoch) '/' num2str(opts.numepochs) '. 
Mini-batch loss on training set is ' num2str(mean(L((n-numbatches):(n-1)))) str_perf]); 102 | 103 | nn.learningRate = nn.learningRate * nn.scaling_learningRate; 104 | 105 | end 106 | 107 | % LossEpoches 108 | end 109 | 110 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/DNESBP_LP_wiki.m: -------------------------------------------------------------------------------- 1 | %% An Example Case %% 2 | clear all; 3 | addpath(genpath('../../code')); 4 | 5 | load('wiki_UD.mat'); %dataset 6 | G=graph(Gwl_ud); 7 | edgeNum=numedges(G); 8 | num_nodes=numnodes(G); 9 | 10 | 11 | %% Randomly sample a fraction of observed links for training for link sign prediction 12 | trp=0.2; %percentage of observed links used for training fixed as 20% 13 | %trp=[0.2,0.4,0.6,0.8]; %percentage of observed links used for training varied in [20%, 40%, 60%,80%] 14 | 15 | numRandomSplit=5; %number of random splits for each training percentage 16 | 17 | APNAllPer=cell(1,length(trp)); 18 | aucAllPer=cell(1,length(trp)); 19 | for trpindex=1:length(trp) 20 | aucAll=[]; 21 | APNAll=[]; 22 | random_state=0; 23 | for randomSplit=1:numRandomSplit 24 | disp(['Given ' num2str(trp(trpindex)*100) '% of observed links for training: ' num2str(randomSplit) '-th random split']); 25 | rng('default'); 26 | rng(random_state); 27 | edgeindex=[1:1:edgeNum]; 28 | ranindices = edgeindex(randperm(length(edgeindex),fix(length(edgeindex)*trp(trpindex)))); 29 | test_index=setdiff(edgeindex, ranindices); 30 | random_state=random_state+1; 31 | 32 | Gwl_train=zeros(size(Gwl_ud)); %training adjacency matrix given only observed links 33 | for i=1:length(ranindices) 34 | [s,t]=findedge(G,ranindices(i)); 35 | Gwl_train(s,t)=Gwl_ud(s,t); 36 | Gwl_train(t,s)=Gwl_ud(s,t); 37 | end 38 | 39 | 40 | %% this is the configuration of stacked autoencoder %% 41 | nnsize = [num_nodes 256 64]; %layer-wised setting 42 | len = length(nnsize); % number of layers 43 | rand('state',0) 44 | sae = saesetup(nnsize); 45 | 46 | for i = 1: len - 1 47 | sae.ae{i}.activation_function = 'tanh'; %tanh, tanh_opt ,sigm 48 | sae.ae{i}.output = 'tanh'; 49 | sae.ae{i}.dropoutFraction = 0; % Dropout fraction, only used for fine tuning 50 | sae.ae{i}.momentum = 0.0; % Momentum 51 | sae.ae{i}.nonSparsityPenalty = 0.0; % 0 indicates Non sparsity penalty 52 | sae.ae{i}.sparsityTarget = 0.05; % Sparsity target 53 | sae.ae{i}.inputZeroMaskedFraction = 0.0; % Used for Denoising AutoEncoders 54 | sae.ae{i}.scaling_learningRate = 0.95; % Scaling factor for the learning rate (each epoch) 55 | if i==1 56 | sae.ae{i}.learningRate = 0.025; 57 | sae.ae{i}.weightPenaltyL2 = 0.05; % L2 regularization 58 | else 59 | sae.ae{i}.learningRate = 0.015; 60 | sae.ae{i}.weightPenaltyL2 = 0.25; % L2 regularization 61 | end 62 | end 63 | 64 | 65 | %% hyperparameter settings 66 | beta=25; %ratio of penalty on reconstruction errors of observed connections over that of unobserved connections 67 | 68 | r=floor(length(find(Gwl_train==1))/length(find(Gwl_train==-1))); 69 | % #positive edges/ #negative edges 70 | % r is the ratio of penalty for reconstruction errors of negative links over that of positive links 71 | % r is also the ratio of weight of pairwise constraints for negatively connected nodes over that for positively connected nodes 72 | 73 | alfa1=16; %weight of pairwise constraints for 1-st layer SAE 74 | alfa2=0.4; %weight of pairwise constraints for 2-nd layer SAE 75 | 76 | 77 | %% node vector representation learned by DNE-SBP 78 | rep = 
DNESBP_LP(sae, nnsize,Gwl_train, beta,r, alfa1,alfa2); 79 | 80 | 81 | %% build egde representation 82 | APNAllEdge=[]; 83 | aucAllEdge=[]; 84 | for edgeType=1:4 % 1 for L1-norm; 2 for L2-norm; 3 for hadmard; 4 for average 85 | inputSize=size(rep{end},2); %number of features for nodes 86 | edgeRep_train=zeros(length(ranindices),inputSize); % edge representations for training 87 | edgeLabel_train=zeros(length(ranindices),1); 88 | for i=1:length(ranindices) 89 | [s,t]=findedge(G,ranindices(i)); 90 | %get egde representation of training edge (i,j) 91 | switch edgeType 92 | case 1 93 | edgeRep_train(i,:)=abs(rep{end}(s,:)-rep{end}(t,:)); %L1-norm 94 | case 2 95 | edgeRep_train(i,:)=(rep{end}(s,:)-rep{end}(t,:)).^2; %L2-norm 96 | case 3 97 | edgeRep_train(i,:)=rep{end}(s,:).*rep{end}(t,:); %hadamard 98 | case 4 99 | edgeRep_train(i,:)=(rep{end}(s,:)+rep{end}(t,:))/2; %average 100 | end 101 | edgeLabel_train(i)= Gwl_ud(s,t); 102 | end 103 | 104 | %get egde representation of testing edge (i,j) 105 | edgeRep_test=zeros(length(test_index),inputSize); % edge representations for testing 106 | edgeLabel_test=zeros(length(test_index),1); 107 | for i=1:length(test_index) 108 | [s,t]=findedge(G,test_index(i)); 109 | %get the egde representation of edge (i,j) 110 | switch edgeType 111 | case 1 112 | edgeRep_test(i,:)=abs(rep{end}(s,:)-rep{end}(t,:)); %L1-norm 113 | case 2 114 | edgeRep_test(i,:)=(rep{end}(s,:)-rep{end}(t,:)).^2; %L2-norm 115 | case 3 116 | edgeRep_test(i,:)=rep{end}(s,:).*rep{end}(t,:); %hadamard 117 | case 4 118 | edgeRep_test(i,:)=(rep{end}(s,:)+rep{end}(t,:))/2; %average 119 | end 120 | edgeLabel_test(i)= Gwl_ud(s,t); 121 | end 122 | 123 | 124 | %% logistic regression to predict link labels 125 | pred=zeros(size(edgeLabel_test)); 126 | edgeLabel_train(edgeLabel_train==-1)=0; %postive link:1; negative link:0 127 | b = glmfit(edgeRep_train,edgeLabel_train,'binomial','link','logit'); 128 | probability = glmval(b,edgeRep_test, 'logit'); 129 | 130 | %% compute AUC score 131 | [~,~,~,AUC] = perfcurve(edgeLabel_test,probability,1) ; 132 | aucAllEdge=[aucAllEdge;AUC]; 133 | 134 | %% compute avergae precision of negative links 135 | edgeLabel_test1=edgeLabel_test; 136 | edgeLabel_test1(edgeLabel_test1==-1)=0; 137 | pl=[probability edgeLabel_test1]; 138 | AP_N=ComputeAP(1-pl); 139 | APNAllEdge=[APNAllEdge;AP_N]; 140 | 141 | end 142 | aucAll=[aucAll aucAllEdge]; 143 | APNAll=[APNAll APNAllEdge]; 144 | 145 | fprintf('AUC score for 4 types of edge features: \n'); 146 | aucAllEdge 147 | fprintf('AP for 4 types of edge features: \n'); 148 | APNAllEdge 149 | end 150 | aucAllPer{trpindex}=aucAll; 151 | APNAllPer{trpindex}=APNAll; 152 | 153 | end 154 | 155 | %% average AUC and AP over 5 random splits 156 | avgAUC=zeros(edgeType,length(trp)); 157 | avgAPN=zeros(edgeType,length(trp)); 158 | for j=1:length(trp) 159 | avgAUC(:,j)=mean(aucAllPer{j},2); 160 | avgAPN(:,j)=mean(APNAllPer{j},2); 161 | end 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /DNESBP_public/code/DNESBP/DNESBP_LP_slashdot.m: -------------------------------------------------------------------------------- 1 | %% An Example Case %% 2 | clear all; 3 | addpath(genpath('../../code')); 4 | 5 | load('slashdot_UD.mat'); %dataset 6 | G=graph(Gwl_ud); 7 | edgeNum=numedges(G); 8 | num_nodes=numnodes(G); 9 | 10 | 11 | %% Randomly sample a fraction of observed links for training for link sign prediction 12 | trp=0.2; %percentage of observed links used for training fixed as 20% 13 | 
%trp=[0.2,0.4,0.6,0.8]; %percentage of observed links used for training, varied over [20%, 40%, 60%, 80%]
14 | 
15 | numRandomSplit=5; %number of random splits for each training percentage
16 | 
17 | APNAllPer=cell(1,length(trp));
18 | aucAllPer=cell(1,length(trp));
19 | 
20 | for trpindex=1:length(trp)
21 |     aucAll=[];
22 |     APNAll=[];
23 |     random_state=0;
24 |     for randomSplit=1:numRandomSplit
25 |         disp(['Given ' num2str(trp(trpindex)*100) '% of observed links for training: ' num2str(randomSplit) '-th random split']);
26 |         rng('default');
27 |         rng(random_state);
28 |         edgeindex=[1:1:edgeNum];
29 |         ranindices = edgeindex(randperm(length(edgeindex),fix(length(edgeindex)*trp(trpindex))));
30 |         test_index=setdiff(edgeindex, ranindices);
31 |         random_state=random_state+1;
32 | 
33 |         Gwl_train=zeros(size(Gwl_ud)); %training adjacency matrix containing only the observed links
34 |         for i=1:length(ranindices)
35 |             [s,t]=findedge(G,ranindices(i));
36 |             Gwl_train(s,t)=Gwl_ud(s,t);
37 |             Gwl_train(t,s)=Gwl_ud(s,t);
38 |         end
39 | 
40 |         %% configuration of the stacked autoencoder %%
41 |         nnsize = [num_nodes 256 64]; %layer-wise sizes
42 |         len = length(nnsize); % number of layers
43 |         rand('state',0)
44 |         sae = saesetup(nnsize);
45 | 
46 |         for i = 1: len - 1
47 |             sae.ae{i}.activation_function = 'tanh'; %options: tanh, tanh_opt, sigm
48 |             sae.ae{i}.output = 'tanh';
49 |             sae.ae{i}.dropoutFraction = 0;           % Dropout fraction, only used for fine-tuning
50 |             sae.ae{i}.momentum = 0.0;                % Momentum
51 |             sae.ae{i}.nonSparsityPenalty = 0.0;      % 0 disables the sparsity penalty
52 |             sae.ae{i}.sparsityTarget = 0.05;         % Sparsity target
53 |             sae.ae{i}.inputZeroMaskedFraction = 0.0; % Used for denoising autoencoders
54 |             sae.ae{i}.scaling_learningRate = 0.95;   % Scaling factor for the learning rate (each epoch)
55 | 
56 |             if i==1
57 |                 sae.ae{i}.learningRate = 0.025;
58 |                 sae.ae{i}.weightPenaltyL2 = 0.05; % L2 regularization
59 |             else
60 |                 sae.ae{i}.learningRate = 0.015;
61 |                 sae.ae{i}.weightPenaltyL2 = 0.1;  % L2 regularization
62 |             end
63 |         end
64 | 
65 | 
66 |         %% hyperparameter settings
67 |         beta=25; %ratio of the penalty on reconstruction errors of observed connections over that of unobserved connections
68 | 
69 |         r=floor(length(find(Gwl_train==1))/length(find(Gwl_train==-1)));
70 |         % r = #positive edges / #negative edges
71 |         % r is the ratio of the penalty on reconstruction errors of negative links over that of positive links
72 |         % r is also the ratio of the weight of pairwise constraints for negatively connected nodes over that for positively connected nodes
73 | 
74 |         alfa1=14;  %weight of pairwise constraints for the 1-st layer SAE
75 |         alfa2=0.2; %weight of pairwise constraints for the 2-nd layer SAE
76 | 
77 | 
78 |         %% node vector representations learned by DNE-SBP
79 |         rep = DNESBP_LP(sae, nnsize, Gwl_train, beta, r, alfa1, alfa2);
80 | 
81 |         %% build edge representations
82 |         APNAllEdge=[];
83 |         aucAllEdge=[];
84 |         for edgeType=1:4 % 1 for L1-norm; 2 for L2-norm; 3 for Hadamard; 4 for average
85 |             inputSize=size(rep{end},2); %number of features per node
86 |             edgeRep_train=zeros(length(ranindices),inputSize); % edge representations for training
87 |             edgeLabel_train=zeros(length(ranindices),1);
88 |             for i=1:length(ranindices)
89 |                 [s,t]=findedge(G,ranindices(i));
90 |                 %get the edge representation of training edge (s,t)
91 |                 switch edgeType
92 |                     case 1
93 |                         edgeRep_train(i,:)=abs(rep{end}(s,:)-rep{end}(t,:)); %L1-norm
94 |                     case 2
95 |                         edgeRep_train(i,:)=(rep{end}(s,:)-rep{end}(t,:)).^2; %L2-norm
96 |                     case 3
97 |                         edgeRep_train(i,:)=rep{end}(s,:).*rep{end}(t,:);     %Hadamard
98 |                     case 4
99 |                         edgeRep_train(i,:)=(rep{end}(s,:)+rep{end}(t,:))/2;  %average
100 |                 end
101 |                 edgeLabel_train(i)= Gwl_ud(s,t);
102 |             end
103 | 
104 |             %get the edge representation of each testing edge (s,t)
105 |             edgeRep_test=zeros(length(test_index),inputSize); % edge representations for testing
106 |             edgeLabel_test=zeros(length(test_index),1);
107 |             for i=1:length(test_index)
108 |                 [s,t]=findedge(G,test_index(i));
109 |                 %get the edge representation of edge (s,t)
110 |                 switch edgeType
111 |                     case 1
112 |                         edgeRep_test(i,:)=abs(rep{end}(s,:)-rep{end}(t,:)); %L1-norm
113 |                     case 2
114 |                         edgeRep_test(i,:)=(rep{end}(s,:)-rep{end}(t,:)).^2; %L2-norm
115 |                     case 3
116 |                         edgeRep_test(i,:)=rep{end}(s,:).*rep{end}(t,:);     %Hadamard
117 |                     case 4
118 |                         edgeRep_test(i,:)=(rep{end}(s,:)+rep{end}(t,:))/2;  %average
119 |                 end
120 |                 edgeLabel_test(i)= Gwl_ud(s,t);
121 |             end
122 | 
123 | 
124 |             %% logistic regression to predict link signs
125 |             pred=zeros(size(edgeLabel_test));
126 |             edgeLabel_train(edgeLabel_train==-1)=0; %positive link: 1; negative link: 0
127 |             b = glmfit(edgeRep_train,edgeLabel_train,'binomial','link','logit');
128 |             probability = glmval(b,edgeRep_test,'logit');
129 | 
130 |             %% compute AUC score
131 |             [~,~,~,AUC] = perfcurve(edgeLabel_test,probability,1);
132 |             aucAllEdge=[aucAllEdge;AUC];
133 | 
134 |             %% compute average precision of negative links
135 |             edgeLabel_test1=edgeLabel_test;
136 |             edgeLabel_test1(edgeLabel_test1==-1)=0;
137 |             pl=[probability edgeLabel_test1];
138 |             AP_N=ComputeAP(1-pl);
139 |             APNAllEdge=[APNAllEdge;AP_N];
140 | 
141 |         end
142 |         aucAll=[aucAll aucAllEdge];
143 |         APNAll=[APNAll APNAllEdge];
144 | 
145 |         fprintf('AUC score for the 4 types of edge features: \n');
146 |         aucAllEdge
147 |         fprintf('AP for the 4 types of edge features: \n');
148 |         APNAllEdge
149 |     end
150 |     aucAllPer{trpindex}=aucAll;
151 |     APNAllPer{trpindex}=APNAll;
152 | 
153 | end
154 | 
155 | %% average AUC and AP over 5 random splits
156 | avgAUC=zeros(edgeType,length(trp));
157 | avgAPN=zeros(edgeType,length(trp));
158 | for j=1:length(trp)
159 |     avgAUC(:,j)=mean(aucAllPer{j},2);
160 |     avgAPN(:,j)=mean(APNAllPer{j},2);
161 | end
162 | 
163 | 
164 | 
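Both link-sign-prediction scripts build edge representations from the final-layer node embeddings `rep{end}` with the same four operators (L1-norm, L2-norm, Hadamard product, average). A minimal sketch of that mapping, factored into a hypothetical helper `edgeFeature` that is not part of the repository, might look like:

function f = edgeFeature(Z, s, t, edgeType)
% Z        - node embedding matrix, one row per node (e.g. rep{end})
% s, t     - endpoint indices of the edge
% edgeType - 1: L1-norm, 2: L2-norm, 3: Hadamard, 4: average
    switch edgeType
        case 1
            f = abs(Z(s,:) - Z(t,:));     % element-wise L1 distance
        case 2
            f = (Z(s,:) - Z(t,:)).^2;     % element-wise squared difference
        case 3
            f = Z(s,:) .* Z(t,:);         % Hadamard (element-wise) product
        case 4
            f = (Z(s,:) + Z(t,:)) / 2;    % element-wise average
    end
end

Each training or testing edge (s,t) would then receive the feature vector edgeFeature(rep{end}, s, t, edgeType), which is exactly what the inline switch blocks in the scripts compute.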
--------------------------------------------------------------------------------
/DNESBP_public/code/DNESBP/DNESBP_LP_epinions.m:
--------------------------------------------------------------------------------
1 | %% An Example Case %%
2 | clear all;
3 | addpath(genpath('../../code'));
4 | 
5 | load('epinions_UD.mat'); %dataset
6 | G=graph(Gwl_ud);
7 | edgeNum=numedges(G);
8 | num_nodes=numnodes(G);
9 | 
10 | 
11 | %% Randomly sample a fraction of observed links for training for link sign prediction
12 | trp=0.2; %percentage of observed links used for training, fixed at 20%
13 | %trp=[0.2,0.4,0.6,0.8]; %percentage of observed links used for training, varied over [20%, 40%, 60%, 80%]
14 | 
15 | numRandomSplit=5; %number of random splits for each training percentage
16 | 
17 | APNAllPer=cell(1,length(trp));
18 | aucAllPer=cell(1,length(trp));
19 | for trpindex=1:length(trp)
20 |     aucAll=[];
21 |     APNAll=[];
22 |     random_state=0;
23 |     for randomSplit=1:numRandomSplit
24 |         disp(['Given ' num2str(trp(trpindex)*100) '% of observed links for training: ' num2str(randomSplit) '-th random split']);
25 |         rng('default');
26 |         rng(random_state);
27 |         %% randomly choose trp % of links for training
28 |         edgeindex=[1:1:edgeNum];
29 |         ranindices = edgeindex(randperm(length(edgeindex),fix(length(edgeindex)*trp(trpindex))));
30 |         test_index=setdiff(edgeindex, ranindices);
31 |         random_state=random_state+1;
32 | 
33 |         Gwl_train=zeros(size(Gwl_ud)); %training adjacency matrix containing only the observed links
34 |         for i=1:length(ranindices)
35 |             [s,t]=findedge(G,ranindices(i));
36 |             Gwl_train(s,t)=Gwl_ud(s,t);
37 |             Gwl_train(t,s)=Gwl_ud(s,t);
38 |         end
39 | 
40 | 
41 |         %% configuration of the stacked autoencoder %%
42 |         nnsize = [num_nodes 256 64]; %layer-wise sizes
43 |         len = length(nnsize); % number of layers
44 |         rand('state',0)
45 |         sae = saesetup(nnsize);
46 | 
47 |         for i = 1: len - 1
48 |             sae.ae{i}.activation_function = 'tanh'; %options: tanh, tanh_opt, sigm
49 |             sae.ae{i}.output = 'tanh';
50 |             sae.ae{i}.dropoutFraction = 0;           % Dropout fraction, only used for fine-tuning
51 |             sae.ae{i}.momentum = 0.0;                % Momentum
52 |             sae.ae{i}.nonSparsityPenalty = 0.0;      % 0 disables the sparsity penalty
53 |             sae.ae{i}.sparsityTarget = 0.05;         % Sparsity target
54 |             sae.ae{i}.inputZeroMaskedFraction = 0.0; % Used for denoising autoencoders
55 |             sae.ae{i}.scaling_learningRate = 0.95;   % Scaling factor for the learning rate (each epoch)
56 | 
57 |             if i==1
58 |                 sae.ae{i}.learningRate = 0.025;
59 |                 sae.ae{i}.weightPenaltyL2 = 0.05; % L2 regularization
60 |             else
61 |                 sae.ae{i}.learningRate = 0.015;
62 |                 sae.ae{i}.weightPenaltyL2 = 0.1;  % L2 regularization
63 |             end
64 |         end
65 | 
66 | 
67 | 
68 |         %% hyperparameter settings
69 |         beta=10; %ratio of the penalty on reconstruction errors of observed connections over that of unobserved connections
70 | 
71 |         r=floor(length(find(Gwl_train==1))/length(find(Gwl_train==-1)));
72 |         % r = #positive edges / #negative edges
73 |         % r is the ratio of the penalty on reconstruction errors of negative links over that of positive links
74 |         % r is also the ratio of the weight of pairwise constraints for negatively connected nodes over that for positively connected nodes
75 | 
76 |         alfa1=10;  %weight of pairwise constraints for the 1-st layer SAE
77 |         alfa2=0.2; %weight of pairwise constraints for the 2-nd layer SAE
78 | 
79 | 
80 |         %% node vector representations learned by DNE-SBP
81 |         rep = DNESBP_LP(sae, nnsize, Gwl_train, beta, r, alfa1, alfa2);
82 | 
83 | 
84 |         %% build edge representations
85 |         APNAllEdge=[];
86 |         aucAllEdge=[];
87 |         for edgeType=1:4 % 1 for L1-norm; 2 for L2-norm; 3 for Hadamard; 4 for average
88 |             inputSize=size(rep{end},2); %number of features per node
89 |             edgeRep_train=zeros(length(ranindices),inputSize); % edge representations for training
90 |             edgeLabel_train=zeros(length(ranindices),1);
91 |             for i=1:length(ranindices)
92 |                 [s,t]=findedge(G,ranindices(i));
93 |                 %get the edge representation of training edge (s,t)
94 |                 switch edgeType
95 |                     case 1
96 |                         edgeRep_train(i,:)=abs(rep{end}(s,:)-rep{end}(t,:)); %L1-norm
97 |                     case 2
98 |                         edgeRep_train(i,:)=(rep{end}(s,:)-rep{end}(t,:)).^2; %L2-norm
99 |                     case 3
100 |                         edgeRep_train(i,:)=rep{end}(s,:).*rep{end}(t,:);     %Hadamard
101 |                     case 4
102 |                         edgeRep_train(i,:)=(rep{end}(s,:)+rep{end}(t,:))/2;  %average
103 |                 end
104 |                 edgeLabel_train(i)= Gwl_ud(s,t);
105 |             end
106 | 
107 |             %get the edge representation of each testing edge (s,t)
108 |             edgeRep_test=zeros(length(test_index),inputSize); % edge representations for testing
109 |             edgeLabel_test=zeros(length(test_index),1);
110 |             for i=1:length(test_index)
111 |                 [s,t]=findedge(G,test_index(i));
112 |                 %get the edge representation of edge (s,t)
113 |                 switch edgeType
114 |                     case 1
115 |                         edgeRep_test(i,:)=abs(rep{end}(s,:)-rep{end}(t,:)); %L1-norm
116 |                     case 2
117 |                         edgeRep_test(i,:)=(rep{end}(s,:)-rep{end}(t,:)).^2; %L2-norm
118 |                     case 3
119 |                         edgeRep_test(i,:)=rep{end}(s,:).*rep{end}(t,:);     %Hadamard
120 |                     case 4
121 |                         edgeRep_test(i,:)=(rep{end}(s,:)+rep{end}(t,:))/2;  %average
122 |                 end
123 |                 edgeLabel_test(i)= Gwl_ud(s,t);
124 |             end
125 | 
126 | 
127 |             %% logistic regression to predict link signs
128 |             pred=zeros(size(edgeLabel_test));
129 |             edgeLabel_train(edgeLabel_train==-1)=0; %positive link: 1; negative link: 0
130 |             b = glmfit(edgeRep_train,edgeLabel_train,'binomial','link','logit');
131 |             probability = glmval(b,edgeRep_test,'logit');
132 | 
133 |             %% compute AUC score
134 |             [~,~,~,AUC] = perfcurve(edgeLabel_test,probability,1);
135 |             aucAllEdge=[aucAllEdge;AUC];
136 | 
137 |             %% compute average precision of negative links
138 |             edgeLabel_test1=edgeLabel_test;
139 |             edgeLabel_test1(edgeLabel_test1==-1)=0;
140 |             pl=[probability edgeLabel_test1];
141 |             AP_N=ComputeAP(1-pl);
142 |             APNAllEdge=[APNAllEdge;AP_N];
143 | 
144 |         end
145 |         aucAll=[aucAll aucAllEdge];
146 |         APNAll=[APNAll APNAllEdge];
147 | 
148 |         fprintf('AUC score for the 4 types of edge features: \n');
149 |         aucAllEdge
150 |         fprintf('AP for the 4 types of edge features: \n');
151 |         APNAllEdge
152 |     end
153 |     aucAllPer{trpindex}=aucAll;
154 |     APNAllPer{trpindex}=APNAll;
155 | 
156 | end
157 | 
158 | %% average AUC and AP over 5 random splits
159 | avgAUC=zeros(edgeType,length(trp));
160 | avgAPN=zeros(edgeType,length(trp));
161 | for j=1:length(trp)
162 |     avgAUC(:,j)=mean(aucAllPer{j},2);
163 |     avgAPN(:,j)=mean(APNAllPer{j},2);
164 | end
165 | 
166 | 
167 | 
168 | 
--------------------------------------------------------------------------------
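After either script finishes, avgAUC and avgAPN are edgeType-by-length(trp) matrices: rows correspond to the four edge-feature operators in the order they are looped over (L1-norm, L2-norm, Hadamard, average), and columns to the training percentages in trp. A minimal sketch for printing them with labels, assuming the variable names from the scripts above (the printing itself is not part of the repository):

% Hypothetical pretty-printer for the averaged results produced above.
featNames = {'L1-norm','L2-norm','Hadamard','average'};
for j = 1:length(trp)
    fprintf('Training percentage %.0f%%\n', trp(j)*100);
    for k = 1:numel(featNames)
        fprintf('  %-8s  AUC = %.4f  AP(neg) = %.4f\n', ...
                featNames{k}, avgAUC(k,j), avgAPN(k,j));
    end
end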