├── +core
    ├── Graph.m
    ├── Instance.m
    └── Instances.m
├── +random_generators
    ├── RandomGraphGenerator.m
    └── RandomInstancesGenerator.m
├── +utils
    ├── CostFunction.m
    ├── EdgeWeighter.m
    └── ParameterLearner.m
├── .gitattributes
├── .gitignore
├── LICENSE.txt
├── README.md
├── architecture.png
├── cost.png
├── log_cost.png
├── main.m
└── plotcost.m


/+core/Graph.m:
--------------------------------------------------------------------------------
 1 | classdef Graph < handle
 2 |     %Holds information about the graph topology and edge features.
 3 |     %Only existing edges have features.
 4 | 
 5 |     properties
 6 |         num_nodes = 0; %number of nodes in the graph
 7 |         num_features = 0; %number of features in the graph
 8 |         G = []; %unweighted directed adjacency matrix for the graph, [n x n] matrix of 0s and 1s
 9 |         features = {}; %cell array with one [n x n] matrix per feature type - each double value corresponds to a single feature type and a single edge
10 |         isSparse = true; %true when G and the feature matrices are kept in sparse storage
11 |     end
12 | 
13 |     methods
14 |         function g = Graph(num_nodes,num_features,G,features,isSparse)
15 |             %Populates and returns a Graph object with the supplied
16 |             %properties. The property defaults are kept unless all five
17 |             %arguments are passed.
18 |             if nargin == 5
19 |                 g.num_nodes = num_nodes;
20 |                 g.num_features = num_features;
21 |                 g.G = G;
22 |                 g.features = features;
23 |                 g.isSparse = isSparse;
24 |             end
25 |         end
26 | 
27 |         function adjMat = getWeightedAdjMatrix(g,weigther,w)
28 |             %uses the weighter object and the w parameters to combine the
29 |             %features of each edge into a single double value (weight or
30 |             %strength); returns an n x n matrix masked by G
31 |             if g.isSparse
32 |                 dot_product = sparse(g.num_nodes,g.num_nodes);
33 |             else
34 |                 dot_product = zeros(g.num_nodes);
35 |             end
36 |             for k=1:g.num_features
37 |                 dot_product = dot_product + g.features{k}*w(k);
38 |             end
39 |             adjMat = g.G.*weigther.calcWeights(dot_product);
40 |         end
41 | 
42 |         function setSparse(g,isSparse)
43 |             %if true forces the graph to use the sparse representation, and
44 |             %the dense representation otherwise. Cost computation can
45 |             %differ significantly depending on graph representation. If your
46 |             %graph is really sparse (<5% of the edges exist) the sparse
47 |             %representation is best.
48 |             if g.isSparse == isSparse
49 |                 return; %already in the requested representation
50 |             end
51 |             if isSparse
52 |                 convert = @sparse;
53 |             else
54 |                 convert = @full;
55 |             end
56 |             g.G = convert(g.G);
57 |             for k=1:g.num_features
58 |                 g.features{k} = convert(g.features{k});
59 |             end
60 |             %record the new representation; getWeightedAdjMatrix reads this
61 |             %flag to choose between a sparse and a dense accumulator
62 |             g.isSparse = isSparse;
63 |         end
64 |     end
65 | end
66 | 
67 | 


--------------------------------------------------------------------------------
/+core/Instance.m:
--------------------------------------------------------------------------------
  1 | classdef Instance < handle
  2 |     %Instance holds information on positive and negative links for a source
  3 |     %node in a graph.
  4 | 
  5 |     properties
  6 |         source_node_index = 1; % source node
  7 |         positive_links = []; % positive nodes (links)
  8 |         negative_links = []; % negative nodes (links)
  9 |         graph@core.Graph; %a graph object that contains information on links and features
 10 |     end
 11 | 
 12 |     methods
 13 |         function this = Instance(source_node_index,positive_links,negative_links,graph)
 14 |             %creates an instance; with no arguments the property defaults
 15 |             %are kept, otherwise all four arguments are expected
 16 |             if nargin > 0
 17 |                 this.source_node_index = source_node_index;
 18 |                 this.positive_links = positive_links;
 19 |                 this.negative_links = negative_links;
 20 |                 this.graph = graph;
 21 |             end
 22 |         end
 23 | 
 24 |         function cost = calcCost(this,weighter,alpha,costf,w)
 25 |             %calculates the cost for the given parameters w as defined by
 26 |             %Leskovec. The weighter and cost functions need to be specified
 27 |             %as well as the restart probability (alpha).
 28 |             pagerank = calcPagerank(this,weighter,alpha,w);
 29 |             cost = 0;
 30 |             for di=1:length(this.positive_links)
 31 |                 cost = cost+sum(costf.calcCost(pagerank(this.negative_links)-pagerank(this.positive_links(di))));
 32 |             end
 33 |         end
 34 | 
 35 |         function [cost,gradient] = calcCostAndGradient(this,weighter,alpha,costf,w)
 36 |             %calculates the cost and its gradient for the given parameters w
 37 |             %as defined by Leskovec. The weighter and cost functions need to
 38 |             %be specified as well as the restart probability (alpha).
 39 |             %gradient is a 1 x num_features row vector.
 40 |             n = this.graph.num_nodes;
 41 |             max_iter = 100; %cap on the number of power iterations
 42 |             tol = 1e-12; %threshold on the squared change between iterates
 43 | 
 44 |             %weighted adjacency matrix and, for every stored edge, the sum
 45 |             %of the weights in that edge's row
 46 |             adjMat = this.graph.getWeightedAdjMatrix(weighter,w);
 47 |             sumRowsAdjMat = full(sum(adjMat,2));
 48 |             [i,j,v_adjMat] = find(adjMat);
 49 |             v_sum_fuv_w = sumRowsAdjMat(i);
 50 |             v_sum_fuv_w_squared = v_sum_fuv_w.^2;
 51 | 
 52 |             %transition probability matrix with respect to the source node
 53 |             Q = this.calcTransitionProbabilityMatrixForSourceSparse(n,i,j,v_adjMat,this.source_node_index,alpha,v_sum_fuv_w);
 54 |             Qt = Q';
 55 | 
 56 |             %pagerank by power iteration; every iterate is kept because the
 57 |             %derivative recursion below reuses them
 58 |             p = zeros(n,max_iter);
 59 |             p(:,1) = 1.0/n;
 60 |             %fall back to the final iterate when the tolerance is never
 61 |             %met; an index of 0 would make p(:,last_iter) fail
 62 |             last_iter = max_iter;
 63 |             converged = false;
 64 |             for iter=2:max_iter
 65 |                 p(:,iter) = Qt*p(:,iter-1);
 66 |                 if sum((p(:,iter)-p(:,iter-1)).^2) < tol
 67 |                     last_iter = iter;
 68 |                     converged = true;
 69 |                     break;
 70 |                 end
 71 |             end
 72 |             if ~converged
 73 |                 warning('Instance:pNotConverged','p didnt converge');
 74 |             end
 75 |             pagerank = p(:,last_iter);
 76 | 
 77 |             %derivative of the pagerank with respect to every parameter
 78 |             d_p = cell(this.graph.num_features,1);
 79 |             for k=1:this.graph.num_features
 80 |                 d_p_t = zeros(n,1);
 81 |                 d_p_t_1 = zeros(n,1);
 82 |                 dQ = this.TcalcdQM(weighter,w,alpha,k,adjMat,v_sum_fuv_w,v_sum_fuv_w_squared);
 83 |                 dQt = dQ';
 84 |                 converged = false;
 85 |                 for iter=1:max_iter
 86 |                     d_p_t = Qt*d_p_t_1+dQt*p(:,min(iter,last_iter));
 87 |                     if sum((d_p_t_1-d_p_t).^2) < tol
 88 |                         converged = true;
 89 |                         break;
 90 |                     end
 91 |                     d_p_t_1 = d_p_t;
 92 |                 end
 93 |                 if ~converged
 94 |                     warning('Instance:dpNotConverged','dp didnt converge');
 95 |                 end
 96 |                 %kept as a column, like pagerank, so that indexing both with
 97 |                 %the same index array always yields arrays of the same shape
 98 |                 d_p{k} = d_p_t;
 99 |             end
100 | 
101 |             %cost and gradient over all (negative,positive) pairs: l(a,b) is
102 |             %the a-th negative link and d(a,b) is the b-th positive link
103 |             l = repmat(this.negative_links(:),1,numel(this.positive_links));
104 |             d = repmat(this.positive_links(:)',numel(this.negative_links),1);
105 |             gradient = zeros(1,this.graph.num_features);
106 |             [costs,gradients] = costf.calcCostAndGradient(pagerank(l)-pagerank(d));
107 |             cost = sum(sum(costs));
108 |             for k=1:this.graph.num_features
109 |                 gradient(k) = sum(sum(gradients.*(d_p{k}(l)-d_p{k}(d))));
110 |             end
111 |         end
112 | 
113 |         function pagerank = calcPagerank(this,weighter,alpha,w)
114 |             %calculates the pagerank for this instance and the specified
115 |             %parameters, weighter function and restart probability.
116 |             n = this.graph.num_nodes;
117 |             max_iter = 100; %cap on the number of power iterations
118 |             tol = 1e-12; %threshold on the squared change between iterates
119 | 
120 |             %weighted adjacency matrix and per-edge row sums
121 |             adjMat = this.graph.getWeightedAdjMatrix(weighter,w);
122 |             sumRowsAdjMat = full(sum(adjMat,2));
123 |             [i,j,v_adjMat] = find(adjMat);
124 |             v_sum_fuv_w = sumRowsAdjMat(i);
125 | 
126 |             %transition probability matrix with respect to the source node
127 |             Q = this.calcTransitionProbabilityMatrixForSourceSparse(n,i,j,v_adjMat,this.source_node_index,alpha,v_sum_fuv_w);
128 |             Qt = Q';
129 | 
130 |             %power iteration from the uniform vector, renormalized each step
131 |             previous_pagerank = repmat(1.0/n,n,1);
132 |             pagerank = previous_pagerank;
133 |             converged = false;
134 |             for iter=1:max_iter
135 |                 pagerank = Qt*previous_pagerank;
136 |                 pagerank = pagerank./sum(pagerank);
137 |                 if sum((pagerank-previous_pagerank).^2) < tol
138 |                     converged = true;
139 |                     break;
140 |                 end
141 |                 previous_pagerank = pagerank;
142 |             end
143 |             if ~converged
144 |                 warning('Instance:pagerankNotConverged','Error: pagerank didnt converge for w = %s',mat2str(w));
145 |             end
146 |         end
147 |     end
148 | 
149 |     methods (Access = private)
150 | 
151 |         function dQ = TcalcdQM(sDL,weighter,w,alpha,k,adjMat,v_sum_fuv_w,v_sum_fuv_w_squared) %#ok<INUSL>
152 |             %derivative of the transition matrix with respect to parameter
153 |             %k, returned as an n x n sparse matrix on the edges of adjMat.
154 |             %The edge strength is f = g(sum_m w(m)*psi_m), so
155 |             %df/dw(k) = psi_k*g'(.), and g' is expressed through f itself:
156 |             %g' = f for the exponential weighter (type 1) and g' = f*(1-f)
157 |             %for the logistic one. w is unused because adjMat already holds f.
158 |             n = sDL.graph.num_nodes;
159 |             [i,j,v_adjMat] = find(adjMat);
160 |             %value of feature k on exactly the edges listed in (i,j); reading
161 |             %by index keeps the vectors aligned even where a feature is zero
162 |             psi_k = sDL.graph.features{k};
163 |             v_psi_k = full(psi_k(sub2ind([n n],i,j)));
164 |             v_psi_k = v_psi_k(:);
165 |             if weighter.type == 1
166 |                 v_dfuv_dwk = v_psi_k.*v_adjMat;
167 |             else
168 |                 v_dfuv_dwk = v_psi_k.*v_adjMat.*(1-v_adjMat);
169 |             end
170 |             %row sums of the derivative, expanded back to one value per edge
171 |             sum_dfuv_dwk = accumarray(i,v_dfuv_dwk,[n 1]);
172 |             v_sum_dfuv_dwk = sum_dfuv_dwk(i);
173 |             %quotient rule on f(u,v)/sum_v f(u,v), scaled by (1-alpha)
174 |             res = (1-alpha).*(v_dfuv_dwk.*v_sum_fuv_w-v_adjMat.*v_sum_dfuv_dwk)./v_sum_fuv_w_squared;
175 |             dQ = sparse(i,j,res,n,n);
176 |         end
177 | 
178 |         function dQ = TcalcdQ(sDL,weighter,w,alpha,k,adjMat,sumRowsAdjMatRepmat)
179 |             %deprecated element-by-element version; not called by any
180 |             %other method of this class.
181 |             %NOTE(review): it still differentiates through
182 |             %weighter.calcGradient(w(k),feature k) only, so it disagrees
183 |             %with TcalcdQM when the graph has more than one feature.
184 |             dQ = sparse(sDL.graph.num_nodes,sDL.graph.num_nodes);
185 |             sum_dufv_dw = zeros(sDL.graph.num_nodes,1);
186 |             for j=1:sDL.graph.num_nodes
187 |                 for u=1:sDL.graph.num_nodes
188 |                     if sDL.graph.G(j,u) == 1
189 |                         sum_dufv_dw(j) = sum_dufv_dw(j)+weighter.calcGradient(w(k),sDL.graph.features{k}(j,u));
190 |                     end
191 |                 end
192 |             end
193 |             for j=1:sDL.graph.num_nodes
194 |                 for u=1:sDL.graph.num_nodes
195 |                     if sDL.graph.G(j,u) == 1
196 |                         dQ(j,u) = (1-alpha)*(weighter.calcGradient(w(k),sDL.graph.features{k}(j,u))*sumRowsAdjMatRepmat(j)-adjMat(j,u)*sum_dufv_dw(j))/(sumRowsAdjMatRepmat(j)^2);
197 |                     end
198 |                 end
199 |             end
200 |         end
201 | 
202 |         function TransProbMatrix = calcTransitionProbabilityMatrixForSourceSparse(this,n,i,j,v_adjMat,source_node,alpha,v_row_sums) %#ok<INUSL>
203 |             %builds the n x n transition matrix from the edge list (i,j)
204 |             %with weights v_adjMat: every weight is divided by its row sum
205 |             %and scaled by (1-alpha), then alpha is added to the column of
206 |             %the source node
207 |             adjMat = v_adjMat./v_row_sums;
208 |             TransProbMatrix = sparse(i,j,(1-alpha)*adjMat,n,n);
209 |             TransProbMatrix(:,source_node) = TransProbMatrix(:,source_node)+alpha;
210 |             %rows whose sum is exactly alpha got nothing but the restart
211 |             %term; they are sent to the source node with probability 1
212 |             TransProbMatrix(sum(TransProbMatrix,2)==alpha,source_node) = 1;
213 |         end
214 | 
215 |         function TransProbMatrix = calcTransitionProbabilityMatrixForSource(this,adjMat,source_node,alpha,row_sums) %#ok<INUSL>
216 |             %matrix (non edge-list) variant; not called by any other
217 |             %method of this class.
218 |             %Q = normalize adjMat to make it row stochastic
219 |             %Q = (1-alpha)Q+alpha*1(v=s)
220 |             %1(v=s) is a matrix with zeros except for the s column that contains ones
221 |             adjMat = bsxfun(@rdivide,adjMat,row_sums);
222 |             TransProbMatrix = (1-alpha)*adjMat;
223 |             TransProbMatrix(:,source_node) = TransProbMatrix(:,source_node)+alpha;
224 |         end
225 |     end
226 | 
227 | end
228 | 
229 | 


--------------------------------------------------------------------------------
/+core/Instances.m:
--------------------------------------------------------------------------------
 1 | classdef Instances < handle
 2 |     %A set of instance objects with their respective graphs.
 3 |     %An Instances object can be a training or a test dataset for
 4 |     %link prediction
 5 | 
 6 |     properties
 7 |         instances;%an array of instance objects
 8 |     end
 9 | 
10 |     properties (SetAccess = private)
11 |         n = 0;%number of instances, captured by the constructor
12 |     end
13 | 
14 |     methods
15 |         function iset = Instances(instances)
16 |             %initializes the dataset with an array of instances; with no
17 |             %argument an empty dataset (n = 0) is created
18 |             if nargin > 0
19 |                 iset.instances = instances;
20 |                 iset.n = length(instances);
21 |             end
22 |         end
23 | 
24 |         function num_instances = getNumberOfInstances(this)
25 |             %returns the number of instances in this dataset
26 |             num_instances = this.n;
27 |         end
28 | 
29 |         function [cost,gradient] = calcCostAndGradient(this,weighter,alpha,costf,w)
30 |             %calculates the cost and gradient for this dataset using a
31 |             %given cost function. Calls calcCostAndGradient on each
32 |             %instance in the instances array and adds the sum(w.^2)
33 |             %penalty. gradient has the same shape as w.
34 |             costs = zeros(this.n,1);
35 |             gradients = zeros(this.n,numel(w));
36 |             for i=1:this.n
37 |                 [cost_i,gradient_i] = this.instances(i).calcCostAndGradient(weighter,alpha,costf,w);
38 |                 costs(i) = cost_i;
39 |                 gradients(i,:) = gradient_i;
40 |             end
41 |             cost = sum(w.^2)+sum(costs);
42 |             %sum over instances explicitly along dimension 1: with a single
43 |             %instance sum(gradients) would collapse the 1 x f row to a scalar
44 |             gradient = 2*w+reshape(sum(gradients,1),size(w));
45 |         end
46 | 
47 |         function cost = calcCost(this,weighter,alpha,costf,w)
48 |             %calculates the cost for this dataset using a given cost
49 |             %function. Calls calcCost on each instance in the instances
50 |             %array and adds the sum(w.^2) penalty.
51 |             costs = zeros(this.n,1);
52 |             for i=1:this.n
53 |                 costs(i) = this.instances(i).calcCost(weighter,alpha,costf,w);
54 |             end
55 |             cost = sum(w.^2)+sum(costs);
56 |         end
57 | 
58 |     end
59 | 
60 | end
61 | 
62 | 


--------------------------------------------------------------------------------
/+random_generators/RandomGraphGenerator.m:
--------------------------------------------------------------------------------
 1 | classdef RandomGraphGenerator < handle
 2 |     % Generates a random graph built with the hybrid process.
 3 |     % With probability p_preferential a link is chosen using preferential
 4 |     % attachment and with probability 1-p_preferential a link is chosen
 5 |     % uniformly at random. Also generates num_features random features for each edge.
 6 | 
 7 |     properties
 8 |         num_nodes = 100;%number of nodes in each graph
 9 |         num_features = 2;%number of features in each graph
10 |         start_nodes = 10;%starting number of nodes
11 |         p_preferential = .8;%probability for preferential attachment
12 |     end
13 | 
14 |     methods
15 |         function g = generate(this)
16 |             %generates a random graph, starting with start_nodes fully
17 |             %connected nodes and growing it up to num_nodes nodes. Every new
18 |             %node gets start_nodes undirected links to distinct earlier
19 |             %nodes; each link target is drawn proportionally to degree with
20 |             %probability p_preferential and uniformly otherwise.
21 |             %Also generates num_features random features for each edge,
22 |             %drawn with randn. Returns a sparse core.Graph.
23 |             n = this.num_nodes;
24 |             m = this.start_nodes;
25 |             if m > n
26 |                 error('RandomGraphGenerator:invalidSize','start_nodes (%d) must not exceed num_nodes (%d)',m,n);
27 |             end
28 |             G = zeros(n,n);
29 |             G(1:m,1:m) = ones(m,m)-eye(m);
30 |             degrees = zeros(1,n);
31 |             degrees(1:m) = m-1;
32 |             for k=m+1:n
33 |                 for j=1:m
34 |                     %earlier nodes that are not linked to k yet, so the same
35 |                     %edge is never added (and counted in degrees) twice
36 |                     candidates = find(~G(1:k-1,k))';
37 |                     weights = degrees(candidates);
38 |                     if rand() < this.p_preferential && any(weights > 0)
39 |                         %preferential attachment: inverse-CDF draw over the
40 |                         %candidate degrees
41 |                         cum = cumsum(weights);
42 |                         idx = find(cum > rand()*cum(end),1,'first');
43 |                         if isempty(idx)
44 |                             idx = numel(candidates); %guards against rounding
45 |                         end
46 |                         l = candidates(idx);
47 |                     else
48 |                         %uniform choice among the candidates
49 |                         l = candidates(randi(numel(candidates)));
50 |                     end
51 |                     degrees(l) = degrees(l)+1;
52 |                     G(l,k) = 1;
53 |                     G(k,l) = 1;
54 |                 end
55 |                 %node k only becomes a weighted target once all of its own
56 |                 %links are placed
57 |                 degrees(k) = m;
58 |             end
59 |             G = sparse(G);
60 |             psi = cell(this.num_features,1);
61 |             for f=1:this.num_features
62 |                 psi{f} = sparse(randn(n).*G);
63 |             end
64 |             g = core.Graph(n,this.num_features,G,psi,true);
65 |         end
66 |     end
67 | 
68 | end
69 | 
70 | 


--------------------------------------------------------------------------------
/+random_generators/RandomInstancesGenerator.m:
--------------------------------------------------------------------------------
 1 | classdef RandomInstancesGenerator < handle
 2 |     %Used to generate a random instances object for testing. Users can
 3 |     %specify how many instance objects to generate and the generator for
 4 |     %the graph objects.
 5 | 
 6 |     properties
 7 |         K = 10;%top K links are chosen as positive the rest as negative
 8 |         alpha = .3;%restart probability
 9 |         num_graphs = 3;%number of graphs to generate
10 |         num_instances = 50;%number of instances
11 |         graph_generator;%a generator used to generate graphs; created in the constructor
12 |         weighter;%weighter function; created in the constructor
13 |         w = [1,-1];%feature parameters; generate() sets the number of features of the graph generator to length(w)
14 |     end
15 | 
16 |     methods
17 |         function this = RandomInstancesGenerator()
18 |             %returns a generator object with default parameters. The
19 |             %handle-valued defaults are created here and not in the
20 |             %properties block, where one object would be shared by every
21 |             %generator (generate() modifies graph_generator).
22 |             this.graph_generator = random_generators.RandomGraphGenerator();
23 |             this.weighter = utils.EdgeWeighter(1);
24 |         end
25 | 
26 |         function [dataset,weighter,alpha,w] = generate(this)
27 |             %generates the instances object as specified by the properties.
28 |             %Returns the dataset together with the weighter, restart
29 |             %probability and parameters that were used to label it.
30 |             graphs(1,this.num_graphs) = core.Graph();
31 |             this.graph_generator.num_features = length(this.w);
32 |             for i=1:this.num_graphs
33 |                 graphs(i) = this.graph_generator.generate();
34 |             end
35 |             instances(1,this.num_instances) = core.Instance();
36 |             %instances are spread evenly over the graphs; the first
37 |             %mod(num_instances,num_graphs) graphs take one extra
38 |             base_count = floor(this.num_instances/this.num_graphs);
39 |             extra = mod(this.num_instances,this.num_graphs);
40 |             k = 1;
41 |             for i=1:this.num_graphs
42 |                 n = graphs(i).num_nodes;
43 |                 if this.K > n
44 |                     error('RandomInstancesGenerator:invalidK','K (%d) must not exceed the number of nodes (%d)',this.K,n);
45 |                 end
46 |                 num_start_nodes = base_count;
47 |                 if i <= extra
48 |                     num_start_nodes = num_start_nodes+1;
49 |                 end
50 |                 for s=1:num_start_nodes
51 |                     instance = core.Instance();
52 |                     instance.graph = graphs(i);
53 |                     instance.source_node_index = s;
54 |                     %label by pagerank under the true parameters: the K
55 |                     %highest ranked nodes are positive, the rest negative
56 |                     pagerank = instance.calcPagerank(this.weighter,this.alpha,this.w);
57 |                     [~,Idxs] = sort(pagerank);
58 |                     instance.positive_links = Idxs(n-this.K+1:n);
59 |                     instance.negative_links = Idxs(1:n-this.K);
60 |                     instances(k) = instance;
61 |                     k = k+1;
62 |                 end
63 |             end
64 |             %group all instances with their respective graphs together in an
65 |             %instances object
66 |             dataset = core.Instances(instances);
67 |             weighter = this.weighter;
68 |             alpha = this.alpha;
69 |             w = this.w;
70 |         end
71 |     end
72 | 
73 | end
74 | 
75 | 


--------------------------------------------------------------------------------
/+utils/CostFunction.m:
--------------------------------------------------------------------------------
 1 | classdef CostFunction < handle
 2 |     %Calculates the cost and gradient for a given delta value
 3 |     %the function is usually a sigmoid
 4 | 
 5 |     properties
 6 |         b = .00001;%b parameter present in every type of cost function refer to Leskovec
 7 |         type = 1;%1 WMW loss function
 8 |     end
 9 | 
10 |     methods
11 |         function costf = CostFunction(b,type)
12 |             %creates a cost function; b and type are optional and keep
13 |             %their defaults when omitted
14 |             if nargin >= 1
15 |                 costf.b = b;
16 |             end
17 |             if nargin >= 2
18 |                 costf.type = type;
19 |             end
20 |         end
21 | 
22 |         function cost = calcCost(cf,x)
23 |             %element-wise cost 1/(1+exp(-x/b))
24 |             cf.assertKnownType();
25 |             cost = 1.0./(1.0+exp(-x./cf.b));
26 |         end
27 | 
28 |         function gradient = calcGradient(cf,x)
29 |             %element-wise derivative of the cost with respect to x
30 |             cf.assertKnownType();
31 |             tmp = 1.0 ./ (1+exp(x./cf.b));
32 |             gradient = tmp .* (1-tmp) ./ cf.b;
33 |         end
34 | 
35 |         function [cost,gradient] = calcCostAndGradient(cf,x)
36 |             %element-wise cost and its derivative with respect to x
37 |             cf.assertKnownType();
38 |             cost = 1.0./(1.0+exp(-x./cf.b));
39 |             gradient = cost .* (1-cost) ./ cf.b;
40 |         end
41 | 
42 |     end
43 | 
44 |     methods (Access = private)
45 |         function assertKnownType(cf)
46 |             %only type 1 is implemented; fail with a clear message rather
47 |             %than leaving the caller's outputs unassigned
48 |             if cf.type ~= 1
49 |                 error('CostFunction:unknownType','unsupported cost function type %g',cf.type);
50 |             end
51 |         end
52 |     end
53 | 
54 | end
55 | 
56 | 


--------------------------------------------------------------------------------
/+utils/EdgeWeighter.m:
--------------------------------------------------------------------------------
 1 | classdef EdgeWeighter < handle
 2 |     %Turns the weighted feature sum of an edge into an edge strength.
 3 | 
 4 |     properties
 5 |        type = 1;%one is exponential edge strength, 2 is logistic edge strength
 6 |     end
 7 | 
 8 |     methods
 9 |         function weighter = EdgeWeighter(type)
10 |             %creates a weighter; type keeps its default when omitted
11 |             if nargin > 0
12 |                 weighter.type = type;
13 |             end
14 |         end
15 | 
16 |         function weights = calcWeights(weighter,dot_product)
17 |             %element-wise edge strength for a matrix of feature sums
18 |             if weighter.type == 1
19 |                 %spfun applies exp to the nonzero entries only
20 |                 weights = spfun(@exp,dot_product);
21 |             else
22 |                 %element-wise division; 1/(...) is matrix right-division
23 |                 %and fails for any non-scalar input
24 |                 weights = 1./(1+exp(-dot_product));
25 |             end
26 |         end
27 | 
28 |         function gradient = calcGradient(weighter,w,psi)
29 |             %element-wise derivative, with respect to w, of the strength
30 |             %computed from psi*w alone
31 |             if weighter.type == 1
32 |                 gradient = psi.*spfun(@exp,psi*w);
33 |             else
34 |                 gradient = psi.*exp(-psi*w)./(1+exp(-psi*w)).^2;
35 |             end
36 |         end
37 |     end
38 | 
39 | end
40 | 
41 | 


--------------------------------------------------------------------------------
/+utils/ParameterLearner.m:
--------------------------------------------------------------------------------
 1 | classdef ParameterLearner
 2 |     %A utility class that learns the best parameters for a specified
 3 |     %instances object with the learn method. Groups together information
 4 |     %about the weighter and cost functions and the restart probability. Can
 5 |     %also define the maximal time limit for learning the parameters.
 6 | 
 7 |     properties
 8 |         weighter@utils.EdgeWeighter;%weighter function
 9 |         costf@utils.CostFunction;%cost function
10 |         alpha = .3;%restart probability
11 |         time_limit = 30;%time limit for optimization in seconds
12 |         print_progress=true;%whether or not to display progress while learning the parameters
13 |     end
14 | 
15 |     methods
16 |         function this = ParameterLearner(weighter,alpha,costf,time_limit,print_progress)
17 |             %creates a new parameter learner. The arguments are positional
18 |             %and optional: every argument that is omitted keeps its default
19 |             %value (a default EdgeWeighter and CostFunction are created
20 |             %when those are not supplied).
21 |             if nargin >= 1
22 |                 this.weighter = weighter;
23 |             else
24 |                 this.weighter = utils.EdgeWeighter();
25 |             end
26 |             if nargin >= 2
27 |                 this.alpha = alpha;
28 |             end
29 |             if nargin >= 3
30 |                 this.costf = costf;
31 |             else
32 |                 this.costf = utils.CostFunction();
33 |             end
34 |             if nargin >= 4
35 |                 this.time_limit = time_limit;
36 |             end
37 |             if nargin >= 5
38 |                 this.print_progress = print_progress;
39 |             end
40 |         end
41 | 
42 |         function w = learn(this,instances)
43 |             %learns the parameters using the simulated annealing method;
44 |             %the parameters are bounded to [-3,3]
45 |             nf = instances.instances(1).graph.num_features;
46 |             w0 = repmat(.000001,1,nf);
47 |             ObjectiveFunction = @(x) instances.calcCost(this.weighter,this.alpha,this.costf,x);
48 |             %named display_mode so the built-in display is not shadowed
49 |             if this.print_progress
50 |                 display_mode = 'iter';
51 |             else
52 |                 display_mode = 'off';
53 |             end
54 |             options = saoptimset('Display',display_mode,'ReannealInterval',10,...
55 |                                 'ObjectiveLimit',nf+1,'TimeLimit',this.time_limit,...
56 |                                 'TemperatureFcn',@temperatureboltz,'TolFun',1e-10);
57 |             w = simulannealbnd(ObjectiveFunction,w0,-repmat(3.0,1,nf),repmat(3.0,1,nf),options);
58 |         end
59 |     end
60 | 
61 | end
62 | 
63 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
 1 | # Auto detect text files and perform LF normalization
 2 | * text=auto
 3 | 
 4 | # Custom for Visual Studio
 5 | *.cs     diff=csharp
 6 | 
 7 | # Standard to msysgit
 8 | *.doc	 diff=astextplain
 9 | *.DOC	 diff=astextplain
10 | *.docx diff=astextplain
11 | *.DOCX diff=astextplain
12 | *.dot  diff=astextplain
13 | *.DOT  diff=astextplain
14 | *.pdf  diff=astextplain
15 | *.PDF	 diff=astextplain
16 | *.rtf	 diff=astextplain
17 | *.RTF	 diff=astextplain
18 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Windows image file caches
 2 | Thumbs.db
 3 | ehthumbs.db
 4 | 
 5 | # Folder config file
 6 | Desktop.ini
 7 | 
 8 | # Recycle Bin used on file shares
 9 | $RECYCLE.BIN/
10 | 
11 | # Windows Installer files
12 | *.cab
13 | *.msi
14 | *.msm
15 | *.msp
16 | 
17 | # Windows shortcuts
18 | *.lnk
19 | 
20 | # =========================
21 | # Operating System Files
22 | # =========================
23 | 
24 | # OSX
25 | # =========================
26 | 
27 | .DS_Store
28 | .AppleDouble
29 | .LSOverride
30 | 
31 | # Thumbnails
32 | ._*
33 | 
34 | # Files that might appear on external disk
35 | .Spotlight-V100
36 | .Trashes
37 | 
38 | # Directories potentially created on remote AFP share
39 | .AppleDB
40 | .AppleDesktop
41 | Network Trash Folder
42 | Temporary Items
43 | .apdisk
44 | 


--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Andrej
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Link prediction with supervised random walks
 2 | ==================================
 3 | 
 4 | A Matlab implementation of the supervised random walks algorithm for link prediction proposed by Backstrom and Leskovec.
 5 | 
 6 | For a detailed explanation of the algorithm, we refer users to their [2011 paper](http://arxiv.org/pdf/1011.4071.pdf).
 7 | 
 8 | ----------------------------------
 9 | User manual
10 | ----------------------------------
11 | 
12 | You can test the code with artificial data to check its performance and computational efficiency. To do this you need to generate a random dataset with some prespecified parameters. Next you can use a parameter learner and see how well it will guess the parameters you specified in the previous step.
13 | 
14 | ```matlab
15 | %generate a random dataset, if you want to change the number of instances,
16 | %features or the weighter function, simply change the respective properties 
17 | %of the generator before invoking the generate function
18 | 
19 | generator = random_generators.RandomInstancesGenerator();
20 | [dataset,weighter,alpha,true_w] = generator.generate();
21 | 
22 | %use a WMW cost function; a time limit of 30 seconds and progress printing
23 | %for learning the parameters
24 | learner = utils.ParameterLearner(weighter,alpha,utils.CostFunction(),30,true);
25 | learned_w = learner.learn(dataset);
26 | 
27 | disp(['The true parameters are: ',num2str(true_w)])
28 | disp(['The learned parameters are: ',num2str(learned_w)])
29 | ```
30 | 
31 | The above code is pretty self-explanatory, but I feel it is important to discuss the system abstractions here. I designed the system guided by the Weka organization, so anyone who is familiar with it will feel very comfortable using this framework.
32 | I'll just give a brief definition of the major abstractions. 
33 | 
34 | - A *Graph* is represented by an unweighted adjacency matrix that determines which nodes are connected to each other. Additionally, each edge has an array of values attached to it which we call *features*.
35 | 
36 | - An *Instance* object defines the positive and negative links for a given node. You can think of these as the classes/labels for the node in a multilabel classification task, which ultimately is what we are trying to learn to predict. For predicting the positive and negative links the algorithm uses information about the graph topology and the features for each link. The effect that each feature has on the prediction depends heavily on the feature parameters and slightly on the *weighter function*. 
37 | 
38 | - In order to learn anything useful, the algorithm needs many instance objects which are grouped in an *Instances* object for easier manipulation. 
39 | 
40 | - The *ParameterLearner* tries to find those parameters that give the predictions that match the positive/negative links specified. Each instance is treated independently although some of them may share the same graph. The learner uses the *cost function* to evaluate its predictions.
41 |  
42 |  
43 | The system architecture and package structure are summarized in the following diagram:
44 | 
45 | ![alt tag](https://raw.githubusercontent.com/gajduk/link-prediction-with-supervised-random-walks/master/architecture.png)
46 | 
47 | 
48 | You can confirm that the problem is smooth by plotting the cost function for a small number of features using the following code
49 | 
50 | ```matlab
51 | %define the bounds and the granularity
52 | granularity = 10;
53 | bound1 = 2.5;
54 | bound2 = 2.5;
55 | costf = utils.CostFunction();
56 | 
57 | %create the meshgrid and initialize the cost
58 | x = -bound1:bound1/granularity*2:bound1;
59 | y = -bound2:bound2/granularity*2:bound2;
60 | [X,Y] = meshgrid(x,y);
61 | n = length(x);
62 | Z = zeros(n,n);
63 | 
64 | %generate the random dataset
65 | generator = random_generators.RandomInstancesGenerator();
66 | [dataset,weighter,alpha,true_w] = generator.generate();
67 | 
68 | %calculate the cost for each point
69 | parfor i=1:n
70 |     for k=1:n
71 |         cost = dataset.calcCost(weighter,alpha,costf,[X(i,k)+0.000001,Y(i,k)+0.000001]);
72 |         Z(i,k) = cost;
73 |     end
74 | end
75 | 
76 | %plot the results
77 | figure;surf(X,Y,Z);xlabel('w1');ylabel('w2');title('cost');
78 | figure;surf(X,Y,log(Z));xlabel('w1');ylabel('w2');title('log cost');
79 | ```
80 | 
81 | By increasing the granularity you can get the following images
82 | 
83 | ![alt tag](https://raw.githubusercontent.com/gajduk/link-prediction-with-supervised-random-walks/master/cost.png)
84 | 
85 | ![alt tag](https://raw.githubusercontent.com/gajduk/link-prediction-with-supervised-random-walks/master/log_cost.png)
86 | 
87 | 
88 | -------------------
89 | 
90 | I implemented this mostly to hone my Matlab skills.
91 | However, I also did [another implementation](https://github.com/gajduk/TwitterLinkPrediction), this time in Java, which I am currently using to study the social network landscape in Macedonia.


--------------------------------------------------------------------------------
/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gajduk/link-prediction-with-supervised-random-walks/d22a109c64d2129759b1d3144ddf27e584e073ac/architecture.png


--------------------------------------------------------------------------------
/cost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gajduk/link-prediction-with-supervised-random-walks/d22a109c64d2129759b1d3144ddf27e584e073ac/cost.png


--------------------------------------------------------------------------------
/log_cost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gajduk/link-prediction-with-supervised-random-walks/d22a109c64d2129759b1d3144ddf27e584e073ac/log_cost.png


--------------------------------------------------------------------------------
/main.m:
--------------------------------------------------------------------------------
 1 | %Demo script: builds a random dataset from known parameters and then checks
 2 | %how closely a parameter learner recovers them.
 3 | 
 4 | %To change the number of instances, the number of features or the weighter
 5 | %function, set the corresponding generator properties before calling generate.
 6 | generator = random_generators.RandomInstancesGenerator();
 7 | [dataset,weighter,alpha,true_w] = generator.generate();
 8 | 
 9 | %learner settings: WMW cost function, 30 second time limit, progress printing
10 | cost_function = utils.CostFunction();
11 | learner = utils.ParameterLearner(weighter,alpha,cost_function,30,true);
12 | learned_w = learner.learn(dataset);
13 | 
14 | %report the true parameters followed by the learned ones
15 | true_msg = ['The true parameters are: ',num2str(true_w)];
16 | disp(true_msg)
17 | learned_msg = ['The learned parameters are: ',num2str(learned_w)];
18 | disp(learned_msg)
19 | 


--------------------------------------------------------------------------------
/plotcost.m:
--------------------------------------------------------------------------------
 1 | %Plots the cost function of a random instances object over a 2-D grid of
 2 | %candidate parameter vectors [w1,w2]. Might take a while.
 3 | %The grid is evaluated with parfor, so starting a parallel pool before
 4 | %calling this script ("parpool", or "matlabpool open" on old releases) is a
 5 | %good idea.
 6 | 
 7 | %you can change the granularity or the bounds here
 8 | granularity = 10; %number of grid steps along each axis
 9 | bound1 = 2.5;     %w1 is sampled on [-bound1,bound1]
10 | bound2 = 2.5;     %w2 is sampled on [-bound2,bound2]
11 | offset = 0.000001; %small shift applied to both coordinates of every grid point
12 | costf = utils.CostFunction();
13 | 
14 | %create the meshgrid and initialize the cost
15 | x = -bound1:bound1/granularity*2:bound1;
16 | y = -bound2:bound2/granularity*2:bound2;
17 | [X,Y] = meshgrid(x,y);
18 | %meshgrid returns length(y) x length(x) matrices, so take the size of Z and
19 | %the loop bounds from X itself instead of assuming both axes are equally long
20 | [num_rows,num_cols] = size(X);
21 | Z = zeros(num_rows,num_cols);
22 | 
23 | %generate the random dataset
24 | generator = random_generators.RandomInstancesGenerator();
25 | [dataset,weighter,alpha,true_w] = generator.generate();
26 | 
27 | %calculate the cost for each point; rows are distributed over the workers
28 | parfor i=1:num_rows
29 |     for k=1:num_cols
30 |         Z(i,k) = dataset.calcCost(weighter,alpha,costf,[X(i,k)+offset,Y(i,k)+offset]);
31 |     end
32 | end
33 | 
34 | %plot the cost surface and its logarithm
35 | figure;surf(X,Y,Z);xlabel('w1');ylabel('w2');title('cost');
36 | figure;surf(X,Y,log(Z));xlabel('w1');ylabel('w2');title('log cost');


--------------------------------------------------------------------------------