├── CreateGraphInstances
│   ├── compute_knn_wnn.m
│   ├── data_dim_d.m
│   └── symmetrize_weights.m
├── IRLS-pNorm.jl
├── README.md
├── create_graph_matrices.m
├── pNorm.m
├── test_graph_instances.m
└── test_random_instances.m

/CreateGraphInstances/compute_knn_wnn.m:
--------------------------------------------------------------------------------
% Written by Mauricio Flores, last edited 08/24/2018.
% This function computes 'knn' and 'wnn' for a given 'X', 'Y' set.

function [knn, wnn, k_neigh, h] = compute_knn_wnn(X, n, m, k_neigh, local, symm)

% Apply default inputs:
if nargin < 5; local = 0; end
if nargin < 6; symm = 1; end

% Computing KNN is 99% of the cost:
[knn, dnn] = knnsearch(X, X, 'k', k_neigh);

% Compute the length scale. If 'local' = 1, we compute a different length
% scale for each vertex in the graph (symmetry is not lost, though).
if local == 0
    h = max(dnn(:))/2; wnn = exp(-dnn.^2/h^2);
else
    h = max(dnn, [], 2)/2; wnn = exp(-(dnn./h).^2);
end

if symm == 1
    [knn, wnn, k_neigh] = symmetrize_weights(n, m, k_neigh, knn, wnn);
end

h = mean(h); % Need to return a single 'h'.

end % End of compute_knn_wnn.
--------------------------------------------------------------------------------
/CreateGraphInstances/data_dim_d.m:
--------------------------------------------------------------------------------
function [X, Y, g, m] = data_dim_d(n, d)

% Written by Mauricio Flores, last edited on 08/22/18.

% We have decided to use three types of datasets, as follows:
% (1) Two-dimensional, uniform, with two labels.
% (2) High-dimensional, uniform, with few labels.
% (3) Two clusters, with different functions inside.

% This corresponds to dataset (2) within that list.
% We generate a uniform distribution in [0,1]^d,
% with labeled points & labels randomly chosen.

% Unlabeled points:
X = rand(n, d);

% Labeled points:
m = 10;
g = rand(m, 1);
Y = rand(m, d);

end
--------------------------------------------------------------------------------
/CreateGraphInstances/symmetrize_weights.m:
--------------------------------------------------------------------------------
% Function to symmetrize weights (which we always do).
function [knn, wnn, k_neigh] = symmetrize_weights(n, m, k_neigh, knn, wnn)

% Assemble & symmetrize the weight matrix:
W = sparse((1:n+m)'*ones(1, k_neigh), knn, wnn); W = 0.5*(W + W');

% Compute the start index for the neighbors of each 'x'.
[row, col] = find(W); % Note: index columns are already sorted, and it's ok
                      % to loop across 'col' because W is symmetric.

% Figure out the length of each set of neighbors:
start = [1; find(diff(col)) + 1; length(col)+1]; neigh_x = diff(start);

%% Compute new knn & weight matrices (in place):
knn = ones(n+m, max(neigh_x)); wnn = zeros(n+m, max(neigh_x));
for i = 1 : n+m
    knn(i, 1 : neigh_x(i)) = row(start(i) : start(i+1) - 1);
    wnn(i, 1 : neigh_x(i)) = W(i, knn(i, 1:neigh_x(i)));
end

k_neigh = size(knn, 2);

end % End of function to symmetrize weights.
--------------------------------------------------------------------------------
/IRLS-pNorm.jl:
--------------------------------------------------------------------------------
using LinearAlgebra
using SparseArrays

# The linear solver used at every iteration.
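# It solves the (symmetric positive semi-definite) normal-equation system
# (A'WA) y = b directly via "\". Note that the right-hand side b here is
# whatever the caller has assembled (e.g. A'g), not the b of the
# regression problem itself.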
function solve(A, W, b)
    L = A' * W * A
    return L\b
end

# Finds the initial solution, i.e., the l_2-norm minimizer. Note that the
# solutions for the cases with and without constraints look a little
# different.
function InitialSoln(C, d, A, b)
    if findmax(abs.(C))[1] == 0
        return solve(A, I, A'*b)
    else
        inverseCalc = inv(A'*A)
        v = solve(C', inverseCalc, d - C*inverseCalc*A'*b)
        x = solve(A, I, A'*b + C'*v)
        return x
    end
end

# Returns the next step. The cases with and without linear constraints are a little different.
function FindDeltaConstraints(C, d, A, b, x, i, p)
    m = size(A)[1]
    R = Diagonal((abs.(A*x - b)).^(p-2))
    s = 0.5*i^((p-2)/p)/m^((p-2)/p)
    g = p*R*(A*x - b)
    R1 = R + s*I
    if findmax(abs.(C))[1] == 0
        inverseCalc = solve(A, R1, transpose(A)*g)
        quadform = transpose(g)*A*inverseCalc
        if quadform == 0
            println("s = ", s)
            println(norm(inverseCalc, 2))
            println("norm gradient ", norm(g, 2))
        end
        Δ = i*inverseCalc/(2*quadform)
    else
        C_aug = vcat(C, g'*A)
        d_aug = vcat(zeros(size(d)[1]), i/2)
        inverseCalc = inv(A'*R1*A)
        v = solve(C_aug', inverseCalc, d_aug)
        Δ = solve(A, R1, C_aug'*v)
    end
    return Δ
end

# A function that calculates the gradient of ||A(x-scale*delta)-b||_p^p.
# Here A,b are as in the input. We use this in the next function to find a
# scale so that, given the current solution x and the next step delta, we
# can scale delta so as to make maximum progress.
function GradientScaledObj(scale, p, z, w)
    v = z - scale*w
    y = abs.(v).^(p-2)
    y1 = v.*y
    return (-1)*(w'*y1)
end

# This finds a scaling so that, given the current solution x and the next
# step delta, we can scale delta so as to make maximum progress.
function LineSearchObj(A, b, x, p, Δ)
    L = -1
    U = 1
    w = A*Δ
    z = A*x - b
    while GradientScaledObj(U, p, z, w) < 0
        L = U
        U = 2*U
    end
    while GradientScaledObj(L, p, z, w) > 0
        U = L
        L = 2*L
    end
    @assert (GradientScaledObj(L, p, z, w) < 0)
    @assert (GradientScaledObj(U, p, z, w) > 0)
    while U - L > 1e-4
        if (GradientScaledObj((L+U)/2, p, z, w) > 0)
            U = (L+U)/2
        else
            L = (L+U)/2
        end
    end
    α = (L+U)/2
    return α
end

# Evaluates the value of the residual problem.
function EvalResidual(A, b, x, p, Δ)
    R = spdiagm(0 => (abs.(A*x - b)).^(p-2))
    g = p*R*(A*x - b)
    return transpose(g)*A*Δ - 2*p*p*transpose(A*Δ)*R*A*Δ - (p*norm(A*Δ, p))^p
end

# In our algorithm, we added a padding to the resistances. This function
# checks whether we are making enough progress and adjusts the padding
# accordingly.
function Reduce_i(A, b, x, i, p, Δ)
    R = (abs.(A*x - b)).^(p-2)
    m = size(A)[1]
    s = 0.5*i^((p-2)/p)/m^((p-2)/p)
    SqTerm = transpose(A*Δ)*((R .+ s).*(A*Δ))

    # Ratio of the square term and the p-norm term at delta.
    k = p*norm(A*Δ, p)*(norm(A*Δ, p)/(2*p*SqTerm))^(1/(p-1))
    λ = 16*p

    # The minimum progress that has to be made, i.e., the minimum approximation factor.
    α0 = findmin([1/(16*λ), 1/((16*λ)^(1/(p-1))*k)])[1]
    γ = EvalResidual(A, b, x, p, α0*Δ)

    # Condition that determines whether the padding i has to be reduced.
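    # We halve i when either (a) the residual objective γ at the scaled
    # step α0*Δ fails to clear the progress threshold 0.25*α0*i, or (b)
    # the quadratic term already exceeds λ*i, meaning the padding s
    # (a function of i) is dominating the true resistances.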
    return γ - 0.25*α0*i <= 0 || SqTerm >= λ*i
end


# The main algorithm, where the parameters are:
# ϵ : accuracy we want to achieve
# A,b : the objective we are minimizing is ||Ax-b||_p^p
# p : the norm we want to minimize
# C,d : the linear constraints are Cx = d
# x : initial solution
# lb : lower bound on the optimum
function pNorm(ϵ, A, b, p, C, d, x, lb)
    # lb is a lower bound on the objective.
    # Initial solution:
    current = norm(A*x - b, p)

    println("initial objective = ", current^p)

    # Check if the initial solution is 0. In that case return it directly.
    if current^p == 0
        println("Norm = 0")
        return x, 0
    end
    iteration = 1
    m = size(b)[1]

    # Initial padding. An upper bound on (Initial_Solution - OPT)/16p.
    i = (current^p - lb^p)/(16*p)

    # Termination condition; once this holds, we have a (1+ϵ)-approximate solution.
    while i > 2*ϵ*current^p/(16*p*(1+ϵ))
        iteration = iteration + 1
        println("Iteration count: ", iteration)

        # Find the next step.
        Δ = FindDeltaConstraints(C, d, A, b, x, i, p)

        # Find the step size that gives the minimum objective given the step Δ.
        α = LineSearchObj(A, b, x, p, Δ)
        need_to_reduce_i = false

        # Check if we have made sufficient progress; if not, reduce the padding.
        if Reduce_i(A, b, x, i, p, Δ)
            need_to_reduce_i = true
        end

        # Check if our new norm is less than the current norm, and then
        # update x. It is possible to get a value that does not reduce the
        # norm because the line search does not solve to very high
        # precision.
        if norm(A*(x - α*Δ) - b, p) < current
            x = x - α*Δ
            current = norm(A*x - b, p)
            println("Reducing norm : ", current^p)
        else
            # If we do not reduce the norm, we reduce the padding i.
            need_to_reduce_i = true
        end

        if need_to_reduce_i
            println("Reducing i")
            i = i/2
        end
        i = min(i, (current^p - lb^p)/(16*p))
    end
    return x, iteration
end


# ϵ : accuracy we want to achieve
# A,b : the objective we are minimizing is ||Ax-b||_p^p
# p : the norm we want to minimize
# C,d : the linear constraints are Cx = d
function pNorm(ϵ, A, b, p, C, d)
    x = InitialSoln(C, d, A, b)
    m = size(b)[1]
    lb = norm(A*x - b, 2)/m^(1/2 - 1/p)
    return pNorm(ϵ, A, b, p, C, d, x, lb)
end
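# A minimal usage sketch (the dimensions here are hypothetical): minimize
# ||Ax - b||_4 to accuracy ϵ = 1e-8 without linear constraints, which is
# signalled by passing an all-zero C (d is then unused), as in the tests:
#
#   A = rand(1000, 800); b = rand(1000)
#   C = zeros(800, 800); d = zeros(800)
#   x, iterations = pNorm(1e-8, A, b, 4, C, d)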
The main files have the function implementation. We have two 9 | files, one for graph instances and one for random matrix instances, that can be run directly. For more details on 10 | these instances and the problems we are solving with them, refer to the paper. The functions can be directly used with other 11 | inputs as well. Refer to the test files to see how to use them. 12 | 13 | --- 14 | If you found this code useful in your work, please cite: 15 | 16 | ``` 17 | @incollection{APS19, 18 | title = {Fast, Provably convergent IRLS Algorithm for p-norm Linear Regression}, 19 | author = {Adil, Deeksha and Peng, Richard and Sachdeva, Sushant}, 20 | booktitle = {Advances in Neural Information Processing Systems 32}, 21 | editor = {H. Wallach and H. Larochelle and A. Beygelzimer and F. d\textquotesingle Alch\'{e}-Buc and E. Fox and R. Garnett}, 22 | pages = {14166--14177}, 23 | year = {2019}, 24 | publisher = {Curran Associates, Inc.}, 25 | url = {http://papers.nips.cc/paper/9565-fast-provably-convergent-irls-algorithm-for-p-norm-linear-regression.pdf} 26 | } 27 | ``` 28 | -------------------------------------------------------------------------------- /create_graph_matrices.m: -------------------------------------------------------------------------------- 1 | 2 | % This function creates the required matrices from the graphs generated by 3 | % Mauricio Flores et al. We use their scripts which are in the folder 4 | % CreateGraphInstances and use that data to create the matrices. 5 | 6 | function [A,b,C,d,sizedata] = create_graph_matrices(n,p,k_neigh) 7 | dim = 10; 8 | addpath('CreateGraphInstances/') 9 | [X, Y, g, m] = data_dim_d(n, dim); 10 | [knn, wnn, k_neigh] = compute_knn_wnn([X; Y], n, m, k_neigh); 11 | wnn = wnn(1:n,:); knn = knn(1:n,:); 12 | 13 | %create list of edges 14 | [rknn,cknn] = size(knn); 15 | L = []; 16 | Wt = []; 17 | for i = 1:rknn 18 | for j = 1:cknn 19 | if knn(i,j)~=1 && knn(i,j)> i 20 | edge = [i knn(i,j)]; 21 | weight = wnn(i,j); 22 | L = [L;edge]; 23 | Wt = [Wt;weight]; 24 | end 25 | end 26 | end 27 | 28 | %create the graph 29 | G = graph(L(:,1),L(:,2)); 30 | B = incidence(G); 31 | Unwtd_A = transpose(B); 32 | s = size(Wt); 33 | sizedata = s(1); 34 | 35 | %creating matrices A,b,C,d 36 | A = spdiags(Wt.^(1/p),zeros(1,1),s(1),s(1))*B'; 37 | b = sparse(-A(:,n+1:n+m)*g); 38 | A = sparse(A(:,1:n)); 39 | C = zeros(n); 40 | d = zeros(n,1); 41 | end 42 | -------------------------------------------------------------------------------- /pNorm.m: -------------------------------------------------------------------------------- 1 | %The main function. Want to solve min_x ||Ax-b||_p subject to constraints 2 | %Cx = d to a (1+eps) approximation. 3 | function [final,iteration]= pNorm(eps,A,b,p,C,d) 4 | x = InitialSoln(C,d,A,b); 5 | m = size(b); 6 | lb = norm(A*x-b, 2)/m(1)^(1/2-1/p); 7 | [final,iteration] = pNorm_with_initial_vector(eps,A,b,p,C,d,x,lb); 8 | fprintf('Final norm %d \n', norm(final,p)^p); 9 | end 10 | 11 | % The main algorithm. 12 | function [final_vec,iteration] = pNorm_with_initial_vector(eps,A,b,p,C,d, x, lb) 13 | % lb is a lower bound on the objective 14 | % Initial Solution 15 | current = norm(A*x - b,p); 16 | % Check if the initial solution is 0. In that case return 0. 17 | if current^p ==0 18 | println("Norm = 0"); 19 | println("Max gradient entry:", max_gradient_entry); 20 | iteration = 1; 21 | return 22 | end 23 | iteration = 1; 24 | m = size(b); 25 | 26 | % Initial padding. An upper bound on (Initial_Solution - OPT)/16p. 
i = (current^p - lb^p)/(16*p);

% Termination condition; once this holds, we have a (1+eps)-approximate solution.
while i > 2*eps*current^p/(16*p*(1+eps))
    iteration = iteration+1;
    fprintf('Iteration count: %d \n', iteration);

    % Find the next step.
    delta = FindDeltaConstraints(C,d,A,b,x,i,p);

    % Find the step size that gives the minimum objective given the step delta.
    alpha = LineSearchObj(A,b,x,p,delta);
    need_to_reduce_i = false;

    % Check if we have made sufficient progress; if not, reduce the padding.
    if Reduce_i(A,b,x,i,p,delta)
        need_to_reduce_i = true;
    end
    % Check if our new norm is less than the current norm, and then
    % update x. It is possible to get a value that does not reduce the
    % norm because the line search does not solve to very high
    % precision.
    if norm(A*(x-alpha*delta)-b,p) < current
        x = x-alpha*delta;
        current = norm(A*x-b,p);
        fprintf('Reducing norm: %g \n', current^p);
    else
        % If we do not reduce the norm, we reduce the padding i.
        need_to_reduce_i = true;
    end
    if need_to_reduce_i
        i = i/2;
        fprintf('Reducing i: %g \n', i);
    end
    i = min(i, (current^p - lb^p)/(16*p));
end
final_vec = x;
end

% The linear solver used at every iteration. Accepts the weights either as
% a vector w (interpreted as the diagonal of W) or as a full matrix W.
function delta = solve(A, w, b)
if isvector(w)
    m = length(w);
    W = spdiags(w(:), 0, m, m);
else
    W = w;
end
L = transpose(A) * W * A;
delta = L\b;
end

% Finds the initial solution, i.e., the l_2-norm minimizer. Note that the
% solutions for the cases with and without constraints look a little
% different.
function soln = InitialSoln(C,d,A,b)
m = size(A);
if max(abs(C(:))) == 0
    soln = solve(A,ones(m(1),1),transpose(A)*b);
else
    inverseCalc = inv(transpose(A)*A);
    v = solve(transpose(C),inverseCalc,d-C*inverseCalc*transpose(A)*b);
    soln = solve(A,ones(m(1),1),transpose(A)*b+transpose(C)*v);
end
end


% Returns the next step.
function delta = FindDeltaConstraints(C,d,A,b,x,i,p)
m = size(A);
s = 0.5*i^((p-2)/p)/m(1)^((p-2)/p);
r = (abs(A*x-b)).^(p-2);
g = p*r.*(A*x-b);
r1 = r + s;
if max(abs(C(:))) == 0
    inverseCalc = solve(A, r1, transpose(A)*g);
    quadform = transpose(g)*A*inverseCalc;
    delta = i*inverseCalc/(2*quadform);
else
    sizeofd = size(d);
    C_aug = [C;transpose(g)*A];
    d_aug = [zeros(sizeofd(1),1);i/2];
    R1 = spdiags(r1, 0, m(1), m(1)); % r1 as a diagonal matrix.
    inverseCalc = inv(transpose(A)*R1*A);
    v = solve(transpose(C_aug),inverseCalc,d_aug);
    delta = solve(A,r1,transpose(C_aug)*v);
end
end


% A function that calculates the gradient of ||A(x-scale*delta)-b||_p^p.
% Here A,b are as in the input. We use this in the next function to find a
% scale so that, given the current solution x and the next step delta, we
% can scale delta so as to make maximum progress.
function obj = GradientScaledObj(scale,p,z,w)
v = z - scale*w;
y = abs(v).^(p-2);
y1 = v .* y;
obj = -1 * (w' * y1);
end

% This finds a scaling so that, given the current solution x and the next
% step delta, we can scale delta so as to make maximum progress.
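% Because ||A(x - alpha*delta) - b||_p^p is convex in the scalar alpha, its
% derivative (computed by GradientScaledObj) is nondecreasing, so a sign
% change can be bracketed by repeated doubling and then pinned down by
% bisection, as done below.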
function alpha = LineSearchObj(A,b,x,p,delta)
L = -3;
U = 3;
w = A * delta;
z = A * x - b;
while GradientScaledObj(U,p,z,w) < 0
    L = U;
    U = 2*U;
end
while GradientScaledObj(L,p,z,w) > 0
    U = L;
    L = 2*L;
end
assert(GradientScaledObj(L,p,z,w) < 0);
assert(GradientScaledObj(U,p,z,w) > 0);
while abs(U-L) > 1e-1
    if (GradientScaledObj((L+U)/2,p,z,w) > 0)
        U = (L+U)/2;
    else
        L = (L+U)/2;
    end
end
alpha = (L+U)/2;
end


% Evaluates the value of the residual problem.
function gamma = EvalResidual(A,b,x,p,delta)
m = size(A);
R = spdiags((abs(A*x-b)).^(p-2),0,m(1),m(1));
g = p*R*(A*x-b);
gamma = transpose(g)*A*delta - 2*p*p*transpose(A*delta)*R*A*delta - (p*norm(A*delta,p))^p;
end

% In our algorithm, we added a padding to the resistances. This function
% checks whether we are making enough progress and adjusts the padding
% accordingly.
function [out] = Reduce_i(A,b,x,i,p,delta)
m = size(A);
R = abs(A*x-b).^(p-2);
s = 0.5*i^((p-2)/p)/m(1)^((p-2)/p);
SqTerm = transpose(A*delta)*((R + s) .* (A*delta));

% Ratio of the square term and the p-norm term at delta.
k = p*norm(A*delta,p)*(norm(A*delta,p)/(2*p*SqTerm))^(1/(p-1));
lambda = 16*p;

% The minimum progress that has to be made, i.e., the minimum approximation factor.
alpha0 = min(1/(16*lambda), 1/((16*lambda)^(1/(p-1))*k));
gamma = EvalResidual(A,b,x,p,alpha0*delta);

% Condition that determines whether the padding i has to be reduced.
out = (gamma < 0.25*alpha0*i) || (SqTerm >= lambda*i);
end
--------------------------------------------------------------------------------
/test_graph_instances.m:
--------------------------------------------------------------------------------
% We use the graph instances generated by Mauricio Flores et al. and run
% our IRLS algorithm to minimize the $\ell_p$-Laplacian energy.

eps = 1e-8;
n = 1000;
p = 8;
nearest_neighbours = 10;
[A,b,C,d] = create_graph_matrices(n,p,nearest_neighbours);
pNorm(eps,A,b,p,C,d);
--------------------------------------------------------------------------------
/test_random_instances.m:
--------------------------------------------------------------------------------
% We generate random matrices and run our IRLS algorithm on them. We set
% this up to run the algorithm in the unconstrained setting.
% Data dimensions are m x n, where n < m.

n = 800;
m = 1000;
eps = 1e-8;
p = 8;
A = rand(m,n);
b = rand(m,1);
C = zeros(n,n);
d = zeros(n,1); % Right-hand side of the (here inactive) constraints Cx = d.
pNorm(eps,A,b,p,C,d);
--------------------------------------------------------------------------------