├── woodbury.pdf
├── lasso_example.pdf
├── README.md
├── logcosh.m
├── rica.m
├── nmf.m
├── lasso.m
├── woodbury.m
├── nucnorm.py
└── admm.py

/woodbury.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nirum/ADMM/HEAD/woodbury.pdf
--------------------------------------------------------------------------------
/lasso_example.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nirum/ADMM/HEAD/lasso_example.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
ADMM
---
Small MATLAB and Python experiments with the alternating direction method of multipliers (ADMM): the lasso (lasso.m), non-negative matrix factorization (nmf.m), nuclear norm minimization (nucnorm.py), a generic prox-operator ADMM class (admm.py), a timing test of the Woodbury matrix identity (woodbury.m), and a RICA example (rica.m, logcosh.m).

References: http://stanford.edu/~boyd/papers/admm_distr_stats.html
--------------------------------------------------------------------------------
/logcosh.m:
--------------------------------------------------------------------------------
% log cosh penalty
% Niru Maheswaranathan
% 03:01 PM Jun 17, 2014

function [f, grad] = logcosh(w, x, lambda, alpha)

    [k, n] = size(x);

    W = reshape(w, k, k);
    f = mean(sum(log(cosh(alpha*W*x))/alpha) + (0.5*lambda)*norms(W'*W*x - x));

    z = W*(x*x');
    u = tanh(alpha*W*x)*x';
    grad = vec(u + lambda * ( (W*W')*z + z*(W'*W) - z ))/n;

end
--------------------------------------------------------------------------------
/rica.m:
--------------------------------------------------------------------------------
% RICA
% Niru Maheswaranathan
% 02:35 PM Jun 17, 2014
addtoolbox('minFunc');
addtoolbox('cvx');
cvx_startup;
setpaths_minFunc;

k = 5;
n = 1000;

% generate data
W = orth(randn(k));
s = laprnd(k,n);
x = W \ s;

lambda = 2;

%cvx_begin
%variable What(k,k)
%minimize norm(What*x,1) + lambda*sum(norms(What'*What*x - x))
%cvx_end

phi = @(x) log(cosh(x));

alpha = 1;
fobj = @(w) logcosh(w, x, lambda, alpha);
options = struct('Method', 'qnewton', 'Display', 'iter', 'MaxIter', 1000, 'MaxFunEvals', 5000, 'numDiff', 1);

W0 = vec(randn(k));
[What, objval, exitflag, output] = minFunc(fobj, W0, options);
What = reshape(What, k, k);
shat = What*x;
--------------------------------------------------------------------------------
/nmf.m:
--------------------------------------------------------------------------------
% non-negative matrix factorization
% Niru Maheswaranathan
% 11:41 AM Jun 17, 2014

% toy data
n = 2; m = 4; k = 3;
X = rand(n,k);
Y = rand(m,k);
A = X*Y';

% initialize
Xhat = randn(n,k);
Yhat = randn(m,k);
Ux = rand(n,k);
Uy = rand(m,k);
Vx = rand(n,k);
Vy = rand(m,k);

% parameters
lambda = 0.0001;
numiter = 1e4;
err = zeros(numiter,1);

% run ADMM
for j = 1:numiter

    % least-squares updates of the two factors
    Xhat = (A*Yhat + lambda*(Ux-Vx)) * pinv(Yhat'*Yhat + lambda*eye(k));
    Yhat = (pinv(Xhat'*Xhat + lambda*eye(k)) * (Xhat'*A + lambda*(Uy-Vy)'))';

    % project the shifted factors onto the non-negative orthant
    Ux = max(Xhat + Vx, 0);
    Uy = max(Yhat + Vy, 0);

    % scaled dual updates
    Vx = Vx + (Xhat - Ux);
    Vy = Vy + (Yhat - Uy);

    % reconstruction error (the individual factors are only identifiable up to an invertible transform)
    err(j) = norm(A - Xhat*Yhat', 'fro');

end
--------------------------------------------------------------------------------
/lasso.m:
--------------------------------------------------------------------------------
% ADMM Lasso example
% Niru Maheswaranathan
% 4/23/14

% generate a problem
n = 150;                                % measurements
p = 500;                                % regressors
A = randn(n,p);                         % sensing matrix
x0 = randn(p,1).*(rand(p,1) < 0.05);    % signal (sparse)
b = A*x0;                               % measurements (noiseless)
lambda = 0.1;                           % sparsity penalty
rho = 0.05;                             % augmented Lagrangian parameter

% initialize
numiter = 100;
x = randn(p,1);
z = randn(p,1);
y = zeros(p,1);
err = zeros(numiter,1);

% caching: (A'A + rho*I)^{-1}, computed via the Woodbury identity (see woodbury.m)
P = eye(p)/rho - (A' * pinv(rho * eye(n) + A * A') * A)/rho;
c = P*A'*b;

% ADMM
for k = 1:numiter

    % updates
    x = c + P*(rho*z - y);
    z = wthresh(x + y / rho, 's', lambda / rho);    % soft-thresholding (wthresh is in the Wavelet Toolbox)
    y = y + rho*(x - z);

    % store error
    progressbar(k,numiter);
    err(k) = norm(x-x0) / p;

end
--------------------------------------------------------------------------------
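The cached matrix P in lasso.m is the matrix-inversion-lemma (Woodbury) form of (A'A + rho*I)^{-1}, which woodbury.m below times against the naive inverse. As a quick, self-contained sanity check of that identity (illustrative only, not a file in this repository), the same computation can be reproduced in a few lines of NumPy:

# sketch: check that (rho*I + A'A)^{-1} equals I/rho - A'(rho*I + AA')^{-1}A/rho
import numpy as np

rng = np.random.default_rng(0)
n, p, rho = 150, 500, 0.05                  # same sizes as lasso.m
A = rng.standard_normal((n, p))

# naive inverse: factors a p-by-p matrix
P_naive = np.linalg.inv(A.T @ A + rho * np.eye(p))

# Woodbury form: only solves an n-by-n system, much cheaper when n << p
P_wood = np.eye(p) / rho - A.T @ np.linalg.solve(rho * np.eye(n) + A @ A.T, A) / rho

print(np.max(np.abs(P_naive - P_wood)))     # should be zero up to floating-point round-off

The two expressions agree up to round-off; the speedup comes from the Woodbury form only ever factoring an n-by-n system.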
/woodbury.m:
--------------------------------------------------------------------------------
% testing speedup using the matrix inversion lemma
% Niru Maheswaranathan
% Apr 25 2014

% test different parameter sizes
nvals = round(logspace(1,3,25));
p = max(nvals);
rho = 0.1;                              % regularization weight (any positive value works for this timing test)

% initialize variables
err = zeros(size(nvals));
naive = zeros(size(nvals));
lemma = zeros(size(nvals));

for idx = 1:length(nvals)

    % generate a matrix
    n = nvals(idx);
    A = randn(n,p);

    % naive inverse
    tic; P1 = pinv(A'*A + rho*eye(p)); naive(idx) = toc;

    % matrix inversion lemma
    tic; P2 = eye(p)/rho - (A' * pinv(rho * eye(n) + A * A') * A)/rho; lemma(idx) = toc;

    % store error
    err(idx) = norm(P1-P2,'fro');
    progressbar(idx,length(nvals));

end

% plots
fig(1);
loglog(nvals/p, naive, 'ko-', nvals/p, lemma, 'ro-');
xlabel('Fractional rank (n/p)', 'FontSize', 24);
ylabel('Time to compute inverse (seconds)', 'FontSize', 24);
title('Matrix inversion via the Sherman-Morrison-Woodbury identity', 'FontSize', 30);
legend('Naive', 'Woodbury', 'Location', 'SouthEast');
makepretty; grid on;

fig(2);
semilogx(nvals/p, err, 'bo-');
xlabel('Fractional rank (n/p)', 'FontSize', 24);
ylabel('Fro. norm distance between the estimates', 'FontSize', 24);
title('Discrepancy between Woodbury and naive inverse', 'FontSize', 30);
makepretty; grid on;
--------------------------------------------------------------------------------
/nucnorm.py:
--------------------------------------------------------------------------------
"""
nuclear norm minimization via ADMM
author: Niru Maheswaranathan
10:12 PM May 5, 2014
"""
import numpy as np
from scipy.linalg import svd, norm, cho_factor, cho_solve

def admm(y,X,shape,options,penalty):
    """
    ADMM for nuclear norm minimization
    """

    # dimensions
    ds = shape[0]
    dt = shape[1]
    n = ds*dt
    m = float(y.size)
    sq = lambda k: k.reshape(ds,dt)

    # initialize variables
    k = np.zeros(n)
    z = np.zeros(n)
    u = k - z
    resid = np.zeros(options['maxiter'])

    # linear system
    P = X.T.dot(X) / m + penalty['rho']*np.eye(n)
    print('Condition number of P: %5.4f' % np.linalg.cond(P))
    L = cho_factor(P)
    xty = X.T.dot(y) / m

    # loop until convergence or maxiter is reached
    for idx in range(1,options['maxiter']):

        # minimize l2 error
        #k = solve(P, X.T.dot(y)/m + penalty['rho']*(z-u))
        k = cho_solve(L, xty + penalty['rho']*(z-u))

        # singular value thresholding
        U,S,V = svd(sq(k+u), full_matrices=False)
        z = (U.dot(np.diag(np.maximum(S-penalty['rank'],0))).dot(V)).ravel()

        # dual update
        u += k-z

        # stopping criterion
        if idx == 1:
            print('Resid.\t\tError\tNuc. Norm')     # print the column header once
        resid[idx] = norm(u)
        if (resid[idx] <= options['tol']): #| (np.abs(resid[idx]-resid[idx-1]) <= options['tol']):
            print('Converged after %i iterations.' % idx)
            break
        else:
            print('%5.4f\t\t%5.2f\t%5.4f' % (resid[idx], norm(X.dot(k)-y)/m, norm(S,1)))

    A = sq(0.5*(k+z))
    return A, k, z, u, resid

if __name__ == "__main__":

    # problem size
    ds = 20
    dt = 10
    r = 3
    m = 100
    n = ds*dt

    # generate data
    A = np.random.randn(ds,r).dot(np.random.randn(r,dt))
    X = np.random.randn(m,n)
    y = X.dot(A.ravel())

    # options
    options = {'maxiter': 1000, 'tol': 1e-4}
    penalty = {'rho': 0.01, 'rank': 0.5}

    # run ADMM
    Ahat, k, z, u, resid = admm(y,X,A.shape,options,penalty)
--------------------------------------------------------------------------------
/admm.py:
--------------------------------------------------------------------------------
"""
ADMM python implementation
author: Niru Maheswaranathan
01:20 PM Aug 12, 2014
"""
import numpy as np
import proxops as po
from functools import partial

class ADMM(object):
    def __init__(self, lmbda):
        self.objectives = list()        # prox. operators for objectives
        self.lmbda = lmbda              # prox. op. trade-off parameter
        self.rho = 1.0 / lmbda          # inverse of trade-off parameter

    def add_operator(self, proxfun, **kwargs):
        # add proximal operator to the list
        proxop = partial(proxfun, lmbda=self.lmbda, **kwargs)
        self.objectives.append(proxop)

def lowrank_approx_demo():
    """
    solve a low-rank matrix approximation problem
    """

    # parameters
    n = 50              # dimension
    k = 3               # rank
    eta = 0.01          # noise strength
    gamma = 0.1         # low-rank penalty
    lmbda = 100.0       # ADMM parameter
    num_batches = 25

    # reproducible
    np.random.seed(1234)

    # build data matrix
    A_star = np.random.randn(n, k).dot(np.random.randn(k, n))
    data = [A_star + eta * np.random.randn(n, n) for j in range(num_batches)]

    # define objective and gradient (fro-norm)
    f = lambda x, d: 0.5 * np.sum((x.reshape(d.shape) - d) ** 2)
    fgrad = lambda x, d: (x.reshape(d.shape) - d).ravel()

    # initialize proximal operators and ADMM object
    lowrank = ADMM(lmbda)

    from sfo.sfo import SFO
    def f_df(x, d):
        return f(x,d), fgrad(x,d)

    # optimizer = SFO(f_df, 0.1*np.random.randn(n*n), data, display=1, admm_lambda=lmbda)

    ## set up SFO for ADMM iteration
    # def sfo_admm(v, lmbda):
    #     optimizer.set_theta(v)
    #     optimizer.theta_admm_prev = optimizer.theta_original_to_flat(v)
    #     return optimizer.optimize(num_steps=5)

    # lowrank.add(po.bfgs, f=f, fgrad=fgrad)
    lowrank.add_operator(po.sfo, f=f, fgrad=fgrad, data=data)
    # lowrank.add(sfo_admm)

    # theta_init = [0.1 * np.random.randn(n*n) for dummy in range(2)]
    # from sfo.sfo import SFO
    # optimizer = SFO(po.get_f_df(theta_init, lmbda, f, fgrad, data), theta_init, data, display=1)
    # lowrank.add(po.sfo_persist, optimizer=optimizer, f=f, fgrad=fgrad, data=data)

    lowrank.add_operator(po.nucnorm, gamma=gamma, array_shape=A_star.shape)

    # optimize
    A_hat = lowrank.optimize((n, n), maxiter=20)[0]
    print('\nLow-rank matrix approximation\n----------')
    print('Final Error: %4.4f' % np.linalg.norm(A_hat - A_star))
    print('')

    return A_hat, A_star


def lasso_demo():
    """
    solve a LASSO problem via ADMM
    """

    # generate problem instance
    n = 150
    p = 500
    A = np.random.randn(n, p)
    x_star = np.random.randn(p) * (np.random.rand(p) < 0.05)
    b = A.dot(x_star)

    # parameters
    sparsity = 0.8      # sparsity penalty
    lmbda = 2           # ADMM parameter

    # initialize prox operators and problem instance
    lasso = ADMM(lmbda)
    lasso.add_operator(po.linsys, P=A.T.dot(A), q=A.T.dot(b))
    lasso.add_operator(po.sparse, gamma=sparsity)

    # optimize
    x_hat = lasso.optimize(x_star.shape, maxiter=50)[0]
    print('\nLasso\n----------')
    print('Final Error: %4.4f' % np.sum((x_hat - x_star) ** 2))
    print('')

    return x_hat, x_star


if __name__ == "__main__":
    # x_hat, x_star = lasso_demo()
    A_hat, A_star = lowrank_approx_demo()
--------------------------------------------------------------------------------
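A note on running admm.py: it imports a proxops module (aliased po) and calls an optimize method on the ADMM class, neither of which appears in the listing above. The sketch below shows one plausible shape for those missing pieces; it is an assumption-based reconstruction, not the author's code. It infers the proximal-operator signature proxfun(v, lmbda, **kwargs) from the way add_operator wraps operators with functools.partial, and implements optimize as the consensus-form ADMM iteration described in the Boyd et al. reference from the README. Only the linsys, sparse, and nucnorm operators used by the demos are sketched; the SFO-based operators depend on an external package and are omitted.

# Hypothetical sketch of the pieces admm.py relies on but does not define here.
# Everything below (names, signatures, defaults) is inferred from the call sites in admm.py.
import numpy as np

# A possible ADMM.optimize method (consensus ADMM): each objective keeps its own copy
# of the variable, updated by its proximal operator, and the copies are averaged into
# a consensus variable z, with scaled dual variables enforcing agreement.
def optimize(self, shape, maxiter=50):
    n = int(np.prod(shape))
    z = np.zeros(n)                                     # consensus variable
    theta = [np.zeros(n) for _ in self.objectives]      # one primal copy per objective
    duals = [np.zeros(n) for _ in self.objectives]      # scaled dual variables
    for _ in range(maxiter):
        # prox step for each objective term
        theta = [prox(z - u) for prox, u in zip(self.objectives, duals)]
        # consensus step: average the shifted primal copies
        z = np.mean([th + u for th, u in zip(theta, duals)], axis=0)
        # dual ascent on the constraints theta_i = z
        duals = [u + th - z for u, th in zip(duals, theta)]
    return z.reshape(shape), theta, duals

# Minimal stand-ins for the prox operators referenced as po.linsys, po.sparse, po.nucnorm.
# Each evaluates prox_{lmbda*f}(v) = argmin_x f(x) + (1/(2*lmbda))*||x - v||^2.
def linsys(v, lmbda, P, q):
    """Quadratic term 0.5*x'Px - q'x: solve (P + I/lmbda) x = q + v/lmbda."""
    return np.linalg.solve(P + np.eye(P.shape[0]) / lmbda, q + v / lmbda)

def sparse(v, lmbda, gamma):
    """l1 term gamma*||x||_1: elementwise soft-thresholding at gamma*lmbda."""
    return np.sign(v) * np.maximum(np.abs(v) - gamma * lmbda, 0.0)

def nucnorm(v, lmbda, gamma, array_shape):
    """Nuclear norm term gamma*||X||_*: singular value soft-thresholding at gamma*lmbda."""
    U, s, Vh = np.linalg.svd(v.reshape(array_shape), full_matrices=False)
    return (U @ np.diag(np.maximum(s - gamma * lmbda, 0.0)) @ Vh).ravel()

With stand-ins along these lines (optimize attached as a method of the ADMM class and the three functions collected in a proxops module), lasso_demo() runs end to end; lowrank_approx_demo() additionally needs the SFO-based operator. The defaults and helper names here are placeholders rather than part of the repository.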