├── README.md ├── geometry ├── lasso │ ├── SolutionLogger.m │ ├── algm_Nesterov1st.m │ ├── backtracking.m │ ├── func_conv.m │ ├── func_l1.m │ ├── func_simple.m │ ├── func_smooth.m │ ├── homotopy.m │ ├── inner_product.m │ ├── linesearch.m │ ├── prox_grad.m │ ├── reversal.m │ ├── set_options.m │ └── soft_thresholding.m ├── plot_geometry_DQ.m └── plot_geometry_lasso.m ├── solvers_1D ├── algorithm │ ├── ADM.m │ ├── homotopy.m │ ├── iADM.m │ ├── incoherent.mat │ └── reweighting.m ├── auxiliary │ ├── Log_map.m │ ├── Retract.m │ ├── backtracking.m │ ├── compute_error.m │ ├── compute_gradient.m │ ├── compute_y.m │ ├── gen_data.m │ ├── linesearch.m │ ├── reversal.m │ ├── shift_correction.m │ └── soft_thres.m └── test_algorithms_1D.m └── solvers_2D ├── algorithm ├── ADM_2D.m ├── homotopy_2D.m ├── iADM_2D.m └── reweighting_2D.m ├── auxiliary ├── F_val.m ├── Log_map.m ├── Log_map2D.m ├── Retract2D.m ├── backtracking_2D.m ├── cconvfft2.m ├── compute_gradient.m ├── f_quad.m ├── g_val.m ├── innerprod.m ├── linesearch_2D.m ├── row_soft_thres.m ├── shift_correction_2D.m └── soft_thres.m ├── data └── calcium_img.png └── test_2D.m /README.md: -------------------------------------------------------------------------------- 1 | # sparse_deconvolution 2 | Nonconvex algorithms for solving sparse deconvolution / convolutional dictionary learning problems. 3 | 4 | The code in the geometry folder plots the low-dimensional 3D landscape of both the drop quadratic (DQ) and the bilinear lasso losses over the sphere. 5 | 6 | The code in solvers_1D solves the one-dimensional sparse blind deconvolution / convolutional dictionary learning problem. 7 | Run test_algorithms_1D.m for a test and a comparison of the solvers. 8 | 9 | The code in solvers_2D solves the two-dimensional sparse blind deconvolution / convolutional dictionary learning problem. 10 | Run test_2D.m for a test and a comparison of the solvers.
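A minimal 1D usage sketch is shown below. It is adapted from solvers_1D/test_algorithms_1D.m; the option values are simply the ones used in that script, so treat them as defaults to adjust rather than a fixed interface.

```matlab
addpath(genpath(pwd));                      % run from the repository root

n = 1e2; m = 1e4; K = 1;                    % kernel length, signal length, number of kernels
theta = n^(-3/4);                           % Bernoulli sparsity level

% ground truth and measurements: y = sum_k a0k conv x0k + b*1 + noise
[A_0, X_0, y_0, y] = gen_data(theta, m, n, 1, 0, 'randn', 'bernoulli-rademacher');

% solver options (see test_algorithms_1D.m for the full list with comments)
opts.lambda = 1e-2;  opts.tol = 1e-6;  opts.MaxIter = 1e3;
opts.t_linesearch = 'bt';                   % backtracking stepsize for the X update
opts.isbias = true;  opts.isprint = false;
opts.isnonnegative = false;  opts.isupperbound = false;  opts.hard_thres = false;
opts.err_truth = true;  opts.A_0 = A_0;  opts.X_0 = X_0;   % track error to the truth
opts.W = ones(m, K);                        % weights for the (re)weighted l1 penalty

% initialize each kernel with a zero-padded length-n window of y, and X with zeros
opts.A_init = zeros(3*n, K);
for k = 1:K
    ind = randperm(m, 1);
    y_pad = [y_0; y_0];
    a_init = [zeros(n,1); y_pad(ind:ind+n-1); zeros(n,1)];
    opts.A_init(:,k) = a_init / norm(a_init);
end
opts.X_init = zeros(m, K);
opts.b_init = mean(y);

% iADM solver; ADM(y_0, opts) and homotopy(y_0, opts) (with opts.homo_alg = 'adm'
% or 'iadm') share the same interface and outputs
[A, X, b, Psi_val, psi_val, Err_A, Err_X] = iADM(y_0, opts);
```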
11 | 12 | 13 | -------------------------------------------------------------------------------- /geometry/lasso/SolutionLogger.m: -------------------------------------------------------------------------------- 1 | classdef SolutionLogger < handle 2 | % a logger class to record values of all iterates in an iterative algorithm 3 | % 4 | % Properties: 5 | % status: 'U':Unknown, 'O':Optimal, 'L':LineSearchFailure, 'M':MaxIterReached 6 | % algm_name: name of algorithm that generated this logger 7 | % x: final solution 8 | % t: local Lipschitz constant from line search at final solution x 9 | 10 | % fx_all : values of f(x) at all iterates 11 | % Rx_all : values of R(x) at all iterates 12 | % Fx_all : values of f(x)+lambda*Psi(x) at all iterates 13 | % ts_all : values of local Lipschitz inverse t = 1/L constants at all iterates 14 | % NNZ_all: numbers of non-zeros (||x||_0) at all iterates 15 | % Ts_all : record of computational time at all iterates 16 | 17 | % rcv_all: recovery errors if oroginal signal x_gen is provided 18 | % res_all: optimality residues at all iterates 19 | % idx_all: vector of indices for plotting 20 | % nAx_all: numbers of matrix-vector multiplications 21 | 22 | % lambdas: vector of regularization parameters along homotopy path 23 | % n_iters: vector of number of iterations for each lambda on homotopy path 24 | properties 25 | verbose = false; 26 | x_gen = []; 27 | status = 'U'; 28 | algm_name 29 | x 30 | t 31 | Ts 32 | mu 33 | 34 | s 35 | y 36 | rho 37 | stack_flag 38 | 39 | fx_all = []; 40 | Rx_all = []; 41 | Fx_all = []; 42 | ts_all = []; 43 | NNZ_all = []; 44 | Ts_all = []; 45 | nAx_all = []; 46 | 47 | rcv_all = []; 48 | res_all = []; 49 | idx_all = []; 50 | mu_all = []; 51 | 52 | % for homotopy algorithms 53 | lambdas = []; 54 | n_iters = []; 55 | 56 | end 57 | 58 | methods 59 | 60 | function logger = SolutionLogger(algm_name, x0, x_gen) 61 | % construct the logger 62 | logger.algm_name = algm_name; 63 | logger.x = x0; 64 | logger.x_gen = x_gen; 65 | end 66 | 67 | function assign_name( logger, name ) 68 | logger.algm_name = name; 69 | end 70 | 71 | function record(logger, k, f, x, fx, Rx, Fx, t, Ts, residue, mu) 72 | logger.fx_all(k) = fx; 73 | logger.Rx_all(k) = Rx; 74 | logger.Fx_all(k) = Fx; 75 | logger.ts_all(k) = t; 76 | logger.Ts_all(k) = Ts; 77 | 78 | logger.NNZ_all(k) = sum( abs(x) >= 1e-12 ); 79 | logger.nAx_all(k) = f.total_mvCount(); 80 | logger.idx_all(k) = k - 1; 81 | 82 | logger.res_all(k) = residue; 83 | logger.rcv_all(k) = norm(x-logger.x_gen); % for lasso 84 | 85 | logger.mu_all(k) = mu; 86 | end 87 | 88 | % update_solution: in the homotopy method 89 | function update_solution(logger, x, t, Ts, lambda, k, mu) 90 | logger.x = x; 91 | logger.t = t; 92 | logger.Ts = Ts; 93 | logger.mu = mu; 94 | logger.lambdas = [logger.lambdas lambda]; 95 | logger.n_iters = [logger.n_iters k-1]; 96 | end 97 | function update_memory(logger, y, s, rho, stack_flag) 98 | logger.y = y; 99 | logger.s = s; 100 | logger.rho = rho; 101 | logger.stack_flag = stack_flag; 102 | 103 | end 104 | 105 | %concatenate: concatenate two loggers in the homotopy method 106 | function concatenate(logger, nextlog) 107 | 108 | logger.status = nextlog.status; 109 | logger.x = nextlog.x; 110 | logger.t = nextlog.t; 111 | 112 | logger.fx_all = [logger.fx_all nextlog.fx_all]; 113 | logger.Rx_all = [logger.Rx_all nextlog.Rx_all]; 114 | logger.Fx_all = [logger.Fx_all nextlog.Fx_all]; 115 | logger.ts_all = [logger.ts_all nextlog.ts_all]; 116 | logger.Ts_all = [logger.Ts_all nextlog.Ts_all]; 117 | 
logger.mu_all = [logger.mu_all nextlog.mu_all]; 118 | 119 | logger.NNZ_all = [logger.NNZ_all nextlog.NNZ_all]; 120 | logger.rcv_all = [logger.rcv_all nextlog.rcv_all]; 121 | logger.res_all = [logger.res_all nextlog.res_all]; 122 | logger.nAx_all = [logger.nAx_all nextlog.nAx_all]; 123 | 124 | logger.lambdas = [logger.lambdas nextlog.lambdas]; 125 | logger.n_iters = [logger.n_iters nextlog.n_iters]; 126 | 127 | % needs to check 128 | idx_length = length(logger.idx_all); 129 | if idx_length == 0 130 | idx_shift = 0; 131 | else 132 | idx_shift = logger.idx_all(idx_length); 133 | end 134 | logger.idx_all = [logger.idx_all idx_shift+nextlog.idx_all]; 135 | end 136 | end 137 | end 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /geometry/lasso/algm_Nesterov1st.m: -------------------------------------------------------------------------------- 1 | function logger = algm_Nesterov1stA(f, R, x0, opts) 2 | % Nesterov's 1st method for composite convex optimization 3 | % minimize { F(x) = f(x) + Psi(x) } 4 | % Inputs: 5 | % f: a function object that implements [fval, grad] = f.oracle(x) 6 | % Psi: a function object that implements proximal mapping 7 | % x0: the starting point for the iterative algorithm 8 | % opts: a struct of options and parameters for the algorithm 9 | % Outputs: 10 | % x: the final solution 11 | % fs: a vector recording history of function values at each iteration 12 | % ts: a vector recording history of step sizes used at each iteration 13 | 14 | % first we need to check the options, use default ones if not provided 15 | if nargin < 3; opts = []; end 16 | opts = set_options( opts ); 17 | opts.bt_init = 'adaptive'; 18 | logger = SolutionLogger('Nesterov1stA', x0, opts.x_gen); 19 | 20 | % choose to use mu provided by function or an user-specified value in opts 21 | mu = f.strong_convex_parameter(); 22 | %mu = 0.1; 23 | 24 | % initialize alpha (this is NOT the alpha for line search) 25 | alpha = (sqrt(5)-1)/2; 26 | 27 | % initialize x, y and step size 28 | x = x0; 29 | y = x0; 30 | t = opts.t_fixed; 31 | Ts = 0; 32 | % main loop of accelerated proximal gradient method 33 | for k = 1:opts.maxitrs 34 | tstart = tic; 35 | % first query the oracle 36 | fx = f.oracle(x); 37 | Rx = R.oracle(x); 38 | Fx = fx + Rx; 39 | 40 | [fy, gy] = f.oracle(y); 41 | 42 | % line search 43 | switch lower( opts.linesearch ) 44 | case 'fixed' 45 | t = opts.t_fixed; 46 | % apply proximal mapping of Psi and compute gradient mapping 47 | x1 = R.prox_mapping(y - t*gy, t); 48 | % compute gradient mapping 49 | Gy = (y - x1)/t; 50 | case 'bt' 51 | [t, x1, Gy] = backtracking(f, R, y, fy, gy, t, opts); 52 | otherwise 53 | error('line search method not implemented'); 54 | end 55 | 56 | 57 | residue = norm(Gy,inf); 58 | % stopping criterion: stop if norm of gradient mapping is small 59 | if norm(Gy,'fro') < opts.tol || residue < opts.tol 60 | break; 61 | end 62 | 63 | % find the next alpha 64 | q = mu*t; 65 | 66 | alpha2_q = alpha^2-q; 67 | alpha1 = (sqrt((alpha2_q)^2+4*alpha^2)-alpha2_q)/2.0; 68 | 69 | % update y 70 | beta = alpha*(1-alpha)/(alpha^2+alpha1); 71 | y = x1 + beta*(x1 - x); 72 | Ts1 = Ts + toc(tstart); 73 | % record history 74 | if opts.recording == true 75 | logger.record(k, f, x, fx, Rx, Fx, t, Ts, residue, 0); 76 | end 77 | if opts.isprint 78 | fprintf('Iter = %d, func_val = %d \n', k, Fx); 79 | end 80 | % update alpha and x for next iteration 81 | alpha = alpha1; 82 | x = x1; 83 | Ts = Ts1; 84 | end 85 | 86 | logger.update_solution(x, t, 
Ts, R.lambda, k, 0); 87 | end 88 | -------------------------------------------------------------------------------- /geometry/lasso/backtracking.m: -------------------------------------------------------------------------------- 1 | function [t, x1, Gx] = backtracking(f, Psi, x, fx, gx, t_pre, opts) 2 | % line search to find t>0 such that x1 = prox_Psi(x-t*gx,t) satisfies 3 | % f(x1) <= f(x) + gx'*(x1-x) + (1/2*t)*||x1 - x||^2 (see page 7-18) 4 | % Inputs: 5 | % f: function object that implements method oracle(x) 6 | % Psi:simple function that implements prox_mapping(z, t) 7 | % x: the current point 8 | % fx: value of f at x 9 | % gx: gradient at x 10 | % t_pre: previous stepsize used 11 | % opts: algorithmic options (see set_options.m) 12 | % Outputs: 13 | % t: step size choosen by line search 14 | % x1: prox_mapping of x with step size t 15 | % Gx: the gradient mapping at x 16 | 17 | % choose initial stepsize for backtracking line search 18 | switch lower( opts.bt_init ) 19 | case 't_fixed' 20 | t = opts.t_fixed; 21 | case 'previous' 22 | t = t_pre; 23 | case 'adaptive' 24 | t = min(t_pre*opts.ls_gamma, opts.ls_maxstep); 25 | otherwise 26 | error('Unknown initialization for backtracking line search'); 27 | end 28 | 29 | % line search loop 30 | x1 = Psi.prox_mapping(x - t*gx, t); 31 | % use inner_product and Frobenius norm that work for both vector and matrix 32 | while f.oracle(x1) > fx + inner_product(gx, x1-x) + 0.5/t*norm(x1-x,'fro')^2 33 | t = t*opts.ls_beta; 34 | x1 = Psi.prox_mapping(x - t*gx, t); 35 | end 36 | % compute gradient mapping at x (not at x1) 37 | Gx = (x - x1)/t; 38 | 39 | end % end of function backtracking() 40 | -------------------------------------------------------------------------------- /geometry/lasso/func_conv.m: -------------------------------------------------------------------------------- 1 | classdef func_conv < func_smooth 2 | % objective function: quadratic function 3 | % f(x) = 1/2 * || y - A*x ||_2^2 4 | properties 5 | a % an n by 1 real matrix 6 | y % an m real vector 7 | mvCount % Counter for number of matrix-vector multiplications 8 | end 9 | 10 | methods 11 | function f = func_conv(a, y) 12 | % constructor for quadratic function 13 | f.a = a; 14 | f.y = y; 15 | f.mvCount = 0; 16 | end 17 | 18 | function [fval, grad] = oracle(f, x) 19 | % 0 and 1st order oracle (depending on nargout) 20 | 21 | % compute function value 22 | % fval = 1/2 * || y - A*x ||_2^2 23 | z = cconv(f.a, x, length(f.y)); 24 | fval = 1/2 * norm( f.y - z )^2 ; 25 | f.mvCount = f.mvCount + 1; 26 | if nargout <= 1; return; end 27 | 28 | % compute gradient vector 29 | grad = cconv(reversal(f.a, length(f.y)), z - f.y, length(f.y)); 30 | f.mvCount = f.mvCount + 1; 31 | 32 | end 33 | 34 | % function hess = Hess(f, u) 35 | % hess = f.A*u; 36 | % hess = f.A'*hess; 37 | % f.mvCount = f.mvCount + 2; 38 | % end 39 | 40 | function count = total_mvCount(f) 41 | count = f.mvCount; 42 | end 43 | 44 | function mu = strong_convex_parameter(f) 45 | % return a lower bound on strong convexity parameter 46 | mu = 0; 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /geometry/lasso/func_l1.m: -------------------------------------------------------------------------------- 1 | classdef func_l1 < func_simple 2 | % the weighted l1 norm: Psi(x) = lambda*||x||_1 3 | properties 4 | lambda % weight for l1 regularization 5 | mu % strong convexity parameter 6 | end 7 | 8 | methods 9 | function Psi = func_l1(lambda) 10 | % construct the weighted l1 
norm function 11 | Psi.lambda = lambda; 12 | Psi.mu = 0; 13 | end 14 | 15 | function [fval, subg] = oracle(Psi, x) 16 | % Return function value Psi(x) 17 | fval = Psi.lambda * norm(x,1); 18 | if nargout <= 1; return; end; 19 | 20 | % compute a subgradient 21 | subg = Psi.lambda*sign(x); 22 | end 23 | 24 | function u = prox_mapping(Psi, z, t) 25 | % Return: argmin_u { (1/2)||u-z||_2^2 + t*lambda*||u||_1 } 26 | % same as argmin_u { (1/2*t)||u-z||_2^2 + lambda*||u||_1 } 27 | % try a simple one first 28 | u = max(abs(z) - t*Psi.lambda, 0); 29 | u = u.*sign(z); 30 | % This following is a more efficient implementation, 31 | % which handles matrices and complex numbers as well. 32 | % u = max(abs(z) - t*Psi.lambda, 0); 33 | % u = u./(u+t*Psi.lambda).*z; 34 | end 35 | 36 | function mu = strong_convex_parameter(R) 37 | % Return (strong) convexity parameter 38 | mu = R.mu; 39 | end 40 | end 41 | end 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /geometry/lasso/func_simple.m: -------------------------------------------------------------------------------- 1 | classdef func_simple < handle 2 | % Define interface of simple closed convex function Psi(x) 3 | methods (Abstract) 4 | [fval, subgrad] = oracle(Psi, x) 5 | % Return function value Psi(x), and a subgradient 6 | 7 | u = prox_mapping(Psi, z, t) 8 | % Return: argmin_u { (1/2)||u-z||_2^2 + t*Psi(u) } 9 | % same as argmin_u { (1/2*t)||u-z||_2^2 + Psi(u) } 10 | 11 | mu = strong_convex_parameter(Psi) 12 | % Return (strong) convexity parameter 13 | end 14 | end -------------------------------------------------------------------------------- /geometry/lasso/func_smooth.m: -------------------------------------------------------------------------------- 1 | classdef func_smooth < handle 2 | % An abstract class that defines interface for a differentiable function f(x). 3 | methods (Abstract) 4 | [fval, grad] = oracle(f, x); 5 | % 0 and 1st order oracle (depending on nargout) 6 | 7 | mu = strong_convex_parameter(f); 8 | % return strong convexity parameter or a lower bound. 9 | end 10 | end 11 | 12 | -------------------------------------------------------------------------------- /geometry/lasso/homotopy.m: -------------------------------------------------------------------------------- 1 | function homo_logger = homotopy(algm, f, R, x0, opts) 2 | % homo_logger = homotopy(algm, f, R, lambda0, lambda_tgt, x0, L0, opts) 3 | % 4 | % homotopy method for minimizing composite objective function 5 | % minimize_x f(x) + lambda * Psi(x) 6 | % where f(x) is a convex differentiable function 7 | % Psi(x) is a convex regularization function 8 | % lambda is a regularization parameter 9 | % Reference: Yu. Nesterov, "Gradient methods for minimizing composite 10 | % objective function," CORE discussion paper 2007/76. 
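%
% Sketch of the continuation schedule implemented below (all quantities come from
% opts; nothing new is introduced here):
%   at stage k, shrink lambda_k = homo_eta * lambda_{k-1} (with lambda_0 = opts.lambda_0)
%   and solve that stage to tolerance homo_delta * lambda_k;
%   the number of intermediate stages is
%   N_stages = floor( log(lambda_0 / lambda_tgt) / log(1 / homo_eta) ),
%   followed by a final solve at lambda_tgt with the original tolerance opts.tol.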
11 | % Inputs: 12 | % algm: function handle for algorithm used to solve each homotopy stage 13 | % f: an object of a subclass of LossFunction (see LossFunction.m) 14 | % Psi: an object of a subclass of Regularizer (see Regularizer.m) 15 | % opts.lambda0: the starting regularization parameter 16 | % opts.lambda_tgt: the target regularization parameter 17 | % x0: initial point for the iterative algorithm 18 | % opts.t_fixed: initial estimate of local Lipschitz constant 19 | % opts: algorithmic options, see details in set_options.m 20 | % Output: 21 | % logger: an object of the SolutionLogger class (see SolutionLogger.m) 22 | 23 | if nargin <= 5; 24 | opts = set_options(opts); 25 | end 26 | 27 | % t = opts.t_fixed; 28 | Ts = 0; 29 | % create logger to store solution history 30 | homo_logger = SolutionLogger('homotopy', x0, opts.x_gen); 31 | 32 | 33 | 34 | % initialization 35 | lambda = opts.lambda_0; 36 | x = x0; 37 | opts.is_homotopy = 0; 38 | homo_opts = opts; 39 | homo_opts.maxitrs = opts.homo_maxitrs; 40 | 41 | % calculate number of regularization parameters for continuation 42 | N_stages = floor( log(opts.lambda_0/opts.lambda_tgt) / log(1.0/opts.homo_eta ) ); 43 | 44 | for k = 1:N_stages 45 | 46 | lambda = opts.homo_eta * lambda; % shrink the lasso penalty parameter lambda 47 | homo_opts.tol = opts.homo_delta * lambda; % shrink the tol for the solution 48 | 49 | R.lambda = lambda; 50 | % solving for each intermediate stage 51 | logger = algm(f, R, x, homo_opts); 52 | logger.Ts_all = logger.Ts_all + Ts; 53 | if opts.recording == true 54 | homo_logger.concatenate( logger ); 55 | end 56 | 57 | x = logger.x; 58 | Ts = Ts + logger.Ts; 59 | homo_opts.t_fixed = logger.t; 60 | homo_opts.is_homotopy = 1; 61 | homo_opts.logger = logger; 62 | 63 | end 64 | 65 | % solving the final stage to precision tol 66 | R.lambda = opts.lambda_tgt; 67 | logger = algm(f, R, x, opts); 68 | logger.Ts_all = logger.Ts_all + Ts; 69 | 70 | if opts.recording == true 71 | homo_logger.concatenate( logger ); 72 | end 73 | 74 | homo_logger.assign_name( strcat(logger.algm_name, 'H') ); 75 | 76 | end % function homotopy 77 | 78 | -------------------------------------------------------------------------------- /geometry/lasso/inner_product.m: -------------------------------------------------------------------------------- 1 | % inner product function that works for both vector and matrix 2 | function z = inner_product(x, y) 3 | z = sum( sum( x.*y ) ); 4 | end -------------------------------------------------------------------------------- /geometry/lasso/linesearch.m: -------------------------------------------------------------------------------- 1 | function [x1, mu] = linesearch(f, Psi, x, p, Gx, t, mu, opts) 2 | % Given F(x) = f(x) + Psi(x), line search to find a stepsize alpha 3 | % satisfies the Armijo condition 4 | % F(x + alpha * p) <= F(x) + eta * alpha * Gx * p 5 | % Inputs: 6 | % f: function object that implements method oracle(x) 7 | % Psi:simple function that implements prox_mapping(z, t) 8 | % x: the current point 9 | % fx: value of f at x 10 | % gx: gradient at x 11 | % t_pre: previous stepsize used 12 | % opts: algorithmic options (see set_options.m) 13 | % Outputs: 14 | % alpha: step size choosen by line search 15 | % x1: prox_mapping of x with step size t 16 | % Gx: the gradient mapping at x 17 | 18 | % mu = min(opts.ls_mu_const * mu, opts.ls_mu_max); 19 | switch opts.ls_mu_cond 20 | case 'previous' 21 | mu = mu; 22 | case 'adaptive' 23 | mu = min(opts.ls_mu_const * mu, opts.ls_mu_max); 24 | case 'fixed' 25 
| mu = opts.ls_mu; 26 | end 27 | 28 | F = @(z) f.oracle(z) + Psi.oracle(z); 29 | 30 | Fx = F(x); 31 | Gp = trace( Gx' * p) ; 32 | 33 | x1 = x + mu * p; 34 | count = 1; 35 | switch opts.ls_cond 36 | case 'armijo' 37 | while( F(x1) >= Fx + opts.ls_eta * mu * Gp) 38 | mu = mu * opts.ls_beta; 39 | x1 = x + mu * p; 40 | count = count + 1; 41 | if count>=15 42 | break; 43 | end 44 | end 45 | 46 | case 'wolfe' 47 | [fx, gx] = f.oracle(x1); 48 | [t, ~, Gx_1] = backtracking(f, Psi, x1, fx, gx, t, opts); 49 | Gp_1 = inner_product(Gx_1,p); 50 | while( F(x1) >= Fx + opts.ls_wolfe_c1 * mu * Gp ... 51 | || abs( Gp_1 ) >= - opts.ls_wolfe_c2 * Gp ) 52 | mu = mu * opts.ls_beta; 53 | x1 = x + mu * p; 54 | [fx, gx] = f.oracle(x1); 55 | [t, ~, Gx_1] = backtracking(f, Psi, x1, fx, gx, t, opts); 56 | Gp_1 = inner_product(Gx_1, p); 57 | count = count + 1; 58 | if count>= opts.ls_maxstep 59 | break; 60 | end 61 | end 62 | end 63 | 64 | end -------------------------------------------------------------------------------- /geometry/lasso/prox_grad.m: -------------------------------------------------------------------------------- 1 | function [x1, Gx, Grad] = prox_grad(f, Psi, x, gx, t) 2 | 3 | x1 = Psi.prox_mapping(x - t*gx, t); 4 | Gx = (x - x1)/t; 5 | 6 | if nargout <= 2; return; end 7 | Grad = Gx - t * f.Hess(Gx) ; 8 | 9 | end -------------------------------------------------------------------------------- /geometry/lasso/reversal.m: -------------------------------------------------------------------------------- 1 | function [ revX ] = reversal( X, m ) 2 | if nargin > 1 3 | X = [X(1:min(size(X,1), m), :) ; zeros(max(m - size(X,1), 0), size(X,2))]; 4 | end 5 | 6 | revX = [X(1,:) ; flipud(X(2:end,:))]; 7 | end -------------------------------------------------------------------------------- /geometry/lasso/set_options.m: -------------------------------------------------------------------------------- 1 | function opts = set_options( opts ) 2 | % Check and set algorithmic options 3 | % 4 | % Field Default values 5 | %------------------------------ 6 | % .epsilon 1.0e-4 stopping precision for norm of gradient 7 | % .maxitrs 100 maximum number of iterations allowed 8 | % .linesearch 'fixed' line search scheme: {'fixed', 'bt'} 9 | % .t_fixed 1.0 value for fixed step size 10 | % .ls_alpha 0.5 backtracking (bt) line search parameter alpha 11 | % .ls_beta 0.5 backtracking (bt) line search parameter beta 12 | % .ls_gamma 2.0 adaptive bt line search parameter gamma 13 | % .ls_maxstep 1.0e4 maximum step size for adaptive line search 14 | % .bt_init 't_fixed' how to initialize backtracking line search: 15 | % {'t_fixed', 'previous', 'adaptive'} 16 | % .subg_stepsize 't_sqrt' stepsize rule for subgradient method: 17 | % {'t_const', 't_harmonic', 't_sqrt', 18 | % 's_const', 's_harmonic', 's_sqrt' } 19 | %------------------------------ 20 | 21 | 22 | if isfield(opts, 'tol') 23 | if opts.tol <= 0 24 | error('opts.tol should be a small positive number'); 25 | end 26 | else 27 | opts.tol = 1.0e-4; 28 | end 29 | 30 | if isfield(opts, 'maxitrs') 31 | if opts.maxitrs <= 0 32 | error('opts.maxitrs should be a positive integer'); 33 | end 34 | else 35 | opts.max_iters = 1e4; 36 | end 37 | 38 | 39 | %% parameters for backtracking linesearch of Lipschitz constant 40 | if ~isfield(opts, 'linesearch') 41 | opts.linesearch = 'fixed'; 42 | end 43 | 44 | if isfield(opts, 't_fixed') 45 | if opts.t_fixed <=0 46 | error('opts.t_fixed should be a positive number'); 47 | end 48 | else 49 | opts.t_fixed = 1.0; 50 | end 51 | 52 | if ~isfield(opts, 
'bt_init') 53 | opts.bt_init = 't_fixed'; 54 | end 55 | 56 | %% parameters for stepsize linsearch 57 | if ~isfield(opts, 'ls_cond') 58 | opts.ls_cond = 'armijo'; 59 | end 60 | 61 | if ~isfield(opts, 'ls_mu') 62 | opts.ls_mu = 1; 63 | end 64 | 65 | if ~isfield(opts, 'ls_mu_max') 66 | opts.ls_mu_max = 10; 67 | end 68 | 69 | if isfield(opts, 'ls_alpha') 70 | if opts.ls_alpha <=0 || opts.ls_alpha > 0.51 71 | error('opts.ls_alpha should be in the interval (0,0.5]'); 72 | end 73 | else 74 | opts.ls_alpha = 0.5; 75 | end 76 | 77 | if isfield(opts, 'ls_beta') 78 | if opts.ls_beta <=0 || opts.ls_beta >=1 79 | error('opts.ls_beta should be in the interval (0,1)'); 80 | end 81 | else 82 | opts.ls_beta = 0.5; 83 | end 84 | 85 | if isfield(opts, 'ls_gamma') 86 | if opts.ls_gamma < 1 87 | error('opts.ls_gamma should be no smaller than 1'); 88 | end 89 | else 90 | opts.ls_gamma = 2.0; 91 | end 92 | 93 | if isfield(opts, 'ls_maxstep') 94 | if opts.ls_maxstep < opts.t_fixed 95 | error('opts.ls_maxstep should be no smaller than opts.t_fixed'); 96 | end 97 | else 98 | opts.ls_maxstep = 1.0e4; 99 | end 100 | 101 | if isfield(opts, 'ls_eta') 102 | if opts.ls_alpha <=0 || opts.ls_alpha > 1 103 | error('opts.ls_eta should be in the interval (0,1]'); 104 | end 105 | else 106 | opts.ls_eta = 0.5; 107 | % opts.ls_eta = 1e-4; 108 | end 109 | 110 | if ~isfield(opts, 'opts.ls_wolfe_c1') 111 | opts.ls_wolfe_c1 = 0.3; 112 | % opts.ls_wolfe_c1 = 1e-4; 113 | end 114 | 115 | if ~isfield(opts, 'opts.ls_wolfe_c2') 116 | opts.ls_wolfe_c2 = 0.4; 117 | % opts.ls_wolfe_c2 = 0.9; 118 | end 119 | 120 | 121 | %% miscellous 122 | if ~isfield(opts, 'subg_stepsize') 123 | opts.subg_stepsize = 't_sqrt'; 124 | end 125 | 126 | if ~isfield(opts, 'isprint') 127 | opts.isprint = false; 128 | end 129 | 130 | if ~isfield(opts, 'recording') 131 | opts.recording = true; 132 | end 133 | 134 | if ~isfield(opts, 'opts.proxlbfgs_m') 135 | opts.proxlbfgs_m = 5; 136 | end 137 | 138 | 139 | %% parameters for homotopy method 140 | if isfield(opts, 'homo_delta') 141 | if opts.homo_delta >= 1 || opts.homo_delta <= 0 142 | error('opts.delta should be in the interval (0,1)'); 143 | end 144 | else 145 | opts.homo_delta = 0.2; 146 | end 147 | 148 | if isfield(opts, 'homo_eta') 149 | if opts.homo_eta >= 1 || opts.homo_eta <= 0 150 | error('opts.eta should be in the interval (0,1)'); 151 | end 152 | else 153 | opts.homo_eta = 0.7; 154 | end 155 | -------------------------------------------------------------------------------- /geometry/lasso/soft_thresholding.m: -------------------------------------------------------------------------------- 1 | function s = soft_thresholding(x,lambda) 2 | 3 | s = sign(x) .* max( abs(x) - lambda, 0 ); -------------------------------------------------------------------------------- /geometry/plot_geometry_DQ.m: -------------------------------------------------------------------------------- 1 | clc;close all;clear all; 2 | 3 | % Plot the low dimensional function landscape of DQ in the 4 | % paper: 5 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 6 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 7 | % (* denote equal contribution) 8 | % We consider the short-and-sparse blind deconvolution problem 9 | % y = a0 conv x0, with both a0 and x0 unknown 10 | % The drop quadratic formulation: 11 | % F(a,x) = 0.5 || y ||^2 - < a conv x,y > + 0.5 || x ||_2^2 + lambda * ||x||_1 12 | % F(a) = min_x F(a,x) = 0.5 || y ||^2 - 0.5 || S_lambda( reversal(y) conv a ) ||_2^2 13 | % We plot F(a) 
over a submanifold 14 | % M = span(a0,a1,a2) cap S^(n-1) 15 | % a1 and a2 are shifts of a0 16 | % Code written by Qing Qu 17 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 18 | 19 | %% setting parameters and generate the ground truth 20 | n = 500; % length of kernel a 21 | m = 5e4; % number of samples 22 | theta_n = (3/4); 23 | theta = n^(-theta_n); % sparsity 24 | lambda = 0.3; % penalty parameter 25 | isprint = true; % print intermediate result 26 | 27 | a0 = zeros(n,1); 28 | a0(1:(n-2)) = randn(n-2,1); % leave the last two entries 0 for shifts a_1, a_2 29 | a0 = a0 / norm(a0); % normalization 30 | 31 | x0 = (rand(m,1) < theta) .* randn(m,1); % generate x_0 from Bernoulli-Gaussian 32 | y = cconv(a0, x0, m); 33 | 34 | % generate orthogonal basis vectors for a subspace spanned by {a_0, a_1, 35 | % a_2} 36 | a1 = circshift(a0,1); 37 | a2 = circshift(a0,2); 38 | 39 | u3 = a0 + a1 + a2; 40 | u3 = u3 / norm(u3); 41 | 42 | u2 = a1 - u3 * (u3'*a1); 43 | u2 = u2 / norm(u2); 44 | 45 | u1 = a0 - u3 * (u3'*a0) - u2 * (u2'*a0); 46 | u1 = u1 / norm(u1); 47 | 48 | 49 | 50 | %% evaluate the function value over the sphere 51 | 52 | % generate spherical coordinate 53 | R = [0:.01:.75, .75:.005:.95, .95:.0005:.99, .99:.0001:1]; 54 | T = 0:.005:(2*pi+.05); 55 | 56 | rm = max(R); 57 | 58 | X = R' * cos(T); 59 | Y = R' * sin(T); 60 | Z = sqrt(max(1 - X.^2 - Y.^2,0)); 61 | 62 | X = [X; X]; 63 | Y = [Y; Y]; 64 | Z = [Z; -Z]; 65 | 66 | F_val = zeros(size(Z)); 67 | 68 | % record function value 69 | [x_1, x_2] = size(X); 70 | 71 | for i = 1 : x_1 72 | for j = 1 : x_2 73 | 74 | % print itermediate steps 75 | if(isprint == true) 76 | fprintf('L_x1 = %d, x1 = %d, L_x2 = %d, x2 = %d...\n',... 77 | x_1, i, x_2, j); 78 | end 79 | 80 | a = X(i,j) * u1 + Y(i,j) * u2 + Z(i,j) * u3; 81 | f = -.5 * norm(soft_thresholding(cconv(reversal(y,m),a,m),lambda))^2; 82 | F_val(i,j) = f; 83 | 84 | end 85 | end 86 | 87 | % normalize the function value 88 | F_min = min(F_val(:)); 89 | F_val = F_val - F_min; 90 | F_val = F_val / max(F_val(:)); 91 | 92 | %% plot the landscape over 3D sphere 93 | 94 | figure(1); 95 | 96 | r = 1.005; 97 | Marker = 15; 98 | 99 | hold on; 100 | surf(X,Y,Z,F_val,'EdgeAlpha',0); 101 | axis off; axis equal; 102 | 103 | plot3(r*u1'*a0,r*u2'*a0,r*u3'*a0,'r.','MarkerSize',Marker); 104 | plot3(r*u1'*a1,r*u2'*a1,r*u3'*a1,'r.','MarkerSize',Marker); 105 | plot3(r*u1'*a2,r*u2'*a2,r*u3'*a2,'r.','MarkerSize',Marker); 106 | plot3(-r*u1'*a0,-r*u2'*a0,-r*u3'*a0,'r.','MarkerSize',Marker); 107 | plot3(-r*u1'*a1,-r*u2'*a1,-r*u3'*a1,'r.','MarkerSize',Marker); 108 | plot3(-r*u1'*a2,-r*u2'*a2,-r*u3'*a2,'r.','MarkerSize',Marker); 109 | 110 | % save the data 111 | file_name = ['DQ_incoherent_lambda=',num2str(lambda),'_theta=n^(',num2str(-theta_n),')']; 112 | save(file_name,'n','m','theta','lambda','R','T','F_val','a0','a1','a2','X','Y','Z'); 113 | 114 | 115 | -------------------------------------------------------------------------------- /geometry/plot_geometry_lasso.m: -------------------------------------------------------------------------------- 1 | % Plot the low dimensional function landscape of bilinear lasso in the 2 | % paper: 3 | % ``Short-and-Sparse Deconvolution ? A Geometric Approach'' 4 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 5 | % (* denote equal contribution) 6 | % We consider the short-and-sparse blind deconvolution problem 7 | % y = a0 conv x0, with both a0 and x0 unknown, 8 | % We consider both incoherent and coherent kernels. 
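%
% Note (a summary of the evaluation loop below, no additional inputs): for each point
% a on the sphere, the inner lasso problem over x defined below is solved with the
% homotopy-accelerated Nesterov/FISTA solver from geometry/lasso (homotopy.m together
% with algm_Nesterov1st.m), and the resulting optimal value F(a) is stored in F_val
% for plotting.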
9 | % The bilinear formulation: 10 | % F(a,x) = 0.5 ||y - a conv x||_2^2 + lambda * ||x||_1 11 | % F(a) = min_x F(a,x) 12 | % We plot F(a) over a submanifold 13 | % M = span(a0,a1,a2) cap S^(n-1) 14 | % a1 and a2 are shifts of a0 15 | 16 | % Code written by Qing Qu 17 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 18 | 19 | clc;close all;clear all; 20 | addpath(genpath(pwd)); 21 | 22 | %% setting parameters 23 | 24 | % parameters for the problem 25 | n = 50; % length of kernel a 26 | m = 5e3; % number of samples 27 | theta_n = (3/4); 28 | theta = n^(-theta_n); % sparsity 29 | lambda = 0.3; % penalty parameter 30 | isprint = true; % print intermediate result 31 | 32 | kernel_type = 'gaussian'; % choose randn for incoherent kernel, and gaussian 33 | % for a smooth coherent gaussian kernel 34 | 35 | % generate the data; 36 | a0 = zeros(n,1); 37 | switch lower(kernel_type) 38 | case 'randn' 39 | a0(1:(n-2)) = randn(n-2,1); % leave the last two entries 0 for shifts a_1, a_2 40 | a0 = a0 / norm(a0); % normalization 41 | a1 = circshift(a0,1); 42 | a2 = circshift(a0,2); 43 | case 'gaussian' 44 | n_0 = n - 10; 45 | t = [-2:4/(n_0-1):2]'; 46 | a0(1:n_0) = exp( -t.^2); 47 | a1 = circshift(a0,5); 48 | a2 = circshift(a0,10); 49 | end 50 | 51 | x0 = (rand(m,1) < theta) .* randn(m,1); % generate x_0 from Bernoulli-Gaussian 52 | y = cconv(a0, x0, m); 53 | 54 | % parameters for the lasso solver 55 | lasso_opts.lambda = lambda; 56 | lasso_opts.t_fixed = 1; 57 | lasso_opts.linesearch = 'bt'; 58 | lasso_opts.bt_init = 'adaptive'; 59 | lasso_opts.tol = 1e-6; 60 | lasso_opts.maxitrs = 5e3; 61 | lasso_opts.homo_maxitrs = 5e2; 62 | lasso_opts.x_gen = zeros(m,1); % initialization for x 63 | 64 | 65 | %% evaluate the function value over the sphere 66 | 67 | % generate orthogonal basis vectors for a subspace spanned by {a_0, a_1, 68 | % a_2} 69 | 70 | u3 = a0 + a1 + a2; 71 | u3 = u3 / norm(u3); 72 | 73 | u2 = a1 - u3 * (u3'*a1); 74 | u2 = u2 / norm(u2); 75 | 76 | u1 = a0 - u3 * (u3'*a0) - u2 * (u2'*a0); 77 | u1 = u1 / norm(u1); 78 | 79 | % generate spherical coordinate 80 | % R = [0:.1:1]; 81 | % T = 0:.1:(2*pi+.05); 82 | % R = [0:.1:.75, .75:.05:.95, .95:.05:.99, .99:.01:1]; 83 | % T = 0:.1:(2*pi+.2); 84 | 85 | R = [0:.01:.75, .75:.005:.95, .95:.0005:.99, .99:.0001:1]; 86 | T = 0:.005:(2*pi+.05); 87 | 88 | rm = max(R); 89 | 90 | X = R' * cos(T); 91 | Y = R' * sin(T); 92 | Z = sqrt(max(1 - X.^2 - Y.^2,0)); 93 | 94 | X = [X; X]; 95 | Y = [Y; Y]; 96 | Z = [Z; -Z]; 97 | 98 | F_val = zeros(size(Z)); 99 | 100 | % record function value 101 | [x_1, x_2] = size(X); 102 | 103 | for i = 1 : x_1 104 | for j = 1 : x_2 105 | 106 | % print itermediate steps 107 | if(isprint == true) 108 | fprintf('L_x1 = %d, x1 = %d, L_x2 = %d, x2 = %d...\n',... 
109 | x_1, i, x_2, j); 110 | end 111 | 112 | a = X(i,j) * u1 + Y(i,j) * u2 + Z(i,j) * u3; 113 | 114 | f = func_conv(a, y); % data fidelity term for lasso 115 | g = func_l1(lasso_opts.lambda); % l1 penalty 116 | 117 | lasso_opts.lambda_0 = norm(cconv(reversal(y),a,m),'inf'); 118 | lasso_opts.lambda_tgt = lambda; 119 | x_init = zeros(m,1); 120 | % Logger = algm_Nesterov1st(f, g, x_init, lasso_opts); % solving x by using FISTA 121 | Logger = homotopy(@algm_Nesterov1st, f, g, x_init, lasso_opts); 122 | x_lasso = Logger.x; 123 | 124 | % record function value 125 | F_val(i,j) = f.oracle(x_lasso) + g.oracle(x_lasso); 126 | 127 | end 128 | end 129 | 130 | % normalize the function value 131 | F_min = min(F_val(:)); 132 | F_val = F_val - F_min; 133 | F_val = F_val / max(F_val(:)); 134 | 135 | 136 | %% plot the landscape over 3D sphere 137 | r = 1.005; 138 | Marker = 15; 139 | 140 | figure(1); 141 | 142 | hold on; 143 | surf(X,Y,Z,F_val,'EdgeAlpha',0); 144 | axis off; axis equal; 145 | 146 | plot3(r*u1'*a0,r*u2'*a0,r*u3'*a0,'r.','MarkerSize',Marker); 147 | plot3(r*u1'*a1,r*u2'*a1,r*u3'*a1,'r.','MarkerSize',Marker); 148 | plot3(r*u1'*a2,r*u2'*a2,r*u3'*a2,'r.','MarkerSize',Marker); 149 | plot3(-r*u1'*a0,-r*u2'*a0,-r*u3'*a0,'r.','MarkerSize',Marker); 150 | plot3(-r*u1'*a1,-r*u2'*a1,-r*u3'*a1,'r.','MarkerSize',Marker); 151 | plot3(-r*u1'*a2,-r*u2'*a2,-r*u3'*a2,'r.','MarkerSize',Marker); 152 | 153 | % save the data 154 | file_name = ['lasso','_lambda=',num2str(lambda),'_theta=n^(',num2str(-theta_n),')']; 155 | save(file_name,'n','m','theta','lambda','R','T','F_val','a0','a1','a2','X','Y','Z'); 156 | 157 | 158 | 159 | -------------------------------------------------------------------------------- /solvers_1D/algorithm/ADM.m: -------------------------------------------------------------------------------- 1 | % Implementation of Alternating Desecent Method (ADM) in the paper 2 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 3 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 4 | % (* denote equal contribution) 5 | % 6 | % We solve the short-and-sparse convolutional dictionary learning problem 7 | % y = sum_{k=1}^K a0k conv x0k + b * 1 + n 8 | % with both a0k and x0k unknown, b is a constant bias, n is noise 9 | % 10 | % The algorithms solve the following 1D optimization problem 11 | % min F(A,X) = 0.5 * ||y - sum_{k=1}^K ak conv xk||_2^2 + lambda * ||X||_1 12 | % s.t. ||ak|| = 1, k = 1,...,K 13 | % A = [a1,a2,...,aK], X = [x1,x2,...,xK] 14 | % via alternating gradient descent: 15 | % 16 | % 1. Fix A, and take a proximal gradient on X 17 | % 2. Fix X, and take a Riemannian gradient on A 18 | % 19 | % Code written by Qing Qu 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 21 | 22 | function [A, X, b, Psi_val, psi_val, Err_A, Err_X] = ADM(y,opts) 23 | lambda = opts.lambda * opts.W ; % penalty for sparsity 24 | Psi = @(v, u, V, Lambda) 0.5 * norm(v - u)^2 + norm(Lambda .*V,1); % handle function ... 
25 | % evalute the function value 26 | 27 | m = length(y); % the number of measurements 28 | [n,K] = size(opts.A_init);% n: the length of kernel, K: number of the kernels 29 | 30 | A = opts.A_init; % initialization for A 31 | X = opts.X_init; % initialization for X 32 | b = opts.b_init; % initialization of the bias 33 | 34 | t = 1; % initialization of the stepsize 35 | 36 | % record the solution path 37 | Psi_val = []; 38 | psi_val = []; 39 | X_track = []; 40 | A_track = []; 41 | Err_A = []; 42 | Err_X = []; 43 | 44 | 45 | % main ADM algorithm 46 | for iter = 1: opts.MaxIter 47 | 48 | %% Given A fixed, take a descent step on X via proximal gradient descent 49 | 50 | y_hat = compute_y(A,X); % compute y_hat = sum_k conv(a_k, x_k, m) 51 | y_b = y - ones(m,1) * b; 52 | 53 | Psi_X = Psi(y_b, y_hat, X, opts.lambda); % evaluate the function value 54 | 55 | fx = 0.5 * norm( y_b - y_hat )^2; 56 | grad_fx = compute_gradient( A, X, y_b, y_hat,0); 57 | 58 | % backtracking for update X and update stepsize t 59 | X_old = X; 60 | [X, t] = backtracking( y_b, A, X, fx, grad_fx, lambda, t, opts); 61 | 62 | %% Given X fixed, take a Riemannian gradient step on A 63 | % take a Riemannian gradient step on A 64 | y_hat = compute_y( A,X); % compute y_hat = sum_k conv(a_k, x_k, m) 65 | 66 | Psi_A = Psi(y_b, y_hat, X, opts.lambda); 67 | 68 | fa = 0.5*norm( y_b - y_hat )^2; 69 | grad_fa = compute_gradient( A, X, y_b, y_hat, 1); 70 | 71 | A_old = A; 72 | [A,tau] = linesearch( y_b, A, X, fa, grad_fa); % line-search for tau 73 | 74 | %% Given A, X fixed, update the bias b 75 | y_hat = compute_y( A, X); % compute y_hat = sum_k conv(a_k, x_k, m) 76 | if(opts.isbias) 77 | b = 1/m * sum( y - y_hat ); 78 | end 79 | 80 | %% update results and check for stopping criteria 81 | 82 | Psi_val = [Psi_val; Psi(y_b, y_hat, X, opts.lambda)]; 83 | psi_val = [psi_val; 0.5*norm(y_b -y_hat)^2 ]; 84 | X_track = [ X_track; X]; 85 | A_track = [ A_track; A]; 86 | 87 | % calculate the distance between the groundtruth and the iterate 88 | if(opts.err_truth) 89 | [err_A, err_X] = compute_error(A, X, opts); 90 | Err_A = [Err_A;err_A]; 91 | Err_X = [Err_X;err_X]; 92 | end 93 | 94 | if(opts.isprint) 95 | fprintf('Running the %d-th simulation, Psi_X = %f, Psi_A = %f...\n',... 96 | iter, Psi_X, Psi_A); 97 | end 98 | 99 | % check stopping criteria 100 | if( norm(A_old -A,'fro') <= opts.tol && norm(X_old -X,'fro') <= opts.tol ) 101 | break; 102 | end 103 | 104 | 105 | end 106 | 107 | end 108 | 109 | 110 | 111 | 112 | -------------------------------------------------------------------------------- /solvers_1D/algorithm/homotopy.m: -------------------------------------------------------------------------------- 1 | % Implementation of homotopy acceleration in the paper 2 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 3 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 4 | % (* denote equal contribution) 5 | % 6 | % We solve the short-and-sparse convolutional dictionary learning problem 7 | % y = sum_{k=1}^K a0k conv x0k + b * 1 + n 8 | % with both a0k and x0k unknown, b is a constant bias, n is noise 9 | % 10 | % The algorithms solve the following 1D optimization problem 11 | % min F(A,X) = 0.5 * ||y - sum_{k=1}^K ak conv xk||_2^2 + lambda * ||X||_1 12 | % s.t. 
||ak|| = 1, k = 1,...,K 13 | % A = [a1,a2,...,aK], X = [x1,x2,...,xK] 14 | % homotopy chooses a sparse solution path by shrinking the lambda: 15 | % The algorithm starts with a large lambda, and for each iteration it solves 16 | % the problem with using a solver (e.g., ADM or iADM). 17 | % It shrink the lambda geoemtrically and repeat until convergence. 18 | % 19 | % Code written by Qing Qu 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 21 | 22 | function [A, X, b, Psi_Val, psi_Val, Err_A, Err_X]= homotopy(y_0, opts) 23 | [n, K] = size(opts.A_init); 24 | m = length(y_0); 25 | Psi_Val = []; 26 | psi_Val = []; 27 | Err_A = []; 28 | Err_X = []; 29 | 30 | homo_opts = opts; 31 | 32 | %% setting parameters 33 | 34 | switch lower( opts.homo_alg ) 35 | case 'adm' 36 | eta = 0.8; 37 | delta = 8e-2; 38 | homo_opts.MaxIter = 2e2; 39 | case 'iadm' 40 | eta = 0.85; 41 | delta = 5e-1; 42 | homo_opts.MaxIter = 1e2; 43 | case 'reweight' 44 | eta = 0.8; 45 | delta = 0.1; 46 | homo_opts.MaxIter = 2e2; 47 | otherwise 48 | error('wrong algorithm'); 49 | end 50 | 51 | 52 | % lambda_0 = 1; % initial lambda 53 | lambda_0 = norm(cconv(reversal(y_0),opts.A_init,m),'inf'); % initial lambda 54 | lambda_tgt = opts.lambda; % target lambda 55 | 56 | homo_opts.lambda = lambda_0; 57 | homo_opts.tol = delta*lambda_0; 58 | 59 | N_stages = floor( log(lambda_0/lambda_tgt) / log(1.0/eta ) ); 60 | lambda = lambda_0; 61 | 62 | %% running the algorithm 63 | for k = 1:N_stages 64 | 65 | switch lower( opts.homo_alg ) 66 | case 'adm' 67 | [A, X, b, Psi_val,psi_val, Err_a, Err_x] = ADM( y_0, homo_opts); 68 | case 'iadm' 69 | [A, X, b, Psi_val,psi_val, Err_a, Err_x] = iADM( y_0, homo_opts); 70 | % opts.count = opts.count + length(f_val); 71 | case 'reweight' 72 | [A, X, b, Psi_val,psi_val,W] = reweighting( y_0, homo_opts); 73 | homo_opts.W = W; 74 | otherwise 75 | error('wrong algorithm'); 76 | end 77 | 78 | % record result 79 | Psi_Val = [Psi_Val;Psi_val]; 80 | psi_Val = [psi_Val;psi_val]; 81 | Err_A = [Err_A;Err_a]; 82 | Err_X = [Err_X;Err_x]; 83 | 84 | % Update the parameters of opts 85 | homo_opts.A_init = A; 86 | homo_opts.X_init = X; 87 | homo_opts.b_init = b; 88 | % homo_opts.count = opts.count + length(f_val); 89 | 90 | lambda = lambda * eta; 91 | tol = delta*lambda; 92 | homo_opts.lambda = lambda; 93 | homo_opts.tol = tol; 94 | 95 | end 96 | 97 | % solving the final stage to precision tol 98 | 99 | homo_opts.lambda = lambda_tgt; 100 | homo_opts.tol = opts.tol; 101 | homo_opts.MaxIter = opts.MaxIter; 102 | 103 | switch lower( opts.homo_alg ) 104 | case 'adm' 105 | [A, X, b, Psi_val,psi_val,Err_a,Err_x] = ADM( y_0, homo_opts); 106 | case 'iadm' 107 | [A, X, b, Psi_val,psi_val,Err_a,Err_x] = iADM( y_0, homo_opts); 108 | case 'reweight' 109 | [A, X, b, Psi_val,psi_val] = reweighting( y_0, homo_opts); 110 | otherwise 111 | error('wrong algorithm'); 112 | end 113 | 114 | Psi_Val = [Psi_Val;Psi_val]; 115 | psi_Val = [psi_Val;psi_val]; 116 | Err_A = [Err_A;Err_a]; 117 | Err_X = [Err_X;Err_x]; 118 | 119 | end -------------------------------------------------------------------------------- /solvers_1D/algorithm/iADM.m: -------------------------------------------------------------------------------- 1 | % Implementation of inertial Alternating Desecent Method (iADM) in the paper 2 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 3 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 4 | % (* denote equal contribution) 5 | % 6 | % We solve the 
short-and-sparse convolutional dictionary learning problem 7 | % y = sum_{k=1}^K a0k conv x0k + b * 1 + n 8 | % with both a0k and x0k unknown, b is a constant bias, n is noise 9 | % 10 | % The algorithms solve the following 1D optimization problem 11 | % min F(A,X) = 0.5 * ||y - sum_{k=1}^K ak conv xk||_2^2 + lambda * ||X||_1 12 | % s.t. ||ak|| = 1, k = 1,...,K 13 | % A = [a1,a2,...,aK], X = [x1,x2,...,xK] 14 | % via alternating acclerated gradient descent 15 | % 16 | % 1. Fix A, and take a proximal gradient on X with momentum acceleration 17 | % 2. Fix X, and take a Riemannian gradient on A with momentum acceleration 18 | % 19 | % Code written by Qing Qu 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 21 | 22 | function [A, X, b, Psi_val, psi_val, Err_A, Err_X] = iADM(y,opts) 23 | 24 | lambda = opts.lambda .* opts.W ; % penalty for sparsity 25 | 26 | Psi = @(v, u, V) 0.5 * norm(v - u)^2 + norm( lambda .* V,1); % handle function ... 27 | % evalute the function value 28 | 29 | 30 | m = length(y); % the number of measurements 31 | [n,K] = size(opts.A_init);% n: the length of kernel, K: number of the kernels 32 | 33 | A = opts.A_init; % initialization for A 34 | A_old = opts.A_init; % initialization for A_hat, auxiliary variable for acceleration 35 | X = opts.X_init; % initialization for X 36 | X_old = opts.X_init; % initialization for X_hat, auxiliary variable for acceleration 37 | b = opts.b_init; % initialization of the bias 38 | 39 | 40 | t = 1; % initialization of the stepsize for X 41 | Psi_val = []; 42 | psi_val = []; 43 | X_track = []; 44 | A_track = []; 45 | Err_A = []; 46 | Err_X = []; 47 | 48 | for iter = 1: opts.MaxIter 49 | 50 | %% Given A fixed, take a descent step on X via proximal gradient descent 51 | beta = 0.85; 52 | % beta = (iter - 1) / (iter+2); 53 | X_hat = X + beta*(X - X_old); 54 | 55 | y_hat = compute_y(A, X_hat); % compute y_hat = sum_k conv(a_k, x_k, m) 56 | y_b = y - ones(m,1) * b; 57 | 58 | Psi_X = Psi(y_b, y_hat, X_hat); % evaluate the function value 59 | 60 | fx = 0.5 * norm( y_b - y_hat )^2; 61 | grad_fx = compute_gradient( A, X_hat, y_b, y_hat, 0); 62 | 63 | % backtracking for update X and update stepsize t 64 | X_old = X; 65 | 66 | % line search 67 | switch lower( opts.t_linesearch ) 68 | case 'fixed' 69 | t = opts.t_fixed; 70 | % apply proximal mapping of Psi and compute gradient mapping 71 | X = soft_thres( X_hat - t * grad_fx, lambda * t ); 72 | case 'bt' 73 | [X, t] = backtracking( y_b, A, X_hat, fx, grad_fx, lambda, t, opts); 74 | otherwise 75 | error('line search method not implemented'); 76 | end 77 | 78 | %% Given X fixed, take a Riemannian gradient step on A 79 | % take a Riemannian gradient step on A 80 | % D = A - A_old; 81 | D = Log_map(A_old, A); 82 | Norm_D = zeros(K,1); 83 | for k = 1:K 84 | Norm_D(k) = norm(D(:,k)); 85 | end 86 | 87 | A_hat = Retract(A, beta*D, beta*Norm_D ); 88 | 89 | y_hat = compute_y( A_hat, X); % compute y_hat = sum_k conv(a_k, x_k, m) 90 | y_b = y - ones(m,1) * b; 91 | 92 | 93 | Psi_A = Psi(y_b, y_hat, X); 94 | 95 | fa = 0.5 * norm( y_b - y_hat )^2; 96 | 97 | grad_fa = compute_gradient( A_hat, X, y_b, y_hat, 1); 98 | 99 | A_old = A; 100 | [A,~] = linesearch( y_b, A_hat, X, fa, grad_fa); % line-search for tau 101 | 102 | %% Given A, X fixed, update the bias b 103 | y_hat = compute_y( A, X); % compute y_hat = sum_k conv(a_k, x_k, m) 104 | if(opts.isbias) 105 | b = 1/m * sum( y - y_hat ); 106 | end 107 | y_b = y - ones(m,1) * b; 108 | 109 | %% update results and check for stopping 
criteria 110 | 111 | Psi_val = [Psi_val; Psi(y_b, y_hat, X)]; 112 | psi_val = [psi_val; 0.5*norm(y_b -y_hat)^2]; 113 | A_track = [ A_track, A]; 114 | X_track = [ X_track, X]; 115 | 116 | % calculate the distance between the groundtruth and the iterate 117 | if(opts.err_truth) 118 | [err_A, err_X] = compute_error(A, X, opts); 119 | Err_A = [Err_A;err_A]; 120 | Err_X = [Err_X;err_X]; 121 | end 122 | 123 | if(opts.isprint) 124 | fprintf('Running the %d-th simulation, Psi_X = %f, Psi_A = %f...\n',... 125 | iter, Psi_X, Psi_A); 126 | end 127 | 128 | % check stopping criteria 129 | if( norm(A_old -A,'fro') <= opts.tol && norm(X_old -X,'fro') <= opts.tol ) 130 | break; 131 | end 132 | 133 | 134 | end 135 | 136 | end 137 | 138 | 139 | 140 | 141 | 142 | -------------------------------------------------------------------------------- /solvers_1D/algorithm/incoherent.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingqu06/sparse_deconvolution/821f419cf615e31db8ee777f97f61445b8c65120/solvers_1D/algorithm/incoherent.mat -------------------------------------------------------------------------------- /solvers_1D/algorithm/reweighting.m: -------------------------------------------------------------------------------- 1 | % Implementation of reweighting method in the paper 2 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 3 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 4 | % (* denote equal contribution) 5 | % 6 | % We solve the short-and-sparse convolutional dictionary learning problem 7 | % y = sum_{k=1}^K a0k conv x0k + b * 1 + n 8 | % with both a0k and x0k unknown, b is a constant bias, n is noise 9 | % 10 | % The algorithms solve the following 1D optimization problem 11 | % min F(A,X) = 0.5 * ||y - sum_{k=1}^K ak conv xk||_2^2 + lambda * ||W*X||_1 12 | % s.t. 
||ak|| = 1, k = 1,...,K 13 | % A = [a1,a2,...,aK], X = [x1,x2,...,xK] 14 | % 15 | % Reweighting method starts with an all one weights W, and update the weights 16 | % W_ij = 1/(|X_ij| + eps) for each iteration 17 | % We repeat the process until convergence 18 | % 19 | % Code written by Qing Qu 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 21 | 22 | function [A, X, b, Psi_Val, psi_Val, Err_A, Err_X]= reweighting(y_0, opts) 23 | [n, ~] = size(opts.A_init); 24 | m = length(y_0); 25 | 26 | Psi_Val = []; 27 | psi_Val = []; 28 | Err_A = []; 29 | Err_X = []; 30 | 31 | 32 | for k = 1:opts.MaxIter_reweight 33 | switch lower( opts.reweight_alg ) 34 | case 'adm' 35 | [A, X, b, Psi_val,psi_val, Err_a, Err_x] = ADM( y_0, homo_opts); 36 | case 'iadm' 37 | [A, X, b, Psi_val,psi_val, Err_a, Err_x] = iADM( y_0, opts); 38 | case 'homo' 39 | [A, X, b, Psi_val,psi_val, Err_a, Err_x] = homotopy( y_0, opts); 40 | % opts.count = opts.count + length(f_val); 41 | otherwise 42 | error('wrong algorithm'); 43 | end 44 | 45 | % record result 46 | Psi_Val = [Psi_Val;Psi_val]; 47 | psi_Val = [psi_Val;psi_val]; 48 | Err_A = [Err_A;Err_a]; 49 | Err_X = [Err_X;Err_x]; 50 | 51 | if(opts.isprint) 52 | fprintf('Running the %d-th round of reweighting...\n', k); 53 | end 54 | 55 | if( norm(opts.A_init-A,'fro') <= opts.tol && norm(opts.X_init -X,'fro') <= opts.tol ) 56 | break; 57 | end 58 | 59 | % Update the initialization 60 | opts.A_init = A; 61 | opts.X_init = X; 62 | opts.b_init = b; 63 | opts.count = opts.count + length(psi_val); 64 | 65 | % Update the weight matrix 66 | x = sort(abs(X(:)),'descend'); 67 | thres = x( round(n/(4*log(m/n)))); 68 | e = max(thres,1e-3); 69 | 70 | opts.W = 1 ./ ( abs(X)+e ); 71 | end 72 | 73 | W = opts.W; 74 | 75 | end -------------------------------------------------------------------------------- /solvers_1D/auxiliary/Log_map.m: -------------------------------------------------------------------------------- 1 | function T = Log_map(Z, D) 2 | 3 | proj_a = @(w,z) z - (w'*z)*w; 4 | 5 | [n,K] = size(Z); 6 | T = zeros(n,K); 7 | 8 | for k = 1:K 9 | alpha = acos(Z(:,k)' * D(:,k)); 10 | proj_tmp = proj_a( Z(:,k), D(:,k) ) ; 11 | T(:,k) = proj_tmp * alpha/sin(alpha) ; 12 | end 13 | 14 | 15 | end 16 | -------------------------------------------------------------------------------- /solvers_1D/auxiliary/Retract.m: -------------------------------------------------------------------------------- 1 | % retract back to oblique manifold 2 | function T = Retract( Z, D, t) 3 | [n,K] = size(Z); 4 | T = zeros(n,K); 5 | 6 | for k = 1:K 7 | T(:,k) = Z(:,k) * cos(t(k)) + ( D(:,k) / t(k)) * sin(t(k)); 8 | end 9 | T = normc(T); 10 | end -------------------------------------------------------------------------------- /solvers_1D/auxiliary/backtracking.m: -------------------------------------------------------------------------------- 1 | % update X via backtracking linesearch 2 | function [X1, t] = backtracking( y, A, X, fx, grad_fx, lambda, t, opts) 3 | 4 | m = length(y); 5 | 6 | Q = @(Z,tau) fx + norm(lambda .* Z,1) + innerprod(grad_fx, Z-X) + 0.5/tau*norm(Z-X,'fro')^2; 7 | 8 | t = 8*t; 9 | 10 | X1 = soft_thres( X - t * grad_fx, lambda * t ); %proximal mapping 11 | if(opts.isnonnegative) 12 | X1 = max(X1,0); 13 | end 14 | 15 | if(opts.hard_thres) 16 | ind = (X1<=opts.hard_threshold); 17 | X1(ind) = 0; 18 | end 19 | 20 | while ( Psi_val(y, A, X1, lambda) > Q(X1,t) ) 21 | t = 1/2*t; 22 | X1 = soft_thres( X - t * grad_fx, lambda * t ); 23 | if(opts.isnonnegative) 24 | X1 = 
max(X1,0); 25 | end 26 | if(opts.isupperbound) 27 | X1 = min(X1,opts.upperbound); 28 | end 29 | if(opts.hard_thres) 30 | ind = (X1<=opts.hard_threshold); 31 | X1(ind) = 0; 32 | end 33 | end 34 | 35 | end 36 | 37 | 38 | 39 | function f = innerprod(U,V) 40 | f = sum(sum(U.*V)); 41 | end 42 | 43 | function f = Psi_val( y, A, Z, lambda) 44 | m = length(y); 45 | [~,K] = size(A); 46 | y_hat = zeros(size(y)); 47 | 48 | for k = 1:K 49 | y_hat = y_hat + cconv( A(:,k), Z(:,k), m); 50 | end 51 | 52 | f = 0.5 * norm(y - y_hat)^2 + norm(lambda .* Z,1); 53 | 54 | end 55 | 56 | 57 | -------------------------------------------------------------------------------- /solvers_1D/auxiliary/compute_error.m: -------------------------------------------------------------------------------- 1 | function [err_A, err_X] = compute_error(A, X, opts) 2 | 3 | [n,K] = size(A); 4 | [m,~] = size(X); 5 | 6 | 7 | A_0 = [zeros(n/3,K);opts.A_0;zeros(n/3,K)]; 8 | X_0 = opts.X_0; 9 | err_A = 0; 10 | err_X = 0; 11 | for i = 1:K 12 | a = A(:,i); 13 | x = X(:,i); 14 | cor = zeros(K,1); 15 | ind = zeros(K,1); 16 | for j = 1:K 17 | Corr = cconv(reversal(A_0(:,j)),a,m); 18 | [cor(j),ind(j)] = max(abs(Corr)); 19 | end 20 | [~,Ind] = max(cor); 21 | a_max = circshift(A_0(:,Ind),ind(Ind)-1); 22 | x_max = circshift(X_0(:,Ind),-(ind(Ind)-1)); 23 | err_A = err_A + min( norm( a_max - a ), norm( a_max + a ) ); 24 | err_X = err_X + min( norm( x_max - x ), norm( x_max + x ) ); 25 | 26 | end 27 | end -------------------------------------------------------------------------------- /solvers_1D/auxiliary/compute_gradient.m: -------------------------------------------------------------------------------- 1 | %% compute (Riemannian) gradient 2 | function Grad = compute_gradient( A, X, y_b, y_hat, gradient_case) 3 | 4 | proj_a = @(w,z) z - (w'*z)*w; 5 | 6 | [m,K] = size(X); 7 | [n,~] = size(A); 8 | Grad = zeros(m,K); 9 | 10 | switch gradient_case 11 | case 0 12 | Grad = zeros(m,K); 13 | case 1 14 | Grad = zeros(n,K); 15 | end 16 | 17 | for k = 1:K 18 | switch gradient_case 19 | case 0 20 | Grad(:,k) = cconv( reversal(A(:,k),m), y_hat - y_b, m) ; 21 | case 1 22 | G = cconv( reversal(X(:,k),m), y_hat - y_b, m) ; 23 | Grad(:,k) = proj_a( A(:,k), G(1:n)); 24 | end 25 | end 26 | 27 | end 28 | -------------------------------------------------------------------------------- /solvers_1D/auxiliary/compute_y.m: -------------------------------------------------------------------------------- 1 | %% compute y = sum_k conv(a_k,x_k) 2 | function y_hat = compute_y(A,X) 3 | 4 | [m, K] = size(X); 5 | y_hat = zeros(m,1); 6 | for k = 1:K 7 | y_hat = y_hat + cconv( A(:,k), X(:,k), m); 8 | end 9 | 10 | end -------------------------------------------------------------------------------- /solvers_1D/auxiliary/gen_data.m: -------------------------------------------------------------------------------- 1 | % generate the groudtruth data 2 | % y = sum_{k=1}^K a0k conv x0k + b*1 + n 3 | function [a_0, x_0, y_0, y] = gen_data( theta, m, n, b, noise_level, a_type, x_type) 4 | 5 | % s = rng(seed); 6 | %% generate the kernel a_0 7 | gamma = [1.7, -0.712]; % parameter for AR2 model 8 | t = [0:1/(n-1):1]'; 9 | 10 | switch lower(a_type) 11 | case 'randn' % random Gaussian 12 | a_0 = randn(n,1); 13 | case 'ar2' % AR2 kernel 14 | tau = 0.01*ar2exp(gamma); 15 | a_0 = exp(-t/tau(1)) - exp(-t/tau(2)); 16 | case 'ar1' % AR1 kernel 17 | tau = 0.25; 18 | a_0 = exp(-t/tau); 19 | 20 | case 'gaussian' % Gaussian kernel 21 | t = [-2:4/(n-1):2]'; 22 | a_0 = exp( - (t).^2 ); 23 | case 'sinc' 24 | 
sigma = 0.05; 25 | a_0 = sinc((t-0.5)/sigma); 26 | 27 | otherwise 28 | error('wrong type'); 29 | end 30 | a_0 = a_0 / norm(a_0); % normalize the kernel 31 | 32 | 33 | %% generate the spike train x_0 34 | switch lower(x_type) 35 | case 'bernoulli' 36 | x_0 = double(rand(m,1)<=theta); % Bernoulli spike train 37 | case 'bernoulli-rademacher' 38 | x_0 = (rand(m,1)<=theta) .* (double(rand(m,1)<0.5) -0.5)*2 ; 39 | case 'bernoulli-gaussian' 40 | x_0 = randn(m,1) .* (rand(m,1)<=theta); % Gaussian-Bernoulli spike train 41 | otherwise 42 | error('wrong type'); 43 | end 44 | 45 | %% generate the data y = a_0 conv b_0 + bias + noise 46 | y_0 = cconv(a_0, x_0,m) + b * ones(m,1); 47 | y = y_0 + randn(m,1) * noise_level; 48 | 49 | end -------------------------------------------------------------------------------- /solvers_1D/auxiliary/linesearch.m: -------------------------------------------------------------------------------- 1 | % update A via Riemannian linsearch 2 | function [A1,tau] = linesearch( y, A, X, fa, grad_a) 3 | 4 | m = length(y); 5 | [~,K] = size(A); 6 | eta = 0.8; 7 | tau = 1; 8 | 9 | 10 | norm_grad = norm(grad_a,'fro'); 11 | 12 | Norm_G = zeros(K,1); 13 | for k = 1:K 14 | Norm_G(k) = norm(grad_a(:,k)); 15 | end 16 | 17 | 18 | A1 = Retract( A, -tau*grad_a, tau*Norm_G); 19 | 20 | 21 | count = 1; 22 | while( Psi_val(y, A1, X) > fa - eta*tau * norm_grad^2 ) 23 | tau = 0.5 * tau; 24 | A1 = Retract( A, -tau*grad_a, tau*Norm_G); 25 | 26 | if(count>=100) 27 | break; 28 | end 29 | count = count + 1; 30 | end 31 | 32 | end 33 | 34 | % calculation the function value Psi_val 35 | function f_val = Psi_val(y, A, X) 36 | 37 | [m,K] = size(X); 38 | y_hat = zeros(m,1); 39 | 40 | for k = 1:K 41 | y_hat = y_hat + cconv(A(:,k),X(:,k),m); 42 | end 43 | 44 | f_val = 0.5 * sum((y - y_hat).^2); 45 | 46 | end 47 | 48 | 49 | 50 | 51 | -------------------------------------------------------------------------------- /solvers_1D/auxiliary/reversal.m: -------------------------------------------------------------------------------- 1 | function [ revX ] = reversal( X, m ) 2 | if nargin > 1 3 | X = [X(1:min(size(X,1), m), :) ; zeros(max(m - size(X,1), 0), size(X,2))]; 4 | end 5 | 6 | revX = [X(1,:) ; flipud(X(2:end,:))]; 7 | end -------------------------------------------------------------------------------- /solvers_1D/auxiliary/shift_correction.m: -------------------------------------------------------------------------------- 1 | function [a_shift, x_shift] = shift_correction( a, x, opts) 2 | 3 | 4 | a_0 = shift_opts.A_0; 5 | x_0 = shift_opts.x_0; 6 | 7 | n_0 = length(a_0); 8 | n = length(a); 9 | m = length(x); 10 | 11 | if(opts.grouth_truth) 12 | Corr = cconv(reversal(a_0),a,m); 13 | [~,ind] = max(abs(Corr)); 14 | Corr_max = Corr(ind); 15 | 16 | if(Corr_max>0) 17 | a_shift = circshift(a, ind-1); 18 | x_shift = circshift(x, -(ind-1)); 19 | else 20 | a_shift = - circshift(a, ind-1); 21 | x_shift = - circshift(x, -(ind-1)); 22 | end 23 | 24 | end 25 | 26 | 27 | end -------------------------------------------------------------------------------- /solvers_1D/auxiliary/soft_thres.m: -------------------------------------------------------------------------------- 1 | function z = soft_thres(z,lambda) 2 | 3 | z = sign(z) .* max( abs(z)-lambda,0); 4 | 5 | end -------------------------------------------------------------------------------- /solvers_1D/test_algorithms_1D.m: -------------------------------------------------------------------------------- 1 | clc; close all;clear all; 2 | addpath(genpath(pwd)); 3 | % 
Comparing the algorithmic performance of the proposed nonconvex 4 | % optimization methods in the paper 5 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 6 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 7 | % (* denote equal contribution) 8 | % 9 | % We solve the short-and-sparse convolutional dictionary learning problem 10 | % y = sum_{k=1}^K a0k conv x0k + b * 1 + n 11 | %with both a0k and x0k unknown, b is a constant bias, n is noise 12 | % 13 | % The algorithms solve the following 1D optimization problem 14 | % min F(A,X) = 0.5 * ||y - sum_{k=1}^K ak conv xk||_2^2 + lambda * ||X||_1 15 | % s.t. ||ak|| = 1, k = 1,...,K 16 | % A = [a1,a2,...,aK], X = [x1,x2,...,xK] 17 | % 18 | % Test the proposed Alternating desecent method (ADM), inertial ADM (iADM), 19 | % homotopy acceleration and reweighting method 20 | % Code written by Qing Qu 21 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | 23 | % platform for simulation of Convolutionoal dictionary learning problem 24 | 25 | %% optimization parameters 26 | opts.tol = 1e-6; % convergence tolerance 27 | opts.isnonnegative = false; % enforcing nonnegativity on X 28 | opts.isupperbound = false; % enforce upper bound on X 29 | opts.upperbound = 1.5; % upper bound number 30 | opts.hard_thres = false; % hard-threshold on small entries of X to zero 31 | opts.MaxIter = 1e3; % number of maximum iterations 32 | opts.MaxIter_reweight = 10; % reweighting iterations for reweighting algorithm 33 | opts.isbias = true; % enforce when there is a constant bias in y 34 | opts.t_linesearch = 'bt'; % linesearch for the stepsize t for X 35 | opts.err_truth = true; % enforce to compute error w.r.t. the groundtruth for (a0, x0) 36 | opts.isprint = true; % print the intermediate result 37 | 38 | 39 | %% generate the measurements 40 | 41 | % setup the parameters 42 | n = 1e2; % length of each kernel a0k 43 | m = 1e4; % length of the measurements y 44 | K = 1; % number of kernels 45 | theta = n^(-3/4); % sparsity parameter for Bernoulli distribution 46 | opts.lambda = 1e-2; % penalty parameter lambda 47 | 48 | 49 | a_type = 'randn'; % choose from 'randn', 'ar1', 'ar2', 'gaussian', 'sinc' 50 | x_type = 'bernoulli-rademacher'; % choose 'bernoulli' or 51 | % 'bernoulli-rademacher' or 'bernoulli-gaussian' 52 | b_0 = 1; % bias 53 | noise_level = 0; % noise level 54 | 55 | % generate the data 56 | [A_0, X_0, y_0, y] = gen_data( theta, m, n, b_0, noise_level, a_type, x_type); 57 | opts.truth = true; 58 | opts.A_0 = A_0; opts.X_0 = X_0; opts.b_0 = b_0; 59 | 60 | 61 | 62 | %% initialization for A, X, b 63 | 64 | % initialize A 65 | opts.A_init = zeros(3*n,K); 66 | for k = 1:K 67 | ind = randperm(m,1); 68 | y_pad = [y_0;y_0]; 69 | a_init = y_pad(ind:ind+n-1); 70 | a_init = [zeros(n,1); a_init; zeros(n,1)]; 71 | a_init = a_init / norm(a_init); 72 | opts.A_init(:,k) = a_init; 73 | end 74 | 75 | opts.X_init = zeros(m,K); % initialize X 76 | opts.b_init = mean(y); 77 | opts.W = ones(m,K); % initialize the weight matrix 78 | 79 | %% run the optimization algorithms 80 | Alg_num = 4; 81 | 82 | % Alg_type = {'ADM','iADM','homotopy-ADM','homotopy-iADM','reweighting'}; 83 | Alg_type = {'ADM','iADM','homotopy-ADM','homotopy-iADM'}; 84 | 85 | Psi_min = Inf; psi_min = Inf; 86 | Psi = cell(length(Alg_type),1); 87 | psi = cell(length(Alg_type),1); 88 | Err_A = cell(length(Alg_type),1); 89 | 90 | for k = 1:length(Alg_type) 91 | 92 | switch lower(Alg_type{k}) 93 | case 'adm' 94 | [A, X, b, Psi{k}, psi{k}, 
Err_A{k}, Err_X{k}] = ADM( y_0, opts); 95 | case 'iadm' 96 | [A, X, b, Psi{k}, psi{k}, Err_A{k}, Err_X{k}] = iADM( y_0, opts); 97 | case 'homotopy-adm' 98 | opts.homo_alg = 'adm'; 99 | [A, X, b, Psi{k}, psi{k}, Err_A{k}, Err_X{k}] = homotopy( y_0, opts); 100 | case 'homotopy-iadm' 101 | opts.homo_alg = 'iadm'; 102 | [A, X, b, Psi{k}, psi{k}, Err_A{k}, Err_X{k}] = homotopy( y_0, opts); 103 | case 'reweighting' 104 | opts.reweight_alg = 'adm'; 105 | [A, X, b, Psi{k}, psi{k}, Err_A{k}, Err_X{k}] = reweighting( y_0, opts); 106 | end 107 | 108 | if(Psi{k}(end)<=Psi_min) 109 | Psi_min = Psi{k}(end); 110 | end 111 | 112 | if(psi{k}(end)<=psi_min) 113 | psi_min = psi{k}(end); 114 | end 115 | 116 | end 117 | 118 | 119 | 120 | %% plotting results 121 | % figure; 122 | % plot(A_0); 123 | 124 | color = {'r','g','b','k'}; 125 | 126 | figure(1); 127 | hold on; 128 | for k = 1:length(Alg_type) 129 | plot(log( Psi{k} - Psi_min ), color{k}, 'LineWidth', 2); 130 | end 131 | leg1 = legend(Alg_type); 132 | set(leg1,'FontSize',16); set(leg1,'Interpreter','latex'); 133 | xlabel('Iteration','Interpreter','latex','FontSize',16); 134 | ylabel('$\log ( \Psi(${\boldmath$a$},{\boldmath$x$}$) - \Psi_{\min} )$',... 135 | 'Interpreter','latex','FontSize',16); 136 | xlim([0,opts.MaxIter]); 137 | set(gca, 'FontName', 'Times New Roman','FontSize',14); 138 | title('(a) function value convergence','Interpreter','latex','FontSize',20); 139 | grid on; 140 | 141 | figure(2); 142 | hold on; 143 | for k = 1:length(Alg_type) 144 | plot(log(Err_A{k}), color{k}, 'LineWidth', 2); 145 | end 146 | leg2 = legend(Alg_type); 147 | set(leg2,'FontSize',16); set(leg2,'Interpreter','latex'); 148 | xlabel('Iteration','Interpreter','latex'); 149 | ylabel('$\log ( \min \{||${\boldmath$a$}$_\star-${\boldmath$a$}$_0 ||\;,||${\boldmath$a$}$_\star + ${\boldmath$a$}$_0 || \} )$',... 150 | 'Interpreter','latex','FontSize',16); 151 | xlim([0,opts.MaxIter]); 152 | set(gca, 'FontName', 'Times New Roman','FontSize',14); 153 | title('(b) iterate convergence','Interpreter','latex','FontSize',20); 154 | grid on; 155 | 156 | save('incoherent.mat','Psi_min','Alg_type','opts','Err_A','Psi'); 157 | 158 | 159 | -------------------------------------------------------------------------------- /solvers_2D/algorithm/ADM_2D.m: -------------------------------------------------------------------------------- 1 | % Implementation of Alternating Desecent Method (ADM) in the paper 2 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 3 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 4 | % (* denote equal contribution) 5 | % 6 | % We solve the short-and-sparse convolutional dictionary learning problem 7 | % in 2D with multiple samples Y_i of the same kernels jointly 8 | % Y_i = sum_{k=1}^K A0k conv X0ik + bi * 1 + Ni, (i = 1,...,T) 9 | % with both A0k and X0ik unknown 2D signal, bi is a constant bias, Ni is noise 10 | % 11 | % The algorithms solve the following 1D optimization problem 12 | % min F(A,X) = 0.5 * sum_i ||Yi - sum_{k=1}^K Ak conv Xik||_2^2 + lambda * sum ||Xik||_1 13 | % s.t. ||Ak||_F = 1, k = 1,...,K 14 | % A = {A1,A2,...,AK}, Xi = {Xi1,Xi2,...,XiK} 15 | % via alternating gradient descent: 16 | % 17 | % 1. Fix A, and take a proximal gradient on {Xi} 18 | % 2. 
Fix X, and take a Riemannian gradient on A 19 | % 20 | % Code written by Qing Qu 21 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | 23 | function [A, X, b, Psi_val, psi_val] = ADM_2D(Y, opts) 24 | 25 | [m(1), m(2), T] = size(Y); 26 | [n(1), n(2), K] = size(opts.A_init); 27 | 28 | lambda = opts.lambda .* opts.W; 29 | 30 | 31 | % initialization of A, X, b, t 32 | A = opts.A_init; 33 | X = opts.X_init; 34 | b = opts.b_init; 35 | t = 1; % initialize the stepsize 36 | 37 | % record function values 38 | Psi_val = []; psi_val = []; 39 | 40 | for iter = 1:opts.MaxIter 41 | 42 | %% Given A fixed, take a descent step on X via proximal gradient descent 43 | Bias = zeros([m,T]); 44 | for k = 1:T 45 | Bias(:, :, k) = ones(m)*b(k); 46 | end 47 | 48 | Y_b = Y - Bias; % remove the bias from Y 49 | opts_f.isgrad = true; opts_f.case = 'isgrad_X'; 50 | [psi_X, grad_psi_X] = f_quad(Y, A, X, opts_f); 51 | 52 | Psi_X = psi_X + g_val( X, lambda, opts); 53 | 54 | % backtracking for update X and update stepsize t 55 | X_old = X; 56 | [X, t] = backtracking_2D( Y_b, A, X, psi_X, grad_psi_X, lambda, t, opts); 57 | 58 | %% Given X fixed, take a Riemannian gradient step on A 59 | % take a Riemannian gradient step on A 60 | opts_f.isgrad = true; opts_f.case = 'isgrad_A'; 61 | [psi_A, grad_psi_A] = f_quad(Y, A, X, opts_f); 62 | Psi_A = psi_A + g_val( X, lambda, opts); 63 | 64 | % line-search for tau 65 | A_old = A; 66 | [A,tau] = linesearch_2D( Y_b, A, X, psi_A, grad_psi_A, opts); 67 | 68 | %% Given A, X fixed, update the bias b 69 | opts_f.isgrad = false; 70 | [psi, ~, Y_hat] = f_quad(Y, A, X, opts_f); 71 | Psi = psi + g_val( X, lambda, opts); 72 | 73 | if(opts.isbias) 74 | b = mean(reshape(Y - Y_hat, m(1)*m(2),T))'; 75 | end 76 | 77 | %% update results and check for stopping criteria 78 | 79 | Psi_val = [ Psi_val; Psi]; 80 | psi_val = [ psi_val; psi]; 81 | 82 | if(opts.isdisplay) 83 | fprintf('Running the %d-th simulation, Psi_X = %f, Psi_A = %f...\n', iter, Psi_X, Psi_A); 84 | end 85 | % check stopping criteria 86 | diff_A = norm(A(:) - A_old(:)); 87 | diff_X = norm(X(:) - X_old(:)); 88 | 89 | if( diff_A <= opts.tol && diff_X <= opts.tol ) 90 | break; 91 | end 92 | 93 | 94 | end 95 | 96 | end 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /solvers_2D/algorithm/homotopy_2D.m: -------------------------------------------------------------------------------- 1 | % Implementation of homotopy acceleration in the paper 2 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 3 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 4 | % (* denote equal contribution) 5 | % 6 | % We solve the short-and-sparse convolutional dictionary learning problem 7 | % Yi = sum_{k=1}^K A0k conv X0ik + bi * 1 + Ni 8 | % with both A0k and X0ik unknown, b is a constant bias, n is noise 9 | % 10 | % The algorithms solve the following 2D optimization problem 11 | % min F(A,X) = 0.5 * sum_i ||Yi - sum_{k=1}^K Ak conv Xik||_2^2 + lambda * sum ||Xik||_1 12 | % s.t. ||Ak||_F = 1, k = 1,...,K 13 | % A = {A1,A2,...,AK}, Xi = {Xi1,Xi2,...,XiK} 14 | % homotopy chooses a sparse solution path by shrinking the lambda: 15 | % The algorithm starts with a large lambda, and for each iteration it solves 16 | % the problem with using a solver (e.g., ADM or iADM). 17 | % It shrink the lambda geoemtrically and repeat until convergence. 
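%
% A rough sketch of the continuation schedule described above (illustration
% only; eta, delta, lambda_0, lambda_tgt and N_stages are the quantities set
% inside this file, and "solver" stands in for ADM_2D or iADM_2D):
%
%   lambda = lambda_0;
%   for s = 1:N_stages
%       homo_opts.lambda = lambda;            % current penalty
%       homo_opts.tol    = delta * lambda;    % loose tolerance while lambda is large
%       [A, X, b] = solver(Y, homo_opts);     % warm-started from the previous stage
%       homo_opts.A_init = A; homo_opts.X_init = X; homo_opts.b_init = b;
%       lambda = eta * lambda;                % shrink lambda geometrically
%   end
%   % a final solve at lambda_tgt with the original opts.tol finishes the path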
18 | % 19 | % Code written by Qing Qu 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 21 | 22 | function [A, X, b, Psi_Val,psi_Val]= homotopy_2D(Y, opts) 23 | [n(1), n(2), K] = size(opts.A_init); 24 | [m(1),m(2),~,T] = size(opts.X_init); 25 | Psi_Val = []; 26 | psi_Val = []; 27 | 28 | homo_opts = opts; 29 | 30 | %% setting parameters 31 | 32 | switch lower( opts.homo_alg ) 33 | case 'adm' 34 | eta = 0.8; 35 | delta = 5e-2; 36 | homo_opts.MaxIter = 1e2; 37 | case 'iadm' 38 | eta = 0.8; 39 | delta = 1e-1; 40 | homo_opts.MaxIter = 1e2; 41 | case 'reweight' 42 | eta = 0.8; 43 | delta = 0.1; 44 | homo_opts.MaxIter = 2e2; 45 | otherwise 46 | error('wrong algorithm'); 47 | end 48 | 49 | 50 | lambda_0 = 1; % initial lambda 51 | lambda_tgt = opts.lambda; % target lambda 52 | 53 | homo_opts.lambda = lambda_0; 54 | homo_opts.tol = delta*lambda_0; 55 | 56 | N_stages = floor( log(lambda_0/lambda_tgt) / log(1.0/eta ) ); 57 | lambda = lambda_0; 58 | 59 | %% running the algorithm 60 | for k = 1:N_stages 61 | 62 | switch lower( opts.homo_alg ) 63 | case 'adm' 64 | [A, X, b, Psi, psi] = ADM_2D( Y, homo_opts); 65 | case 'iadm' 66 | [A, X, b, Psi, psi] = iADM_2D( Y, homo_opts); 67 | case 'reweight' 68 | [A, X, b, Psi,psi,W] = reweighting_2D( y_0, homo_opts); 69 | homo_opts.W = W; 70 | otherwise 71 | error('wrong algorithm'); 72 | end 73 | 74 | % record result 75 | Psi_Val = [Psi_Val; Psi]; 76 | psi_Val = [psi_Val; psi]; 77 | 78 | % Update the parameters of opts 79 | homo_opts.A_init = A; 80 | homo_opts.X_init = X; 81 | homo_opts.b_init = b; 82 | % homo_opts.count = opts.count + length(f_val); 83 | 84 | lambda = lambda * eta; 85 | tol = delta*lambda; 86 | homo_opts.lambda = lambda; 87 | homo_opts.tol = tol; 88 | 89 | end 90 | 91 | % solving the final stage to precision tol 92 | 93 | homo_opts.lambda = lambda_tgt; 94 | homo_opts.tol = opts.tol; 95 | homo_opts.MaxIter = opts.MaxIter; 96 | 97 | switch lower( opts.homo_alg ) 98 | case 'adm' 99 | [A, X, b, Psi, psi] = ADM_2D( Y, homo_opts); 100 | case 'iadm' 101 | [A, X, b, Psi, psi] = iADM_2D( Y, homo_opts); 102 | case 'reweight' 103 | [A, X, b, Psi,psi] = reweighting_2D( Y, homo_opts); 104 | otherwise 105 | error('wrong algorithm'); 106 | end 107 | 108 | Psi_Val = [Psi_Val;Psi]; 109 | psi_Val = [psi_Val;psi]; 110 | 111 | end -------------------------------------------------------------------------------- /solvers_2D/algorithm/iADM_2D.m: -------------------------------------------------------------------------------- 1 | % Implementation of Alternating Desecent Method (ADM) in the paper 2 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 3 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 4 | % (* denote equal contribution) 5 | % 6 | % We solve the short-and-sparse convolutional dictionary learning problem 7 | % in 2D with multiple samples Y_i of the same kernels jointly 8 | % Y_i = sum_{k=1}^K A0k conv X0ik + bi * 1 + Ni, (i = 1,...,T) 9 | % with both A0k and X0ik unknown 2D signal, bi is a constant bias, Ni is noise 10 | % 11 | % The algorithms solve the following 1D optimization problem 12 | % min F(A,X) = 0.5 * sum_i ||Yi - sum_{k=1}^K Ak conv Xik||_2^2 + lambda * sum ||Xik||_1 13 | % s.t. ||Ak||_F = 1, k = 1,...,K 14 | % A = {A1,A2,...,AK}, Xi = {Xi1,Xi2,...,XiK} 15 | % via accelerated alternating gradient descent: 16 | % 17 | % 1. Fix A, and take a proximal gradient on X with momentum 18 | % 2. 
Fix X, and take a Riemannian gradient on A with momentum 19 | % 20 | % Code written by Qing Qu 21 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 22 | 23 | function [A, X, b, Psi_val, psi_val] = iADM_2D(Y, opts) 24 | 25 | [m(1), m(2), T] = size(Y); 26 | [n(1), n(2), K] = size(opts.A_init); 27 | 28 | lambda = opts.lambda .* opts.W; 29 | 30 | 31 | % initialization of A, X, b, t 32 | A = opts.A_init; A_old = A; 33 | X = opts.X_init; X_old = X; 34 | b = opts.b_init; 35 | t = 1; % initialize the stepsize 36 | 37 | % record function values 38 | Psi_val = []; psi_val = []; 39 | 40 | for iter = 1:opts.MaxIter 41 | 42 | %% Given A fixed, take a descent step on X via proximal gradient descent 43 | Bias = zeros([m,T]); 44 | beta = (iter - 1) / (iter+2); 45 | % beta = 0.9; 46 | 47 | X_hat = X + beta*(X - X_old); 48 | 49 | for k = 1:T 50 | Bias(:, :, k) = ones(m)*b(k); 51 | end 52 | 53 | Y_b = Y - Bias; % remove the bias from Y 54 | opts_f.isgrad = true; opts_f.case = 'isgrad_X'; 55 | [psi_X, grad_psi_X] = f_quad(Y, A, X_hat, opts_f); 56 | 57 | Psi_X = psi_X + g_val( X_hat, lambda, opts); 58 | 59 | % backtracking for update X and update stepsize t 60 | X_old = X; 61 | [X, t] = backtracking_2D( Y_b, A, X_hat, psi_X, grad_psi_X, lambda, t, opts); 62 | 63 | %% Given X fixed, take a Riemannian gradient step on A 64 | 65 | % take a Riemannian gradient step on A 66 | 67 | D = A - A_old; 68 | 69 | A_hat = Retract2D(A, D, beta*ones(K,1) ); 70 | 71 | opts_f.isgrad = true; opts_f.case = 'isgrad_A'; 72 | [psi_A, grad_psi_A] = f_quad(Y, A_hat, X, opts_f); 73 | Psi_A = psi_A + g_val( X, lambda, opts); 74 | % line-search for tau 75 | [A, tau] = linesearch_2D( Y_b, A_hat, X, psi_A, grad_psi_A, opts); 76 | 77 | A_old = A; 78 | 79 | %% Given A, X fixed, update the bias b 80 | opts_f.isgrad = false; 81 | [psi, ~, Y_hat] = f_quad(Y, A, X, opts_f); 82 | Psi = psi + g_val( X, lambda, opts); 83 | 84 | if(opts.isbias) 85 | b = mean(reshape(Y - Y_hat, m(1)*m(2),T))'; 86 | end 87 | 88 | %% update results and check for stopping criteria 89 | 90 | Psi_val = [ Psi_val; Psi]; 91 | psi_val = [ psi_val; psi]; 92 | 93 | if(opts.isdisplay) 94 | fprintf('Running the %d-th simulation, Psi_X = %f, Psi_A = %f...\n', iter, Psi_X, Psi_A); 95 | end 96 | % check stopping criteria 97 | diff_A = norm(A(:) - A_old(:)); 98 | diff_X = norm(X(:) - X_old(:)); 99 | 100 | if( diff_A <= opts.tol && diff_X <= opts.tol ) 101 | break; 102 | end 103 | 104 | 105 | end 106 | 107 | end 108 | 109 | 110 | 111 | -------------------------------------------------------------------------------- /solvers_2D/algorithm/reweighting_2D.m: -------------------------------------------------------------------------------- 1 | function [A, X, b, Psi_val, psi_val]= reweighting_2D(Y, opts) 2 | 3 | Psi_Val = []; 4 | psi_Val = []; 5 | [m(1),m(2),T] = size(Y); 6 | [n(1),n(2),K] = size(opts.A_init); 7 | M = m(1)*m(2); N = n(1)*n(2); 8 | 9 | opts.count = 0; % counting number of iterations 10 | 11 | 12 | for k = 1:opts.MaxIter_reweight 13 | switch lower( opts.reweight_alg ) 14 | case 'adm' 15 | [A, X, b, Psi, psi] = ADM_2D( Y, opts); 16 | case 'iadm' 17 | [A, X, b, Psi, psi] = iADM_2D( Y, opts); 18 | otherwise 19 | error('wrong algorithm'); 20 | end 21 | 22 | % record result 23 | Psi_val = [Psi_Val; Psi]; 24 | psi_val = [psi_Val; psi]; 25 | if(opts.isdisplay) 26 | fprintf('Running the %d-th round of reweighting...\n', k); 27 | end 28 | diff_A = opts.A_init - A; diff_X = opts.X_init -X; 29 | if( norm(diff_A(:)) <= opts.tol && norm(diff_X(:)) <= 
opts.tol ) 30 | break; 31 | end 32 | 33 | % Update the initialization 34 | opts.A_init = A; 35 | opts.X_init = X; 36 | opts.b_init = b; 37 | opts.count = opts.count + length(psi_val); 38 | 39 | % Update the weight matrix 40 | x = sort(abs(X(:)),'descend'); 41 | 42 | thres = x( round(N*K/(4*log(M/N)))); 43 | e = max(thres,1e-3); 44 | 45 | opts.W = 1 ./ ( abs(X)+e ); 46 | end 47 | 48 | W = opts.W; 49 | 50 | end -------------------------------------------------------------------------------- /solvers_2D/auxiliary/F_val.m: -------------------------------------------------------------------------------- 1 | function [ F_val, f_val, g_val, Y_hat] = F_val( Y, A, Z, lambda, isgrad) 2 | 3 | [m(1),m(2),M] = size(Y); 4 | [~,~,K] = size(A); 5 | Y_hat = zeros([m,M]); 6 | 7 | g_val = inf; F_val = inf; 8 | 9 | % calculate the function value 10 | for t = 1:M 11 | for k = 1:K 12 | Y_hat(:,:,t) = Y_hat(:,:,t) + cconvfft2( A(:,:,k), Z{k}(:,:,t)); 13 | end 14 | end 15 | 16 | f_val = 0.5 * norm( Y(:) - Y_hat(:) )^2; 17 | 18 | 19 | 20 | end -------------------------------------------------------------------------------- /solvers_2D/auxiliary/Log_map.m: -------------------------------------------------------------------------------- 1 | function T = Log_map(Z, D) 2 | 3 | proj_a = @(w,z) z - innerprod(w,z)*w / norm(w(:))^2 ; 4 | 5 | [n,K] = size(Z); 6 | T = zeros(n,K); 7 | 8 | for k = 1:K 9 | alpha = acos(innerprod(Z(:,:,k),D(:,:,k))); 10 | proj_tmp = proj_a( Z(:,k), D(:,k) ) ; 11 | T( :, :, k) = proj_tmp * alpha/sin(alpha) ; 12 | end 13 | 14 | 15 | end 16 | -------------------------------------------------------------------------------- /solvers_2D/auxiliary/Log_map2D.m: -------------------------------------------------------------------------------- 1 | function T = Log_map2D(Z, D) 2 | 3 | proj_a = @(w,z) z - innerprod(w,z)*w / norm(w(:))^2 ; 4 | 5 | [n(1),n(2),K] = size(Z); 6 | T = zeros([n,K]); 7 | 8 | for k = 1:K 9 | alpha = acos(innerprod(Z(:,:,k),D(:,:,k))); 10 | proj_tmp = proj_a( Z(:,:,k), D(:,:,k) ) ; 11 | T( :, :, k) = proj_tmp * alpha/(sin(alpha)+10^(-20)) ; 12 | end 13 | 14 | 15 | end 16 | -------------------------------------------------------------------------------- /solvers_2D/auxiliary/Retract2D.m: -------------------------------------------------------------------------------- 1 | % retraction operator 2 | function A1 = Retract2D(A, D, tau) 3 | 4 | [n(1), n(2), K] = size(A); 5 | 6 | A1 = zeros([n,K]); 7 | 8 | for k = 1:K 9 | % A1(:,:,k) = A(:,:,k) * cos(tau(k)) + (D(:,:,k) / tau(k)) * sin(tau(k)); 10 | Delta = A(:,:,k) + tau(k) * D(:,:,k); 11 | A1(:,:,k) = Delta / norm(Delta(:)); 12 | end 13 | 14 | % T(:,k) = Z(:,k) * cos(t(k)) + ( D(:,k) / t(k)) * sin(t(k)); 15 | 16 | end -------------------------------------------------------------------------------- /solvers_2D/auxiliary/backtracking_2D.m: -------------------------------------------------------------------------------- 1 | % backtacking linesearch for updating stepsize of proximal gradient on X 2 | function [X1, t] = backtracking_2D( Y, A, X, fX, grad_fX, lambda, t, opts) 3 | 4 | [ m(1), m(2), T] = size(Y); 5 | 6 | t = 5*t; 7 | 8 | X1 = proximal_mapping( X, grad_fX, lambda, t, opts); %proximal mapping 9 | 10 | X1 = thresholding( X1, opts); 11 | 12 | 13 | opts_f.isgrad = false; 14 | [ psi_val, ~, ~] = f_quad( Y, A, X1, opts_f); 15 | Psi_val = psi_val + g_val(X1,lambda,opts); 16 | 17 | 18 | while ( Psi_val > func_quad_val(X, X1, fX, grad_fX, lambda, t, opts) && t>1e-12 ) 19 | t = 1/2*t; 20 | X1 = proximal_mapping( X, grad_fX, lambda, t, opts); 
%proximal mapping
21 | X1 = thresholding( X1, opts);
22 | [ psi_val, ~, ~] = f_quad( Y, A, X1, opts_f);
23 | Psi_val = psi_val + g_val(X1,lambda,opts);
24 | end
25 | 
26 | end
27 | 
28 | 
29 | %% function value of linearization
30 | function f_val = func_quad_val(X, Z, fX, grad_fX, lambda, tau, opts)
31 | 
32 | [ m(1), m(2), K, T] = size(Z);
33 | f_val = fX;
34 | 
35 | f_val = f_val + innerprod( grad_fX, Z - X );
36 | f_val = f_val + 0.5/tau * sum( ( Z(:) - X(:) ).^2 );
37 | 
38 | % add the sparsity penalty g(Z) to the quadratic model
39 | Z_lambda = lambda .* Z;
40 | switch lower(opts.prox)
41 | case 'l1'
42 | f_val = f_val + norm( Z_lambda(:), 1);
43 | case 'l12'
44 | f_val = f_val + g_val( Z, lambda, opts);
45 | 
46 | 
47 | 
48 | end
49 | 
50 | 
51 | end
52 | 
53 | %% function of hard thresholding on X
54 | 
55 | function X_val = thresholding( X, opts)
56 | 
57 | X_val = X;
58 | 
59 | if(opts.isnonnegative_X)
60 | X_val = max(X_val,0);
61 | end
62 | 
63 | if(opts.hard_thres)
64 | ind = X_val<=opts.hard_threshold;
65 | X_val(ind) = 0;
66 | end
67 | 
68 | end
69 | 
70 | %% function of proximal operator on X
71 | 
72 | function X_val = proximal_mapping(X, grad_fX, lambda, t, opts)
73 | 
74 | [m(1),m(2),K,T] = size(X);
75 | 
76 | % l1: entrywise soft-thresholding; l12: row-wise (group) soft-thresholding
77 | switch lower(opts.prox)
78 | case 'l1'
79 | X_val = soft_thres( X - t * grad_fX, lambda*t);
80 | case 'l12'
81 | for k = 1:K
82 | X_r = reshape( X(:,:,k,:) - t * grad_fX(:,:,k,:), m(1)*m(2), T);
83 | lambda_r = reshape( lambda(:,:,k,:), m(1)*m(2), T);
84 | X_tmp = row_soft_thres( X_r, lambda_r * t);
85 | X_val(:,:,k,:) = reshape( X_tmp, m(1), m(2), 1, T);
86 | end
87 | end
88 | 
89 | end
90 | 
91 | 
92 | 
93 | 
-------------------------------------------------------------------------------- /solvers_2D/auxiliary/cconvfft2.m: --------------------------------------------------------------------------------
1 | %CCONVFFT2 FFT implementation of 2D cyclic convolution
2 | % C = cconvfft2(A, B) convolves A and B using the larger size
3 | %
4 | % C = cconvfft2(A, B, N) convolves A and B using size N
5 | %
6 | % C = cconvfft2(A, B, N, adj) convolves A and B using size N, adj is a
7 | % string chosen from: 'left', 'right', or 'both'. Choosing 'left' convolves
8 | % B with the adjoint kernel of A, and vice versa. Choosing 'both'
9 | % convolves the adjoint kernels of both A and B.
10 | %
11 | % Both N and adj can be left empty.
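%
% Example (an illustrative sketch; the sizes below are arbitrary):
%   A  = randn(8, 8);                        % small kernel
%   B  = randn(64, 64);                      % larger signal
%   C1 = cconvfft2(A, B);                    % cyclic convolution at the larger size
%   C2 = cconvfft2(A, B, [64 64]);           % same result, size given explicitly
%   C3 = cconvfft2(A, B, [64 64], 'left');   % correlate with the adjoint kernel of A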
12 | 13 | function [ C ] = cconvfft2( A, B, varargin ) 14 | 15 | 16 | numvararg = numel(varargin); 17 | 18 | if numvararg > 2 19 | error('Too many input arguments.'); 20 | end 21 | 22 | N = max(size(A), size(B)); 23 | if numvararg >= 1 && ~isempty(varargin{1}) 24 | N = varargin{1}; 25 | end 26 | 27 | A_hat = fft2(A,N(1),N(2)); 28 | B_hat = fft2(B,N(1),N(2)); 29 | if numvararg >= 2 && ~isempty(varargin{2}) 30 | if strcmp(varargin{2}, 'left') 31 | A_hat = conj(A_hat); 32 | elseif strcmp(varargin{2}, 'right') 33 | B_hat = conj(B_hat); 34 | elseif strcmp(varargin{2}, 'both') 35 | A_hat = conj(A_hat); 36 | B_hat = conj(B_hat); 37 | else 38 | end 39 | end 40 | 41 | C = ifft2( A_hat .* B_hat ); 42 | 43 | end 44 | 45 | -------------------------------------------------------------------------------- /solvers_2D/auxiliary/compute_gradient.m: -------------------------------------------------------------------------------- 1 | %% compute (Riemannian) gradient 2 | % gradient_case = 0 for gradient of X, 3 | % gradient_case = 1 for gradient of A 4 | function Grad = compute_gradient( A, X, Y_b, Y_hat, gradient_case) 5 | 6 | proj_a = @(W,Z) Z - sum(sum(conj(W).*Z)) *W; 7 | 8 | [m(1),m(2),K] = size(X); 9 | [n(1),n(2),~] = size(A); 10 | 11 | switch gradient_case 12 | case 0 13 | Grad = zeros([m,K]); 14 | case 1 15 | Grad = zeros([n,K]); 16 | end 17 | 18 | for k = 1:K 19 | switch gradient_case 20 | case 0 21 | Grad(:,:,k) = cconvfft2( A(:,:,k) , Y_hat - Y_b, m, 'left'); 22 | case 1 23 | G = cconvfft2( X(:,:,k), Y_hat - Y_b, m, 'left'); 24 | Grad(:,:,k) = proj_a( A(:,:,k), G(1:n(1),1:n(2))); 25 | end 26 | end 27 | 28 | end 29 | 30 | -------------------------------------------------------------------------------- /solvers_2D/auxiliary/f_quad.m: -------------------------------------------------------------------------------- 1 | function [ F_val, Grad, Y_hat] = f_quad( Y, A, Z, opts) 2 | 3 | [ m(1), m(2), T] = size(Y); 4 | [ n(1), n(2), K] = size(A); 5 | Y_hat = zeros([m,T]); 6 | 7 | %% evaluate the function value 8 | 9 | for t = 1:T 10 | for k = 1:K 11 | Y_hat(:,:,t) = Y_hat(:,:,t) + cconvfft2( A(:,:,k), Z(:,:,k,t)); 12 | end 13 | end 14 | F_val = 0.5 * norm( Y(:) - Y_hat(:) )^2; 15 | 16 | 17 | %% evaluate the gradient 18 | % gradient_case = 0 for gradient of X, 19 | % gradient_case = 1 for gradient of A 20 | Proj = @(U,V) V - sum(sum(conj(U) .* V)) * U / norm(U(:))^2 ; 21 | 22 | Grad = []; 23 | if(opts.isgrad) 24 | switch lower(opts.case) 25 | case 'isgrad_x' 26 | Grad = zeros([m,K,T]); 27 | case 'isgrad_a' 28 | Grad = zeros([n,K]); 29 | end 30 | for k = 1:K 31 | for t = 1:T 32 | switch lower(opts.case) 33 | case 'isgrad_x' 34 | Grad(:,:,k,t) = cconvfft2( A(:,:,k) , Y_hat(:,:,t)... 
35 | - Y(:,:,t), m, 'left'); 36 | case 'isgrad_a' 37 | G = cconvfft2( Z(:,:,k,t), Y_hat(:,:,t) - Y(:,:,t), m, 'left'); 38 | Grad(:,:,k) = Grad(:,:,k) + G(1:n(1), 1:n(2)); 39 | end 40 | end 41 | 42 | if(lower(opts.case) == 'isgrad_a') 43 | Grad(:,:,k) = Proj( A(:,:,k), Grad(:,:,k)); 44 | end 45 | end 46 | 47 | 48 | end -------------------------------------------------------------------------------- /solvers_2D/auxiliary/g_val.m: -------------------------------------------------------------------------------- 1 | function G_Val = g_val(Z, lambda, opts) 2 | 3 | [m(1),m(2),K,T] = size(Z); 4 | G_Val = 0; 5 | 6 | Z_lambda = reshape(lambda .* Z, m(1)*m(2), K, T); 7 | switch lower(opts.prox) 8 | case 'l1' 9 | G_Val = norm(Z_lambda(:),1); 10 | case 'l12' 11 | for k = 1:K 12 | tmp = Z_lambda(:,:,k); 13 | G_Val = G_Val + sum( sqrt(sum( tmp.^2, 2)) ); 14 | end 15 | end 16 | 17 | end 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /solvers_2D/auxiliary/innerprod.m: -------------------------------------------------------------------------------- 1 | function f = innerprod(U,V) 2 | T = U(:).*V(:); 3 | f = sum(T); 4 | 5 | end -------------------------------------------------------------------------------- /solvers_2D/auxiliary/linesearch_2D.m: -------------------------------------------------------------------------------- 1 | % linesearch for updating the stepsize of Riemannian gradient on A 2 | 3 | function [A1,tau] = linesearch_2D( Y, A, X, fA, grad_fA, opts) 4 | 5 | % set parameters 6 | [m(1), m(2), K, T] = size(X); 7 | 8 | eta = 0.8; 9 | tau = 1; 10 | 11 | % calcuate the norm 12 | norm_grad = norm(grad_fA(:)); 13 | 14 | A1 = Retract2D( A, -grad_fA, tau*ones(K,1)); 15 | if(opts.isnonnegative_A) 16 | A1 = max(A1,0); 17 | end 18 | 19 | opts_f.isgrad = false; 20 | [psi_val, ~, ~] = f_quad(Y, A1, X, opts_f); 21 | 22 | % Riemannian linesearch for the stepsize tau 23 | while( psi_val > fA - eta*tau * norm_grad^2 && tau>= 1e-12 ) 24 | tau = 0.5 * tau; 25 | A1 = Retract2D( A, -grad_fA, tau*ones(K,1)); 26 | if(opts.isnonnegative_A) 27 | A1 = max(A1,0); 28 | A1 = A1 / norm(A1(:)); 29 | end 30 | 31 | [psi_val, ~, ~] = f_quad(Y, A1, X, opts_f); 32 | 33 | end 34 | 35 | end 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /solvers_2D/auxiliary/row_soft_thres.m: -------------------------------------------------------------------------------- 1 | % soft-thresholding for l12 norm 2 | function X_val = row_soft_thres(X, lambda) 3 | [~,M] = size(X); 4 | 5 | norm_row = sqrt(sum(X.^2,2)); 6 | norm_row_copy = norm_row(:,ones(M,1)); 7 | 8 | X_val = max( (norm_row_copy - lambda), 0) .* (X./ norm_row_copy); 9 | 10 | end 11 | -------------------------------------------------------------------------------- /solvers_2D/auxiliary/shift_correction_2D.m: -------------------------------------------------------------------------------- 1 | function [A_shift,X_shift] = shift_correction_2D(A, X) 2 | 3 | [~,~,K,T] = size(X); 4 | [n(1),n(2),~] = size(A); 5 | n(1) = n(1)/3; n(2) = n(2)/3; 6 | 7 | A_shift = zeros(n(1),n(2),K); 8 | X_shift = zeros(size(X)); 9 | 10 | for k = 1:K 11 | Corr = zeros(2*n); 12 | for i = 1:3*n(1)-n(1) 13 | for j =1:3*n(2)-n(2) 14 | window = A(i:i+n(1)-1,j:j+n(2)-1,k); 15 | Corr(i,j) = norm(window(:)); 16 | end 17 | end 18 | 19 | max_val = max(Corr(:)); 20 | [ind_1, ind_2] = find(Corr == max_val); 21 | 22 | A_shift(:,:,k) = A(ind_1:ind_1+n(1)-1, ind_2:ind_2+n(2)-1,k); 23 | for t = 1:T 24 | X_shift(:,:,k,t) = 
circshift(X(:,:,k,t), [ ind_1+1, ind_2+1]); 25 | end 26 | end 27 | 28 | 29 | end -------------------------------------------------------------------------------- /solvers_2D/auxiliary/soft_thres.m: -------------------------------------------------------------------------------- 1 | function z = soft_thres(z,lambda) 2 | 3 | z = sign(z) .* max( abs(z)-lambda,0); 4 | 5 | end -------------------------------------------------------------------------------- /solvers_2D/data/calcium_img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/qingqu06/sparse_deconvolution/821f419cf615e31db8ee777f97f61445b8c65120/solvers_2D/data/calcium_img.png -------------------------------------------------------------------------------- /solvers_2D/test_2D.m: -------------------------------------------------------------------------------- 1 | clc;close all;clear all; 2 | addpath(genpath(pwd)); 3 | 4 | % demonstration of the proposed nonconvex optimization methods for 2D data 5 | % in the paper 6 | % ``Short-and-Sparse Deconvolution -- A Geometric Approach'' 7 | % Yenson Lau*, Qing Qu*, Han-Wen Kuo, Pengcheng Zhou, Yuqian Zhang, and John Wright 8 | % (* denote equal contribution) 9 | % 10 | % We solve the short-and-sparse convolutional dictionary learning (CDL) problem 11 | % Y_i = sum_{k=1}^K A0k conv X0ik + bi * 1 + Ni, (i = 1,...,T) 12 | % with both A0k and X0ik unknown, bi is a constant bias, Ni is noise 13 | % 14 | % The algorithms solve the following 2D optimization problem 15 | % min F(A,X) = 0.5 * sum_i ||Yi - sum_{k=1}^K Ak conv Xik||_2^2 + lambda * sum||Xik||_1 16 | % s.t. ||Ak||_F = 1, k = 1,...,K 17 | % A = {A1,A2,...,AK}, Xi = {Xi1,Xi2,...,XiK} 18 | % 19 | % Demonstration of the proposed Alternating desecent method (ADM), inertial ADM (iADM), 20 | % homotopy acceleration and reweighting method 21 | % 22 | % The test is performed on a 2D two photon Calcium image obtained from 23 | % Allen Institute website: http://observatory.brain-map.org/visualcoding/search/overview 24 | % 25 | % Code written by Qing Qu 26 | % 27 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 28 | 29 | % platform for simulation of Convolutionoal dictionary learning problem 30 | 31 | % read data 32 | Y = double(imread('calcium_img.png')); 33 | Y = Y ./ max(Y(:)); 34 | 35 | 36 | m = [512,512]; % size of the image 37 | n = [20,20]; % size of the kernel 38 | K = 2; % number of kernels/atoms 39 | T = 1; % number of images/samples 40 | 41 | 42 | %% Set up parameters for algorithms solving CDL problem 43 | 44 | % setting parameters 45 | opts.tol = 1e-4; % tolerance parameter for convergence 46 | opts.lambda = 1e-1; % sparsity regularization parameter 47 | opts.isnonnegative_X = true; % recover a nonnegative activation map 48 | opts.isnonnegative_A = false; % recover nonnegative kernels 49 | opts.isbias = true; % recover a constant bias 50 | opts.hard_thres = false; % parameter to set hard thresholding 51 | opts.MaxIter = 1e3; % iterations and updates 52 | opts.isupperbound = false; % decision if set upper bound on X 53 | opts.MaxIter_reweight = 10; % number of max iteration for reweighting 54 | opts.isdisplay = true; % whether display intermediate result 55 | opts.prox = 'l1'; % choose penalization function 56 | 57 | alg_type = 'iadm'; % choose the algorithm type 'adm','iadm',... 
58 | %'homotopy-adm','homotopy-iadm','reweighting-adm','reweighting-iadm' 59 | 60 | %% initialization 61 | 62 | % initialization for A 63 | opts.A_init = zeros([3*n,K]); 64 | for k = 1:K 65 | ind_1 = randi( m(1)-n(1)); 66 | ind_2 = randi( m(2)-n(2)); 67 | tmp = Y(ind_1:ind_1+n(1)-1, ind_2:ind_2+n(2)-1); 68 | tmp = tmp / norm(tmp(:)); 69 | opts.A_init(n(1)+1:2*n(1), n(2)+1:2*n(2),k) = tmp; 70 | end 71 | 72 | % initialization for X, b, W 73 | opts.X_init = zeros([m,K,T]); 74 | opts.b_init = mean(reshape(Y,m(1)*m(2),T),1)'; 75 | opts.W = ones([m,K,T]); 76 | 77 | %% solve the 2D CDL problem using one of the algorithms below 78 | 79 | switch lower(alg_type) 80 | case 'adm' 81 | [A, X] = ADM_2D( Y, opts); 82 | case 'iadm' 83 | [A, X] = iADM_2D( Y, opts); 84 | case 'homotopy-adm' 85 | opts.homo_alg ='ADM'; 86 | [A, X] = homotopy_2D( Y, opts); 87 | case 'homotopy-iadm' 88 | opts.homo_alg ='iADM'; 89 | [A, X] = homotopy_2D( Y, opts); 90 | case 'reweighting-adm' 91 | opts.reweight_alg = 'ADM'; 92 | [A, X] = reweighting_2D( Y, opts); 93 | case 'reweighting-iadm' 94 | opts.reweight_alg = 'iADM'; 95 | [A, X] = reweighting_2D( Y, opts); 96 | end 97 | 98 | %% plot the results 99 | % shift correction 100 | [A_shift, X_shift] = shift_correction_2D(A, X); 101 | 102 | figure(1); 103 | imagesc(Y); 104 | 105 | figure(2); 106 | for k = 1:K 107 | subplot(1,K,k); imagesc(A(:,:,k)); 108 | colormap('jet'); 109 | axis off; 110 | end 111 | 112 | for t = 1:T 113 | figure(2+t); 114 | for k = 1:K 115 | subplot(1,K,k); imagesc(X(:,:,k,t)); 116 | colormap('jet'); 117 | axis off; 118 | end 119 | end 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | --------------------------------------------------------------------------------
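For reference, a minimal self-contained sketch of the 2D data model Y = sum_k Ak conv Xk + b*1 that the solvers above fit, assuming only that cconvfft2 (solvers_2D/auxiliary) is on the path; the sizes, sparsity level, and bias below are arbitrary, and noise is omitted:

    m = [64, 64];  n = [8, 8];  K = 2;  b = 0.5;   % image size, kernel size, #kernels, bias
    A = randn([n, K]);                             % kernels (normalized in the loop below)
    X = zeros([m, K]);                             % sparse activation maps
    Y = b * ones(m);                               % start from the constant bias term
    for k = 1:K
        A(:,:,k) = A(:,:,k) / norm(reshape(A(:,:,k), [], 1));   % unit Frobenius norm kernel
        X(:,:,k) = double(rand(m) <= 0.005);                    % Bernoulli spike map
        Y = Y + cconvfft2(A(:,:,k), X(:,:,k), m);                % add the k-th cyclic convolution
    end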