├── BBstep.m
├── BFGS.m
├── Conjugate_gradient.m
├── GD_vs_FISTA.m
├── GMRes.m
├── PCA_and_least_square_regression.m
├── Proximal_vs_FISTA_fixed_step.m
├── README.md
├── Schrodinger.m
├── damped_newton.m
├── data.txt
├── gradient_desc.m
├── kmeans.m
├── line search梯度下降.py
├── max_variance.m
├── normalized_cut.m
├── projected_vs_nesterov_second_fixed_step.m
├── projected_vs_nesterov_second_line_search.m
├── proximal_vs_FISTA_line_search.m
├── subgradient_method.m
├── 求两个基向量对空间某点的平行四边形分解.py
└── 证明到点的距离小于到集合的距离的点组成凸集.py

/BBstep.m:
--------------------------------------------------------------------------------
% Gradient descent with a Barzilai-Borwein (BB) step size and Armijo backtracking.
% NOTE(review): relies on f(x) and grad(x) defined elsewhere on the MATLAB path
% (not visible in this dump) -- confirm their signatures before running.
x = [1; 0.1];
alpha = 0.3;             % Armijo sufficient-decrease parameter (also scales d below)
beta = 0.5;              % backtracking shrink factor
maxit = 1000;
gra = zeros(maxit, 1);   % FIX: zeros(1000) allocated a 1000x1000 matrix, not a vector
t = 1;
for k = 1:maxit
    d = -alpha*grad(x);
    x_plus = x + t*d;
    s = x_plus - x;
    y = grad(x_plus) - grad(x);
    % Two possible BB updates of t; the second one (BB1) is active.
    %t = s'*y/(y'*y);    % BB2 step
    t = s'*s/(s'*y);     % BB1 step
    % Armijo backtracking: shrink t until the sufficient-decrease condition holds.
    while f(x) - f(x+t*d) < -alpha*t*grad(x)'*d
        t = t*beta;
    end

    x = x + t*d;
    gra(k) = norm(grad(x));

    if norm(grad(x)) <= 1e-7
        break
    end
end

disp('ans=')
disp(f(x))
semilogy(1:k, gra(1:k))  % FIX: plot only iterations performed; the original plotted
                         % the whole preallocated array (trailing zeros after break)
title('BB step')
xlabel('step')
ylabel('the norm of gradient of f(x)')

/BFGS.m:
--------------------------------------------------------------------------------
% BFGS quasi-Newton method with Armijo backtracking line search.
% NOTE(review): relies on f(x), grad(x) and hessian(x) defined elsewhere on the
% MATLAB path (not visible in this dump) -- confirm before running.
x = [1; 1];
alpha = 0.001;           % Armijo sufficient-decrease parameter
beta = 0.5;              % backtracking shrink factor
maxit = 15;
gra = zeros(maxit, 1);   % FIX: zeros(15) allocated a 15x15 matrix, not a vector
H = inv(hessian(x));     % initial inverse-Hessian approximation
for k = 1:maxit
    t = 1;               % FIX: reset step to 1 every iteration; the original set t=1
                         % once outside the loop, so backtracking shrank t
                         % monotonically across iterations and the unit step that
                         % gives BFGS fast local convergence was never retried
    d = -H*grad(x);
    while f(x) - f(x+t*d) < -alpha*t*grad(x)'*d
        t = t*beta;
    end

    x_plus = x + t*d;
    s = x_plus - x;
    q = grad(x_plus) - grad(x);
    % BFGS inverse-Hessian update, written with the correction vector w.
    w = s/(s'*q) - H*q/(q'*H*q);
    H = H + s*s'/(s'*q) - (H*(q*q')*H)/(q'*H*q) + q'*H*q*(w*w');
    x = x_plus;
    gra(k) = norm(grad(x));

    if norm(grad(x)) <= 1e-7
        break
    end
end

disp('iteration:');
disp(k);
disp('ans=')
disp(f(x))
semilogy(1:k, gra(1:k))  % FIX: plot only iterations performed (see BB-step note)
title('BFGS')
xlabel('step')
ylabel('the norm of gradient of f(x)')

/Conjugate_gradient.m:
--------------------------------------------------------------------------------
% Conjugate gradient method applied to a 2-D discrete-Laplacian system A*x = b.
%%%%%%%%%%%%%%%%%% generate data %%%%%%%%%%%%%%%%%%%
n = 100;
L = sparse(eye(n) - diag(ones(n-1, 1), 1));
A = kron(L + L', eye(n)) + kron(eye(n), L + L');  % 10000x10000 sparse SPD matrix
b = ones(n*n, 1);

%%%%%%%%%%%%%%%%%% iteration %%%%%%%%%%%%%%%%%%%
x = zeros(n*n, 1);
res = zeros(10001, 1);  % FIX: zeros(10001) allocated a 10001x10001 dense matrix
% FIX: keep only the current residual vector. The original stored the entire
% residual history in a dense 10000x10000 matrix (~800 MB) although CG only
% ever reads the most recent residual.
r = b - A*x;
res(1) = norm(r);
p = r;

for k = 1:10000
    Ap = A*p;                 % save computation: one mat-vec per iteration
    rr = r'*r;                % reuse r'*r for both alpha and beta
    alpha = rr/(p'*Ap);
    x = x + p*alpha;
    r = r - Ap*alpha;
    res(k+1) = norm(r);       % same recurrence/indices as the original:
    beta = (r'*r)/rr;         % res(1)=||r0||, res(k+1)=||r_k||, 10001 entries total
    p = r + p*beta;
    disp(k)
end
semilogy(res(1:10001))
title('Conjugate gradient method')
xlabel('Iteration')
ylabel('Residual')

/GD_vs_FISTA.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Schuture/Convex-optimization/b819eafe61323935eecb15cd8e1421483de64e88/GD_vs_FISTA.m

--------------------------------------------------------------------------------
/GMRes.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Schuture/Convex-optimization/b819eafe61323935eecb15cd8e1421483de64e88/GMRes.m

--------------------------------------------------------------------------------
/PCA_and_least_square_regression.m:
--------------------------------------------------------------------------------
% Compare a PCA (total-least-squares) line with an ordinary least-squares line
% on 100 (x,y) points read from data.txt.
%%%%%%%%%%%%%% read data %%%%%%%%%%%%%%%%
filename = 'data.txt';
[x,y]=textread(filename,'%n%n',100);  % NOTE: textread is deprecated; newer code
                                      % would use readmatrix/textscan
plot(x,y,'o')
hold on

%%%%%%%%%%%%%% PCA %%%%%%%%%%%%%%%%
data = [x';y']; % data should be columnwise
b = [mean(x);mean(y)];
data = data - b; % we do SVD with the 2*100 matrix
[U,S,V] = svd(data,'econ');
direction = U(:,1);             % first principal direction
k = direction(2)/direction(1);  % slope
c = b(2) - k*b(1);              % intercept: the PCA line passes through the mean
f = @(x)(k*x+c);
line([0,4],[f(0),f(4)],'linestyle','--','color','k')
hold on

%%%%%%%%%%%%%% least square %%%%%%%%%%%%%%%%
% FIX: with an intercept in the model y = k*x + c, the least-squares slope must
% be computed on CENTERED data, k = cov(x,y)/var(x). The original used the
% uncentered ratio x'*y/(x'*x), which is the no-intercept slope and is
% inconsistent with the intercept formula on the next line.
xc = x - b(1);
yc = y - b(2);
k = xc'*yc/(xc'*xc);  % slope
c = b(2) - k*b(1);    % intercept (line through the mean point)
f = @(x)(k*x+c);
line([0,4],[f(0),f(4)],'linestyle','--','color','r')
hold on
legend('data','PCA','Least square')
title('Linear regression')
xlabel('x')
ylabel('y')

--------------------------------------------------------------------------------
/Proximal_vs_FISTA_fixed_step.m:
--------------------------------------------------------------------------------
% Compare proximal gradient descent and FISTA with fixed step 1/L on a random
% sparse-recovery LASSO instance: min 0.5*||A*x-b||^2 + tau*||x||_1.
%%%%%%%%%%%%%%%%%% generate data %%%%%%%%%%%%%%%%%%
global m n s A b tau
m = 1000;
n = 500;
s = 50;                 % sparsity of the ground-truth signal
A = randn(m,n);
xs = zeros(n,1);
picks = randperm(n);
xs(picks(1:s)) = randn(s,1);
b = A*xs;
tau = 1e-6;

%%%%%%%%%%%%%%%%%% proximal fixed step size %%%%%%%%%%%%%%%%%%
L = norm(A'*A);         % Lipschitz constant of the smooth part's gradient
t = 1/L;
err = zeros(200,1);     % FIX: renamed from "diff", which shadowed the builtin diff()
x = zeros(n,1);
x_last = x;
for k = 1:200
    x_next = prox(x,t);
    err(k) = norm(x_next-xs);  % FIX: record the error of the NEW iterate; the
                               % original logged norm(x-xs) before the update,
                               % so every entry lagged one iteration and the
                               % final iterate was never measured
    disp(k)
    x_last = x;
    x = x_next;
end
semilogy(err(1:200))
hold on

%%%%%%%%%%%%%%%%%% FISTA fixed step size %%%%%%%%%%%%%%%%%%
err = zeros(200,1);
x = zeros(n,1);
x_last = x;
for k = 1:200
    y = x+(k-2)/(k+1)*(x-x_last);  % Nesterov momentum extrapolation
    x_next = prox(y,t);
    err(k) = norm(x_next-xs);      % FIX: same off-by-one fix as above
    disp(k)
    x_last = x;
    x = x_next;
end
semilogy(err(1:200))
title('Proximal gradient vs FISTA with step size 1/L')
legend('Proximal gradient','FISTA')
xlabel('Iteration')
ylabel('Difference between x and xs')

%%%%%%%%%%%%%% define functions %%%%%%%%%%%%%%
% Objective: 0.5*||A*x-b||^2 + tau*||x||_1.
function [ret]=f(x)
global A b tau
ret = 0.5*norm(A*x-b)^2+tau*norm(x,1);
end

% Gradient of the smooth part 0.5*||A*x-b||^2.
function [ret]=grad(x)
global A b
ret = A'*(A*x-b);
end

% Proximal-gradient step: forward gradient step, then soft-thresholding.
% NOTE(review): this function is TRUNCATED in the dump below; also the visible
% z(i)<-t*tau branch sets ret(i)=0, whereas soft-thresholding of tau*||x||_1
% should give ret(i)=z(i)+t*tau there -- verify against the full original file.
function [ret] = prox(y,t)
global n tau
z = y-t*grad(y);
ret = zeros(n,1);
for i=1:n
if z(i)>t*tau
ret(i) = z(i)-t*tau;
end
if z(i)<-t*tau
ret(i) = 0;
end
if -t*tau