├── README.md
├── data
│   └── mnist_plus.mat
├── demo_mnist_svmplus.m
├── solve_l2svmplus_kernel.m
├── svm_plus_train.m
├── svmtrain.mexw64
└── utils
    ├── L1_normalization.m
    ├── L2_distance_2.m
    ├── getKernel.m
    └── return_GaussianKernel.m

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# svmplus_matlab
An implementation of SVM+ with the MATLAB QP solver. It has also been tested with the MOSEK QP solver.

A faster implementation of kernel SVM+ based on a new SVM+ formulation is also released. You need libsvm to run it.

If you find it useful, please cite the following paper:

Wen Li, Dengxin Dai, Mingkui Tan, Dong Xu, and Luc Van Gool, "Fast Algorithms for Linear and Kernel SVM+," IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016.

For any questions, please contact Wen Li via liwenbnu@gmail.com.

------------------------
Dependencies

The libsvm library is needed. A compiled mex file (64-bit Windows) is included. For other platforms, please
* download the latest libsvm package, and run "matlab/make.m" inside it to compile a mex file compatible with your OS;
* put the resulting mex file in the root folder of this repository, or add the folder containing the mex file to your MATLAB path at the beginning of demo_mnist_svmplus.m:
      addpath('<path_to_libsvm>/matlab/')

------------------------
How to use

Simple. Run "demo_mnist_svmplus.m", and see the results ^_^.
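------------------------
Minimal example

A minimal sketch (not from the original README) of calling svm_plus_train directly on precomputed kernels. The toy data, the linear kernels, and the variable names below are purely illustrative; quadprog requires the Optimization Toolbox:

      rng(0);
      n  = 100;
      X  = randn(10, n);               % d-by-n features
      Xp = randn(5, n);                % privileged features for the same samples
      y  = sign(randn(n, 1)); y(y==0) = 1;
      K  = X'  * X;                    % n-by-n kernel, feature space
      tK = Xp' * Xp;                   % n-by-n kernel, privileged space
      param.svm_C = 1; param.gamma = 1;
      model = svm_plus_train(y, K, tK, param);
      dec = K(:, model.SVs) * model.sv_coef - model.rho;   % decision values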
------------------------
Copyright

Non-commercial use only. All rights reserved.

--------------------------------------------------------------------------------
/data/mnist_plus.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenli-vision/svmplus_matlab/d3e3728cac84dc463622cc85c27842f6d3959198/data/mnist_plus.mat

--------------------------------------------------------------------------------
/demo_mnist_svmplus.m:
--------------------------------------------------------------------------------
clear; clc;
addpath('./utils');
% load data
load('./data/mnist_plus.mat');

% preprocess data with L1-normalization
train_features   = L1_normalization(train_features');
test_features    = L1_normalization(test_features');
train_PFfeatures = L1_normalization(train_PFfeatures');

% binary task: digit 5 vs. the rest
train_labels(train_labels==5) = 1;
train_labels(train_labels~=1) = -1;
test_labels(test_labels==5)   = 1;
test_labels(test_labels~=1)   = -1;

% calculate kernels
kparam = struct();
kparam.kernel_type = 'gaussian';
[K, train_kparam] = getKernel(train_features, kparam);
testK = getKernel(test_features, train_features, train_kparam);

kparam = struct();
kparam.kernel_type = 'gaussian';
tK = getKernel(train_PFfeatures, kparam);

% ================ train SVM+ ====================
% parameters could be obtained via validation
svmplus_param.svm_C = 1;
svmplus_param.gamma = 1;
tic;
model = svm_plus_train(train_labels, K, tK, svmplus_param);
tt = toc;
decs = testK(:, model.SVs) * model.sv_coef - model.rho;
acc = sum((2*(decs>0)-1) == test_labels)/length(test_labels);

fprintf(2, 'Original SVM+, time = %f, Acc = %.4f.\n', tt, acc);

% ================ train l2-SVM+ ====================
% parameters could be obtained via validation
tic;
model = solve_l2svmplus_kernel(train_labels, K, tK, svmplus_param.svm_C, svmplus_param.gamma);
tt = toc;
alpha = zeros(length(train_labels), 1);
alpha(model.SVs) = full(model.sv_coef);
alpha = abs(alpha);
decs = (testK + 1)*(alpha.*train_labels);
acc = sum((2*(decs>0)-1) == test_labels)/length(test_labels);

fprintf(2, 'L2-SVM+, time = %f, Acc = %.4f.\n', tt, acc);

--------------------------------------------------------------------------------
/solve_l2svmplus_kernel.m:
--------------------------------------------------------------------------------
function model = solve_l2svmplus_kernel(labels, K, tK, C, gamma)
% model = solve_l2svmplus_kernel(labels, K, tK, C, gamma)
% Fast kernel l2-SVM+ solver: reduces SVM+ to a one-class SVM on a
% modified kernel matrix and solves it with libsvm (svmtrain).
% Inputs:
%   - labels: n-by-1 label vector (binary or multi-class)
%   - K:  n-by-n kernel matrix, feature space
%   - tK: n-by-n kernel matrix, privileged information space
%   - C, gamma: SVM+ regularization parameters
% Output:
%   - model: a libsvm model (binary case), or a cell of one-vs-rest models
n = length(labels);
uy = unique(labels);

assert(size(K, 1) == n);
assert(size(K, 2) == n);
assert(size(tK, 1) == n);
assert(size(tK, 2) == n);

K = K + 1;   % append bias
tK = tK + 1; % append bias

H = eye(n) - inv(eye(n) + C/gamma*tK);
H = 1/C * H;
opt = ['-s 2 -t 4 -n ', num2str(1/n)]; % one-class SVM, precomputed kernel
if length(uy) == 2
    if uy(1) == -1 && uy(2) == 1
        uy(1) = 1; % put the positive class first
        uy(2) = -1;
    end
    y = -ones(n, 1);
    y(labels==uy(1)) = 1;
    Q = K.*(y*y') + H;
    model = svmtrain(ones(n, 1), [(1:n)' Q], opt);
else
    model = cell(length(uy), 1);
    for i = 1:length(uy)
        y = -ones(n, 1);
        y(labels==uy(i)) = 1;
        Q = K.*(y*y') + H;
        model{i} = svmtrain(ones(n, 1), [(1:n)' Q], opt);
    end
end

end
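% ------------------------------------------------------------------
% Usage sketch (added for illustration, not part of the original file):
% in the multi-class case the function returns a cell of one-vs-rest
% models; one plausible decoding, mirroring the binary decoding in
% demo_mnist_svmplus.m, takes the arg-max over per-class decision
% values. testK (m-by-n test kernel), K, tK, and labels are assumed given.
%
%   uy = unique(labels);
%   models = solve_l2svmplus_kernel(labels, K, tK, 1, 1);
%   scores = zeros(size(testK, 1), numel(models));
%   for i = 1:numel(models)
%       a = zeros(length(labels), 1);
%       a(models{i}.SVs) = abs(full(models{i}.sv_coef));
%       scores(:, i) = (testK + 1) * (a .* (2*(labels==uy(i)) - 1));
%   end
%   [~, pred_class] = max(scores, [], 2);   % index into uy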
--------------------------------------------------------------------------------
/svm_plus_train.m:
--------------------------------------------------------------------------------
function models = svm_plus_train(labels, K, tK, param)
% models = svm_plus_train(labels, K, tK, param)
% Training SVM+.
% Inputs:
%   - labels: n-by-1 vector, source labels (supports multi-class)
%   - K: n-by-n kernel matrix, feature space
%   - tK: n-by-n kernel matrix, privileged information space
%   - param:
%       - svm_C: C in SVM
%       - gamma: gamma in SVM+
% Outputs:
%   - models: m-by-1 cell, each is an SVM model, ordered in ascending order
%   of labels (for binary problems a single model struct is returned).
%
% LI Wen, on July 31, 2013
% -----------------------------------------------
% update the calculation of rho, by Li Wen on Aug 2, 2013
%

% -----------------------------------------------
% parameters
svm_C = param.svm_C;
gamma = param.gamma;

cates = unique(labels);
n_class = length(cates);
n = length(labels);

DISP = 1; % whether to display messages
if(DISP)
    fprintf('Training SVM+...\n');
    fprintf('Number of Categories: %d\n', n_class);
end

if(n_class == 2)
    n_class = 1;
    cates = [1 -1]; % put the positive class first, so Label(1) == 1 always holds
end

models = cell(n_class, 1);
fprintf('In total %d classes:\n', n_class);
for ci = 1:n_class
    y = (labels == cates(ci))*2 - 1;
    fprintf('Class#%d training...\t', ci);
    % -----------------------------------------------
    % solving in SVM+ form; x = [alpha; zeta] in the dual QP
    tt = tic;
    H = 1/gamma*[tK tK; tK tK] + [K.*(y*y') zeros(n, n); zeros(n, 2*n)];
    tmp = -svm_C/gamma*sum(tK, 2);
    f = [-1 + tmp; tmp];
    A1 = [];
    b1 = [];
    A2 = [y' zeros(1, n); ones(1, 2*n)]; % a'y = 0; 1'(a + z) = 1'c;
    b2 = [0; svm_C*n];
    lb = zeros(2*n, 1);
    ub = [];
    opt.Display = 'off';
    x = quadprog(H, f, A1, b1, A2, b2, lb, ub, [], opt);

    fprintf('Train Time = %f\n', toc(tt));

    alpha = x(1:n);
    zeta = x(n+1:end);
    % ------------------ rho --------------------
    % tmp_idx = (alpha>1e-10)&(alpha<svm_C-1e-10);
    tilde_dec = 1/gamma*(tK*(alpha + zeta - svm_C)); % correcting-function values
    tilde_idx = (zeta > 1e-10);
    if(all(~tilde_idx)) % no zeta > 0
        tilde_b = max(-tilde_dec);
    else
        tilde_b = mean(-tilde_dec(tilde_idx));
    end

    dec = K*(alpha.*y);
    dec_2 = -dec + y.*(1 - tilde_dec - tilde_b);
    tmp_idx = (alpha > 1e-10);
    if(all(~tmp_idx)) % no alpha > 0
        lb = max(dec_2(y>0));
        ub = min(dec_2(y<0));
        b = (lb + ub)/2;
    else
        b = mean(dec_2(tmp_idx));
    end
    if(isnan(b))
        error('b is NaN.\n');
    end
    rho = -b;

    index = find(alpha > 1e-10);
    sv_coef = (alpha.*y);

    % save models, similar to libsvm models
    models{ci}.sv_coef = sv_coef(index);
    models{ci}.SVs = index;
    models{ci}.Label(1) = 1;
    models{ci}.rho = rho;

    % -------------------------------------------
    % others, for debugging
    models{ci}.x = x;
    models{ci}.y = labels;
    models{ci}.param = param;
    models{ci}.alpha = alpha;
    models{ci}.beta = zeta;
    models{ci}.b = b;
    models{ci}.tilde_b = tilde_b;

end
if(n_class == 1)
    models = models{1};
end

--------------------------------------------------------------------------------
/svmtrain.mexw64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenli-vision/svmplus_matlab/d3e3728cac84dc463622cc85c27842f6d3959198/svmtrain.mexw64

--------------------------------------------------------------------------------
/utils/L1_normalization.m:
--------------------------------------------------------------------------------
function X = L1_normalization(X)
% normalize each sample (column) to unit L1 norm
%
% X: d-by-n matrix
d = size(X, 1);
Xnorm = sum(abs(X));
Xnorm(Xnorm==0) = 1;
Xnorm = 1./Xnorm;
X = X.*repmat(Xnorm, [d, 1]);
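% ------------------------------------------------------------------
% Quick check (added for illustration, not part of the original file):
% after normalization every nonzero column should have unit L1 norm.
%
%   X = rand(4, 3);
%   Xn = L1_normalization(X);
%   sum(abs(Xn))   % ans = [1 1 1]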
--------------------------------------------------------------------------------
/utils/L2_distance_2.m:
--------------------------------------------------------------------------------
function n2 = L2_distance_2(x,c,df)
% D2 = L2_distance_2(a, b, df)
%
% Get the squared L2 distance between two sets of samples. It is useful
% when you only need the squared L2 distance, for example, in a Gaussian
% kernel. This function is faster and requires less memory than the
% function L2_distance by Roland Bunschoten et al.
%
% Input:
%   a  -- d-by-m matrix, i.e. m samples of dimension d.
%   b  -- d-by-n matrix;
%   df -- if df = 1, force the diagonal to zero; otherwise not.
%
% Output:
%   D2 -- m-by-n matrix, the squared L2 distances.
%
% Code from CHEN Lin, comments by LI Wen.
%

if nargin < 3
    df = 0;
end
[dimx, ndata] = size(x);
[dimc, ncentres] = size(c);
if dimx ~= dimc
    error('Data dimension does not match dimension of centres')
end

% ||x - c||^2 = ||x||^2 + ||c||^2 - 2*x'*c, computed for all pairs at once
n2 = (ones(ncentres, 1) * sum((x.^2), 1))' + ...
    ones(ndata, 1) * sum((c.^2), 1) - ...
    2.*(x'*(c));
% make sure the result is all real
n2 = real(full(n2));
n2(n2<0) = 0;
% force 0 on the diagonal?
if (df==1)
    n2 = n2.*(1-eye(size(n2)));
end

--------------------------------------------------------------------------------
/utils/getKernel.m:
--------------------------------------------------------------------------------
function [kernel, param] = getKernel(featuresA, featuresB, param)
% compute a kernel; it can be K(A, A) or K(A, B)
% Usage:
%   1. Compute the kernel between different examples, e.g. in testing:
%       [kernel, param] = getKernel(featuresA, featuresB, param)
%   2. Compute the kernel between the same examples, e.g. in training:
%       [kernel, param] = getKernel(features, param)
%
% Input:
%   featuresA: d-by-m matrix, d is the feature dimension, m is the number
%       of samples
%   featuresB: d-by-n matrix, d is the feature dimension, n is the number
%       of samples
%   param: - kernel_type: 'linear', 'gaussian'
%          - (gaussian) ratio, sigma, gamma
%
% Output:
%   kernel: m-by-n or m-by-m matrix
%   param: depends on the kernel type
%
% by LI Wen on Jan 04, 2012
%

if (nargin < 2)
    error('Not enough inputs!\n');
elseif (nargin < 3)
    param = featuresB;
    featuresB = featuresA;
end

if(~isfield(param, 'kernel_type'))
    error('Please specify the kernel_type!\n');
end

% note: only the 'gaussian' branch has its return_* helper shipped in
% utils/; the other branches need the corresponding helper functions
kt = lower(param.kernel_type);
if(strcmp(kt, 'linear'))
    kernel = return_LinearKernel(featuresA, featuresB);
elseif(strcmp(kt, 'exp_chisquare'))
    kernel = return_expChiSquareKernel(featuresA, featuresB);
elseif(strcmp(kt, 'exp_chisquare2'))
    [kernel, param] = return_expChiSquareKernel2(featuresA, featuresB, param);
elseif(strcmp(kt, 'chisquare'))
    kernel = return_ChiSquareKernel(featuresA, featuresB);
elseif(strcmp(kt, 'gaussian'))
    [kernel, param] = return_GaussianKernel(featuresA, featuresB, param);
else
    error('Unknown type of kernel: %s.\n', param.kernel_type);
end
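% ------------------------------------------------------------------
% Usage sketch (added for illustration, not part of the original file):
% the param returned by the training call fixes the Gaussian bandwidth;
% reuse it for the test kernel so train and test are consistent, as in
% demo_mnist_svmplus.m. Xtr (d-by-n) and Xte (d-by-m) are assumed given.
%
%   kparam = struct('kernel_type', 'gaussian');
%   [Ktr, kparam] = getKernel(Xtr, kparam);   % n-by-n, sets kparam.gamma
%   Kte = getKernel(Xte, Xtr, kparam);        % m-by-n, same gamma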
--------------------------------------------------------------------------------
/utils/return_GaussianKernel.m:
--------------------------------------------------------------------------------
function [K, param] = return_GaussianKernel(featuresA, featuresB, param)

[dA, nA] = size(featuresA);
[dB, nB] = size(featuresB);

assert(dA == dB);

sq_dist = L2_distance_2(featuresA, featuresB);

if(~isfield(param, 'ratio') || param.ratio == 0)
    param.ratio = 1;
end

if(~isfield(param, 'gamma') || param.gamma == 0)
    if (~isfield(param, 'sigma') || param.sigma == 0)
        % use the default sigma
        tmp = mean(mean(sq_dist))*0.5;
        param.sigma = sqrt(tmp);
    end
    % compute gamma according to param.ratio and param.sigma
    if(param.sigma == 0)
        param.gamma = 0;
    else
        param.gamma = 1/(2*param.ratio*param.sigma^2);
    end
else
    % gamma is already specified; set sigma and ratio to 0 if absent
    if(~isfield(param, 'sigma'))
        param.sigma = 0;
    end
    if(~isfield(param, 'ratio'))
        param.ratio = 0;
    end
end

K = exp(-sq_dist*param.gamma);
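% ------------------------------------------------------------------
% Bandwidth sketch (added for illustration, not part of the original
% file): with no gamma or sigma given, sigma^2 = mean(sq_dist)/2 and
% gamma = 1/(2*ratio*sigma^2), i.e. gamma = 1/mean(sq_dist) for the
% default ratio = 1. For a d-by-n matrix X:
%
%   D2 = L2_distance_2(X, X);
%   gamma = 1/mean(D2(:));   % what the default rule produces
%   K = exp(-gamma*D2);
--------------------------------------------------------------------------------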