├── README.md
├── data
└── mnist_plus.mat
├── demo_mnist_svmplus.m
├── solve_l2svmplus_kernel.m
├── svm_plus_train.m
├── svmtrain.mexw64
└── utils
├── L1_normalization.m
├── L2_distance_2.m
├── getKernel.m
└── return_GaussianKernel.m
/README.md:
--------------------------------------------------------------------------------
1 | # svmplus_matlab
2 | An implementation of SVM+ using the MATLAB QP solver. It has also been tested with the MOSEK QP solver.
3 |
4 | A faster implementation of kernel SVM+ based on a new SVM+ formulation is also released. You need libsvm to run it.
5 |
6 | If you find it useful, please cite the following paper:
7 |
8 | Wen Li, Dengxin Dai, Mingkui Tan, Dong Xu, and Luc Van Gool, “Fast Algorithms for Linear and Kernel SVM+,” IEEE International Conference on Computer Vision and Pattern Recognition (CVPR), 2016.
9 |
10 | For any question, please contact Wen Li via liwenbnu@gmail.com.
11 |
12 | ------------------------
13 | Dependencies
14 |
15 | The libsvm library is required. A compiled mex file (Windows 64-bit version) is included. For other platforms, please
16 | * download the latest libsvm package, and run "LIBSVM_DIR/matlab/make.m" (LIBSVM_DIR being the folder where you unpacked libsvm) to compile a mex file compatible with your OS.
17 | * Put the resulting mex file in the folder of this package, or add the folder containing the mex file to your MATLAB path at the beginning of demo_mnist_svmplus.m:
18 | addpath('LIBSVM_DIR/matlab/')
19 |
20 | ------------------------
21 | How to use
22 |
23 | Simple. Run "demo_mnist_svmplus.m", and see the results^_^.
24 |
25 | ------------------------
26 | Copyright
27 |
28 | Non-commercial use only. All rights reserved.
29 |
--------------------------------------------------------------------------------
/data/mnist_plus.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenli-vision/svmplus_matlab/d3e3728cac84dc463622cc85c27842f6d3959198/data/mnist_plus.mat
--------------------------------------------------------------------------------
/demo_mnist_svmplus.m:
--------------------------------------------------------------------------------
1 | % Demo script: trains SVM+ (QP solver) and L2-SVM+ (libsvm-based solver) on
2 | % the data in ./data/mnist_plus.mat and prints training time and test
3 | % accuracy for each.
4 | clear; clc;
5 | addpath('./utils');
6 |
7 | % load data; the script expects train_features, train_PFfeatures,
8 | % test_features, train_labels and test_labels to come from this file
9 | load('./data/mnist_plus.mat');
10 |
11 | % preprocessing data with L1-normalization; the transpose puts one sample
12 | % per column, the d-by-n layout that L1_normalization and getKernel document
13 | train_features = L1_normalization(train_features');
14 | test_features = L1_normalization(test_features');
15 | train_PFfeatures = L1_normalization(train_PFfeatures');
16 |
17 | % Binarize the labels: digit 5 -> +1, everything else -> -1.
18 | % The positive mask is evaluated on the untouched labels, so a sample whose
19 | % original label is 1 is not mistaken for a positive, and the result is
20 | % double so that -1 is representable even when the stored labels are of an
21 | % unsigned integer class.
22 | train_labels = 2*double(train_labels == 5) - 1;
23 | test_labels = 2*double(test_labels == 5) - 1;
24 |
25 | % calculate kernels on the regular features; the parameters chosen on the
26 | % training set (train_kparam) are reused for the test-vs-train kernel
27 | kparam = struct();
28 | kparam.kernel_type = 'gaussian';
29 | [K, train_kparam] = getKernel(train_features, kparam);
30 | testK = getKernel(test_features, train_features, train_kparam);
31 |
32 | % kernel on the privileged features (training samples only)
33 | kparam = struct();
34 | kparam.kernel_type = 'gaussian';
35 | tK = getKernel(train_PFfeatures, kparam);
36 |
37 | % ================ train SVM+ ====================
38 | % parameters could be obtained via validation
39 | svmplus_param = struct();
40 | svmplus_param.svm_C = 1;
41 | svmplus_param.gamma = 1;
42 | tic;
43 | model = svm_plus_train(train_labels, K, tK, svmplus_param);
44 | tt = toc;
45 | % decision values: kernel columns of the support vectors times their
46 | % signed coefficients, minus rho
47 | decs = testK(:, model.SVs) * model.sv_coef - model.rho;
48 | acc = sum((2*(decs>0)-1) == test_labels)/length(test_labels);
49 |
50 | fprintf(2, 'Orignal SVM+, time = %f, Acc = %.4f.\n', tt, acc);
51 |
52 | % ================ train l2-SVM+ ====================
53 | % parameters could be obtained via validation
54 | tic;
55 | model = solve_l2svmplus_kernel(train_labels, K, tK, svmplus_param.svm_C, svmplus_param.gamma);
56 | tt = toc;
57 | % scatter the coefficients back to a dense length-n vector
58 | % (presumably model.SVs holds training-sample indices for a precomputed
59 | % kernel -- confirm against the libsvm MATLAB README)
60 | alpha = zeros(length(train_labels), 1);
61 | alpha(model.SVs) = full(model.sv_coef);
62 | alpha = abs(alpha);
63 | % the "+ 1" mirrors the bias term that solve_l2svmplus_kernel adds to K
64 | decs = (testK + 1)*(alpha.*train_labels);
65 | acc = sum((2*(decs>0)-1) == test_labels)/length(test_labels);
66 |
67 | fprintf(2, 'L2-SVM+, time=%f, Acc = %.4f.\n', tt, acc);
--------------------------------------------------------------------------------
/solve_l2svmplus_kernel.m:
--------------------------------------------------------------------------------
1 | function model = solve_l2svmplus_kernel(labels, K, tK, C, gamma)
2 | % model = solve_l2svmplus_kernel(labels, K, tK, C, gamma)
3 | % Train kernel L2-SVM+ by passing a precomputed matrix to libsvm's svmtrain
4 | % (options '-s 2 -t 4 -n 1/n').
5 | %
6 | % Inputs:
7 | %   - labels: length-n label vector
8 | %   - K:      n-by-n kernel matrix, feature space
9 | %   - tK:     n-by-n kernel matrix, privileged information space
10 | %   - C:      trade-off parameter C
11 | %   - gamma:  SVM+ parameter gamma
12 | % Output:
13 | %   - model:  with exactly two distinct labels, the model returned by
14 | %             svmtrain; otherwise a cell array with one one-vs-rest model
15 | %             per entry of unique(labels), in that order.
16 | %             In the two-label case the positive class is +1 when the
17 | %             labels are {-1, +1}, and the smaller label otherwise.
18 | n = length(labels);
19 | uy = unique(labels);
20 |
21 | assert(size(K, 1) == n);
22 | assert(size(K, 2) == n);
23 | assert(size(tK, 1) == n);
24 | assert(size(tK, 2) == n);
25 |
26 | K = K + 1; % append bias
27 | tK = tK + 1; % append bias
28 |
29 | % H = 1/C * (I - (I + C/gamma*tK)^-1), rewritten as
30 | %   (I + C/gamma*tK) \ (tK/gamma)
31 | % which is algebraically identical but solves a linear system instead of
32 | % forming an explicit inverse (faster and more accurate).
33 | H = (eye(n) + C/gamma*tK) \ (tK/gamma);
34 |
35 | % nu = 1/n is written with full double precision; num2str would keep only
36 | % about 5 significant digits, so nu*n would no longer be exactly 1.
37 | opt = ['-s 2 -t 4 -n ', sprintf('%.17g', 1/n)];
38 |
39 | if length(uy) == 2
40 |     if uy(1) == -1 && uy(2) == 1
41 |         % make +1 the positive class
42 |         uy(1) = 1;
43 |         uy(2) = -1;
44 |     end
45 |     model = train_one_vs_rest(labels == uy(1), K, H, opt);
46 | else
47 |     model = cell(0);
48 |     for i = 1:length(uy)
49 |         model{i} = train_one_vs_rest(labels == uy(i), K, H, opt);
50 |     end
51 | end
52 |
53 | end
54 |
55 | function m = train_one_vs_rest(is_pos, K, H, opt)
56 | % Build Q = K.*(y*y') + H for the +1/-1 labelling given by is_pos and hand
57 | % it to svmtrain as a precomputed kernel (first column = sample index).
58 | n = numel(is_pos);
59 | y = -ones(n, 1);
60 | y(is_pos) = 1;
61 | Q = K.*(y*y') + H;
62 | m = svmtrain(ones(n, 1), [(1:n)' Q], opt);
63 | end
--------------------------------------------------------------------------------
/svm_plus_train.m:
--------------------------------------------------------------------------------
1 | function models = svm_plus_train(labels, K, tK, param)
2 | % models = svm_plus_train(labels, K, tK, param)
3 | % Train SVM+ by solving one quadratic program (quadprog) per class.
4 | % Inputs:
5 | % - labels: n-by-1 vector, source labels (supports multi-class)
6 | % - K: n-by-n kernel matrix, feature space
7 | % - tK: n-by-n kernel matrix, privileged information space
8 | % - param:
9 | % - svm_C: C in SVM
10 | % - gamma: gamma in SVM+
11 | % Outputs:
12 | % - models: m-by-1 cell, each is an SVM model, ordered in ascending order
13 | % of labels. When only one model is trained (e.g. two distinct labels,
14 | % treated as +1 vs. -1) the model struct itself is returned, not a cell.
15 | % LI Wen, on July 31, 2013
16 | % -----------------------------------------------
17 | % update the calculation of rho, by Li Wen on Aug 2, 2013
18 | %
19 |
20 | % -----------------------------------------------
21 | % parameters
22 | svm_C = param.svm_C;
23 | gamma = param.gamma;
24 |
25 | cates = unique(labels);
26 | n_class = length(cates);
27 | n = length(labels);
28 |
29 | DISP = 1; % whether to display the messages or not
30 | if(DISP)
31 | fprintf('Training SVM+...\n');
32 | fprintf('Number of Categoris: %d\n', n_class);
33 | end
34 |
35 | if(n_class == 2)
36 | n_class = 1; % binary problem: a single model is trained
37 | cates = [1 -1]; % put positive first, so Label(1) == 1 always holds (the labels are compared against 1 below)
38 | end
39 |
40 | models = cell(n_class, 1);
41 | fprintf('In total %d classes:\n', n_class); % printed regardless of DISP
42 | for ci = 1:n_class
43 | y = (labels == cates(ci))*2 - 1; % +1 for the current class, -1 for every other sample
44 | fprintf('Class#%d training...\t', ci);
45 | % -----------------------------------------------
46 | % solving in SVM+ form; the unknown is x = [alpha; zeta], length 2n
47 | tt = tic;
48 | H = 1/gamma*[tK tK; tK tK] + [K.*(y*y') zeros(n, n); zeros(n, 2*n)]; % quadratic term
49 | tmp = -svm_C/gamma*sum(tK, 2);
50 | f = [-1 + tmp; tmp]; % linear term
51 | A1 = []; % no inequality constraints
52 | b1 = [];
53 | A2 = [y' zeros(1, n); ones(1, 2*n)]; % equality constraints: y'*alpha = 0 and sum(alpha + zeta) = n*svm_C
54 | b2 = [0; svm_C*n];
55 | lb = zeros(2*n, 1); % alpha >= 0 and zeta >= 0
56 | ub = []; % no upper bound
57 | opt.Display = 'off';
58 | x = quadprog(H, f, A1, b1, A2, b2, lb, ub, [], opt);
59 |
60 | fprintf('Train Time = %f\n', toc(tt));
61 |
62 | alpha = x(1:n); % first n entries of the solution
63 | zeta = x(n+1:end); % remaining n entries of the solution
64 | % ------------------ rho --------------------
65 | % tmp_idx =(alpha>1e-10)&(alpha 1e-10); NOTE(review): this line is cut mid-expression and lines 66-73 of the file are missing from this listing; tilde_dec and tilde_idx used below are presumably defined there -- restore them from the upstream source.
74 | if(all(~tilde_idx)) % no zeta > 0
75 | tilde_b = max(-tilde_dec);
76 | else
77 | tilde_b = mean(-tilde_dec(tilde_idx));
78 | end
79 |
80 | dec = K*(alpha.*y);
81 | dec_2 = -dec + y.*(1 - tilde_dec - tilde_b);
82 | tmp_idx =(alpha>1e-10); % samples whose alpha is above the numerical threshold
83 | if(all(~tmp_idx)) % no alpha > 0
84 | lb = max(dec_2(y>0)); % note: lb/ub reuse the names of the QP bounds, which are reassigned at the top of every iteration
85 | ub = min(dec_2(y<0));
86 | b = (lb + ub)/2;
87 | else
88 | b = mean(dec_2(tmp_idx));
89 | end
90 | if(isnan(b))
91 | error('b is NaN.\n'); % NOTE(review): with a single argument error() presumably prints '\n' literally -- confirm
92 | end
93 | rho = - b;
94 |
95 | index = find(alpha > 1e-10); % indices kept as support vectors
96 | sv_coef = (alpha.*y); % signed coefficients
97 |
98 | % save models, similar to libsvm models
99 | models{ci}.sv_coef = sv_coef(index);
100 | models{ci}.SVs = index;
101 | models{ci}.Label(1) = 1;
102 | models{ci}.rho = rho;
103 |
104 | % -------------------------------------------
105 | % others, for debugging
106 | models{ci}.x = x;
107 | models{ci}.y = labels;
108 | models{ci}.param = param;
109 | models{ci}.alpha = alpha;
110 | models{ci}.beta = zeta; % zeta is stored under the field name 'beta'
111 | models{ci}.b = b;
112 | models{ci}.tilde_b = tilde_b;
113 |
114 | end
115 | if(n_class == 1)
116 | models = models{1}; % single model: unwrap the cell
117 | end
118 |
--------------------------------------------------------------------------------
/svmtrain.mexw64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wenli-vision/svmplus_matlab/d3e3728cac84dc463622cc85c27842f6d3959198/svmtrain.mexw64
--------------------------------------------------------------------------------
/utils/L1_normalization.m:
--------------------------------------------------------------------------------
1 | function X = L1_normalization(X)
2 | % X = L1_normalization(X)
3 | % Scale each sample (column) of X so that its L1 norm is 1.
4 | %
5 | % Input:
6 | %   X -- d-by-n matrix, one sample per column
7 | % Output:
8 | %   X -- d-by-n matrix; every column is divided by the sum of the absolute
9 | %        values of its entries. All-zero columns are left unchanged.
10 |
11 | % Sum explicitly along dimension 1: without the dimension argument, sum()
12 | % would add up a 1-by-n input along its row and normalize all samples by a
13 | % single global value.
14 | col_norm = sum(abs(X), 1);
15 | col_norm(col_norm == 0) = 1; % avoid division by zero for empty samples
16 | X = bsxfun(@times, X, 1./col_norm);
--------------------------------------------------------------------------------
/utils/L2_distance_2.m:
--------------------------------------------------------------------------------
1 | function n2 = L2_distance_2(x,c,df)
2 | % D2 = L2_distance_2(a, b, df)
3 | %
4 | % Squared L2 distance between every sample of one set and every sample of
5 | % another. Handy whenever only the squared distance is needed, for example
6 | % in a Gaussian kernel.
7 | %
8 | % Input:
9 | %   a  -- d-by-m matrix, i.e. m samples of dimension d.
10 | %   b  -- d-by-n matrix.
11 | %   df -- optional (default 0); df = 1 forces the diagonal to zero.
12 | %
13 | % Output:
14 | %   D2 -- m-by-n matrix of squared L2 distances.
15 | %
16 | % Code from CHEN Lin, comment by LI Wen.
17 | %
18 |
19 | if nargin < 3
20 |     df = 0;
21 | end
22 |
23 | [dim_x, n_x] = size(x);
24 | [dim_c, n_c] = size(c);
25 | if dim_x ~= dim_c
26 |     error('Data dimension does not match dimension of centres')
27 | end
28 |
29 | % ||a - b||^2 = ||a||^2 + ||b||^2 - 2*a'*b, evaluated for all pairs at once
30 | sq_norm_x = sum((x.^2), 1);
31 | sq_norm_c = sum((c.^2), 1);
32 | n2 = repmat(sq_norm_x', 1, n_c) + repmat(sq_norm_c, n_x, 1) - 2.*(x'*(c));
33 |
34 | % keep the result real, dense and non-negative
35 | n2 = real(full(n2));
36 | n2(n2<0) = 0;
37 |
38 | % optionally zero out the diagonal
39 | if (df==1)
40 |     off_diag = 1 - eye(size(n2));
41 |     n2 = n2.*off_diag;
42 | end
--------------------------------------------------------------------------------
/utils/getKernel.m:
--------------------------------------------------------------------------------
1 | function [kernel, param] = getKernel(featuresA, featuresB, param)
2 | % Compute a kernel matrix, either K(A, B) or K(A, A).
3 | % Usage:
4 | %   1. Kernel between two different sets of examples, e.g. in testing:
5 | %        [kernel, param] = getKernel(featuresA, featuresB, param)
6 | %   2. Kernel of one set of examples with itself, e.g. in training:
7 | %        [kernel, param] = getKernel(features, param)
8 | %
9 | % Input:
10 | %   featuresA: d-by-m matrix, d is the feature dimension, m the number of
11 | %              samples
12 | %   featuresB: d-by-n matrix, d is the feature dimension, n the number of
13 | %              samples
14 | %   param:     struct with field kernel_type (case-insensitive), one of
15 | %              'linear', 'exp_chisquare', 'exp_chisquare2', 'chisquare',
16 | %              'gaussian'; each type is delegated to a return_*Kernel
17 | %              helper that has to be on the path. Further fields depend
18 | %              on the kernel type.
19 | %
20 | % Output:
21 | %   kernel: m-by-n or m-by-m matrix
22 | %   param:  the input param, possibly updated by the helper
23 | %           ('exp_chisquare2' and 'gaussian')
24 | %
25 | % by LI Wen on Jan 04, 2012
26 | %
27 |
28 | if nargin < 2
29 |     error('Not enough inputs!\n');
30 | end
31 | if nargin < 3
32 |     % two-argument form: the second argument is param and the kernel is
33 |     % computed between featuresA and itself
34 |     param = featuresB;
35 |     featuresB = featuresA;
36 | end
37 |
38 | if ~isfield(param, 'kernel_type')
39 |     error('Please specify the kernel_type!\n');
40 | end
41 |
42 | % look the requested kernel up in the list of supported names
43 | kt = lower(param.kernel_type);
44 | supported = {'linear', 'exp_chisquare', 'exp_chisquare2', 'chisquare', 'gaussian'};
45 | choice = find(strcmp(kt, supported), 1);
46 | if isempty(choice)
47 |     error('Unknown type of kernel: %s.\n', param.kernel_type);
48 | end
49 |
50 | switch choice
51 |     case 1
52 |         kernel = return_LinearKernel(featuresA, featuresB);
53 |     case 2
54 |         kernel = return_expChiSquareKernel(featuresA, featuresB);
55 |     case 3
56 |         [kernel, param] = return_expChiSquareKernel2(featuresA, featuresB, param);
57 |     case 4
58 |         kernel = return_ChiSquareKernel(featuresA, featuresB);
59 |     case 5
60 |         [kernel, param] = return_GaussianKernel(featuresA, featuresB, param);
61 | end
--------------------------------------------------------------------------------
/utils/return_GaussianKernel.m:
--------------------------------------------------------------------------------
1 | function [K, param] = return_GaussianKernel(featuresA, featuresB, param)
2 | % [K, param] = return_GaussianKernel(featuresA, featuresB, param)
3 | % Gaussian kernel K = exp(-gamma * squared L2 distance) between the columns
4 | % of featuresA and the columns of featuresB.
5 | %
6 | % Fields of param that are read (all optional, 0 counts as "not set"):
7 | %   gamma -- used directly when present and non-zero
8 | %   sigma -- used to derive gamma when gamma is not set; defaults to
9 | %            sqrt(0.5 * mean of all squared distances)
10 | %   ratio -- extra factor in gamma = 1/(2*ratio*sigma^2); defaults to 1
11 | % The returned param carries the gamma, sigma and ratio values in effect,
12 | % so it can be passed back in to apply the same kernel to other data.
13 |
14 | % both sets must share the feature dimension
15 | assert(size(featuresA, 1) == size(featuresB, 1));
16 |
17 | sq_dist = L2_distance_2(featuresA, featuresB);
18 |
19 | % ratio falls back to 1 when missing or zero, so it is always a field of
20 | % param from here on
21 | if ~(isfield(param, 'ratio') && param.ratio ~= 0)
22 |     param.ratio = 1;
23 | end
24 |
25 | gamma_given = isfield(param, 'gamma') && param.gamma ~= 0;
26 | if gamma_given
27 |     % gamma was specified by the caller: only make sure sigma exists
28 |     if ~isfield(param, 'sigma')
29 |         param.sigma = 0;
30 |     end
31 | else
32 |     if ~isfield(param, 'sigma') || param.sigma == 0
33 |         % use default sigma
34 |         param.sigma = sqrt(mean(mean(sq_dist))*0.5);
35 |     end
36 |     % derive gamma from ratio and sigma (sigma can still be 0 when all
37 |     % distances are zero)
38 |     if param.sigma == 0
39 |         param.gamma = 0;
40 |     else
41 |         param.gamma = 1/(2*param.ratio*param.sigma^2);
42 |     end
43 | end
44 |
45 | K = exp(-sq_dist*param.gamma);
--------------------------------------------------------------------------------