├── setup.m
├── LICENSE
├── Data
│   ├── PenDigit.mat
│   ├── abalone.mat
│   ├── original.png
│   ├── Letter15K.mat
│   ├── mushrooms.mat
│   └── rbf.m
├── Sketch
│   ├── GaussianProjection.m
│   ├── LeverageScoreSampling.m
│   ├── srht.m
│   └── CountSketch.m
├── SPSD_Sketching
│   ├── GPR
│   │   ├── gprTest.m
│   │   ├── gprTrain.m
│   │   ├── gprTestCUR.m
│   │   └── demo_gpr.m
│   ├── KPCA
│   │   ├── kpcaTest.m
│   │   ├── kpcaTestCUR.m
│   │   ├── kpcaTrain.m
│   │   └── demo_kpca_knn.m
│   ├── Spectral Clustering
│   │   ├── demo_spectralclustering.m
│   │   ├── SpectralClusteringFaster.m
│   │   ├── accuracy.m
│   │   ├── nmi.m
│   │   └── hungarian.m
│   ├── Nystrom.m
│   └── spsdFaster.m
├── SVD
│   ├── ksvdPrototype.m
│   ├── BlockLanczos.m
│   └── ksvdFaster.m
├── CUR
│   ├── curPrototype.m
│   ├── curFaster.m
│   ├── demo_cur.m
│   └── curFasterKernel.m
├── LSR
│   ├── InexactCurTypeRegression.m
│   └── InexactLSR.m
└── README.md

/setup.m:
--------------------------------------------------------------------------------
1 | addpath(genpath(pwd));
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/LICENSE
--------------------------------------------------------------------------------
/Data/PenDigit.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/PenDigit.mat
--------------------------------------------------------------------------------
/Data/abalone.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/abalone.mat
--------------------------------------------------------------------------------
/Data/original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/original.png
--------------------------------------------------------------------------------
/Data/Letter15K.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/Letter15K.mat
--------------------------------------------------------------------------------
/Data/mushrooms.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/mushrooms.mat
--------------------------------------------------------------------------------
/Sketch/GaussianProjection.m:
--------------------------------------------------------------------------------
1 | function [C] = GaussianProjection(A, c)
2 | n = size(A, 2);
3 | S = randn(n, c) / sqrt(c);
4 | C = A * S;
5 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/gprTest.m:
--------------------------------------------------------------------------------
1 | function [ytest] = gprTest(Xtrain, Xtest, sigma, w)
2 | Ktest = rbf(Xtest, Xtrain, sigma);
3 | ytest = Ktest * w;
4 | end
--------------------------------------------------------------------------------
/SVD/ksvdPrototype.m:
--------------------------------------------------------------------------------
1 | function [Utilde, Stilde, Vtilde] = ksvdPrototype(A, k, s)
2 | C = CountSketch(A, s);
3 | [Q, R] = qr(C, 0);
4 | [Ubar, Stilde, Vtilde] = svds(Q' * A, k);
5 | Utilde = Q * Ubar;
6 | end
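
A quick way to sanity-check ksvdPrototype is to compare it against MATLAB's svds on a synthetic test matrix. The snippet below is a minimal usage sketch, not a file from this repository; the matrix sizes and the sketch size s are arbitrary illustrative choices, and it assumes setup.m has already been run so that CountSketch is on the path.

% illustrative sketch: approximate rank-k SVD vs. exact truncated SVD
A = randn(2000, 20) * randn(20, 300) + 0.1 * randn(2000, 300);  % noisy low-rank matrix
k = 10;                                   % target rank
s = 10 * k;                               % sketch size (k <= s)
[Utilde, Stilde, Vtilde] = ksvdPrototype(A, k, s);
[U, S, V] = svds(A, k);                   % exact rank-k truncated SVD
errRand  = norm(A - Utilde * Stilde * Vtilde', 'fro') / norm(A, 'fro');
errExact = norm(A - U * S * V', 'fro') / norm(A, 'fro');
disp(['randomized k-SVD error: ', num2str(errRand), ...
      ',  exact k-SVD error: ', num2str(errExact)]);
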
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/kpcaTest.m:
--------------------------------------------------------------------------------
1 | function [featuretest] = kpcaTest(Xtrain, Xtest, sigma, U, lambda)
2 | Ktest = rbf(Xtest, Xtrain, sigma);
3 | U = bsxfun(@times, U, (1 ./ sqrt(lambda))');
4 | featuretest = Ktest * U;
5 | end
--------------------------------------------------------------------------------
/CUR/curPrototype.m:
--------------------------------------------------------------------------------
1 | function [C, U, R] = curPrototype(A, c, r)
2 | [m, n] = size(A);
3 | SC = sort(randsample(n, c));
4 | SR = sort(randsample(m, r));
5 | C = A(:, SC);
6 | R = A(SR, :);
7 | U = pinv(C) * A * pinv(R);
8 | 
9 | end
--------------------------------------------------------------------------------
/LSR/InexactCurTypeRegression.m:
--------------------------------------------------------------------------------
1 | function [Xtilde] = InexactCurTypeRegression(C, R, A, sc, sr)
2 | [~, idxC] = LeverageScoreSampling(C', sc);
3 | [~, idxR] = LeverageScoreSampling(R, sr);
4 | Xtilde = pinv(C(idxC, :)) * A(idxC, idxR) * pinv(R(:, idxR));
5 | end
--------------------------------------------------------------------------------
/LSR/InexactLSR.m:
--------------------------------------------------------------------------------
1 | function [xtilde] = InexactLSR(A, b, s)
2 | d = size(A, 2);
3 | sketch = (CountSketch([A, b]', s))';
4 | Asketch = sketch(:, 1:d); % Asketch = S' * A
5 | bsketch = sketch(:, end); % bsketch = S' * b
6 | xtilde = Asketch \ bsketch;
7 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/gprTrain.m:
--------------------------------------------------------------------------------
1 | function [w] = gprTrain(Xtrain, ytrain, sigma, alpha)
2 | l = 100; % can be tuned
3 | L = Nystrom(Xtrain, sigma, l); % K is approximated by L * L'
4 | l = size(L, 2);
5 | w = L' * ytrain;
6 | w = (alpha * eye(l) + L' * L) \ w;
7 | w = ytrain - L * w;
8 | w = w / alpha;
9 | end
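
gprTrain and gprTest are meant to be used as a pair, as in demo_gpr.m further below. The following is a minimal sketch on synthetic 1-D data (the data, sigma, and alpha are arbitrary; the training set must contain at least 100 points because gprTrain's internal Nystrom call samples l = 100 landmarks):

% illustrative sketch: GPR with the Nystrom-approximated kernel on toy data
Xtrain = linspace(0, 10, 500)';                 % 500 training inputs (>= 100 required)
ytrain = sin(Xtrain) + 0.1 * randn(500, 1);     % noisy observations
Xtest  = linspace(0, 10, 200)';
sigma  = 1.0;                                   % RBF kernel width
alpha  = 0.1;                                   % observation noise / regularization
w = gprTrain(Xtrain, ytrain, sigma, alpha);     % w approximates (K + alpha*I) \ ytrain
ypred = gprTest(Xtrain, Xtest, sigma, w);       % predictive mean at the test inputs
plot(Xtrain, ytrain, '.', Xtest, ypred, '-');
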
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RandMatrixMatlab
2 | Simple MATLAB code for randomized matrix computations.
3 | 
4 | First run `setup.m` to add all the subfolders to the MATLAB path.
5 | 
6 | Detailed descriptions of the algorithms are given in the article:
7 | "A Practical Guide to Randomized Matrix Computations with MATLAB Implementations",
8 | http://arxiv.org/abs/1505.07570
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/gprTestCUR.m:
--------------------------------------------------------------------------------
1 | function [ytest] = gprTestCUR(Xtrain, Xtest, sigma, w)
2 | c = max(100, ceil(size(Xtrain, 1) / 20)); % can be tuned
3 | r = max(100, ceil(size(Xtest, 1) / 20)); % can be tuned
4 | [C, Utilde, R] = curFasterKernel(Xtest, Xtrain, sigma, c, r);
5 | ytest = C * (Utilde * (R * w));
6 | end
--------------------------------------------------------------------------------
/Sketch/LeverageScoreSampling.m:
--------------------------------------------------------------------------------
1 | function [C, idx] = LeverageScoreSampling(A, s)
2 | n = size(A, 2);
3 | [~, ~, V] = svd(A, 'econ');
4 | leveragescores = sum(V.^2, 2);
5 | prob = leveragescores / sum(leveragescores);
6 | idx = randsample(n, s, true, prob);
7 | idx = unique(idx); % eliminate duplicates
8 | C = A(:, idx);
9 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/kpcaTestCUR.m:
--------------------------------------------------------------------------------
1 | function [featuretest] = kpcaTestCUR(Xtrain, Xtest, sigma, U, lambda)
2 | c = max(100, ceil(size(Xtrain, 1) / 20)); % can be tuned
3 | r = max(100, ceil(size(Xtest, 1) / 20)); % can be tuned
4 | [C, Utilde, R] = curFasterKernel(Xtest, Xtrain, sigma, c, r);
5 | U = bsxfun(@times, U, (1 ./ sqrt(lambda))');
6 | featuretest = C * (Utilde * (R * U));
7 | end
--------------------------------------------------------------------------------
/Sketch/srht.m:
--------------------------------------------------------------------------------
1 | function [C] = srht(A, s)
2 | n = size(A, 2);
3 | sgn = randi(2, [1, n]) * 2 - 3; % one half are +1 and the rest are -1
4 | A = bsxfun(@times, A, sgn); % flip the signs of each column w.p. 50%
5 | n = 2^(ceil(log2(n)));
6 | C = (fwht(A', n))'; % Hadamard transform
7 | idx = sort(randsample(n, s));
8 | C = C(:, idx); % subsampling
9 | C = C * (n / sqrt(s));
10 | end
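
All the routines in the Sketch folder map a matrix A to a smaller sketch C = A * S whose Gram matrix approximates that of A. Below is a minimal numerical check of this property, not a file from this repository; the test matrix and sketch size are arbitrary, srht additionally requires fwht (Signal Processing Toolbox), and LeverageScoreSampling is omitted because its selected columns are not rescaled.

% illustrative sketch: the sketches approximately preserve A * A'
A = randn(500, 5000);                 % wide test matrix
s = 500;                              % sketch size
G = A * A';                           % exact Gram matrix
C1 = GaussianProjection(A, s);
C2 = CountSketch(A, s);
C3 = srht(A, s);                      % needs fwht from the Signal Processing Toolbox
err = @(C) norm(G - C * C', 'fro') / norm(G, 'fro');
disp(['Gaussian: ', num2str(err(C1)), ...
      '   CountSketch: ', num2str(err(C2)), ...
      '   SRHT: ', num2str(err(C3))]);
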
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/demo_spectralclustering.m:
--------------------------------------------------------------------------------
1 | % demo_spectralclustering
2 | load('./Data/mushrooms.mat')
3 | X = full(X);
4 | sigma = 3.0; % scaling parameter of the RBF kernel
5 | k = 2; % number of classes
6 | [labels] = SpectralClusteringFaster(X, sigma, k);
7 | ac = accuracy(labels, y);
8 | NMI = nmi(labels, y);
9 | display(['Accuracy: ', num2str(ac), ' nmi: ', num2str(NMI)]);
--------------------------------------------------------------------------------
/SPSD_Sketching/Nystrom.m:
--------------------------------------------------------------------------------
1 | function [L] = Nystrom(X, sigma, s)
2 | k = ceil(0.8 * s); % can be tuned
3 | n = size(X, 1);
4 | S = sort(randsample(n, s)); % uniform sampling
5 | C = rbf(X, X(S, :), sigma); % C = K(:, S)
6 | W = C(S, :);
7 | [UW, SW, ~] = svd(W);
8 | SW = diag(SW);
9 | SW = 1 ./ sqrt(SW(1:k));
10 | UW = bsxfun(@times, UW(:, 1:k), SW');
11 | L = C * UW; % K is approximated by L * L'
12 | end
--------------------------------------------------------------------------------
/Data/rbf.m:
--------------------------------------------------------------------------------
1 | function [K] = rbf(X1, X2, sigma)
2 | % input:
3 | %     X1: n1-by-d
4 | %     X2: n2-by-d
5 | % output:
6 | %     K: n1-by-n2
7 | %     K(i,j) = exp( -norm(X1(i, :) - X2(j, :))^2 / (2 * sigma^2) )
8 | 
9 | 
10 | K = X1 * X2';
11 | X1_row_sq = sum(X1.^2, 2) / 2;
12 | X2_row_sq = sum(X2.^2, 2) / 2;
13 | K = bsxfun(@minus, K, X1_row_sq);
14 | K = bsxfun(@minus, K, X2_row_sq');
15 | K = K / (sigma^2);
16 | K = exp(K);
17 | 
18 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/kpcaTrain.m:
--------------------------------------------------------------------------------
1 | function [U, lambda, featuretrain] = kpcaTrain(Xtrain, sigma, k)
2 | s = k * 10; % can be tuned
3 | [QC, Z] = spsdFaster(Xtrain, sigma, s); % QC has orthonormal columns
4 | clear Xtrain
5 | [UZ, SZ, ~] = svd(Z);
6 | U = QC * UZ(:, 1:k); % U contains the top k eigenvectors
7 | lambda = diag(SZ);
8 | lambda = lambda(1:k); % lambda is the vector containing the top k eigenvalues
9 | featuretrain = bsxfun(@times, U, (sqrt(lambda))');
10 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/spsdFaster.m:
--------------------------------------------------------------------------------
1 | function [QC, Z] = spsdFaster(X, sigma, s)
2 | p = 4 * s; % can be tuned
3 | n = size(X, 1);
4 | S = sort(randsample(n, s)); % uniform sampling
5 | C = rbf(X, X(S, :), sigma);
6 | [QC, ~] = qr(C, 0);
7 | q = sum(QC.^2, 2); % the sampling probability
8 | q = q / sum(q);
9 | P = randsample(n, p, true, q);
10 | P = unique([P; S]); % enforce P contains S
11 | PQCinv = pinv(QC(P, :));
12 | Ksub = rbf(X(P, :), X(P, :), sigma);
13 | Z = PQCinv * Ksub * PQCinv';
14 | end
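
Nystrom and spsdFaster both produce low-rank approximations of the RBF kernel matrix without forming it: K is approximated by L * L' and by QC * Z * QC', respectively. A minimal sketch for checking the approximation quality on a small random data set (the data size, sigma, and sketch size s are arbitrary illustrative choices):

% illustrative sketch: SPSD (kernel) approximation error of Nystrom and spsdFaster
X = randn(1000, 10);                          % 1000 points in 10 dimensions
sigma = 2.0;                                  % RBF kernel width
s = 50;                                       % sketch size
K = rbf(X, X, sigma);                         % exact kernel matrix (small enough to form here)
L = Nystrom(X, sigma, s);                     % K is approximated by L * L'
[QC, Z] = spsdFaster(X, sigma, s);            % K is approximated by QC * Z * QC'
errNys  = norm(K - L * L', 'fro') / norm(K, 'fro');
errFast = norm(K - QC * Z * QC', 'fro') / norm(K, 'fro');
disp(['Nystrom: ', num2str(errNys), '   spsdFaster: ', num2str(errFast)]);
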
--------------------------------------------------------------------------------
/CUR/curFaster.m:
--------------------------------------------------------------------------------
1 | function [C, U, R] = curFaster(A, c, r)
2 | pc = 2 * (r + c); % can be tuned
3 | pr = 2 * (r + c); % can be tuned
4 | [m, n] = size(A);
5 | SC = sort(randsample(n, c));
6 | SR = sort(randsample(m, r));
7 | C = A(:, SC);
8 | R = A(SR, :);
9 | PC = sort(randsample(m, pc));
10 | PR = sort(randsample(n, pr));
11 | PC = unique([PC; SR]); % enforce PC to contain SR
12 | PR = unique([PR; SC]); % enforce PR to contain SC
13 | U = pinv(C(PC, :)) * A(PC, PR) * pinv(R(:, PR));
14 | 
15 | end
--------------------------------------------------------------------------------
/SVD/BlockLanczos.m:
--------------------------------------------------------------------------------
1 | function [U, S, V] = BlockLanczos(A, k, q)
2 | s = 2 * k; % can be tuned
3 | [m, n] = size(A);
4 | C = A * randn(n, s);
5 | Krylov = zeros(m, s * q);
6 | Krylov(:, 1:s) = C;
7 | for i = 2: q
8 |     C = A' * C;
9 |     C = A * C;
10 |     [C, ~] = qr(C, 0); % optional
11 |     Krylov(:, (i-1)*s+1: i*s) = C;
12 | end
13 | [Q, ~] = qr(Krylov, 0);
14 | [Ubar, S, V] = svd(Q' * A, 'econ');
15 | Ubar = Ubar(:, 1:k);
16 | S = S(1:k, 1:k);
17 | V = V(:, 1:k);
18 | U = Q * Ubar;
19 | end
--------------------------------------------------------------------------------
/Sketch/CountSketch.m:
--------------------------------------------------------------------------------
1 | function [C] = CountSketch(A, s)
2 | [m, n] = size(A);
3 | sgn = randi(2, [1, n]) * 2 - 3; % one half are +1 and the rest are -1
4 | A = bsxfun(@times, A, sgn); % flip the signs of each column w.p. 50%
5 | ll = randsample(s, n, true); % sample n items from [s] with replacement
6 | C = zeros(m, s); % initialize C
7 | for j = 1: n
8 |     C(:, ll(j)) = C(:, ll(j)) + A(:, j);
9 | end
10 | % for j = 1: s
11 | %     idx = (ll == j); % find the columns of A which should be added to C(:, j)
12 | %     C(:, j) = sum(A(:, idx), 2);
13 | % end
14 | end
--------------------------------------------------------------------------------
/CUR/demo_cur.m:
--------------------------------------------------------------------------------
1 | % demo_CUR
2 | 
3 | % ----------- parameters ----------- %
4 | c = 100;
5 | r = 100;
6 | 
7 | % ----------- load data ----------- %
8 | A = imread('./Data/original.png');
9 | A = double(A) / 256;
10 | 
11 | % ----------- CUR ----------- %
12 | [C1, U1, R1] = curPrototype(A, c, r);
13 | [C2, U2, R2] = curFaster(A, c, r);
14 | 
15 | % ----------- show images ----------- %
16 | subplot(1, 3, 1), imshow(A), title('A')
17 | subplot(1, 3, 2), imshow(C1 * U1 * R1), title('C * Ustar * R')
18 | subplot(1, 3, 3), imshow(C2 * U2 * R2), title('C * Utilde * R')
--------------------------------------------------------------------------------
/CUR/curFasterKernel.m:
--------------------------------------------------------------------------------
1 | function [C, U, R] = curFasterKernel(Xtest, Xtrain, sigma, c, r)
2 | pc = 2 * (r + c); % can be tuned
3 | pr = 2 * (r + c); % can be tuned
4 | m = size(Xtest, 1);
5 | n = size(Xtrain, 1);
6 | SC = sort(randsample(n, c));
7 | SR = sort(randsample(m, r));
8 | C = rbf(Xtest, Xtrain(SC, :), sigma);
9 | R = rbf(Xtest(SR, :), Xtrain, sigma);
10 | PC = sort(randsample(m, pc));
11 | PR = sort(randsample(n, pr));
12 | PC = unique([PC; SR]); % enforce PC to contain SR
13 | PR = unique([PR; SC]); % enforce PR to contain SC
14 | Kblock = rbf(Xtest(PC, :), Xtrain(PR, :), sigma);
15 | U = pinv(C(PC, :)) * Kblock * pinv(R(:, PR));
16 | end
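
demo_cur.m above works on an image; the snippet below is an alternative, self-contained check of curPrototype and curFaster on a synthetic noisy low-rank matrix. It is a minimal usage sketch, not a file from this repository; all sizes are arbitrary, chosen so that the oversampling 2*(c+r) in curFaster does not exceed the matrix dimensions.

% illustrative sketch: relative error of the two CUR decompositions
A = randn(1000, 30) * randn(30, 800) + 0.01 * randn(1000, 800);  % approximately rank 30
c = 100;  r = 100;                     % number of sampled columns / rows
[C1, U1, R1] = curPrototype(A, c, r);  % U computed from all of A
[C2, U2, R2] = curFaster(A, c, r);     % U computed from a subsampled block of A
err1 = norm(A - C1 * U1 * R1, 'fro') / norm(A, 'fro');
err2 = norm(A - C2 * U2 * R2, 'fro') / norm(A, 'fro');
disp(['curPrototype: ', num2str(err1), '   curFaster: ', num2str(err2)]);
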
--------------------------------------------------------------------------------
/SVD/ksvdFaster.m:
--------------------------------------------------------------------------------
1 | function [Utilde, Stilde, Vtilde] = ksvdFaster(A, k, s, p1, p2)
2 | n = size(A, 2);
3 | C = CountSketch(A, s);
4 | A = [A, C];
5 | A = A';
6 | sketch = CountSketch(A, p1);
7 | clear A % A ((n+s)-by-m) will not be used
8 | sketch = GaussianProjection(sketch, p2);
9 | sketch = sketch';
10 | L = sketch(:, 1:n);
11 | D = sketch(:, n+1:end);
12 | clear sketch % sketch (p2-by-(n+s)) will not be used
13 | [QD, RD] = qr(D, 0);
14 | [Ubar, Sbar, Vbar] = svds(QD' * L, k);
15 | clear L % L (p2-by-n) will not be used
16 | C = C * (pinv(RD) * (Ubar * Sbar));
17 | [Utilde, Stilde, Vhat] = svd(C, 'econ');
18 | Vtilde = Vbar * Vhat;
19 | 
20 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/SpectralClusteringFaster.m:
--------------------------------------------------------------------------------
1 | function [labels] = SpectralClusteringFaster(X, sigma, k)
2 | s = k * 10; % can be tuned
3 | n = size(X, 1);
4 | [QC, Z] = spsdFaster(X, sigma, s); % K is approximated by QC * Z * QC'
5 | [UZ, SZ, ~] = svd(Z);
6 | SZ = sqrt(diag(SZ));
7 | UZ = bsxfun(@times, UZ, SZ'); % now Z = UZ * UZ'
8 | L = QC * UZ; % now K is approximated by L * L'
9 | d = ones(n, 1);
10 | d = L * (L' * d); % diagonal of the degree matrix D
11 | d = 1 ./ sqrt(d);
12 | L = bsxfun(@times, L, d); % now G is approximated by L * L'
13 | [U, ~, ~] = svd(L, 'econ');
14 | U = U(:, 1:k);
15 | U = normr(U); % normalize the rows of U
16 | labels = kmeans(U, k, 'Replicates', 3);
17 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/demo_gpr.m:
--------------------------------------------------------------------------------
1 | % demo_gpr
2 | 
3 | load('./Data/abalone.mat')
4 | X = full(X);
5 | n = size(X, 1);
6 | 
7 | % --------------------- parameters --------------------- %
8 | sigma = 1.0; % scaling parameter of the RBF kernel
9 | alpha = 1.0; % noise level in the observations
10 | 
11 | % ------- randomly partition training-test data ------- %
12 | ntrain = ceil(n * 0.8); % number of training data
13 | idx = randperm(n);
14 | X = X(idx, :);
15 | y = y(idx, :);
16 | Xtrain = X(1: ntrain, :);
17 | ytrain = y(1: ntrain);
18 | Xtest = X(ntrain + 1:end, :);
19 | ytest = y(ntrain + 1:end);
20 | 
21 | % ----------------- GPR predictive mean ----------------- %
22 | w = gprTrain(Xtrain, ytrain, sigma, alpha);
23 | labels = gprTest(Xtrain, Xtest, sigma, w);
24 | %labels = gprTestCUR(Xtrain, Xtest, sigma, w); % use CUR to speed up
25 | error = norm(labels - ytest) / norm(ytest);
26 | display(['error ratio: ', num2str(error)]);
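
There is no demo script for the SVD folder; the following minimal sketch shows one possible way to call BlockLanczos and ksvdFaster and compare them with svds. The test matrix and the parameters q, s, p1, p2 (iteration count, sketch size, and the two oversampling sizes) are arbitrary illustrative choices, not recommended settings.

% illustrative sketch: randomized rank-k SVD via BlockLanczos and ksvdFaster
A = randn(2000, 40) * randn(40, 500) + 0.05 * randn(2000, 500);
k = 10;                         % target rank
q = 5;                          % number of Block Lanczos iterations
s = 40;  p1 = 200;  p2 = 80;    % sketch size and oversampling for ksvdFaster
[U1, S1, V1] = BlockLanczos(A, k, q);
[U2, S2, V2] = ksvdFaster(A, k, s, p1, p2);
[U0, S0, V0] = svds(A, k);
err = @(U, S, V) norm(A - U * S * V', 'fro') / norm(A, 'fro');
disp(['BlockLanczos: ', num2str(err(U1, S1, V1)), ...
      '   ksvdFaster: ', num2str(err(U2, S2, V2)), ...
      '   svds: ', num2str(err(U0, S0, V0))]);
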
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/accuracy.m:
--------------------------------------------------------------------------------
1 | function score = accuracy(true_labels, cluster_labels)
2 | %ACCURACY Compute clustering accuracy using the true and cluster labels and
3 | %   return the value in 'score'.
4 | %
5 | %   Input  : true_labels    : N-by-1 vector containing true labels
6 | %            cluster_labels : N-by-1 vector containing cluster labels
7 | %
8 | %   Output : score          : clustering accuracy
9 | %
10 | %   Author : Wen-Yen Chen (wychen@alumni.cs.ucsb.edu)
11 | %            Chih-Jen Lin (cjlin@csie.ntu.edu.tw)
12 | 
13 | % Compute the confusion matrix 'cmat', where
14 | %   col index is for true label (CAT),
15 | %   row index is for cluster label (CLS).
16 | n = length(true_labels);
17 | cat = spconvert([(1:n)' true_labels ones(n,1)]);
18 | cls = spconvert([(1:n)' cluster_labels ones(n,1)]);
19 | cls = cls';
20 | cmat = full(cls * cat);
21 | 
22 | %
23 | % Calculate accuracy
24 | %
25 | [match, cost] = hungarian(-cmat);
26 | score = 100*(-cost/n);
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/demo_kpca_knn.m:
--------------------------------------------------------------------------------
1 | % demo_kpca_knn
2 | 
3 | load('./Data/mushrooms.mat')
4 | X = full(X);
5 | n = size(X, 1);
6 | 
7 | % --------------------- parameters --------------------- %
8 | sigma = 3.0; % scaling parameter of the RBF kernel
9 | k = 5; % target rank
10 | 
11 | % ------- randomly partition training-test data ------- %
12 | ntrain = ceil(n * 0.8); % number of training data
13 | idx = randperm(n);
14 | X = X(idx, :);
15 | y = y(idx, :);
16 | Xtrain = X(1: ntrain, :);
17 | ytrain = y(1: ntrain);
18 | Xtest = X(ntrain + 1:end, :);
19 | ytest = y(ntrain + 1:end);
20 | 
21 | % ----------------- extract features ----------------- %
22 | [U, lambda, featuretrain] = kpcaTrain(full(Xtrain), sigma, k); % KPCA
23 | [featuretest] = kpcaTest(Xtrain, Xtest, sigma, U, lambda); % extract features from test data
24 | %[featuretest] = kpcaTestCUR(Xtrain, Xtest, sigma, U, lambda); % extract features from test data, with CUR to speed up
25 | 
26 | % ----------------- knn classification ----------------- %
27 | % knnclassify comes from the Bioinformatics Toolbox and has been removed in newer MATLAB releases; fitcknn/predict can be used instead
28 | [labels] = knnclassify(featuretest, featuretrain, ytrain);
29 | error = (labels ~= ytest);
30 | display(['error rate: ', num2str(sum(error) / length(error))]);
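
The LSR folder also has no demo script. The following minimal sketch illustrates InexactLSR on a synthetic overdetermined least-squares problem; it is not a file from this repository, and the problem size and sketch size s are arbitrary (s should be substantially larger than the number of columns d).

% illustrative sketch: sketched least-squares regression vs. the exact solution
n = 20000;  d = 50;                    % tall least-squares problem
A = randn(n, d);
xstar = randn(d, 1);
b = A * xstar + 0.01 * randn(n, 1);    % noisy right-hand side
s = 1000;                              % sketch size (s >> d)
xtilde = InexactLSR(A, b, s);          % solve the CountSketch-compressed problem
xexact = A \ b;                        % exact least-squares solution
disp(['residual (sketched): ', num2str(norm(A * xtilde - b)), ...
      '   residual (exact): ', num2str(norm(A * xexact - b))]);
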
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/nmi.m:
--------------------------------------------------------------------------------
1 | function score = nmi(true_labels, cluster_labels)
2 | %NMI Compute normalized mutual information (NMI) using the true and cluster
3 | %   labels and return the value in 'score'.
4 | %
5 | %   Input  : true_labels    : N-by-1 vector containing true labels
6 | %            cluster_labels : N-by-1 vector containing cluster labels
7 | %
8 | %   Output : score          : NMI value
9 | %
10 | %   Author : Shi Zhong, 2003.
11 | %            http://www.cse.fau.edu/~zhong/software/textclust.zip
12 | %
13 | %   Modified : Wen-Yen Chen (wychen@alumni.cs.ucsb.edu)
14 | %              Chih-Jen Lin (cjlin@csie.ntu.edu.tw)
15 | 
16 | % Compute the confusion matrix 'cmat', where
17 | %   col index is for true label (CAT),
18 | %   row index is for cluster label (CLS).
19 | n = length(true_labels);
20 | cat = spconvert([(1:n)' true_labels ones(n,1)]);
21 | cls = spconvert([(1:n)' cluster_labels ones(n,1)]);
22 | cls = cls';
23 | cmat = full(cls * cat);
24 | 
25 | n_i = sum(cmat, 1); % Total number of data for each true label (CAT), n_i
26 | n_j = sum(cmat, 2); % Total number of data for each cluster label (CLS), n_j
27 | 
28 | % Calculate n*n_ij / n_i*n_j
29 | [row, col] = size(cmat);
30 | product = repmat(n_i, [row, 1]) .* repmat(n_j, [1, col]);
31 | index = find(product > 0);
32 | n = sum(cmat(:));
33 | product(index) = (n*cmat(index)) ./ product(index);
34 | % Sum up n_ij*log()
35 | index = find(product > 0);
36 | product(index) = log(product(index));
37 | product = cmat .* product;
38 | score = sum(product(:));
39 | % Divide by sqrt( sum(n_i*log(n_i/n)) * sum(n_j*log(n_j/n)) )
40 | index = find(n_i > 0);
41 | n_i(index) = n_i(index) .* log(n_i(index)/n);
42 | index = find(n_j > 0);
43 | n_j(index) = n_j(index) .* log(n_j(index)/n);
44 | denominator = sqrt(sum(n_i) * sum(n_j));
45 | 
46 | % Check if the denominator is zero
47 | if denominator == 0
48 |     score = 0;
49 | else
50 |     score = score / denominator;
51 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/hungarian.m:
--------------------------------------------------------------------------------
1 | function [Matching,Cost] = Hungarian(Perf)
2 | %
3 | % [MATCHING,COST] = Hungarian_New(WEIGHTS)
4 | %
5 | % A function for finding a minimum edge weight matching given a MxN Edge
6 | % weight matrix WEIGHTS using the Hungarian Algorithm.
7 | %
8 | % An edge weight of Inf indicates that the pair of vertices given by its
9 | % position have no adjacent edge.
10 | %
11 | % MATCHING returns an MxN matrix with ones in the place of the matchings and
12 | % zeros elsewhere.
13 | % 14 | % COST returns the cost of the minimum matching 15 | 16 | % Written by: Alex Melin 30 June 2006 17 | 18 | 19 | % Initialize Variables 20 | Matching = zeros(size(Perf)); 21 | 22 | % Condense the Performance Matrix by removing any unconnected vertices to 23 | % increase the speed of the algorithm 24 | 25 | % Find the number in each column that are connected 26 | num_y = sum(~isinf(Perf),1); 27 | % Find the number in each row that are connected 28 | num_x = sum(~isinf(Perf),2); 29 | 30 | % Find the columns(vertices) and rows(vertices) that are isolated 31 | x_con = find(num_x~=0); 32 | y_con = find(num_y~=0); 33 | 34 | % Assemble Condensed Performance Matrix 35 | P_size = max(length(x_con),length(y_con)); 36 | P_cond = zeros(P_size); 37 | P_cond(1:length(x_con),1:length(y_con)) = Perf(x_con,y_con); 38 | if isempty(P_cond) 39 | Cost = 0; 40 | return 41 | end 42 | 43 | % Ensure that a perfect matching exists 44 | % Calculate a form of the Edge Matrix 45 | Edge = P_cond; 46 | Edge(P_cond~=Inf) = 0; 47 | % Find the deficiency(CNUM) in the Edge Matrix 48 | cnum = min_line_cover(Edge); 49 | 50 | % Project additional vertices and edges so that a perfect matching 51 | % exists 52 | Pmax = max(max(P_cond(P_cond~=Inf))); 53 | P_size = length(P_cond)+cnum; 54 | P_cond = ones(P_size)*Pmax; 55 | P_cond(1:length(x_con),1:length(y_con)) = Perf(x_con,y_con); 56 | 57 | %************************************************* 58 | % MAIN PROGRAM: CONTROLS WHICH STEP IS EXECUTED 59 | %************************************************* 60 | exit_flag = 1; 61 | stepnum = 1; 62 | while exit_flag 63 | switch stepnum 64 | case 1 65 | [P_cond,stepnum] = step1(P_cond); 66 | case 2 67 | [r_cov,c_cov,M,stepnum] = step2(P_cond); 68 | case 3 69 | [c_cov,stepnum] = step3(M,P_size); 70 | case 4 71 | [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(P_cond,r_cov,c_cov,M); 72 | case 5 73 | [M,r_cov,c_cov,stepnum] = step5(M,Z_r,Z_c,r_cov,c_cov); 74 | case 6 75 | [P_cond,stepnum] = step6(P_cond,r_cov,c_cov); 76 | case 7 77 | exit_flag = 0; 78 | end 79 | end 80 | 81 | % Remove all the virtual satellites and targets and uncondense the 82 | % Matching to the size of the original performance matrix. 83 | Matching(x_con,y_con) = M(1:length(x_con),1:length(y_con)); 84 | Cost = sum(sum(Perf(Matching==1))); 85 | 86 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 87 | % STEP 1: Find the smallest number of zeros in each row 88 | % and subtract that minimum from its row 89 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 90 | 91 | function [P_cond,stepnum] = step1(P_cond) 92 | 93 | P_size = length(P_cond); 94 | 95 | % Loop throught each row 96 | for ii = 1:P_size 97 | rmin = min(P_cond(ii,:)); 98 | P_cond(ii,:) = P_cond(ii,:)-rmin; 99 | end 100 | 101 | stepnum = 2; 102 | 103 | %************************************************************************** 104 | % STEP 2: Find a zero in P_cond. If there are no starred zeros in its 105 | % column or row start the zero. 
Repeat for each zero 106 | %************************************************************************** 107 | 108 | function [r_cov,c_cov,M,stepnum] = step2(P_cond) 109 | 110 | % Define variables 111 | P_size = length(P_cond); 112 | r_cov = zeros(P_size,1); % A vector that shows if a row is covered 113 | c_cov = zeros(P_size,1); % A vector that shows if a column is covered 114 | M = zeros(P_size); % A mask that shows if a position is starred or primed 115 | 116 | for ii = 1:P_size 117 | for jj = 1:P_size 118 | if P_cond(ii,jj) == 0 && r_cov(ii) == 0 && c_cov(jj) == 0 119 | M(ii,jj) = 1; 120 | r_cov(ii) = 1; 121 | c_cov(jj) = 1; 122 | end 123 | end 124 | end 125 | 126 | % Re-initialize the cover vectors 127 | r_cov = zeros(P_size,1); % A vector that shows if a row is covered 128 | c_cov = zeros(P_size,1); % A vector that shows if a column is covered 129 | stepnum = 3; 130 | 131 | %************************************************************************** 132 | % STEP 3: Cover each column with a starred zero. If all the columns are 133 | % covered then the matching is maximum 134 | %************************************************************************** 135 | 136 | function [c_cov,stepnum] = step3(M,P_size) 137 | 138 | c_cov = sum(M,1); 139 | if sum(c_cov) == P_size 140 | stepnum = 7; 141 | else 142 | stepnum = 4; 143 | end 144 | 145 | %************************************************************************** 146 | % STEP 4: Find a noncovered zero and prime it. If there is no starred 147 | % zero in the row containing this primed zero, Go to Step 5. 148 | % Otherwise, cover this row and uncover the column containing 149 | % the starred zero. Continue in this manner until there are no 150 | % uncovered zeros left. Save the smallest uncovered value and 151 | % Go to Step 6. 152 | %************************************************************************** 153 | function [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(P_cond,r_cov,c_cov,M) 154 | 155 | P_size = length(P_cond); 156 | 157 | zflag = 1; 158 | while zflag 159 | % Find the first uncovered zero 160 | row = 0; col = 0; exit_flag = 1; 161 | ii = 1; jj = 1; 162 | while exit_flag 163 | if P_cond(ii,jj) == 0 && r_cov(ii) == 0 && c_cov(jj) == 0 164 | row = ii; 165 | col = jj; 166 | exit_flag = 0; 167 | end 168 | jj = jj + 1; 169 | if jj > P_size; jj = 1; ii = ii+1; end 170 | if ii > P_size; exit_flag = 0; end 171 | end 172 | 173 | % If there are no uncovered zeros go to step 6 174 | if row == 0 175 | stepnum = 6; 176 | zflag = 0; 177 | Z_r = 0; 178 | Z_c = 0; 179 | else 180 | % Prime the uncovered zero 181 | M(row,col) = 2; 182 | % If there is a starred zero in that row 183 | % Cover the row and uncover the column containing the zero 184 | if sum(find(M(row,:)==1)) ~= 0 185 | r_cov(row) = 1; 186 | zcol = find(M(row,:)==1); 187 | c_cov(zcol) = 0; 188 | else 189 | stepnum = 5; 190 | zflag = 0; 191 | Z_r = row; 192 | Z_c = col; 193 | end 194 | end 195 | end 196 | 197 | %************************************************************************** 198 | % STEP 5: Construct a series of alternating primed and starred zeros as 199 | % follows. Let Z0 represent the uncovered primed zero found in Step 4. 200 | % Let Z1 denote the starred zero in the column of Z0 (if any). 201 | % Let Z2 denote the primed zero in the row of Z1 (there will always 202 | % be one). Continue until the series terminates at a primed zero 203 | % that has no starred zero in its column. 
Unstar each starred 204 | % zero of the series, star each primed zero of the series, erase 205 | % all primes and uncover every line in the matrix. Return to Step 3. 206 | %************************************************************************** 207 | 208 | function [M,r_cov,c_cov,stepnum] = step5(M,Z_r,Z_c,r_cov,c_cov) 209 | 210 | zflag = 1; 211 | ii = 1; 212 | while zflag 213 | % Find the index number of the starred zero in the column 214 | rindex = find(M(:,Z_c(ii))==1); 215 | if rindex > 0 216 | % Save the starred zero 217 | ii = ii+1; 218 | % Save the row of the starred zero 219 | Z_r(ii,1) = rindex; 220 | % The column of the starred zero is the same as the column of the 221 | % primed zero 222 | Z_c(ii,1) = Z_c(ii-1); 223 | else 224 | zflag = 0; 225 | end 226 | 227 | % Continue if there is a starred zero in the column of the primed zero 228 | if zflag == 1; 229 | % Find the column of the primed zero in the last starred zeros row 230 | cindex = find(M(Z_r(ii),:)==2); 231 | ii = ii+1; 232 | Z_r(ii,1) = Z_r(ii-1); 233 | Z_c(ii,1) = cindex; 234 | end 235 | end 236 | 237 | % UNSTAR all the starred zeros in the path and STAR all primed zeros 238 | for ii = 1:length(Z_r) 239 | if M(Z_r(ii),Z_c(ii)) == 1 240 | M(Z_r(ii),Z_c(ii)) = 0; 241 | else 242 | M(Z_r(ii),Z_c(ii)) = 1; 243 | end 244 | end 245 | 246 | % Clear the covers 247 | r_cov = r_cov.*0; 248 | c_cov = c_cov.*0; 249 | 250 | % Remove all the primes 251 | M(M==2) = 0; 252 | 253 | stepnum = 3; 254 | 255 | % ************************************************************************* 256 | % STEP 6: Add the minimum uncovered value to every element of each covered 257 | % row, and subtract it from every element of each uncovered column. 258 | % Return to Step 4 without altering any stars, primes, or covered lines. 259 | %************************************************************************** 260 | 261 | function [P_cond,stepnum] = step6(P_cond,r_cov,c_cov) 262 | a = find(r_cov == 0); 263 | b = find(c_cov == 0); 264 | minval = min(min(P_cond(a,b))); 265 | 266 | P_cond(find(r_cov == 1),:) = P_cond(find(r_cov == 1),:) + minval; 267 | P_cond(:,find(c_cov == 0)) = P_cond(:,find(c_cov == 0)) - minval; 268 | 269 | stepnum = 4; 270 | 271 | function cnum = min_line_cover(Edge) 272 | 273 | % Step 2 274 | [r_cov,c_cov,M,stepnum] = step2(Edge); 275 | % Step 3 276 | [c_cov,stepnum] = step3(M,length(Edge)); 277 | % Step 4 278 | [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(Edge,r_cov,c_cov,M); 279 | % Calculate the deficiency 280 | cnum = length(Edge)-sum(r_cov)-sum(c_cov); 281 | --------------------------------------------------------------------------------
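
A small sanity check for the clustering metrics above; this is a minimal usage sketch, not part of the repository, and it assumes labels are positive integers. The permuted cluster labels below describe the same partition as the true labels, so both metrics should report a perfect score after the Hungarian matching.

% illustrative sketch: accuracy() and nmi() on a toy labeling
true_labels    = [1; 1; 2; 2; 3; 3];
cluster_labels = [2; 2; 3; 3; 1; 1];           % same partition, permuted label names
ac  = accuracy(true_labels, cluster_labels);   % should be 100 (percent)
NMI = nmi(true_labels, cluster_labels);        % should be 1
display(['Accuracy: ', num2str(ac), '   nmi: ', num2str(NMI)]);
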