├── setup.m
├── LICENSE
├── Data
│   ├── PenDigit.mat
│   ├── abalone.mat
│   ├── original.png
│   ├── Letter15K.mat
│   ├── mushrooms.mat
│   └── rbf.m
├── Sketch
│   ├── GaussianProjection.m
│   ├── LeverageScoreSampling.m
│   ├── srht.m
│   └── CountSketch.m
├── SPSD_Sketching
│   ├── GPR
│   │   ├── gprTest.m
│   │   ├── gprTrain.m
│   │   ├── gprTestCUR.m
│   │   └── demo_gpr.m
│   ├── KPCA
│   │   ├── kpcaTest.m
│   │   ├── kpcaTestCUR.m
│   │   ├── kpcaTrain.m
│   │   └── demo_kpca_knn.m
│   ├── Spectral Clustering
│   │   ├── demo_spectralclustering.m
│   │   ├── SpectralClusteringFaster.m
│   │   ├── accuracy.m
│   │   ├── nmi.m
│   │   └── hungarian.m
│   ├── Nystrom.m
│   └── spsdFaster.m
├── SVD
│   ├── ksvdPrototype.m
│   ├── BlockLanczos.m
│   └── ksvdFaster.m
├── CUR
│   ├── curPrototype.m
│   ├── curFaster.m
│   ├── demo_cur.m
│   └── curFasterKernel.m
├── LSR
│   ├── InexactCurTypeRegression.m
│   └── InexactLSR.m
└── README.md

/setup.m:
--------------------------------------------------------------------------------
1 | addpath(genpath(pwd));
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/LICENSE
--------------------------------------------------------------------------------
/Data/PenDigit.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/PenDigit.mat
--------------------------------------------------------------------------------
/Data/abalone.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/abalone.mat
--------------------------------------------------------------------------------
/Data/original.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/original.png
--------------------------------------------------------------------------------
/Data/Letter15K.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/Letter15K.mat
--------------------------------------------------------------------------------
/Data/mushrooms.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangshusen/RandMatrixMatlab/HEAD/Data/mushrooms.mat
--------------------------------------------------------------------------------
/Sketch/GaussianProjection.m:
--------------------------------------------------------------------------------
1 | function [C] = GaussianProjection(A, c)
2 | n = size(A, 2);
3 | S = randn(n, c) / sqrt(c);
4 | C = A * S;
5 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/gprTest.m:
--------------------------------------------------------------------------------
1 | function [ytest] = gprTest(Xtrain, Xtest, sigma, w)
2 | Ktest = rbf(Xtest, Xtrain, sigma);
3 | ytest = Ktest * w;
4 | end
--------------------------------------------------------------------------------
/SVD/ksvdPrototype.m:
--------------------------------------------------------------------------------
1 | function [Utilde, Stilde, Vtilde] = ksvdPrototype(A, k, s)
2 | C = CountSketch(A, s);
3 | [Q, R] = qr(C, 0);
4 | [Ubar, Stilde, Vtilde] = svds(Q' * A, k);
5 | Utilde = Q * Ubar;
6 | end
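
A quick way to sanity-check ksvdPrototype is to compare it against MATLAB's svds on a synthetic test matrix. The snippet below is a minimal usage sketch, not a file from this repository; the matrix sizes and the sketch size s are arbitrary illustrative choices, and it assumes setup.m has already been run so that CountSketch is on the path.

% illustrative sketch: approximate rank-k SVD vs. exact truncated SVD
A = randn(2000, 20) * randn(20, 300) + 0.1 * randn(2000, 300);  % noisy low-rank matrix
k = 10;                                   % target rank
s = 10 * k;                               % sketch size (k <= s)
[Utilde, Stilde, Vtilde] = ksvdPrototype(A, k, s);
[U, S, V] = svds(A, k);                   % exact rank-k truncated SVD
errRand  = norm(A - Utilde * Stilde * Vtilde', 'fro') / norm(A, 'fro');
errExact = norm(A - U * S * V', 'fro') / norm(A, 'fro');
disp(['randomized k-SVD error: ', num2str(errRand), ...
      ',  exact k-SVD error: ', num2str(errExact)]);
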
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/kpcaTest.m:
--------------------------------------------------------------------------------
1 | function [featuretest] = kpcaTest(Xtrain, Xtest, sigma, U, lambda)
2 | Ktest = rbf(Xtest, Xtrain, sigma);
3 | U = bsxfun(@times, U, (1 ./ sqrt(lambda))');
4 | featuretest = Ktest * U;
5 | end
--------------------------------------------------------------------------------
/CUR/curPrototype.m:
--------------------------------------------------------------------------------
1 | function [C, U, R] = curPrototype(A, c, r)
2 | [m, n] = size(A);
3 | SC = sort(randsample(n, c));
4 | SR = sort(randsample(m, r));
5 | C = A(:, SC);
6 | R = A(SR, :);
7 | U = pinv(C) * A * pinv(R);
8 | 
9 | end
--------------------------------------------------------------------------------
/LSR/InexactCurTypeRegression.m:
--------------------------------------------------------------------------------
1 | function [Xtilde] = InexactCurTypeRegression(C, R, A, sc, sr)
2 | [~, idxC] = LeverageScoreSampling(C', sc);
3 | [~, idxR] = LeverageScoreSampling(R, sr);
4 | Xtilde = pinv(C(idxC, :)) * A(idxC, idxR) * pinv(R(:, idxR));
5 | end
--------------------------------------------------------------------------------
/LSR/InexactLSR.m:
--------------------------------------------------------------------------------
1 | function [xtilde] = InexactLSR(A, b, s)
2 | d = size(A, 2);
3 | sketch = (CountSketch([A, b]', s))';
4 | Asketch = sketch(:, 1:d); % Asketch = S' * A
5 | bsketch = sketch(:, end); % bsketch = S' * b
6 | xtilde = Asketch \ bsketch;
7 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/gprTrain.m:
--------------------------------------------------------------------------------
1 | function [w] = gprTrain(Xtrain, ytrain, sigma, alpha)
2 | l = 100; % can be tuned
3 | L = Nystrom(Xtrain, sigma, l); % K is approximated by L * L'
4 | l = size(L, 2);
5 | w = L' * ytrain;
6 | w = (alpha * eye(l) + L' * L) \ w;
7 | w = ytrain - L * w;
8 | w = w / alpha;
9 | end
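
gprTrain and gprTest are meant to be used as a pair, as in demo_gpr.m further below. The following is a minimal sketch on synthetic 1-D data (the data, sigma, and alpha are arbitrary; the training set must contain at least 100 points because gprTrain's internal Nystrom call samples l = 100 landmarks):

% illustrative sketch: GPR with the Nystrom-approximated kernel on toy data
Xtrain = linspace(0, 10, 500)';                 % 500 training inputs (>= 100 required)
ytrain = sin(Xtrain) + 0.1 * randn(500, 1);     % noisy observations
Xtest  = linspace(0, 10, 200)';
sigma  = 1.0;                                   % RBF kernel width
alpha  = 0.1;                                   % observation noise / regularization
w = gprTrain(Xtrain, ytrain, sigma, alpha);     % w approximates (K + alpha*I) \ ytrain
ypred = gprTest(Xtrain, Xtest, sigma, w);       % predictive mean at the test inputs
plot(Xtrain, ytrain, '.', Xtest, ypred, '-');
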
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RandMatrixMatlab
2 | Simple MATLAB code for randomized matrix computations.
3 | 
4 | First run `setup.m` to add all the subfolders to the MATLAB path.
5 | 
6 | Detailed descriptions of the algorithms are given in the article:
7 | "A Practical Guide to Randomized Matrix Computations with MATLAB Implementations",
8 | http://arxiv.org/abs/1505.07570
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/gprTestCUR.m:
--------------------------------------------------------------------------------
1 | function [ytest] = gprTestCUR(Xtrain, Xtest, sigma, w)
2 | c = max(100, ceil(size(Xtrain, 1) / 20)); % can be tuned
3 | r = max(100, ceil(size(Xtest, 1) / 20)); % can be tuned
4 | [C, Utilde, R] = curFasterKernel(Xtest, Xtrain, sigma, c, r);
5 | ytest = C * (Utilde * (R * w));
6 | end
--------------------------------------------------------------------------------
/Sketch/LeverageScoreSampling.m:
--------------------------------------------------------------------------------
1 | function [C, idx] = LeverageScoreSampling(A, s)
2 | n = size(A, 2);
3 | [~, ~, V] = svd(A, 'econ');
4 | leveragescores = sum(V.^2, 2);
5 | prob = leveragescores / sum(leveragescores);
6 | idx = randsample(n, s, true, prob);
7 | idx = unique(idx); % eliminate duplicates
8 | C = A(:, idx);
9 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/kpcaTestCUR.m:
--------------------------------------------------------------------------------
1 | function [featuretest] = kpcaTestCUR(Xtrain, Xtest, sigma, U, lambda)
2 | c = max(100, ceil(size(Xtrain, 1) / 20)); % can be tuned
3 | r = max(100, ceil(size(Xtest, 1) / 20)); % can be tuned
4 | [C, Utilde, R] = curFasterKernel(Xtest, Xtrain, sigma, c, r);
5 | U = bsxfun(@times, U, (1 ./ sqrt(lambda))');
6 | featuretest = C * (Utilde * (R * U));
7 | end
--------------------------------------------------------------------------------
/Sketch/srht.m:
--------------------------------------------------------------------------------
1 | function [C] = srht(A, s)
2 | n = size(A, 2);
3 | sgn = randi(2, [1, n]) * 2 - 3; % one half are +1 and the rest are -1
4 | A = bsxfun(@times, A, sgn); % flip the signs of each column w.p. 50%
5 | n = 2^(ceil(log2(n)));
6 | C = (fwht(A', n))'; % Hadamard transform
7 | idx = sort(randsample(n, s));
8 | C = C(:, idx); % subsampling
9 | C = C * (n / sqrt(s));
10 | end
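
All the routines in the Sketch folder map a matrix A to a smaller sketch C = A * S whose Gram matrix approximates that of A. Below is a minimal numerical check of this property, not a file from this repository; the test matrix and sketch size are arbitrary, srht additionally requires fwht (Signal Processing Toolbox), and LeverageScoreSampling is omitted because its selected columns are not rescaled.

% illustrative sketch: the sketches approximately preserve A * A'
A = randn(500, 5000);                 % wide test matrix
s = 500;                              % sketch size
G = A * A';                           % exact Gram matrix
C1 = GaussianProjection(A, s);
C2 = CountSketch(A, s);
C3 = srht(A, s);                      % needs fwht from the Signal Processing Toolbox
err = @(C) norm(G - C * C', 'fro') / norm(G, 'fro');
disp(['Gaussian: ', num2str(err(C1)), ...
      '   CountSketch: ', num2str(err(C2)), ...
      '   SRHT: ', num2str(err(C3))]);
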
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/demo_spectralclustering.m:
--------------------------------------------------------------------------------
1 | % demo_spectralclustering
2 | load('./Data/mushrooms.mat')
3 | X = full(X);
4 | sigma = 3.0; % scaling parameter of the RBF kernel
5 | k = 2; % number of classes
6 | [labels] = SpectralClusteringFaster(X, sigma, k);
7 | ac = accuracy(labels, y);
8 | NMI = nmi(labels, y);
9 | display(['Accuracy: ', num2str(ac), ' nmi: ', num2str(NMI)]);
--------------------------------------------------------------------------------
/SPSD_Sketching/Nystrom.m:
--------------------------------------------------------------------------------
1 | function [L] = Nystrom(X, sigma, s)
2 | k = ceil(0.8 * s); % can be tuned
3 | n = size(X, 1);
4 | S = sort(randsample(n, s)); % uniform sampling
5 | C = rbf(X, X(S, :), sigma); % C = K(:, S)
6 | W = C(S, :);
7 | [UW, SW, ~] = svd(W);
8 | SW = diag(SW);
9 | SW = 1 ./ sqrt(SW(1:k));
10 | UW = bsxfun(@times, UW(:, 1:k), SW');
11 | L = C * UW; % K is approximated by L * L'
12 | end
--------------------------------------------------------------------------------
/Data/rbf.m:
--------------------------------------------------------------------------------
1 | function [K] = rbf(X1, X2, sigma)
2 | % input:
3 | %     X1: n1-by-d
4 | %     X2: n2-by-d
5 | % output:
6 | %     K: n1-by-n2
7 | %     K(i,j) = exp( -norm(X1(i, :) - X2(j, :))^2 / (2 * sigma^2) )
8 | 
9 | 
10 | K = X1 * X2';
11 | X1_row_sq = sum(X1.^2, 2) / 2;
12 | X2_row_sq = sum(X2.^2, 2) / 2;
13 | K = bsxfun(@minus, K, X1_row_sq);
14 | K = bsxfun(@minus, K, X2_row_sq');
15 | K = K / (sigma^2);
16 | K = exp(K);
17 | 
18 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/kpcaTrain.m:
--------------------------------------------------------------------------------
1 | function [U, lambda, featuretrain] = kpcaTrain(Xtrain, sigma, k)
2 | s = k * 10; % can be tuned
3 | [QC, Z] = spsdFaster(Xtrain, sigma, s); % QC has orthonormal columns
4 | clear Xtrain
5 | [UZ, SZ, ~] = svd(Z);
6 | U = QC * UZ(:, 1:k); % U contains the top k eigenvectors
7 | lambda = diag(SZ);
8 | lambda = lambda(1:k); % lambda is the vector containing the top k eigenvalues
9 | featuretrain = bsxfun(@times, U, (sqrt(lambda))');
10 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/spsdFaster.m:
--------------------------------------------------------------------------------
1 | function [QC, Z] = spsdFaster(X, sigma, s)
2 | p = 4 * s; % can be tuned
3 | n = size(X, 1);
4 | S = sort(randsample(n, s)); % uniform sampling
5 | C = rbf(X, X(S, :), sigma);
6 | [QC, ~] = qr(C, 0);
7 | q = sum(QC.^2, 2); % the sampling probability
8 | q = q / sum(q);
9 | P = randsample(n, p, true, q);
10 | P = unique([P; S]); % enforce P contains S
11 | PQCinv = pinv(QC(P, :));
12 | Ksub = rbf(X(P, :), X(P, :), sigma);
13 | Z = PQCinv * Ksub * PQCinv';
14 | end
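
Nystrom and spsdFaster both produce low-rank approximations of the RBF kernel matrix without forming it: K is approximated by L * L' and by QC * Z * QC', respectively. A minimal sketch for checking the approximation quality on a small random data set (the data size, sigma, and sketch size s are arbitrary illustrative choices):

% illustrative sketch: SPSD (kernel) approximation error of Nystrom and spsdFaster
X = randn(1000, 10);                          % 1000 points in 10 dimensions
sigma = 2.0;                                  % RBF kernel width
s = 50;                                       % sketch size
K = rbf(X, X, sigma);                         % exact kernel matrix (small enough to form here)
L = Nystrom(X, sigma, s);                     % K is approximated by L * L'
[QC, Z] = spsdFaster(X, sigma, s);            % K is approximated by QC * Z * QC'
errNys  = norm(K - L * L', 'fro') / norm(K, 'fro');
errFast = norm(K - QC * Z * QC', 'fro') / norm(K, 'fro');
disp(['Nystrom: ', num2str(errNys), '   spsdFaster: ', num2str(errFast)]);
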
--------------------------------------------------------------------------------
/CUR/curFaster.m:
--------------------------------------------------------------------------------
1 | function [C, U, R] = curFaster(A, c, r)
2 | pc = 2 * (r + c); % can be tuned
3 | pr = 2 * (r + c); % can be tuned
4 | [m, n] = size(A);
5 | SC = sort(randsample(n, c));
6 | SR = sort(randsample(m, r));
7 | C = A(:, SC);
8 | R = A(SR, :);
9 | PC = sort(randsample(m, pc));
10 | PR = sort(randsample(n, pr));
11 | PC = unique([PC; SR]); % enforce PC to contain SR
12 | PR = unique([PR; SC]); % enforce PR to contain SC
13 | U = pinv(C(PC, :)) * A(PC, PR) * pinv(R(:, PR));
14 | 
15 | end
--------------------------------------------------------------------------------
/SVD/BlockLanczos.m:
--------------------------------------------------------------------------------
1 | function [U, S, V] = BlockLanczos(A, k, q)
2 | s = 2 * k; % can be tuned
3 | [m, n] = size(A);
4 | C = A * randn(n, s);
5 | Krylov = zeros(m, s * q);
6 | Krylov(:, 1:s) = C;
7 | for i = 2: q
8 |     C = A' * C;
9 |     C = A * C;
10 |     [C, ~] = qr(C, 0); % optional
11 |     Krylov(:, (i-1)*s+1: i*s) = C;
12 | end
13 | [Q, ~] = qr(Krylov, 0);
14 | [Ubar, S, V] = svd(Q' * A, 'econ');
15 | Ubar = Ubar(:, 1:k);
16 | S = S(1:k, 1:k);
17 | V = V(:, 1:k);
18 | U = Q * Ubar;
19 | end
--------------------------------------------------------------------------------
/Sketch/CountSketch.m:
--------------------------------------------------------------------------------
1 | function [C] = CountSketch(A, s)
2 | [m, n] = size(A);
3 | sgn = randi(2, [1, n]) * 2 - 3; % one half are +1 and the rest are -1
4 | A = bsxfun(@times, A, sgn); % flip the signs of each column w.p. 50%
5 | ll = randsample(s, n, true); % sample n items from [s] with replacement
6 | C = zeros(m, s); % initialize C
7 | for j = 1: n
8 |     C(:, ll(j)) = C(:, ll(j)) + A(:, j);
9 | end
10 | % for j = 1: s
11 | %     idx = (ll == j); % find the columns of A which should be added to C(:, j)
12 | %     C(:, j) = sum(A(:, idx), 2);
13 | % end
14 | end
--------------------------------------------------------------------------------
/CUR/demo_cur.m:
--------------------------------------------------------------------------------
1 | % demo_CUR
2 | 
3 | % ----------- parameters ----------- %
4 | c = 100;
5 | r = 100;
6 | 
7 | % ----------- load data ----------- %
8 | A = imread('./Data/original.png');
9 | A = double(A) / 256;
10 | 
11 | % ----------- CUR ----------- %
12 | [C1, U1, R1] = curPrototype(A, c, r);
13 | [C2, U2, R2] = curFaster(A, c, r);
14 | 
15 | % ----------- show images ----------- %
16 | subplot(1, 3, 1), imshow(A), title('A')
17 | subplot(1, 3, 2), imshow(C1 * U1 * R1), title('C * Ustar * R')
18 | subplot(1, 3, 3), imshow(C2 * U2 * R2), title('C * Utilde * R')
--------------------------------------------------------------------------------
/CUR/curFasterKernel.m:
--------------------------------------------------------------------------------
1 | function [C, U, R] = curFasterKernel(Xtest, Xtrain, sigma, c, r)
2 | pc = 2 * (r + c); % can be tuned
3 | pr = 2 * (r + c); % can be tuned
4 | m = size(Xtest, 1);
5 | n = size(Xtrain, 1);
6 | SC = sort(randsample(n, c));
7 | SR = sort(randsample(m, r));
8 | C = rbf(Xtest, Xtrain(SC, :), sigma);
9 | R = rbf(Xtest(SR, :), Xtrain, sigma);
10 | PC = sort(randsample(m, pc));
11 | PR = sort(randsample(n, pr));
12 | PC = unique([PC; SR]); % enforce PC to contain SR
13 | PR = unique([PR; SC]); % enforce PR to contain SC
14 | Kblock = rbf(Xtest(PC, :), Xtrain(PR, :), sigma);
15 | U = pinv(C(PC, :)) * Kblock * pinv(R(:, PR));
16 | end
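
demo_cur.m above works on an image; the snippet below is an alternative, self-contained check of curPrototype and curFaster on a synthetic noisy low-rank matrix. It is a minimal usage sketch, not a file from this repository; all sizes are arbitrary, chosen so that the oversampling 2*(c+r) in curFaster does not exceed the matrix dimensions.

% illustrative sketch: relative error of the two CUR decompositions
A = randn(1000, 30) * randn(30, 800) + 0.01 * randn(1000, 800);  % approximately rank 30
c = 100;  r = 100;                     % number of sampled columns / rows
[C1, U1, R1] = curPrototype(A, c, r);  % U computed from all of A
[C2, U2, R2] = curFaster(A, c, r);     % U computed from a subsampled block of A
err1 = norm(A - C1 * U1 * R1, 'fro') / norm(A, 'fro');
err2 = norm(A - C2 * U2 * R2, 'fro') / norm(A, 'fro');
disp(['curPrototype: ', num2str(err1), '   curFaster: ', num2str(err2)]);
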
--------------------------------------------------------------------------------
/SVD/ksvdFaster.m:
--------------------------------------------------------------------------------
1 | function [Utilde, Stilde, Vtilde] = ksvdFaster(A, k, s, p1, p2)
2 | n = size(A, 2);
3 | C = CountSketch(A, s);
4 | A = [A, C];
5 | A = A';
6 | sketch = CountSketch(A, p1);
7 | clear A % A ((n+s)-by-m) will not be used
8 | sketch = GaussianProjection(sketch, p2);
9 | sketch = sketch';
10 | L = sketch(:, 1:n);
11 | D = sketch(:, n+1:end);
12 | clear sketch % sketch (p2-by-(n+s)) will not be used
13 | [QD, RD] = qr(D, 0);
14 | [Ubar, Sbar, Vbar] = svds(QD' * L, k);
15 | clear L % L (p2-by-n) will not be used
16 | C = C * (pinv(RD) * (Ubar * Sbar));
17 | [Utilde, Stilde, Vhat] = svd(C, 'econ');
18 | Vtilde = Vbar * Vhat;
19 | 
20 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/SpectralClusteringFaster.m:
--------------------------------------------------------------------------------
1 | function [labels] = SpectralClusteringFaster(X, sigma, k)
2 | s = k * 10; % can be tuned
3 | n = size(X, 1);
4 | [QC, Z] = spsdFaster(X, sigma, s); % K is approximated by QC * Z * QC'
5 | [UZ, SZ, ~] = svd(Z);
6 | SZ = sqrt(diag(SZ));
7 | UZ = bsxfun(@times, UZ, SZ'); % now Z = UZ * UZ'
8 | L = QC * UZ; % now K is approximated by L * L'
9 | d = ones(n, 1);
10 | d = L * (L' * d); % diagonal of the degree matrix D
11 | d = 1 ./ sqrt(d);
12 | L = bsxfun(@times, L, d); % now G is approximated by L * L'
13 | [U, ~, ~] = svd(L, 'econ');
14 | U = U(:, 1:k);
15 | U = normr(U); % normalize the rows of U
16 | labels = kmeans(U, k, 'Replicates', 3);
17 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/GPR/demo_gpr.m:
--------------------------------------------------------------------------------
1 | % demo_gpr
2 | 
3 | load('./Data/abalone.mat')
4 | X = full(X);
5 | n = size(X, 1);
6 | 
7 | % --------------------- parameters --------------------- %
8 | sigma = 1.0; % scaling parameter of the RBF kernel
9 | alpha = 1.0; % noise level in the observations
10 | 
11 | % ------- randomly partition training-test data ------- %
12 | ntrain = ceil(n * 0.8); % number of training data
13 | idx = randperm(n);
14 | X = X(idx, :);
15 | y = y(idx, :);
16 | Xtrain = X(1: ntrain, :);
17 | ytrain = y(1: ntrain);
18 | Xtest = X(ntrain + 1:end, :);
19 | ytest = y(ntrain + 1:end);
20 | 
21 | % ----------------- GPR predictive mean ----------------- %
22 | w = gprTrain(Xtrain, ytrain, sigma, alpha);
23 | labels = gprTest(Xtrain, Xtest, sigma, w);
24 | %labels = gprTestCUR(Xtrain, Xtest, sigma, w); % use CUR to speed up
25 | error = norm(labels - ytest) / norm(ytest);
26 | display(['error ratio: ', num2str(error)]);
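
There is no demo script for the SVD folder; the following minimal sketch shows one possible way to call BlockLanczos and ksvdFaster and compare them with svds. The test matrix and the parameters q, s, p1, p2 (iteration count, sketch size, and the two oversampling sizes) are arbitrary illustrative choices, not recommended settings.

% illustrative sketch: randomized rank-k SVD via BlockLanczos and ksvdFaster
A = randn(2000, 40) * randn(40, 500) + 0.05 * randn(2000, 500);
k = 10;                         % target rank
q = 5;                          % number of Block Lanczos iterations
s = 40;  p1 = 200;  p2 = 80;    % sketch size and oversampling for ksvdFaster
[U1, S1, V1] = BlockLanczos(A, k, q);
[U2, S2, V2] = ksvdFaster(A, k, s, p1, p2);
[U0, S0, V0] = svds(A, k);
err = @(U, S, V) norm(A - U * S * V', 'fro') / norm(A, 'fro');
disp(['BlockLanczos: ', num2str(err(U1, S1, V1)), ...
      '   ksvdFaster: ', num2str(err(U2, S2, V2)), ...
      '   svds: ', num2str(err(U0, S0, V0))]);
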
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/accuracy.m:
--------------------------------------------------------------------------------
1 | function score = accuracy(true_labels, cluster_labels)
2 | %ACCURACY Compute clustering accuracy using the true and cluster labels and
3 | %   return the value in 'score'.
4 | %
5 | %   Input  : true_labels    : N-by-1 vector containing true labels
6 | %            cluster_labels : N-by-1 vector containing cluster labels
7 | %
8 | %   Output : score          : clustering accuracy
9 | %
10 | %   Author : Wen-Yen Chen (wychen@alumni.cs.ucsb.edu)
11 | %            Chih-Jen Lin (cjlin@csie.ntu.edu.tw)
12 | 
13 | % Compute the confusion matrix 'cmat', where
14 | %   col index is for true label (CAT),
15 | %   row index is for cluster label (CLS).
16 | n = length(true_labels);
17 | cat = spconvert([(1:n)' true_labels ones(n,1)]);
18 | cls = spconvert([(1:n)' cluster_labels ones(n,1)]);
19 | cls = cls';
20 | cmat = full(cls * cat);
21 | 
22 | %
23 | % Calculate accuracy
24 | %
25 | [match, cost] = hungarian(-cmat);
26 | score = 100*(-cost/n);
--------------------------------------------------------------------------------
/SPSD_Sketching/KPCA/demo_kpca_knn.m:
--------------------------------------------------------------------------------
1 | % demo_kpca_knn
2 | 
3 | load('./Data/mushrooms.mat')
4 | X = full(X);
5 | n = size(X, 1);
6 | 
7 | % --------------------- parameters --------------------- %
8 | sigma = 3.0; % scaling parameter of the RBF kernel
9 | k = 5; % target rank
10 | 
11 | % ------- randomly partition training-test data ------- %
12 | ntrain = ceil(n * 0.8); % number of training data
13 | idx = randperm(n);
14 | X = X(idx, :);
15 | y = y(idx, :);
16 | Xtrain = X(1: ntrain, :);
17 | ytrain = y(1: ntrain);
18 | Xtest = X(ntrain + 1:end, :);
19 | ytest = y(ntrain + 1:end);
20 | 
21 | % ----------------- extract features ----------------- %
22 | [U, lambda, featuretrain] = kpcaTrain(full(Xtrain), sigma, k); % KPCA
23 | [featuretest] = kpcaTest(Xtrain, Xtest, sigma, U, lambda); % extract features from test data
24 | %[featuretest] = kpcaTestCUR(Xtrain, Xtest, sigma, U, lambda); % extract features from test data, with CUR to speed up
25 | 
26 | % ----------------- knn classification ----------------- %
27 | % knnclassify comes from the Bioinformatics Toolbox and has been removed in newer MATLAB releases; fitcknn/predict can be used instead
28 | [labels] = knnclassify(featuretest, featuretrain, ytrain);
29 | error = (labels ~= ytest);
30 | display(['error rate: ', num2str(sum(error) / length(error))]);
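
The LSR folder also has no demo script. The following minimal sketch illustrates InexactLSR on a synthetic overdetermined least-squares problem; it is not a file from this repository, and the problem size and sketch size s are arbitrary (s should be substantially larger than the number of columns d).

% illustrative sketch: sketched least-squares regression vs. the exact solution
n = 20000;  d = 50;                    % tall least-squares problem
A = randn(n, d);
xstar = randn(d, 1);
b = A * xstar + 0.01 * randn(n, 1);    % noisy right-hand side
s = 1000;                              % sketch size (s >> d)
xtilde = InexactLSR(A, b, s);          % solve the CountSketch-compressed problem
xexact = A \ b;                        % exact least-squares solution
disp(['residual (sketched): ', num2str(norm(A * xtilde - b)), ...
      '   residual (exact): ', num2str(norm(A * xexact - b))]);
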
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/nmi.m:
--------------------------------------------------------------------------------
1 | function score = nmi(true_labels, cluster_labels)
2 | %NMI Compute normalized mutual information (NMI) using the true and cluster
3 | %   labels and return the value in 'score'.
4 | %
5 | %   Input  : true_labels    : N-by-1 vector containing true labels
6 | %            cluster_labels : N-by-1 vector containing cluster labels
7 | %
8 | %   Output : score          : NMI value
9 | %
10 | %   Author : Shi Zhong, 2003.
11 | %            http://www.cse.fau.edu/~zhong/software/textclust.zip
12 | %
13 | %   Modified : Wen-Yen Chen (wychen@alumni.cs.ucsb.edu)
14 | %              Chih-Jen Lin (cjlin@csie.ntu.edu.tw)
15 | 
16 | % Compute the confusion matrix 'cmat', where
17 | %   col index is for true label (CAT),
18 | %   row index is for cluster label (CLS).
19 | n = length(true_labels);
20 | cat = spconvert([(1:n)' true_labels ones(n,1)]);
21 | cls = spconvert([(1:n)' cluster_labels ones(n,1)]);
22 | cls = cls';
23 | cmat = full(cls * cat);
24 | 
25 | n_i = sum(cmat, 1); % Total number of data for each true label (CAT), n_i
26 | n_j = sum(cmat, 2); % Total number of data for each cluster label (CLS), n_j
27 | 
28 | % Calculate n*n_ij / n_i*n_j
29 | [row, col] = size(cmat);
30 | product = repmat(n_i, [row, 1]) .* repmat(n_j, [1, col]);
31 | index = find(product > 0);
32 | n = sum(cmat(:));
33 | product(index) = (n*cmat(index)) ./ product(index);
34 | % Sum up n_ij*log()
35 | index = find(product > 0);
36 | product(index) = log(product(index));
37 | product = cmat .* product;
38 | score = sum(product(:));
39 | % Divide by sqrt( sum(n_i*log(n_i/n)) * sum(n_j*log(n_j/n)) )
40 | index = find(n_i > 0);
41 | n_i(index) = n_i(index) .* log(n_i(index)/n);
42 | index = find(n_j > 0);
43 | n_j(index) = n_j(index) .* log(n_j(index)/n);
44 | denominator = sqrt(sum(n_i) * sum(n_j));
45 | 
46 | % Check if the denominator is zero
47 | if denominator == 0
48 |     score = 0;
49 | else
50 |     score = score / denominator;
51 | end
--------------------------------------------------------------------------------
/SPSD_Sketching/Spectral Clustering/hungarian.m:
--------------------------------------------------------------------------------
1 | function [Matching,Cost] = Hungarian(Perf)
2 | %
3 | % [MATCHING,COST] = Hungarian_New(WEIGHTS)
4 | %
5 | % A function for finding a minimum edge weight matching given a MxN Edge
6 | % weight matrix WEIGHTS using the Hungarian Algorithm.
7 | %
8 | % An edge weight of Inf indicates that the pair of vertices given by its
9 | % position have no adjacent edge.
10 | %
11 | % MATCHING returns an MxN matrix with ones in the place of the matchings and
12 | % zeros elsewhere.
13 | % 14 | % COST returns the cost of the minimum matching 15 | 16 | % Written by: Alex Melin 30 June 2006 17 | 18 | 19 | % Initialize Variables 20 | Matching = zeros(size(Perf)); 21 | 22 | % Condense the Performance Matrix by removing any unconnected vertices to 23 | % increase the speed of the algorithm 24 | 25 | % Find the number in each column that are connected 26 | num_y = sum(~isinf(Perf),1); 27 | % Find the number in each row that are connected 28 | num_x = sum(~isinf(Perf),2); 29 | 30 | % Find the columns(vertices) and rows(vertices) that are isolated 31 | x_con = find(num_x~=0); 32 | y_con = find(num_y~=0); 33 | 34 | % Assemble Condensed Performance Matrix 35 | P_size = max(length(x_con),length(y_con)); 36 | P_cond = zeros(P_size); 37 | P_cond(1:length(x_con),1:length(y_con)) = Perf(x_con,y_con); 38 | if isempty(P_cond) 39 | Cost = 0; 40 | return 41 | end 42 | 43 | % Ensure that a perfect matching exists 44 | % Calculate a form of the Edge Matrix 45 | Edge = P_cond; 46 | Edge(P_cond~=Inf) = 0; 47 | % Find the deficiency(CNUM) in the Edge Matrix 48 | cnum = min_line_cover(Edge); 49 | 50 | % Project additional vertices and edges so that a perfect matching 51 | % exists 52 | Pmax = max(max(P_cond(P_cond~=Inf))); 53 | P_size = length(P_cond)+cnum; 54 | P_cond = ones(P_size)*Pmax; 55 | P_cond(1:length(x_con),1:length(y_con)) = Perf(x_con,y_con); 56 | 57 | %************************************************* 58 | % MAIN PROGRAM: CONTROLS WHICH STEP IS EXECUTED 59 | %************************************************* 60 | exit_flag = 1; 61 | stepnum = 1; 62 | while exit_flag 63 | switch stepnum 64 | case 1 65 | [P_cond,stepnum] = step1(P_cond); 66 | case 2 67 | [r_cov,c_cov,M,stepnum] = step2(P_cond); 68 | case 3 69 | [c_cov,stepnum] = step3(M,P_size); 70 | case 4 71 | [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(P_cond,r_cov,c_cov,M); 72 | case 5 73 | [M,r_cov,c_cov,stepnum] = step5(M,Z_r,Z_c,r_cov,c_cov); 74 | case 6 75 | [P_cond,stepnum] = step6(P_cond,r_cov,c_cov); 76 | case 7 77 | exit_flag = 0; 78 | end 79 | end 80 | 81 | % Remove all the virtual satellites and targets and uncondense the 82 | % Matching to the size of the original performance matrix. 83 | Matching(x_con,y_con) = M(1:length(x_con),1:length(y_con)); 84 | Cost = sum(sum(Perf(Matching==1))); 85 | 86 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 87 | % STEP 1: Find the smallest number of zeros in each row 88 | % and subtract that minimum from its row 89 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 90 | 91 | function [P_cond,stepnum] = step1(P_cond) 92 | 93 | P_size = length(P_cond); 94 | 95 | % Loop throught each row 96 | for ii = 1:P_size 97 | rmin = min(P_cond(ii,:)); 98 | P_cond(ii,:) = P_cond(ii,:)-rmin; 99 | end 100 | 101 | stepnum = 2; 102 | 103 | %************************************************************************** 104 | % STEP 2: Find a zero in P_cond. If there are no starred zeros in its 105 | % column or row start the zero. 
Repeat for each zero 106 | %************************************************************************** 107 | 108 | function [r_cov,c_cov,M,stepnum] = step2(P_cond) 109 | 110 | % Define variables 111 | P_size = length(P_cond); 112 | r_cov = zeros(P_size,1); % A vector that shows if a row is covered 113 | c_cov = zeros(P_size,1); % A vector that shows if a column is covered 114 | M = zeros(P_size); % A mask that shows if a position is starred or primed 115 | 116 | for ii = 1:P_size 117 | for jj = 1:P_size 118 | if P_cond(ii,jj) == 0 && r_cov(ii) == 0 && c_cov(jj) == 0 119 | M(ii,jj) = 1; 120 | r_cov(ii) = 1; 121 | c_cov(jj) = 1; 122 | end 123 | end 124 | end 125 | 126 | % Re-initialize the cover vectors 127 | r_cov = zeros(P_size,1); % A vector that shows if a row is covered 128 | c_cov = zeros(P_size,1); % A vector that shows if a column is covered 129 | stepnum = 3; 130 | 131 | %************************************************************************** 132 | % STEP 3: Cover each column with a starred zero. If all the columns are 133 | % covered then the matching is maximum 134 | %************************************************************************** 135 | 136 | function [c_cov,stepnum] = step3(M,P_size) 137 | 138 | c_cov = sum(M,1); 139 | if sum(c_cov) == P_size 140 | stepnum = 7; 141 | else 142 | stepnum = 4; 143 | end 144 | 145 | %************************************************************************** 146 | % STEP 4: Find a noncovered zero and prime it. If there is no starred 147 | % zero in the row containing this primed zero, Go to Step 5. 148 | % Otherwise, cover this row and uncover the column containing 149 | % the starred zero. Continue in this manner until there are no 150 | % uncovered zeros left. Save the smallest uncovered value and 151 | % Go to Step 6. 152 | %************************************************************************** 153 | function [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(P_cond,r_cov,c_cov,M) 154 | 155 | P_size = length(P_cond); 156 | 157 | zflag = 1; 158 | while zflag 159 | % Find the first uncovered zero 160 | row = 0; col = 0; exit_flag = 1; 161 | ii = 1; jj = 1; 162 | while exit_flag 163 | if P_cond(ii,jj) == 0 && r_cov(ii) == 0 && c_cov(jj) == 0 164 | row = ii; 165 | col = jj; 166 | exit_flag = 0; 167 | end 168 | jj = jj + 1; 169 | if jj > P_size; jj = 1; ii = ii+1; end 170 | if ii > P_size; exit_flag = 0; end 171 | end 172 | 173 | % If there are no uncovered zeros go to step 6 174 | if row == 0 175 | stepnum = 6; 176 | zflag = 0; 177 | Z_r = 0; 178 | Z_c = 0; 179 | else 180 | % Prime the uncovered zero 181 | M(row,col) = 2; 182 | % If there is a starred zero in that row 183 | % Cover the row and uncover the column containing the zero 184 | if sum(find(M(row,:)==1)) ~= 0 185 | r_cov(row) = 1; 186 | zcol = find(M(row,:)==1); 187 | c_cov(zcol) = 0; 188 | else 189 | stepnum = 5; 190 | zflag = 0; 191 | Z_r = row; 192 | Z_c = col; 193 | end 194 | end 195 | end 196 | 197 | %************************************************************************** 198 | % STEP 5: Construct a series of alternating primed and starred zeros as 199 | % follows. Let Z0 represent the uncovered primed zero found in Step 4. 200 | % Let Z1 denote the starred zero in the column of Z0 (if any). 201 | % Let Z2 denote the primed zero in the row of Z1 (there will always 202 | % be one). Continue until the series terminates at a primed zero 203 | % that has no starred zero in its column. 
Unstar each starred 204 | % zero of the series, star each primed zero of the series, erase 205 | % all primes and uncover every line in the matrix. Return to Step 3. 206 | %************************************************************************** 207 | 208 | function [M,r_cov,c_cov,stepnum] = step5(M,Z_r,Z_c,r_cov,c_cov) 209 | 210 | zflag = 1; 211 | ii = 1; 212 | while zflag 213 | % Find the index number of the starred zero in the column 214 | rindex = find(M(:,Z_c(ii))==1); 215 | if rindex > 0 216 | % Save the starred zero 217 | ii = ii+1; 218 | % Save the row of the starred zero 219 | Z_r(ii,1) = rindex; 220 | % The column of the starred zero is the same as the column of the 221 | % primed zero 222 | Z_c(ii,1) = Z_c(ii-1); 223 | else 224 | zflag = 0; 225 | end 226 | 227 | % Continue if there is a starred zero in the column of the primed zero 228 | if zflag == 1; 229 | % Find the column of the primed zero in the last starred zeros row 230 | cindex = find(M(Z_r(ii),:)==2); 231 | ii = ii+1; 232 | Z_r(ii,1) = Z_r(ii-1); 233 | Z_c(ii,1) = cindex; 234 | end 235 | end 236 | 237 | % UNSTAR all the starred zeros in the path and STAR all primed zeros 238 | for ii = 1:length(Z_r) 239 | if M(Z_r(ii),Z_c(ii)) == 1 240 | M(Z_r(ii),Z_c(ii)) = 0; 241 | else 242 | M(Z_r(ii),Z_c(ii)) = 1; 243 | end 244 | end 245 | 246 | % Clear the covers 247 | r_cov = r_cov.*0; 248 | c_cov = c_cov.*0; 249 | 250 | % Remove all the primes 251 | M(M==2) = 0; 252 | 253 | stepnum = 3; 254 | 255 | % ************************************************************************* 256 | % STEP 6: Add the minimum uncovered value to every element of each covered 257 | % row, and subtract it from every element of each uncovered column. 258 | % Return to Step 4 without altering any stars, primes, or covered lines. 259 | %************************************************************************** 260 | 261 | function [P_cond,stepnum] = step6(P_cond,r_cov,c_cov) 262 | a = find(r_cov == 0); 263 | b = find(c_cov == 0); 264 | minval = min(min(P_cond(a,b))); 265 | 266 | P_cond(find(r_cov == 1),:) = P_cond(find(r_cov == 1),:) + minval; 267 | P_cond(:,find(c_cov == 0)) = P_cond(:,find(c_cov == 0)) - minval; 268 | 269 | stepnum = 4; 270 | 271 | function cnum = min_line_cover(Edge) 272 | 273 | % Step 2 274 | [r_cov,c_cov,M,stepnum] = step2(Edge); 275 | % Step 3 276 | [c_cov,stepnum] = step3(M,length(Edge)); 277 | % Step 4 278 | [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(Edge,r_cov,c_cov,M); 279 | % Calculate the deficiency 280 | cnum = length(Edge)-sum(r_cov)-sum(c_cov); 281 | --------------------------------------------------------------------------------
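
A small sanity check for the clustering metrics above; this is a minimal usage sketch, not part of the repository, and it assumes labels are positive integers. The permuted cluster labels below describe the same partition as the true labels, so both metrics should report a perfect score after the Hungarian matching.

% illustrative sketch: accuracy() and nmi() on a toy labeling
true_labels    = [1; 1; 2; 2; 3; 3];
cluster_labels = [2; 2; 3; 3; 1; 1];           % same partition, permuted label names
ac  = accuracy(true_labels, cluster_labels);   % should be 100 (percent)
NMI = nmi(true_labels, cluster_labels);        % should be 1
display(['Accuracy: ', num2str(ac), '   nmi: ', num2str(NMI)]);
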