├── Contingency.m
├── EuDist2.m
├── LICENSE
├── README.md
├── RandIndex.m
├── StandCompRank.m
├── accuracy.m
├── clustering_metric.m
├── compute_conductance.m
├── compute_f.m
├── compute_nmi.m
├── compute_normalized_cut.m
├── datasets
└── pendigits.train.test
├── hungarian.m
├── kernel.m
├── pendigits_SC_RB_varyingR_exampleResults.mat
├── primme_eigs.m
├── primme_svds.m
├── rb_grid.m
├── rb_test.m
├── rb_train.m
├── selfloops.m
├── specClustering_rb_example.m
└── utilities
├── .DS_Store
├── libsvm_mex
├── libsvmread.mexa64
├── libsvmread.mexmaci64
├── libsvmwrite.mexa64
├── libsvmwrite.mexmaci64
├── svmpredict.mexa64
├── svmpredict.mexmaci64
├── svmtrain.mexa64
└── svmtrain.mexmaci64
├── primme_mex
├── primme_mex.mexa64
└── primme_mex.mexmaci64
└── rb_mex
├── rb_grid_mex.mexmaci64
├── rb_test_mex.mexmaci64
└── rb_train_mex.mexmaci64
/Contingency.m:
--------------------------------------------------------------------------------
1 | function Cont=Contingency(Mem1,Mem2)
2 | %CONTINGENCY Form contingency matrix for two label vectors
3 | %   Cont=Contingency(Mem1,Mem2) returns the contingency matrix for two
4 | %   vectors Mem1, Mem2. These define which cluster each entity has been
5 | %   assigned to. Cont(i,j) is the number of entities that carry label i
6 | %   in Mem1 and label j in Mem2, so Cont is max(Mem1)-by-max(Mem2).
7 | %
8 | %   Labels are used as matrix indices and must be positive integers.
9 | %   Mem1 and Mem2 must have the same number of elements; an error is
10 | %   raised otherwise (a longer Mem2 is no longer silently truncated).
11 | %   Empty inputs give an empty 0-by-0 matrix.
12 | %
13 | %   See also RANDINDEX.
14 | %
15 |
16 | %(C) David Corney (2000) D.Corney@cs.ucl.ac.uk
17 |
18 | if nargin < 2 || min(size(Mem1)) > 1 || min(size(Mem2)) > 1
19 |     error('Contingency: Requires two vector arguments')
20 | end
21 | if numel(Mem1) ~= numel(Mem2)
22 |     error('Contingency: Both vectors must have the same length')
23 | end
24 |
25 | % Nothing to count
26 | if isempty(Mem1)
27 |     Cont=zeros(0,0);
28 |     return
29 | end
30 |
31 | % Add 1 to cell (Mem1(i),Mem2(i)) for every entity i. The explicit size
32 | % keeps the result max(Mem1)-by-max(Mem2) even if trailing labels are unused.
33 | rowLabel=double(Mem1(:));
34 | colLabel=double(Mem2(:));
35 | Cont=accumarray([rowLabel colLabel],1,[max(rowLabel) max(colLabel)]);
--------------------------------------------------------------------------------
/EuDist2.m:
--------------------------------------------------------------------------------
1 | function D = EuDist2(fea_a,fea_b,bSqrt)
2 | %EUDIST2 Pairwise Euclidean distance matrix built from matrix products.
3 | %
4 | %   D = EuDist2(fea_a)
5 | %   D = EuDist2(fea_a,fea_b)
6 | %   D = EuDist2(fea_a,fea_b,bSqrt)
7 | %
8 | %       fea_a : nSample_a * nFeature
9 | %       fea_b : nSample_b * nFeature; omit or pass [] to compare fea_a
10 | %               with itself
11 | %       bSqrt : nonzero (default 1) returns distances, zero returns
12 | %               squared distances
13 | %       D     : nSample_a * nSample_a
14 | %               or nSample_a * nSample_b
15 | %
16 | %   Examples:
17 | %
18 | %       a = rand(500,10);
19 | %       b = rand(1000,10);
20 | %
21 | %       A = EuDist2(a);   % A: 500*500
22 | %       D = EuDist2(a,b); % D: 500*1000
23 | %
24 | %   version 2.1 --November/2011
25 | %   version 2.0 --May/2009
26 | %   version 1.0 --November/2005
27 | %
28 | %   Written by Deng Cai (dengcai AT gmail.com)
29 |
30 |
31 | if nargin < 3
32 |     bSqrt = 1;
33 | end
34 |
35 | % With no second sample set the distances are taken within fea_a
36 | withinA = (nargin < 2) || isempty(fea_b);
37 |
38 | % Squared row norms and the cross-product (Gram) term
39 | sqNormA = sum(fea_a.*fea_a,2);
40 | if withinA
41 |     sqNormB = sqNormA;
42 |     gram = fea_a*fea_a';
43 | else
44 |     sqNormB = sum(fea_b.*fea_b,2);
45 |     gram = fea_a*fea_b';
46 | end
47 |
48 | % The norm vectors are densified whenever the first one is sparse
49 | if issparse(sqNormA)
50 |     sqNormA = full(sqNormA);
51 |     sqNormB = full(sqNormB);
52 | end
53 |
54 | % ||a-b||^2 = ||a||^2 + ||b||^2 - 2*a*b'; round-off negatives are clipped
55 | D = bsxfun(@plus,sqNormA,sqNormB') - 2*gram;
56 | D(D<0) = 0;
57 | if bSqrt
58 |     D = sqrt(D);
59 | end
60 |
61 | % The self-distance matrix is made exactly symmetric
62 | if withinA
63 |     D = max(D,D');
64 | end
65 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SpectralClustering_RandomBinning
2 | SpectralClustering_RandomBinning (SC_RB) is a simple code for scaling up spectral clustering on large-scale datasets using state-of-the-art kernel approximation (Random Binning) and eigenvalue and singular value solver (PRIMME).
3 |
4 | This code is a simple implementation (mix of Matlab, Matlab MEX, and C) of SC_RB (Wu et al., "Scalable Spectral Clustering Using Random Binning Features", KDD'18). For more information about SC_RB, please refer to the paper: https://arxiv.org/abs/1805.11048 and the IBM Research AI Blog: https://www.ibm.com/blogs/research/2018/08/spectral-clustering/.
5 |
6 |
7 | # Prerequisites
8 |
9 | There are three required tool packages in order to run this code. You need to download RB, PRIMME, and LibSVM and compile the corresponding MEX files for your operating systems (Mac, Linux, or Windows).
10 |
11 | For RB: https://github.com/teddylfwu/RB_GEN
12 | For PRIMME: https://github.com/primme/primme
13 | For LibSVM: https://github.com/cjlin1/libsvm
14 |
15 | You will also need to download the datasets in libsvm format. Since this is a clustering task, you need to merge the training and testing datasets into one file. You can download them from this link: https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/
16 |
17 |
18 | # How To Run The Codes
19 | Note that, in order to achieve the best performance, the hyper-parameter sigma (for generating a good kernel approximation matrix Z) has to be searched (using cross validation or other techniques). This is a crucial step for SC_RB.
20 |
21 | To run SC_RB on your own data, you need to:
22 |
23 | (1) If you use Linux or Mac, you should be fine to skip compiling MEX for RB, PRIMME, and LibSVM. Otherwise, you need to download them from the above links and compile them in their Matlab folders. Then you need to copy these MEX files into the utilities folder.
24 |
25 | (2) Open the Matlab terminal console and run specClustering_rb_example.m to get the clustering performance for the example dataset pendigits. You might want to check whether your results are consistent with those in pendigits_SC_RB_varyingR_exampleResults.mat.
26 |
27 |
28 | # How To Cite The Codes
29 | Please cite our work if you like or are using our codes for your projects! Let me know if you have any questions: lwu at email.wm.edu.
30 |
31 | Lingfei Wu, Pin-Yu Chen, Ian En-Hsu Yen, Fangli Xu, Yinglong Xia and Charu Aggarwal, "Scalable Spectral Clustering Using Random Binning Features", KDD'18.
32 |
33 | @InProceedings{wu2018scalable,
34 | title={Scalable Spectral Clustering Using Random Binning Features},
35 | author={Wu, Lingfei and Chen, Pin-Yu and Yen, Ian En-Hsu and Xu, Fangli and Xia, Yinglong and Aggarwal, Charu},
36 | booktitle={Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining},
37 | year={2018}
38 | }
39 |
40 |
41 | ------------------------------------------------------
42 | Contributors: Lingfei Wu
43 | Created date: January 16, 2019
44 | Last update: January 16, 2019
45 |
--------------------------------------------------------------------------------
/RandIndex.m:
--------------------------------------------------------------------------------
1 | function [AR,RI,MI,HI]=RandIndex(c1,c2)
2 | %RANDINDEX - Rand-type agreement indices between two partitions
3 | %   ARI=RANDINDEX(c1,c2), where c1,c2 are vectors listing the class
4 | %   membership of each entity, returns the "Hubert & Arabie adjusted
5 | %   Rand index".
6 | %   [AR,RI,MI,HI]=RANDINDEX(c1,c2) returns the adjusted Rand index,
7 | %   the unadjusted Rand index, "Mirkin's" index and "Hubert's" index.
8 | %
9 | %   See L. Hubert and P. Arabie (1985) "Comparing Partitions" Journal of
10 | %   Classification 2:193-218
11 |
12 | %(C) David Corney (2000) D.Corney@cs.ucl.ac.uk
13 |
14 | if nargin < 2 || min(size(c1)) > 1 || min(size(c2)) > 1
15 |     error('RandIndex: Requires two vector arguments')
16 | end
17 |
18 | % Contingency table: counts of entities per (c1 label, c2 label) pair
19 | tbl=Contingency(c1,c2);
20 |
21 | total=sum(tbl(:));                 %number of entities
22 | rowSq=sum(sum(tbl,2).^2);          %sum of squared row totals
23 | colSq=sum(sum(tbl,1).^2);          %sum of squared column totals
24 |
25 | nPairs=nchoosek(total,2);          %total number of pairs of entities
26 | cellSq=sum(tbl(:).^2);             %sum over all cells of nij^2
27 | halfMarg=.5*(rowSq+colSq);
28 |
29 | %Expected index (for adjustment)
30 | expected=(total*(total^2+1)-(total+1)*rowSq-(total+1)*colSq+2*(rowSq*colSq)/total)/(2*(total-1));
31 |
32 | agree=nPairs+cellSq-halfMarg;      %no. agreements
33 | disagree=halfMarg-cellSq;          %no. disagreements
34 |
35 | %adjusted Rand - Hubert & Arabie 1985
36 | AR=0;                              %avoid division by zero; if k=1, define Rand = 0
37 | if nPairs~=expected
38 |     AR=(agree-expected)/(nPairs-expected);
39 | end
40 |
41 | RI=agree/nPairs;                   %Rand 1971    %Probability of agreement
42 | MI=disagree/nPairs;                %Mirkin 1970  %p(disagreement)
43 | HI=(agree-disagree)/nPairs;        %Hubert 1977  %p(agree)-p(disagree)
--------------------------------------------------------------------------------
/StandCompRank.m:
--------------------------------------------------------------------------------
1 | function y = StandCompRank(x)
2 | %STANDCOMPRANK Standard competition ("1224") ranking of a numeric vector.
3 | %
4 | % y = StandCompRank(x) ranks the elements of x in ascending order. Tied
5 | % values share the rank of the first of them and the following ranks are
6 | % skipped.
7 | %
8 | % For details regarding ranking methodologies: http://en.wikipedia.org/wiki/Ranking
9 | %
10 | %
11 | % INPUT
12 | %       x : numeric vector (row or column). NaN and +/-Inf entries are
13 | %           removed before ranking.
14 | %
15 | % OUTPUT
16 | %       y : column vector (class double) of ranks, one per retained
17 | %           element of x, in the original order of those elements.
18 | %           Because NaN/Inf entries are dropped, y can be shorter than x.
19 | %           y is empty (0-by-1) when no finite element remains.
20 | %
21 | % An error is raised if x is not a numeric vector.
22 | %
23 | %
24 | % EXAMPLE:
25 | %       x = [32 73 46 32 95 73 87 73 22 69 13 57];
26 | %       y = StandCompRank(x);
27 | %       sortrows([x', y], 1)
28 | %       ans =
29 | %           13     1
30 | %           22     2
31 | %           32     3
32 | %           32     3
33 | %           46     5
34 | %           57     6
35 | %           69     7
36 | %           73     8
37 | %           73     8
38 | %           73     8
39 | %           87    11
40 | %           95    12
41 | %
42 | %
43 | % The ranking is computed with one sort and a cumulative sum; it no
44 | % longer builds a frequency table with the deprecated HISTC or loops
45 | % over the distinct values.
46 | %
47 | %
48 | %-*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-*%
49 | %                                                                               %
50 | %       Original author: Liber Eleutherios                                      %
51 | %       E-Mail: libereleutherios@gmail.com                                      %
52 | %       Date: 8 April 2008                                                      %
53 | %                                                                               %
54 | %-*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-* -*-*%
55 | %
56 |
57 | % Prepare data
58 | if ~(isvector(x) && isnumeric(x))
59 |     error('x is not a vector of numbers! The Standard Competition Ranking could not be calculated')
60 | end
61 | x = x(:);
62 | x = x(~isnan(x) & ~isinf(x));
63 |
64 | % Nothing left to rank (e.g. x was all NaN/Inf)
65 | if isempty(x)
66 |     y = zeros(0, 1);
67 |     return
68 | end
69 |
70 | % Sort, then flag the first element of every run of equal values
71 | [sortedVals, ind] = sort(x);
72 | startsRun = [true; diff(sortedVals) ~= 0];
73 |
74 | % The rank of a run is the sorted position of its first element;
75 | % cumsum(startsRun) maps each sorted element to the index of its run
76 | runStart = find(startsRun);
77 | rankSorted = runStart(cumsum(startsRun));
78 |
79 | % Scatter the ranks back to the original element order
80 | y = zeros(numel(x), 1);
81 | y(ind) = rankSorted;
82 |
--------------------------------------------------------------------------------
/accuracy.m:
--------------------------------------------------------------------------------
1 | function score = accuracy(true_labels, cluster_labels)
2 | %ACCURACY Compute clustering accuracy using the true and cluster labels and
3 | %   return the value in 'score'.
4 | %
5 | %   Input  : true_labels    : vector of N true labels
6 | %            cluster_labels : vector of N cluster labels
7 | %            Labels are used as matrix indices and must be positive
8 | %            integers. Row and column vectors are both accepted.
9 | %
10 | %   Output : score          : clustering accuracy in percent, i.e. 100
11 | %                             times the fraction of samples that agree
12 | %                             under the matching returned by hungarian
13 |
14 | true_labels = double(true_labels(:));
15 | cluster_labels = double(cluster_labels(:));
16 | n = length(true_labels);
17 | if length(cluster_labels) ~= n
18 |     error('accuracy: true_labels and cluster_labels must have the same length')
19 | end
20 |
21 | % Compute the confusion matrix 'cmat', where
22 | %   col index is for true label,
23 | %   row index is for cluster label.
24 | % sparse() sums the values of repeated (row,col) pairs, so each cell
25 | % ends up holding the number of samples with that label combination.
26 | cmat = full(sparse(cluster_labels, true_labels, 1));
27 |
28 | %
29 | % Calculate accuracy
30 | %
31 | % hungarian minimises cost, so the counts are negated to maximise agreement
32 | [matching, cost] = hungarian(-cmat);
33 | score = 100*(-cost/n);
34 |
--------------------------------------------------------------------------------
/clustering_metric.m:
--------------------------------------------------------------------------------
1 | function [nmi,fm,RI,conduc,NC,ODF]=clustering_metric(flag_ext,A,s,s_true)
2 | %CLUSTERING_METRIC Collect external and internal clustering quality scores.
3 | % Outputs:
4 | % 1. nmi - normalized mutual information
5 | % 2. fm - F-measure
6 | % 3. RI - rand index
7 | % These three are measures given groundtruth labels
8 | %
9 | % 4. conduc - conductance, averaged over the max(s) clusters
10 | % 5. NC - normalized cut, averaged over the max(s) clusters
11 | % 6. ODF - out-degree fraction, averaged over the max(s) clusters
12 | %
13 | % Outputs that are not requested through flag_ext are returned as -1.
14 | %
15 | % Input:
16 | %
17 | % flag_ext=0 - compute 1-3
18 | % flag_ext=1 - compute 4-6
19 | % flag_ext=2 - compute 1-6
20 | %
21 | % A: symmetric n-by-n weight matrix (sparse format supported)
22 | % s: cluster label of each node from an algorithm. Every entry should be
23 | %    between 1 to K, where K is the number of clusters (For MNist, K=10)
24 | % s_true: ground-truth cluster label
25 | %
26 | % A label in 1..max(s) that no node carries contributes 0 to ODF (it used
27 | % to turn ODF into NaN). A node whose total degree is 0 still yields NaN.
28 |
29 |
30 | if(flag_ext == 0 || flag_ext == 2)
31 |     % Rand Index (first output, the adjusted index, is not used here)
32 |     [AR,RI]=RandIndex(s_true,s);
33 |     % f-measure
34 |     fm=compute_f(s_true,s);
35 |     % NMI (first output, the contingency table, is not used here)
36 |     [dummy,nmi]=compute_nmi(s_true,s);
37 | else
38 |     nmi=-1;
39 |     fm=-1;
40 |     RI=-1;
41 | end
42 |
43 | if(flag_ext == 1 || flag_ext == 2)
44 |     K=max(s);
45 |     if isempty(K)
46 |         K=0;
47 |     end
48 |
49 |     % conductance
50 |     conductance=compute_conductance(A,s);
51 |     conduc=sum(conductance)/K;
52 |
53 |     % normalized cut
54 |     NCtmp=compute_normalized_cut(A,s);
55 |     NC=sum(NCtmp)/K;
56 |
57 |     % average ODF: per node, the fraction of its weight that leaves its
58 |     % own cluster; averaged inside each cluster, then over the clusters
59 |     d=sum(A,2);
60 |     odf_per_cluster=zeros(1,K);
61 |     for k=1:K
62 |         ind=find(s==k);
63 |         if isempty(ind)
64 |             continue    % unused label: leave its contribution at 0
65 |         end
66 |         d_in=sum(A(ind,ind),2);
67 |         odf_per_cluster(k)=(sum(1-d_in./d(ind)))/length(ind);
68 |     end
69 |     % divide by K explicitly rather than by the leftover loop variable
70 |     ODF=sum(odf_per_cluster)/K;
71 | else
72 |     conduc=-1;
73 |     NC=-1;
74 |     ODF=-1;
75 | end
--------------------------------------------------------------------------------
/compute_conductance.m:
--------------------------------------------------------------------------------
1 | function [conductance]=compute_conductance(A,s)
2 | %COMPUTE_CONDUCTANCE Per-cluster conductance of a weighted graph.
3 | %   conductance=compute_conductance(A,s) returns a 1-by-K row vector,
4 | %   K=max(s), whose k-th entry is cs/(cs+twoms) for cluster k, where
5 | %     twoms = sum of the entries of A between nodes of cluster k
6 | %     cs    = sum of the entries of A from cluster k to all other nodes
7 | %   The entry is 0 when twoms is 0, which includes labels no node carries.
8 | %
9 | %   A : n-by-n weight matrix (full or sparse)
10 | %   s : vector of n cluster labels; labels 1..max(s) are evaluated
11 | %
12 | %   An empty s returns an empty 1-by-0 vector.
13 |
14 | K=max(s);
15 | if isempty(K)
16 |     K=0;
17 | end
18 | n=size(A,1);
19 | conductance=zeros(1,K);
20 |
21 | for k=1:K
22 |     % membership mask over all n nodes (nodes without a label stay outside)
23 |     inside=false(n,1);
24 |     inside(s==k)=true;
25 |
26 |     twoms=sum(nonzeros(A(inside,inside)));
27 |     cs=sum(nonzeros(A(inside,~inside)));
28 |
29 |     % clusters without internal weight keep the preallocated 0
30 |     if twoms~=0
31 |         conductance(k)=cs/(cs+twoms);
32 |     end
33 | end
--------------------------------------------------------------------------------
/compute_f.m:
--------------------------------------------------------------------------------
1 | function [f,p,r] = compute_f(T,H)
2 | %COMPUTE_F Pairwise F-measure between two labelings.
3 | %   [f,p,r] = compute_f(T,H) compares labelings T and H of the same
4 | %   entities by counting unordered pairs of entities:
5 | %     numT = pairs that share a label in T
6 | %     numH = pairs that share a label in H
7 | %     numI = pairs that share a label in both
8 | %   and returns
9 | %     p = numI/numH   (1 if numH is 0)
10 | %     r = numI/numT   (1 if numT is 0)
11 | %     f = 2*p*r/(p+r) (0 if p+r is 0)
12 | %
13 | %   T and H must be vectors of the same length; an error is raised
14 | %   otherwise (previously the sizes were only printed).
15 | %
16 | %   The pair counts come from the contingency table of the two labelings
17 | %   instead of comparing every pair of entities one by one.
18 |
19 | if numel(T) ~= numel(H)
20 |     error('compute_f:lengthMismatch', ...
21 |         'T and H must have the same length (got %d and %d).', numel(T), numel(H));
22 | end
23 |
24 | N = numel(T);
25 | numT = 0;
26 | numH = 0;
27 | numI = 0;
28 | if N >= 2
29 |     % Map the labels to 1..k and count entities per (T label, H label) cell
30 |     [valsT, firstT, idxT] = unique(T(:));
31 |     [valsH, firstH, idxH] = unique(H(:));
32 |     counts = accumarray([idxT(:) idxH(:)], 1);
33 |
34 |     % A group of c entities contains c*(c-1)/2 unordered pairs
35 |     inT = sum(counts, 2);
36 |     inH = sum(counts, 1);
37 |     numT = sum(inT .* (inT - 1)) / 2;
38 |     numH = sum(inH .* (inH - 1)) / 2;
39 |     numI = sum(counts(:) .* (counts(:) - 1)) / 2;
40 | end
41 |
42 | p = 1;
43 | r = 1;
44 | if numH > 0
45 |     p = numI / numH;
46 | end
47 | if numT > 0
48 |     r = numI / numT;
49 | end
50 | if (p+r) == 0
51 |     f = 0;
52 | else
53 |     f = 2 * p * r / (p + r);
54 | end
--------------------------------------------------------------------------------
/compute_nmi.m:
--------------------------------------------------------------------------------
1 | function [A nmi avgent] = compute_nmi (T, H)
2 | %COMPUTE_NMI Normalized mutual information between two labelings.
3 | %   [A,nmi,avgent] = compute_nmi(T,H)
4 | %
5 | %   T : vector of class (reference) labels
6 | %   H : vector of cluster labels, same length as T
7 | %
8 | %   A      : num_clust-by-num_class table; A(i,j) is the number of points
9 | %            of class j (j-th value of unique(T)) that fall in cluster i
10 | %            (i-th value of unique(H))
11 | %   nmi    : 2*MI/(H_cluster + H_class), entropies in bits
12 | %   avgent : sum over clusters of (cluster size / N) times the entropy of
13 | %            the class distribution inside that cluster
14 | %
15 | %   When both labelings consist of a single group both entropies are 0
16 | %   and the ratio is 0/0; nmi is then defined as 1 (the two labelings are
17 | %   identical up to renaming) instead of NaN. Empty input still gives NaN.
18 |
19 | T = T(:);
20 | H = H(:);
21 | N = length(T);
22 | if length(H) ~= N
23 |     error('compute_nmi: T and H must have the same length')
24 | end
25 |
26 | % Empty input: nothing to tabulate
27 | if N == 0
28 |     A = zeros(0,0);
29 |     nmi = NaN;
30 |     avgent = 0;
31 |     return
32 | end
33 |
34 | [classes, firstT, class_idx] = unique(T);
35 | [clusters, firstH, clust_idx] = unique(H);
36 | num_class = length(classes);
37 | num_clust = length(clusters);
38 |
39 | %%number of points of class 'j' that end up in cluster 'i'
40 | A = accumarray([clust_idx(:) class_idx(:)], 1, [num_clust num_class]);
41 | %%number of points in each cluster (B) and in each class (D)
42 | B = sum(A,2)';
43 | D = sum(A,1);
44 |
45 | %%mutual information and average within-cluster class entropy
46 | mi = 0;
47 | avgent = 0;
48 | for i=1:num_clust
49 |     for j=1:num_class
50 |         if (A(i,j) ~= 0)
51 |             mi = mi + A(i,j)/N * log2 (N*A(i,j)/(B(i)*D(j)));
52 |             avgent = avgent - (B(i)/N) * (A(i,j)/B(i)) * log2 (A(i,j)/B(i));
53 |         end
54 |     end
55 | end
56 |
57 | %%class entropy
58 | class_ent = 0;
59 | for i=1:num_class
60 |     class_ent = class_ent + D(i)/N * log2(N/D(i));
61 | end
62 |
63 | %%clustering entropy
64 | clust_ent = 0;
65 | for i=1:num_clust
66 |     clust_ent = clust_ent + B(i)/N * log2(N/B(i));
67 | end
68 |
69 | %%normalized mutual information
70 | total_ent = clust_ent + class_ent;
71 | if total_ent == 0
72 |     nmi = 1;    % both labelings are a single group
73 | else
74 |     nmi = 2*mi / total_ent;
75 | end
--------------------------------------------------------------------------------
/compute_normalized_cut.m:
--------------------------------------------------------------------------------
1 | function [normalized_cut]=compute_normalized_cut(A,s)
2 | %COMPUTE_NORMALIZED_CUT Per-cluster normalized cut of a weighted graph.
3 | %   normalized_cut=compute_normalized_cut(A,s) returns a 1-by-K row
4 | %   vector, K=max(s). With, for cluster k,
5 | %     twoms = sum of the entries of A between nodes of cluster k
6 | %     cs    = sum of the entries of A from cluster k to all other nodes
7 | %     twom  = sum of all entries of A
8 | %   the k-th entry is
9 | %     cond_k + cs/(twom-twoms+cs)   if twom ~= 0 and cs ~= 0
10 | %     cond_k                        otherwise
11 | %   where cond_k = cs/(cs+twoms), or 0 when twoms is 0.
12 | %
13 | %   A : n-by-n weight matrix (full or sparse)
14 | %   s : vector of n cluster labels; labels 1..max(s) are evaluated
15 | %
16 | %   An empty s returns an empty 1-by-0 vector.
17 |
18 | K=max(s);
19 | if isempty(K)
20 |     K=0;
21 | end
22 | n=size(A,1);
23 | normalized_cut=zeros(1,K);
24 |
25 | % total weight does not depend on the cluster, so compute it once
26 | twom=sum(nonzeros(A));
27 |
28 | for k=1:K
29 |     % membership mask over all n nodes (nodes without a label stay outside)
30 |     inside=false(n,1);
31 |     inside(s==k)=true;
32 |
33 |     twoms=sum(nonzeros(A(inside,inside)));
34 |     cs=sum(nonzeros(A(inside,~inside)));
35 |
36 |     if twoms==0
37 |         cond_k=0;
38 |     else
39 |         cond_k=cs/(cs+twoms);
40 |     end
41 |
42 |     if twom==0 || cs==0
43 |         normalized_cut(k)=cond_k;
44 |     else
45 |         normalized_cut(k)=cond_k+cs/(twom-twoms+cs);
46 |     end
47 | end
--------------------------------------------------------------------------------
/hungarian.m:
--------------------------------------------------------------------------------
1 | function [Matching,Cost] = Hungarian(Perf)
2 | %HUNGARIAN Minimum edge weight matching via the Hungarian Algorithm.
3 | % [MATCHING,COST] = Hungarian(WEIGHTS)
4 | %
5 | % A function for finding a minimum edge weight matching given a MxN Edge
6 | % weight matrix WEIGHTS using the Hungarian Algorithm.
7 | %
8 | % An edge weight of Inf indicates that the pair of vertices given by its
9 | % position have no adjacent edge.
10 | %
11 | % MATCHING returns a MxN matrix with ones in the place of the matchings and
12 | % zeros elsewhere.
13 | %
14 | % COST returns the cost of the minimum matching (0 if no vertex is connected)
15 |
16 | % Written by: Alex Melin 30 June 2006
17 | % NOTE(review): declared as "Hungarian" but stored in hungarian.m and called
18 | % as hungarian(...) from accuracy.m - confirm the name mismatch is intended.
19 | % Initialize Variables
20 | Matching = zeros(size(Perf));
21 |
22 | % Condense the Performance Matrix by removing any unconnected vertices to
23 | % increase the speed of the algorithm
24 |
25 | % Count the non-Inf entries in each column
26 | num_y = sum(~isinf(Perf),1);
27 | % Count the non-Inf entries in each row
28 | num_x = sum(~isinf(Perf),2);
29 |
30 | % Find the rows (x_con) and columns (y_con) that have at least one non-Inf entry
31 | x_con = find(num_x~=0);
32 | y_con = find(num_y~=0);
33 |
34 | % Assemble Condensed Performance Matrix (square; zero-padded if rectangular)
35 | P_size = max(length(x_con),length(y_con));
36 | P_cond = zeros(P_size);
37 | P_cond(1:length(x_con),1:length(y_con)) = Perf(x_con,y_con);
38 | if isempty(P_cond) % nothing is connected: Matching stays all zeros
39 | Cost = 0;
40 | return
41 | end
42 |
43 | % Ensure that a perfect matching exists
44 | % Calculate a form of the Edge Matrix: every entry that is not Inf becomes 0
45 | Edge = P_cond;
46 | Edge(P_cond~=Inf) = 0;
47 | % Find the deficiency(CNUM) in the Edge Matrix
48 | cnum = min_line_cover(Edge);
49 |
50 | % Project additional vertices and edges so that a perfect matching
51 | % exists: enlarge by cnum and fill the padding with the largest non-Inf weight
52 | Pmax = max(max(P_cond(P_cond~=Inf)));
53 | P_size = length(P_cond)+cnum;
54 | P_cond = ones(P_size)*Pmax;
55 | P_cond(1:length(x_con),1:length(y_con)) = Perf(x_con,y_con);
56 |
57 | %*************************************************
58 | % MAIN PROGRAM: CONTROLS WHICH STEP IS EXECUTED
59 | %*************************************************
60 | exit_flag = 1;
61 | stepnum = 1;
62 | while exit_flag % every stepN call returns the number of the step to run next
63 | switch stepnum
64 | case 1
65 | [P_cond,stepnum] = step1(P_cond);
66 | case 2
67 | [r_cov,c_cov,M,stepnum] = step2(P_cond);
68 | case 3
69 | [c_cov,stepnum] = step3(M,P_size);
70 | case 4
71 | [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(P_cond,r_cov,c_cov,M);
72 | case 5
73 | [M,r_cov,c_cov,stepnum] = step5(M,Z_r,Z_c,r_cov,c_cov);
74 | case 6
75 | [P_cond,stepnum] = step6(P_cond,r_cov,c_cov);
76 | case 7
77 | exit_flag = 0; % step 7 is the terminal state: leave the loop
78 | end
79 | end
80 |
81 | % Remove all the virtual satellites and targets and uncondense the
82 | % Matching to the size of the original performance matrix.
83 | Matching(x_con,y_con) = M(1:length(x_con),1:length(y_con));
84 | Cost = sum(sum(Perf(Matching==1))); % total weight of the matched entries
85 |
86 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
87 | % STEP 1: Find the smallest number in each row
88 | % and subtract that minimum from its row
89 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
90 |
91 | function [P_cond,stepnum] = step1(P_cond)
92 | % STEP1 Row reduction: subtract each row's minimum from that row.
93 | %   Returns the reduced matrix and stepnum = 2 (the step to run next).
94 |
95 | % Rows 1..length(P_cond) are reduced in one vectorised operation
96 | rows = 1:length(P_cond);
97 | rowMin = min(P_cond(rows,:),[],2);
98 | P_cond(rows,:) = bsxfun(@minus,P_cond(rows,:),rowMin);
99 |
100 | % Continue with step 2
101 | stepnum = 2;
102 |
103 | %**************************************************************************
104 | % STEP 2: Find a zero in P_cond. If there are no starred zeros in its
105 | % column or row star the zero. Repeat for each zero
106 | %**************************************************************************
107 |
108 | function [r_cov,c_cov,M,stepnum] = step2(P_cond)
109 | %STEP2 Greedily star an initial set of independent zeros.
110 | %   Scans P_cond row by row and stars (M = 1) each zero whose row and
111 | %   column hold no star yet. Both cover vectors are returned as all-zero
112 | %   columns and stepnum is set to 3.
113 | n = length(P_cond);
114 | M = zeros(n);              % mask: 1 marks a starred zero
115 | rowUsed = false(n,1);      % scratch flags, local to this scan
116 | colUsed = false(n,1);
117 | for r = 1:n
118 |     for c = 1:n
119 |         if ~rowUsed(r) && ~colUsed(c) && P_cond(r,c) == 0
120 |             M(r,c) = 1;
121 |             rowUsed(r) = true;
122 |             colUsed(c) = true;
123 |         end
124 |     end
125 | end
126 |
127 | r_cov = zeros(n,1);        % covers are handed on empty
128 | c_cov = zeros(n,1);
129 | stepnum = 3;
130 |
131 | %**************************************************************************
132 | % STEP 3: Cover each column with a starred zero. If all the columns are
133 | % covered then the matching is maximum
134 | %**************************************************************************
135 |
136 | function [c_cov,stepnum] = step3(M,P_size)
137 | %STEP3 Derive the column covers from the mask M.
138 | %   c_cov is the column-wise sum of M. When those sums total P_size the
139 | %   caller is told to stop (stepnum = 7); otherwise stepnum = 4.
140 | c_cov = sum(M,1);
141 | allCovered = (sum(c_cov) == P_size);
142 | stepnum = 4;
143 | if allCovered, stepnum = 7; end
144 |
145 | %**************************************************************************
146 | % STEP 4: Find a noncovered zero and prime it. If there is no starred
147 | % zero in the row containing this primed zero, Go to Step 5.
148 | % Otherwise, cover this row and uncover the column containing
149 | % the starred zero. Continue in this manner until there are no
150 | % uncovered zeros left. Save the smallest uncovered value and
151 | % Go to Step 6.
152 | %**************************************************************************
153 | function [M,r_cov,c_cov,Z_r,Z_c,stepnum] = step4(P_cond,r_cov,c_cov,M)
154 | %STEP4 Prime uncovered zeros until a path start is found or none remain.
155 | %   Repeatedly takes the first uncovered zero of P_cond in row-major
156 | %   order and primes it (M = 2). If that row already holds a starred
157 | %   zero (M = 1), the row is covered and the star's column uncovered;
158 | %   otherwise the primed position is returned in (Z_r,Z_c) with
159 | %   stepnum = 5. When no uncovered zero exists, Z_r = Z_c = 0 and
160 | %   stepnum = 6.
161 |
162 | searching = true;
163 | while searching
164 |     % Zeros lying in a row and a column that are both uncovered
165 |     openRows = (r_cov(:) == 0);
166 |     openCols = (c_cov(:).' == 0);
167 |     candidates = (P_cond == 0) & bsxfun(@and, openRows, openCols);
168 |     % find() walks column-major, so search the transpose to get the
169 |     % first hit in row-major order (rows outer, columns inner)
170 |     [col, row] = find(candidates.', 1);
171 |
172 |     if isempty(row)
173 |         % No uncovered zero is left
174 |         Z_r = 0;
175 |         Z_c = 0;
176 |         stepnum = 6;
177 |         searching = false;
178 |         continue
179 |     end
180 |
181 |     % Prime the zero that was just located
182 |     M(row,col) = 2;
183 |     starCol = find(M(row,:) == 1);
184 |     if isempty(starCol)
185 |         % Row has no star: report this prime and stop
186 |         Z_r = row;
187 |         Z_c = col;
188 |         stepnum = 5;
189 |         searching = false;
190 |     else
191 |         % Row has a star: cover the row, release the star's column
192 |         r_cov(row) = 1;
193 |         c_cov(starCol) = 0;
194 |     end
195 | end
196 |
197 | %**************************************************************************
198 | % STEP 5: Construct a series of alternating primed and starred zeros as
199 | % follows. Let Z0 represent the uncovered primed zero found in Step 4.
200 | % Let Z1 denote the starred zero in the column of Z0 (if any).
201 | % Let Z2 denote the primed zero in the row of Z1 (there will always
202 | % be one). Continue until the series terminates at a primed zero
203 | % that has no starred zero in its column. Unstar each starred
204 | % zero of the series, star each primed zero of the series, erase
205 | % all primes and uncover every line in the matrix. Return to Step 3.
206 | %**************************************************************************
207 |
208 | function [M,r_cov,c_cov,stepnum] = step5(M,Z_r,Z_c,r_cov,c_cov)
209 | %STEP5 Flip stars and primes along the alternating path from (Z_r,Z_c).
210 | %   Starting at the primed zero passed in, the path alternates between
211 | %   the starred zero (M = 1) in the current column and the primed zero
212 | %   (M = 2) in that star's row, and ends at a prime whose column has no
213 | %   star. Stars on the path are cleared, primes on it become stars, all
214 | %   remaining primes are erased, both cover vectors are zeroed and
215 | %   stepnum is set to 3.
216 |
217 | k = 1;                       % index of the last path entry
218 | while true
219 |     % Starred zero sharing a column with the current primed zero
220 |     starRow = find(M(:,Z_c(k)) == 1);
221 |     if isempty(starRow)
222 |         break                % the path ends on this prime
223 |     end
224 |     k = k + 1;
225 |     Z_r(k,1) = starRow;
226 |     Z_c(k,1) = Z_c(k-1);     % same column as the preceding prime
227 |
228 |     % Primed zero sharing a row with that starred zero
229 |     primeCol = find(M(Z_r(k),:) == 2);
230 |     k = k + 1;
231 |     Z_r(k,1) = Z_r(k-1);     % same row as the preceding star
232 |     Z_c(k,1) = primeCol;
233 | end
234 |
235 | % Toggle the path: star -> plain zero, prime -> star
236 | for p = 1:length(Z_r)
237 |     onStar = (M(Z_r(p),Z_c(p)) == 1);
238 |     if onStar
239 |         M(Z_r(p),Z_c(p)) = 0;
240 |     else
241 |         M(Z_r(p),Z_c(p)) = 1;
242 |     end
243 | end
244 |
245 | % Drop every prime that was not part of the path
246 | M(M == 2) = 0;
247 |
248 | % Uncover all rows and columns
249 | r_cov = zeros(size(r_cov));
250 | c_cov = zeros(size(c_cov));
251 |
252 | % Continue with step 3
253 | stepnum = 3;
254 |
255 | % *************************************************************************
256 | % STEP 6: Add the minimum uncovered value to every element of each covered
257 | % row, and subtract it from every element of each uncovered column.
258 | % Return to Step 4 without altering any stars, primes, or covered lines.
259 | %**************************************************************************
260 |
261 | function [P_cond,stepnum] = step6(P_cond,r_cov,c_cov)
262 | %STEP6 Shift costs by the smallest uncovered entry; next step is 4.
263 | %   That minimum is added to covered rows and subtracted from uncovered columns.
264 | coveredRows = (r_cov == 1);
265 | openCols = (c_cov == 0);
266 | delta = min(min(P_cond(r_cov == 0, openCols)));
267 | P_cond(coveredRows,:) = P_cond(coveredRows,:) + delta;
268 | P_cond(:,openCols) = P_cond(:,openCols) - delta;
269 | stepnum = 4;
270 |
271 | function cnum = min_line_cover(Edge)
272 | %MIN_LINE_COVER Size of Edge minus the rows/columns covered after steps 2-4.
273 | %   Runs step2, step3 and step4 once each on Edge and counts the covers.
274 |
275 | n = length(Edge);
276 | [rowCover,colCover,starMask] = step2(Edge);
277 | colCover = step3(starMask,n);
278 | [starMask,rowCover,colCover] = step4(Edge,rowCover,colCover,starMask);
279 | nCovered = sum(rowCover) + sum(colCover);
280 | cnum = n - nCovered;
281 |
--------------------------------------------------------------------------------
/kernel.m:
--------------------------------------------------------------------------------
1 | function K = kernel(X,x,sigma)
2 | % K=kernel(X,x,sigma)
3 | %
4 | % computes the Gaussian (RBF) kernel matrix between every column vector
5 | % in X and every column vector in x; for real-valued data
6 | % K(i,j) = exp(-||X(:,i)-x(:,j)||^2/(2*sigma^2))
7 | %
8 | % INPUT:
9 | %
10 | % X dxN matrix consisting of N column vectors
11 | % x dxn matrix consisting of n column vectors ([] means x = X)
12 | % sigma scalar bandwidth of the kernel (required)
13 | %
14 | % OUTPUT:
15 | %
16 | % K Nxn matrix
17 | %
18 | % Example:
19 | % K=kernel(X,X,sigma);
20 | % is equivalent to
21 | % K=kernel(X,[],sigma);
22 | %
23 | % Author: Lingfei Wu
24 | % Date: 08/23/2017
25 |
26 | if nargin < 3
27 |     error('kernel:missingSigma', 'The bandwidth sigma must be given as the third argument.');
28 | end
29 |
30 | % An empty ([]) second argument requests the kernel of X with itself
31 | if isequal(size(x),[0 0]) && ~isempty(X)
32 |     x = X;
33 | end
34 |
35 | if size(X,1) ~= size(x,1)
36 |     % error() expands escapes such as \n only when format arguments are given
37 |     error('Both sets of vectors must have same dimensionality!');
38 | end
39 |
40 | % ||a-b||^2 = ||a||^2 + ||b||^2 - 2*a.'*b, evaluated for all pairs at once
41 | X2 = sum(X.^2,1);
42 | x2 = sum(x.^2,1);
43 | dist = bsxfun(@plus,X2.',bsxfun(@plus,x2,-2*X.'*x));
44 |
45 | % Round-off can leave tiny negative squared distances; clamp them (NaN is kept)
46 | dist(dist < 0) = 0;
47 | K = exp(-dist/(2*sigma^2));
48 |
--------------------------------------------------------------------------------
/pendigits_SC_RB_varyingR_exampleResults.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/pendigits_SC_RB_varyingR_exampleResults.mat
--------------------------------------------------------------------------------
/primme_eigs.m:
--------------------------------------------------------------------------------
1 | function [varargout] = primme_eigs(varargin)
2 | %PRIMME_EIGS Find a few eigenvalues/vectors of large, sparse Hermitian matrices
3 | %
4 | % D = PRIMME_EIGS(A) returns a vector of A's 6 largest magnitude eigenvalues.
5 | %
6 | % D = PRIMME_EIGS(AFUN,DIM) accepts a function AFUN instead of a matrix. AFUN
7 | % is a function handle and y = AFUN(x) returns the matrix-vector product A*x.
8 | % In all the following syntaxes, A can be replaced by AFUN,DIM.
9 | %
10 | % D = PRIMME_EIGS(A,K) finds the K largest magnitude eigenvalues. K must be
11 | % less than the dimension of the matrix A.
12 | %
13 | % D = PRIMME_EIGS(A,K,TARGET) returns K eigenvalues such that:
14 | % If TARGET is a real number, it finds the closest eigenvalues to TARGET.
15 | % If TARGET is
16 | % 'LA' or 'SA', eigenvalues with the largest or smallest algebraic value
17 | % 'LM' or 'SM', eigenvalues with the largest or smallest magnitude if
18 | % OPTS.targetShifts is empty. If TARGET is a real or complex
19 | % scalar including 0, PRIMME_EIGS finds the eigenvalues closest
20 | % to TARGET.
21 | % In addition, if m values are provided in OPTS.targetShifts,
22 | % find eigenvalues that are farthest (LM) or closest (SM) in
23 | % absolute value from the given values.
24 | % Examples:
25 | % k=1, 'LM', OPTS.targetShifts=[] returns the largest magnitude lambda(A).
26 | % k=1, 'SM', OPTS.targetShifts=[] returns the smallest magnitude lambda(A).
27 | % k=3, 'SM', OPTS.targetShifts=[2, 5] returns the closest eigenvalue in
28 | % absolute sense to 2, and the two closest eigenvalues to 5.
29 | % 'CLT' or 'CGT', find eigenvalues closest to but less or greater than
30 | % the given values in OPTS.targetShifts.
31 | %
32 | % D = PRIMME_EIGS(A,K,TARGET,OPTS) specifies extra solver parameters. Some
33 | % default values are indicated in brackets {}:
34 | %
35 | % OPTS.aNorm: the estimated 2-norm of A {0.0 (estimate the norm internally)}
36 | % OPTS.tol: convergence tolerance: {eps*1e4}
37 | % NORM(A*X(:,i)-X(:,i)*D(i,i)) < tol*NORM(A)
38 | % OPTS.maxBlockSize: maximum block size (useful for high multiplicities) {1}
39 | % OPTS.disp: different level reporting (0-3) (see HIST) {no output 0}
40 | % OPTS.isreal: whether A represented by AFUN is real or complex {false}
41 | % OPTS.targetShifts: shifts for interior eigenvalues (see TARGET) {[]}
42 | % OPTS.v0: any number of initial guesses to the eigenvectors {[]}
43 | % OPTS.orthoConst: external orthogonalization constraints {[]}
44 | % OPTS.locking: 1, hard locking; 0, soft locking
45 | % OPTS.p: maximum size of the search subspace
46 | % OPTS.minRestartSize: minimum Ritz vectors to keep in restarting
47 | % OPTS.maxMatvecs: maximum number of matrix vector multiplications {Inf}
48 | % OPTS.maxit: maximum number of outer iterations {Inf}
49 | % OPTS.scheme: the restart scheme {'primme_thick'}
50 | % OPTS.maxPrevRetain: number of Ritz vectors from previous iteration
51 | % that are kept after restart {typically >0, see PRIMME doc}
52 | % OPTS.robustShifts: setting to true may avoid stagnation or misconvergence
53 | % OPTS.maxInnerIterations: maximum number of inner solver iterations
54 | % OPTS.LeftQ: use the locked vectors in the left projector
55 | % OPTS.LeftX: use the approx. eigenvector in the left projector
56 | % OPTS.RightQ: use the locked vectors in the right projector
57 | % OPTS.RightX: use the approx. eigenvector in the right projector
58 | % OPTS.SkewQ: use the preconditioned locked vectors in the right projector
59 | % OPTS.SkewX: use the preconditioned approx. eigenvector in the right projector
60 | % OPTS.relTolBase: a legacy from classical JDQR (not recommended)
61 | % OPTS.convTest: how to stop the inner QMR Method
62 | % OPTS.iseed: random seed
63 | %
64 | % For detailed descriptions of the above options, visit:
65 | % http://www.cs.wm.edu/~andreas/software/doc/primmec.html#parameters-guide
66 | %
67 | % D = PRIMME_EIGS(A,K,TARGET,OPTS,METHOD) specifies the eigensolver method:
68 | % 'DYNAMIC', (default) switches dynamically to the best method
69 | % 'DEFAULT_MIN_TIME', best method for low-cost matrix-vector product
70 | % 'DEFAULT_MIN_MATVECS', best method for heavy matvec/preconditioner
71 | % 'Arnoldi', Arnoldi not implemented efficiently
72 | % 'GD', classical block Generalized Davidson
73 | % 'GD_plusK', GD+k block GD with recurrence restarting
74 | % 'GD_Olsen_plusK', GD+k with approximate Olsen precond.
75 | % 'JD_Olsen_plusK', GD+k, exact Olsen (two precond per step)
76 | % 'RQI', Rayleigh Quotient Iteration. Also INVIT,
77 | % but for INVIT provide OPTS.targetShifts
78 | % 'JDQR', Original block, Jacobi Davidson
79 | % 'JDQMR', Our block JDQMR method (similar to JDCG)
80 | % 'JDQMR_ETol', Slight, but efficient JDQMR modification
81 | % 'STEEPEST_DESCENT', equiv. to GD(block,2*block)
82 | % 'LOBPCG_OrthoBasis', equiv. to GD(nev,3*nev)+nev
83 | % 'LOBPCG_OrthoBasis_Window' equiv. to GD(block,3*block)+block nev>block
84 | %
85 | % For further description of the method visit:
86 | % http://www.cs.wm.edu/~andreas/software/doc/appendix.html#preset-methods
87 | %
88 | % D = PRIMME_EIGS(A,K,TARGET,OPTS,METHOD,P)
89 | % D = PRIMME_EIGS(A,K,TARGET,OPTS,METHOD,P1,P2) uses preconditioner P or
90 | % P = P1*P2 to accelerate convergence of the method. Applying P\x should
91 | % approximate (A-sigma*eye(N))\x, for sigma near the wanted eigenvalue(s).
92 | % If P is [] then a preconditioner is not applied. P may be a function
93 | % handle PFUN such that PFUN(x) returns P\x.
94 | %
95 | % [X,D] = PRIMME_EIGS(...) returns a diagonal matrix D with the eigenvalues
96 | % and a matrix X whose columns are the corresponding eigenvectors.
97 | %
98 | % [X,D,R] = PRIMME_EIGS(...) also returns an array of the residual norms of
99 | % the computed eigenpairs.
100 | %
101 | % [X,D,R,STATS] = PRIMME_EIGS(...) returns a struct to report statistical
102 | % information about number of matvecs, elapsed time, and estimates for the
103 | % largest and smallest algebraic eigenvalues of A.
104 | %
105 | % [X,D,R,STATS,HIST] = PRIMME_EIGS(...) it returns the convergence history,
106 | % instead of printing it. Every row is a record, and the columns report:
107 | %
108 | % HIST(:,1): number of matvecs
109 | % HIST(:,2): time
110 | % HIST(:,3): number of converged/locked pairs
111 | % HIST(:,4): block index
112 | % HIST(:,5): approximate eigenvalue
113 | % HIST(:,6): residual norm
114 | % HIST(:,7): QMR residual norm
115 | %
116 | % OPTS.disp controls the granularity of the record. If OPTS.disp == 1, HIST
117 | % has one row per converged eigenpair and only the first three columns are
118 | % reported; if OPTS.disp == 2, HIST has one row per outer iteration and only
119 | % the first six columns are reported; and otherwise HIST has one row per QMR
120 | % iteration and all columns are reported.
121 | %
122 | % Examples:
123 | % A = diag(1:100);
124 | %
125 | % d = primme_eigs(A,10) % the 10 largest magnitude eigenvalues
126 | %
127 | % d = primme_eigs(A,10,'SM') % the 10 smallest magnitude eigenvalues
128 | %
129 | % d = primme_eigs(A,10,25.0) % the 10 closest eigenvalues to 25.0
130 | %
131 | % opts.targetShifts = [2 20];
132 | % d = primme_eigs(A,10,'SM',opts) % 1 eigenvalue closest to 2 and
133 | % % 9 eigenvalues closest to 20
134 | %
135 | % opts = struct();
136 | % opts.tol = 1e-4; % set tolerance
137 | % opts.maxBlockSize = 2; % set block size
138 | % [x,d] = primme_eigs(A,10,'SA',opts,'DEFAULT_MIN_TIME')
139 | %
140 | % opts.orthoConst = x;
141 | % [x,d] = primme_eigs(A,10,'SA',opts) % find another 10
142 | %
143 | % % Compute the 6 eigenvalues closest to 30.5 using ILU(0) as a precond.
144 | % % by passing the matrices L and U.
145 | % A = sparse(diag(1:50) + diag(ones(49,1), 1) + diag(ones(49,1), -1));
146 | % [L,U] = ilu(A, struct('type', 'nofill'));
147 | % d = primme_eigs(A, 6, 30.5, [], [], L, U);
148 | %
149 | % % Compute the 6 eigenvalues closest to 30.5 using Jacobi preconditioner
150 | % % by passing a function.
151 | % Pfun = @(x)(diag(A) - 30.5)\x;
152 | % d = primme_eigs(A,6,30.5,[],[],Pfun);
153 | %
154 | % For more details see PRIMME documentation at
155 | % http://www.cs.wm.edu/~andreas/software/doc/readme.html
156 | %
157 | % See also PRIMME_SVDS, EIGS.
158 |
159 |    % Check primme_mex exists
160 |    if ~ exist('primme_mex')
161 |       error 'primme_mex is not available. Try to recompile the MATLAB/Octave''s PRIMME module'
162 |    end
163 |
164 |    % Check arity of input and output arguments
165 |    minInputs = 1;
166 |    maxInputs = 8;
167 |    narginchk(minInputs,maxInputs);
168 |
169 |    minOutputs = 0;
170 |    maxOutputs = 5;
171 |    nargoutchk(minOutputs,maxOutputs);
172 |
173 |    % Check input arguments
174 |    opts = struct();
175 |    A = varargin{1};
176 |    nextArg = 2;
177 |    if isnumeric(A)
178 |       % Check matrix is Hermitian and get matrix dimension
179 |       [m, n] = size(A);
180 |       if ~ishermitian(A)
181 |          error('Input matrix must be real symmetric or complex Hermitian');
182 |       end
183 |       opts.n = n;
184 |       opts.matrixMatvec = @(x)A*x;
185 |
186 |       % Get type and complexity
187 |       Acomplex = ~isreal(A);
188 |       Adouble = strcmp(class(A), 'double');
189 |    else
190 |       opts.matrixMatvec = fcnchk_gen(A); % get the function handle of user's function
191 |       if nargin < nextArg, error('The dimension of A must be given right after AFUN'); end
192 |       n = round(varargin{nextArg});
193 |       if ~isscalar(n) || ~isreal(n) || (n<0) || ~isfinite(n)
194 |          error('The size of input matrix A must be a positive integer');
195 |       end
196 |       opts.n = n;
197 |       nextArg = nextArg + 1;
198 |       % Assume complex double matrix
199 |       Acomplex = 1;
200 |       Adouble = 1;
201 |    end
202 |
203 |    if nargin >= nextArg
204 |       opts.numEvals = round(varargin{nextArg});
205 |       nextArg = nextArg + 1;
206 |    else
207 |       opts.numEvals = min(6, opts.n);
208 |    end
209 |
210 |    if nargin >= nextArg
211 |       target = varargin{nextArg};
212 |       if isnumeric(target)
213 |          opts.target = 'primme_closest_abs';
214 |          opts.targetShifts = target;
215 |       elseif ischar(target)
216 |          targets = struct('LA', 'primme_largest', ...
217 |                           'LM', 'primme_largest_abs', ...
218 |                           'SA', 'primme_smallest', ...
219 |                           'CGT', 'primme_closest_geq', ...
220 |                           'CLT', 'primme_closest_leq', ...
221 |                           'SM', 'primme_closest_abs');
222 |          if ~isfield(targets, target)
223 |             error('target must be LA, SA, LM, SM, CGT or CLT');
224 |          end
225 |          opts.target = getfield(targets, target);
226 |          if (strcmp(target, 'SM') || strcmp(target, 'LM')) && ~isfield(opts, 'targetShifts')
227 |             opts.targetShifts = 0;
228 |          end
229 |       else
230 |          error('target must be a number or a string');
231 |       end
232 |       nextArg = nextArg + 1;
233 |    else
234 |       opts.target = 'primme_largest_abs';
235 |       opts.targetShifts = 0;
236 |    end
237 |
238 |    if nargin >= nextArg
239 |       if ~isempty(varargin{nextArg})
240 |          opts0 = varargin{nextArg};
241 |          if ~isstruct(opts0)
242 |             error('opts must be a struct');
243 |          end
244 |          opts0_names = fieldnames(opts0);
245 |          for i=1:numel(opts0_names)
246 |             opts.(opts0_names{i}) = opts0.(opts0_names{i});
247 |          end
248 |       end
249 |       nextArg = nextArg + 1;
250 |    end
251 |
252 |    method = 'PRIMME_DEFAULT_METHOD';
253 |    if nargin >= nextArg
254 |       if ~isempty(varargin{nextArg})
255 |          method = varargin{nextArg};
256 |          if ischar(method)
257 |             method = ['PRIMME_' method];
258 |          end
259 |       end
260 |       nextArg = nextArg + 1;
261 |    end
262 |
263 |    P = [];
264 |    if nargin >= nextArg
265 |       P = varargin{nextArg};
266 |       if isnumeric(P)
267 |          % An empty P means "no preconditioner": leave it as []
268 |          if ~isempty(P), P = @(x)P\x; end
269 |       else
270 |          P = fcnchk_gen(P); % get the function handle of user's function
271 |       end
272 |       nextArg = nextArg + 1;
273 |    end
274 |    if nargin >= nextArg
275 |       P2 = varargin{nextArg};
276 |       if isnumeric(P2)
277 |          if ~isempty(P2), P2 = @(x)P2\x; end
278 |       else
279 |          P2 = fcnchk_gen(P2); % get the function handle of user's function
280 |       end
281 |       % P = P1*P2, hence P\x = P2\(P1\x); an empty factor is simply skipped
282 |       if isempty(P), P = P2; elseif ~isempty(P2), P = @(x)P2(P(x)); end
283 |    end
284 |    if ~isempty(P)
285 |       opts.applyPreconditioner = P;
286 |       opts.correction.precondition = 1;
287 |    end
288 |
289 |    % Test whether the given matrix and preconditioner are valid
290 |    try
291 |       x = opts.matrixMatvec(ones(opts.n, 1));
292 |       if isfield(opts, 'applyPreconditioner')
293 |          x = opts.applyPreconditioner(ones(opts.n, 1));
294 |       end
295 |       clear x;
296 |    catch ME
297 |       rethrow(ME);
298 |    end
299 |
300 |    % Process 'isreal' in opts
301 |    if isfield(opts, 'isreal')
302 |       Acomplex = ~opts.isreal;
303 |       opts = rmfield(opts, 'isreal');
304 |    end
305 |
306 |    % Process 'isdouble' in opts
307 |    if isfield(opts, 'isdouble')
308 |       Adouble = opts.isdouble;
309 |       opts = rmfield(opts, 'isdouble');
310 |    end
311 |
312 |    % Process 'disp' in opts
313 |    if isfield(opts, 'disp')
314 |       dispLevel = opts.disp;
315 |       if dispLevel > 3 || dispLevel < 0
316 |          error('Invalid value in opts.disp; it should be 0, 1, 2 or 3');
317 |       end
318 |       opts = rmfield(opts, 'disp');
319 |    elseif nargout >= 5
320 |       dispLevel = 1;
321 |    else
322 |       dispLevel = 0;
323 |    end
324 |
325 |    % Rename tol, maxit and p as eps, maxOuterIterations and maxBasisSize.
326 |    % Also move options that are outside of primme_params' hierarchy.
327 |    changes = {{'tol', 'eps'}, {'maxit', 'maxOuterIterations'}, {'p', 'maxBasisSize'}, ...
328 |               {'projection', 'projection_projection'}, ...
329 |               {'scheme', 'restarting_scheme'}, ...
330 |               {'maxPrevRetain', 'restarting_maxPrevRetain'}, ...
331 |               {'precondition', 'correction_precondition'}, ...
332 |               {'robustShifts', 'correction_robustShifts'}, ...
333 |               {'maxInnerIterations', 'correction_maxInnerIterations'}, ...
334 |               {'LeftQ', 'correction_projectors_LeftQ'}, ...
335 |               {'LeftX', 'correction_projectors_LeftX'}, ...
336 |               {'RightQ', 'correction_projectors_RightQ'}, ...
337 |               {'RightX', 'correction_projectors_RightX'}, ...
338 |               {'SkewQ', 'correction_projectors_SkewQ'}, ...
339 |               {'SkewX', 'correction_projectors_SkewX'}, ...
340 |               {'convTest', 'correction_convTest'}, ...
341 |               {'relTolBase', 'correction_relTolBase'}};
342 |
343 |    for i=1:numel(changes)
344 |       if isfield(opts, changes{i}{1})
345 |          opts.(changes{i}{2}) = opts.(changes{i}{1});
346 |          opts = rmfield(opts, changes{i}{1});
347 |       end
348 |    end
349 |
350 |    % Prepare numOrthoConst and initSize
351 |    if isfield(opts, 'orthoConst')
352 |       init = opts.orthoConst;
353 |       if size(init, 1) ~= opts.n
354 |          error('Invalid matrix dimensions in opts.orthoConst');
355 |       end
356 |       opts = rmfield(opts, 'orthoConst');
357 |       opts.numOrthoConst = size(init, 2);
358 |    else
359 |       init = [];
360 |    end
361 |
362 |    if isfield(opts, 'v0')
363 |       init0 = opts.v0;
364 |       if size(init0, 1) ~= opts.n
365 |          error('Invalid matrix dimensions in opts.v0');
366 |       end
367 |       opts = rmfield(opts, 'v0');
368 |       opts.initSize = size(init0, 2);
369 |       init = [init init0];
370 |    end
371 |
372 |    % Set default tol
373 |    if ~isfield(opts, 'eps')
374 |       if Adouble
375 |          opts.eps = eps*1e4;
376 |       else
377 |          opts.eps = sqrt(eps)*1e4;
378 |       end
379 |    end
380 |
381 |    % Create primme_params
382 |    primme = primme_mex('primme_initialize');
383 |
384 |    % Set other options in primme_params
385 |    primme_set_members(opts, primme);
386 |
387 |    % Set method
388 |    primme_mex('primme_set_method', method, primme);
389 |
390 |    % Set monitor and shared variables with the monitor
391 |    hist = [];
392 |    locking = primme_mex('primme_get_member', primme, 'locking');
393 |    nconv = [];
394 |    return_hist = 0;
395 |    if dispLevel > 0
396 |       % NOTE: Octave doesn't support function handler for nested functions
397 |       primme_mex('primme_set_member', primme, 'monitorFun', ...
398 |             @(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10)record_history(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10));
399 |    end
400 |    if nargout >= 5
401 |       return_hist = 1;
402 |    elseif dispLevel == 1
403 |       fprintf('#MV\tTime\t\tNConv\n');
404 |    elseif dispLevel == 2
405 |       fprintf('#MV\tTime\t\tNConv\tIdx\tValue\tRes\n');
406 |    elseif dispLevel == 3
407 |       fprintf('#MV\tTime\t\tNConv\tIdx\tValue\tRes\tQMR_Res\n');
408 |    end
409 |
410 |    % Select solver
411 |    if Adouble
412 |       if Acomplex
413 |          type = 'z';
414 |       else
415 |          type = 'd';
416 |       end
417 |    else
418 |       if Acomplex
419 |          type = 'c';
420 |       else
421 |          type = 's';
422 |       end
423 |    end
424 |    xprimme = [type 'primme'];
425 |
426 |    % Call xprimme
427 |    [ierr, evals, norms, evecs] = primme_mex(xprimme, init, primme);
428 |
429 |    % Process error code and return the required arguments
430 |    if ierr ~= 0
431 |       error([xprimme ' returned ' num2str(ierr) ': ' primme_error_msg(ierr)]);
432 |    end
433 |
434 |    % Return interior eigenvalues in descending order
435 |    if ~strcmp(opts.target,'primme_largest') ...
436 |          && ~strcmp(opts.target,'primme_smallest') ...
437 |          && ~strcmp(opts.target,'primme_largest_abs')
438 |       [evals,ind] = sort(evals,'descend');
439 |       evecs = evecs(:,ind);
440 |    end
441 |
442 |    if (nargout <= 1)
443 |       varargout{1} = evals;
444 |    end
445 |    if (nargout >= 2)
446 |       varargout{1} = evecs;
447 |       varargout{2} = diag(evals);
448 |    end
449 |    if (nargout >= 3)
450 |       varargout{3} = norms;
451 |    end
452 |    if (nargout >= 4)
453 |       stats = struct();
454 |       stats.numMatvecs = primme_mex('primme_get_member', primme, 'stats_numMatvecs');
455 |       stats.elapsedTime = primme_mex('primme_get_member', primme, 'stats_elapsedTime');
456 |       stats.estimateMinEVal = primme_mex('primme_get_member', primme, 'stats_estimateMinEVal');
457 |       stats.estimateMaxEVal = primme_mex('primme_get_member', primme, 'stats_estimateMaxEVal');
458 |       stats.estimateAnorm = primme_mex('primme_get_member', primme, 'stats_estimateLargestSVal');
459 |       varargout{4} = stats;
460 |    end
461 |    if (nargout >= 5)
462 |       varargout{5} = hist;
463 |    end
464 |
465 |    function record_history(basisEvals, basisFlags, iblock, basisNorms, ...
466 |          numConverged, lockedEvals, lockedFlags, lockedNorms, inner_its, ...
467 |          LSRes, event)
468 |
469 |       numMatvecs = double(primme_mex('primme_get_member', primme, 'stats_numMatvecs'));
470 |       maxInnerIterations = primme_mex('primme_get_member', primme, 'correction_maxInnerIterations');
471 |       elapsedTime = primme_mex('primme_get_member', primme, 'stats_elapsedTime');
472 |       hist_rows = size(hist, 1);
473 |       if event == 0 || (event == 4 && ~locking) || event == 5
474 |          if ~locking
475 |             nconv = double(numConverged);
476 |          else
477 |             nconv = numel(lockedEvals);
478 |          end
479 |       end
480 |       if dispLevel == 0
481 |       elseif dispLevel == 1
482 |          if (event == 4 && ~locking) || event == 5
483 |             hist = [hist; numMatvecs elapsedTime nconv];
484 |          end
485 |       elseif dispLevel == 2
486 |          if event == 0 || (nconv == opts.numEvals && ((event == 4 && ~locking) || event == 5))
487 |             for i=1:numel(iblock)
488 |                hist = [hist; numMatvecs elapsedTime nconv i basisEvals(iblock(i)+1) basisNorms(iblock(i)+1)];
489 |             end
490 |          end
491 |       elseif dispLevel == 3
492 |          if event == 1
493 |             if ~isempty(basisEvals)
494 |                value = basisEvals(iblock(1)+1);
495 |                resNorm = basisNorms(iblock(1)+1);
496 |             else
497 |                value = nan;
498 |                resNorm = nan;
499 |             end
500 |             hist = [hist; numMatvecs elapsedTime nconv nan value resNorm LSRes];
501 |          elseif (maxInnerIterations == 0 || nconv == opts.numEvals) && (event == 0 || ((event == 4 && ~locking) || event == 5))
502 |             for i=1:numel(iblock)
503 |                hist = [hist; numMatvecs elapsedTime nconv i basisEvals(iblock(i)+1) basisNorms(iblock(i)+1) nan];
504 |             end
505 |          end
506 |       end
507 |       if ~return_hist && size(hist,1) > hist_rows
508 |          template{1} = '%d\t%f\t%d\n';
509 |          template{2} = '%d\t%f\t%d\t%d\t%g\t%e\n';
510 |          template{3} = '%d\t%f\t%d\t%d\t%g\t%e\t%e\n';
511 |          for i=hist_rows+1:size(hist,1)
512 |             a = num2cell(hist(i,:));
513 |             fprintf(template{dispLevel}, a{:});
514 |          end
515 |          hist = [];
516 |       end
517 |    end
518 | end
519 |
520 | function [f] = fcnchk_gen(x)
521 | %FCNCHK_GEN Return a callable for X: function names become handles.
522 | %   The former test exist('fcnchk','var') looked for a variable, so it was never
523 | %   true and names came back as plain char arrays that cannot be called.
524 |    f = x;
525 |    if ischar(x), f = str2func(x); end
526 | end
527 |
528 | function primme_set_members(opts, primme, prefix)
529 | %PRIMME_SET_MEMBERS Set options in primme_params
530 | % PRIMME_SET_MEMBERS(S, P) sets the options in struct S into the primme_params
531 | % reference P. A field F inside a sub-struct G is set as member 'G_F'.
532 | %
533 | % Example:
534 | % primme = primme_mex('primme_initialize');
535 | % ops.n = 10;
536 | % ops.target = 'primme_largest';
537 | % primme_set_members(ops, primme);
538 |
539 |    % MATLAB has no default argument values. PREFIX is the member-name path
540 |    % accumulated while recursing into nested option structs.
541 |    if nargin < 3, prefix = ''; end
542 |
543 |    fields = fieldnames(opts);
544 |    for i=1:numel(fields)
545 |       label = fields{i};
546 |       value = opts.(label);
547 |       if isstruct(value)
548 |          primme_set_members(value, primme, [prefix label '_']);
549 |       else
550 |          try
551 |             primme_mex('primme_set_member', primme, [prefix label], value);
552 |          catch ME
553 |             % Render the value safely: handles and cells cannot be concatenated as text
554 |             if ischar(value), shown = value;
555 |             elseif (isnumeric(value) || islogical(value)) && ndims(value) == 2, shown = mat2str(value);
556 |             else shown = ['<' class(value) '>']; end
557 |             error('Error setting the option %s to value %s (%s)', [prefix label], shown, ME.message);
558 |          end
559 |       end
560 |    end
561 | end
562 |
563 | function s = primme_error_msg(errorCode)
564 | %PRIMME_ERROR_MSG Text describing a PRIMME return code (1, 0, -1, ..., -38).
565 | %   Code c is stored at index 39+c; any other code yields 'Unknown error code'.
566 |    msg = cell(1, 40);
567 |    msg{39+  0} = 'success';
568 |    msg{39+  1} = 'reported only amount of required memory';
569 |    msg{39+ -1} = 'failed in allocating int or real workspace';
570 |    msg{39+ -2} = 'malloc failed in allocating a permutation integer array';
571 |    msg{39+ -3} = 'main_iter() encountered problem; the calling stack of the functions where the error occurred was printed in stderr';
572 |    msg{39+ -4} = 'argument primme is NULL';
573 |    msg{39+ -5} = 'n < 0 or nLocal < 0 or nLocal > n';
574 |    msg{39+ -6} = 'numProcs < 1';
575 |    msg{39+ -7} = 'matrixMatvec is NULL';
576 |    msg{39+ -8} = 'applyPreconditioner is NULL and precondition is not NULL';
577 |    msg{39+ -9} = 'not used';
578 |    msg{39+-10} = 'numEvals > n';
579 |    msg{39+-11} = 'numEvals < 0';
580 |    msg{39+-12} = 'eps > 0 and eps < machine precision';
581 |    msg{39+-13} = 'target is not properly defined';
582 |    msg{39+-14} = 'target is one of primme_largest_abs, primme_closest_geq, primme_closest_leq or primme_closest_abs but numTargetShifts <= 0 (no shifts)';
583 |    msg{39+-15} = 'target is one of primme_largest_abs primme_closest_geq primme_closest_leq or primme_closest_abs but targetShifts is NULL (no shifts array)';
584 |    msg{39+-16} = 'numOrthoConst < 0 or numOrthoConst > n (no free dimensions left)';
585 |    msg{39+-17} = 'maxBasisSize < 2';
586 |    msg{39+-18} = 'minRestartSize < 0 or minRestartSize shouldn''t be zero';
587 |    msg{39+-19} = 'maxBlockSize < 0 or maxBlockSize shouldn''t be zero';
588 |    msg{39+-20} = 'maxPrevRetain < 0';
589 |    msg{39+-21} = 'scheme is not one of *primme_thick* or *primme_dtr*';
590 |    msg{39+-22} = 'initSize < 0';
591 |    msg{39+-23} = 'locking == 0 and initSize > maxBasisSize';
592 |    msg{39+-24} = 'locking and initSize > numEvals';
593 |    msg{39+-25} = 'maxPrevRetain + minRestartSize >= maxBasisSize';
594 |    msg{39+-26} = 'minRestartSize >= n';
595 |    msg{39+-27} = 'printLevel < 0 or printLevel > 5';
596 |    msg{39+-28} = 'convTest is not one of primme_full_LTolerance primme_decreasing_LTolerance primme_adaptive_ETolerance or primme_adaptive';
597 |    msg{39+-29} = 'convTest == primme_decreasing_LTolerance and relTolBase <= 1';
598 |    msg{39+-30} = 'evals is NULL, but not evecs and resNorms';
599 |    msg{39+-31} = 'evecs is NULL, but not evals and resNorms';
600 |    msg{39+-32} = 'resNorms is NULL, but not evecs and evals';
601 |    msg{39+-33} = 'locking == 0 and minRestartSize < numEvals';
602 |    msg{39+-34} = 'ldevecs is less than nLocal';
603 |    msg{39+-35} = 'ldOPs is non-zero and less than nLocal';
604 |    msg{39+-36} = 'not enough memory for realWork';
605 |    msg{39+-37} = 'not enough memory for intWork';
606 |    msg{39+-38} = 'locking == 0 and target is primme_closest_leq or primme_closest_geq';
607 |
608 |    slot = errorCode + 39;
609 |    s = 'Unknown error code';
610 |    if slot > 0 && slot <= numel(msg)
611 |       s = msg{slot};
612 |    end
613 | end
614 |
--------------------------------------------------------------------------------
/primme_svds.m:
--------------------------------------------------------------------------------
1 | function [varargout] = primme_svds(varargin)
2 | %PRIMME_SVDS  Find a few singular values and vectors of large, sparse matrices
3 | %
4 | %   S = PRIMME_SVDS(A) returns a vector with the 6 largest singular values of A.
5 | %
6 | %   S = PRIMME_SVDS(AFUN,M,N) accepts the function handle AFUN to perform
7 | %   the matrix vector products with an M-by-N matrix A.
8 | %   AFUN(X,'notransp') returns A*X while AFUN(X,'transp') returns A'*X.
9 | %   In all the following, A can be replaced by AFUN,M,N.
10 | %
11 | %   S = PRIMME_SVDS(A,K) computes the K largest singular values of A.
12 | %
13 | %   S = PRIMME_SVDS(A,K,SIGMA) computes the K singular values closest to the
14 | %   scalar shift SIGMA.
15 | %     If SIGMA is a vector, find a singular value closest to each SIGMA(i).
16 | %     If SIGMA is 'L', it computes the largest singular values.
17 | %     If SIGMA is 'S', it computes the smallest singular values.
18 | %
19 | %   S = PRIMME_SVDS(A,K,SIGMA,OPTIONS) specifies extra solver parameters:
20 | %   (for some parameters we refer to PRIMME_EIGS)
21 | %
22 | %   Field name           Parameter                               Default
23 | %
24 | %   OPTIONS.aNorm        estimation of the 2-norm of A               0.0
25 | %   OPTIONS.tol          convergence tolerance (see eps):          1e-10
26 | %                        NORM([A*V-U*S;A'*U-V*S]) <= tol * NORM(A).
27 | %   OPTIONS.maxit        maximum number of matvecs (see maxMatvecs)  inf
28 | %   OPTIONS.p            maximum basis size (see maxBasisSize)         -
29 | %   OPTIONS.disp         level of reporting 0-3 (see HIST)             0
30 | %   OPTIONS.isreal       if 0, the matrix is complex; else it's real   0
31 | %   OPTIONS.isdouble     if 0, the matrix is single; else it's double  1
32 | %   OPTIONS.method       which equivalent eigenproblem to solve
33 | %                        - 'primme_svds_normalequations': A'*A or A*A'
34 | %                        - 'primme_svds_augmented': [0 A';A 0]
35 | %                        - 'primme_svds_hybrid':           (default)
36 | %                           first normal equations and then augmented
37 | %   OPTIONS.u0           approx. left singular vectors                []
38 | %   OPTIONS.v0           approx. right singular vectors               []
39 | %   OPTIONS.orthoConst   external orthogonalization constraints       []
40 | %   OPTIONS.locking      1, hard locking; 0, soft locking              -
41 | %   OPTIONS.maxBlockSize maximum block size                            1
42 | %   OPTIONS.iseed        random seed
43 | %   OPTIONS.primme       options for first stage solver                -
44 | %   OPTIONS.primmeStage2 options for second stage solver               -
45 | %
46 | %   The available options for OPTIONS.primme and primmeStage2 are
47 | %   the same as PRIMME_EIGS, plus the option 'method'. For detailed
48 | %   descriptions of the above options, visit:
49 | %   http://www.cs.wm.edu/~andreas/software/doc/svdsc.html#parameters-guide
50 | %   and for further descriptions of the methods visit:
51 | %   http://www.cs.wm.edu/~andreas/software/doc/appendixsvds.html#preset-methods
52 | %
53 | %   S = PRIMME_SVDS(A,K,SIGMA,OPTIONS,P) applies a preconditioner P as follows.
54 | %     If P is a matrix it applies P\X and P'\X to approximate A\X and A'\X.
55 | %     If P is a function handle, PFUN, PFUN(X,'notransp') returns P\X and
56 | %     PFUN(X,'transp') returns P'\X, approximating A\X and A'\X respectively.
57 | %     If P is a struct, it can have one or more of the following fields:
58 | %       P.AHA\X or P.AHA(X) returns an approximation of (A'*A)\X,
59 | %       P.AAH\X or P.AAH(X) returns an approximation of (A*A')\X,
60 | %       P.aug\X or P.aug(X) returns an approximation of [zeros(N,N) A';A zeros(M,M)]\X.
61 | %     If P is [] then no preconditioner is applied.
62 | %
63 | %   S = PRIMME_SVDS(A,K,SIGMA,OPTIONS,P1,P2) applies a factorized preconditioner.
64 | %     If both P1,P2 are nonempty, apply (P1*P2)\X to approximate A\X.
65 | %     If P1 is [] and P2 is nonempty, then (P2'*P2)\X approximates A'*A.
66 | %     P2 can be the R factor of an (incomplete) QR factorization of A or the L
67 | %     factor of an (incomplete) LL' factorization of A'*A (RIF).
68 | %     If both P1 and P2 are [] then no preconditioner is applied.
69 | %
70 | %   [U,S,V] = PRIMME_SVDS(...) returns also the corresponding singular vectors.
71 | %   If A is M-by-N and K singular triplets are computed, then U is M-by-K
72 | %   with orthonormal columns, S is K-by-K diagonal, and V is N-by-K with
73 | %   orthonormal columns.
74 | %
75 | %   [S,R] = PRIMME_SVDS(...)
76 | %   [U,S,V,R] = PRIMME_SVDS(...) returns the residual norm of each K triplet,
77 | %   NORM([A*V(:,i)-S(i,i)*U(:,i); A'*U(:,i)-S(i,i)*V(:,i)]).
78 | %
79 | %   [U,S,V,R,STATS] = PRIMME_SVDS(...) returns how many times A and P were
80 | %   used and elapsed time. The application of A is counted independently from
81 | %   the application of A'.
82 | %
83 | %   [U,S,V,R,STATS,HIST] = PRIMME_SVDS(...) returns the convergence history,
84 | %   instead of printing it. Every row is a record, and the columns report:
85 | %
86 | %   HIST(:,1): number of matvecs
87 | %   HIST(:,2): time
88 | %   HIST(:,3): number of converged/locked triplets
89 | %   HIST(:,4): stage
90 | %   HIST(:,5): block index
91 | %   HIST(:,6): approximate singular value
92 | %   HIST(:,7): residual norm
93 | %   HIST(:,8): QMR residual norm
94 | %
95 | %   OPTS.disp controls the granularity of the record. If OPTS.disp == 1, HIST
96 | %   has one row per converged triplet and only the first four columns are
97 | %   reported; if OPTS.disp == 2, HIST has one row per outer iteration and only
98 | %   the first seven columns are reported; and otherwise HIST has one row per QMR
99 | %   iteration and all columns are reported.
100 | %
101 | %   Examples:
102 | %      A = diag(1:50); A(200,1) = 0; % rectangular matrix of size 200x50
103 | %
104 | %      s = primme_svds(A,10) % the 10 largest singular values
105 | %
106 | %      s = primme_svds(A,10,'S') % the 10 smallest singular values
107 | %
108 | %      s = primme_svds(A,10,25) % the 10 closest singular values to 25
109 | %
110 | %      opts = struct();
111 | %      opts.tol = 1e-4; % set tolerance
112 | %      opts.method = 'primme_svds_normalequations' % set svd solver method
113 | %      opts.primme.method = 'DEFAULT_MIN_TIME' % set first stage eigensolver method
114 | %      opts.primme.maxBlockSize = 2; % set block size for first stage
115 | %      [u,s,v] = primme_svds(A,10,'S',opts); % find 10 smallest svd triplets
116 | %
117 | %      opts.orthoConst = {u,v};
118 | %      [s,rnorms] = primme_svds(A,10,'S',opts) % find another 10
119 | %
120 | %      % Compute the 5 smallest singular values of a square matrix using ILU(0)
121 | %      % as a preconditioner
122 | %      A = sparse(diag(1:50) + diag(ones(49,1), 1));
123 | %      [L,U] = ilu(A, struct('type', 'nofill'));
124 | %      svals = primme_svds(A, 5, 'S', [], L, U);
125 | %
126 | %      % Compute the 5 smallest singular values of a rectangular matrix using
127 | %      % Jacobi preconditioner on (A'*A)
128 | %      A = sparse(diag(1:50) + diag(ones(49,1), 1));
129 | %      A(200,50) = 1;  % size(A)=[200 50]
130 | %      Pstruct = struct('AHA', diag(A'*A),...
131 | %                       'AAH', ones(200,1), 'aug', ones(250,1));
132 | %      Pfun = @(x,mode)Pstruct.(mode).\x;
133 | %      s = primme_svds(A,5,'S',[],Pfun) % find the 5 smallest values
134 | %
135 | %   For more details see PRIMME documentation at
136 | %   http://www.cs.wm.edu/~andreas/software/doc/readme.html
137 | %
138 | %   See also PRIMME_EIGS, SVDS.
139 |
140 |    % Check primme_mex exists
141 |    if ~ exist('primme_mex')
142 |       error 'primme_mex is not available. Try to recompile the MATLAB/Octave''s PRIMME module'
143 |    end
144 |
145 |    % Check arity of input and output arguments
146 |    minInputs = 1;
147 |    maxInputs = 8;
148 |    narginchk(minInputs,maxInputs);
149 |
150 |    minOutputs = 0;
151 |    maxOutputs = 6;
152 |    nargoutchk(minOutputs,maxOutputs);
153 |
154 |    % Check input arguments
155 |    opts = struct();
156 |    A = varargin{1};
157 |    nextArg = 2;
158 |    if isnumeric(A)
159 |       % Get matrix dimensions
160 |       [m, n] = size(A);
161 |       opts.m = m;
162 |       opts.n = n;
163 |       opts.matrixMatvec = @(x,mode)matvecsvds(A,x,mode);
164 |
165 |       % Get type and complexity
166 |       Acomplex = ~isreal(A);
167 |       Adouble = strcmp(class(A), 'double');
168 |    else
169 |       if nargin < 3
170 |          error('The dimensions M and N must follow A when A is a function');
171 |       end
172 |       opts.matrixMatvec = fcnchk_gen(A); % get the function handle of user's function
173 |       m = round(varargin{nextArg});
174 |       n = round(varargin{nextArg+1});
175 |       if ~isscalar(m) || ~isreal(m) || (m<0) || ~isfinite(m) || ...
176 |             ~isscalar(n) || ~isreal(n) || (n<0) || ~isfinite(n)
177 |          error('The dimensions of the input matrix A must be non-negative integers');
178 |       end
179 |       opts.m = m;
180 |       opts.n = n;
181 |       nextArg = nextArg + 2;
182 |
183 |       % Assume complex double matrix
184 |       Acomplex = 1;
185 |       Adouble = 1;
186 |    end
187 |
188 |    if nargin >= nextArg
189 |       opts.numSvals = round(varargin{nextArg});
190 |       nextArg = nextArg + 1;
191 |    else
192 |       opts.numSvals = min([6 opts.m opts.n]);
193 |    end
194 |
195 |    if nargin >= nextArg
196 |       target = varargin{nextArg};
197 |       if ischar(target)
198 |          targets = struct('L', 'primme_svds_largest', ...
199 |                           'S', 'primme_svds_smallest');
200 |          if ~isfield(targets, target(1))
201 |             error('target must be L, S or real non-negative numbers');
202 |          end
203 |          opts.target = getfield(targets, target(1));
204 |       elseif isnumeric(target) && all(target == 0)
205 |          opts.target = 'primme_svds_smallest';
206 |       elseif isnumeric(target) && all(target >= 0)
207 |          opts.targetShifts = target;
208 |          opts.target = 'primme_svds_closest_abs';
209 |       else
210 |          error('target must be L, S or real non-negative numbers');
211 |       end
212 |       nextArg = nextArg + 1;
213 |    else
214 |       opts.target = 'primme_svds_largest';
215 |    end
216 |
217 |    if nargin >= nextArg
218 |       if ~isempty(varargin{nextArg})
219 |          opts0 = varargin{nextArg};
220 |          if ~isstruct(opts0)
221 |             error('opts must be a struct');
222 |          end
223 |          opts0_names = fieldnames(opts0);
224 |          for i=1:numel(opts0_names)
225 |             opts.(opts0_names{i}) = opts0.(opts0_names{i});
226 |          end
227 |       end
228 |       nextArg = nextArg + 1;
229 |    end
230 |
231 |    if nargin == nextArg || (nargin > nextArg && isempty(varargin{nextArg+1}))
232 |       P = varargin{nextArg};
233 |       if isnumeric(P)
234 |          if ~isempty(P)
235 |             P = @(x,mode)precondsvds_Pmat(P,x,mode);
236 |          end
237 |       elseif isstruct(P)
238 |          P = @(x,mode)precondsvds_Pstruct(P,x,mode);
239 |       else
240 |          P = fcnchk_gen(P); % get the function handle of user's function
241 |          P = @(x,mode)precondsvds_Pfun(P,x,mode,opts.m);
242 |       end
243 |       if ~isempty(P)
244 |          opts.applyPreconditioner = P;
245 |          opts.precondition = 1;
246 |       end
247 |    elseif nargin >= nextArg
248 |       P1 = varargin{nextArg};
249 |       P2 = varargin{nextArg+1};
250 |       if (~isempty(P1) && ~isnumeric(P1)) || ~isnumeric(P2)
251 |          error('P1 and P2 must be matrices');
252 |       end
253 |       P = @(x,mode)precondsvds_P1P2(P1, P2, x, mode);
254 |       opts.applyPreconditioner = P;
255 |       opts.precondition = 1;
256 |    end
257 |
258 |    % Test whether the given matrix and preconditioner are valid (any error propagates)
259 |    x = opts.matrixMatvec(ones(opts.n, 1), 'notransp');
260 |    x = opts.matrixMatvec(ones(opts.m, 1), 'transp');
261 |    if isfield(opts, 'applyPreconditioner')
262 |       x = opts.applyPreconditioner(ones(opts.n, 1), 'AHA');
263 |       x = opts.applyPreconditioner(ones(opts.m, 1), 'AAH');
264 |       x = opts.applyPreconditioner(ones(opts.m+opts.n, 1), 'aug');
265 |    end
266 |    clear x;
267 |
268 |    % Process 'isreal' in opts
269 |    if isfield(opts, 'isreal')
270 |       Acomplex = ~opts.isreal;
271 |       opts = rmfield(opts, 'isreal');
272 |    end
273 |
274 |    % Process 'isdouble' in opts
275 |    if isfield(opts, 'isdouble')
276 |       Adouble = opts.isdouble;
277 |       opts = rmfield(opts, 'isdouble');
278 |    end
279 |
280 |    % Process 'disp' in opts
281 |    if isfield(opts, 'disp')
282 |       dispLevel = opts.disp;
283 |       if dispLevel > 3 || dispLevel < 0
284 |          error('Invalid value in opts.disp; it should be 0, 1, 2 or 3');
285 |       end
286 |       opts = rmfield(opts, 'disp');
287 |    elseif nargout >= 6
288 |       dispLevel = 1;
289 |    else
290 |       dispLevel = 0;
291 |    end
292 |
293 |    % Rename tol, maxit and p as eps, maxMatvecs and maxBasisSize
294 |    changes = {{'tol', 'eps'}, {'maxit', 'maxMatvecs'}, {'p', 'maxBasisSize'}};
295 |    for i=1:numel(changes)
296 |       if isfield(opts, changes{i}{1})
297 |          opts.(changes{i}{2}) = opts.(changes{i}{1});
298 |          opts = rmfield(opts, changes{i}{1});
299 |       end
300 |    end
301 |
302 |    % Set default tol to 1e-10
303 |    if ~isfield(opts, 'eps')
304 |       opts.eps = 1e-10;
305 |    end
306 |
307 |    % Move options that are outside of primme_parms' hierarchy
308 |    changes = {{'projection',         'projection_projection'}, ...
309 |               {'scheme',             'restarting_scheme'}, ...
310 |               {'maxPrevRetain',      'restarting_maxPrevRetain'}, ...
311 |               {'precondition',       'correction_precondition'}, ...
312 |               {'robustShifts',       'correction_robustShifts'}, ...
313 |               {'maxInnerIterations', 'correction_maxInnerIterations'}, ...
314 |               {'LeftQ',              'correction_projectors_LeftQ'}, ...
315 |               {'LeftX',              'correction_projectors_LeftX'}, ...
316 |               {'RightQ',             'correction_projectors_RightQ'}, ...
317 |               {'RightX',             'correction_projectors_RightX'}, ...
318 |               {'SkewQ',              'correction_projectors_SkewQ'}, ...
319 |               {'SkewX',              'correction_projectors_SkewX'}, ...
320 |               {'convTest',           'correction_convTest'}, ...
321 |               {'relTolBase',         'correction_relTolBase'}};
322 |    primme_fields = {'primme', 'primmeStage2'};
323 |    for j=1:numel(primme_fields)
324 |       if isfield(opts, primme_fields{j})
325 |          opts0 = opts.(primme_fields{j});
326 |          for i=1:numel(changes)
327 |             if isfield(opts0, changes{i}{1})
328 |                opts0.(changes{i}{2}) = opts0.(changes{i}{1});
329 |                opts0 = rmfield(opts0, changes{i}{1});
330 |             end
331 |          end
332 |          opts.(primme_fields{j}) = opts0;
333 |       end
334 |    end
335 |
336 |    % Process method, primme.method and primmeStage2.method
337 |    if isfield(opts, 'method')
338 |       method = opts.method;
339 |       opts = rmfield(opts, 'method');
340 |    else
341 |       method = 'primme_svds_default';
342 |    end
343 |    if isfield(opts, 'primme') && isfield(opts.primme, 'method')
344 |       primmeStage0method = opts.primme.method;
345 |       opts.primme = rmfield(opts.primme, 'method');
346 |       if ischar(primmeStage0method)
347 |          primmeStage0method = ['PRIMME_' primmeStage0method];
348 |       end
349 |    else
350 |       primmeStage0method = 'PRIMME_DEFAULT_METHOD';
351 |    end
352 |    if isfield(opts, 'primmeStage2') && isfield(opts.primmeStage2, 'method')
353 |       primmeStage1method = opts.primmeStage2.method;
354 |       opts.primmeStage2 = rmfield(opts.primmeStage2, 'method');
355 |       if ischar(primmeStage1method)
356 |          primmeStage1method = ['PRIMME_' primmeStage1method];
357 |       end
358 |    else
359 |       primmeStage1method = 'PRIMME_DEFAULT_METHOD';
360 |    end
361 |
362 |    % Prepare numOrthoConst and initSize
363 |    if isfield(opts, 'orthoConst')
364 |       init = opts.orthoConst;
365 |       if ~iscell(init) || numel(init) ~= 2 || (isempty(init{1}) && isempty(init{2}))
366 |          error('opts.orthoConst should be {left_vectors, right_vectors}');
367 |       end
368 |       if isempty(init{1})
369 |          init{1} = opts.matrixMatvec(init{2}, 'notransp');
370 |       elseif isempty(init{2})
371 |          init{2} = opts.matrixMatvec(init{1}, 'transp');
372 |       end
373 |       if size(init{1}, 1) ~= opts.m || size(init{2}, 1) ~= opts.n || ...
374 |             size(init{1}, 2) ~= size(init{2}, 2)
375 |          error('Invalid matrix dimensions in opts.orthoConst');
376 |       end
377 |       opts = rmfield(opts, 'orthoConst');
378 |       opts.numOrthoConst = size(init{1}, 2);
379 |    else
380 |       init = {[],[]};
381 |    end
382 |
383 |    if isfield(opts, 'v0') || isfield(opts, 'u0')
384 |       if ~isfield(opts, 'v0'), opts.v0 = []; end
385 |       if ~isfield(opts, 'u0'), opts.u0 = []; end
386 |       init0 = {opts.u0, opts.v0};   % same {left, right} order as orthoConst
387 |       if isempty(init0{1})
388 |          init0{1} = opts.matrixMatvec(init0{2}, 'notransp');
389 |       elseif isempty(init0{2})
390 |          init0{2} = opts.matrixMatvec(init0{1}, 'transp');
391 |       end
392 |       if size(init0{1}, 1) ~= opts.m || size(init0{2}, 1) ~= opts.n || ...
393 |             size(init0{1}, 2) ~= size(init0{2}, 2)
394 |          error('Invalid matrix dimensions in opts.u0 or opts.v0');
395 |       end
396 |       opts = rmfield(opts, 'u0');
397 |       opts = rmfield(opts, 'v0');
398 |       opts.initSize = size(init0{1}, 2);
399 |       init = {[init{1} init0{1}], [init{2} init0{2}]};
400 |    end
401 |
402 |    % Create primme_params
403 |    primme_svds = primme_mex('primme_svds_initialize');
404 |
405 |    % Set other options in primme_svds_params
406 |    primme_svds_set_members(opts, primme_svds);
407 |
408 |    % Set method in primme_svds_params
409 |    primme_mex('primme_svds_set_method', method, primmeStage0method, ...
410 |       primmeStage1method, primme_svds);
411 |
412 |    % Set monitor and shared variables with the monitor
413 |    hist = [];
414 |    %locking = primme_mex('primme_get_member', primme, 'locking');
415 |    nconv = 0;
416 |    return_hist = 0;
417 |    if dispLevel > 0
418 |       % NOTE: Octave doesn't support function handler for nested functions
419 |       primme_mex('primme_svds_set_member', primme_svds, 'monitorFun', ...
420 |          @(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11)record_history(a0,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11));
421 |    end
422 |    if nargout >= 6   % HIST is the sixth output; only then is printing suppressed
423 |       return_hist = 1;
424 |    elseif dispLevel == 1
425 |       fprintf('#MV\tTime\t\tNConv\tStage\n');
426 |    elseif dispLevel == 2
427 |       fprintf('#MV\tTime\t\tNConv\tStage\tIdx\tValue\tRes\n');
428 |    elseif dispLevel == 3
429 |       fprintf('#MV\tTime\t\tNConv\tStage\tIdx\tValue\tRes\tQMR_Res\n');
430 |    end
431 |
432 |    % Select solver
433 |    if Adouble
434 |       if Acomplex
435 |          type = 'z';
436 |       else
437 |          type = 'd';
438 |       end
439 |    else
440 |       if Acomplex
441 |          type = 'c';
442 |       else
443 |          type = 's';
444 |       end
445 |    end
446 |    xprimme_svds = [type 'primme_svds'];
447 |
448 |    % Call xprimme_svds
449 |    [ierr, svals, norms, svecsl, svecsr] = primme_mex(xprimme_svds, init{1}, ...
450 |       init{2}, primme_svds);
451 |
452 |    % Process error code and return the required arguments
453 |    if ierr ~= 0
454 |       error([xprimme_svds ' returned ' num2str(ierr) ': ' primme_svds_error_msg(ierr)]);
455 |    end
456 |
457 |    % Return smallest or interior singular triplets in descending order
458 |    if strcmp(opts.target,'primme_svds_smallest') || strcmp(opts.target,'primme_svds_closest_abs')
459 |       [svals,ind] = sort(svals,'descend');
460 |       svecsl = svecsl(:,ind);
461 |       svecsr = svecsr(:,ind);
462 |       norms = norms(ind);   % keep every residual norm with its triplet
463 |    end
464 |
465 |    if nargout <= 1
466 |       varargout{1} = svals;
467 |    elseif nargout == 2
468 |       varargout{1} = svals;
469 |       varargout{2} = norms;
470 |    elseif nargout >= 3
471 |       varargout{1} = svecsl;
472 |       varargout{2} = diag(svals);
473 |       varargout{3} = svecsr;
474 |    end
475 |    if (nargout >= 4)
476 |       varargout{4} = norms;
477 |    end
478 |    if (nargout >= 5)
479 |       stats = struct();
480 |       stats.numMatvecs = primme_mex('primme_svds_get_member', primme_svds, 'stats_numMatvecs');
481 |       stats.elapsedTime = primme_mex('primme_svds_get_member', primme_svds, 'stats_elapsedTime');
482 |       stats.aNorm = primme_mex('primme_svds_get_member', primme_svds, 'aNorm');
483 |       varargout{5} = stats;
484 |    end
485 |    if (nargout >= 6)
486 |       varargout{6} = hist;
487 |    end
488 |
489 |    function record_history(basisSvals, basisFlags, iblock, basisNorms, ...
490 |          numConverged, lockedSvals, lockedFlags, lockedNorms, inner_its, ...
491 |          LSRes, event, stage)
492 |       % Monitor: appends records to HIST; prints and drops them if HIST is not returned
493 |       numMatvecs = double(primme_mex('primme_svds_get_member', primme_svds, 'stats_numMatvecs'));
494 |       methodStage2 = double(primme_mex('primme_svds_get_member', primme_svds, 'methodStage2'));
495 |       if stage == 0
496 |          primme = primme_mex('primme_svds_get_member', primme_svds, 'primme');
497 |       else
498 |          primme = primme_mex('primme_svds_get_member', primme_svds, 'primmeStage2');
499 |       end
500 |       if stage == 0 && methodStage2 ~= 0
501 |          locking = 1;
502 |       else
503 |          locking = primme_mex('primme_get_member', primme, 'locking');
504 |       end
505 |       maxInnerIterations = primme_mex('primme_get_member', primme, 'correction_maxInnerIterations');
506 |       elapsedTime = primme_mex('primme_svds_get_member', primme_svds, 'stats_elapsedTime');
507 |       hist_rows = size(hist, 1);
508 |       if event == 0 || (event == 4 && ~locking) || event == 5
509 |          if ~locking && ~isempty(numConverged)
510 |             nconv = double(numConverged);
511 |          elseif locking && ~isempty(lockedSvals)
512 |             nconv = numel(lockedSvals);
513 |          end
514 |       end
515 |       stage = double(stage) + 1;
516 |       if dispLevel == 0
517 |       elseif dispLevel == 1
518 |          if (event == 4 && ~locking) || event == 5
519 |             hist = [hist; numMatvecs elapsedTime nconv stage];
520 |          end
521 |       elseif dispLevel == 2
522 |          if event == 0 || (nconv == opts.numSvals && ((event == 4 && ~locking) || event == 5))
523 |             for i=1:numel(iblock)
524 |                hist = [hist; numMatvecs elapsedTime nconv stage i basisSvals(iblock(i)+1) basisNorms(iblock(i)+1)];
525 |             end
526 |          end
527 |       elseif dispLevel == 3
528 |          if event == 1
529 |             if ~isempty(basisSvals)
530 |                value = basisSvals(iblock(1)+1);
531 |                resNorm = basisNorms(iblock(1)+1);
532 |             else
533 |                value = nan;
534 |                resNorm = nan;
535 |             end
536 |             hist = [hist; numMatvecs elapsedTime nconv stage nan value resNorm LSRes];
537 |          elseif (maxInnerIterations == 0 || nconv == opts.numSvals) && (event == 0 || ((event == 4 && ~locking) || event == 5))
538 |             for i=1:numel(iblock)
539 |                hist = [hist; numMatvecs elapsedTime nconv stage i basisSvals(iblock(i)+1) basisNorms(iblock(i)+1) nan];
540 |             end
541 |          end
542 |       end
543 |       if ~return_hist && size(hist,1) > hist_rows
544 |          template{1} = '%d\t%f\t%d\t%d\n';
545 |          template{2} = '%d\t%f\t%d\t%d\t%d\t%g\t%e\n';
546 |          template{3} = '%d\t%f\t%d\t%d\t%d\t%g\t%e\t%e\n';
547 |          for i=hist_rows+1:size(hist,1)
548 |             a = num2cell(hist(i,:));
549 |             fprintf(template{dispLevel}, a{:});
550 |          end
551 |          hist = [];
552 |       end
553 |    end
554 | end
555 |
556 | function [y] = matvecsvds(A, x, mode)
557 |    if mode(1) ~= 'n'   % anything but 'notransp': multiply by A'
558 |       y = A'*x;
559 |    else                % 'notransp': multiply by A
560 |       y = A*x;
561 |    end
562 | end
563 |
564 | function [y] = precondsvds_Pmat(P, x, mode)
565 |    if strcmp(mode, 'AAH')        % solve with P*P'
566 |       y = P'\(P\x);
567 |    elseif strcmp(mode, 'AHA')    % solve with P'*P
568 |       y = P\(P'\x);
569 |    else                          % any other mode: x is split after size(P,1) rows
570 |       y = [P\x(size(P,1)+1:end,:); P'\x(1:size(P,1),:)];
571 |    end
572 | end
573 |
574 | function [y] = precondsvds_Pfun(P, x, mode, m)
575 |    if strcmp(mode, 'AAH')
576 |       y = P(P(x, 'notransp'), 'transp');
577 |    elseif strcmp(mode, 'AHA')
578 |       y = P(P(x, 'transp'), 'notransp');
579 |    else   % any other mode: rows m+1..end and rows 1..m of x are handled separately
580 |       y = [P(x(m+1:end,:), 'notransp'); P(x(1:m,:), 'transp')];
581 |    end
582 | end
583 |
584 | function [y] = precondsvds_P1P2(P1, P2, x, mode)
585 |    wantAHA = strcmp(mode, 'AHA');
586 |    wantAAH = strcmp(mode, 'AAH');
587 |    if isempty(P1)   % only the factor P2 is available
588 |       if wantAHA
589 |          y = P2\(P2'\x);
590 |       elseif wantAAH
591 |          y = P2'\(P2\x);
592 |       else
593 |          y = x;
594 |       end
595 |    elseif wantAHA
596 |       y = P2\(P1\(P1'\(P2'\x)));
597 |    elseif wantAAH
598 |       y = P1'\(P2'\(P2\(P1\x)));
599 |    else
600 |       y = [P2\(P1\x(size(P1,1)+1:end,:)); P1'\(P2'\x(1:size(P1,1),:))];
601 |    end
602 | end
603 |
604 | function [y] = precondsvds_Pstruct(P, x, mode)
605 |    if ~isfield(P, mode)
606 |       y = x;   % no entry for this mode: return x unchanged
607 |       return
608 |    end
609 |    op = P.(mode);
610 |    if isnumeric(op)
611 |       y = op\x;
612 |    else
613 |       y = op(x);
614 |    end
615 | end
616 |
617 | function [f] = fcnchk_gen(x)
618 |    % Turn X into something callable. FCNCHK is looked up as a function (not as
619 |    % a variable, which never exists here); without it, names go through STR2FUNC.
620 |    f = x;
621 |    if exist('fcnchk', 'file') || exist('fcnchk', 'builtin')
622 |       f = fcnchk(x);
623 |    elseif ischar(x)
624 |       f = str2func(x);
625 |    end
626 | end
626 | function primme_svds_set_members(opts, primme_svds, f, prefix)
627 | %PRIMME_SVDS_SET_MEMBERS  Set options in primme_svds_params
628 | %   PRIMME_SVDS_SET_MEMBERS(S, P) sets the options in struct S into the
629 | %   primme_svds_params reference P.
630 | %
631 | %   PRIMME_SVDS_SET_MEMBERS(S, P, F, PREFIX) sets the options with the
632 | %   primme_mex command F (default 'primme_svds_set_member') and prepends
633 | %   PREFIX (default '') to every option name.
634 | %
635 | %   Fields of S that are structs are set recursively: 'primme' and
636 | %   'primmeStage2' are applied with 'primme_set_member' on the reference
637 | %   obtained from P; any other struct is flattened, using its field name
638 | %   followed by '_' as prefix.
639 | %
640 | %   An error naming the option is raised when primme_mex rejects a value.
641 | %
642 | %   Example:
643 | %     primme_svds = primme_mex('primme_svds_initialize');
644 | %     ops.n = 10;
645 | %     ops.target = 'primme_svds_largest';
646 | %     primme_svds_set_members(ops, primme_svds);
647 |
648 |    % NOTE: MATLAB doesn't support default values in function
649 |    %       declaration, Octave does.
650 |    if nargin < 3, f = 'primme_svds_set_member'; end
651 |    if nargin < 4, prefix = ''; end
652 |
653 |    fields = fieldnames(opts);
654 |    for i=1:numel(fields)
655 |       label = fields{i};
656 |       value = opts.(label);
657 |       if isstruct(value) && ~strcmp('primme', label) && ~strcmp('primmeStage2', label)
658 |          primme_svds_set_members(value, primme_svds, f, [prefix label '_']);
659 |       elseif isstruct(value)
660 |          primme0 = primme_mex('primme_svds_get_member', primme_svds, [prefix label]);
661 |          primme_svds_set_members(value, primme0, 'primme_set_member');
662 |       else
663 |          try
664 |             primme_mex(f, primme_svds, [prefix label], value);
665 |          catch
666 |             % Describe the value without assuming it is a char row: joining a
667 |             % function handle or a matrix to the message would raise an
668 |             % unrelated error and hide which option failed.
669 |             if ischar(value) && size(value, 1) <= 1
670 |                valueStr = value;
671 |             elseif isnumeric(value) && isscalar(value)
672 |                valueStr = num2str(value);
673 |             elseif (isnumeric(value) || islogical(value)) && ndims(value) == 2
674 |                valueStr = mat2str(value);
675 |             elseif isa(value, 'function_handle')
676 |                valueStr = func2str(value);
677 |             else
678 |                valueStr = ['<' class(value) '>'];
679 |             end
680 |             error(['Error setting the option ' prefix label ' to value ' valueStr]);
681 |          end
682 |       end
683 |    end
684 | end
664 |
665 |
666 | function s = primme_error_msg(errorCode)
667 | %PRIMME_ERROR_MSG  Text for the error codes 1 to -38 listed below; else 'Unknown error code'
668 |    msg = {};
669 |    msg{39+  0} = 'success';
670 |    msg{39+  1} = 'reported only amount of required memory';
671 |    msg{39+ -1} = 'failed in allocating int or real workspace';
672 |    msg{39+ -2} = 'malloc failed in allocating a permutation integer array';
673 |    msg{39+ -3} = 'main_iter() encountered problem; the calling stack of the functions where the error occurred was printed in stderr';
674 |    msg{39+ -4} = 'argument primme is NULL';
675 |    msg{39+ -5} = 'n < 0 or nLocal < 0 or nLocal > n';
676 |    msg{39+ -6} = 'numProcs < 1';
677 |    msg{39+ -7} = 'matrixMatvec is NULL';
678 |    msg{39+ -8} = 'applyPreconditioner is NULL and precondition is not NULL';
679 |    msg{39+ -9} = 'not used';
680 |    msg{39+-10} = 'numEvals > n';
681 |    msg{39+-11} = 'numEvals < 0';
682 |    msg{39+-12} = 'eps > 0 and eps < machine precision';
683 |    msg{39+-13} = 'target is not properly defined';
684 |    msg{39+-14} = 'target is one of primme_largest_abs, primme_closest_geq, primme_closest_leq or primme_closest_abs but numTargetShifts <= 0 (no shifts)';
685 |    msg{39+-15} = 'target is one of primme_largest_abs primme_closest_geq primme_closest_leq or primme_closest_abs but targetShifts is NULL (no shifts array)';
686 |    msg{39+-16} = 'numOrthoConst < 0 or numOrthoConst > n (no free dimensions left)';
687 |    msg{39+-17} = 'maxBasisSize < 2';
688 |    msg{39+-18} = 'minRestartSize < 0 or minRestartSize shouldn''t be zero';
689 |    msg{39+-19} = 'maxBlockSize < 0 or maxBlockSize shouldn''t be zero';
690 |    msg{39+-20} = 'maxPrevRetain < 0';
691 |    msg{39+-21} = 'scheme is not one of *primme_thick* or *primme_dtr*';
692 |    msg{39+-22} = 'initSize < 0';
693 |    msg{39+-23} = 'locking == 0 and initSize > maxBasisSize';
694 |    msg{39+-24} = 'locking and initSize > numEvals';
695 |    msg{39+-25} = 'maxPrevRetain + minRestartSize >= maxBasisSize';
696 |    msg{39+-26} = 'minRestartSize >= n';
697 |    msg{39+-27} = 'printLevel < 0 or printLevel > 5';
698 |    msg{39+-28} = 'convTest is not one of primme_full_LTolerance primme_decreasing_LTolerance primme_adaptive_ETolerance or primme_adaptive';
699 |    msg{39+-29} = 'convTest == primme_decreasing_LTolerance and relTolBase <= 1';
700 |    msg{39+-30} = 'evals is NULL, but not evecs and resNorms';
701 |    msg{39+-31} = 'evecs is NULL, but not evals and resNorms';
702 |    msg{39+-32} = 'resNorms is NULL, but not evecs and evals';
703 |    msg{39+-33} = 'locking == 0 and minRestartSize < numEvals';
704 |    msg{39+-34} = 'ldevecs is less than nLocal';
705 |    msg{39+-35} = 'ldOPs is non-zero and less than nLocal';
706 |    msg{39+-36} = 'not enough memory for realWork';
707 |    msg{39+-37} = 'not enough memory for intWork';
708 |    msg{39+-38} = 'locking == 0 and target is primme_closest_leq or primme_closet_geq';
709 |    % The table is offset by 39 so that the lowest code, -38, lands on index 1
710 |    errorCode = errorCode + 39;
711 |    if errorCode > 0 && errorCode <= numel(msg)
712 |       s = msg{errorCode};
713 |    else
714 |       s = 'Unknown error code';
715 |    end
716 | end
717 |
718 | function s = primme_svds_error_msg(errorCode)
719 | %PRIMME_SVDS_ERROR_MSG  Text for an error code returned by the primme_svds solver
720 | %   Codes from 1 down to -21 are looked up in the table below. Codes below
721 | %   -100 and below -200 are reported as errors from the first and the second
722 | %   stage, and the remainder is described by PRIMME_ERROR_MSG.
723 |    msg = {};
724 |    msg{22+  0} = 'success';
725 |    msg{22+  1} = 'reported only amount of required memory';
726 |    msg{22+ -1} = 'failed in allocating int or real workspace';
727 |    msg{22+ -2} = 'malloc failed in allocating a permutation integer array';
728 |    msg{22+ -3} = 'main_iter() encountered problem; the calling stack of the functions where the error occurred was printed in stderr';
729 |    msg{22+ -4} = 'primme_svds is NULL';
730 |    msg{22+ -5} = 'Wrong value for m or n or mLocal or nLocal';
731 |    msg{22+ -6} = 'Wrong value for numProcs';
732 |    msg{22+ -7} = 'matrixMatvec is not set';
733 |    msg{22+ -8} = 'applyPreconditioner is not set but precondition == 1 ';
734 |    msg{22+ -9} = 'numProcs >1 but globalSumDouble is not set';
735 |    msg{22+-10} = 'Wrong value for numSvals, it''s larger than min(m, n)';
736 |    msg{22+-11} = 'Wrong value for numSvals, it''s smaller than 1';
737 |    msg{22+-13} = 'Wrong value for target';
738 |    msg{22+-14} = 'Wrong value for method';
739 |    msg{22+-15} = 'Not supported combination of method and methodStage2';
740 |    msg{22+-16} = 'Wrong value for printLevel';
741 |    msg{22+-17} = 'svals is not set';
742 |    msg{22+-18} = 'svecs is not set';
743 |    msg{22+-19} = 'resNorms is not set';
744 |    msg{22+-20} = 'not enough memory for realWork';
745 |    msg{22+-21} = 'not enough memory for intWork';
746 |
747 |    if errorCode >= -100
748 |       errorCode = errorCode + 22;
749 |       % The upper bound is inclusive so that code 1 (the last entry) is found;
750 |       % slots never assigned above (code -12) are empty and count as unknown.
751 |       if errorCode > 0 && errorCode <= numel(msg) && ~isempty(msg{errorCode})
752 |          s = msg{errorCode};
753 |       else
754 |          s = 'Unknown error code';
755 |       end
756 |    elseif errorCode >= -200
757 |       s = ['Error from first stage: ' primme_error_msg(errorCode+100)];
758 |    else
759 |       s = ['Error from second stage: ' primme_error_msg(errorCode+200)];
760 |    end
761 | end
756 |
--------------------------------------------------------------------------------
/rb_grid.m:
--------------------------------------------------------------------------------
1 | function [delta, mu] = rb_grid(kernel, sigma, R, d, seed)
2 | %RB_GRID  Draw the random grids used to build a random binning map
3 | %
4 | %   [DELTA, MU] = RB_GRID(KERNEL, SIGMA, R, D) returns the parameters of R
5 | %   random grids of dimension D. DELTA holds the widths of the grids, which
6 | %   are generated from the distribution selected by KERNEL with standard
7 | %   deviation SIGMA. KERNEL can be:
8 | %       0: normal distribution
9 | %       1: multivariate t-Student
10 | %       2: t-Student
11 | %   MU holds the biases of the grids, generated as a uniform distribution in
12 | %   [0,1] times DELTA.
13 | %
14 | %   [DELTA, MU] = RB_GRID(..., SEED) sets the seed of the random number generator.
15 |
16 | if nargin ~= 4
17 |    [delta, mu] = rb_grid_mex(kernel, sigma, R, d, seed);
18 | else
19 |    [delta, mu] = rb_grid_mex(kernel, sigma, R, d);
20 | end
21 |
--------------------------------------------------------------------------------
/rb_test.m:
--------------------------------------------------------------------------------
1 | function [phi] = rb_test(A, offset, coor, delta, mu)
2 | %RB_TEST  Build the sparse feature matrix for new points
3 | %
4 | %   PHI = RB_TEST(A, OFFSET, COOR, DELTA, MU) maps the rows of A onto the
5 | %   random grids described by DELTA and MU and keeps the coordinates that
6 | %   are also in COOR. The column indices of the nonzeros in PHI are the
7 | %   indices of those coordinates in COOR.
8 |
9 | phiT = rb_test_mex(A', delta, mu, uint64(offset), int32(coor'));
10 | phi = phiT';
11 |
--------------------------------------------------------------------------------
/rb_train.m:
--------------------------------------------------------------------------------
1 | function [varargout] = rb_train(varargin)
2 | %RB_TRAIN  Build the random binning map
3 | %
4 | %   [OFFSET, COOR, DELTA, MU] = RB_TRAIN(A, KERNEL, SIGMA, R) draws R random
5 | %   grids, with widths DELTA and biases MU, and returns the coordinates of
6 | %   their nonempty bins, COOR. The widths are generated from the distribution
7 | %   selected by KERNEL with standard deviation SIGMA. KERNEL can be:
8 | %       0: normal distribution
9 | %       1: multivariate t-Student
10 | %       2: t-Student
11 | %   The biases are generated as a uniform distribution in [0,1] times the
12 | %   corresponding widths. COOR(OFFSET(I):OFFSET(I+1)-1,:) are the coordinates
13 | %   of the nonempty bins of grid I.
14 | %
15 | %   [OFFSET, COOR, DELTA, MU] = RB_TRAIN(A, KERNEL, SIGMA, R, SEED) sets the
16 | %   seed of the random number generator used for the grids; 0 is used when
17 | %   SEED is not given.
18 | %
19 | %   [OFFSET, COOR] = RB_TRAIN(A, DELTA, MU) returns the coordinates of the
20 | %   nonempty bins for the grids with the given widths, DELTA, and biases, MU.
21 | %
22 | %   [..., PHI] = RB_TRAIN(...) returns also the sparse feature matrix.
23 |
24 | if nargin < 3 || nargin > 5
25 |    error('Invalid number of arguments')
26 | end
27 | A = varargin{1};
28 | gridsGiven = (nargin == 3);
29 | if gridsGiven
30 |    delta = varargin{2};
31 |    mu = varargin{3};
32 | else
33 |    kernel = varargin{2};
34 |    sigma = varargin{3};
35 |    R = varargin{4};
36 |    seed = 0;
37 |    if nargin == 5
38 |       seed = varargin{5};
39 |    end
40 |    [delta, mu] = rb_grid_mex(kernel, sigma, R, size(A,2), seed);
41 | end
42 |
43 | % PHI is produced only when it is the last requested output
44 | wantPhi = (gridsGiven && nargout == 3) || (~gridsGiven && nargout == 5);
45 | if wantPhi
46 |    [offset, coor, phi] = rb_train_mex(A', delta, mu);
47 | else
48 |    [offset, coor] = rb_train_mex(A', delta, mu);
49 | end
50 |
51 | if gridsGiven && wantPhi
52 |    varargout = {offset, coor', phi'};
53 | elseif wantPhi
54 |    varargout = {offset, coor', delta, mu, phi'};
55 | else
56 |    varargout = {offset, coor', delta, mu};
57 | end
64 |
65 |
--------------------------------------------------------------------------------
/selfloops.m:
--------------------------------------------------------------------------------
1 | % Counts the number of self-loops in the graph by summing the diagonal of
2 | % its adjacency matrix.
3 | % INPUT: adj, adjacency matrix
4 | % OUTPUT: sl, sum of the diagonal entries of adj (the number of self-loops
5 | %         when each diagonal entry stores the loop count of its node)
6 | % Last Updated: GB, October 1, 2009
7 |
8 | function sl=selfloops(adj)
9 |
10 | % Diagonal entry (i,i) holds the self-loop contribution of node i.
11 | loop_entries = diag(adj);
12 | sl = sum(loop_entries);
--------------------------------------------------------------------------------
/specClustering_rb_example.m:
--------------------------------------------------------------------------------
1 | %*************************************************************************
2 | % Scalable spectral clustering based on random binning and primme
3 | %
4 | % Author: Lingfei Wu
5 | % Date: 01/16/2019
6 | %
7 | % For every dataset in filename_list and every number of grids R in R_list:
8 | %   1) build the sparse random binning feature matrix Z (A := Z * Z'),
9 | %   2) compute K spectral vectors of the implicitly defined graph Laplacian,
10 | %   3) row-normalize the embedding and cluster it with kmeans,
11 | %   4) record nmi, fm, accuracy and RI together with the elapsed times.
12 | % The collected results are displayed and saved in
13 | % '<dataset>_SC_RB_varyingR'.
14 | %*************************************************************************
15 |
16 | clear,clc
17 | format shorte
18 | addpath(genpath('./utilities'));
19 | file_dir = './datasets/';
20 | filename_list = {'pendigits'};
21 |
22 | normalize_laplacian_flag = 1; % 1) 1:normalized laplacian; 2) 0:laplacian
23 | R_list = [16 32 64 128 256]; % increasing R typically improve performance
24 | sigma_list = [];
25 | for jj = 1:length(filename_list)
26 |     info = [];
27 |     filename = filename_list{jj};
28 |     disp(filename);
29 |     if strcmp(filename, 'pendigits')
30 |         KERNEL = 1; % Laplacian Kernel
31 |         sigma = 0.39; % For other dataset, tune it for best performance
32 |     end
33 |
34 |     % Rows of Accu_best_list: nmi, fm, accuracy, RI (one column per R).
35 |     Accu_best_list = zeros(4,length(R_list));
36 |     telapsed_rb_gen_list = zeros(1,length(R_list));
37 |     telapsed_laplacian_eigen_list = zeros(1,length(R_list));
38 |     telapsed_kmeans_list = zeros(1,length(R_list));
39 |     telapsed_runtime_list = zeros(1,length(R_list));
40 |     for j = 1:length(R_list)
41 |         R = R_list(j);
42 |         fprintf('R = %d\n',R);
43 |
44 |         % load train and test feature data, A := Z * Z', Z is a N*R feature
45 |         % matrix to approximate adjacency matrix A of a fully connected graph
46 |         timer_start = tic;
47 |         file_path = fullfile(file_dir, [filename '.train.test']);
48 |         [Y, X] = libsvmread(file_path);
49 |         [OFFSET, COOR, DELTA, MU, Z] = rb_train(X, KERNEL, sigma, R);
50 |
51 |         % Map the raw labels to 1..numClasses. The index vector returned by
52 |         % unique is computed from the untouched labels, so no class can be
53 |         % merged with another one (relabelling Y in place collides whenever
54 |         % a new index equals a raw label that has not been processed yet,
55 |         % e.g. labels {1,3,5} or binary labels {1,2}).
56 |         [labels, ~, Y] = unique(Y);
57 |         numClasses = length(labels);
58 |         if numClasses == 2
59 |             % Binary convention of this script: first label -> 2,
60 |             % second label -> 1.
61 |             Y = 3 - Y;
62 |         end
63 |         telapsed_rb_gen = toc(timer_start);
64 |         telapsed_rb_gen_list(j) = telapsed_rb_gen;
65 |
66 |         % compute degree diagonal matrix D := A * 1 = Z * (Z' * 1)
67 |         N = size(Z,1);
68 |         ZT1 = Z'*ones(N,1);
69 |         ZZT1 = Z*ZT1;
70 |         D = spdiags(ZZT1, 0, N, N);
71 |
72 |         % Two ways to implicitly formulate laplacian:
73 |         % 1) L = D - A = D - Z*Z';
74 |         % 2) L = I - sqrt(inv(D))*A*sqrt(inv(D))
75 |         %      = I - sqrt(inv(D))*Z*Z'*sqrt(inv(D))
76 |         timer_start = tic;
77 |         K = length(unique(Y));
78 |         opts.tol = 1e-4;
79 |         opts.disp = 1;
80 |         opts.isreal = 1;
81 |         if normalize_laplacian_flag == 0
82 |             % Matrix-free operator x -> L*x = D*x - Z*(Z'*x); A = Z*Z' is
83 |             % never formed explicitly.
84 |             LapFun = @(x) D*x - Z*(Z'*x);
85 |             [U,S] = primme_eigs(LapFun,N,K,'SA',opts);
86 |         else
87 |             % Build D^(-1/2) directly from the degrees instead of inverting
88 |             % the sparse diagonal matrix D.
89 |             Dinvsqrt = spdiags(1./sqrt(full(ZZT1)), 0, N, N);
90 |             Z2 = Dinvsqrt*Z;
91 |             % The largest singular triplets of Z2 give the smallest
92 |             % eigenpairs of I - Z2*Z2'.
93 |             [U,S,V] = primme_svds(Z2,K,'L',opts);
94 |         end
95 |         telapsed_laplacian_eigen = toc(timer_start);
96 |
97 |         % apply kmeans on resulting latent embedding from eigen
98 |         timer_start = tic;
99 |         % Scale every row of U to unit 2-norm; all-zero rows are left as
100 |         % they are so that no NaN is handed to kmeans.
101 |         rowNorms = sqrt(sum(abs(U).^2, 2));
102 |         rowNorms(rowNorms == 0) = 1;
103 |         U2 = bsxfun(@rdivide, U, rowNorms);
104 |         kopts = statset('Display','final');
105 |         rng('default');
106 |         [IDX, C] = kmeans(real(U2),K,'Distance','sqeuclidean',...
107 |             'Replicates',10,'Options',kopts); % 'cityblock'
108 |         telapsed_kmeans = toc(timer_start);
109 |         [nmi,fm,RI] = clustering_metric(0,Z,IDX,Y);
110 |         accu = accuracy(IDX, Y)/100; % Calculate accuracy
111 |         Accu_best_list(:,j) = [nmi;fm;accu;RI];
112 |         telapsed_laplacian_eigen_list(j) = telapsed_laplacian_eigen;
113 |         telapsed_kmeans_list(j) = telapsed_kmeans;
114 |         telapsed_runtime_list(j) = telapsed_rb_gen + ...
115 |             telapsed_laplacian_eigen + telapsed_kmeans;
116 |     end
117 |
118 |     % Collect the results of this dataset; singvalue holds the values
119 |     % computed for the last R in R_list.
120 |     info.Accu_best = Accu_best_list;
121 |     info.singvalue = diag(S);
122 |     info.telapsed_rb_gen = telapsed_rb_gen_list;
123 |     info.telapsed_laplacian_eigen = telapsed_laplacian_eigen_list;
124 |     info.telapsed_kmeans = telapsed_kmeans_list;
125 |     info.telapsed_runtime = telapsed_runtime_list;
126 |     info.R = R_list;
127 |     info.sigma = sigma;
128 |     disp(info);
129 |     savefilename = [filename '_SC_RB_varyingR'];
130 |     save(savefilename,'info')
131 | end
--------------------------------------------------------------------------------
/utilities/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/.DS_Store
--------------------------------------------------------------------------------
/utilities/libsvm_mex/libsvmread.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/libsvmread.mexa64
--------------------------------------------------------------------------------
/utilities/libsvm_mex/libsvmread.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/libsvmread.mexmaci64
--------------------------------------------------------------------------------
/utilities/libsvm_mex/libsvmwrite.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/libsvmwrite.mexa64
--------------------------------------------------------------------------------
/utilities/libsvm_mex/libsvmwrite.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/libsvmwrite.mexmaci64
--------------------------------------------------------------------------------
/utilities/libsvm_mex/svmpredict.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/svmpredict.mexa64
--------------------------------------------------------------------------------
/utilities/libsvm_mex/svmpredict.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/svmpredict.mexmaci64
--------------------------------------------------------------------------------
/utilities/libsvm_mex/svmtrain.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/svmtrain.mexa64
--------------------------------------------------------------------------------
/utilities/libsvm_mex/svmtrain.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/libsvm_mex/svmtrain.mexmaci64
--------------------------------------------------------------------------------
/utilities/primme_mex/primme_mex.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/primme_mex/primme_mex.mexa64
--------------------------------------------------------------------------------
/utilities/primme_mex/primme_mex.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/primme_mex/primme_mex.mexmaci64
--------------------------------------------------------------------------------
/utilities/rb_mex/rb_grid_mex.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/rb_mex/rb_grid_mex.mexmaci64
--------------------------------------------------------------------------------
/utilities/rb_mex/rb_test_mex.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/rb_mex/rb_test_mex.mexmaci64
--------------------------------------------------------------------------------
/utilities/rb_mex/rb_train_mex.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/IBM/SpectralClustering_RandomBinning/dde6aae3b3afef82e2b8de998dbffba7cfabf452/utilities/rb_mex/rb_train_mex.mexmaci64
--------------------------------------------------------------------------------