├── README.md ├── algorithms ├── CSP-master │ └── CSP-master │ │ ├── README.md │ │ ├── active_query.m │ │ ├── csp.m │ │ ├── csp_K.m │ │ ├── demo_active.m │ │ ├── demo_csp.asv │ │ ├── demo_csp.m │ │ ├── demo_pareto.m │ │ ├── demo_sf.m │ │ ├── euclidean.m │ │ ├── eval_rand.m │ │ ├── iris.data │ │ ├── load_wine.m │ │ ├── pareto_multiview.m │ │ ├── sf.m │ │ └── wine.data ├── MPCKMeans │ ├── runMPCKMeans.m │ └── weka-latest.jar ├── cecm │ └── cecm │ │ ├── CECM.m │ │ ├── README.pdf │ │ ├── addNewConstraints.m │ │ ├── iris.m │ │ ├── setCentersECM.m │ │ ├── setDistances.m │ │ └── solqp │ │ ├── solqp.m │ │ ├── spphase1.m │ │ └── spphase2.m ├── cevclus │ └── cevclus │ │ ├── CEVCLUS.m │ │ ├── README.pdf │ │ ├── addConstraints.m │ │ ├── iris.m │ │ ├── tmp2.mat │ │ └── tmp3.mat ├── constrained_kmeans │ └── constrained_kmeans.m ├── cosc_v1_1 │ ├── LICENSE │ ├── README.md │ ├── bal_cut.m │ ├── bfs.m │ ├── bfs_traversal.m │ ├── build_weights.m │ ├── cluster_err.m │ ├── cnstr_1spec_clustering.m │ ├── cnstr_inner_obj.m │ ├── cnstr_opt_thresh.m │ ├── computeCutValue.m │ ├── computeMultiCut.m │ ├── compute_cheeger_cut.m │ ├── connectedComponents.m │ ├── construct_cnstr_graph.m │ ├── cosc.m │ ├── createClustersGeneral.m │ ├── derive_mls_frm_cls.m │ ├── eig_std_Laplacian.m │ ├── eigs_Laplacian.m │ ├── eigs_lcnstrs.m │ ├── fctval_cnstr_one_spec_Q.m │ ├── feas_partitions.m │ ├── hierarchical_cnstr_1spec_clustering.m │ ├── hierarchical_solve_cnstr_functional.m │ ├── hierarchical_solve_cnstr_functional_incremental.m │ ├── hierarchical_solve_cnstr_functional_incremental_subgraph.m │ ├── hierarchical_spec_clustering_lcnstrs_merging.m │ ├── isConnected.m │ ├── merge.m │ ├── mex_solve_cnstr_inner_problem.cpp │ ├── mex_solve_cnstr_inner_problem.mexa64 │ ├── mex_solve_cnstr_inner_problem.mexglx │ ├── mex_solve_cnstr_inner_problem.mexw64 │ ├── mex_solve_inner_problem.cpp │ ├── mex_solve_inner_problem.mexa64 │ ├── mex_solve_inner_problem.mexglx │ ├── mex_solve_inner_problem.mexw64 │ ├── 
opt_thresh.m │ ├── opt_thresh_cnstr_functional.m │ ├── opt_thresh_cnstr_functional_subgraph.m │ ├── opt_thresh_ncut.m │ ├── process_mls.m │ ├── solve_cnstr_functional.m │ ├── solve_cnstr_functional_incremental.m │ ├── spec_clustering_lcnstrs.m │ ├── spec_clustering_lcnstrs_merging.m │ ├── start_cnstr_1spec_clustering.m │ ├── start_hierachical_cnstr_1spec_clustering.m │ ├── std_laplacian.m │ ├── test_cosc.m │ ├── top_fsolns.m │ ├── two_coloring.m │ └── two_moons.mat ├── cvpr09_ccsr_v1.0 │ ├── averagekmin_dm.m │ ├── coquad.m │ ├── csdp6.1.0winp4.tgz │ ├── csdp6.1.0winp4 │ │ └── csdp6.1.0winp4 │ │ │ ├── AUTHORS │ │ │ ├── LICENSE │ │ │ ├── README │ │ │ ├── atlas-license.txt │ │ │ ├── bin │ │ │ ├── complement.exe │ │ │ ├── csdp.exe │ │ │ ├── graphtoprob.exe │ │ │ ├── libsdp.a │ │ │ ├── rand_graph.exe │ │ │ └── theta.exe │ │ │ ├── doc │ │ │ ├── .svn │ │ │ │ ├── all-wcprops │ │ │ │ ├── entries │ │ │ │ ├── format │ │ │ │ ├── prop-base │ │ │ │ │ ├── a1block1.pdf.svn-base │ │ │ │ │ ├── cmat.pdf.svn-base │ │ │ │ │ ├── constraints.pdf.svn-base │ │ │ │ │ └── csdpuser.pdf.svn-base │ │ │ │ └── text-base │ │ │ │ │ ├── README.svn-base │ │ │ │ │ ├── a1block1.eps.svn-base │ │ │ │ │ ├── a1block1.fig.svn-base │ │ │ │ │ ├── a1block1.pdf.svn-base │ │ │ │ │ ├── cmat.eps.svn-base │ │ │ │ │ ├── cmat.fig.svn-base │ │ │ │ │ ├── cmat.pdf.svn-base │ │ │ │ │ ├── constraints.eps.svn-base │ │ │ │ │ ├── constraints.fig.svn-base │ │ │ │ │ ├── constraints.pdf.svn-base │ │ │ │ │ ├── csdpuser.aux.svn-base │ │ │ │ │ ├── csdpuser.bbl.svn-base │ │ │ │ │ ├── csdpuser.blg.svn-base │ │ │ │ │ ├── csdpuser.log.svn-base │ │ │ │ │ ├── csdpuser.pdf.svn-base │ │ │ │ │ ├── csdpuser.tex.svn-base │ │ │ │ │ ├── example.c.svn-base │ │ │ │ │ └── sdp.bib.svn-base │ │ │ ├── README │ │ │ ├── a1block1.eps │ │ │ ├── a1block1.fig │ │ │ ├── a1block1.pdf │ │ │ ├── cmat.eps │ │ │ ├── cmat.fig │ │ │ ├── cmat.pdf │ │ │ ├── constraints.eps │ │ │ ├── constraints.fig │ │ │ ├── constraints.pdf │ │ │ ├── csdpuser.aux │ │ │ ├── 
csdpuser.bbl │ │ │ ├── csdpuser.blg │ │ │ ├── csdpuser.log │ │ │ ├── csdpuser.pdf │ │ │ ├── csdpuser.tex │ │ │ ├── example.c │ │ │ └── sdp.bib │ │ │ └── matlab │ │ │ ├── .svn │ │ │ ├── all-wcprops │ │ │ ├── entries │ │ │ ├── format │ │ │ ├── prop-base │ │ │ │ └── control1.mat.svn-base │ │ │ └── text-base │ │ │ │ ├── README.svn-base │ │ │ │ ├── control1.correct.svn-base │ │ │ │ ├── control1.mat.svn-base │ │ │ │ ├── convertf.m.svn-base │ │ │ │ ├── csdp.m.svn-base │ │ │ │ ├── readsdpa.m.svn-base │ │ │ │ ├── readsol.m.svn-base │ │ │ │ ├── writesdpa.m.svn-base │ │ │ │ └── writesol.m.svn-base │ │ │ ├── README │ │ │ ├── control1.correct │ │ │ ├── control1.mat │ │ │ ├── convertf.m │ │ │ ├── csdp.m │ │ │ ├── readsdpa.m │ │ │ ├── readsol.m │ │ │ ├── writesdpa.m │ │ │ └── writesol.m │ ├── demo_CCSR_usps.m │ ├── eval │ │ ├── get_error_rate.m │ │ └── maximum_matching_bipartite.m │ ├── formulateSDP.m │ ├── genPWC.m │ ├── getY.m │ ├── graph_Laplacian │ │ ├── graph_knn_dm.m │ │ ├── spnlaplacian_dm.m │ │ ├── spnsimilarity_dm.m │ │ └── spsimilarity_dm.m │ ├── localformulateSDP.m │ ├── readme.m │ ├── run_CCSR.m │ ├── sdpToSeDuMi.m │ └── usps.mat ├── dpmeans │ └── dpmeans.m ├── kmeans │ └── kmeans2.m ├── rdpmeans │ ├── objective.m │ └── rdpmeans.m ├── thiago_lcvqe │ └── lcvqe.m └── tvclust │ ├── TVClust_variational.m │ ├── func_TVClust.r │ ├── main_VB_TVClust_sim_data2.r │ └── mathFun.r ├── data ├── UCI │ ├── balance │ │ ├── Index │ │ ├── balance-scale.data │ │ └── balance-scale.names │ ├── ecoli │ │ ├── ecoli.data.txt │ │ └── ecoli.names.txt │ ├── glass │ │ ├── glass.data │ │ ├── glass.names │ │ └── index.html │ ├── ionosphere │ │ ├── ionosphere.data.txt │ │ └── ionosphere.names.txt │ ├── iris │ │ ├── Index │ │ ├── bezdekIris.data │ │ ├── iris.data │ │ └── iris.names │ └── wine │ │ ├── Index │ │ ├── wine.data │ │ └── wine.names └── toyDataGenerators │ ├── clusterincluster.m │ ├── corners.m │ ├── crescentfullmoon.m │ ├── dbmoon.m │ ├── gaussians.m │ ├── halfkernel.m │ ├── outlier.m │ 
└── twospirals.m ├── distance ├── gaussianDifference.m └── multDifference.m ├── experiment ├── Gaussian-Mixtures_iter=1.png ├── Gaussian-Mixtures_iter=1.tif ├── calculateResults.m ├── containsMethod.m ├── evaluateAll.m ├── experiment_toy.m ├── experiment_uci.m ├── pathAll.m ├── plotExperiments.m ├── readUCIData.m └── runClustering.m └── metrics ├── adjrand.m ├── getbcubed.m └── nmi ├── Contents.m ├── README ├── bkberns.m ├── bkmns.m ├── bkvmfs.m ├── bpart.m ├── cm.m ├── compare.m ├── daberns.m ├── damns.m ├── davmfs.m ├── entro.m ├── entroa.m ├── kberns.m ├── kkzinit.m ├── kmns.m ├── kvmfs.m ├── logidf.m ├── mi.m ├── mixberns.m ├── mixmns.m ├── mixvmfs.m ├── perturbinit.m ├── puritya.m ├── randinit.m ├── skberns.m ├── skmns.m ├── skvmfs.m ├── test.m ├── tr11.mat └── unitnorm.m /README.md: -------------------------------------------------------------------------------- 1 | # Constrained Clustering 2 | 3 | This is a MATLAB codebase containing a set of clustering algorithms. 4 | 5 | Part of this code is used to simulate experiments in [this](http://arxiv.org/abs/1508.06235) work. 6 | 7 | There is also a list of constrained clustering algorithms with available code [here](http://web.engr.illinois.edu/~khashab2/files/2015_constrained_clustering/constrainedClustering.html). 8 | 9 | ## How to run: 10 | To see output on toy data, go to the directory `experiment`, and run the script `experiment_toy.m`. You should be able to see the following output, followed by some other outputs: 11 | 12 | ![alt text](https://github.com/danyaljj/constrained_clustering/blob/master/experiment/Gaussian-Mixtures_iter=1.png?raw=true) 13 | 14 | You can run the script `experiment_uci.m` to see the output of the algorithms on the UCI datasets as well. 15 | 16 | ## Structure of this package 17 | Here is how the code is structured: 18 | - `algorithms` contains the algorithms we have studied / experimented with at some point. 
Much of this code was downloaded from external sources and included directly (or with small modifications). Some of these algorithms contain a `README.md` inside their folder, which explains where they were downloaded from and describes any modifications / extensions made to them. 19 | Note that not all of these algorithms are used in the evaluation script (either due to instability, being slow, or not being compatible with our purposes). That said, you can always add these to the script and use them. 20 | - `data`: UCI data + toy data 21 | - `distance`: some of the distance measures we have used across multiple algorithms. 22 | - `experiment`: scripts for running the algorithms on datasets. 23 | - `metrics`: contains the evaluation metrics we have used. 24 | 25 | ## Questions / Comments / Suggestions 26 | Email Daniel: [http://web.engr.illinois.edu/~khashab2/](http://web.engr.illinois.edu/~khashab2/) 27 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/README.md: -------------------------------------------------------------------------------- 1 | CSP 2 | === 3 | 4 | Spectral clustering for complex graphs, including: 5 | + Constrained spectral clustering [KDD10,DMKD] 6 | + Active spectral clustering [ICDM10] 7 | + Multi-view spectral clustering [SDM13] 8 | + Self-taught spectral clustering [SDM14] 9 | 10 | ### Author ### 11 | Xiang Wang 12 | 13 | ### Website ### 14 | https://sites.google.com/site/gnaixgnaw 15 | 16 | ### Citation ### 17 | + Xiang Wang, Ian Davidson. Flexible constrained spectral clustering. In KDD 2010, pp. 563-572. 18 | + Xiang Wang, Buyue Qian, Ian Davidson. On constrained spectral clustering and its applications. Data Min. Knowl. Discov., in press, 2012. 19 | + Xiang Wang, Ian Davidson. Active spectral clustering. In ICDM 2010, pp. 561-568. 20 | + Xiang Wang, Buyue Qian, Jieping Ye, Ian Davidson. Multi-objective multi-view spectral clustering via Pareto optimization. In SDM 2013, pp. 234-242. 
21 | + Xiang Wang, Jun Wang, Buyue Qian, Fei Wang, Ian Davidson. Self-taught spectral clustering via constraint augmentation. In SDM 2014, pp. 416-424. 22 | 23 | ### Environment ### 24 | Matlab R2009b or later 25 | 26 | ### Usage ### 27 | Run the demo_* files to start. 28 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/active_query.m: -------------------------------------------------------------------------------- 1 | % This is our active query strategy, which evaluates the current partition 2 | % u and the currently known queries Q to decide which pair of points to 3 | % query next 4 | 5 | % Please see our paper for the technical details of this strategy 6 | 7 | function [i,j] = active_query(Q_active,u,N,Q_touch) 8 | % Rank-1 SVD approximation of the known answers, clipped to [-1,1] and rescaled to [0,1] as P 9 | [U,S,V]=svd(Q_active); 10 | Q_1=U(:,1)*S(1,1)*V(:,1)'; 11 | Q_1(Q_1>1)=1; 12 | Q_1(Q_1<-1)=-1; 13 | P=(Q_1+1)./2; 14 | % Clipped outer product of the current partition, then the per-pair error term weighted by P 15 | Q_u=u*u'; 16 | Q_u(Q_u>1)=1; 17 | Q_u(Q_u<-1)=-1; 18 | exp_err=(Q_u-1).^2.*P + (Q_u+1).^2.*(1-P); 19 | val=0; i=[]; j=[]; % outputs default to empty when no untouched pair (Q_touch==0) has positive exp_err 20 | for k=1:N 21 | for l=(k+1):N 22 | if (exp_err(k,l)>val) && (Q_touch(k,l)==0) 23 | i=k;j=l; 24 | val=exp_err(k,l); 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/csp.m: -------------------------------------------------------------------------------- 1 | % Input: graph Laplacian L, constraint matrix Q, the normalization matrix 2 | % D_norm, the volume of the graph vol, the number of instances N 3 | % Output: the relaxed cluster indicator vector u (for 2-way partition only) 4 | function u = csp (L,Q,D_norm,vol,N) 5 | 6 | % Normalize the constraint matrix 7 | Q_norm = D_norm*Q*D_norm; 8 | 9 | % Set the parameter alpha to 0.5*(the largest eigenvalue of Q_norm) 10 | lam=svds(Q_norm,1); 11 | Q1=Q_norm-(lam*0.5)*eye(N); 12 | [vec,val] = eig(L,Q1); 13 | 14 | % Find the eigenvectors with non-negative eigenvalues 15 | I=find(diag(val)>=0); 16 | 17 | % Compute the respective costs of 
the cuts 18 | cost=zeros(length(I),1); 19 | for i=1:length(I) 20 | v=vec(:,I(i))/norm(vec(:,I(i)))*vol^(1/2); 21 | cost(i)=v'*L*v; 22 | end 23 | 24 | % Find the one with minimum cost 25 | [cost_val,cost_ind]=sort(cost,'ascend'); 26 | i=1; 27 | while i<=length(cost) 28 | % Deal with numerical issues here 29 | if cost_val(i)>10^(-10) 30 | ind = cost_ind(i); 31 | break; 32 | end 33 | i=i+1; 34 | end 35 | if ~exist('ind','var'), error('csp:noFeasibleCut','No eigenvector with a non-negative eigenvalue has cut cost above 1e-10.'); end % fail clearly instead of hitting an undefined ind below 36 | % Output the cluster indicator vector 37 | v=vec(:,I(ind))/norm(vec(:,I(ind)))*vol^(1/2); 38 | u=D_norm*v; 39 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/csp_K.m: -------------------------------------------------------------------------------- 1 | % Constrained Spectral Clustering: The K-way version 2 | % 3 | % Input: 4 | % The normalized graph Laplacian, L; 5 | % The constraint matrix, Q; 6 | % The normalization matrix, D_norm = D^{-1/2}; 7 | % The volume of the graph, vol; 8 | % The number of clusters, K; 9 | % Output: 10 | % The relaxed cluster indicator vectors, U; 11 | 12 | function U = csp_K (L, Q, D_norm, vol, K) 13 | 14 | % number of nodes 15 | N = size(L,1); 16 | 17 | % set beta such that we have K feasible solutions 18 | lam = svds(Q,2*K); 19 | beta = (lam(K+1)+lam(K))/2-10^(-6); 20 | 21 | Q1 = Q - beta*eye(N); 22 | 23 | % solve the generalized eigenvalue problem 24 | [vec,~] = eig(L,Q1); 25 | 26 | % normalize the eigenvectors 27 | for i = 1:N 28 | vec(:,i) = vec(:,i)/norm(vec(:,i)); 29 | end 30 | 31 | % find feasible cuts 32 | satisf = diag(vec'*Q1*vec); 33 | I = find(satisf >= 0); 34 | 35 | % sort the feasible cuts by their costs 36 | cost = diag(vec(:,I)'*L*vec(:,I)); 37 | [~,ind] = sort(cost,'ascend'); 38 | 39 | % remove trivial cuts 40 | i = 1; 41 | while 1 42 | if nnz(vec(:,I(ind(i)))>0)~=0 && nnz(vec(:,I(ind(i)))<0) ~= 0 43 | break; 44 | end 45 | i = i + 1; 46 | end 47 | ind(1:i-1) = []; 48 | 49 | % output cluster indicators 50 | ind = ind(1:min(length(ind),K-1)); 51 | cost 
= cost(ind); 52 | U = vec(:,I(ind)); 53 | for i = 1:size(U,2) 54 | U(:,i) = D_norm * (U(:,i) * vol^(1/2)) * (1-cost(i)); 55 | end 56 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/demo_active.m: -------------------------------------------------------------------------------- 1 | % This is the demo program for active spectral clustering 2 | 3 | clear; 4 | 5 | % Load the data set 6 | load_wine; 7 | % load_iris; 8 | 9 | % T is how many times we repeat the randomized query strategy, so that we 10 | % can compare it to our active query strategy 11 | T=10; 12 | 13 | % Number of queries 14 | max_iter=min(N*(N-1)/2,2*N); 15 | 16 | disp('active method starts'); 17 | 18 | % We keep track of the performance of the active algorithm at each 19 | % iteration, i.e. after each new query 20 | record_active=zeros(max_iter,1); 21 | 22 | % Initiate the active algorithm by computing the unconstrained spectral 23 | % clustering 24 | [vec,val]=svd(L); 25 | v=vec(:,N-1)/norm(vec(:,N-1))*vol^(1/2); 26 | u_active=D_norm*v; 27 | 28 | % Q_active keeps track of the answers returned from the oracle 29 | Q_active=eye(N); 30 | 31 | % Q_touch keeps track of what has been queried and what has not 32 | Q_touch=eye(N); 33 | 34 | ind=1; 35 | while ind<=max_iter 36 | 37 | % Evaluate the performance of the current result, using Rand index 38 | record_active(ind) = eval_rand(u_active,Q_star); 39 | 40 | % Compute which pair of points to query next 41 | [i,j] = active_query(Q_active,u_active,N,Q_touch); 42 | 43 | % Query the oracle 44 | Q_active(i,j)=Q_star(i,j); 45 | Q_active(j,i)=Q_star(j,i); 46 | Q_touch(i,j)=1; 47 | Q_touch(j,i)=1; 48 | 49 | % Update the partition 50 | u_active = csp(L,Q_active,D_norm,vol,N); 51 | 52 | ind=ind+1; 53 | 54 | end 55 | 56 | % This is the baseline method where we randomly choose pairs of points to 57 | % query and compute the partition using the same constrained 58 | % spectralclustering algorithm csp() 
59 | disp('random method starts'); 60 | 61 | record_random=zeros(max_iter,T); 62 | 63 | for out_iter=1:T 64 | 65 | [vec,val]=svd(L); 66 | v=vec(:,N-1)/norm(vec(:,N-1))*vol^(1/2); 67 | u_random=D_norm*v; 68 | 69 | Q_random=eye(N); 70 | Q_touch=eye(N); 71 | 72 | ind=1; 73 | while ind<=max_iter 74 | 75 | record_random(ind,out_iter) = eval_rand(u_random,Q_star); 76 | [I,J]=find(Q_random==0); 77 | tmp=randi(length(I)); 78 | i=I(tmp); 79 | j=J(tmp); 80 | Q_random(i,j)=Q_star(i,j); 81 | Q_random(j,i)=Q_star(j,i); 82 | 83 | u_random = csp(L,Q_random,D_norm,vol,N); 84 | 85 | ind=ind+1; 86 | 87 | end 88 | end 89 | 90 | % Plotting the results 91 | 92 | figure; 93 | set(gca,'fontsize',12); 94 | xlim([0 max_iter]); 95 | xlabel('# constraints queried','fontsize',12); 96 | ylabel('Rand index','fontsize',12); 97 | hold on; 98 | plot(record_active,'b','Linewidth',2); 99 | plot(max(record_random,[],2),'-.r','Linewidth',1); 100 | plot(mean(record_random,2),'r','Linewidth',1); 101 | plot(min(record_random,[],2),':r','Linewidth',1); 102 | legend('active','random-max','random-avg','random-min'); 103 | hold off; 104 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/demo_csp.asv: -------------------------------------------------------------------------------- 1 | % Load the toy data set 2 | load_wine; 3 | 4 | % Generate random constraints from the groundtruth label. You can also fill 5 | % in Q directly, as long as Q remains symmetric. Note that positive entries 6 | % are must-links, negative entries are cannot-links, and 0 means no 7 | % information. Due to the nature of 2-way partition, the algorithm works 8 | % best when the numbers of ML and CL are approximately balanced. 9 | 10 | % Set the number of known labels. Do not set it to 0 because it will cause 11 | % numerical issues for the generalized eigenvalue decomposition. The 12 | % maximum value should be N, the total number of instances. 
13 | 14 | C=min(10,N); 15 | rp=randperm(N); 16 | tmp=sort(rp(1:C)); 17 | clear rp; 18 | 19 | Q=zeros(N,N); 20 | for i=1:length(tmp) 21 | for j=1:length(tmp) 22 | Q(tmp(i),tmp(j))=label(tmp(i))*label(tmp(j)); 23 | end 24 | end 25 | 26 | clear tmp; 27 | 28 | % Q --> constraints 29 | % N -> number of points 30 | % vol -> 31 | % L -> input data points 32 | % D_norm --> 33 | 34 | % Apply our algorithm 35 | u=csp(L,Q,D_norm,vol,N); 36 | 37 | % Turn the relaxed indicator vector into a 2-way partition 38 | result=zeros(N); 39 | result(u>0)=1; 40 | result(u<0)=-1; 41 | 42 | % Compute the Rand index 43 | Q_u=result*result'; 44 | ri=nnz(Q_u==Q_star)/(N^2); 45 | disp(ri); 46 | 47 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/demo_csp.m: -------------------------------------------------------------------------------- 1 | % Load the toy data set 2 | load_wine; 3 | 4 | % Generate random constraints from the groundtruth label. You can also fill 5 | % in Q directly, as long as Q remains symmetric. Note that positive entries 6 | % are must-links, negative entries are cannot-links, and 0 means no 7 | % information. Due to the nature of 2-way partition, the algorithm works 8 | % best when the numbers of ML and CL are approximately balanced. 9 | 10 | % Set the number of known labels. Do not set it to 0 because it will cause 11 | % numerical issues for the generalized eigenvalue decomposition. The 12 | % maximum value should be N, the total number of instances. 
13 | 14 | C=min(10,N); 15 | rp=randperm(N); 16 | tmp=sort(rp(1:C)); 17 | clear rp; 18 | 19 | Q=zeros(N,N); 20 | for i=1:length(tmp) 21 | for j=1:length(tmp) 22 | Q(tmp(i),tmp(j))=label(tmp(i))*label(tmp(j)); 23 | end 24 | end 25 | 26 | clear tmp; 27 | 28 | % Q --> constraints: 1:ML -1:CL 29 | % N -> number of points 30 | % vol -> volume of the graph (sum of all node degrees) 31 | % L -> normalized graph Laplacian 32 | % D_norm --> The normalization matrix D^(-1/2) 33 | 34 | % Apply our algorithm 35 | % Input: graph Laplacian L, constraint matrix Q, the normalization matrix 36 | % D_norm, the volume of the graph vol, the number of instances N 37 | % Output: the relaxed cluster indicator vector u (for 2-way partition only) 38 | u=csp(L,Q,D_norm,vol,N); 39 | 40 | % Turn the relaxed indicator vector into a 2-way partition 41 | result=zeros(N,1); % N-by-1 vector, not an N-by-N matrix 42 | result(u>0)=1; 43 | result(u<0)=-1; 44 | 45 | % Compute the Rand index 46 | Q_u=result*result'; 47 | ri=nnz(Q_u==Q_star)/(N^2); 48 | disp(ri); 49 | 50 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/demo_pareto.m: -------------------------------------------------------------------------------- 1 | clear; 2 | 3 | load_wine; 4 | 5 | U = pareto_multiview(L1,L2); -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/demo_sf.m: -------------------------------------------------------------------------------- 1 | clear all;close all; 2 | 3 | %% load data and generate graph 4 | 5 | tmp = load('iris.data'); 6 | 7 | label = tmp(:,end); 8 | data = tmp(:,1:end-1); 9 | clear tmp; 10 | 11 | N = size(data,1); 12 | 13 | for i = 1:size(data,2) 14 | data(:,i) = data(:,i) - mean(data(:,i)); 15 | end 16 | 17 | K = length(unique(label)); 18 | 19 | my_std = std(data); 20 | my_std(my_std==0) = 1; 21 | data = data*diag(1./my_std); 22 | 23 | my_mean = mean(data,1); 24 | my_dist = zeros(N,1); 25 | for i=1:N 26 | my_dist(i) = norm(data(i,:)-my_mean); 27 | end 28 | sigma = 0.2; 29 | W = eye(N); 
30 | for i = 1:N 31 | for j = (i+1):N 32 | W(i,j) = exp( -1 * norm(data(i,:)-data(j,:))^2 / (2*(sigma*max(my_dist))^2) ); 33 | W(j,i) = W(i,j); 34 | end 35 | end 36 | % ground-truth pairwise matrix: 1 where two points share a label, 0 otherwise 37 | Q_star = zeros(N,N); 38 | for i=1:N 39 | for j=1:N 40 | if label(i)==label(j) 41 | Q_star(i,j) = 1; 42 | end 43 | end 44 | end 45 | 46 | %% generate random constraints 47 | Omega = eye(N); 48 | idx = zeros((N^2 - N)/2,2); 49 | t = 0; 50 | for i = 1:N 51 | for j = i+1:N 52 | t = t+1; 53 | idx(t,:) = [i,j]; 54 | end 55 | end 56 | % mark 200 randomly chosen pairs (i<j) as revealed in the mask Omega, symmetrically 57 | tmp = randperm(size(idx,1)); 58 | for i = 1:200 59 | Omega(idx(tmp(i),1),idx(tmp(i),2)) = 1; 60 | Omega(idx(tmp(i),2),idx(tmp(i),1)) = 1; 61 | end 62 | 63 | %% main algorithm 64 | 65 | % initialize parameters 66 | alpha = 1e-1; 67 | beta = 1e-1; 68 | mu_start = 10; 69 | mu_final = 1e-1; 70 | iter_max = 5; 71 | iter_in_max = 1000; 72 | 73 | % initialize v 74 | X = alpha*W + beta*(Q_star.*Omega); 75 | X = (X+X')/2; 76 | [~,~,v] = svds(X,K); 77 | 78 | % do the main thing 79 | [Q,~,~] = sf(W,v,K,Q_star,Omega,alpha,beta,mu_start,mu_final,iter_max,iter_in_max); 80 | [~,~,v] = svds(Q,K); 81 | 82 | % post-processing, generate clusters 83 | clust = kmeans(v,K,'EmptyAction','singleton','Replicates',10); 84 | [ari,ri] = RandIndex(clust,label); % NOTE(review): RandIndex.m is not in this folder's file listing -- confirm it is on the MATLAB path 85 | fprintf('Adjusted RandIndex:\t%f\tRandIndex:\t%f\n', ari, ri); 86 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/euclidean.m: -------------------------------------------------------------------------------- 1 | function dist = euclidean(X1, X2) % Euclidean distance; assumes X1 and X2 are row vectors of equal length -- TODO confirm 2 | L1 = X1 - X2; 3 | dist = sqrt(L1 * L1'); % for row vectors L1*L1' is the sum of squared differences 4 | end -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/eval_rand.m: -------------------------------------------------------------------------------- 1 | % Evaluate the quality of u using Rand index 2 | % Q_star is the ground-truth result 3 | function ri = eval_rand(u,Q_star) 4 | 5 | result=zeros(size(u)); 6 | 
result(u>0)=1; 7 | result(u<0)=-1; 8 | Q_u=result*result'; 9 | 10 | ri=nnz(Q_u==Q_star)/(size(Q_star,1))^2; -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/iris.data: -------------------------------------------------------------------------------- 1 | 5.1,3.5,1.4,0.2,1 2 | 4.9,3.0,1.4,0.2,1 3 | 4.7,3.2,1.3,0.2,1 4 | 4.6,3.1,1.5,0.2,1 5 | 5.0,3.6,1.4,0.2,1 6 | 5.4,3.9,1.7,0.4,1 7 | 4.6,3.4,1.4,0.3,1 8 | 5.0,3.4,1.5,0.2,1 9 | 4.4,2.9,1.4,0.2,1 10 | 4.9,3.1,1.5,0.1,1 11 | 5.4,3.7,1.5,0.2,1 12 | 4.8,3.4,1.6,0.2,1 13 | 4.8,3.0,1.4,0.1,1 14 | 4.3,3.0,1.1,0.1,1 15 | 5.8,4.0,1.2,0.2,1 16 | 5.7,4.4,1.5,0.4,1 17 | 5.4,3.9,1.3,0.4,1 18 | 5.1,3.5,1.4,0.3,1 19 | 5.7,3.8,1.7,0.3,1 20 | 5.1,3.8,1.5,0.3,1 21 | 5.4,3.4,1.7,0.2,1 22 | 5.1,3.7,1.5,0.4,1 23 | 4.6,3.6,1.0,0.2,1 24 | 5.1,3.3,1.7,0.5,1 25 | 4.8,3.4,1.9,0.2,1 26 | 5.0,3.0,1.6,0.2,1 27 | 5.0,3.4,1.6,0.4,1 28 | 5.2,3.5,1.5,0.2,1 29 | 5.2,3.4,1.4,0.2,1 30 | 4.7,3.2,1.6,0.2,1 31 | 4.8,3.1,1.6,0.2,1 32 | 5.4,3.4,1.5,0.4,1 33 | 5.2,4.1,1.5,0.1,1 34 | 5.5,4.2,1.4,0.2,1 35 | 4.9,3.1,1.5,0.1,1 36 | 5.0,3.2,1.2,0.2,1 37 | 5.5,3.5,1.3,0.2,1 38 | 4.9,3.1,1.5,0.1,1 39 | 4.4,3.0,1.3,0.2,1 40 | 5.1,3.4,1.5,0.2,1 41 | 5.0,3.5,1.3,0.3,1 42 | 4.5,2.3,1.3,0.3,1 43 | 4.4,3.2,1.3,0.2,1 44 | 5.0,3.5,1.6,0.6,1 45 | 5.1,3.8,1.9,0.4,1 46 | 4.8,3.0,1.4,0.3,1 47 | 5.1,3.8,1.6,0.2,1 48 | 4.6,3.2,1.4,0.2,1 49 | 5.3,3.7,1.5,0.2,1 50 | 5.0,3.3,1.4,0.2,1 51 | 7.0,3.2,4.7,1.4,2 52 | 6.4,3.2,4.5,1.5,2 53 | 6.9,3.1,4.9,1.5,2 54 | 5.5,2.3,4.0,1.3,2 55 | 6.5,2.8,4.6,1.5,2 56 | 5.7,2.8,4.5,1.3,2 57 | 6.3,3.3,4.7,1.6,2 58 | 4.9,2.4,3.3,1.0,2 59 | 6.6,2.9,4.6,1.3,2 60 | 5.2,2.7,3.9,1.4,2 61 | 5.0,2.0,3.5,1.0,2 62 | 5.9,3.0,4.2,1.5,2 63 | 6.0,2.2,4.0,1.0,2 64 | 6.1,2.9,4.7,1.4,2 65 | 5.6,2.9,3.6,1.3,2 66 | 6.7,3.1,4.4,1.4,2 67 | 5.6,3.0,4.5,1.5,2 68 | 5.8,2.7,4.1,1.0,2 69 | 6.2,2.2,4.5,1.5,2 70 | 5.6,2.5,3.9,1.1,2 71 | 5.9,3.2,4.8,1.8,2 72 | 6.1,2.8,4.0,1.3,2 73 | 6.3,2.5,4.9,1.5,2 74 | 
6.1,2.8,4.7,1.2,2 75 | 6.4,2.9,4.3,1.3,2 76 | 6.6,3.0,4.4,1.4,2 77 | 6.8,2.8,4.8,1.4,2 78 | 6.7,3.0,5.0,1.7,2 79 | 6.0,2.9,4.5,1.5,2 80 | 5.7,2.6,3.5,1.0,2 81 | 5.5,2.4,3.8,1.1,2 82 | 5.5,2.4,3.7,1.0,2 83 | 5.8,2.7,3.9,1.2,2 84 | 6.0,2.7,5.1,1.6,2 85 | 5.4,3.0,4.5,1.5,2 86 | 6.0,3.4,4.5,1.6,2 87 | 6.7,3.1,4.7,1.5,2 88 | 6.3,2.3,4.4,1.3,2 89 | 5.6,3.0,4.1,1.3,2 90 | 5.5,2.5,4.0,1.3,2 91 | 5.5,2.6,4.4,1.2,2 92 | 6.1,3.0,4.6,1.4,2 93 | 5.8,2.6,4.0,1.2,2 94 | 5.0,2.3,3.3,1.0,2 95 | 5.6,2.7,4.2,1.3,2 96 | 5.7,3.0,4.2,1.2,2 97 | 5.7,2.9,4.2,1.3,2 98 | 6.2,2.9,4.3,1.3,2 99 | 5.1,2.5,3.0,1.1,2 100 | 5.7,2.8,4.1,1.3,2 101 | 6.3,3.3,6.0,2.5,3 102 | 5.8,2.7,5.1,1.9,3 103 | 7.1,3.0,5.9,2.1,3 104 | 6.3,2.9,5.6,1.8,3 105 | 6.5,3.0,5.8,2.2,3 106 | 7.6,3.0,6.6,2.1,3 107 | 4.9,2.5,4.5,1.7,3 108 | 7.3,2.9,6.3,1.8,3 109 | 6.7,2.5,5.8,1.8,3 110 | 7.2,3.6,6.1,2.5,3 111 | 6.5,3.2,5.1,2.0,3 112 | 6.4,2.7,5.3,1.9,3 113 | 6.8,3.0,5.5,2.1,3 114 | 5.7,2.5,5.0,2.0,3 115 | 5.8,2.8,5.1,2.4,3 116 | 6.4,3.2,5.3,2.3,3 117 | 6.5,3.0,5.5,1.8,3 118 | 7.7,3.8,6.7,2.2,3 119 | 7.7,2.6,6.9,2.3,3 120 | 6.0,2.2,5.0,1.5,3 121 | 6.9,3.2,5.7,2.3,3 122 | 5.6,2.8,4.9,2.0,3 123 | 7.7,2.8,6.7,2.0,3 124 | 6.3,2.7,4.9,1.8,3 125 | 6.7,3.3,5.7,2.1,3 126 | 7.2,3.2,6.0,1.8,3 127 | 6.2,2.8,4.8,1.8,3 128 | 6.1,3.0,4.9,1.8,3 129 | 6.4,2.8,5.6,2.1,3 130 | 7.2,3.0,5.8,1.6,3 131 | 7.4,2.8,6.1,1.9,3 132 | 7.9,3.8,6.4,2.0,3 133 | 6.4,2.8,5.6,2.2,3 134 | 6.3,2.8,5.1,1.5,3 135 | 6.1,2.6,5.6,1.4,3 136 | 7.7,3.0,6.1,2.3,3 137 | 6.3,3.4,5.6,2.4,3 138 | 6.4,3.1,5.5,1.8,3 139 | 6.0,3.0,4.8,1.8,3 140 | 6.9,3.1,5.4,2.1,3 141 | 6.7,3.1,5.6,2.4,3 142 | 6.9,3.1,5.1,2.3,3 143 | 5.8,2.7,5.1,1.9,3 144 | 6.8,3.2,5.9,2.3,3 145 | 6.7,3.3,5.7,2.5,3 146 | 6.7,3.0,5.2,2.3,3 147 | 6.3,2.5,5.0,1.9,3 148 | 6.5,3.0,5.2,2.0,3 149 | 6.2,3.4,5.4,2.3,3 150 | 5.9,3.0,5.1,1.8,3 151 | 152 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/load_wine.m: 
-------------------------------------------------------------------------------- 1 | % Load the data set, trim it into two classes. 2 | tmp = load('wine.data'); 3 | label = tmp(:,1); 4 | data = tmp(label~=1,2:end); 5 | label(label==1) = []; 6 | clear tmp; 7 | 8 | % Convert labels, '1' for one class and '-1' for the other. 9 | label(label==2) = 1; 10 | label(label==3) = -1; 11 | 12 | % The complete constraint matrix (+1 for same class, -1 for different classes) 13 | Q_star = label*label'; 14 | 15 | % Center the data attributes; the per-attribute variance my_var scales the 16 | % RBF kernel below. Each row is an instance and each column is an attribute. 17 | for i = 1:size(data,2) 18 | data(:,i) = data(:,i) - mean(data(:,i)); 19 | end 20 | my_var = var(data); 21 | % The size of the data set 22 | N = size(data,1); 23 | 24 | % Compute the similarity matrix A using RBF kernel. 25 | % Set diagonal entries to 0 for numerical consideration. 26 | A = zeros(N,N); 27 | for i = 1:N 28 | for j=(i+1):N 29 | A(i,j) = exp(-1*sum((data(i,:)-data(j,:)).^2./(2*my_var))); 30 | A(j,i) = A(i,j); 31 | end 32 | end 33 | 34 | % Compute the graph Laplacian. 
35 | D = diag(sum(A)); vol = sum(diag(D)); D_norm = D^(-1/2); 36 | L = eye(N) - D_norm*A*D_norm; 37 | 38 | % Construct View 1 for multi-view learning 39 | data1 = data(:,1:6); 40 | my_var1 = var(data1); 41 | A1 = zeros(N,N); 42 | for i = 1:N 43 | for j = (i+1):N 44 | A1(i,j) = exp(-1*sum((data1(i,:)-data1(j,:)).^2./(2*my_var1))); 45 | A1(j,i) = A1(i,j); 46 | end 47 | end 48 | D1 = diag(sum(A1)); vol1 = sum(diag(D1)); D_norm1 = D1^(-1/2); 49 | L1 = eye(N) - D_norm1*A1*D_norm1; 50 | 51 | % Construct View 2 for multi-view learning 52 | data2 = data(:,7:end); 53 | my_var2 = var(data2); 54 | A2 = zeros(N,N); 55 | for i = 1:N 56 | for j = (i+1):N 57 | A2(i,j) = exp(-1*sum((data2(i,:)-data2(j,:)).^2./(2*my_var2))); 58 | A2(j,i) = A2(i,j); 59 | end 60 | end 61 | D2 = diag(sum(A2)); vol2 = sum(diag(D2)); D_norm2 = D2^(-1/2); 62 | L2 = eye(N) - D_norm2*A2*D_norm2; 63 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/pareto_multiview.m: -------------------------------------------------------------------------------- 1 | function U = pareto_multiview (L1, L2) 2 | 3 | N = size(L1,1); 4 | [vec,~] = eig(L1,L2); 5 | for i = 1:N 6 | vec(:,i) = vec(:,i) / norm(vec(:,i)); 7 | end 8 | 9 | cost1 = diag(vec'*L1*vec); 10 | cost2 = diag(vec'*L2*vec); 11 | [Y,I] = sort(cost1,'ascend'); 12 | cost1 = Y; 13 | cost2 = cost2(I); 14 | 15 | figure; 16 | hold on; 17 | axis([0 2 0 2]); 18 | 19 | scatter(cost1,cost2,'b+'); 20 | 21 | ex = []; % The cuts to be excluded 22 | U = []; % The output 23 | iter = 0; 24 | pick = 0; 25 | while size(U,2) < 1 26 | 27 | % Pick the smallest cut for Graph A, excluding those in ex 28 | for i=1:N 29 | if ismember(I(i),ex)==false 30 | U_ind = i; 31 | break; 32 | end 33 | end 34 | 35 | % Compute the Pareto frontier 36 | cur_cost = cost2(U_ind); 37 | start = U_ind; 38 | for i = start:(N-1) 39 | if cost2(i+1) < cur_cost && ismember(I(i+1),ex)==false 40 | U_ind = [U_ind, i+1]; 41 | cur_cost = cost2(i+1); 
42 | end 43 | end 44 | 45 | ex = [ex, I(U_ind)']; % Exclude chosen cuts 46 | if iter > 0 % Skip first pass 47 | % fprintf('iter:\t%d\n', iter); 48 | for i=1:size(U_ind,2) 49 | % if nnz(vec(:,I(U_ind(i)))>0)>0 && nnz(vec(:,I(U_ind(i)))<0)>0 50 | U = [U, vec(:,I(U_ind(i)))]; 51 | pick = pick + 1; 52 | fprintf('%d\t%f\t%f\n', pick, cost1(U_ind(i)), cost2(U_ind(i))); 53 | scatter(cost1(U_ind(i)),cost2(U_ind(i)),'ro'); 54 | text(cost1(U_ind(i)),cost2(U_ind(i)),int2str(pick),'Color',[1 0 0]); 55 | % end 56 | end 57 | end 58 | 59 | iter = iter + 1; 60 | 61 | end 62 | 63 | cost1 = diag(U'*L1*U); 64 | cost2 = diag(U'*L2*U); 65 | 66 | hold off; 67 | -------------------------------------------------------------------------------- /algorithms/CSP-master/CSP-master/sf.m: -------------------------------------------------------------------------------- 1 | % Input: 2 | % W: graph affinity matrix, NxN 3 | % v: initial cut, NxK 4 | % K: number of clusters 5 | % Q_star: ground truth constraint matrix, NxN 6 | % Omega: mask (what entries of Q_star are revealed), NxN 7 | % 8 | % Output: 9 | % Q: learned constraint matrix, NxN 10 | % v: final constrained cut, NxK 11 | % obj_overall: objective value 12 | 13 | function [Q,v,obj_overall] = sf_core(W,v,K,Q_star,Omega,alpha,beta,mu_start,mu_final,iter_max,iter_in_max) 14 | 15 | tau = 1.99; 16 | eta_mu = 1/2; 17 | N = size(W,1); 18 | 19 | for iter = 1:iter_max 20 | mu = mu_start; 21 | if iter > 1 22 | Q_last = Q; 23 | end 24 | Q = zeros(size(Q_star)); 25 | obj_overall = []; 26 | while 1 27 | for iter_in = 1:iter_in_max 28 | Y = Q - tau*((Q-Q_star).*Omega) + tau*beta*(v*v'); 29 | [U,S,V] = svd(Y); 30 | for i=1:N 31 | S(i,i) = max(S(i,i) - tau*mu, 0); 32 | end 33 | Q = U*S*V'; 34 | obj = mu*sum(diag(S))+norm((Q-Q_star).*Omega,'fro')^2/2 - beta*trace(v'*Q*v); 35 | obj_overall = [obj_overall,-alpha*trace(v'*W*v)+ mu_final*sum(diag(S)) + norm((Q-Q_star).*Omega,'fro')^2/2 - beta*trace(v'*Q*v)]; 36 | if iter_in > 1 37 | diff_obj = 
abs(obj_overall(end-1) - obj_overall(end))/abs(obj_overall(end-1)); 38 | if diff_obj<1e-5 39 | break 40 | end 41 | end 42 | end 43 | 44 | if mu == mu_final 45 | break; 46 | else 47 | mu = max(mu*eta_mu,mu_final); 48 | end 49 | end 50 | 51 | X = alpha*W + beta*(Q_star.*Omega); 52 | X = (X+X')/2; 53 | [~,~,v] = svds(X,K); 54 | 55 | if iter > 1 56 | diff_Q = norm(Q-Q_last,'fro')/max(1,norm(Q_last,'fro')); 57 | if diff_Q < 1e-2 58 | break; 59 | end 60 | end 61 | end -------------------------------------------------------------------------------- /algorithms/MPCKMeans/runMPCKMeans.m: --------------------------------------------------------------------------------
function [assignment, centroids_vectors] = runMPCKMeans(X, Y, k, C_m)
% RUNMPCKMEANS  Run the Weka MPCKMeans clusterer on two-feature data.
%
% Input:
%   X   : data matrix; columns 1 and 2 are copied into the attributes
%         'att1' and 'att2' (any further columns are not read)
%   Y   : one value per row of X, stored in the 'label' attribute, which is
%         used as the class attribute
%   k   : number of clusters
%   C_m : constraint list, one row per pair [i, j, type] with 1-based
%         indices i and j. type == 1 creates a MUST_LINK pair, any other
%         value a CANNOT_LINK pair. Rows with i > j are skipped. May be
%         empty, in which case an empty constraint list is passed on.
%
% Output:
%   assignment        : result of getClusterAssignments() after clustering
%   centroids_vectors : the centroid attribute arrays, concatenated
%                       attribute by attribute

% NOTE(review): the jar path is relative to the current directory - confirm
% that callers always run from a sibling directory of 'algorithms'.
javaaddpath('../algorithms/MPCKMeans/weka-latest.jar');

import weka.*
import weka.clusterers.*
import weka.core.*
import java.util.ArrayList

numPoints = size(X,1);
featureVector = FastVector();
featureVector.addElement(Attribute('att1'));
featureVector.addElement(Attribute('att2'));
featureVector.addElement(Attribute('label'));

data = Instances('Persons', featureVector, numPoints);

% One Instance object is refilled and added once per data point.
instance = Instance(3);
for j = 1:numPoints
    for i = 0:1 % attribute indices are 0-based on the Java side
        instance.setValue(featureVector.elementAt(i), X(j, i+1));
    end
    instance.setValue(featureVector.elementAt(2), Y(j));
    data.add(instance);
end
data.setClassIndex(2);

mpckmeans = MPCKMeans();
mpckmeans.setNumClusters(k);

% The clusterer receives a copy of the data without the class attribute.
classIndex = data.numAttributes()-1;
if classIndex >= 0
    data.setClassIndex(classIndex); % starts with 0
    clusterData = Instances(data);
    clusterData.deleteClassAttribute();
else
    clusterData = Instances(data);
end

% Pairwise constraints. The list is created before the loop so that it is
% defined even when C_m is empty or every row is skipped.
labeledPairs = ArrayList();
for i = 1:size(C_m,1)
    if C_m(i,1) > C_m(i,2)
        continue;
    end
    if C_m(i,3) == 1
        pair = InstancePair(C_m(i,1)-1, C_m(i,2)-1, InstancePair.MUST_LINK);
    else
        pair = InstancePair(C_m(i,1)-1, C_m(i,2)-1, InstancePair.CANNOT_LINK);
    end
    labeledPairs.add(pair);
end

mpckmeans.setTotalTrainWithLabels(data);
mpckmeans.buildClusterer(labeledPairs, clusterData, data, mpckmeans.getNumClusters(), data.numInstances());
assignment = mpckmeans.getClusterAssignments();

centroids = mpckmeans.getClusterCentroids();
centroids_vectors = [];
for i = 0:centroids.numAttributes()-1
    centroids_vectors = [centroids_vectors centroids.attributeToDoubleArray(i)];
end
end
-------------------------------------------------------------------------------- /algorithms/MPCKMeans/weka-latest.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/MPCKMeans/weka-latest.jar -------------------------------------------------------------------------------- /algorithms/cecm/cecm/README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cecm/cecm/README.pdf -------------------------------------------------------------------------------- /algorithms/cecm/cecm/addNewConstraints.m: -------------------------------------------------------------------------------- 1 | function [matConst] = addNewConstraints(x,y,matConst,nbConst, noise, prop); 2 | % x : The input matrix nxnbAtt. 3 | % y : The output matrix nx1. 
4 | % matConst : The constraints matrix nxn : 1 values is for ML constraints and -1 5 | % for CL contraits. 0 is when there is no constraint. 6 | % nbConst : The number of constraint to add. 7 | % noise : Percentage of noise to include 8 | % prop : Increase the transitive closure of the constraints if =1. 9 | % @return The constraint matrix with the add of the new contraints 10 | 11 | 12 | [n,nbAtt]=size(x); 13 | 14 | % check the number of constraint to find 15 | nbConstMax=factorial(n-1); 16 | nbConstActual=length(find(matConst-eye(n)~=0)); 17 | if nbConstMaxrand 45 | matConst(ind1,ind2)=(matConst(ind1,ind2)==-1)*2-1; 46 | end 47 | i=i+1; 48 | end 49 | end 50 | 51 | % increasing constraints set 52 | if prop==1 53 | %% fermeture transitive 54 | aux1=sign(matConst+matConst'-eye(n)); 55 | aux2=matConst.*sign(matConst); 56 | aux2=max(aux2,aux2'); 57 | matConst=aux1.*aux2; 58 | 59 | % Must-link propagation 60 | ML=(matConst>0); 61 | for i=2:nbConst%n 62 | ML=ML|ML^i; 63 | end 64 | 65 | % Cannot-link propagation 66 | CL=(matConst<0)*1; 67 | CL=(CL|CL*ML|ML*CL)*1; % dist1 68 | CL=CL|CL*ML; % dist2 69 | 70 | matConst=ML-CL; 71 | end 72 | -------------------------------------------------------------------------------- /algorithms/cecm/cecm/iris.m: -------------------------------------------------------------------------------- 1 | clear; 2 | addpath(genpath('.')); 3 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 4 | 5 | load fisheriris 6 | x=meas; 7 | y=strcmp('setosa',species)*1 + strcmp('versicolor',species)*2 + strcmp('virginica',species)*3; 8 | n=length(y); 9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 10 | 11 | K=3; % number of cluster 12 | option = struct('init',1,'alpha',1,'rho2',1000,'bal',0,'distance',1); 13 | 14 | nbConst=10; 15 | noise=0; 16 | matConst=eye(n); 17 | matConst=addNewConstraints(x,y,matConst,nbConst,noise,0); 18 | 19 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 20 | 21 | 
[m,g,BetP,J]=CECM(x,K,matConst,option); 22 | 23 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%% 24 | p = 1; 25 | % [X,Y] = gaussians(200); 26 | [X,Y] = twospirals(); 27 | % [X,Y] = clusterincluster(); 28 | 29 | size1 = size(X,1); 30 | order = randperm(size1); 31 | X = X(order,:); 32 | Y = Y(order,:); 33 | k = 2; 34 | n=length(Y); 35 | matConst=eye(n); 36 | matConst=addNewConstraints(X,Y,matConst,nbConst,noise,0); 37 | [m,g,BetP,J]=CECM(X,k,matConst,option); 38 | [C,Idx] = max(BetP,[],2); 39 | 40 | scatter(X(:,1), X(:,2), 12, Idx); axis equal; 41 | -------------------------------------------------------------------------------- /algorithms/cecm/cecm/setCentersECM.m: -------------------------------------------------------------------------------- 1 | function [g]=setCentersECM(x,m,F,Smean,alpha,beta); 2 | 3 | [nbFoc,K]=size(F); 4 | [n nbAtt]=size(x); 5 | 6 | c = sum(F(2:end,:),2)'; % cardinality of focal sets 7 | indSingleton=find(c==1)+1; 8 | 9 | R=[]; 10 | B=[]; 11 | for l=1:K % pour chaque centre de gravite 12 | indl=indSingleton(l); 13 | 14 | Rl=[]; 15 | for i=1:n 16 | Ril=zeros(nbAtt,nbAtt); 17 | Fl=repmat(F(indl,:),nbFoc,1); 18 | indAj=find(sum(and(Fl,F),2)==c(indl-1))-1; 19 | for j=1:length(indAj) 20 | Ril=Ril+c(indAj(j))^(alpha-1)*m(i,indAj(j))^beta*Smean{indAj(j)}; 21 | end 22 | 23 | Rl=[Rl;Ril]; 24 | end 25 | R=[R Rl]; 26 | 27 | Bl=[]; 28 | for k=1:K 29 | Bkl=zeros(nbAtt,nbAtt); 30 | for i=1:n 31 | indk=indSingleton(k); 32 | Fl=repmat(sign(F(indl,:)+F(indk,:)),nbFoc,1); 33 | indAj=find(sum(and(Fl,F),2)==sum(Fl(1,:)))-1; 34 | for j=1:length(indAj) 35 | Bkl=Bkl+c(indAj(j))^(alpha-2)*m(i,indAj(j))^beta*Smean{indAj(j)}; 36 | end 37 | end 38 | 39 | Bl=[Bl;Bkl]; 40 | end 41 | B=[B Bl]; 42 | end 43 | 44 | X=reshape(x',n*nbAtt,1); 45 | g=B'\R'*X; 46 | g=reshape(g,nbAtt,K)'; 47 | 48 | 49 | -------------------------------------------------------------------------------- /algorithms/cecm/cecm/setDistances.m: 
-------------------------------------------------------------------------------- 1 | function [D,Splot,Smean] = setDistances(x,F,g,m,alpha,distance) 2 | % Calcul des centres agrandis 3 | [n,nbAtt]=size(x); 4 | [nbFoc,K]=size(F); 5 | 6 | beta=2; 7 | 8 | gplus=[]; 9 | for i=2:nbFoc 10 | fi = F(i,:); 11 | truc = repmat(fi',1,nbAtt); 12 | gplus = [gplus;sum(g.*truc)./sum(truc)]; 13 | end 14 | 15 | if distance==0 16 | Splot=repmat({eye(nbAtt)},1,K); % distance euclidienne 17 | S=repmat({eye(nbAtt)},1,K); 18 | else % Mahalanobis 19 | 20 | S=[]; 21 | Sigma=[]; 22 | Splot=[]; % matrice de var-cov utilisé pour les plots 23 | 24 | ind=find(sum(F,2)==1); 25 | for i=1:length(ind) % i=2:nbFoc 26 | denomSplot=0; 27 | indi=ind(i); 28 | 29 | Sigmai=zeros(nbAtt,nbAtt); 30 | for k=1:n % k : le numero de l'individu 31 | omegai=repmat(F(indi,:),nbFoc,1); 32 | indAj=find(sum(and(omegai,F),2)>0); 33 | 34 | for j=1:length(indAj) 35 | indj=indAj(j); 36 | aux = x(k,:)-gplus(indj-1,:);%(x - ones(n,1)*gplus(indj-1,:)); 37 | Sigmai=Sigmai+sum(F(indj,:)).^(alpha-1)*m(k,indj-1).^beta.*(aux'*aux); 38 | 39 | denomSplot=denomSplot+sum(F(indj,:)).^(alpha-1)*m(k,indj-1).^beta; % denominateur utilise pour le calcul de Splot (normalisation) 40 | end 41 | end 42 | 43 | try 44 | warning('off'); 45 | lastwarn('',''); 46 | Si=det(Sigmai).^(1/nbAtt)*inv(Sigmai); 47 | error(lastwarn); 48 | catch 49 | Si=det(Sigmai).^(1/nbAtt)*pinv(Sigmai);% dist GK pour Ionosphere 50 | disp('Utilisation de pinv'); 51 | end 52 | 53 | Splot = [Splot {Sigmai./denomSplot}]; 54 | S = [S {Si}]; % variance des elements singletons uniquement 55 | end 56 | end 57 | 58 | Smean=[]; 59 | for i=1:nbFoc-1 60 | aux=zeros(nbAtt,nbAtt); 61 | for j=1:K 62 | aux=aux+F(i+1,j)*S{j}; 63 | end 64 | Smean=[Smean {aux./max(sum(F(i+1,:)),1)}];% variance de tous les elements 65 | end 66 | 67 | 68 | % calculation of distances to centers 69 | D=[]; 70 | for j=1:nbFoc-1 71 | aux = (x - ones(n,1)*gplus(j,:)); 72 | 73 | if distance==0 74 | B = 
diag(aux*aux'); %dist euclidienne 75 | else 76 | B = diag(aux*Smean{j}*aux');% dist GK 77 | end 78 | 79 | D = [D B]; 80 | end % D comprend les distances entre les individus et les centres de gravite (chaque ligne = un individu, chaque colonne = un element sans l'element vide) 81 | %end 82 | -------------------------------------------------------------------------------- /algorithms/cecm/cecm/solqp/spphase1.m: -------------------------------------------------------------------------------- 1 | % spphase1 2 | % 3 | % solve the scaled least squares against two vectors 4 | % 5 | % [i,j,sa]=find(A); 6 | % AX = sparse(i,j,sa.*x(j),m,n); 7 | % clear i j sa; 8 | % AX'\[ones(n,1) x.*c1]; 9 | % 10 | dx = ones(n,1)./x(1:n); 11 | DD = sparse(1:n,1:n,dx.*dx,n,n); 12 | [DD A';A sparse(m,m)]\[dx sparse(n,1); sparse(m,1) a]; 13 | % 14 | y1=ans(n+1:n+m,1); 15 | y2=ans(n+1:n+m,2); 16 | clear dx ans DD; 17 | w1=(1/ob - a'*y1)/(1/ob^2 - a'*y2); 18 | w2=1/(1/ob^2 - a'*y2); 19 | y1=y1-w1*y2; 20 | y2=-w2*y2; 21 | % 22 | w1=b'*y1; 23 | w2=b'*y2; 24 | y1=y1/(1+w1); 25 | y2=y2-w2*y1; 26 | u=[x(1:n).*(-y2'*A)';x(n+1)*(1-y2'*a);w2/(1+w1)]; 27 | v=[x(1:n).*(y1'*A)' ;x(n+1)*(y1'*a) ; 1/(1+w1)]; 28 | % 29 | % update the dual and the objective lower bound 30 | % 31 | if min(u-z*v)>=0, 32 | y = y2+z*y1; 33 | z=b'*y; 34 | end; 35 | clear y1 y2 w1 w2; 36 | % 37 | % find the descent direction 38 | % 39 | u=u-z*v-((ob-z)/(n+2))*ones(n+2,1); 40 | nora=max(u); 41 | % 42 | % update the solution along the descent direction 43 | % 44 | if nora==u(n+1), 45 | alpha=1.; 46 | end; 47 | v=ones(n+2,1)-(alpha/nora)*u; 48 | x=(x.*v(1:n+1))/v(n+2); 49 | clear u v 50 | % 51 | %return 52 | % 53 | % This is the Phase 1 procedure called by SPSOLQP. 
54 | -------------------------------------------------------------------------------- /algorithms/cecm/cecm/solqp/spphase2.m: --------------------------------------------------------------------------------
% spphase2
%
% Phase 2 step called by SPSOLQP. This is a script, not a function: it runs
% in the caller's workspace.
%   Reads  : Q, c, A, x, n, m, lamda, beta, alpha, obvalue, toler
%   Updates: lamda, x, ob, y (plus the work variables go, gg, XX, AA, nora)
% On the normal path the temporaries u, xx, w1, w2 are cleared at the end.
% On the early 'return' inside the loop they are left in the workspace and
% x and ob are not updated.
%
lamda=(1.-beta)*lamda;
%  if gap <= 5*toler;
%    lamda = lamda/2;
%  end;
go=0;
gg = Q*x+c;                 % gradient of x'*Q*x/2 + c'*x at the current x
XX = spdiags(x,0,n,n);
AA = A*XX;                  % constraint matrix with columns scaled by x
XX = XX*Q*XX;               % Q scaled by x on both sides
%dx = ones(n,1)./x;
%
%  Repeatedly solve an ellipsoid-constrained QP problem by solving a linear
%  system until a positive solution is found.
%

while go <= 0,
  u=[XX+lamda*speye(n,n) AA';AA sparse(m,m)]\[-x.*gg;sparse(m,1)];
  xx=x+x.*u(1:n);
  go=min(xx);
  if go > 0,
    % The candidate is positive; it must also not increase the objective.
    ob=xx'*Q*xx/2+c'*xx;
    go = min([go obvalue-ob+eps]);
  end;
  lamda=2*lamda;
  if lamda >= (1+abs(obvalue))/toler,
    %disp('The problem seems unbounded.');
    % Ask only about a workspace variable: a plain exist('y') would also
    % report a file or function called 'y' on the path and leave y unset.
    if ~exist('y','var')
      y=-u(n+1:n+m);
    end
    return
  end;
end;
%
y=-u(n+1:n+m);
u=u(1:n);
% Largest step that keeps the iterate positive, damped by alpha.
nora = min(u);
if nora < 0,
  nora=-alpha/nora;
elseif nora == 0,
  nora=alpha;
else
  nora=inf;
end
%
u =  x.*u;
w1 = u'*Q*u;
w2 = -u'*gg;
if w1 > 0,
  % Positive curvature along u: do not step past the minimiser on the line.
  nora=min([w2/w1,nora]);
end;
if nora == inf,
  ob = -inf;
else
  x =x+nora*u;
  ob=x'*Q*x/2+c'*x;
end;
clear u dx xx DD w1 w2
%
%  This is the Phase 2 procedure called by SPSOLQP.
68 | -------------------------------------------------------------------------------- /algorithms/cevclus/cevclus/README.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cevclus/cevclus/README.pdf -------------------------------------------------------------------------------- /algorithms/cevclus/cevclus/addConstraints.m: -------------------------------------------------------------------------------- 1 | function [link] = addConstraints(y,nbConst,noise,option) 2 | % y : The output matrix nx1. 3 | % nbConst : The number of constraint to add 4 | % noise : Percentage of noise to include 5 | % option : Select only one type of constraint 6 | % : 0 => solely Cannot-Link 7 | % : 1 => solely Must-Link 8 | % : 2 => both type of constraints (by default) 9 | % @return The constraints. The two first columns correspond to the 10 | % pairs of objects, the last column the type of constraint 11 | % (Cannot-Link=0, Must-Link=1) 12 | 13 | n=length(y); 14 | 15 | if nargin<3 16 | noise=0; 17 | end 18 | if nargin<4 19 | option=2; 20 | end 21 | typeASup=(option-1)*(-1); 22 | 23 | link=[]; 24 | iConst=nbConst; 25 | while iConst~=0 26 | link=[link; ceil(rand(iConst,2)*n)]; 27 | [linkinorder idxLink]=unique(sort(link,2),'rows'); 28 | link=link(sort(idxLink),:); 29 | link(link(:,1)==link(:,2),:)=[]; % suppress links between same points 30 | typeLink=(y(link(:,1))==y(link(:,2))); 31 | link(typeLink==typeASup,:)=[]; % suppress link not desired (CL or 32 | % ML or nothing) 33 | 34 | iConst=nbConst-size(link,1); 35 | end 36 | 37 | 38 | if nbConst~=0 39 | nbRand=rand(nbConst,1); 40 | 41 | link(:,3)=(y(link(:,1))==y(link(:,2))); 42 | link(nbRand 0 26 | % display('inconsistency'); 27 | % end 28 | 29 | queue = [queue unlabeled_neigh]; 30 | visited( unlabeled_neigh ) = 1; 31 | 32 | 33 | end 34 | 35 | assert( sum(visited) == n ); 36 | 37 | end 
-------------------------------------------------------------------------------- /algorithms/cosc_v1_1/build_weights.m: --------------------------------------------------------------------------------
% Builds a KNN-Graph or epsilon-Neighbourhood Graph
function K=build_weights(points,graphType,adaptive,numKNN, scale)
% Usage: K=build_weights(points,graphType,adaptive)
%        K=build_weights(points,graphType,adaptive,numKNN)
%        K=build_weights(points,graphType,adaptive,numKNN,scale)
%
% Input:
%   points    - the coordinates of points in Euclidean space. It is a num x dim matrix.
%   graphType - 0: keep the smaller of the two directed KNN weights, min(K,K');
%               1: keep the larger of the two directed KNN weights, max(K,K');
%               any other value below 2 leaves the directed KNN weights unsymmetrized;
%               >=2: epsilon neighborhood graph (not usable, see the error below).
%   adaptive  - false: one shared kernel width, the mean squared KNN distance;
%               true : per-point width, the squared distance to the numKNN-th neighbor.
%   numKNN    - number of neighbors to connect to (default 10).
%   scale     - multiplier in the exponent of the Gaussian weight
%               (default 4, the value quoted for the paper).
%
% Output:
%   K - num x num sparse weight matrix with a zero diagonal.
%
% (C)2010 Thomas Buehler, Syama Sundar Rangapuram, Matthias Hein
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de

    if nargin<4
        numKNN=10;
    end
    % The documented four-argument call used to fail on the undefined 'scale'.
    if nargin<5
        scale=4;
    end

    if graphType>=2
        % The epsilon-neighborhood branch was written as
        %   K = exp(-1/(2*gamma^2)*dist2).*(dist2 < eps^2 & dist2~=0)  with eps=0.3,
        % but 'gamma' was never defined, so the name resolved to the built-in
        % gamma function and the call failed. Fail with a clear message
        % instead of guessing a kernel width.
        % NOTE(review): decide on the intended bandwidth before enabling this mode.
        error('build_weights:undefinedBandwidth', ...
              'The epsilon-neighborhood graph (graphType >= 2) has no kernel width defined; use graphType 0 or 1.');
    end

    % Compute the squares of the pairwise distances. dist2 is num x num
    % matrix
    dist2 = DistEuclideanPiotrDollar(points,points); % squared distances

    num = size(points,1);

    % ith column of SD contains neighbors sorted according to their
    % distances to ith point
    [SD,IX]=sort(dist2,1);
    KNN = IX(2:numKNN+1,:);

    % KNNDist is numKNN x n matrix containing the (squares of) distances to numKNN neighbors (excluding self)
    KNNDist = SD(2:numKNN+1,:);

    if (~adaptive)
        gamma_squared=mean(mean(KNNDist))*ones(1,num);
    else
        % square of the distance of each point to its numKNNth neighbor
        gamma_squared=KNNDist(numKNN,:);
    end

    % get kNN weight matrix: column i holds the weights from point i to its neighbors
    K = sparse(num,num);
    for i=1:num
        K(KNN(:,i),i)=exp(-scale/(gamma_squared(i))*KNNDist(:,i));
    end
    % note that K is not symmetric yet, now we symmetrize K
    if(graphType==1), K=(K+K')+abs(K-K'); K=0.5*K; end   % elementwise max
    if(graphType==0), K=(K+K')-abs(K-K'); K=0.5*K; end   % elementwise min

    K=K-spdiags(diag(K),0,num,num);

end
-------------------------------------------------------------------------------- /algorithms/cosc_v1_1/cluster_err.m: -------------------------------------------------------------------------------- 1 | % Computes the error of the clustering. The label of each cluster is 2 | % obtained via a majority vote. 3 | % The first argument is predicted labels and the second one is the ground truth! 
4 | function [error, u_reallabels,votes] = cluster_err(u,Y) 5 | % 6 | % (C)2010 Thomas Buehler, Matthias Hein 7 | % Machine Learning Group, Saarland University, Germany 8 | % http://www.ml.uni-saarland.de 9 | 10 | labels=unique(u); 11 | error=0; 12 | 13 | u_reallabels=zeros(size(u)); 14 | reallabels=unique(Y); 15 | votes=zeros(size(reallabels,1),size(labels,1)); 16 | 17 | 18 | for k=1:size(labels,1) 19 | % extract indices of current cluster 20 | ukIndex=u==labels(k); 21 | 22 | % extract real labels of current cluster 23 | Yk=Y(ukIndex); 24 | 25 | % perform majority vote 26 | currentvotes=zeros(size(reallabels,1),1); 27 | for l=1:size(reallabels,1) 28 | currentvotes(l) = sum(Yk==reallabels(l)); 29 | end 30 | ind=find(currentvotes==max(currentvotes)); 31 | 32 | votes(:,k)=currentvotes; 33 | 34 | % relabel u 35 | u_reallabels(ukIndex)=reallabels(ind(1)); 36 | 37 | % compute error 38 | currentError= sum(Yk~=reallabels(ind(1))); 39 | error=error + currentError; 40 | end 41 | error=error/size(u,1); 42 | end -------------------------------------------------------------------------------- /algorithms/cosc_v1_1/cnstr_1spec_clustering.m: -------------------------------------------------------------------------------- 1 | function [cut, clusters, viols] = ... 2 | cnstr_1spec_clustering(W,ML,CL,vertex_weights,start_flags,nRuns,prev_clusters,perturbation,MAX_ITERS,verbosity) 3 | % Performs Constrained 1-Spectral Clustering 4 | % 5 | % (C)2012 Syama Sundar Rangapuram and Matthias Hein 6 | % Max Planck Institute for Computer Science, Saarbruecken 7 | % Machine Learning Group, Saarland University, Germany 8 | % http://www.ml.uni-saarland.de 9 | % 10 | 11 | %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%% INITIALIZATION %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 12 | 13 | 14 | assert(isnumeric(W) && issparse(W),'Wrong usage. W should be sparse and numeric.'); 15 | assert(sum(diag(W))==0,'Wrong usage. 
W should have zeros on the diagonal') 16 | 17 | cnstr1 = []; 18 | cnstr2 = []; 19 | if ~isempty(CL) 20 | cnstr1 = CL(:,1); 21 | cnstr2 = CL(:,2); 22 | end 23 | 24 | %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%% MUST LINK CONSTRAINTS %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%% 25 | % Handle must-link constraints via sparsification. 26 | 27 | if verbosity>=1 && ~isempty(ML), display('Processing Must links via sparsification'); end 28 | %display('Processing Must links via sparsification'); 29 | [W, vertex_weights, map, prev_clusters] = process_mls( W, vertex_weights, ML, prev_clusters ); 30 | cnstr1 = map(cnstr1); 31 | cnstr2 = map(cnstr2); 32 | 33 | %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%% CANNOT LINK CONSTRAINTS %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%% 34 | % Now the cannot link constraints 35 | % First derive must links from cannot links 36 | 37 | % if verbosity>=2 38 | % display('Deriving Must links from cannot links'); 39 | % end 40 | 41 | [dML] = derive_mls_frm_cls( W, [cnstr1, cnstr2] ); 42 | [W, vertex_weights, map_dml, prev_clusters] = merge( W, vertex_weights, dML, prev_clusters); 43 | W = triu(W); % merging of edges might yield non-symmetric edge weights with a very small error (< 1e-15) 44 | W = W + W'; 45 | cnstr1 = map_dml(cnstr1); 46 | cnstr2 = map_dml(cnstr2); 47 | 48 | if size(W,1)==2 % Check if the constraints already specified the partition! 49 | clusters = [0; 1]; 50 | cut = bal_cut( W, vertex_weights, clusters); 51 | 52 | % Recover the labels on the original graph via two maps.. first dml's and then ml's; 53 | clusters = clusters(map_dml); 54 | clusters = clusters(map); 55 | 56 | viols = 0; 57 | 58 | else 59 | %%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%% FUNCTIONAL MINIMIZATION %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%% 60 | % Now the real work starts! 61 | dCL = CL; 62 | % Remove redundant cannot link constraints! 
63 | if ~isempty(cnstr1) > 0 64 | dCL = [cnstr1 cnstr2]; 65 | dCL = sort(dCL, 2); 66 | dCL = unique(dCL, 'rows'); 67 | if verbosity >=1, fprintf('Minimizing the Functional subject to Cannot link constraints using %d starting points\n', nRuns+start_flags); end 68 | else 69 | if verbosity >=1, fprintf('Minimizing the unconstrained Functional using %d starting points\n', nRuns+start_flags); end 70 | end 71 | 72 | [vmin, cut, clusters, viols] = ... 73 | solve_cnstr_functional_incremental(W,dCL,vertex_weights,start_flags, nRuns, prev_clusters, perturbation,MAX_ITERS, verbosity); 74 | 75 | % Recover the labels on the original graph via two maps.. first dml's and then ml's; 76 | clusters = clusters(map_dml); 77 | clusters = clusters(map); 78 | 79 | end 80 | 81 | end -------------------------------------------------------------------------------- /algorithms/cosc_v1_1/cnstr_inner_obj.m: -------------------------------------------------------------------------------- 1 | function obj = cnstr_inner_obj( D, FctVal, r2, vec, gamma, volQ, g) 2 | % 3 | % (C)2012 Syama Sundar Rangapuram and Matthias Hein 4 | % Max Planck Institute for Computer Science, Saarbruecken 5 | % Machine Learning Group, Saarland University, Germany 6 | % http://www.ml.uni-saarland.de 7 | % 8 | 9 | obj = sum(abs(D*g))+gamma*volQ*(max(g)-min(g))/2 - g'*r2/2 - FctVal*g'*vec/2; 10 | 11 | end -------------------------------------------------------------------------------- /algorithms/cosc_v1_1/computeCutValue.m: -------------------------------------------------------------------------------- 1 | function [cutpart1,cutpart2] = computeCutValue(clusters,W,normalized,deg) 2 | % Computes the components in the Ratio/Normalized Cut and Ratio/Normalized Cheeger Cut expression. 
3 | % 4 | % Usage: [cutpart1,cutpart2] = computeCutValue(clusters,W,normalized) 5 | % 6 | % One then has Ratio/Normalized Cut = cutpart1 + cutpart2 7 | % and Ratio/Normalized Cheeger Cut = max(cutpart1,cutpart2) 8 | % 9 | % (C)2010-11 Thomas Buehler and Matthias Hein 10 | % Machine Learning Group, Saarland University, Germany 11 | % http://www.ml.uni-saarland.de 12 | 13 | W3= sum(W(clusters==1,:),1); 14 | cut=full(sum(W3(clusters~=1),2)); 15 | 16 | if(cut==0) 17 | cutpart1=0; 18 | cutpart2=0; 19 | else 20 | if (~normalized) 21 | sizeA = sum(clusters==1); 22 | sizeB = size(clusters,1)-sizeA; 23 | 24 | cutpart1=cut/sizeA; 25 | cutpart2=cut/sizeB; 26 | else 27 | degA=deg(clusters==1); 28 | volA=sum(degA); 29 | 30 | degB=deg(clusters~=1); 31 | volB=sum(degB); 32 | 33 | cutpart1=cut/volA; 34 | cutpart2=cut/volB; 35 | end 36 | end 37 | 38 | end -------------------------------------------------------------------------------- /algorithms/cosc_v1_1/computeMultiCut.m: -------------------------------------------------------------------------------- 1 | function [cut,cheeger,cutParts]= computeMultiCut(W,allClusters,normalized) 2 | % Evaluates the multicut versions of Ratio/Normalized Cut and 3 | % Ratio/Normalized Cheeger Cut 4 | % 5 | % Usage: [cut,cheeger,cutParts]= computeMultiCut(W,allClusters,normalized) 6 | % 7 | % (C)2010-11 Thomas Buehler and Matthias Hein 8 | % Machine Learning Group, Saarland University, Germany 9 | % http://www.ml.uni-saarland.de 10 | 11 | if(normalized) 12 | deg=sum(W,2); 13 | else 14 | deg=ones(size(W,1),1); 15 | end 16 | 17 | labels=unique(allClusters); 18 | cut=0; 19 | cheeger=0; 20 | for k=1:length(labels) 21 | 22 | clustersM=zeros(size(allClusters,1),1); 23 | clustersM(allClusters==labels(k))=1; 24 | cutParts(k) = computeCutValue(clustersM,W,normalized,deg); 25 | 26 | end 27 | cut=sum(cutParts); 28 | cheeger=max(cutParts); 29 | 30 | end -------------------------------------------------------------------------------- 
/algorithms/cosc_v1_1/compute_cheeger_cut.m: --------------------------------------------------------------------------------
function ccut = compute_cheeger_cut(W, gdeg, Y)
% Cheeger cut of a binary partition: cut(C, C') / min(vol(C), vol(C')).
%
% Input:
%   W    - weight matrix of the graph (n x n)
%   gdeg - per-vertex weights used for the volumes (n entries)
%   Y    - partition labels, one per vertex; exactly two distinct values
%
% Output:
%   ccut - total weight between the two parts divided by the smaller volume
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

    labels = unique(Y);
    assert( length(labels) == 2, 'only binary partitions are allowed');

    % Logical masks of the two parts. These were previously summed into
    % element counts, so the indexing below picked one entry of W and gdeg
    % instead of the whole part.
    ix = (Y == labels(1));
    ixc = (Y == labels(2));
    cut = sum(sum(W(ix, ixc)));
    vol = sum(gdeg(ix));
    volc = sum(gdeg(ixc));

    ccut = cut/ min(vol, volc);

end
-------------------------------------------------------------------------------- /algorithms/cosc_v1_1/connectedComponents.m: -------------------------------------------------------------------------------- 1 | function [comp,connected,sizes]=connectedComponents(W) 2 | % Returns all connected components of the graph represented by weight 3 | % matrix W. 
4 | % 5 | % Usage: [comp,connected,sizes]=connectedComponents(W) 6 | % 7 | % (C)2010 Thomas Buehler and Matthias Hein 8 | % Machine Learning Group, Saarland University, Germany 9 | % http://www.ml.uni-saarland.de 10 | 11 | l=1; 12 | [isCon, first_comp] = isConnected(W); 13 | sizes(l)=sum(first_comp); 14 | connected=isCon; 15 | 16 | comp=first_comp; 17 | while ~isCon 18 | ind=find(comp==0); 19 | Wpart=W(ind,ind); 20 | [isCon, first_comp] = isConnected(Wpart); 21 | l=l+1; 22 | sizes(l)=sum(first_comp); 23 | comp(ind)=l*first_comp; 24 | end 25 | 26 | end 27 | -------------------------------------------------------------------------------- /algorithms/cosc_v1_1/construct_cnstr_graph.m: --------------------------------------------------------------------------------
function Q = construct_cnstr_graph( cnstrs, n )
% Builds the symmetric 0/1 adjacency matrix of the constraint graph.
%
% Input:
%   cnstrs - list of constrained pairs, one pair per row; columns 1 and 2
%            hold the vertex indices. May be empty.
%   n      - number of vertices
%
% Output:
%   Q - n x n sparse matrix with Q(i,j) = Q(j,i) = 1 for every listed pair (i,j)
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

    % No constraints: empty graph.
    if isempty(cnstrs)
        Q = sparse(n,n);
        return;
    end

    src = cnstrs(:,1);
    dst = cnstrs(:,2);

    % Insert every pair in both directions. sparse() adds up repeated
    % entries, so the counts are clipped back to 1 afterwards.
    Q = sparse([src; dst], [dst; src], 1, n, n);
    Q(Q>0) = 1;

end
-------------------------------------------------------------------------------- /algorithms/cosc_v1_1/derive_mls_frm_cls.m: -------------------------------------------------------------------------------- 1 | function [dML, f] = derive_mls_frm_cls( W, CL ) 2 | % 3 | % (C)2012 Syama Sundar Rangapuram and Matthias Hein 4 | % Max Planck Institute for Computer Science, Saarbruecken 5 | % Machine Learning Group, Saarland University, Germany 6 | % http://www.ml.uni-saarland.de 7 | % 8 | 9 | dML = cell(0); 10 | n = size(W,1); 11 | f = -10*ones(n,1); 12 | 13 | if ~isempty(CL) 14 | u = 1; 15 | visited = false(n,1); 16 | 17 | % Construct constraint graph 18 | Q = construct_cnstr_graph( CL, n ); 19 | 20 | [color, comp] = two_coloring(Q, u, n); 21 | 
visited(comp) = true; 22 | ml1 = comp(color(comp)==1); 23 | if length(ml1) > 1 24 | dML = [dML; ml1]; 25 | end 26 | ml2 = comp(color(comp)~=1); 27 | if length(ml2) > 1 28 | dML = [dML; ml2]; 29 | end 30 | fpartitions(comp) = color(comp); 31 | 32 | 33 | while sum(visited) < n 34 | 35 | u = find(visited==0, 1); 36 | [color, comp] = two_coloring(Q, u, n); 37 | visited(comp) = 1; 38 | ml1 = comp(color(comp)==1); 39 | if length(ml1) > 1 40 | dML = [dML; ml1]; 41 | end 42 | ml2 = comp(color(comp)~=1); 43 | if length(ml2) > 1 44 | dML = [dML; ml2]; 45 | end 46 | fpartitions(comp) = color(comp); 47 | 48 | end 49 | 50 | % Sanity Check 51 | % assert( sum(fpartitions(CL(:,1)) == fpartitions(CL(:,2))) == 0 ); 52 | end 53 | 54 | end -------------------------------------------------------------------------------- /algorithms/cosc_v1_1/eig_std_Laplacian.m: --------------------------------------------------------------------------------
function [v2, l2, ones_C] = eig_std_Laplacian(W,normalized,deg)
% Second eigenpair of the graph Laplacian D - W and a start vector derived
% from thresholding that eigenvector.
%
% Input:
%   W          - weight matrix; only its upper triangle is used, W is
%                rebuilt as triu(W) + triu(W)'
%   normalized - true : generalized problem (D-W)*v = lambda*diag(deg)*v
%                false: standard problem    (D-W)*v = lambda*v
%   deg        - vertex weights; used as the diagonal of the right-hand
%                matrix when normalized is true, and passed on to
%                createClustersGeneral
%
% Output:
%   v2     - second column of the eigenvector matrix returned by eigs(...,2,'SA')
%   l2     - the matching diagonal entry of the eigenvalue matrix
%   ones_C - result of createClustersGeneral on v2, flipped to 1-start when
%            its sum exceeds its number of zero entries, then divided by its sum
%            (presumably a 0/1 indicator of the smaller part - verify
%            against createClustersGeneral)
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

    W = triu(W);
    W = W+W';

    n = size(W,1);
    D=spdiags(sum(W,2),0,n,n);

    % Only the display level is configured for eigs. The former
    % 'options.tol/maxit/issym' assignments went into a second struct that
    % was never passed to eigs, so they had no effect and are dropped here.
    % NOTE(review): if a tolerance or iteration cap is wanted, set it on opts.
    opts.disp=0;
    if (normalized)
        [eigvec,eigval]= eigs(D-W, spdiags(deg,0,n,n),2,'SA',opts);
    else
        [eigvec,eigval]= eigs(D-W, 2,'SA',opts);
    end
    v2 = eigvec(:,2);
    l2 = eigval(2,2);

    start = createClustersGeneral(v2,W,normalized,-1,2,deg,true);
    if (sum(start)>sum(start==0)), start=1-start; end
    start=start/sum(start);
    ones_C = start;

end
-------------------------------------------------------------------------------- /algorithms/cosc_v1_1/eigs_Laplacian.m: 
--------------------------------------------------------------------------------
function [v, lambda, ones_C] = eigs_Laplacian( W, b )
% EIGS_LAPLACIAN  Projected power iteration for
%     minimize  v'*L*v / (v'*diag(b)*v)   subject to  b'*v = 0,
% where L = D - W is the Laplacian of W.
%
% Inputs
%   W - weight matrix; its row sums form the degree matrix D
%   b - vertex weights, used as the diagonal of the denominator and as the
%       single linear constraint (NOTE(review): assumed to be a strictly
%       positive n x 1 column - confirm with callers)
%
% Outputs
%   v      - final iterate mapped back to the original variables, unit norm
%   lambda - lmax minus the last Rayleigh quotient of the shifted operator
%   ones_C - result of createClustersGeneral for v, flipped (1 - start) when
%            sum(start) exceeds the number of zero entries, then scaled to sum 1
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

n = size(W,1);
constraint = b';

weight_mat = spdiags(b,0,n,n);
lap = sparse(spdiags(sum(W,2),0,n,n) - W);

% Change of variables w = B^(1/2)*v turns the weighted denominator into the
% plain Euclidean norm:
%   minimize w'*B^(-1/2)*L*B^(-1/2)*w / (w'*w)  subject to  A*B^(-1/2)*w = 0
b = diag(weight_mat);
scale = spdiags(b.^-0.5,0,length(weight_mat),length(weight_mat));
lap = scale * lap * scale;
lap = (lap + lap')/2;           % remove round-off asymmetry
constraint = constraint*scale;

% Upper bound on the largest eigenvalue of the scaled Laplacian; shifting by
% it makes the wanted smallest eigenvalue the dominant one of lmax*I - L.
lmax = 2*max(sum(W,2))/min(b);
shifted = lmax*speye(n,n) - lap;

u = rand(n,1);
lambda_old = 0;
cc = constraint*constraint';
clock_start = tic;

for counter = 1:1000

    u = u/norm(u);
    Lu = shifted*u;
    % project the iterate onto the null space of the constraint
    v = Lu - ((constraint*Lu)/cc)*constraint';
    lambda = u'*v;

    err = abs((lambda-lambda_old)/lambda_old);
    converged = err < 1e-20 || abs(lmax - lambda)/lmax < 1e-16;
    lambda_old = lambda;

    % iterate collapsed to (numerically) zero: report eigenvalue 0 after the shift
    if norm(v) < 1e-8
        lambda = lmax;
        break;
    end

    % wall-clock guard, checked every 100 iterations
    if rem(counter,100) == 0 && toc(clock_start) > 3600
        break;
    end

    u = v;
    if converged
        break;
    end
end

% Get back the solution for the original problem!
v = scale*v;
v = v/norm(v);
lambda = lmax - lambda;

start = createClustersGeneral(v,W,true,-1,2,b,true);
if sum(start) > sum(start==0)
    start = 1-start;
end
ones_C = start/sum(start);
end

--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/eigs_lcnstrs.m:
--------------------------------------------------------------------------------
function [lambda, v] = eigs_lcnstrs( W, b, A )
% Solves the optimization problem
% minimize/maximize v'*L*v/v'*diag(b)*v subject to A*v = 0; where L is the Laplacian,
% L = D - W
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

%disp('Eigenproblem start');

n = size(W,1);
B=spdiags(b,0,size(W,1),size(W,1));
D=spdiags(sum(W,2),0,size(W,1),size(W,1));
L = D-W;
L = sparse(L);

% First do the linear transformation to convert ellisoid (denominator) to sphere
% and obtain the equivalent problem
% minimize w'* B^-1/2*L*B^-1/2 *w/w'*w subject to A*B^-1/2*w = 0,
% where w = B^1/2 *v

b = diag(B);
B_inverse_square_root = spdiags(b.^-0.5,0,length(B), length(B));
L = B_inverse_square_root * L * B_inverse_square_root;
L1 = L;
L = (L+L')/2;
%assert( sum(sum( (L-L1).^2 )) <= 1e-12 );
A = A*B_inverse_square_root;

opts.disp = 0;
[vmax, lmax] = eigs(L, 1, 'LA', opts);
L = lmax*speye(n,n) - L;

%P = speye(n) - A'*inv(A*A')*A; % Here A is row vector, so A*A' is a scalar.
%P = speye(n) - A'*((A*A')\A);
% P = A'*((A*A')\A);

u = rand(size(L,1),1);
converged = false;
lambda_old = 0;

% M = P*L*P;
%M = P*L;
% M = L - A'*((A*A')\A)*L;
counter = 0;

%call cpp code here...
% [v, lambda] = eig_lcnstrs_mex( L, P, u);
while ~converged && counter < 20000

    counter = counter+1;
    u = u/norm(u);
    Lu = L*u;
    %v = Lu - P*Lu;
    v = Lu - A'*((A*A')\(A*Lu));
    %v = M*u;
    %v = (P*L*P)\u;
    % v = L*u;

    lambda = u'*v;

    err = abs((lambda-lambda_old)/lambda_old);
    if err < 1e-32
        converged = true;
    end

    lambda_old = lambda;

    %if counter==100 && norm(v) < 1e-10
    if norm(v) < 1e-10
        lambda = lmax;
        break;
    end
    % if rem(counter,100) == 0
    % lambda
    % end
    u = v;
end

% Get back the solution for the original problem!
v = B_inverse_square_root*v;
v = v/norm(v);
lambda = lmax - lambda;
%disp('Eigenproblem end');
%fprintf('converged in %d iterations\n', counter);
end

%
% v = rand(n,1);
% v = v/norm(v);
% v_old = sparse(n,1);
% beta = 0;
%
% converged = false;
% k=1;
% while ~converged
%
% w = C*v - beta*v_old;
% alpha = w'*v;
% w = w - alpha*v;
% beta = norm(w);
% v = w/beta;
%
% k = k+1;
% if k>1000
% converged = true;
% end
%
% end
%
% lambda = v'*C*v;
% end




--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/fctval_cnstr_one_spec_Q.m:
--------------------------------------------------------------------------------
function FctVal = fctval_cnstr_one_spec_Q(W, vertex_weights, Q, gamma, fnew, Lovasz)
% FCTVAL_CNSTR_ONE_SPEC_Q  Value of the constrained ratio functional at fnew.
%
% The numerator is
%     sum_ij W(i,j)*|f_i - f_j|  -  gamma * sum_ij Q(i,j)*|f_i - f_j|
%                                +  gamma * sum(Q(:)) * (max(f) - min(f))
% (sums run over the stored entries of W and Q, so each undirected edge of a
% symmetric matrix is counted twice).
%
% The denominator depends on the flag Lovasz (default: true):
%   Lovasz = true  : vec'*fnew, where vec is built from the vertex weights
%                    sorted by fnew and their reverse cumulative sums
%   Lovasz = false : vertex_weights' * |fnew - weighted mean of fnew|
%
% Inputs
%   W              - weight matrix of the graph
%   vertex_weights - vertex weights (column; a row is accepted on the Lovasz path)
%   Q              - weight matrix of the constraint graph
%   gamma          - multiplier of the constraint terms
%   fnew           - column vector at which the functional is evaluated
%   Lovasz         - optional logical flag, see above
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

if nargin < 6
    Lovasz = true;
end

[wi, wj, ww] = find(W);
[qi, qj, qw] = find(Q);

% Numerator: identical for both denominators.
tv_W = sum(ww.*abs(fnew(wi)-fnew(wj)));
tv_Q = sum(qw.*abs(fnew(qi)-fnew(qj)));
volQ = sum(sum(Q));
numer = tv_W - gamma* tv_Q + gamma*volQ * (max(fnew) - min(fnew));

if Lovasz
    n = length(fnew);
    volV = sum(vertex_weights);
    [f_sorted, order] = sort(fnew); %#ok<ASGLU>
    sdeg = vertex_weights(order);
    if size(sdeg,2)~=1
        sdeg = sdeg';
    end
    % tail_vol(i) = total weight of the vertices at sorted positions i..n
    tail_vol = flipud(cumsum(flipud(sdeg)));
    vec = zeros(n,1);
    vec(order) = 2*sdeg.*(volV - tail_vol - [tail_vol(2:end); 0])/volV;
    denom = vec'*fnew; % factor 0.5 cancelled between numerator and denominator
else
    centered = fnew - (fnew'*vertex_weights/sum(vertex_weights));
    denom = vertex_weights'*abs(centered);
end

FctVal = numer/denom;
end

--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/feas_partitions.m:
--------------------------------------------------------------------------------
function fpartitions = feas_partitions( W, CL, Y, C )
%% the kth feasible solution, fsolns(:, k) has three values: 0, 1, -10.
%% where -10 is for unconstrained vertices.
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

n = size(W,1);
fpartitions = -10*ones(n,1);

if ~isempty(CL)

    u = 1;
    % u = CL(1,1);
    visited = sparse(size(W,1),1);

    % Construct constraint graph
    Q = construct_cnstr_graph( CL, n );

    % start breadth first search on Q from u.
    comp = bfs(Q, u, n);
    visited(comp) = 1;
    if length(comp) > 1
        Qcomp = Q(comp, comp);
        color = two_coloring(Qcomp, 1, size(Qcomp,1));
        fpartitions(comp) = color;
    end


    while sum(visited) < n

        u = find(visited==0, 1);
        comp = bfs(Q, u, n);
        visited(comp) = 1;
        if length(comp) > 1
            Qcomp = Q(comp, comp);
            color = two_coloring(Qcomp, 1, size(Qcomp,1));
            fpartitions(comp) = color;
        end

    end

    % Sanity check: no cannot-link pair may share a label. Kept inside the
    % guard because indexing CL(:,1) on an empty [] raises an index error.
    assert( sum(fpartitions(CL(:,1)) == fpartitions(CL(:,2))) == 0 );
end

end
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/hierarchical_spec_clustering_lcnstrs_merging.m:
--------------------------------------------------------------------------------
function [clusters,cuts,cheegers, vmin, W, vertex_weights, Y, cnstr1, cnstr2] = ...
    hierarchical_spec_clustering_lcnstrs_merging(W, vertex_weights, CL, ML, normalized, verbosity, Y)
% Contracts the must-linked vertices, runs spec_clustering_lcnstrs on the
% contracted graph with the re-indexed cannot-links, and maps the resulting
% labels back to the original vertices.
%
% Inputs
%   W, vertex_weights - graph weights and vertex weights
%   CL                - cannot-link pairs, one pair per row (may be empty)
%   ML                - must-link pairs, forwarded to process_mls (may be empty)
%   normalized, verbosity - forwarded unchanged to spec_clustering_lcnstrs
%   Y                 - forwarded to process_mls as its prev_clusters argument
%                       (NOTE(review): process_mls/merge index it as a cell
%                       array of vectors - confirm what callers pass)
%
% Outputs
%   clusters - labels for the ORIGINAL vertices (must-linked vertices share
%              the label of their merged vertex)
%   all other outputs are returned as produced by spec_clustering_lcnstrs,
%   i.e. they refer to the contracted graph
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

cnstr1 = [];
cnstr2 = [];

if ~isempty(CL)
    cnstr1 = CL(:,1);
    cnstr2 = CL(:,2);
end

% Merge must-linked vertices; map(v) is the index of v in the merged graph.
[W, vertex_weights, map, Y] = process_mls( W, vertex_weights, ML, Y );
cnstr1 = map(cnstr1);
cnstr2 = map(cnstr2);

W = triu(W);
W = W + W';

[clusters,cuts,cheegers, vmin, W, vertex_weights, Y, cnstr1, cnstr2] = ...
    spec_clustering_lcnstrs(W, vertex_weights, [cnstr1 cnstr2], [], normalized, verbosity, Y);

% Undo the must-link contraction. Only one merge step (map) is performed in
% this function; the former extra line "clusters = clusters(map_dml)" used a
% variable that is never defined here and made every call fail.
clusters = clusters(map);

end
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/isConnected.m:
--------------------------------------------------------------------------------
function [connected,components]=isConnected(W)
% Checks whether a graph is connected.
%
% Usage: [connected,components]=isConnected(W)
%
% (C)2010 Thomas Buehler and Matthias Hein
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de

A = W>0; % adjacency matrix

alreadyseen = zeros(size(W,1),1);

currentCandidates=1;

while ~isempty(currentCandidates)
    candidates= (sum(A(:,currentCandidates),2)>0);
    alreadyseen(currentCandidates)=1;
    currentCandidates=find(candidates-alreadyseen>0);
end

connected = sum(alreadyseen)==size(W,2);

components=alreadyseen;

end

--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/merge.m:
--------------------------------------------------------------------------------
function [Wp, bp, map, prev_clustersp] = merge( W, b, ixs, prev_clusters )
% MERGE  Contract groups of vertices of a weighted graph into single vertices.
%
% Usage: [Wp, bp, map] = merge( W, b, ixs )
%        [Wp, bp, map, prev_clustersp] = merge( W, b, ixs, prev_clusters )
%
% Inputs
%   W   - n x n weight matrix
%   b   - vertex weights (length n)
%   ixs - cell array; ixs{i} lists the vertices that become one merged vertex
%         (row or column vectors; the groups are assumed to be disjoint)
%   prev_clusters - optional cell array of length-n vectors to carry over to
%         the contracted graph; defaults to {}
%
% Outputs
%   Wp  - np x np weight matrix, np = n - sum_i (numel(ixs{i}) - 1). The
%         unmerged vertices come first in their original order, followed by
%         one vertex per group. Weights towards a group are summed; the
%         rows of the merged vertices are the transpose of their columns;
%         the diagonal is set to zero.
%   bp  - np x 1 vertex weights; a merged vertex gets the sum of its members
%   map - n x 1, map(v) is the index of original vertex v in the new graph
%   prev_clustersp - prev_clusters restricted to the new vertices; a merged
%         vertex takes the value of the first member of its group
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

if nargin < 4
    prev_clusters = {};   % documented 3-argument usage
end

n = size(W,1);
k = 0;
ixs_all = [];
for i=1:length(ixs)
    k = k+length(ixs{i}) - 1;
    % (:) so that groups given as row vectors concatenate as well
    ixs_all = [ixs_all; ixs{i}(:)]; %#ok<AGROW>
end

ixsc = setdiff(1:n, ixs_all);   % vertices that are not merged
np = n - k;
n_ixsc = length(ixsc);

Wp = sparse(np, np);

bp = zeros(np, 1);
bp(1:n_ixsc) = b(ixsc);

prev_clustersp = cell(size(prev_clusters));
for i=1:length(prev_clustersp)
    prev_clustersp{i} = inf*ones(np,1);
    prev_clustersp{i}(1:n_ixsc) = prev_clusters{i}(ixsc);
end

map = zeros(n,1);
map(ixsc) = 1:n_ixsc;
for i=1:length(ixs)
    map(ixs{i}) = n_ixsc + i;
end

all_indices = 1:n;
all_indices_p = map(all_indices);
for i=1:length(ixs)

    subset = ixs{i};

    % explicit accumulation kept from the original (instead of sum(b(subset)))
    tot_weight = 0;
    for index=1:length(subset)
        tot_weight = tot_weight + b(subset(index));
    end
    bp(n_ixsc+i) = tot_weight;

    for jj=1:length(prev_clustersp)
        % a merged vertex inherits the entry of the first vertex of its group
        prev_clustersp{jj}(n_ixsc+i) = prev_clusters{jj}(subset(1));
    end

    % column of the merged vertex: sum the group's columns, then let sparse()
    % accumulate rows that map to the same new index
    Wp(:, n_ixsc+i) = sparse( all_indices_p,1, sum( W(:, subset), 2 ), np, 1);

end

Wp = [W(ixsc, ixsc) Wp(1:n_ixsc, n_ixsc+1:end); Wp(:, n_ixsc+1:end)'];

%set the diagonal entries to zero
Wp(1:np+1:np*np) = 0;
end
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/mex_solve_cnstr_inner_problem.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cosc_v1_1/mex_solve_cnstr_inner_problem.mexa64
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/mex_solve_cnstr_inner_problem.mexglx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cosc_v1_1/mex_solve_cnstr_inner_problem.mexglx
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/mex_solve_cnstr_inner_problem.mexw64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cosc_v1_1/mex_solve_cnstr_inner_problem.mexw64
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/mex_solve_inner_problem.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cosc_v1_1/mex_solve_inner_problem.mexa64
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/mex_solve_inner_problem.mexglx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cosc_v1_1/mex_solve_inner_problem.mexglx
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/mex_solve_inner_problem.mexw64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cosc_v1_1/mex_solve_inner_problem.mexw64
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/process_mls.m:
--------------------------------------------------------------------------------
function [Wp, vertex_weights_p, map,prev_clustersp] = process_mls( W, vertex_weights, ML, prev_clusters)
% Usage: [Wp, vertex_weights_p, map] = process_mls( W, vertex_weights, ML)
%        [Wp, vertex_weights_p, map, prev_clustersp] = process_mls( W, vertex_weights, ML, prev_clusters)
%
% Finds the connected components of the must-link graph and merges every
% component with more than one vertex into a single vertex (via merge).
%
% Input:
% W = weight matrix of the graph
% vertex_weights = vertex weights of the graph
% ML = must-link pairs; all entries are used as vertex indices (may be empty)
% prev_clusters = optional cell array of per-vertex vectors that is passed
%                 through merge; defaults to {}
%
% Output:
% Wp = Weight matrix of the sparsified graph
% vertex_weights_p = vertex weights of the sparsified graph
% map = map to the new indices in the sparsified graph
% prev_clustersp = prev_clusters carried over to the sparsified graph
%
% With empty ML the inputs are returned unchanged and map is the identity.
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

if nargin < 4
    prev_clusters = {};   % documented 3-argument usage
end

n = size(W,1);
Wp = W;
vertex_weights_p = vertex_weights;
map = (1:n)';
prev_clustersp = prev_clusters;

if isempty(ML)
    return;
end

% Only vertices that occur in a must-link can belong to a component of size > 1.
ml_vtxs = unique(ML(:));

% Construct constraint graph
Q = construct_cnstr_graph( ML, n );

visited = false(n,1);
ixs = cell(numel(ml_vtxs),1);
ilarge_comp = 0;

% Visit the must-link vertices in increasing order and start a breadth first
% search on Q from every vertex that no earlier search has reached.
for cur_ix = 1:numel(ml_vtxs)
    u = ml_vtxs(cur_ix);
    if visited(u)
        continue;
    end
    comp = bfs(Q, u, n);
    visited(comp) = true;
    if length(comp) > 1
        ilarge_comp = ilarge_comp + 1;
        ixs{ilarge_comp} = comp;
    end
end

[Wp, vertex_weights_p, map, prev_clustersp] = merge( W, vertex_weights, ixs(1:ilarge_comp), prev_clusters);

end
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/spec_clustering_lcnstrs_merging.m:
--------------------------------------------------------------------------------
function [clusters,cuts,cheegers, vmin, W, vertex_weights, Y, cnstr1, cnstr2] = ...
    spec_clustering_lcnstrs_merging(W, vertex_weights, CL, ML, normalized, verbosity)
% Two contraction steps followed by constrained spectral clustering:
%   1. must-linked vertices are merged (process_mls),
%   2. the vertex groups returned by derive_mls_frm_cls for the re-indexed
%      cannot-links are merged (merge),
%   3. spec_clustering_lcnstrs is run on the contracted graph,
%   4. the labels are mapped back through both contractions.
%
% Inputs
%   W, vertex_weights     - graph weights and vertex weights
%   CL                    - cannot-link pairs, one pair per row (may be empty)
%   ML                    - must-link pairs (may be empty)
%   normalized, verbosity - forwarded unchanged to spec_clustering_lcnstrs
%
% Outputs
%   clusters - labels for the ORIGINAL vertices
%   Y        - always 0 (set here, never updated)
%   remaining outputs are taken from spec_clustering_lcnstrs and refer to the
%   contracted graph
%
% NOTE(review): spec_clustering_lcnstrs is called with 8 outputs below, while
% hierarchical_spec_clustering_lcnstrs_merging calls it with 9 (with Y in 7th
% position). If the callee really returns Y there, cnstr1/cnstr2 receive
% shifted values here - verify against spec_clustering_lcnstrs.
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%

Y = 0;

% endpoints of the cannot-links (both empty when there are none)
if isempty(CL)
    cnstr1 = [];
    cnstr2 = [];
else
    cnstr1 = CL(:,1);
    cnstr2 = CL(:,2);
end

% placeholder labeling; process_mls and merge expect a cell of per-vertex vectors
temp = { ones(size(W,1),1) };

% step 1: contract must-links and re-index the cannot-links
[W, vertex_weights, map, prev_clusters] = process_mls( W, vertex_weights, ML, temp ); %#ok<ASGLU>
cnstr1 = map(cnstr1);
cnstr2 = map(cnstr2);

% step 2: contract the groups derived from the cannot-links
dML = derive_mls_frm_cls( W, [cnstr1 cnstr2] );
[W, vertex_weights, map_dml] = merge( W, vertex_weights, dML, temp );
W = triu(W);
W = W + W';
cnstr1 = map_dml(cnstr1);
cnstr2 = map_dml(cnstr2);

% step 3: cluster the contracted graph
[clusters,cuts,cheegers, vmin, W, vertex_weights, cnstr1, cnstr2] = ...
    spec_clustering_lcnstrs(W, vertex_weights, [cnstr1 cnstr2], [], normalized, verbosity);

% step 4: undo the contractions, innermost first
clusters = clusters(map_dml);
clusters = clusters(map);

end
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/std_laplacian.m:
--------------------------------------------------------------------------------
function L = std_laplacian(W)
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%
D=spdiags(sum(W,2),0,size(W,1),size(W,1));
L = D-W;

end
--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/test_cosc.m:
--------------------------------------------------------------------------------
load two_moons.mat % this has the weight matrix W, must-links ML, cannot-links CL and ground truth Y (used only for computing the clustering error)

vertex_weights = sum(W,2); % this choice corresponds to normalized cut
%vertex_weights = ones(size(W,1),1); % this choice corresponds to ratio cut
k = length(unique(Y)); % no. of clusters


%------------------------------------------- no. of clusters = 2 ---------------------------------------------------%
% This is the default call. It starts the method from 10 different
% initializations and takes the best (according to the cut value) among them.
[cut, clusters, viols] = cosc(W, vertex_weights, k, ML, CL);

% This is a fast version (with some compromise on quality). It starts the method from only 2 special
% initializations.
15 | % [cut, clusters, viols] = cosc(W, vertex_weights, k, ML, CL, 2, 2, 0, false, 1000); 16 | 17 | % Use this to track progress by displaying the intermediate results 18 | % [cut, clusters, viols] = cosc(W, vertex_weights, k, ML, CL, 2, 2, 0, false, 1000, 2); 19 | 20 | % To get the detailed help, type at the matlab command prompt: help cosc.m 21 | 22 | %--------------------------------------------------------------------------------------------------------------------% 23 | 24 | 25 | %------------------------------------------- no. of clusters > 2 ---------------------------------------------------% 26 | % k = 4; 27 | % This is the default call. It takes the best result (according to the 28 | % multi-cut) out of 5 complete recursive splits. In each step, it uses two initializations for 29 | % computing the 2-way split. 30 | % [cut, clusters, viols] = cosc(W, vertex_weights, k, ML, CL); 31 | 32 | % This is a fast version (with some compromise on quality). It computes only one complete recursive split and in each step only 1 special 33 | % initialization is used for the 2-way split. 34 | % [cut, clusters, viols] = cosc(W, vertex_weights, k, ML, CL, 1, 1, 1, false, 1000); 35 | 36 | % Use this to obtain the intermediate clusterings of the recursive split in the last output argument. 37 | % For example, clusters_intermediate(:, i) provides the clustering result for i+1 clusters. 
% [cut, clusters, viols, clusters_intermediate] = cosc(W, vertex_weights, k, ML, CL, 1, 1, 1, false, 1000);

% Use this to track progress by displaying the intermediate results
% [cut, clusters, viols] = cosc(W, vertex_weights, k, ML, CL, 1, 1, 1, false, 1000, 2);

% To get the detailed help, type at the matlab command prompt: help cosc.m

%--------------------------------------------------------------------------------------------------------------------%

--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/two_coloring.m:
--------------------------------------------------------------------------------
function [color, comp] = two_coloring(W, u, n)
% Usage: [color, comp] = two_coloring(W, u, n)
%
% Level-by-level traversal of the graph W starting at vertex u that gives
% alternating colors to successive levels.
%
% Inputs
%   W - weight/adjacency matrix; the nonzero entries of a column are taken as
%       the neighbours of that vertex
%   u - starting vertex
%   n - number of vertices
%
% Outputs
%   color - n x 1 logical; true for u, flipped for every new level reached.
%           Vertices that are never reached stay false.
%   comp  - indices of all vertices reached from u (including u)
%
% The coloring is NOT verified against the edges of W (that check is disabled
% in the original as well); it is conflict-free only if the component
% admits a two-coloring.
%
% (C)2012 Syama Sundar Rangapuram and Matthias Hein
% Max Planck Institute for Computer Science, Saarbruecken
% Machine Learning Group, Saarland University, Germany
% http://www.ml.uni-saarland.de
%
seen = sparse(n,1);
color = false(n,1);

% the frontier holds the vertices discovered in the previous step
frontier = u;
seen( frontier ) = 1;
color( frontier ) = true;

while ~isempty(frontier)

    % all frontier vertices were colored together, so the first one is representative
    next_color = ~color(frontier(1));

    % row indices of the nonzeros in the frontier columns = their neighbours
    [nbrs, cols] = find(W(:,frontier)); %#ok<ASGLU>
    frontier = nbrs( ~seen(nbrs) );

    color( frontier ) = next_color;
    seen( frontier ) = 1;

end

comp = find(seen);
end

--------------------------------------------------------------------------------
/algorithms/cosc_v1_1/two_moons.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cosc_v1_1/two_moons.mat
--------------------------------------------------------------------------------
/algorithms/cvpr09_ccsr_v1.0/averagekmin_dm.m:
--------------------------------------------------------------------------------
function val = averagekmin_dm(dm,k)

% val = averagekmin_dm(dm,k)
% compute the averaged distance of each point to its k-th nearest neigbor.
% dm - distance matrix of a data set

dm = sort(dm);

val = mean(dm(k+1,:));
--------------------------------------------------------------------------------
/algorithms/cvpr09_ccsr_v1.0/coquad.m:
--------------------------------------------------------------------------------
function [B, b] = coquad(Q,M,C)

% Formulate eq.(18) in paper
% Li,et al., CVPR 2009, "Constrained Clustering via Spectral Regularization"
% Please report problems to Zhenguo Li at zgli@ee.columbia.edu
%
% Inputs
%   Q - Npts x m matrix, one row per data point
%   M - pairs (i,j), one per row, that contribute to both B and b
%   C - pairs (i,j), one per row, that contribute to B only
%
% Outputs
%   B - m^2 x m^2 matrix, the sum of s*s' over all terms
%   b - m^2 x 1 vector, -2 times the sum of s over the point and M terms
% where s = vec(Q(j,:)' * Q(i,:)) for a pair (i,j), and i = j for the
% per-point terms.

[nPts, dim] = size(Q);
B = zeros(dim^2);
b = zeros(dim^2,1);

% one term per data point (the pair (p,p))
for p = 1:nPts
    outer = Q(p,:)'*Q(p,:);
    s = outer(:);
    B = B + s*s';
    b = b + s;
end

% pairs in M enter the quadratic and the linear part
for pr = M.'
    outer = Q(pr(2),:)'*Q(pr(1),:);
    s = outer(:);
    B = B + s*s';
    b = b + s;
end

% pairs in C enter the quadratic part only
for pr = C.'
    outer = Q(pr(2),:)'*Q(pr(1),:);
    s = outer(:);
    B = B + s*s';
end

b = -2 * b;
--------------------------------------------------------------------------------
/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4.tgz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4.tgz
--------------------------------------------------------------------------------
/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/AUTHORS:
--------------------------------------------------------------------------------
Main author(s):
Dr. Brian Borchers main contributor

Other contributors:
Joseph Young Original parallel version of
op_o.
7 | Aaron Wilson modified documentation/install 8 | procedure for COIN-OR 9 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/README: -------------------------------------------------------------------------------- 1 | Copyright 1997-2010, Brian Borchers. This copy of CSDP is made 2 | available under the Common Public License. See LICENSE for the 3 | details of the CPL. 4 | 5 | CSDP is a software package for solving semidefinite programming 6 | problems. The algorithm is a predictor-corrector version of the 7 | primal-dual barrier method of Helmberg, Rendl, Vanderbei, and 8 | Wolkowicz. 9 | 10 | This file includes binary code for SDP for Windows. 11 | 12 | doc documentation. 13 | 14 | matlab MATLAB/Octave routines for interfacing to CSDP. 15 | 16 | bin The binary code. 17 | 18 | Contact/Support: 19 | 20 | If you are having trouble running the code, see the doc directory 21 | first. The project's website can be found at 22 | 23 | https://projects.coin-or.org/Csdp/ 24 | 25 | The project's maintainer can be reached by email at borchers@nmt.edu. Please 26 | email bug reports and feature requests. 27 | 28 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/atlas-license.txt: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Automatically Tuned Linear Algebra Software v3.6.0 4 | * (C) Copyright 1998 R. Clint Whaley 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions 8 | * are met: 9 | * 1. Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * 2. 
Redistributions in binary form must reproduce the above copyright 12 | * notice, this list of conditions, and the following disclaimer in the 13 | * documentation and/or other materials provided with the distribution. 14 | * 3. The name of the ATLAS group or the names of its contributers may 15 | * not be used to endorse or promote products derived from this 16 | * software without specific written permission. 17 | * 18 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE ATLAS GROUP OR ITS CONTRIBUTORS 22 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | * POSSIBILITY OF SUCH DAMAGE. 
29 | * 30 | */ 31 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/complement.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/complement.exe -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/csdp.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/csdp.exe -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/graphtoprob.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/graphtoprob.exe -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/libsdp.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/libsdp.a -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/rand_graph.exe: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/rand_graph.exe -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/theta.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/bin/theta.exe -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/all-wcprops: -------------------------------------------------------------------------------- 1 | K 25 2 | svn:wc:ra_dav:version-url 3 | V 40 4 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc 5 | END 6 | constraints.pdf 7 | K 25 8 | svn:wc:ra_dav:version-url 9 | V 56 10 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/constraints.pdf 11 | END 12 | sdp.bib 13 | K 25 14 | svn:wc:ra_dav:version-url 15 | V 48 16 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/sdp.bib 17 | END 18 | a1block1.eps 19 | K 25 20 | svn:wc:ra_dav:version-url 21 | V 53 22 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/a1block1.eps 23 | END 24 | csdpuser.aux 25 | K 25 26 | svn:wc:ra_dav:version-url 27 | V 53 28 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/csdpuser.aux 29 | END 30 | csdpuser.bbl 31 | K 25 32 | svn:wc:ra_dav:version-url 33 | V 53 34 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/csdpuser.bbl 35 | END 36 | csdpuser.tex 37 | K 25 38 | svn:wc:ra_dav:version-url 39 | V 53 40 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/csdpuser.tex 41 | END 42 | cmat.fig 43 | K 25 44 | svn:wc:ra_dav:version-url 45 | V 49 46 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/cmat.fig 47 | END 48 | csdpuser.blg 49 | K 25 50 | svn:wc:ra_dav:version-url 51 | V 53 52 | 
/svn/Csdp/!svn/ver/43/releases/6.1.0/doc/csdpuser.blg 53 | END 54 | cmat.pdf 55 | K 25 56 | svn:wc:ra_dav:version-url 57 | V 49 58 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/cmat.pdf 59 | END 60 | constraints.eps 61 | K 25 62 | svn:wc:ra_dav:version-url 63 | V 56 64 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/constraints.eps 65 | END 66 | README 67 | K 25 68 | svn:wc:ra_dav:version-url 69 | V 47 70 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/README 71 | END 72 | csdpuser.pdf 73 | K 25 74 | svn:wc:ra_dav:version-url 75 | V 53 76 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/csdpuser.pdf 77 | END 78 | a1block1.fig 79 | K 25 80 | svn:wc:ra_dav:version-url 81 | V 53 82 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/a1block1.fig 83 | END 84 | a1block1.pdf 85 | K 25 86 | svn:wc:ra_dav:version-url 87 | V 53 88 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/a1block1.pdf 89 | END 90 | csdpuser.log 91 | K 25 92 | svn:wc:ra_dav:version-url 93 | V 53 94 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/csdpuser.log 95 | END 96 | cmat.eps 97 | K 25 98 | svn:wc:ra_dav:version-url 99 | V 49 100 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/cmat.eps 101 | END 102 | constraints.fig 103 | K 25 104 | svn:wc:ra_dav:version-url 105 | V 56 106 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/constraints.fig 107 | END 108 | example.c 109 | K 25 110 | svn:wc:ra_dav:version-url 111 | V 50 112 | /svn/Csdp/!svn/ver/43/releases/6.1.0/doc/example.c 113 | END 114 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/format: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/prop-base/a1block1.pdf.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | 
application/octet-stream 5 | END 6 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/prop-base/cmat.pdf.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/prop-base/constraints.pdf.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/prop-base/csdpuser.pdf.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/README.svn-base: -------------------------------------------------------------------------------- 1 | This directory contains the PDF output and LaTeX source for the CSDP 2 | User's Guide, together with some figures used in the guide. There 3 | should be no need to run pdflatex on the source since the .pdf file is 4 | already available. 
5 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/a1block1.fig.svn-base: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5-alpha5 2 | Landscape 3 | Center 4 | Metric 5 | A4 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 2250 810 45 45 2250 810 2295 810 11 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 2250 1485 45 45 2250 1485 2295 1485 12 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 2250 2160 45 45 2250 2160 2295 2160 13 | 2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 14 | 1800 465 2700 465 2700 4500 1800 4500 1800 465 15 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 16 | 1785 1140 2685 1140 17 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 18 | 1785 1815 2685 1815 19 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 20 | 1800 2490 2700 2490 21 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 22 | 1785 3165 2685 3165 23 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 24 | 1785 3825 2685 3825 25 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 26 | 1 1 1.00 60.00 120.00 27 | 2250 2160 3150 2160 28 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 29 | 3150 2610 3825 2610 30 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 31 | 3150 3150 3825 3150 32 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 33 | 3150 3690 3825 3690 34 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 35 | 3150 2070 3825 2070 3825 4275 3150 4275 3150 2070 36 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 37 | 4275 1350 4950 1350 4950 3555 4275 3555 4275 1350 38 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 39 | 5490 450 6165 450 6165 2655 5490 2655 5490 450 40 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 41 | 4275 1935 4950 1935 42 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 43 | 5490 2115 6165 2115 44 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 45 | 5490 1530 6165 1530 46 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 47 | 5490 945 6165 945 48 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 
0 0 2 49 | 4275 2475 4950 2475 50 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 51 | 4275 3015 4950 3015 52 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 53 | 1 1 1.00 60.00 120.00 54 | 2250 1485 4275 1485 55 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 56 | 1 1 1.00 60.00 120.00 57 | 2250 810 5490 810 58 | 4 0 0 50 -1 1 18 0.0000 0 195 855 675 900 iindices\001 59 | 4 0 0 50 -1 1 18 0.0000 0 255 855 675 1575 jindices\001 60 | 4 0 0 50 -1 1 18 0.0000 0 195 1680 0 4275 constraintnum\001 61 | 4 0 0 50 -1 1 18 0.0000 0 195 1125 450 2925 blocknum\001 62 | 4 0 0 50 -1 1 18 0.0000 0 195 1020 450 3600 blocksize\001 63 | 4 0 0 50 -1 1 18 0.0000 0 195 765 675 2250 entries\001 64 | 4 0 0 50 -1 1 18 0.0000 0 195 135 2160 2925 1\001 65 | 4 0 0 50 -1 1 18 0.0000 0 195 135 2160 3600 2\001 66 | 4 0 0 50 -1 1 18 0.0000 0 195 135 2160 4275 1\001 67 | 4 0 0 50 -1 1 18 0.0000 0 30 180 3330 2475 --\001 68 | 4 0 0 50 -1 1 18 0.0000 0 195 135 3330 3060 3\001 69 | 4 0 0 50 -1 1 18 0.0000 0 195 135 3330 3555 1\001 70 | 4 0 0 50 -1 1 18 0.0000 0 195 135 3330 4050 3\001 71 | 4 0 0 50 -1 1 18 0.0000 0 30 180 4545 1710 --\001 72 | 4 0 0 50 -1 1 18 0.0000 0 30 180 5760 765 --\001 73 | 4 0 0 50 -1 1 18 0.0000 0 195 135 4545 2295 1\001 74 | 4 0 0 50 -1 1 18 0.0000 0 195 135 4545 2835 2\001 75 | 4 0 0 50 -1 1 18 0.0000 0 195 135 4545 3375 2\001 76 | 4 0 0 50 -1 1 18 0.0000 0 195 135 5760 1350 1\001 77 | 4 0 0 50 -1 1 18 0.0000 0 195 135 5760 1935 1\001 78 | 4 0 0 50 -1 1 18 0.0000 0 195 135 5760 2475 2\001 79 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/a1block1.pdf.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/a1block1.pdf.svn-base 
-------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/cmat.pdf.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/cmat.pdf.svn-base -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/constraints.fig.svn-base: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5-alpha5 2 | Landscape 3 | Center 4 | Metric 5 | A4 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 1710 2430 90 90 1710 2430 1800 2430 11 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 1710 1710 90 90 1710 1710 1800 1710 12 | 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 13 | 1350 675 1350 2700 2025 2700 2025 675 1350 675 14 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 15 | 1350 1350 2025 1350 16 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 17 | 1350 2025 2025 2025 18 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 19 | 3060 1440 4140 1440 4140 1980 3060 1980 3060 1440 20 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 21 | 3060 2160 4140 2160 4140 2700 3060 2700 3060 2160 22 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 23 | 4860 2160 5940 2160 5940 2700 4860 2700 4860 2160 24 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 25 | 4860 1440 5940 1440 5940 1980 4860 1980 4860 1440 26 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 27 | 1 1 1.00 60.00 120.00 28 | 1710 1710 3060 1710 29 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 30 | 1 1 1.00 60.00 120.00 31 | 4140 1710 4860 1710 32 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 33 | 1 1 1.00 60.00 120.00 34 | 1710 2430 3060 2430 35 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 36 | 1 1 
1.00 60.00 120.00 37 | 4140 2430 4860 2430 38 | 4 0 0 50 -1 1 18 0.0000 0 195 870 3150 2475 Block 2\001 39 | 4 0 0 50 -1 1 18 0.0000 0 195 870 4950 2475 Block 3\001 40 | 4 0 0 50 -1 1 18 0.0000 0 195 870 3150 1800 Block 1\001 41 | 4 0 0 50 -1 1 18 0.0000 0 195 870 4950 1800 Block 3\001 42 | 4 0 0 50 -1 1 18 0.0000 0 30 180 1620 1170 --\001 43 | 4 0 0 50 -1 1 18 0.0000 0 195 345 900 1800 A1\001 44 | 4 0 0 50 -1 1 18 0.0000 0 195 345 900 2520 A2\001 45 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/constraints.pdf.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/constraints.pdf.svn-base -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/csdpuser.aux.svn-base: -------------------------------------------------------------------------------- 1 | \relax 2 | \bibstyle{plain} 3 | \citation{HelmbergC:Anims} 4 | \citation{BorchersB:CSDCls} 5 | \citation{SDPA} 6 | \citation{SturmJF:UsiS1M} 7 | \citation{MittelmannHD:AnibS} 8 | \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The C matrix.}}{11}} 9 | \newlabel{cmat}{{1}{11}} 10 | \@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces The constraints.}}{11}} 11 | \newlabel{constraints}{{2}{11}} 12 | \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Block 1 of $A_{1}$.}}{12}} 13 | \newlabel{a1block1}{{3}{12}} 14 | \bibdata{sdp} 15 | \bibcite{BorchersB:CSDCls}{1} 16 | \bibcite{SDPA}{2} 17 | \bibcite{HelmbergC:Anims}{3} 18 | \bibcite{MittelmannHD:AnibS}{4} 19 | \bibcite{SturmJF:UsiS1M}{5} 20 | 
-------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/csdpuser.bbl.svn-base: -------------------------------------------------------------------------------- 1 | \begin{thebibliography}{1} 2 | 3 | \bibitem{BorchersB:CSDCls} 4 | B.~Borchers. 5 | \newblock {CSDP}, a {C} library for semidefinite programming. 6 | \newblock {\em Optimization Methods \& Software}, 11-2(1-4):613 -- 623, 1999. 7 | 8 | \bibitem{SDPA} 9 | K.~Fujisawa, M.~Kojima, K.~Nakata, and M.~Yamashita. 10 | \newblock {SDPA} (semidefinite programming algorithm) users manual - version 11 | 6.00. 12 | \newblock Technical Report B--308, Tokyo Institute of Technology, 1995. 13 | 14 | \bibitem{HelmbergC:Anims} 15 | C.~Helmberg, F.~Rendl, R.~J. Vanderbei, and H.~Wolkowicz. 16 | \newblock An interior-point method for semidefinite programming. 17 | \newblock {\em {SIAM} Journal on Optimization}, 6(2):342 -- 361, May 1996. 18 | 19 | \bibitem{MittelmannHD:AnibS} 20 | H.~D. Mittelmann. 21 | \newblock An independent benchmarking of {SDP} and {SOCP} solvers. 22 | \newblock {\em Mathematical Programming}, 95(2):407 -- 430, February 2003. 23 | 24 | \bibitem{SturmJF:UsiS1M} 25 | J.~F. Sturm. 26 | \newblock Using {S}e{D}u{M}i 1.02, a {MATLAB} toolbox for optimization over 27 | symmetric cones. 28 | \newblock {\em Optimization Methods \& Software}, 11-2(1-4):625 -- 653, 1999. 
29 | 30 | \end{thebibliography} 31 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/csdpuser.blg.svn-base: -------------------------------------------------------------------------------- 1 | This is BibTeX, Version 0.99c (Web2C 7.4.5) 2 | The top-level auxiliary file: csdpuser.aux 3 | The style file: plain.bst 4 | Database file #1: sdp.bib 5 | You've used 5 entries, 6 | 2118 wiz_defined-function locations, 7 | 534 strings with 4778 characters, 8 | and the built_in function-call counts, 1686 in all, are: 9 | = -- 171 10 | > -- 67 11 | < -- 1 12 | + -- 29 13 | - -- 22 14 | * -- 137 15 | := -- 289 16 | add.period$ -- 15 17 | call.type$ -- 5 18 | change.case$ -- 26 19 | chr.to.int$ -- 0 20 | cite$ -- 5 21 | duplicate$ -- 53 22 | empty$ -- 132 23 | format.name$ -- 22 24 | if$ -- 335 25 | int.to.chr$ -- 0 26 | int.to.str$ -- 5 27 | missing$ -- 4 28 | newline$ -- 28 29 | num.names$ -- 10 30 | pop$ -- 17 31 | preamble$ -- 1 32 | purify$ -- 21 33 | quote$ -- 0 34 | skip$ -- 33 35 | stack$ -- 0 36 | substring$ -- 151 37 | swap$ -- 5 38 | text.length$ -- 1 39 | text.prefix$ -- 0 40 | top$ -- 0 41 | type$ -- 20 42 | warning$ -- 0 43 | while$ -- 18 44 | width$ -- 6 45 | write$ -- 57 46 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/csdpuser.pdf.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/csdpuser.pdf.svn-base -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/.svn/text-base/sdp.bib.svn-base: 
-------------------------------------------------------------------------------- 1 | @article{BorchersB:CSDCls, 2 | title = {{CSDP}, a {C} library for semidefinite programming}, 3 | author = {B. Borchers}, 4 | journal = {Optimization Methods \& Software}, 5 | volume = {11-2}, 6 | number = {1-4}, 7 | pages = {613 -- 623}, 8 | year = {1999}, 9 | } 10 | 11 | @article{HelmbergC:Anims, 12 | title = "An interior-point method for semidefinite programming", 13 | author = "C. Helmberg and F. Rendl and R. J. Vanderbei and H. Wolkowicz", 14 | journal = "{SIAM} Journal on Optimization", 15 | volume = "6", 16 | number = "2", 17 | pages = "342 -- 361", 18 | month = "May", 19 | year = "1996", 20 | } 21 | 22 | @article{SturmJF:UsiS1M, 23 | title = "Using {S}e{D}u{M}i 1.02, a {MATLAB} toolbox for optimization over symmetric 24 | cones", 25 | author = {J. F. Sturm}, 26 | journal = {Optimization Methods \& Software}, 27 | volume = {11-2}, 28 | number = {1-4}, 29 | pages = {625 -- 653}, 30 | year = {1999}, 31 | } 32 | 33 | @article{MittelmannHD:AnibS, 34 | title = "An independent benchmarking of {SDP} and {SOCP} solvers", 35 | author = "H. D. Mittelmann", 36 | journal = {Mathematical Programming}, 37 | volume = {95}, 38 | number = {2}, 39 | pages = {407 -- 430}, 40 | month = {February}, 41 | year = {2003}, 42 | } 43 | 44 | @techreport{SDPA, 45 | author="K. Fujisawa and M. Kojima and K. Nakata and M. 
Yamashita", 46 | title="{SDPA} (semidefinite programming algorithm) users manual - version 6.00", 47 | number="B--308", 48 | institution="Tokyo Institute of Technology", 49 | year="1995", 50 | notes="Revised July 2002"} 51 | 52 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/README: -------------------------------------------------------------------------------- 1 | This directory contains the PDF output and LaTeX source for the CSDP 2 | User's Guide, together with some figures used in the guide. There 3 | should be no need to run pdflatex on the source since the .pdf file is 4 | already available. 5 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/a1block1.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5-alpha5 2 | Landscape 3 | Center 4 | Metric 5 | A4 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 2250 810 45 45 2250 810 2295 810 11 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 2250 1485 45 45 2250 1485 2295 1485 12 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 2250 2160 45 45 2250 2160 2295 2160 13 | 2 2 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 14 | 1800 465 2700 465 2700 4500 1800 4500 1800 465 15 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 16 | 1785 1140 2685 1140 17 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 18 | 1785 1815 2685 1815 19 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 20 | 1800 2490 2700 2490 21 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 22 | 1785 3165 2685 3165 23 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 24 | 1785 3825 2685 3825 25 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 26 | 1 1 1.00 60.00 120.00 27 | 2250 2160 3150 2160 28 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 29 | 3150 2610 3825 2610 30 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 31 | 3150 3150 3825 3150 32 
| 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 33 | 3150 3690 3825 3690 34 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 35 | 3150 2070 3825 2070 3825 4275 3150 4275 3150 2070 36 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 37 | 4275 1350 4950 1350 4950 3555 4275 3555 4275 1350 38 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 39 | 5490 450 6165 450 6165 2655 5490 2655 5490 450 40 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 41 | 4275 1935 4950 1935 42 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 43 | 5490 2115 6165 2115 44 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 45 | 5490 1530 6165 1530 46 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 47 | 5490 945 6165 945 48 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 49 | 4275 2475 4950 2475 50 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 51 | 4275 3015 4950 3015 52 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 53 | 1 1 1.00 60.00 120.00 54 | 2250 1485 4275 1485 55 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 56 | 1 1 1.00 60.00 120.00 57 | 2250 810 5490 810 58 | 4 0 0 50 -1 1 18 0.0000 0 195 855 675 900 iindices\001 59 | 4 0 0 50 -1 1 18 0.0000 0 255 855 675 1575 jindices\001 60 | 4 0 0 50 -1 1 18 0.0000 0 195 1680 0 4275 constraintnum\001 61 | 4 0 0 50 -1 1 18 0.0000 0 195 1125 450 2925 blocknum\001 62 | 4 0 0 50 -1 1 18 0.0000 0 195 1020 450 3600 blocksize\001 63 | 4 0 0 50 -1 1 18 0.0000 0 195 765 675 2250 entries\001 64 | 4 0 0 50 -1 1 18 0.0000 0 195 135 2160 2925 1\001 65 | 4 0 0 50 -1 1 18 0.0000 0 195 135 2160 3600 2\001 66 | 4 0 0 50 -1 1 18 0.0000 0 195 135 2160 4275 1\001 67 | 4 0 0 50 -1 1 18 0.0000 0 30 180 3330 2475 --\001 68 | 4 0 0 50 -1 1 18 0.0000 0 195 135 3330 3060 3\001 69 | 4 0 0 50 -1 1 18 0.0000 0 195 135 3330 3555 1\001 70 | 4 0 0 50 -1 1 18 0.0000 0 195 135 3330 4050 3\001 71 | 4 0 0 50 -1 1 18 0.0000 0 30 180 4545 1710 --\001 72 | 4 0 0 50 -1 1 18 0.0000 0 30 180 5760 765 --\001 73 | 4 0 0 50 -1 1 18 0.0000 0 195 135 4545 2295 1\001 74 | 4 0 0 50 -1 1 18 0.0000 0 195 135 4545 2835 2\001 75 | 4 0 0 50 -1 1 18 0.0000 0 195 135 4545 3375 
2\001 76 | 4 0 0 50 -1 1 18 0.0000 0 195 135 5760 1350 1\001 77 | 4 0 0 50 -1 1 18 0.0000 0 195 135 5760 1935 1\001 78 | 4 0 0 50 -1 1 18 0.0000 0 195 135 5760 2475 2\001 79 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/a1block1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/a1block1.pdf -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/cmat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/cmat.pdf -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/constraints.fig: -------------------------------------------------------------------------------- 1 | #FIG 3.2 Produced by xfig version 3.2.5-alpha5 2 | Landscape 3 | Center 4 | Metric 5 | A4 6 | 100.00 7 | Single 8 | -2 9 | 1200 2 10 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 1710 2430 90 90 1710 2430 1800 2430 11 | 1 3 0 1 0 -1 50 -1 20 0.000 1 0.0000 1710 1710 90 90 1710 1710 1800 1710 12 | 2 1 0 2 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 13 | 1350 675 1350 2700 2025 2700 2025 675 1350 675 14 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 15 | 1350 1350 2025 1350 16 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 2 17 | 1350 2025 2025 2025 18 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 19 | 3060 1440 4140 1440 4140 1980 3060 1980 3060 1440 20 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 21 | 3060 2160 4140 2160 4140 2700 3060 2700 3060 2160 22 | 2 2 0 1 0 7 50 
-1 -1 0.000 0 0 -1 0 0 5 23 | 4860 2160 5940 2160 5940 2700 4860 2700 4860 2160 24 | 2 2 0 1 0 7 50 -1 -1 0.000 0 0 -1 0 0 5 25 | 4860 1440 5940 1440 5940 1980 4860 1980 4860 1440 26 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 27 | 1 1 1.00 60.00 120.00 28 | 1710 1710 3060 1710 29 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 30 | 1 1 1.00 60.00 120.00 31 | 4140 1710 4860 1710 32 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 33 | 1 1 1.00 60.00 120.00 34 | 1710 2430 3060 2430 35 | 2 1 0 1 0 7 50 -1 -1 0.000 0 0 -1 1 0 2 36 | 1 1 1.00 60.00 120.00 37 | 4140 2430 4860 2430 38 | 4 0 0 50 -1 1 18 0.0000 0 195 870 3150 2475 Block 2\001 39 | 4 0 0 50 -1 1 18 0.0000 0 195 870 4950 2475 Block 3\001 40 | 4 0 0 50 -1 1 18 0.0000 0 195 870 3150 1800 Block 1\001 41 | 4 0 0 50 -1 1 18 0.0000 0 195 870 4950 1800 Block 3\001 42 | 4 0 0 50 -1 1 18 0.0000 0 30 180 1620 1170 --\001 43 | 4 0 0 50 -1 1 18 0.0000 0 195 345 900 1800 A1\001 44 | 4 0 0 50 -1 1 18 0.0000 0 195 345 900 2520 A2\001 45 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/constraints.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/constraints.pdf -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/csdpuser.aux: -------------------------------------------------------------------------------- 1 | \relax 2 | \bibstyle{plain} 3 | \citation{HelmbergC:Anims} 4 | \citation{BorchersB:CSDCls} 5 | \citation{SDPA} 6 | \citation{SturmJF:UsiS1M} 7 | \citation{MittelmannHD:AnibS} 8 | \@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The C matrix.}}{11}} 9 | \newlabel{cmat}{{1}{11}} 10 | \@writefile{lof}{\contentsline 
{figure}{\numberline {2}{\ignorespaces The constraints.}}{11}} 11 | \newlabel{constraints}{{2}{11}} 12 | \@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Block 1 of $A_{1}$.}}{12}} 13 | \newlabel{a1block1}{{3}{12}} 14 | \bibdata{sdp} 15 | \bibcite{BorchersB:CSDCls}{1} 16 | \bibcite{SDPA}{2} 17 | \bibcite{HelmbergC:Anims}{3} 18 | \bibcite{MittelmannHD:AnibS}{4} 19 | \bibcite{SturmJF:UsiS1M}{5} 20 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/csdpuser.bbl: -------------------------------------------------------------------------------- 1 | \begin{thebibliography}{1} 2 | 3 | \bibitem{BorchersB:CSDCls} 4 | B.~Borchers. 5 | \newblock {CSDP}, a {C} library for semidefinite programming. 6 | \newblock {\em Optimization Methods \& Software}, 11-2(1-4):613 -- 623, 1999. 7 | 8 | \bibitem{SDPA} 9 | K.~Fujisawa, M.~Kojima, K.~Nakata, and M.~Yamashita. 10 | \newblock {SDPA} (semidefinite programming algorithm) users manual - version 11 | 6.00. 12 | \newblock Technical Report B--308, Tokyo Institute of Technology, 1995. 13 | 14 | \bibitem{HelmbergC:Anims} 15 | C.~Helmberg, F.~Rendl, R.~J. Vanderbei, and H.~Wolkowicz. 16 | \newblock An interior-point method for semidefinite programming. 17 | \newblock {\em {SIAM} Journal on Optimization}, 6(2):342 -- 361, May 1996. 18 | 19 | \bibitem{MittelmannHD:AnibS} 20 | H.~D. Mittelmann. 21 | \newblock An independent benchmarking of {SDP} and {SOCP} solvers. 22 | \newblock {\em Mathematical Programming}, 95(2):407 -- 430, February 2003. 23 | 24 | \bibitem{SturmJF:UsiS1M} 25 | J.~F. Sturm. 26 | \newblock Using {S}e{D}u{M}i 1.02, a {MATLAB} toolbox for optimization over 27 | symmetric cones. 28 | \newblock {\em Optimization Methods \& Software}, 11-2(1-4):625 -- 653, 1999. 
29 | 30 | \end{thebibliography} 31 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/csdpuser.blg: -------------------------------------------------------------------------------- 1 | This is BibTeX, Version 0.99c (Web2C 7.4.5) 2 | The top-level auxiliary file: csdpuser.aux 3 | The style file: plain.bst 4 | Database file #1: sdp.bib 5 | You've used 5 entries, 6 | 2118 wiz_defined-function locations, 7 | 534 strings with 4778 characters, 8 | and the built_in function-call counts, 1686 in all, are: 9 | = -- 171 10 | > -- 67 11 | < -- 1 12 | + -- 29 13 | - -- 22 14 | * -- 137 15 | := -- 289 16 | add.period$ -- 15 17 | call.type$ -- 5 18 | change.case$ -- 26 19 | chr.to.int$ -- 0 20 | cite$ -- 5 21 | duplicate$ -- 53 22 | empty$ -- 132 23 | format.name$ -- 22 24 | if$ -- 335 25 | int.to.chr$ -- 0 26 | int.to.str$ -- 5 27 | missing$ -- 4 28 | newline$ -- 28 29 | num.names$ -- 10 30 | pop$ -- 17 31 | preamble$ -- 1 32 | purify$ -- 21 33 | quote$ -- 0 34 | skip$ -- 33 35 | stack$ -- 0 36 | substring$ -- 151 37 | swap$ -- 5 38 | text.length$ -- 1 39 | text.prefix$ -- 0 40 | top$ -- 0 41 | type$ -- 20 42 | warning$ -- 0 43 | while$ -- 18 44 | width$ -- 6 45 | write$ -- 57 46 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/csdpuser.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/csdpuser.pdf -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/doc/sdp.bib: -------------------------------------------------------------------------------- 1 | @article{BorchersB:CSDCls, 2 | title = {{CSDP}, a {C} 
library for semidefinite programming}, 3 | author = {B. Borchers}, 4 | journal = {Optimization Methods \& Software}, 5 | volume = {11-2}, 6 | number = {1-4}, 7 | pages = {613 -- 623}, 8 | year = {1999}, 9 | } 10 | 11 | @article{HelmbergC:Anims, 12 | title = "An interior-point method for semidefinite programming", 13 | author = "C. Helmberg and F. Rendl and R. J. Vanderbei and H. Wolkowicz", 14 | journal = "{SIAM} Journal on Optimization", 15 | volume = "6", 16 | number = "2", 17 | pages = "342 -- 361", 18 | month = "May", 19 | year = "1996", 20 | } 21 | 22 | @article{SturmJF:UsiS1M, 23 | title = "Using {S}e{D}u{M}i 1.02, a {MATLAB} toolbox for optimization over symmetric 24 | cones", 25 | author = {J. F. Sturm}, 26 | journal = {Optimization Methods \& Software}, 27 | volume = {11-2}, 28 | number = {1-4}, 29 | pages = {625 -- 653}, 30 | year = {1999}, 31 | } 32 | 33 | @article{MittelmannHD:AnibS, 34 | title = "An independent benchmarking of {SDP} and {SOCP} solvers", 35 | author = "H. D. Mittelmann", 36 | journal = {Mathematical Programming}, 37 | volume = {95}, 38 | number = {2}, 39 | pages = {407 -- 430}, 40 | month = {February}, 41 | year = {2003}, 42 | } 43 | 44 | @techreport{SDPA, 45 | author="K. Fujisawa and M. Kojima and K. Nakata and M. 
Yamashita", 46 | title="{SDPA} (semidefinite programming algorithm) users manual - version 6.00", 47 | number="B--308", 48 | institution="Tokyo Institute of Technology", 49 | year="1995", 50 | notes="Revised July 2002"} 51 | 52 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/all-wcprops: -------------------------------------------------------------------------------- 1 | K 25 2 | svn:wc:ra_dav:version-url 3 | V 43 4 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab 5 | END 6 | control1.correct 7 | K 25 8 | svn:wc:ra_dav:version-url 9 | V 60 10 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/control1.correct 11 | END 12 | control1.mat 13 | K 25 14 | svn:wc:ra_dav:version-url 15 | V 56 16 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/control1.mat 17 | END 18 | convertf.m 19 | K 25 20 | svn:wc:ra_dav:version-url 21 | V 54 22 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/convertf.m 23 | END 24 | writesol.m 25 | K 25 26 | svn:wc:ra_dav:version-url 27 | V 54 28 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/writesol.m 29 | END 30 | csdp.m 31 | K 25 32 | svn:wc:ra_dav:version-url 33 | V 50 34 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/csdp.m 35 | END 36 | readsol.m 37 | K 25 38 | svn:wc:ra_dav:version-url 39 | V 53 40 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/readsol.m 41 | END 42 | README 43 | K 25 44 | svn:wc:ra_dav:version-url 45 | V 50 46 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/README 47 | END 48 | writesdpa.m 49 | K 25 50 | svn:wc:ra_dav:version-url 51 | V 55 52 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/writesdpa.m 53 | END 54 | readsdpa.m 55 | K 25 56 | svn:wc:ra_dav:version-url 57 | V 54 58 | /svn/Csdp/!svn/ver/43/releases/6.1.0/matlab/readsdpa.m 59 | END 60 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/entries: 
-------------------------------------------------------------------------------- 1 | 9 2 | 3 | dir 4 | 43 5 | https://projects.coin-or.org/svn/Csdp/releases/6.1.0/matlab 6 | https://projects.coin-or.org/svn/Csdp 7 | 8 | 9 | 10 | 2009-12-30T04:58:31.861463Z 11 | 38 12 | borchers@nmt.edu 13 | 14 | 15 | svn:special svn:externals svn:needs-lock 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | a1393fef-6618-0410-a479-b13a6cd8ff1e 28 | 29 | control1.correct 30 | file 31 | 32 | 33 | 34 | 35 | 2010-01-07T18:29:05.000000Z 36 | d9385a3b7fc67f65d3962bbcb32bca8f 37 | 2006-07-12T18:52:29.916046Z 38 | 1 39 | coin 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 2528 62 | 63 | control1.mat 64 | file 65 | 66 | 67 | 68 | 69 | 2010-01-07T18:29:05.000000Z 70 | a63461780e3f9b8d23a75b5af1c9b041 71 | 2006-07-12T18:52:29.916046Z 72 | 1 73 | coin 74 | has-props 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 8480 96 | 97 | convertf.m 98 | file 99 | 100 | 101 | 102 | 103 | 2010-01-07T18:29:05.000000Z 104 | 0b4046d80e4b28167468bb63068caa95 105 | 2006-07-12T18:52:29.916046Z 106 | 1 107 | coin 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 1437 130 | 131 | writesol.m 132 | file 133 | 134 | 135 | 136 | 137 | 2010-01-07T18:29:05.000000Z 138 | 45c03d187e8ae5d6aa257199c7c80f75 139 | 2009-11-21T23:01:14.362159Z 140 | 32 141 | borchers@nmt.edu 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 3539 164 | 165 | csdp.m 166 | file 167 | 168 | 169 | 170 | 171 | 2010-01-07T18:29:05.000000Z 172 | 8b2567074ddd136e22e0933ade5210d8 173 | 2009-12-30T04:58:31.861463Z 174 | 38 175 | borchers@nmt.edu 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 
| 197 | 5693 198 | 199 | readsol.m 200 | file 201 | 202 | 203 | 204 | 205 | 2010-01-07T18:29:05.000000Z 206 | 47653eea25fcde6b27811c335d3a899b 207 | 2008-07-23T04:13:11.221155Z 208 | 24 209 | borchers@nmt.edu 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 3712 232 | 233 | README 234 | file 235 | 236 | 237 | 238 | 239 | 2010-01-07T18:29:05.000000Z 240 | d67d5637947316e243ed7cbfbb608499 241 | 2006-07-12T18:52:29.916046Z 242 | 1 243 | coin 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 1320 266 | 267 | writesdpa.m 268 | file 269 | 270 | 271 | 272 | 273 | 2010-01-07T18:29:05.000000Z 274 | 3fc52dd04326a97ae0a6c3355634a6ff 275 | 2006-07-12T18:52:29.916046Z 276 | 1 277 | coin 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 7470 300 | 301 | readsdpa.m 302 | file 303 | 304 | 305 | 306 | 307 | 2010-01-07T18:29:05.000000Z 308 | 1801da4521b127a71051894d685de254 309 | 2007-07-20T21:49:38.687205Z 310 | 21 311 | borchers@nmt.edu 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 3062 334 | 335 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/format: -------------------------------------------------------------------------------- 1 | 9 2 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/prop-base/control1.mat.svn-base: -------------------------------------------------------------------------------- 1 | K 13 2 | svn:mime-type 3 | V 24 4 | application/octet-stream 5 | END 6 | 
-------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/text-base/README.svn-base: -------------------------------------------------------------------------------- 1 | This directory contains the MATLAB interface to CSDP. There are three 2 | MATLAB functions: 3 | 4 | csdp Solve a problem in SeDuMi format. 5 | writesdpa Takes a problem in SeDuMi format and outputs it 6 | to a file in SDPA sparse format. 7 | readsol Reads a CSDP solution into the workspace in SeDuMi form. 8 | convertf Converts free variables in a SeDuMi problem into the 9 | differences of nonnegative variables, so that the problem 10 | can be solved by CSDP. 11 | 12 | Note that these .m files must be in your MATLAB search path, and that 13 | the csdp executable must be in your shell's search path for this 14 | interface to work. 15 | 16 | To add the .m files to the MATLAB path, see the path function in MATLAB. 17 | It can be used to show the current path and add new directories to the 18 | current path. 19 | 20 | Once you've installed CSDP and the MATLAB interface routines, you can test 21 | them with 22 | 23 | >> load control1.mat 24 | >> whos 25 | >> pars.objtol=1.0e-9; 26 | >> [x,y,z,info]=csdp(At,b,c,K,pars); 27 | >> info 28 | 29 | The file control1.correct shows correct output from these commands. Your 30 | results should be similar, although there are likely to be slight differences 31 | in the actual values. 
32 | 33 | For help with using the routines, see 34 | 35 | >> help csdp 36 | >> help writesdpa 37 | >> help readsol 38 | >> help convertf 39 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/text-base/control1.correct.svn-base: -------------------------------------------------------------------------------- 1 | >> load control1.mat 2 | >> whos 3 | Name Size Bytes Class 4 | 5 | At 125x21 8488 double array (sparse) 6 | K 1x1 140 struct array 7 | ans 2x1 16 double array 8 | b 21x1 168 double array 9 | c 125x1 80 double array (sparse) 10 | 11 | Grand total is 732 elements using 8892 bytes 12 | 13 | >> pars.objtol=1.0e-9; 14 | >> [x,y,z,info]=csdp(At,b,c,K,pars); 15 | Transposing A to match b 16 | Number of constraints: 21 17 | Number of SDP blocks: 2 18 | Number of LP vars: 0 19 | Iter: 0 Ap: 0.00e+00 Pobj: 3.6037961e+02 Ad: 0.00e+00 Dobj: 0.0000000e+00 20 | Iter: 1 Ap: 9.56e-01 Pobj: 3.7527534e+02 Ad: 9.60e-01 Dobj: 6.4836002e+04 21 | Iter: 2 Ap: 8.55e-01 Pobj: 4.0344779e+02 Ad: 9.67e-01 Dobj: 6.9001508e+04 22 | Iter: 3 Ap: 8.77e-01 Pobj: 1.4924982e+02 Ad: 1.00e+00 Dobj: 6.0425319e+04 23 | Iter: 4 Ap: 7.14e-01 Pobj: 8.2819409e+01 Ad: 1.00e+00 Dobj: 1.2926534e+04 24 | Iter: 5 Ap: 8.23e-01 Pobj: 4.7411689e+01 Ad: 1.00e+00 Dobj: 4.9040115e+03 25 | Iter: 6 Ap: 7.97e-01 Pobj: 2.6300213e+01 Ad: 1.00e+00 Dobj: 1.4672743e+03 26 | Iter: 7 Ap: 7.12e-01 Pobj: 1.5215577e+01 Ad: 1.00e+00 Dobj: 4.0561826e+02 27 | Iter: 8 Ap: 8.73e-01 Pobj: 7.5119220e+00 Ad: 1.00e+00 Dobj: 1.7418715e+02 28 | Iter: 9 Ap: 9.87e-01 Pobj: 5.3076526e+00 Ad: 1.00e+00 Dobj: 5.2097318e+01 29 | Iter: 10 Ap: 1.00e+00 Pobj: 7.8594697e+00 Ad: 1.00e+00 Dobj: 2.2172447e+01 30 | Iter: 11 Ap: 7.62e-01 Pobj: 1.5871010e+01 Ad: 1.00e+00 Dobj: 1.9629658e+01 31 | Iter: 12 Ap: 9.21e-01 Pobj: 1.7549388e+01 Ad: 9.68e-01 Dobj: 1.7931413e+01 32 | Iter: 13 Ap: 9.70e-01 Pobj: 1.7769861e+01 Ad: 9.72e-01 Dobj: 1.7792992e+01 33 
| Iter: 14 Ap: 8.87e-01 Pobj: 1.7782917e+01 Ad: 9.70e-01 Dobj: 1.7785344e+01 34 | Iter: 15 Ap: 9.27e-01 Pobj: 1.7784457e+01 Ad: 9.85e-01 Dobj: 1.7784731e+01 35 | Iter: 16 Ap: 9.35e-01 Pobj: 1.7784609e+01 Ad: 9.35e-01 Dobj: 1.7784640e+01 36 | Iter: 17 Ap: 1.00e+00 Pobj: 1.7784624e+01 Ad: 1.00e+00 Dobj: 1.7784628e+01 37 | Iter: 18 Ap: 1.00e+00 Pobj: 1.7784627e+01 Ad: 1.00e+00 Dobj: 1.7784627e+01 38 | Iter: 19 Ap: 9.54e-01 Pobj: 1.7784627e+01 Ad: 9.59e-01 Dobj: 1.7784627e+01 39 | Success: SDP solved 40 | Primal objective value: 1.7784627e+01 41 | Dual objective value: 1.7784627e+01 42 | Relative primal infeasibility: 1.34e-09 43 | Relative dual infeasibility: 1.53e-11 44 | Real Relative Gap: 8.76e-10 45 | XZ Relative Gap: 2.16e-10 46 | DIMACS error measures: 1.34e-09 0.00e+00 2.47e-11 0.00e+00 8.76e-10 2.16e-10 47 | 0.020u 0.000s 0:00.07 28.5% 0+0k 0+0io 218pf+0w 48 | >> info 49 | 50 | info = 51 | 52 | 0 53 | 54 | >> quit 55 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/text-base/control1.mat.svn-base: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/text-base/control1.mat.svn-base -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/.svn/text-base/convertf.m.svn-base: -------------------------------------------------------------------------------- 1 | % 2 | % [A,b,c,K]=convertf(A,b,c,K) 3 | % 4 | % converts free variables in a SeDuMi problem into nonnegative LP variables. 5 | % 6 | function [A,b,c,K]=convertf(A,b,c,K) 7 | % 8 | % Get the number of constraints. 9 | % 10 | m=length(b); 11 | % 12 | % Deal with the following special case. 
If A is transposed, transpose 13 | % it again so that it is of the right size. 14 | % 15 | [Am,An]=size(A); 16 | if (Am ~= m) 17 | if (An == m) 18 | fprintf('Transposing A to match b \n'); 19 | A=A'; 20 | else 21 | fprintf('A is not of the correct size to match b \n'); 22 | return 23 | end 24 | end 25 | % 26 | % Deal with the following special case: if c==0, then c should really 27 | % be a zero vector of the appropriate size. 28 | % 29 | if (c == 0) 30 | fprintf('Expanding c to the appropriate size\n'); 31 | [Am,An]=size(A); 32 | c=zeros(An,1); 33 | end 34 | % 35 | % If c is empty, then act as if it was zero. 36 | % 37 | if (isempty(c)) 38 | fprintf('Expanding empty c to zeros of the appropriate size\n'); 39 | [Am,An]=size(A); 40 | c=zeros(An,1); 41 | end 42 | % 43 | % If c is a row vector, make it a column vector. 44 | % 45 | [cm,cn]=size(c); 46 | if (cn > cm) 47 | c=c'; 48 | end 49 | % 50 | % Check for any free LP variables and rewrite them as the differences of 51 | % regular LP variables. 52 | % 53 | if (isfield(K,'f')) 54 | nfree=K.f 55 | fprintf('Converting %d free variables to LP variables\n',nfree); 56 | if (isfield(K,'l')) 57 | nlin=K.l; 58 | else 59 | nlin=0; 60 | end 61 | [Am,An]=size(A); 62 | Anew=[A(:,1:nfree) -A(:,1:nfree) A(:,nfree+1:An)]; 63 | A=Anew; 64 | cnew=[c(1:nfree); -c(1:nfree); c(nfree+1:An)]; 65 | c=cnew; 66 | 67 | K.l=nlin+2*nfree; 68 | K.f=0; 69 | end 70 | 71 | 72 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/README: -------------------------------------------------------------------------------- 1 | This directory contains the MATLAB interface to CSDP. There are four 2 | MATLAB functions: 3 | 4 | csdp Solve a problem in SeDuMi format. 5 | writesdpa Takes a problem in SeDuMi format and outputs it 6 | to a file in SDPA sparse format. 7 | readsol Reads a CSDP solution into the workspace in SeDuMi form. 
8 | convertf Converts free variables in a SeDuMi problem into the 9 | differences of nonnegative variables, so that the problem 10 | can be solved by CSDP. 11 | 12 | Note that these .m files must be in your MATLAB search path, and that 13 | the csdp executable must be in your shell's search path for this 14 | interface to work. 15 | 16 | To add the .m files to the MATLAB path, see the path function in MATLAB. 17 | It can be used to show the current path and add new directories to the 18 | current path. 19 | 20 | Once you've installed CSDP and the MATLAB interface routines, you can test 21 | them with 22 | 23 | >> load control1.mat 24 | >> whos 25 | >> pars.objtol=1.0e-9; 26 | >> [x,y,z,info]=csdp(At,b,c,K,pars); 27 | >> info 28 | 29 | The file control1.correct shows correct output from these commands. Your 30 | results should be similar, although there are likely to be slight differences 31 | in the actual values. 32 | 33 | For help with using the routines, see 34 | 35 | >> help csdp 36 | >> help writesdpa 37 | >> help readsol 38 | >> help convertf 39 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/control1.correct: -------------------------------------------------------------------------------- 1 | >> load control1.mat 2 | >> whos 3 | Name Size Bytes Class 4 | 5 | At 125x21 8488 double array (sparse) 6 | K 1x1 140 struct array 7 | ans 2x1 16 double array 8 | b 21x1 168 double array 9 | c 125x1 80 double array (sparse) 10 | 11 | Grand total is 732 elements using 8892 bytes 12 | 13 | >> pars.objtol=1.0e-9; 14 | >> [x,y,z,info]=csdp(At,b,c,K,pars); 15 | Transposing A to match b 16 | Number of constraints: 21 17 | Number of SDP blocks: 2 18 | Number of LP vars: 0 19 | Iter: 0 Ap: 0.00e+00 Pobj: 3.6037961e+02 Ad: 0.00e+00 Dobj: 0.0000000e+00 20 | Iter: 1 Ap: 9.56e-01 Pobj: 3.7527534e+02 Ad: 9.60e-01 Dobj: 6.4836002e+04 21 | Iter: 2 Ap: 8.55e-01 Pobj: 4.0344779e+02 Ad: 
9.67e-01 Dobj: 6.9001508e+04 22 | Iter: 3 Ap: 8.77e-01 Pobj: 1.4924982e+02 Ad: 1.00e+00 Dobj: 6.0425319e+04 23 | Iter: 4 Ap: 7.14e-01 Pobj: 8.2819409e+01 Ad: 1.00e+00 Dobj: 1.2926534e+04 24 | Iter: 5 Ap: 8.23e-01 Pobj: 4.7411689e+01 Ad: 1.00e+00 Dobj: 4.9040115e+03 25 | Iter: 6 Ap: 7.97e-01 Pobj: 2.6300213e+01 Ad: 1.00e+00 Dobj: 1.4672743e+03 26 | Iter: 7 Ap: 7.12e-01 Pobj: 1.5215577e+01 Ad: 1.00e+00 Dobj: 4.0561826e+02 27 | Iter: 8 Ap: 8.73e-01 Pobj: 7.5119220e+00 Ad: 1.00e+00 Dobj: 1.7418715e+02 28 | Iter: 9 Ap: 9.87e-01 Pobj: 5.3076526e+00 Ad: 1.00e+00 Dobj: 5.2097318e+01 29 | Iter: 10 Ap: 1.00e+00 Pobj: 7.8594697e+00 Ad: 1.00e+00 Dobj: 2.2172447e+01 30 | Iter: 11 Ap: 7.62e-01 Pobj: 1.5871010e+01 Ad: 1.00e+00 Dobj: 1.9629658e+01 31 | Iter: 12 Ap: 9.21e-01 Pobj: 1.7549388e+01 Ad: 9.68e-01 Dobj: 1.7931413e+01 32 | Iter: 13 Ap: 9.70e-01 Pobj: 1.7769861e+01 Ad: 9.72e-01 Dobj: 1.7792992e+01 33 | Iter: 14 Ap: 8.87e-01 Pobj: 1.7782917e+01 Ad: 9.70e-01 Dobj: 1.7785344e+01 34 | Iter: 15 Ap: 9.27e-01 Pobj: 1.7784457e+01 Ad: 9.85e-01 Dobj: 1.7784731e+01 35 | Iter: 16 Ap: 9.35e-01 Pobj: 1.7784609e+01 Ad: 9.35e-01 Dobj: 1.7784640e+01 36 | Iter: 17 Ap: 1.00e+00 Pobj: 1.7784624e+01 Ad: 1.00e+00 Dobj: 1.7784628e+01 37 | Iter: 18 Ap: 1.00e+00 Pobj: 1.7784627e+01 Ad: 1.00e+00 Dobj: 1.7784627e+01 38 | Iter: 19 Ap: 9.54e-01 Pobj: 1.7784627e+01 Ad: 9.59e-01 Dobj: 1.7784627e+01 39 | Success: SDP solved 40 | Primal objective value: 1.7784627e+01 41 | Dual objective value: 1.7784627e+01 42 | Relative primal infeasibility: 1.34e-09 43 | Relative dual infeasibility: 1.53e-11 44 | Real Relative Gap: 8.76e-10 45 | XZ Relative Gap: 2.16e-10 46 | DIMACS error measures: 1.34e-09 0.00e+00 2.47e-11 0.00e+00 8.76e-10 2.16e-10 47 | 0.020u 0.000s 0:00.07 28.5% 0+0k 0+0io 218pf+0w 48 | >> info 49 | 50 | info = 51 | 52 | 0 53 | 54 | >> quit 55 | -------------------------------------------------------------------------------- 
/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/control1.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/control1.mat -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/csdp6.1.0winp4/csdp6.1.0winp4/matlab/convertf.m: -------------------------------------------------------------------------------- 1 | % 2 | % [A,b,c,K]=convertf(A,b,c,K) 3 | % 4 | % converts free variables in a SeDuMi problem into nonnegative LP variables. 5 | % 6 | function [A,b,c,K]=convertf(A,b,c,K) 7 | % 8 | % Get the number of constraints. 9 | % 10 | m=length(b); 11 | % 12 | % Deal with the following special case. If A is transposed, transpose 13 | % it again so that it is of the right size. 14 | % 15 | [Am,An]=size(A); 16 | if (Am ~= m) 17 | if (An == m) 18 | fprintf('Transposing A to match b \n'); 19 | A=A'; 20 | else 21 | fprintf('A is not of the correct size to match b \n'); 22 | return 23 | end 24 | end 25 | % 26 | % Deal with the following special case: if c==0, then c should really 27 | % be a zero vector of the appropriate size. 28 | % 29 | if (c == 0) 30 | fprintf('Expanding c to the appropriate size\n'); 31 | [Am,An]=size(A); 32 | c=zeros(An,1); 33 | end 34 | % 35 | % If c is empty, then act as if it was zero. 36 | % 37 | if (isempty(c)) 38 | fprintf('Expanding empty c to zeros of the appropriate size\n'); 39 | [Am,An]=size(A); 40 | c=zeros(An,1); 41 | end 42 | % 43 | % If c is a row vector, make it a column vector. 44 | % 45 | [cm,cn]=size(c); 46 | if (cn > cm) 47 | c=c'; 48 | end 49 | % 50 | % Check for any free LP variables and rewrite them as the differences of 51 | % regular LP variables. 
52 | % 53 | if (isfield(K,'f')) 54 | nfree=K.f; 55 | fprintf('Converting %d free variables to LP variables\n',nfree); 56 | if (isfield(K,'l')) 57 | nlin=K.l; 58 | else 59 | nlin=0; 60 | end 61 | [Am,An]=size(A); 62 | Anew=[A(:,1:nfree) -A(:,1:nfree) A(:,nfree+1:An)]; 63 | A=Anew; 64 | cnew=[c(1:nfree); -c(1:nfree); c(nfree+1:An)]; 65 | c=cnew; 66 | 67 | K.l=nlin+2*nfree; 68 | K.f=0; 69 | end 70 | 71 | 72 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/demo_CCSR_usps.m: -------------------------------------------------------------------------------- 1 | % Codes for the paper 2 | % "Constrained Clustering via Spectral Regularization", CVPR 2009 3 | % Zhenguo Li, Jianzhuang Liu, and Xiaoou Tang. 4 | % Written by Zhenguo Li, zgli@ee.columbia.edu 5 | % Version 1.0, Dec. 01, 2010 6 | 7 | clc,clear,close all; 8 | 9 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 10 | % 1. load data 11 | 12 | load usps 13 | 14 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 15 | % 2. parameters 16 | 17 | k = length(unique(labels)); % number of clusters 18 | Npts = size(X,1); % number of points 19 | dm = squareform(pdist(X)); % distance matrix 20 | r = averagekmin_dm(dm,20); 21 | r1 = linspace(0.1*r, r, 5); 22 | r2 = linspace(r, 10*r, 5); 23 | sigma = unique([r1,r2]); % set of scale factors in graph construction 24 | 25 | 26 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 27 | % 3. generate pairwise constraints 28 | 29 | [ML CL] = genPWC(labels,20,20); % generate pairwise constraints randomly 30 | 31 | err_sigma = zeros(length(sigma),1); 32 | for i_sigma = 1:length(sigma) 33 | 34 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 35 | % 4. 
compute normalized graph Laplacian 36 | 37 | L = spnlaplacian_dm(dm,sigma(i_sigma),20); % sparse normalized Laplacian 38 | 39 | 40 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 41 | % 5. compute the first m eigenvectors of normalized graph Laplacian 42 | 43 | m = 15; % number of eigenvectors used in the paper 44 | opts.disp = 0; 45 | [Q,E] = eigs(L,m,'sm',opts); 46 | 47 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 48 | % 6. formuate the convex quadratic semidefinite program 49 | [A, b] = coquad(Q,ML,CL); 50 | 51 | 52 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 53 | % 7. formuate and solve the semidefinite program 54 | 55 | S = sqrtm(A); S = real(S); S = (S + S')/2; % matrix squared root 56 | 57 | % symmetrize b, necessary due to formulateSDP below 58 | b = reshape(b,[m,m]);b = (b+b')/2; 59 | b = b(:); 60 | 61 | [AA, bb, cc] = formulateSDP(S, m, -b); % formulate the SDP 62 | K.s = m^2 + 1 + m; 63 | [xx, yy, zz, info] = csdp(AA, bb, cc,K); % solve the SDP 64 | yy = -yy; % the negative of yy is our solution 65 | 66 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 67 | % 8. obtain the low-dimensional embedding and call kmeans 68 | 69 | M = getY(yy,m); 70 | P = sqrtm(M); P = real(P); P = P + P'; 71 | Y = Q * P; 72 | 73 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 74 | % 9. 
call kmeans and show results 75 | 76 | res = kmeans(Y,k,'Replicate',10); 77 | 78 | err_sigma(i_sigma) = get_error_rate(labels,res); 79 | 80 | sprintf('%d out of %d, the error rate is: %f\n', i_sigma, length(sigma), err_sigma(i_sigma)), 81 | end 82 | 83 | sprintf('the best error rate: %f\n',min(err_sigma)), -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/eval/get_error_rate.m: -------------------------------------------------------------------------------- 1 | function [err corr] = get_error_rate(result, label) 2 | % 3 | % [ERR CORR]= GET_ERROR_RATE(RESULT, LABEL) 4 | % 5 | % To get the error rate for a clustering result, compared to the given 6 | % clustering labels. 7 | % 8 | % ERR is the error rate of clustering and CORR is the nx2 matrix which 9 | % denotes the mapping from the labels of the clustering result to the given labels. 10 | 11 | 12 | % check data integrity 13 | l = length(result); 14 | if l ~= length(label), 15 | error('- failed in data integrity check.'); 16 | end; 17 | 18 | result = reshape(result,[l 1]); 19 | label = reshape(label,[l,1]); 20 | 21 | result_unique = unique(result); 22 | label_unique = unique(label); 23 | 24 | % check the integrity of result 25 | if length(result_unique) ~= length(label_unique), 26 | error('- The clustering result is not consistent with label.'); 27 | end; 28 | 29 | n = length(result_unique); 30 | 31 | % build the bipartite graph 32 | W = zeros(n); 33 | 34 | for I = 1:n 35 | for J = 1:n 36 | W(I,J) = length(find(result==result_unique(I)&label==label_unique(J))); 37 | %W(J,I) = W(I,J); 38 | end; 39 | end; 40 | 41 | % find the maximum matching of the derived bipartite graph. 
42 | M = maximum_matching_bipartite(W); 43 | 44 | idx = find(M>0); 45 | [X Y] = ind2sub([n n],idx); 46 | 47 | corr = [result_unique(X) label_unique(Y)]; 48 | err = 1-sum(W(idx))/l; 49 | 50 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/eval/maximum_matching_bipartite.m: -------------------------------------------------------------------------------- 1 | function M = maxmum_matching_bipartite(W); 2 | 3 | %MAXMATCHING Maximum matching. 4 | % 5 | %Maxmatching(W) implements the Kuhn-Munkres algorithm 6 | %to find the maximum matching in a complete bipartite 7 | %graph G=(X U Y,X x Y), where X and Y have the same size, n. Elements 8 | %of matrix W (n x n) contain weights of edges between vertices of X 9 | %and Y. 10 | % 11 | %The result is a permutation matrix M of the size n x n, where 12 | %(i,j)-entry of M is equal to one, iff x_i is matched with y_j. 13 | % 14 | 15 | n=size(W,2); 16 | M=zeros(n,n); 17 | lx=max(W'); 18 | ly=zeros(1,n); 19 | 20 | Gl=double((lx'*ones(1,n)+ones(n,1)*ly)==W); 21 | M=diag(sum(Gl')==1)*Gl*diag(sum(Gl)==1); 22 | if (sum(sum(M))==0) 23 | pom=find(Gl==1); 24 | M(pom(1))=1; 25 | end 26 | while(sum(sum(M))~=n) 27 | %1 28 | pom=find(sum(M')==0); 29 | x=pom(1); S=[x]; T=[]; 30 | run=1; y=1; 31 | while ((sum(M(:,y))==1)|run) 32 | %2 33 | run=0; 34 | if (isempty(setdiff(find(sum(Gl(S,:),1)>0),T))) 35 | pom=lx'*ones(1,n)+ones(n,1)*ly-W; 36 | alfa=min(min(pom(S,setdiff(1:n,T)))); 37 | lx(S)=lx(S)-alfa; ly(T)=ly(T)+alfa; 38 | Gl=abs((lx'*ones(1,n)+ones(n,1)*ly)-W)<0.00000001; 39 | end 40 | %3 41 | pom=setdiff(find(sum(Gl(S,:),1)>0),T); 42 | y=pom(1); 43 | if (sum(M(:,y))==1) 44 | z=find(M(:,y)==1); 45 | S(length(S)+1)=z; 46 | T(length(T)+1)=y; 47 | end 48 | end 49 | S=augmentingpath(x,y,Gl,M); 50 | M(S(1),S(2))=1; 51 | for i=4:2:length(S) 52 | M(S(i-1),S(i-2))=0; 53 | M(S(i-1),S(i))=1; 54 | end 55 | end 56 | 57 | 58 | function S=augmentingpath(x,y,Gl,M) 59 | 60 | n=size(Gl,2); 61 | 
cesty=zeros(n,2*n); 62 | cesty(1,1)=x; uroven=1; pocetcest=1; 63 | while (ismember(y,cesty(:,2:2:2*n))==0) 64 | if (mod(uroven,2)) 65 | pom=Gl-M; 66 | k=2; 67 | else 68 | pom=M'; 69 | k=1; 70 | end 71 | novypocetcest=pocetcest; 72 | i=1; 73 | while (i<=pocetcest) 74 | sousedi=find(pom(cesty(i,uroven),:)==1); 75 | pridano=0; 76 | for j=1:length(sousedi) 77 | if (ismember(sousedi(j),cesty(:,k:2:2*n))==0) 78 | if (pridano==0) 79 | cesty(i,uroven+1)=sousedi(j); 80 | else 81 | novypocetcest=novypocetcest+1; 82 | cesty(novypocetcest,1:uroven+1)=[cesty(i,1:uroven) sousedi(j)]; 83 | end 84 | pridano=pridano+1; 85 | end 86 | end 87 | if (pridano==0) 88 | novypocetcest=novypocetcest-1; 89 | cesty=[cesty([1:i-1, i+1:n],:);zeros(1,2*n)]; 90 | i=i-1; 91 | pocetcest=pocetcest-1; 92 | end 93 | i=i+1; 94 | end 95 | pocetcest=novypocetcest; 96 | uroven=uroven+1; 97 | end 98 | pom=find(cesty(:,uroven)==y); 99 | S=cesty(pom(1),1:uroven); 100 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/formulateSDP.m: -------------------------------------------------------------------------------- 1 | function [A, b, c] = formulateSDP(S, D, bb) 2 | [F0, FI, c] = localformulateSDP(S, D, bb); 3 | [A, b, c] = sdpToSeDuMi(F0, FI, c); 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/genPWC.m: -------------------------------------------------------------------------------- 1 | function [M C] = genPWC(labels,nM,nC) 2 | 3 | % [M C] = genPWC(labels,nM,nC) 4 | % generate pairwise constraints 5 | % nM - number of must-link constraint to be generated for each cluster 6 | % nC - number of cannot-link constraint to be generated for every two 7 | % clusters 8 | 9 | labels = labels(:);u = unique(labels);k = length(u); 10 | 11 | M = []; C = []; 12 | 13 | % must-link 14 | for i = 1:k 15 | idx = find(labels==u(i)); 16 | n = length(idx); 17 | M = 
[M;[idx(ceil(rand(nM,1)*n)),idx(ceil(rand(nM,1)*n))]]; 18 | end 19 | 20 | % cannot-link 21 | for i = 1:k 22 | for j = i+1:k 23 | idx1 = find(labels==u(i)); idx2 = find(labels==u(j)); 24 | n1 = length(idx1); n2 = length(idx2); 25 | C =[C;[idx1(ceil(rand(nC,1)*n1)),idx2(ceil(rand(nC,1)*n2))]]; 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/getY.m: -------------------------------------------------------------------------------- 1 | function Y = getY(y,m) 2 | % rebuild the symmetric m-by-m matrix Y from y, which lists its lower triangle column by column 3 | idx = 0; 4 | Y = zeros(m); 5 | for col = 1 : m 6 | for row = col : m 7 | idx = idx + 1; 8 | Y(row, col) = y(idx); 9 | end 10 | end 11 | % mirror the lower triangle; subtracting diag(diag(Y)) keeps the diagonal from being counted twice 12 | Y = Y + Y' - diag(diag(Y)); -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/graph_Laplacian/graph_knn_dm.m: -------------------------------------------------------------------------------- 1 | function W = graph_knn_dm(dm,k) 2 | 3 | % W = graph_knn_dm(dm,k) 4 | % form symmetric knn graph 5 | % dm - distance matrix 6 | % k - number specifying the neighborhood 7 | 8 | Npts = size(dm,1); 9 | W = spalloc(Npts, Npts, Npts * k); 10 | % idx(1) is skipped: presumably each point is its own nearest neighbour (zero distance) -- confirm for data with duplicate points 11 | for i = 1 : Npts 12 | [tmp,idx] = sort(dm(:,i)); 13 | W(i,idx(2:k+1)) = 1; 14 | end 15 | % symmetrize: keep an edge if either endpoint selected the other 16 | W = W | W'; -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/graph_Laplacian/spnlaplacian_dm.m: -------------------------------------------------------------------------------- 1 | function nL = spnlaplacian_dm(distMatrix,sigma,k) 2 | 3 | % nL = spnlaplacian_dm(distMatrix,sigma,k) 4 | % compute the sparse normalized graph Laplacian from a distance matrix 5 | % distMatrix - distance matrix (passed unchanged to spnsimilarity_dm, together with sigma and k) 6 | 7 | nW = spnsimilarity_dm(distMatrix,sigma,k); 8 | % normalized Laplacian: identity minus the normalized similarity matrix 9 | nL = speye(size(nW,1)) - nW; -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/graph_Laplacian/spnsimilarity_dm.m: 
-------------------------------------------------------------------------------- 1 | function nW = spnsimilarity_dm(distMatrix,sigma,k) 2 | 3 | % nW = spnsimilarity_dm(distMatrix,sigma,k) 4 | % compute the sparse normalized similarity matrix from a distance matrix. 5 | % distMatrix - distance matrix (passed unchanged to spsimilarity_dm) 6 | % sigma - scale factor 7 | % k - number of neighbors 8 | 9 | W = spsimilarity_dm(distMatrix,sigma,k); 10 | % E holds the column sums of W; the result is nW(i,j) = W(i,j) / sqrt(E(i)*E(j)) 11 | E = sum(W); 12 | E = 1./sqrt(E); 13 | 14 | nW = W.*(E'*E); -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/graph_Laplacian/spsimilarity_dm.m: -------------------------------------------------------------------------------- 1 | function W = spsimilarity_dm(distMatrix,sigma,k) 2 | 3 | % W = spsimilarity_dm(distMatrix,sigma,k) 4 | % compute the sparse similarity matrix from a distance matrix using Gaussian kernel 5 | % w_ij = exp{-d^2(x_i,x_j)/(2*sigma^2)} 6 | % distMatrix - a distance matrix (not squared; it is squared inside the kernel below) 7 | % sigma - scale factor 8 | % k - number of neighbors 9 | 10 | W = exp(-(distMatrix.^2)/(2*sigma^2)); 11 | A = graph_knn_dm(distMatrix,k); % adjacency matrix 12 | W = A .* W; 13 | % zero the main diagonal (linear indices 1, Npts+2, ...) so no point is similar to itself 14 | Npts = size(distMatrix,1); 15 | W(1:Npts+1:end) = 0; 16 | 17 | 18 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/localformulateSDP.m: -------------------------------------------------------------------------------- 1 | function [F0, FI, c] = localformulateSDP(S, D, b) 2 | % formulate SDP problem 3 | % each FI that corresponds to the LMI for the quadratic cost function has 4 | % precisely 2*D^2 nonzero elements. 
But we need only D^2 storage for 5 | % indexing these elements since the FI are symmetric 6 | tempFidx = zeros(D^2, 3); 7 | dimF = (D^2+1) + D; 8 | idx= 0; 9 | for col=1:D 10 | for row=col:D 11 | idx = idx+1; 12 | lindx1 = sub2ind([D D], row, col); 13 | lindx2 = sub2ind([D D], col, row); 14 | tempFidx(:,1) = [1:D^2]'; 15 | tempFidx(:,2) = D^2+1; 16 | if col==row 17 | tempFidx(:,3) = S(:, lindx1) ; 18 | FI{idx} = sparse([tempFidx(:,1); ... % for cost function 19 | tempFidx(:,2); ... % symmetric 20 | row+D^2+1 ... % for P being p.s.d 21 | 22 | ], ... 23 | [tempFidx(:,2); ... % for cost function 24 | tempFidx(:,1); ... % symmetric 25 | row+D^2+1; ... % for P being p.s.d 26 | 27 | ],... 28 | [tempFidx(:,3); ... % for cost function 29 | tempFidx(:,3); ... % symmetric 30 | 1; % for P being p.s.d 31 | 32 | ], dimF, dimF); 33 | else 34 | 35 | tempFidx(:,3) = S(:, lindx1) + S(:, lindx2); 36 | FI{idx} = sparse([tempFidx(:,1); ... % for cost function 37 | tempFidx(:,2); ... % symmetric 38 | row+D^2+1; ... % for P being p.s.d 39 | col+D^2+1; ... % symmetric 40 | ], ... 41 | [tempFidx(:,2); ... % for cost function 42 | tempFidx(:,1); ... % symmetric 43 | col+D^2+1; ... % for P being p.s.d 44 | row+D^2+1; ... % being symmetric 45 | ],... 46 | [tempFidx(:,3); ... % for cost function 47 | tempFidx(:,3); ... 
% symmetric 48 | 1; % for P being p.s.d 49 | 1; % symmetric 50 | ], dimF, dimF); 51 | 52 | end 53 | end 54 | end 55 | idx=idx+1; 56 | % for the F matrix corresponding to t 57 | FI{idx} = sparse(D^2+1, D^2+1, 1, dimF, dimF); 58 | 59 | % now for F0 60 | F0 = sparse( [[1:D^2]], [[1:D^2]], [ones(1, D^2)], dimF, dimF); 61 | 62 | % now for c 63 | b = reshape(-b, D, D); 64 | b = b*2 - diag(diag(b)); 65 | c = zeros(idx-1,1); 66 | kdx=0; 67 | %keyboard; 68 | for col=1:D 69 | for row=col:D 70 | kdx = kdx+1; 71 | c(kdx) = b(row, col); 72 | end 73 | end 74 | %keyboard; 75 | c = [c; 1]; % remember: we use only half of P 76 | return; 77 | -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/readme.m: -------------------------------------------------------------------------------- 1 | To run the codes, one should install the SDP solvers CSDP first. 2 | 3 | 1. To download CSDP, go to https://projects.coin-or.org/Csdp/. 4 | 5 | 2. To set up CSDP successfully, one should modify the paths in csdp.m. 6 | 7 | 8 | For any problems, please report to Zhenguo Li at zgli@ee.columbia.edu -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/run_CCSR.m: -------------------------------------------------------------------------------- 1 | function out = run_CCSR( k, labels, X ) 2 | % Codes for the paper 3 | % "Constrained Clustering via Spectral Regularization", CVPR 2009 4 | % Zhenguo Li, Jianzhuang Liu, and Xiaoou Tang. 5 | % Written by Zhenguo Li, zgli@ee.columbia.edu 6 | % Version 1.0, Dec. 01, 2010 7 | 8 | % clc,clear,close all; 9 | 10 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 11 | % 1. load data 12 | % load usps 13 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 14 | % 2. 
parameters 15 | % k = length(unique(labels)); % number of clusters 16 | Npts = size(X,1); % number of points 17 | dm = squareform(pdist(X)); % distance matrix 18 | r = averagekmin_dm(dm,20); 19 | r1 = linspace(0.1*r, r, 5); 20 | r2 = linspace(r, 10*r, 5); 21 | sigma = unique([r1,r2]); % set of scale factors in graph construction 22 | 23 | 24 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 25 | % 3. generate pairwise constraints 26 | 27 | [ML CL] = genPWC(labels,20,20); % generate pairwise constraints randomly 28 | 29 | err_sigma = zeros(length(sigma),1); 30 | for i_sigma = 1:length(sigma) 31 | 32 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 33 | % 4. compute normalized graph Laplacian 34 | 35 | L = spnlaplacian_dm(dm,sigma(i_sigma),20); % sparse normalized Laplacian 36 | 37 | 38 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 39 | % 5. compute the first m eigenvectors of normalized graph Laplacian 40 | 41 | m = 15; % number of eigenvectors used in the paper 42 | opts.disp = 0; 43 | [Q,E] = eigs(L,m,'sm',opts); 44 | 45 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 46 | % 6. formuate the convex quadratic semidefinite program 47 | [A, b] = coquad(Q,ML,CL); 48 | 49 | 50 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 51 | % 7. formuate and solve the semidefinite program 52 | 53 | S = sqrtm(A); S = real(S); S = (S + S')/2; % matrix squared root 54 | 55 | % symmetrize b, necessary due to formulateSDP below 56 | b = reshape(b,[m,m]);b = (b+b')/2; 57 | b = b(:); 58 | 59 | [AA, bb, cc] = formulateSDP(S, m, -b); % formulate the SDP 60 | K.s = m^2 + 1 + m; 61 | [xx, yy, zz, info] = csdp(AA, bb, cc,K); % solve the SDP 62 | yy = -yy; % the negative of yy is our solution 63 | 64 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 65 | % 8. 
obtain the low-dimensional embedding and call kmeans 66 | 67 | M = getY(yy,m); 68 | P = sqrtm(M); P = real(P); P = P + P'; 69 | Y = Q * P; 70 | 71 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 72 | % 9. call kmeans and show results 73 | 74 | res = kmeans(Y,k,'Replicate',10); 75 | 76 | err_sigma(i_sigma) = get_error_rate(labels,res); 77 | if i_sigma == 1 || err_sigma(i_sigma) <= min(err_sigma(1:i_sigma-1)), out = res; end % keep the assignment with the lowest error so far; NOTE(review): out was previously never assigned - confirm callers expect the cluster assignment 78 | sprintf('%d out of %d, the error rate is: %f\n', i_sigma, length(sigma), err_sigma(i_sigma)), 79 | end 80 | 81 | sprintf('the best error rate: %f\n',min(err_sigma)), -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/sdpToSeDuMi.m: -------------------------------------------------------------------------------- 1 | 2 | function [A, b, c] = sdpToSeDuMi(F0, FI, cc) 3 | % convert the canonical SDP dual formulation: 4 | % (see Vandenberghe and Boyd 1996, SIAM Review) 5 | % max -Tr(F0 Z) 6 | % s.t. Tr(Fi Z) = cci and Z is positive definite 7 | % 8 | % in which cc = (cc1, cc2, cc3,..) and FI = {F1, F2, F3,...} 9 | % 10 | % to SeDuMi format (formulated as vector decision variables ): 11 | % min c'x 12 | % s.t. Ax = b and x is positive definite (x is a vector, so SeDuMi 13 | % really means that vec2mat(x) is positive definite) 14 | % 15 | % by feisha@cis.upenn.edu, June, 10, 2004 16 | 17 | if nargin < 3 18 | error('Cannot convert SDP formulation to SeDuMi formulation in sdpToSeDumi!'); 19 | end 20 | 21 | [m, n] = size(F0); 22 | if m ~= n 23 | error('F0 matrix must be squared matrix in sdpToSeDumi(F0, FI, b)'); 24 | end 25 | 26 | p = length(cc); 27 | if p ~= length(FI) 28 | error('FI matrix cellarray must have the same length as b in sdpToSeDumi(F0,FI,b)'); 29 | end 30 | 31 | % should check every element in the cell array FI...later.. 
32 | 33 | % x = reshape(Z, n*n, 1); % optimization variables from matrix to vector 34 | 35 | % converting objective function of the canonical SDP 36 | c = reshape(F0', n*n,1); 37 | 38 | % converting equality constraints of the canonical SDP 39 | zz= 0; 40 | for idx=1:length(FI) 41 | zz= zz + nnz(FI{idx}); 42 | end 43 | A = spalloc( n*n, p, zz); 44 | for idx = 1:p 45 | temp = reshape(FI{idx}, n*n,1); 46 | lst = find(temp~=0); 47 | A(lst, idx) = temp(lst); 48 | end 49 | % The SeDuMi solver actually expects the transpose of A as in following 50 | % dual problem 51 | % max b'y 52 | % s.t. c - A'y is positive definite 53 | % Therefore, we transpose A 54 | % A = A'; 55 | 56 | % b doesn't need to be changed 57 | b = cc; 58 | return; -------------------------------------------------------------------------------- /algorithms/cvpr09_ccsr_v1.0/usps.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/danyaljj/constrained_clustering/679298bc0e6d3d2a80954b1b7d2d2c867ba1e363/algorithms/cvpr09_ccsr_v1.0/usps.mat -------------------------------------------------------------------------------- /algorithms/kmeans/kmeans2.m: -------------------------------------------------------------------------------- 1 | function[centroid, pointsInCluster, assignment]= kmeans(data, nbCluster) 2 | % usage 3 | % function[centroid, pointsInCluster, assignment]= 4 | % myKmeans(data, nbCluster) 5 | % 6 | % Output: 7 | % centroid: matrix in each row are the Coordinates of a centroid 8 | % pointsInCluster: row vector with the nbDatapoints belonging to 9 | % the centroid 10 | % assignment: row Vector with clusterAssignment of the dataRows 11 | % 12 | % Input: 13 | % data in rows 14 | % nbCluster : nb of centroids to determine 15 | % 16 | % (c) by Christian Herta ( www.christianherta.de ) 17 | % 18 | data_dim = length(data(1,:)); 19 | nbData = length(data(:,1)); 20 | 21 | 22 | % init the centroids randomly 23 | data_min = min(data); 24 | 
data_max = max(data); 25 | data_diff = data_max - data_min ; 26 | % every row is a centroid 27 | centroid = ones(nbCluster, data_dim) .* rand(nbCluster, data_dim); 28 | for i=1 : 1 : length(centroid(:,1)) 29 | centroid( i , : ) = centroid( i , : ) .* data_diff; 30 | centroid( i , : ) = centroid( i , : ) + data_min; 31 | end 32 | % end init centroids 33 | 34 | 35 | 36 | % no stopping at start 37 | pos_diff = 1.; 38 | iter = 0; 39 | 40 | % main loop until 41 | while pos_diff > 0.0 42 | if(iter > 50) 43 | break 44 | end 45 | % E-Step 46 | assignment = []; 47 | % assign each datapoint to the closest centroid 48 | for d = 1 : length( data(:, 1) ); 49 | 50 | min_diff = ( data( d, :) - centroid( 1,:) ); 51 | min_diff = min_diff * min_diff'; 52 | curAssignment = 1; 53 | 54 | for c = 2 : nbCluster; 55 | diff2c = ( data( d, :) - centroid( c,:) ); 56 | diff2c = diff2c * diff2c'; 57 | if( min_diff >= diff2c) 58 | curAssignment = c; 59 | min_diff = diff2c; 60 | end 61 | end 62 | 63 | % assign the d-th dataPoint 64 | assignment = [ assignment; curAssignment]; 65 | 66 | end 67 | 68 | % for the stoppingCriterion 69 | oldPositions = centroid; 70 | 71 | % M-Step 72 | % recalculate the positions of the centroids 73 | centroid = zeros(nbCluster, data_dim); 74 | pointsInCluster = zeros(nbCluster, 1); 75 | 76 | for d = 1: length(assignment); 77 | centroid( assignment(d),:) = data(d,:) + centroid( assignment(d),:) ; 78 | pointsInCluster( assignment(d), 1 ) = 1 + pointsInCluster( assignment(d), 1 ); 79 | end 80 | 81 | for c = 1: nbCluster; 82 | if( pointsInCluster(c, 1) ~= 0) 83 | centroid( c , : ) = centroid( c, : ) / pointsInCluster(c, 1); 84 | else 85 | % set cluster randomly to new position 86 | centroid( c , : ) = (rand( 1, data_dim) .* data_diff) + data_min; 87 | end 88 | end 89 | 90 | %stoppingCriterion 91 | pos_diff = sum (sum( (centroid - oldPositions).^2 ) ); 92 | 93 | iter = iter + 1; 94 | end 95 | -------------------------------------------------------------------------------- 
/algorithms/rdpmeans/objective.m: -------------------------------------------------------------------------------- 1 | % this calculates the objective function being optimized (indirectly) by rdp-means 2 | function [ obj ] = objective(data, assignment, centroid, E, xi, lambda, distancef) 3 | obj = 0; 4 | if(nargin < 7) 5 | distancef ='Gaussian'; 6 | end 7 | 8 | for d=1 : length(data(:,1)) 9 | % accumulate the per-point cost; obj must not be reset inside this loop 10 | if(strcmp(distancef,'Gaussian')) 11 | obj = obj + gaussianDifference(data(d,:), centroid(assignment(d),:)); 12 | else 13 | obj = obj + multDifference(data(d,:), centroid(assignment(d),:)); 14 | end 15 | 16 | ptsInCluster = find(assignment == assignment(d)); 17 | 18 | friends = 0; 19 | strangers = 0; 20 | for p =1: length(ptsInCluster) 21 | if E(ptsInCluster(p),d) == 1 22 | friends = friends + 1; 23 | elseif E(ptsInCluster(p),d) == -1 24 | strangers = strangers + 1; 25 | end 26 | end 27 | 28 | obj = obj - xi*(friends - strangers); 29 | end 30 | obj = obj + lambda*size(centroid,1); 31 | end 32 | 33 | -------------------------------------------------------------------------------- /algorithms/tvclust/TVClust_variational.m: -------------------------------------------------------------------------------- 1 | function [assignment] = TVClust_variational(C, SM, X) 2 | dlmwrite('C2.txt',C,'delimiter',' ','precision',3) 3 | dlmwrite('x2.txt',X,'delimiter',' ','precision',3) 4 | dlmwrite('SM2.txt',SM,'delimiter',' ','precision',3) 5 | 6 | system('Rscript main_VB_TVClust_sim_data2.r '); 7 | fileID = fopen('result2.txt','r'); 8 | assignment = fscanf(fileID,'%d'); 9 | fclose(fileID); % release the handle on result2.txt 10 | end -------------------------------------------------------------------------------- /algorithms/tvclust/main_VB_TVClust_sim_data2.r: -------------------------------------------------------------------------------- 1 | source("func_TVClust.r"); 2 | 3 | ############### Generate Synthetic Data ############### 4 | set.seed(1) 5 | 6 | # load 7 | x <- read.table("x2.txt", sep=" ") 8 | C <- read.table("C2.txt", sep=" ") 9 | 
SM <- read.table("SM2.txt", sep=" ") 10 | #zVB <- read.table("result.txt", sep=" ") 11 | #pObsLink <- read.table("pObsLink.txt", sep=" ") 12 | #trueLables <- read.table("trueLables.txt", sep=" ") 13 | 14 | ############### Run Program ############### 15 | K = 15; #initialize K clusters 16 | iterN = 30; #max iterations 17 | isKeepL = 1; #whether monitor the convergence metric 18 | alpha0 = 1; #a tuning para of Dirichlet process 19 | result = VB_TVClust(x, SM, C, K, iterN, isKeepL, alpha0, stopThreshold= 0.00005); 20 | 21 | zVB = result$z; 22 | 23 | write.table(zVB, "result2.txt", sep=" ", row.names = FALSE, col.names = FALSE) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /algorithms/tvclust/mathFun.r: -------------------------------------------------------------------------------- 1 | logB = function(W, v) #Appendix B.79 of PRML 2 | { 3 | p = dim(W)[1]; 4 | result = (-v/2) * log( det(W) ); 5 | tmp = (v*p/2) * log(2) + (p*(p-1)/4) * log(pi) + sum( lgamma( (v+1 - (1:p) )/2 ) ); 6 | result = result - tmp; 7 | } 8 | 9 | H = function( W, v ) 10 | { 11 | p = dim(W)[1]; 12 | E = sum( digamma( (v+1-(1:p)) / 2 ) ) + p * log(2) + log( det(W) ); 13 | result = -logB(W,v) - (v-p-1)/2 * E + v * p / 2; 14 | } 15 | 16 | CalRandIdx = function(y, SM0) 17 | { 18 | diag(SM0) = 0; 19 | n = length(y); 20 | outputSM = (rep(y, each=n) == rep(y, n))*1; 21 | dim(outputSM) = c(n,n); 22 | RandIdx = (sum(SM0 == outputSM)) / n / (n-1); 23 | } 24 | 25 | entropy = function(freqs) 26 | { 27 | freqs = freqs/sum(freqs) 28 | H = -sum(ifelse(freqs > 0, freqs * log(freqs), 0)) 29 | H = H/log(2) 30 | return(H) 31 | } 32 | 33 | GetGeodesicDist = function(DistEuclid, epsilon) 34 | { 35 | DistEuclid[DistEuclid > epsilon] = INFINITY; 36 | g = graph.adjacency(DistEuclid, mode="undirected", weighted=T); 37 | GeoDist = shortest.paths(g); 38 | } 39 | 40 | 41 | CalRandIdx = function(y, SM0) 42 | { 43 | diag(SM0) = 0; 44 | n = length(y); 45 | outputSM = (rep(y, 
each=n) == rep(y, n))*1; 46 | dim(outputSM) = c(n,n); 47 | RandIdx = (sum(SM0 == outputSM)) / n / (n-1); 48 | } 49 | 50 | 51 | CalRandIdx2 = function(x, y) 52 | { 53 | n = length(x); 54 | SMx = (rep(x, each=n) == rep(x, n))*1; 55 | dim(SMx) = c(n,n); 56 | SMy = (rep(y, each=n) == rep(y, n))*1; 57 | dim(SMy) = c(n,n); 58 | RandIdx = (sum(SMx == SMy) - n) / n / (n-1); 59 | RandIdx 60 | } 61 | 62 | fillSM = function(SM) 63 | { 64 | if( !isSymmetric(SM) ) 65 | print("Matrix not symmetric"); 66 | n = dim(SM)[1]; 67 | for( i in 1 : (n-2) ) 68 | { 69 | posOne = which( SM[i, (i+1) : n] == 1 ) + i; 70 | nOne = length(posOne); 71 | tmp1 = rep( posOne, times = nOne ); 72 | tmp2 = rep( posOne, each = nOne ); 73 | tmpCombine = cbind( tmp1, tmp2 ); 74 | rows = (tmpCombine[,1] < tmpCombine[,2] ); 75 | if( sum(rows) > 0 ) 76 | { 77 | index = tmpCombine[rows,]; 78 | if( is.vector(index) ) 79 | index = matrix( index, 1, 2 ); 80 | K = dim(index)[1]; 81 | for( k in 1 : K ) 82 | SM[index[k,1], index[k,2]] = 1; 83 | } 84 | } 85 | for( i in 1 : (n-1) ) 86 | for( j in (i+1) : n ) 87 | SM[j,i] = SM[i,j]; 88 | diag(SM) = rep(0,n); 89 | SM; 90 | } 91 | -------------------------------------------------------------------------------- /data/UCI/balance/Index: -------------------------------------------------------------------------------- 1 | Index of balance-scale 2 | 3 | 02 Dec 1996 132 Index 4 | 13 May 1994 6250 balance-scale.data 5 | 13 May 1994 2222 balance-scale.names 6 | -------------------------------------------------------------------------------- /data/UCI/balance/balance-scale.names: -------------------------------------------------------------------------------- 1 | 1. Title: Balance Scale Weight & Distance Database 2 | 3 | 2. Source Information: 4 | (a) Source: Generated to model psychological experiments reported 5 | by Siegler, R. S. (1976). Three Aspects of Cognitive 6 | Development. Cognitive Psychology, 8, 481-520. 
7 | (b) Donor: Tim Hume (hume@ics.uci.edu) 8 | (c) Date: 22 April 1994 9 | 10 | 3. Past Usage: (possibly different formats of this data) 11 | - Publications 12 | 1. Klahr, D., & Siegler, R.S. (1978). The Representation of 13 | Children's Knowledge. In H. W. Reese & L. P. Lipsitt (Eds.), 14 | Advances in Child Development and Behavior, pp. 61-116. New 15 | York: Academic Press 16 | 2. Langley,P. (1987). A General Theory of Discrimination 17 | Learning. In D. Klahr, P. Langley, & R. Neches (Eds.), 18 | Production System Models of Learning and Development, pp. 19 | 99-161. Cambridge, MA: MIT Press 20 | 3. Newell, A. (1990). Unified Theories of Cognition. 21 | Cambridge, MA: Harvard University Press 22 | 4. McClelland, J.L. (1988). Parallel Distibuted Processing: 23 | Implications for Cognition and Development. Technical 24 | Report AIP-47, Department of Psychology, Carnegie-Mellon 25 | University 26 | 5. Shultz, T., Mareschal, D., & Schmidt, W. (1994). Modeling 27 | Cognitive Development on Balance Scale Phenomena. Machine 28 | Learning, Vol. 16, pp. 59-88. 29 | 30 | 4. Relevant Information: 31 | This data set was generated to model psychological 32 | experimental results. Each example is classified as having the 33 | balance scale tip to the right, tip to the left, or be 34 | balanced. The attributes are the left weight, the left 35 | distance, the right weight, and the right distance. The 36 | correct way to find the class is the greater of 37 | (left-distance * left-weight) and (right-distance * 38 | right-weight). If they are equal, it is balanced. 39 | 40 | 5. Number of Instances: 625 (49 balanced, 288 left, 288 right) 41 | 42 | 6. Number of Attributes: 4 (numeric) + class name = 5 43 | 44 | 7. Attribute Information: 45 | 1. Class Name: 3 (L, B, R) 46 | 2. Left-Weight: 5 (1, 2, 3, 4, 5) 47 | 3. Left-Distance: 5 (1, 2, 3, 4, 5) 48 | 4. Right-Weight: 5 (1, 2, 3, 4, 5) 49 | 5. Right-Distance: 5 (1, 2, 3, 4, 5) 50 | 51 | 8. 
Missing Attribute Values: 52 | none 53 | 54 | 9. Class Distribution: 55 | 1. 46.08 percent are L 56 | 2. 07.84 percent are B 57 | 3. 46.08 percent are R 58 | -------------------------------------------------------------------------------- /data/UCI/ecoli/ecoli.names.txt: -------------------------------------------------------------------------------- 1 | 1. Title: Protein Localization Sites 2 | 3 | 4 | 2. Creator and Maintainer: 5 | Kenta Nakai 6 | Institute of Molecular and Cellular Biology 7 | Osaka University 8 | 1-3 Yamada-oka, Suita 565 Japan 9 | nakai@imcb.osaka-u.ac.jp 10 | http://www.imcb.osaka-u.ac.jp/nakai/psort.html 11 | Donor: Paul Horton (paulh@cs.berkeley.edu) 12 | Date: September, 1996 13 | See also: yeast database 14 | 15 | 3. Past Usage. 16 | Reference: "A Probabilistic Classification System for Predicting the Cellular 17 | Localization Sites of Proteins", Paul Horton & Kenta Nakai, 18 | Intelligent Systems in Molecular Biology, 109-115. 19 | St. Louis, USA 1996. 20 | Results: 81% for E.coli with an ad hoc structured 21 | probability model. Also similar accuracy for Binary Decision Tree and 22 | Bayesian Classifier methods applied by the same authors in 23 | unpublished results. 24 | 25 | Predicted Attribute: Localization site of protein. ( non-numeric ). 26 | 27 | 28 | 4. The references below describe a predecessor to this dataset and its 29 | development. They also give results (not cross-validated) for classification 30 | by a rule-based expert system with that version of the dataset. 31 | 32 | Reference: "Expert System for Predicting Protein Localization Sites in 33 | Gram-Negative Bacteria", Kenta Nakai & Minoru Kanehisa, 34 | PROTEINS: Structure, Function, and Genetics 11:95-110, 1991. 35 | 36 | Reference: "A Knowledge Base for Predicting Protein Localization Sites in 37 | Eukaryotic Cells", Kenta Nakai & Minoru Kanehisa, 38 | Genomics 14:897-911, 1992. 39 | 40 | 41 | 5. Number of Instances: 336 for the E.coli dataset 42 | 43 | 44 | 6. 
Number of Attributes. 45 | for E.coli dataset: 8 ( 7 predictive, 1 name ) 46 | 47 | 48 | 7. Attribute Information. 49 | 50 | 1. Sequence Name: Accession number for the SWISS-PROT database 51 | 2. mcg: McGeoch's method for signal sequence recognition. 52 | 3. gvh: von Heijne's method for signal sequence recognition. 53 | 4. lip: von Heijne's Signal Peptidase II consensus sequence score. 54 | Binary attribute. 55 | 5. chg: Presence of charge on N-terminus of predicted lipoproteins. 56 | Binary attribute. 57 | 6. aac: score of discriminant analysis of the amino acid content of 58 | outer membrane and periplasmic proteins. 59 | 7. alm1: score of the ALOM membrane spanning region prediction program. 60 | 8. alm2: score of ALOM program after excluding putative cleavable signal 61 | regions from the sequence. 62 | 63 | 64 | 65 | 8. Missing Attribute Values: None. 66 | 67 | 68 | 9. Class Distribution. The class is the localization site. Please see Nakai & 69 | Kanehisa referenced above for more details. 70 | 71 | cp (cytoplasm) 143 72 | im (inner membrane without signal sequence) 77 73 | pp (perisplasm) 52 74 | imU (inner membrane, uncleavable signal sequence) 35 75 | om (outer membrane) 20 76 | omL (outer membrane lipoprotein) 5 77 | imL (inner membrane lipoprotein) 2 78 | imS (inner membrane, cleavable signal sequence) 2 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | -------------------------------------------------------------------------------- /data/UCI/glass/index.html: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data/UCI/ionosphere/ionosphere.names.txt: -------------------------------------------------------------------------------- 1 | 1. Title: Johns Hopkins University Ionosphere database 2 | 3 | 2. 
Source Information: 4 | -- Donor: Vince Sigillito (vgs@aplcen.apl.jhu.edu) 5 | -- Date: 1989 6 | -- Source: Space Physics Group 7 | Applied Physics Laboratory 8 | Johns Hopkins University 9 | Johns Hopkins Road 10 | Laurel, MD 20723 11 | 12 | 3. Past Usage: 13 | -- Sigillito, V. G., Wing, S. P., Hutton, L. V., \& Baker, K. B. (1989). 14 | Classification of radar returns from the ionosphere using neural 15 | networks. Johns Hopkins APL Technical Digest, 10, 262-266. 16 | 17 | They investigated using backprop and the perceptron training algorithm 18 | on this database. Using the first 200 instances for training, which 19 | were carefully split almost 50% positive and 50% negative, they found 20 | that a "linear" perceptron attained 90.7%, a "non-linear" perceptron 21 | attained 92%, and backprop an average of over 96% accuracy on the 22 | remaining 150 test instances, consisting of 123 "good" and only 24 "bad" 23 | instances. (There was a counting error or some mistake somewhere; there 24 | are a total of 351 rather than 350 instances in this domain.) Accuracy 25 | on "good" instances was much higher than for "bad" instances. Backprop 26 | was tested with several different numbers of hidden units (in [0,15]) 27 | and incremental results were also reported (corresponding to how well 28 | the different variants of backprop did after a periodic number of 29 | epochs). 30 | 31 | David Aha (aha@ics.uci.edu) briefly investigated this database. 32 | He found that nearest neighbor attains an accuracy of 92.1%, that 33 | Ross Quinlan's C4 algorithm attains 94.0% (no windowing), and that 34 | IB3 (Aha \& Kibler, IJCAI-1989) attained 96.7% (parameter settings: 35 | 70% and 80% for acceptance and dropping respectively). 36 | 37 | 4. Relevant Information: 38 | This radar data was collected by a system in Goose Bay, Labrador. This 39 | system consists of a phased array of 16 high-frequency antennas with a 40 | total transmitted power on the order of 6.4 kilowatts. 
See the paper 41 | for more details. The targets were free electrons in the ionosphere. 42 | "Good" radar returns are those showing evidence of some type of structure 43 | in the ionosphere. "Bad" returns are those that do not; their signals pass 44 | through the ionosphere. 45 | 46 | Received signals were processed using an autocorrelation function whose 47 | arguments are the time of a pulse and the pulse number. There were 17 48 | pulse numbers for the Goose Bay system. Instances in this databse are 49 | described by 2 attributes per pulse number, corresponding to the complex 50 | values returned by the function resulting from the complex electromagnetic 51 | signal. 52 | 53 | 5. Number of Instances: 351 54 | 55 | 6. Number of Attributes: 34 plus the class attribute 56 | -- All 34 predictor attributes are continuous 57 | 58 | 7. Attribute Information: 59 | -- All 34 are continuous, as described above 60 | -- The 35th attribute is either "good" or "bad" according to the definition 61 | summarized above. This is a binary classification task. 62 | 63 | 8. Missing Values: None 64 | 65 | 66 | -------------------------------------------------------------------------------- /data/UCI/iris/Index: -------------------------------------------------------------------------------- 1 | Index of iris 2 | 3 | 02 Dec 1996 105 Index 4 | 08 Mar 1993 4551 iris.data 5 | 30 May 1989 2604 iris.names 6 | -------------------------------------------------------------------------------- /data/UCI/iris/iris.names: -------------------------------------------------------------------------------- 1 | 1. Title: Iris Plants Database 2 | Updated Sept 21 by C.Blake - Added discrepency information 3 | 4 | 2. Sources: 5 | (a) Creator: R.A. Fisher 6 | (b) Donor: Michael Marshall (MARSHALL%PLU@io.arc.nasa.gov) 7 | (c) Date: July, 1988 8 | 9 | 3. Past Usage: 10 | - Publications: too many to mention!!! Here are a few. 11 | 1. Fisher,R.A. 
"The use of multiple measurements in taxonomic problems" 12 | Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions 13 | to Mathematical Statistics" (John Wiley, NY, 1950). 14 | 2. Duda,R.O., & Hart,P.E. (1973) Pattern Classification and Scene Analysis. 15 | (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. 16 | 3. Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System 17 | Structure and Classification Rule for Recognition in Partially Exposed 18 | Environments". IEEE Transactions on Pattern Analysis and Machine 19 | Intelligence, Vol. PAMI-2, No. 1, 67-71. 20 | -- Results: 21 | -- very low misclassification rates (0% for the setosa class) 22 | 4. Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE 23 | Transactions on Information Theory, May 1972, 431-433. 24 | -- Results: 25 | -- very low misclassification rates again 26 | 5. See also: 1988 MLC Proceedings, 54-64. Cheeseman et al's AUTOCLASS II 27 | conceptual clustering system finds 3 classes in the data. 28 | 29 | 4. Relevant Information: 30 | --- This is perhaps the best known database to be found in the pattern 31 | recognition literature. Fisher's paper is a classic in the field 32 | and is referenced frequently to this day. (See Duda & Hart, for 33 | example.) The data set contains 3 classes of 50 instances each, 34 | where each class refers to a type of iris plant. One class is 35 | linearly separable from the other 2; the latter are NOT linearly 36 | separable from each other. 37 | --- Predicted attribute: class of iris plant. 38 | --- This is an exceedingly simple domain. 39 | --- This data differs from the data presented in Fishers article 40 | (identified by Steve Chadwick, spchadwick@espeedaz.net ) 41 | The 35th sample should be: 4.9,3.1,1.5,0.2,"Iris-setosa" 42 | where the error is in the fourth feature. 43 | The 38th sample: 4.9,3.6,1.4,0.1,"Iris-setosa" 44 | where the errors are in the second and third features. 45 | 46 | 5. 
Number of Instances: 150 (50 in each of three classes) 47 | 48 | 6. Number of Attributes: 4 numeric, predictive attributes and the class 49 | 50 | 7. Attribute Information: 51 | 1. sepal length in cm 52 | 2. sepal width in cm 53 | 3. petal length in cm 54 | 4. petal width in cm 55 | 5. class: 56 | -- Iris Setosa 57 | -- Iris Versicolour 58 | -- Iris Virginica 59 | 60 | 8. Missing Attribute Values: None 61 | 62 | Summary Statistics: 63 | Min Max Mean SD Class Correlation 64 | sepal length: 4.3 7.9 5.84 0.83 0.7826 65 | sepal width: 2.0 4.4 3.05 0.43 -0.4194 66 | petal length: 1.0 6.9 3.76 1.76 0.9490 (high!) 67 | petal width: 0.1 2.5 1.20 0.76 0.9565 (high!) 68 | 69 | 9. Class Distribution: 33.3% for each of 3 classes. 70 | -------------------------------------------------------------------------------- /data/UCI/wine/Index: -------------------------------------------------------------------------------- 1 | Index of wine 2 | 3 | 02 Dec 1996 105 Index 4 | 30 Oct 1995 10782 wine.data 5 | 19 Sep 1992 2643 wine.names 6 | -------------------------------------------------------------------------------- /data/UCI/wine/wine.names: -------------------------------------------------------------------------------- 1 | 1. Title of Database: Wine recognition data 2 | Updated Sept 21, 1998 by C.Blake : Added attribute information 3 | 4 | 2. Sources: 5 | (a) Forina, M. et al, PARVUS - An Extendible Package for Data 6 | Exploration, Classification and Correlation. Institute of Pharmaceutical 7 | and Food Analysis and Technologies, Via Brigata Salerno, 8 | 16147 Genoa, Italy. 9 | 10 | (b) Stefan Aeberhard, email: stefan@coral.cs.jcu.edu.au 11 | (c) July 1991 12 | 3. Past Usage: 13 | 14 | (1) 15 | S. Aeberhard, D. Coomans and O. de Vel, 16 | Comparison of Classifiers in High Dimensional Settings, 17 | Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of 18 | Mathematics and Statistics, James Cook University of North Queensland. 19 | (Also submitted to Technometrics). 
20 | 21 | The data was used with many others for comparing various 22 | classifiers. The classes are separable, though only RDA 23 | has achieved 100% correct classification. 24 | (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) 25 | (All results using the leave-one-out technique) 26 | 27 | In a classification context, this is a well posed problem 28 | with "well behaved" class structures. A good data set 29 | for first testing of a new classifier, but not very 30 | challenging. 31 | 32 | (2) 33 | S. Aeberhard, D. Coomans and O. de Vel, 34 | "THE CLASSIFICATION PERFORMANCE OF RDA" 35 | Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of 36 | Mathematics and Statistics, James Cook University of North Queensland. 37 | (Also submitted to Journal of Chemometrics). 38 | 39 | Here, the data was used to illustrate the superior performance of 40 | the use of a new appreciation function with RDA. 41 | 42 | 4. Relevant Information: 43 | 44 | -- These data are the results of a chemical analysis of 45 | wines grown in the same region in Italy but derived from three 46 | different cultivars. 47 | The analysis determined the quantities of 13 constituents 48 | found in each of the three types of wines. 49 | 50 | -- I think that the initial data set had around 30 variables, but 51 | for some reason I only have the 13 dimensional version. 52 | I had a list of what the 30 or so variables were, but a.) 53 | I lost it, and b.), I would not know which 13 variables 54 | are included in the set. 55 | 56 | -- The attributes are (dontated by Riccardo Leardi, 57 | riclea@anchem.unige.it ) 58 | 1) Alcohol 59 | 2) Malic acid 60 | 3) Ash 61 | 4) Alcalinity of ash 62 | 5) Magnesium 63 | 6) Total phenols 64 | 7) Flavanoids 65 | 8) Nonflavanoid phenols 66 | 9) Proanthocyanins 67 | 10)Color intensity 68 | 11)Hue 69 | 12)OD280/OD315 of diluted wines 70 | 13)Proline 71 | 72 | 5. Number of Instances 73 | 74 | class 1 59 75 | class 2 71 76 | class 3 48 77 | 78 | 6. 
Number of Attributes 79 | 80 | 13 81 | 82 | 7. For Each Attribute: 83 | 84 | All attributes are continuous 85 | 86 | No statistics available, but suggest to standardise 87 | variables for certain uses (e.g. for us with classifiers 88 | which are NOT scale invariant) 89 | 90 | NOTE: 1st attribute is class identifier (1-3) 91 | 92 | 8. Missing Attribute Values: 93 | 94 | None 95 | 96 | 9. Class Distribution: number of instances per class 97 | 98 | class 1 59 99 | class 2 71 100 | class 3 48 101 | -------------------------------------------------------------------------------- /data/toyDataGenerators/clusterincluster.m: -------------------------------------------------------------------------------- 1 | function data = clusterincluster(N, r1, r2, w1, w2, arms) 2 | 3 | if nargin < 1 4 | N = 500; 5 | end 6 | if nargin < 2 7 | r1 = 1; 8 | end 9 | if nargin < 3 10 | r2 = 5*r1; 11 | end 12 | if nargin < 4 13 | w1 = 0.8; 14 | end 15 | if nargin < 5 16 | w2 = 1/3; 17 | end 18 | if nargin < 6 19 | arms = 64; 20 | end 21 | 22 | data = []; 23 | 24 | N1 = floor(N/2); 25 | N2 = N-N1; 26 | 27 | phi1 = rand(N1,1) * 2 * pi; 28 | 29 | 30 | dist1 = r1 + (randi(3,N1,1)-1)/3 * r1 * w1; 31 | d1 = [dist1 .* cos(phi1) dist1 .* sin(phi1) zeros(N1,1)]; 32 | 33 | perarm = round(N2/arms); 34 | N2 = perarm * arms; 35 | radperarm = (2*pi)/arms; 36 | phi2 = ((1:N2) - mod(1:N2, perarm))/perarm * (radperarm); 37 | phi2 = phi2'; 38 | dist2 = r2 * (1 - w2/2) + r2 * w2 * mod(1:N2, perarm)'/perarm; 39 | d2 = [dist2 .* cos(phi2) dist2 .* sin(phi2) ones(N2,1)]; 40 | data = [d1;d2]; 41 | 42 | %scatter(data(:,1), data(:,2), 20, data(:,3)); axis square; 43 | end -------------------------------------------------------------------------------- /data/toyDataGenerators/corners.m: -------------------------------------------------------------------------------- 1 | function data = corners(N, scale, gapwidth, cornerwidth) 2 | 3 | if nargin < 1 4 | N = 550; 5 | end 6 | if mod(N,8) ~= 0 7 | N = round(N/8) * 8; 8 | 
end 9 | 10 | if nargin < 2 11 | scale = 10; 12 | end 13 | if nargin < 3 14 | gapwidth = 2; 15 | end 16 | if nargin < 4 17 | cornerwidth = 2; 18 | end 19 | 20 | perCorner = N/4; 21 | 22 | xplusmin = [ones(perCorner,1); -1*ones(perCorner,1); ones(perCorner,1); -1*ones(perCorner,1)]; 23 | yplusmin = [ones(perCorner,1); -1*ones(2*perCorner,1); ones(perCorner,1)]; 24 | 25 | horizontal = [xplusmin(1:2:end) * gapwidth + xplusmin(1:2:end) * scale .* rand(N/2,1), ... 26 | yplusmin(1:2:end) * gapwidth + cornerwidth * yplusmin(1:2:end) .* rand(N/2,1), ... 27 | floor((0:N/2-1)'/(perCorner*.5))]; 28 | 29 | vertical = [xplusmin(2:2:end) * gapwidth + cornerwidth * xplusmin(2:2:end) .* rand(N/2,1), ... 30 | yplusmin(2:2:end) * gapwidth + yplusmin(2:2:end) * scale .* rand(N/2,1), ... 31 | floor((0:N/2-1)'/(perCorner*.5))]; 32 | 33 | data= [horizontal; vertical]; 34 | 35 | end -------------------------------------------------------------------------------- /data/toyDataGenerators/crescentfullmoon.m: -------------------------------------------------------------------------------- 1 | function data = crescentfullmoon(N, r1, r2, r3) 2 | 3 | if nargin < 1 4 | N = 300; 5 | end 6 | if mod(N,4) ~= 0 7 | N = round(N/4) * 4; 8 | end 9 | if nargin < 2 10 | r1 = 5; 11 | end 12 | if nargin < 3 13 | r2 = 10; 14 | end 15 | if nargin < 4 16 | r3 = 15; 17 | end 18 | 19 | N1 = N/4; 20 | N2 = N-N1; 21 | 22 | phi1 = rand(N1,1) * 2 * pi; 23 | R1 = sqrt(rand(N1, 1)); 24 | moon = [cos(phi1) .* R1 * r1 sin(phi1) .* R1 * r1 zeros(N1,1)]; 25 | 26 | d = r3 - r2; 27 | phi2 = pi + rand(N2,1) * pi; 28 | R2 = sqrt(rand(N2,1)); 29 | crescent = [cos(phi2) .* (r2 + R2 * d) sin(phi2) .* (r2 + R2 * d) ones(N2,1)]; 30 | 31 | data = [moon; crescent]; -------------------------------------------------------------------------------- /data/toyDataGenerators/dbmoon.m: -------------------------------------------------------------------------------- 1 | function data=dbmoon(N,d,r,w) 2 | % Usage: data=dbmoon(N,d,r,w) 3 | % 
doublemoon.m - generate the double moon data set in Haykin's book titled 4 | % "neural networks and learning machine" third edition 2009 Pearson 5 | % Figure 1.8 pp. 61 6 | % The data set contains two regions A and B representing 2 classes 7 | % each region is a half ring with radius r = 10, width = 6, one is upper 8 | % half and the other is lower half 9 | % d: distance between the two regions 10 | % will generate region A centered at (0, 0) and region B is a mirror image 11 | % of region A (w.r.t. x axis) with a (r, d) shift of origin 12 | % N: # of samples each class, default = 1000 13 | % d: separation of two class, negative value means overlapping (default=1) 14 | % r: radius (default=10), w: width of ring (default=6) 15 | % 16 | % (C) 2010 by Yu Hen Hu 17 | % Created: Sept. 3, 2010 18 | 19 | % clear all; close all; 20 | if nargin<4, w=6; end 21 | if nargin<3, r=10; end 22 | if nargin<2, d=1; end 23 | if nargin<1, N=1000; end 24 | % each default is applied independently so every omitted argument gets a value 25 | 26 | % generate region A: 27 | % first generate a uniformly random distributed data points from (-r-w/2, 0) 28 | % to (r+w/2, r+w/2) 29 | N1=10*N; % generate more points and select those meet criteria 30 | w2=w/2; 31 | done=0; data=[]; tmp1=[]; 32 | while ~done, 33 | tmp=[2*(r+w2)*(rand(N1,1)-0.5) (r+w2)*rand(N1,1)]; 34 | % 3rd column of tmp is the magnitude of each data point 35 | tmp(:,3)=sqrt(tmp(:,1).*tmp(:,1)+tmp(:,2).*tmp(:,2)); 36 | idx=find([tmp(:,3)>r-w2] & [tmp(:,3)<r+w2]); 37 | tmp1=[tmp1;tmp(idx,1:2)]; 38 | if size(tmp1,1)>= N, 39 | done=1; 40 | end 41 | % if not enough data point, generate more and test 42 | end 43 | % region A data and class label 0 44 | % region B data is region A data flip y coordinate - d, and x coordinate +r 45 | data=[tmp1(1:N,:) zeros(N,1); 46 | [tmp1(1:N,1)+r -tmp1(1:N,2)-d ones(N,1)]]; 47 | 48 | % plot(data(1:N,1),data(1:N,2),'.r',data(N+1:end,1),data(N+1:end,2),'.b'); 49 | % title(['Fig. 
1.8 Double moon data set, d = ' num2str(d)]), 50 | % axis([-r-w2 2*r+w2 -r-w2-d r+w2]) 51 | 52 | %save dbmoon N r w d data; 53 | -------------------------------------------------------------------------------- /data/toyDataGenerators/gaussians.m: -------------------------------------------------------------------------------- 1 | function[X,Y] = gaussians(size) 2 | size1 = size; 3 | size2 = size; 4 | mean1 = [2,-1]; 5 | cov1 = [1,0.1; 0.1,1]; 6 | mean2 = [8,3]; 7 | cov2 = [1 .2; 0.2,1]; 8 | X = [mvnrnd(mean1, cov1, size1); mvnrnd(mean2, cov2, size1)]; 9 | Y = [ones(size1,1) ; -1*ones(size1,1)]; 10 | order = randperm(2*size1); 11 | X = X(order,:); 12 | Y = Y(order,:); 13 | end -------------------------------------------------------------------------------- /data/toyDataGenerators/halfkernel.m: -------------------------------------------------------------------------------- 1 | function data = halfkernel(N, minx, r1, r2, noise, ratio) 2 | 3 | if nargin < 1 4 | N = 500; 5 | end 6 | if mod(N,2) ~= 0 7 | N = N + 1; 8 | end 9 | if nargin < 2 10 | minx = -20; 11 | end 12 | if nargin < 3 13 | r1 = 20; 14 | end 15 | if nargin < 4 16 | r2 = 35; 17 | end 18 | if nargin < 5 19 | noise = 4; 20 | end 21 | if nargin < 6 22 | ratio = 0.6; 23 | end 24 | 25 | phi1 = rand(N/2,1) * pi; 26 | inner = [minx + r1 * sin(phi1) - .5 * noise + noise * rand(N/2,1) r1 * ratio * cos(phi1) - .5 * noise + noise * rand(N/2,1) ones(N/2,1)]; 27 | 28 | phi2 = rand(N/2,1) * pi; 29 | outer = [minx + r2 * sin(phi2) - .5 * noise + noise * rand(N/2,1) r2 * ratio * cos(phi2) - .5 * noise + noise * rand(N/2,1) zeros(N/2,1)]; 30 | 31 | data = [inner; outer]; 32 | end -------------------------------------------------------------------------------- /data/toyDataGenerators/outlier.m: -------------------------------------------------------------------------------- 1 | function data = outlier(N, r, dist, outliers, noise) 2 | 3 | if nargin < 1 4 | N = 300; 5 | end 6 | if nargin < 2 7 | r = 20; 8 | end 9 | if nargin 
function data = twospirals(N, degrees, start, noise)
% Generate "two spirals" dataset with N instances.
%
% Inputs (all optional):
%   N       total number of instances (default 500); floor(N/2) go to the
%           first spiral and the remainder to the second
%   degrees controls the length of the spirals (default 570)
%   start   determines how far from the origin the spirals start,
%           in degrees (default 90)
%   noise   displaces the instances from the spiral (default 0.2).
%           0 is no noise, at 1 the spirals will start overlapping
%
% Output:
%   data    N-by-3 matrix [x y label]; label 0 for the first spiral,
%           label 1 for the second

    if nargin < 1
        N = 500;
    end
    if nargin < 2
        degrees = 570;
    end
    if nargin < 3
        start = 90;
    end
    if nargin < 4
        % noise is the 4th argument; testing "< 5" would always discard
        % the value supplied by the caller
        noise = 0.2;
    end

    deg2rad = (2*pi)/360;
    start = start * deg2rad;

    N1 = floor(N/2);
    N2 = N-N1;

    % first spiral: the angle n also serves as the radius
    n = start + sqrt(rand(N1,1)) * degrees * deg2rad;
    d1 = [-cos(n).*n + rand(N1,1)*noise sin(n).*n+rand(N1,1)*noise zeros(N1,1)];

    % second spiral: point-mirrored; it needs N2 angles (not N1), otherwise
    % an odd N makes the columns below differ in length
    n = start + sqrt(rand(N2,1)) * degrees * deg2rad;
    d2 = [cos(n).*n+rand(N2,1)*noise -sin(n).*n+rand(N2,1)*noise ones(N2,1)];

    data = [d1;d2];
end
function [vnmi, p, r, f1, ajrnd] = evaluateAll( assignment, trueLabels )
%EVALUATEALL Score a predicted clustering against the true labels.
%
% Inputs:
%   assignment  predicted cluster id per sample
%   trueLabels  reference label per sample
%
% Outputs:
%   vnmi   value returned by mi() on the confusion matrix built by cm()
%   p, r   the two values returned by getbcubed(trueLabels, assignment)
%   f1     harmonic mean of p and r (0 when p + r is not positive)
%   ajrnd  value returned by adjrand(assignment, trueLabels)

    % The helpers use labels as 1-based matrix indices, so shift any
    % labelling that contains zero or negative ids up to start at 1.
    lowest = min(assignment(:));
    if lowest <= 0
        assignment = assignment + 1 - lowest;
    end
    lowest = min(trueLabels(:));
    if lowest <= 0
        trueLabels = trueLabels + 1 - lowest;
    end

    [p, r] = getbcubed(trueLabels, assignment);
    %vnmi = nmi(trueLabels, assignment);
    cmatrix = cm(trueLabels,assignment);
    vnmi = mi(cmatrix);

    % 2*p*r/(p+r) is 0/0 = NaN when both scores are zero; report 0 instead
    % so one degenerate clustering does not poison averaged results.
    if p + r > 0
        f1 = 2*p*r / (p+r);
    else
        f1 = 0;
    end

    ajrnd = adjrand(assignment, trueLabels);
end
13 | p = 1; 14 | 15 | %% Mixture of Gaussians data 16 | [X,Y] = gaussians(200); 17 | size1 = size(X,1); 18 | order = randperm(size1); 19 | X = X(order,:); 20 | Y = Y(order,:); 21 | k = 2; 22 | runClustering(X,Y,k, 'Gaussian-Mixtures', rate, p, true); 23 | 24 | %% two spirals data 25 | data = twospirals(); 26 | X = data(:,1:2); 27 | Y = data(:,3); 28 | size1 = size(X,1); 29 | order = randperm(size1); 30 | X = X(order,:); 31 | Y = Y(order,:); 32 | k = 2; 33 | runClustering(X,Y,k, 'Two-Spirals', rate, p, true); 34 | 35 | %% Cluster In Cluster dataset 36 | data = clusterincluster(); 37 | X = data(:,1:2); 38 | Y = data(:,3); 39 | size1 = size(X,1); 40 | order = randperm(size1); 41 | X = X(order,:); 42 | Y = Y(order,:); 43 | k = 2; 44 | runClustering(X,Y,k, 'Cluster-In-Cluster', rate, p, true); 45 | 46 | %% Corners dataset 47 | k = 4; 48 | %for k = 1:10 49 | data = corners(); 50 | X = data(:,1:2); 51 | Y = data(:,3); 52 | size1 = size(X,1); 53 | order = randperm(size1); 54 | X = X(order,:); 55 | Y = Y(order,:); 56 | runClustering(X,Y,k, 'Corners', rate, p, true); 57 | 58 | %% Half-kernels dataset 59 | data = halfkernel(); 60 | X = data(:,1:2); 61 | Y = data(:,3); 62 | size1 = size(X,1); 63 | order = randperm(size1); 64 | X = X(order,:); 65 | Y = Y(order,:); 66 | k = 2; 67 | runClustering(X,Y,k, 'Half-Kernel', rate, p, true); 68 | 69 | %% Full-moon dataset 70 | data = crescentfullmoon(); 71 | X = data(:,1:2); 72 | Y = data(:,3); 73 | size1 = size(X,1); 74 | order = randperm(size1); 75 | X = X(order,:); 76 | Y = Y(order,:); 77 | k = 2; 78 | runClustering(X,Y,k, 'crescentfullmoon', rate, p, true); 79 | 80 | %% Outlier 81 | k = 4; 82 | data = outlier(); 83 | X = data(:,1:2); 84 | Y = data(:,3); 85 | size1 = size(X,1); 86 | order = randperm(size1); 87 | X = X(order,:); 88 | Y = Y(order,:); 89 | runClustering(X,Y,k, 'outlier', rate, p, true); 90 | 91 | -------------------------------------------------------------------------------- /experiment/experiment_uci.m: 
% this script runs clustering on UCI datasets
clear all; close all; clc;
pathAll('');

% This is the rate at which we sample side information;
% more accurately, this is the size of the pairwise constraints
% over the total number of constraints.
rate = 0.03;

% The confidence on quality of the constraints. When p = 1, the constraints
% are high quality (no noise). As `p` gets closer to zero, it is more
% likely that the constraints will be flipped (hence noisier).
p = 1;

% range of values for k (only used to tag the experiment name here)
k_variance = 1;

% number of trials (only used to tag the experiment name here)
iii = 3;

% One row per experiment: dataset name understood by readUCIData, and the
% number of clusters k handed to runClustering. Comment out a row to skip
% that dataset.
datasets = { ...
    'iris',    3; ...
    'wine',    3; ...
    'ecoli',   8; ...
    'glass',   7; ...
    'balance', 3};

for dIdx = 1:size(datasets,1)
    datasetName = datasets{dIdx,1};
    k = datasets{dIdx,2};

    [X,Y] = readUCIData(datasetName);

    % shuffle the samples, keeping features and labels aligned
    order = randperm(size(X,1));
    X = X(order,:);
    Y = Y(order,:);

    % the dataset name is spliced in directly, so the tag always matches
    % the data (the ecoli run used to be tagged with a misspelled name)
    experimentName = ['Variable_k_' datasetName '_p=' num2str(p) '_rate=' num2str(rate) '_i=' num2str(iii) '_kvariance_' num2str(k_variance)];
    runClustering(X,Y,k, experimentName, rate, p, false);
end
function [precision , recall ] = getbcubed(assignmentGold, assignmentPred)
%GETBCUBED Pairwise precision and recall of a clustering.
%
% Counts ordered pairs (i, j), i ~= j, of samples:
%   numBoth  pairs sharing a label in BOTH the gold and predicted labelling
%   numGold  pairs sharing a gold label
%   numPred  pairs sharing a predicted label
%
% Inputs:
%   assignmentGold  reference label per sample (vector)
%   assignmentPred  predicted cluster id per sample (vector, same length)
%
% Outputs:
%   precision  numBoth / numPred: the share of predicted same-cluster
%              pairs that are truly together
%   recall     numBoth / numGold: the share of truly-together pairs that
%              the prediction kept together
%   Either value is 0 when its denominator is 0 (no such pairs exist).

gold = assignmentGold(:);
pred = assignmentPred(:);
if numel(gold) ~= numel(pred)
    error('getbcubed:sizeMismatch', ...
          'gold and predicted assignments must have the same number of elements');
end

precision = 0;
recall = 0;
if isempty(gold)
    return;
end

% Contingency table: counts(a,b) = number of samples with gold label a and
% predicted label b. A group of c samples contains c*(c-1) ordered pairs,
% so the pair counts follow from the table without an O(n^2) double loop.
[~, ~, goldIdx] = unique(gold);
[~, ~, predIdx] = unique(pred);
counts = accumarray([goldIdx(:) predIdx(:)], 1);

goldSizes = sum(counts, 2);
predSizes = sum(counts, 1);

numBoth = sum(counts(:) .* (counts(:) - 1));
numGold = sum(goldSizes .* (goldSizes - 1));
numPred = sum(predSizes .* (predSizes - 1));

% precision is measured against what was PREDICTED, recall against GOLD
if numPred > 0
    precision = numBoth / numPred;
end
if numGold > 0
    recall = numBoth / numGold;
end
deterministic annealing 40 | % 41 | % Utility functions 42 | % cm - computes confusion matrix 43 | % mi - computes normalized mutual information 44 | % logidf - transforms dtm matrix using log(IDF) weighting 45 | % unitnorm - normalizes each row or column of a matrix into unit length 46 | % entro - calculates entropy (base-2) of a non-negative vector 47 | % entroa - average entropy of a matrix (average over rows) 48 | % puritya - average purity of a confusion matrix (average over rows) 49 | % bpart - balanced partition of a log-likelihood matrix 50 | % 51 | % Demo, example uses 52 | % test - example test code (needs enclosed data tr11.mat) 53 | % compare - example comparison code (needs enclosed data tr11.mat) 54 | 55 | -------------------------------------------------------------------------------- /metrics/nmi/README: -------------------------------------------------------------------------------- 1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%% TEXTCLUST toolbox %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % % 3 | % A Model-based Document Clustering Toolbox % 4 | % % 5 | % Author: Shi Zhong, ECE Dept., The University of Texas at Austin % 6 | % % 7 | % Version 1.0, June 2003 % 8 | % % 9 | % % 10 | % TEXTCLUST is free software and comes with ABSOLUTELY NO WARRANTY. % 11 | % You are free to use and redistribute it except for commercial purpose. % 12 | % % 13 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 14 | 15 | INSTALLATION 16 | Download textclust.tar.gz and uncompress it into the desired directory. 17 | It will create a new directory textclust/ under your desired directory. 18 | Add the path to Matlab using: 'addpath desireddir/textclust -end' in 19 | Matlab command line. 20 | 21 | USAGE 22 | Use 'help textclust' in Matlab to view available algorithms and use 23 | 'help kberns', 'help kmns', etc. to view details of individual algorithms. 24 | Please view the references for further details. 25 | 26 | REFERENCES 27 | A. Banerjee and J.
function partition = bpart(ll,m)

% partition = bpart(ll,m)
%
% Balanced partitioning of a log-likelihood matrix (ll, size n by kc).
% The second argument (m) is optional and, if given, means that
% at least m samples must be in each cluster. m=0 corresponds to
% a completely balanced partition.
%
% Returns partition, an n-by-1 vector holding the cluster index (1..kc)
% assigned to each row of ll.
%
% Clusters are filled greedily in index order 1..kc. For cluster i, every
% still-unassigned sample is scored by its largest log-likelihood gap
% towards the clusters that have not been filled yet; the samples with
% the smallest score are taken.

if nargin < 2, m = 0; end

[n,k] = size(ll);

% balanced re-partition
pn = 1:n;                  % indices of the samples not assigned yet
partition = zeros(n,1);
noti = 1:k;                % clusters not processed yet (current one removed below)

if m > 0
  % guarantee each cluster to have at least m samples
  % NOTE(review): order(1:m) presumes at least m unassigned samples remain,
  % i.e. m*k <= n -- confirm callers guarantee this.
  for i = 1 : k
    noti = setdiff(noti,i);
    if i < k
      % gap between each remaining cluster and cluster i, per sample
      dll = ll(pn,noti) - repmat(ll(pn,i),[1,length(noti)]);
    else
      % last cluster: no competitors left, rank by its own log-likelihood
      dll = - ll(pn,i);
    end
    % ascending sort: the samples that favour cluster i the most come first
    [oll, order] = sort(max(dll,[],2));
    tmp2 = pn(order(1:m));
    partition(tmp2) = i;
    pn = setdiff(pn, tmp2);
  end
  % use ML assignment for rest samples
  [oll, order] = max(ll(pn,:),[],2);
  partition(pn) = order;
else
  for i = 1 : k
    %disp(sprintf('i = %d',i));
    % size of cluster i; the rounding spreads n over the k clusters so
    % that the sizes sum to n
    nci = round(i*n/k) - round((i-1)*n/k);
    noti = setdiff(noti,i);
    if i < k
      dll = ll(pn,noti) - repmat(ll(pn,i),[1,length(noti)]);
      [oll, order] = sort(max(dll,[],2));
      tmp2 = pn(order(1:nci));
    else
      % last cluster takes every sample that is left
      tmp2 = pn;
    end
    partition(tmp2) = i;
    pn = setdiff(pn, tmp2);
  end
end

return;
function e = entro(x)

% e = entro(x)
%
% Base-2 entropy of a non-negative vector x. The vector is rescaled to
% sum to 1 before the entropy is taken; zero entries contribute nothing.
% Raises an error when x is not a vector or has a negative entry.

if min(size(x)) ~= 1
  error('input x must be a vector');
end
if any(x(:) < 0)
  error('input x must be non-negative');
end

prob = x(:) / sum(x);

% log2(0) is -Inf; force those terms to 0 so that 0 * log2(0) counts as 0
logProb = log2(prob);
logProb(prob == 0) = 0;

e = -(logProb' * prob);

return;
function score = mi(cmat)

% function score = mi(cmat)
% Normalized mutual information (NMI) of the confusion matrix 'cmat',
% returned in 'score'. The mutual-information sum is divided by the
% square root of the product of the two marginal entropy sums; when that
% normaliser is exactly 0 the score is 0.

colTotals = sum(cmat,1);
rowTotals = sum(cmat,2);
total = sum(rowTotals);

% per cell: (row total) * (column total)
ratio = bsxfun(@times, rowTotals, colTotals);

% per cell: total * count / (row total * column total); cells whose row
% or column is empty stay at 0
hasMass = ratio > 0;
ratio(hasMass) = cmat(hasMass) ./ ratio(hasMass);
ratio = ratio * total;

% take the log only where it is defined; the rest contributes 0
isPositive = ratio > 0;
ratio(isPositive) = log(ratio(isPositive));
weighted = cmat .* ratio;
score = sum(weighted(:));

% marginal sums: total * log(total / grand total) per non-empty column/row
nonEmpty = colTotals > 0;
colTotals(nonEmpty) = colTotals(nonEmpty) .* log(colTotals(nonEmpty)/total);
nonEmpty = rowTotals > 0;
rowTotals(nonEmpty) = rowTotals(nonEmpty) .* log(rowTotals(nonEmpty)/total);

normaliser = sqrt(sum(colTotals)*sum(rowTotals));
if normaliser ~= 0
  score = score / normaliser;
else
  score = 0;
end

return;
function dat = unitnorm(dat,dim)

% dat = unitnorm(dat, dim)
% Rescale dat to unit L2 length along dimension dim. With the default
% dim = 1 each column is normalized; with dim = 2 each row is.
% Rows/columns whose squared sum is zero are left unchanged.

if nargin < 2
  dim = 1;
end

% squared length of every column (dim = 1) or row (dim = 2)
scale = sum(dat.^2, dim);

% turn the non-zero squared lengths into 1/length factors
nonZero = scale > 0;
scale(nonZero) = 1 ./ sqrt(scale(nonZero));

% apply the factors through a sparse diagonal matrix
scaler = diag(sparse(scale));
if dim ~= 2
  dat = dat * scaler;   % right-multiplication scales columns
else
  dat = scaler * dat;   % left-multiplication scales rows
end

return;