├── README.md
├── Noise_Remover.m
├── sample.m
├── SMOTE.m
├── ClusterSMOTE.m
├── license.txt
├── ASUWO_Main.m
├── Orig_agg_cluster.m
├── BorSMOTE.m
├── Safe_Level_SMOTE.m
├── padadd.m
├── Num_OV_Finder.m
├── ASUWO.m
├── Mod_AggCluster.m
└── nearestneighbour.m


/README.md:
--------------------------------------------------------------------------------
1 | # MATLAB-Source-Code-Oversampling-Methods
2 | This repository contains the MATLAB source code that I wrote for four oversampling methods that address imbalanced binary classification: 1) SMOTE, 2) Borderline SMOTE, 3) Safe-Level SMOTE, and 4) ASUWO (Adaptive Semi-Unsupervised Weighted Oversampling).
3 | 
4 | For more details, please check out my paper at:
5 | http://www.sciencedirect.com/science/article/pii/S0957417415007356
6 | 


--------------------------------------------------------------------------------
/Noise_Remover.m:
--------------------------------------------------------------------------------
 1 | function [ClearData, ClearLabel] = Noise_Remover(WholeDataInst, WholeDataLable, KNN)  
 2 | 
 3 | Ins_neighbors = knnsearch(WholeDataInst, WholeDataInst, 'k', KNN);
 4 | Safe_Level = zeros(1,size(WholeDataInst,1));
 5 | 
 6 | for i = 1:size(WholeDataInst,1)
 7 |     for j = 2:KNN
 8 |         if(WholeDataLable(Ins_neighbors(i,j),1) == WholeDataLable(i,1))
 9 |             Safe_Level(1,i) = Safe_Level(1,i) + 1;
10 |         end
11 |     end
12 | end
13 | 
14 | ToRemove = find(Safe_Level == 0);
15 | ClearData = WholeDataInst;
16 | ClearData(ToRemove,:) = [];
17 | ClearLabel = WholeDataLable;
18 | ClearLabel(ToRemove,:) = [];
19 | 
20 | end


--------------------------------------------------------------------------------
/sample.m:
--------------------------------------------------------------------------------
 1 | function d=sample(I,P,N)
 2 | % This code is from the authors of the paper MWMOTE. The paper can be found
 3 | % in the following link:
 4 | % http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=6361394&tag=1
 5 | 
 6 | 
 7 | %samples N data from the input vectors according to probability
 8 | %distribution P;
 9 | 
10 | [m,n]=size(P);
11 | C=zeros(m,1);
12 | prev=0;
13 | for i=1:m
14 |     C(i)=P(i)+prev;
15 |     prev=C(i);
16 | end
17 | d=[];
18 | 
19 | for i=1:N
20 |     rn=rand(1);
21 |  
22 |     for j=1:m
23 |         if(rn<=C(j))
24 |             d=[d;I(j,:)];
25 |             break;
26 |         end;
27 |     end
28 | end
29 | 
30 | 


--------------------------------------------------------------------------------
/SMOTE.m:
--------------------------------------------------------------------------------
 1 | function [final_features ,final_mark] = SMOTE(original_features, original_mark)
 2 | 
 3 | ind = find(original_mark == -1);
 4 | P = original_features(ind,:);
 5 | KNN = 5;
 6 | final_features = original_features;
 7 | Limit = size(original_features,2);
 8 | 
 9 | Num_Ov = ceil(max(size(find(original_mark == -1),1) - size(find(original_mark == 1),1),size(find(original_mark == 1),1) - size(find(original_mark == -1),1)));
10 | j2 = 1;
11 | while j2 <= Num_Ov
12 |     %find nearest K samples from S2(i,:)
13 |     S2= datasample(P,1);
14 |     Condidates = nearestneighbour(S2', P', 'NumberOfNeighbours', min(KNN,Limit));
15 |     Condidates(:,1) = [] ;
16 |     rn=ceil(rand(1)*(size(Condidates,2)));
17 |     Sel_index = Condidates(:,rn);
18 |     g = P(Sel_index,:);
19 |     alpha = rand(1);
20 |     snew = S2(1,:) + alpha.*(g-S2(1,:));
21 |     final_features = [final_features;snew];
22 |     j2=j2+1;
23 | end
24 | 
25 | mark = -1 * ones(Num_Ov,1);
26 | final_mark = [original_mark; mark];
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/ClusterSMOTE.m:
--------------------------------------------------------------------------------
 1 | function [final_features ,final_mark] = ClusterSMOTE(original_features, original_mark, Ncluster)
 2 | 
 3 | ind = find(original_mark == -1);
 4 | Min_instances = original_features(ind,:);
 5 | min_clusters = kmeans(Min_instances,Ncluster);
 6 | 
 7 | KNN = 6;
 8 | final_features = original_features;
 9 | 
10 | Num_Ov = ceil(max(size(find(original_mark == -1),1) - size(find(original_mark == 1),1),size(find(original_mark == 1),1) - size(find(original_mark == -1),1)));
11 | j2 = 1;
12 | 
13 | 
14 | while j2 <= Num_Ov
15 |     %find nearest K samples from S2(i,:)
16 |     [S2 idx]= datasample(Min_instances,1);
17 |     Min_Cluster = find(min_clusters == min_clusters(idx));
18 |     Min_cand = Min_instances(Min_Cluster,:);
19 |     Limit = size(Min_cand,1);
20 |     Condidates = nearestneighbour(S2', Min_cand', 'NumberOfNeighbours', min(KNN,Limit));
21 |     Condidates(:,1) = [] ;
22 |     if size(Condidates,2)>= 1
23 |         rn=ceil(rand(1)*(size(Condidates,2)));
24 |         Sel_index = Condidates(:,rn);
25 |         g = Min_instances(Sel_index,:);
26 |         alpha = rand(1);
27 |         snew = S2(1,:) + alpha.*(g-S2(1,:));
28 |         final_features = [final_features;snew];
29 |         j2=j2+1;
30 |     end
31 | end
32 | 
33 | mark = -1 * ones(Num_Ov,1);
34 | final_mark = [original_mark; mark];


--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2015, Iman Nekooeimehr
 2 | All rights reserved.
 3 | 
 4 | Redistribution and use in source and binary forms, with or without 
 5 | modification, are permitted provided that the following conditions are 
 6 | met:
 7 | 
 8 |     * Redistributions of source code must retain the above copyright 
 9 |       notice, this list of conditions and the following disclaimer.
10 |     * Redistributions in binary form must reproduce the above copyright 
11 |       notice, this list of conditions and the following disclaimer in 
12 |       the documentation and/or other materials provided with the distribution
13 |       
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
17 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
18 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
19 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
20 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
21 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
22 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
23 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
24 | POSSIBILITY OF SUCH DAMAGE.
25 | 


--------------------------------------------------------------------------------
/ASUWO_Main.m:
--------------------------------------------------------------------------------
% Demo driver: applies SMOTE, Borderline SMOTE, Safe-level SMOTE and ASUWO
% to every training fold of a re-labelled copy of the fisheriris data.
% The oversampled sets are computed but not evaluated or stored here.
clear
clc
close all

% Loading the example dataset
load fisheriris
X = meas;
%Y = [ones(100,1); -1 * ones(50,1)];
% rows 51-100 are labelled -1 (the class the oversamplers grow); the other
% 100 rows are labelled +1
Y = [ones(50,1); -1 * ones(50,1);ones(50,1)];

[N D] = size(X);
% Rescale every feature to the range [-1, 1] (min-max scaling)
for i = 1:D
    X_scaled(:,i) = 2*((X(:,i) - min(X(:,i))) / ( max(X(:,i)) - min(X(:,i)) ))-1;
end
% add small Gaussian noise (standard deviation 0.01) to every entry
X_scaled = X_scaled + normrnd(0,0.01,size(X_scaled));

NumberFolds = 3;
NumIteration = 2;

% SR_RG, stepSize and division are not used anywhere else in this script
SR_RG = 1;
stepSize = 1;

division = round(N/NumberFolds);

%% Building the models
for ite = 1:NumIteration
    % new stratified partition for every iteration
    C = cvpartition(Y,'k',NumberFolds);
    for num = 1:NumberFolds;
        trainData = X_scaled(training(C,num),:);
        trainLabel = Y(training(C,num),:);
        testData = X_scaled(test(C,num),:);
        testLabel = Y(test(C,num),:);
        %% Oversampling using SMOTE
        display ('SMOTE:')
        [trainDataSMOTE, trainLabelSMOTE] = SMOTE(trainData,trainLabel);
        %% Oversampling using Borderline SMOTE
        display ('Borderline SMOTE:')
        NNC = 5;
        [borderMin_BorSMOTE, trainDatanewBorSMOTE, trainLabelnewBorSMOTE] = BorSMOTE(trainData,trainLabel,NNC);
        %% Oversampling using Safe-level SMOTE
        display ('Safe-level SMOTE:')
        NNC = 5;
        [trainDatanewSafeSMOTE, trainLabelnewSafeSMOTE] = Safe_Level_SMOTE(trainData,trainLabel,NNC);
        %% Oversampling using ASUWO
        display ('ASUWO:')
        CThresh = 1;   % clustering threshold coefficient
        K = 3;         % folds used inside ASUWO
        NN = 5;        % neighbours used for the ASUWO weights
        NS = 5;        % neighbours used for noise removal
        [trainDatanewASUWO, trainLabelnewASUWO] = ASUWO(trainData,trainLabel, CThresh , K, NN, NS);
    end
    % perm is not read anywhere else in this script
    perm = [];
end


--------------------------------------------------------------------------------
/Orig_agg_cluster.m:
--------------------------------------------------------------------------------
 1 | function labels = Orig_agg_cluster(data, CThresh)
 2 | 
 3 | N = size(data,2);
 4 | 
 5 | % Clusters is a cell array of vectors.  Each vector contains the
 6 | % indicies of the points belonging to that cluster.
 7 | % Initially, each point is in it's own cluster.
 8 | clusters = cell(N,1);
 9 | for cc = 1:length(clusters)
10 |     clusters{cc} = [cc];
11 | end
12 | 
13 | % the distance between each pair of points
14 | % point_dist = point_distance(data);
15 | D = pdist(data,'euclidean');
16 | point_dist = squareform(D);
17 | point_dist2 = point_dist; 
18 | for i=1:N
19 |     point_dist2(i,i) = 100;
20 | end
21 | thresh = mean(median(point_dist2)).* CThresh;
22 | 
23 | Z = linkage(D,'complete');
24 | labels = cluster(Z,'cutoff',thresh, 'criterion', 'distance');
25 | 
function d = point_distance(X)
% Squared Euclidean distance between every pair of columns of X, computed
% from the expansion |a|^2 + |b|^2 - 2*a'*b.
numPts = size(X,2);
sqNorm = sum(X.^2,1);
d = repmat(sqNorm,numPts,1) + repmat(sqNorm',1,numPts) - 2*X'*X;
30 | 
31 | 
32 | 
33 | %//////////////////////////////////////////////////////////
34 | % d = cluster_distance(c1,c2,point_dist,linkage)
35 | %    Computes the pairwise distances between clusters c1
36 | %    and c2, using the point distance info in point_dist.
37 | %----------------------------------------------------------
38 | 
function d = cluster_distance(c1,c2,point_dist,version)
% Distance between clusters c1 and c2 (index vectors into point_dist).
%   version == 1 : smallest pairwise distance
%   version == 2 : mean pairwise distance
%   otherwise    : largest pairwise distance
% The size factor has exponent 0 here, so it always equals 1.

sizeFactor = max([length(c1),length(c2)])^0;
pairwise = point_dist(c1,c2);
pairwise = pairwise(:);

if version == 1
    d = min(pairwise)*sizeFactor;
elseif version == 2
    d = mean(pairwise)*sizeFactor;
else
    d = max(pairwise)*sizeFactor;
end
54 | 
55 | %//////////////////////////////////////////////////////////
56 | % clusters = merge_clusters(clusters, indicies)
57 | %   Merge the clusters indicated by the entries indicies(1)
58 | %   and indicies(2) of cell array 'clusters'.
59 | %----------------------------------------------------------
function clusters = merge_clusters(clusters, indicies)
% Append the members of cluster indicies(2) to cluster indicies(1), then
% delete entry indicies(2) from the cell array.
keepIdx = indicies(1);
dropIdx = indicies(2);
clusters{keepIdx} = [clusters{keepIdx} clusters{dropIdx}];
clusters(dropIdx) = [];
63 | 
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/BorSMOTE.m:
--------------------------------------------------------------------------------
 1 | function [border_min, final_features, final_mark] = BorSMOTE(original_features, original_mark, NNC)
 2 | 
 3 | %NNC = NNC + 1;
 4 | 
 5 | Minority_index = find(original_mark == -1);
 6 | Minority_features = original_features(Minority_index,:);
 7 | 
 8 | % Finding the 5 positive nearest neighbours of all the positive blobs
 9 | Minority_neighbors = nearestneighbour(Minority_features', original_features', 'NumberOfNeighbours', NNC);
10 | 
11 | num_min_neighbor = zeros(1,length(Minority_index));
12 | for i=1:length(Minority_index)
13 |     for j = 2:NNC
14 |         if(original_mark(Minority_neighbors(j,i),1)== 1) 
15 |             num_min_neighbor(1,i) = num_min_neighbor(1,i)+1;
16 |         end
17 |     end
18 | end
19 | 
20 | border_min = Minority_index(find(num_min_neighbor > (NNC-1)/2),1); 
21 | while size( border_min,1) < 4
22 |     NNC = NNC - 1;
23 |     border_min = Minority_index(find(num_min_neighbor > (NNC-1)/2),1);
24 | end
25 | Border_min_features = original_features(border_min,:);
26 | NNC = 5;
27 | Num_Ov = ceil(max(size(find(original_mark == -1),1) - size(find(original_mark == 1),1),size(find(original_mark == 1),1) - size(find(original_mark == -1),1)));
28 | j2 = 1;
29 | Limit = size(Border_min_features,1);
30 | 
31 | if Limit > 3
32 |     final_features = original_features;
33 |     while j2 <= Num_Ov
34 |         %find nearest K samples from S2(i,:)
35 |         S2 = datasample(Border_min_features,1);
36 |         Condidates = nearestneighbour(S2', Minority_features', 'NumberOfNeighbours', min(NNC-1,Limit));
37 |         Condidates(:,1) = [] ;
38 |         rn = ceil(rand(1)*(size(Condidates,2)));
39 |         Sel_index = Condidates(:,rn);
40 |         g = Minority_features(Sel_index,:);
41 |         alpha = rand(1);
42 |         snew = S2(1,:) + alpha.*(g-S2(1,:));
43 |         final_features = [final_features;snew];
44 |         j2=j2+1;
45 |     end 
46 | mark = -1 * ones(Num_Ov,1);
47 | final_mark = [original_mark; mark];
48 | else 
49 |     [final_features ,final_mark] = SMOTE(original_features, original_mark);
50 | end
51 | 
52 | 
53 | 
54 | 
55 | 


--------------------------------------------------------------------------------
/Safe_Level_SMOTE.m:
--------------------------------------------------------------------------------
 1 | function [final_features ,final_mark] = Safe_Level_SMOTE(original_features, original_mark, KNN)
 2 | 
 3 | ind = find(original_mark == -1);
 4 | Min_ins = original_features(ind,:);
 5 | KNN = KNN + 1;
 6 | final_features = original_features;
 7 | Limit = size(Min_ins,1);
 8 | 
 9 | Num_Ov = ceil(max(size(find(original_mark == -1),1) - size(find(original_mark == 1),1),size(find(original_mark == 1),1) - size(find(original_mark == -1),1)));
10 | j2 = 1;
11 | 
12 | Safe_Level = safe_level_Finder(Min_ins, original_features, original_mark, KNN);
13 | 
14 | while j2 <= Num_Ov
15 |     %find nearest K samples from S2(i,:)
16 |     [FirstCand idx] = datasample(Min_ins,1);
17 |     Safe_Level_cand1 = Safe_Level(idx);
18 |     Condidates = nearestneighbour(FirstCand', Min_ins', 'NumberOfNeighbours', min(KNN,Limit));
19 |     Condidates(:,1) = [] ;
20 |     rn=ceil(rand(1)*(size(Condidates,2)));
21 |     Sel_index = Condidates(:,rn);
22 |     SecondCand = Min_ins(Sel_index,:);
23 |     Safe_Level_cand2 = Safe_Level(Sel_index);
24 |     
25 |     if Safe_Level_cand2 ~= 0
26 |     Safe_level_ratio = Safe_Level_cand1/Safe_Level_cand2;
27 |     else
28 |         Safe_level_ratio = inf;
29 |     end
30 |     
31 |     if (Safe_level_ratio == inf && Safe_Level_cand1 == 0)
32 |     else
33 |         if (Safe_level_ratio == inf && Safe_Level_cand1 ~= 0)
34 |             gap = 0;
35 |         else if Safe_level_ratio == 1
36 |                 gap = rand(1);
37 |             else if Safe_level_ratio > 1
38 |                     gap = rand(1)*(1/Safe_level_ratio);
39 |                 else if Safe_level_ratio < 1
40 |                         gap = rand(1) * Safe_level_ratio + 1 - Safe_level_ratio;
41 |                     end
42 |                 end
43 |             end
44 |         end
45 |     snew = FirstCand(1,:) + gap.*(SecondCand - FirstCand(1,:));
46 |     final_features = [final_features;snew];
47 |     j2=j2+1;    
48 |     end    
49 | end
50 | 
51 | mark = -1 * ones(Num_Ov,1);
52 | final_mark = [original_mark; mark];
53 | end
54 | 
function Safe_Level = safe_level_Finder(Minority_features, WholeDataInst, WholeDataLable, KNN)
% Safe level of every row of Minority_features: how many of neighbours
% 2..KNN (looked up in WholeDataInst) are labelled -1. Returned as a
% 1-by-(number of minority rows) vector.

numMin = size(Minority_features,1);
Ins_neighbors = nearestneighbour(Minority_features', WholeDataInst', 'NumberOfNeighbours', KNN);

% rows 2..KNN hold the inspected neighbours, one column per minority row
inspected = Ins_neighbors(2:KNN, 1:numMin);
isMinorityNeighbour = reshape(WholeDataLable(inspected(:),1) == -1, size(inspected));
Safe_Level = sum(isMinorityNeighbour, 1);

end


--------------------------------------------------------------------------------
/padadd.m:
--------------------------------------------------------------------------------
 1 | function [output] = padadd(A, x, index)
 2 | % PADADD  Adds data columns to an array even column lengths don't match.
 3 | %   Missmatched areas of data array are padded with NaNs.
 4 | %
 5 | %   answer = padadd(A, x) 
 6 | %     appends "x" column vector as the last column of "A" 
 7 | %
 8 | %   answer = padadd(A, x, index) 
 9 | %     assigns "x" to the column specified by "index" in "A"  
10 | %     by overwriting any existing data.
11 | %
12 | %   If "x" is a matrix, "index" specifies the leftmost column written to.
13 | %
14 | %   The result is saved recursively to "A" if the output argument is omitted
15 | %   and "A" is a defined variable
16 | %
17 | %Example:
18 | % padadd( eye(2,2), 2*ones(4,1) )
19 | %
20 | %     ans =
21 | %
22 | %          1     0     2
23 | %          0     1     2
24 | %        NaN   NaN     2
25 | %        NaN   NaN     2
26 | %
27 | %Author: HDJ
28 | 
29 | %check input argument number
30 | if (nargin < 2)
31 |    error('not enough input arguments')
32 | end
33 | 
34 | %transpose 'x' if it is a row vector
35 | if (size(x,1) == 1) | (size(x,2) == 1) & (size(x,2) > size(x,1))
36 |    x = x';				
37 | end
38 | 
39 | %get sizes of 'A' and 'x'
40 | dAr = size(A,1);		
41 | dAc = size(A,2);	
42 | dxr = size(x,1);	
43 | dxc = size(x,2);
44 | 
45 | 
46 | if nargin == 2 
47 |    %if index is not specified 
48 |    %index = dAc + 1;  %default to adding a column to the end 
49 |    index = dAc + (1:dxc); %default to adding all columns to the end
50 | else
51 |    %create index array from index argument
52 |    index = index(1)+ (0:dxc-1);
53 | end
54 | 
55 | %%%%%%BEGIN PADDING SECTION%%%%%%
56 | %if index is outside current size of 'A' then pad whole columns of 'A'
57 | if dAc < index(end)
58 |    answer = [A,NaN*ones(dAr,index(end)-dAc)];
59 | else
60 |    answer = A;
61 | end
62 | 
63 | %if 'x' is shorter or the same height as 'A' then pad 'x' as necessary
64 | if dAr >= dxr,
65 |    %answer(:,index) = [ x(:,1); NaN*ones(dAr-dxr,1)];  
66 |    answer(:,index) = [ x; NaN*ones(dAr-dxr,dxc)];
67 | end
68 | 
69 | %if 'x' is taller than 'A' then pad 'A'
70 | if dAr < dxr,	
71 |    answer = [answer; NaN*ones(dxr-dAr,size(answer,2))];
72 |    %answer(:,index) = x(:,1);
73 |    answer(:,index) = x;
74 | end
75 | %%%%%%END PADDING SECTION%%%%%%
76 | 
77 | %%%%%%DECIDE OUTPUT METHOD%%%%%%
78 | %get input arguments name
79 | ARGIN = inputname(1);
80 | %if no output argument, ouput to A is available
81 | if (nargout == 0) 
82 |    %if ARG is a variable
83 |    if ~(isempty(ARGIN))        
84 |       assignin('caller', ARGIN, answer);
85 |       return
86 |    end
87 | end
88 | 
89 | %default action if either there is an ouput argument
90 | %or if input is not a variable
91 | output = answer;
92 | %%%%%%END DECIDE OUTPUT METHOD%%%%%%


--------------------------------------------------------------------------------
/Num_OV_Finder.m:
--------------------------------------------------------------------------------
 1 | function [Kmin2, rand_matrix, Final_Ov] = Num_OV_Finder(IDX_min, Majority_features, Minority_features, m_each_min, Kmin, folds, Out_Th)
 2 | 
 3 | pow = 0.2;
 4 | Maj_size = size(Majority_features,1);
 5 | Min_size = size(Minority_features,1);
 6 | % Randomely permute the memebrs in each minority cluster
 7 | rand_matrix = [];
 8 | for i=1:Kmin
 9 |     perm = [];
10 |     buff_min_ind = find(IDX_min == i);
11 |     des_min_sam = size(find(IDX_min == i),1);
12 |     perm = randsample(buff_min_ind,des_min_sam);
13 |     padadd(rand_matrix,perm)
14 | end
15 | rand_matrix(:, m_each_min <= Out_Th) = [];
16 | m_each_min(m_each_min <= Out_Th) = [];
17 | Kmin2 = size(rand_matrix,2);
18 | LessFoldsIn = find(m_each_min<folds);
19 | if size(LessFoldsIn,1)>=1
20 |     for fk = 1:size(LessFoldsIn,1)
21 |         temp1 = rand_matrix(~isnan(rand_matrix(1:end,LessFoldsIn(fk))),LessFoldsIn(fk));
22 |         Added = randsample(temp1,folds-size(temp1,1),true);
23 |         rand_matrix ((size(temp1,1)+1):folds,LessFoldsIn(fk)) = Added;
24 |     end
25 | end
26 | 
27 | % Split each Minority cluster and put some portion of each in the fold matrix
28 | buffer = [] ;
29 | folds_matrix = [];
30 | for i = 1:folds-1
31 |     for j=1:Kmin2 
32 |         temp = rand_matrix(~isnan(rand_matrix(1:end,j)),j);
33 |         division = floor(size(temp,1)/folds);
34 |         buffer = [buffer; temp(((i-1)*division+1):i*division,1)];
35 |     end
36 |     padadd(folds_matrix,buffer)
37 |     buffer = [];
38 | end
39 | 
40 | for j=1:Kmin2
41 |     temp = rand_matrix(~isnan(rand_matrix(1:end,j)),j);
42 |     division = floor(size(temp,1)/folds);
43 |     buffer = [buffer; temp(((folds-1)*division+1):end,1)];
44 | end
45 | padadd(folds_matrix,buffer)
46 | 
47 | % Finding the number of misclassified instances 
48 | errorCluster_min = zeros(1,Kmin2);
49 | C = nchoosek(1:folds,folds-1);
50 | % for ite = 1:folds
51 | ite = 1;
52 |     A_min = folds_matrix(:,C(ite,:));
53 |     Min_Feat_Train = Minority_features(A_min(~isnan(A_min)),:);
54 |     B_min = folds_matrix(:,~ismember(1:folds,C(ite,:)));
55 |     Min_Feat_Valid = Minority_features(B_min(~isnan(B_min)),:);    
56 |     % Train the SVM
57 |     Feat_Train_whole = [Min_Feat_Train; Majority_features];
58 |     trainLabel_whole = [-1*ones(size(Min_Feat_Train,1),1);ones(Maj_size,1)];
59 |     [trainDatanew, trainLabelnew] = SMOTE(Feat_Train_whole, trainLabel_whole);
60 |     %model = svmtrain(trainLabelnew, trainDatanew, Options);
61 |     model = fitcdiscr(trainDatanew, trainLabelnew);
62 |     
63 |     % Use the LDA/SVM model to classify the data
64 |     predict_label_SMOTE = predict(model, Min_Feat_Valid);
65 |     % predict_label_SMOTE = svmpredict(testLabel, Min_Feat_Valid, model, '-q'); % run the SVM model on the test data
66 |     misclassified = B_min(predict_label_SMOTE == 1);
67 |     errorCluster_min = sum(ismember(rand_matrix,misclassified)) + errorCluster_min;
68 | %end
69 | 
70 | NeedOv = Maj_size - Min_size;
71 | % Kmin_real = size(m_each_min_real,1);
72 | Pow_m_each = m_each_min .^ pow;
73 | Ratio_Size = (1./Pow_m_each)/sum(1./Pow_m_each,1);
74 | 
75 | ratio_min = errorCluster_min./sum(~isnan(rand_matrix));
76 | ratio_min(ratio_min <= 0.1)= 0.1;
77 | ratio_min2 = ratio_min/sum(ratio_min);
78 | 
79 | % ratio_min2(ratio_min2 <= 0.1)= 0.1;
80 | New_Ratio = ratio_min2 .* Ratio_Size'; 
81 | ratio_min_scaled = New_Ratio/sum(New_Ratio)
82 | Final_Ov = floor(NeedOv * ratio_min_scaled)
83 | 
84 | end


--------------------------------------------------------------------------------
/ASUWO.m:
--------------------------------------------------------------------------------
 1 | function [final_features, final_mark] = ASUWO(original_features, original_mark, CThresh , K, NN, NS)
 2 | 
 3 | %Inputs:
 4 |     % original_features: The features of original dataset needed to be oversampled. 
 5 |     % original_mark: The label of original dataset needed to be oversampled.
 6 | 	% CThresh: Coefficient to tune the threshold for clustering.
 7 | 	% NN: Number of nearest neighbors to be found for each minority instance to determine the weights.
 8 | 	% NS: Number of nearest neighbors used to identify noisy instances. 
 9 | 	% K: Number of folds in the K-fold Cross Validation.
10 | 
11 | %Outputs:
12 |     % final_features: The features of dataset after being oversampled.
13 |     % final_mark: The label of dataset after being oversampled.
14 | % Copyright 2015 Iman Nekooeimehr. This code may be freely used and
15 | % distributed, so long as it maintains this copyright line.
16 |     
17 | %Removing noisy instances for both minority and majority class:
18 | [Clean_orig_inst, Clean_orig_mark] = Noise_Remover(original_features, original_mark, NS); 
19 |     
20 | NNC = 5;
21 | Out_Th = 2;
22 | 
23 | % Separating Minority and Majority instances
24 | MinorityIndex = find(Clean_orig_mark == -1);
25 | MajorityIndex = find(Clean_orig_mark == 1);
26 | Majority_features = Clean_orig_inst(MajorityIndex,:);
27 | Minority_features = Clean_orig_inst(MinorityIndex,:);
28 | Maj_size = size(Majority_features,1);
29 | 
30 | %% Clustering the minority instances by considering majority instances:
31 | [IDX_min] = Mod_AggCluster(Majority_features, Minority_features ,CThresh);
32 | Kmin = size(unique(IDX_min),1);
33 | m_each_min = histc(IDX_min,1:Kmin);
34 | 
35 | %% Finding cluster sizes for minority class using K fold cross validation
36 | [Kmin2, rand_matrix, num_cluster_min] = Num_OV_Finder(IDX_min, Majority_features, Minority_features, m_each_min, Kmin, K, Out_Th);
37 | 
38 | final_features = Minority_features;
39 | 
40 | %% find selection probability and oversample within minority clusters
41 | [p,q]=size(Majority_features);
42 | 
43 | for i=1:Kmin2
44 |     minority_clustered = rand_matrix(~isnan(rand_matrix(:,i)),i);
45 |     Minority_clustered_features = Minority_features(minority_clustered,:);
46 |     [m,n]=size(Minority_clustered_features);
47 |     dist_vec = [];
48 |     for i2=1:m
49 |         %find nearest K1 borderline majority sets
50 |         dist = zeros(p,1);
51 |         for j=1:p
52 |             x = sum((Majority_features(j,:) - Minority_clustered_features(i2,:)).^2);
53 |             dist(j,1) = x;
54 |         end
55 |         distm = sort (dist);
56 |         dist_vec = [dist_vec distm(1:NN)];
57 |     end
58 |     thresh = quantile(dist_vec(1,:),0.5);
59 |     dist_vec(dist_vec > thresh) = thresh;
60 |     dist_vec = dist_vec./n;
61 |     dist_rec = (1./dist_vec).^1;
62 |     mean_dis = mean(dist_rec,1);
63 |     totw = sum(mean_dis);
64 |     P = mean_dis ./ totw;
65 |     %end of our selection probability algorithm
66 |     
67 |     j2 = 1;
68 |     while j2 <= (num_cluster_min(1,i))
69 |         %find nearest K samples from S2(i,:)
70 |         S2=sample(Minority_clustered_features,P',1);
71 |         Condidates = nearestneighbour(S2', Minority_clustered_features', 'NumberOfNeighbours', min(NNC,m));
72 |         Condidates(:,1) = [] ;
73 |         rn=ceil(rand(1)*(size(Condidates,2)));
74 |         Sel_index = Condidates(:,rn);
75 |         g = Minority_clustered_features(Sel_index,:);
76 |         alpha=rand(1) ;
77 |         snew = S2(1,:) + alpha.*(g-S2(1,:));
78 |         final_features = [final_features;snew];
79 |         j2=j2+1;
80 |     end    
81 | end
82 | 
83 | r = size(final_features,1);
84 | MinMark = -1 * ones(r,1);
85 | MaxMark = ones(Maj_size,1);
86 | final_mark = [MinMark; MaxMark];
87 | final_features = [final_features; Majority_features];
88 | 
89 | 


--------------------------------------------------------------------------------
/Mod_AggCluster.m:
--------------------------------------------------------------------------------
  1 | function [min_clusters] = Mod_AggCluster(Majority_features, Minority_features ,CThresh)
  2 | 
  3 | % This code is a modification of the source code for Hierachical Clustering
  4 | % implemented by David Ross
  5 | % The source code for the original Hierachical Clustering can be found in: 
  6 | % http://www.cs.toronto.edu/~dross/code/
  7 | 
  8 | SizeMin = size(Minority_features,1);
  9 | min_clusters = (1:SizeMin)';
 10 | 
 11 | %% Clustering the majority class using Hierachical Clustering
 12 | maj_clusters = Orig_agg_cluster(Majority_features, CThresh);
 13 | 
 14 | % Kmaj = size(unique(maj_clusters),1);
 15 | % m_each_maj = histc(maj_clusters,1:Kmaj);
 16 | 
 17 | Whole_data_min = [Minority_features; Majority_features];
 18 | D = pdist(Whole_data_min,'euclidean');
 19 | point_dist_min = squareform(D);
 20 | 
 21 | %% Clustering the Minority instances using majority clusters
 22 | min_clusters = inside_AggCluster(Minority_features', min_clusters, maj_clusters, point_dist_min, CThresh);
 23 | 
function labels  = inside_AggCluster(data, same_clusters, other_clusters, point_dist_whole, CThresh)
% Agglomerative clustering of one class that refuses a merge when a
% sufficiently large cluster of the other class lies close to both merge
% candidates.
%
%   data             - points of the class being clustered, one per COLUMN
%                      (only its column count is used here).
%   same_clusters    - initial cluster label of every such point.
%   other_clusters   - cluster label of every point of the other class.
%   point_dist_whole - pairwise distance matrix; the first N rows/columns
%                      belong to the points in data, the remaining ones are
%                      addressed as N + index for the other class.
%   CThresh          - coefficient scaling the merge threshold.
%   labels           - N-by-1 final cluster label of every point in data.
Num_Reject = 0; 
N = size(data,2);
Exist_Clus = unique(same_clusters);
M = size(Exist_Clus ,1);

% the distance between each pair of points
point_dist = point_dist_whole(1:N,1:N);
point_dist2 = point_dist; 
% replace the self-distances on the diagonal by 100 before taking medians
for i=1:N
    point_dist2(i,i) = 100;
end

% Measuring the threshold
% (mean over all points of the per-column median distance, times CThresh)
thresh = mean(median(point_dist2)).* CThresh;

% Clusters is a cell array of vectors.  Each vector contains the
% indices of the points belonging to that cluster.
% Initially, the points are grouped according to same_clusters.
clusters = cell(M,1);
for cc = 1:M
    clusters{cc} = find(same_clusters == Exist_Clus(cc))';
end

% until the termination condition is met
mm = 0;                                                    
while mm < thresh
    
    % compute the distances between all pairs of clusters
    % (version 3 of cluster_distance: largest pairwise distance, scaled)
    cluster_dist = inf*ones(length(clusters));
    for c1 = 1:length(clusters)
        for c2 = (c1+1):length(clusters)
            cluster_dist(c1,c2) = cluster_distance(clusters{c1}, clusters{c2}, point_dist, 3);
        end
    end
    
    % merge the two nearest clusters
    % mm is the smallest cluster distance, ii(1)/ii(2) the pair attaining it
    [mm ii] = min(cluster_dist(:));
    [ii(1) ii(2)] = ind2sub(size(cluster_dist), ii(1));
    
    % stop when the closest pair is farther apart than the threshold or
    % when fewer than three clusters are left
    if mm > thresh || length(clusters) < 3,
        break
    end
    % find the distance of nearest clusters to other class clusters:
    Unique_Other = unique(other_clusters);
    num_clus = size(Unique_Other,1);
    
    % largest distance between the first candidate and every other-class cluster
    for k = 1:num_clus
        MN2other(k) = cluster_distance_maj(clusters{ii(1)}, N + find(other_clusters == Unique_Other(k)), point_dist_whole, 3);
    end
    flag = 1;
    % sizes of the other-class clusters, ordered like Unique_Other
    Distr = histc(other_clusters,1:max(other_clusters));
    Distr(Distr == 0) = [] ;
    % other-class clusters with more than 3 members that are closer to the
    % first candidate than the two candidates are to each other
    near_other_ind = find(MN2other < mm & Distr' > 3);
    for t = 1:length(near_other_ind)
        check_dis = cluster_distance_maj(clusters{ii(2)}, N + find(other_clusters == Unique_Other(near_other_ind(t))) , point_dist_whole, 3);
        if check_dis <mm
            % that cluster is also closer to the second candidate: reject
            % the merge. Setting one point pair to inf makes the
            % largest-distance linkage of these two clusters inf from now on.
            flag = 0;
            Num_Reject = Num_Reject + 1;
            A = clusters{ii(1)};
            B = clusters{ii(2)};
            point_dist (A(1,1),B(1,1)) = inf;
            point_dist (B(1,1),A(1,1)) = inf;
        end
    end
    % Merge only when no other-class cluster was found close to both candidates
    if flag == 1;
        clusters = merge_clusters(clusters, ii);
    end
end

% assign labels to the points, based on their cluster membership
% (the next statement has no semicolon, so the rejection count is printed)
Num_Reject
labels = zeros(N,1);
for cc = 1:length(clusters)
    labels(clusters{cc}) = cc;
end
101 | 
102 | 
103 | 
104 | %//////////////////////////////////////////////////////////
105 | % d = point_distance(X)
106 | %    Computes the pairwise distances between columns of X.
107 | %----------------------------------------------------------
function d = Point_Distance(X)
% Squared Euclidean distance between every pair of columns of X, computed
% from the expansion |a|^2 + |b|^2 - 2*a'*b.
numPts = size(X,2);
sqNorm = sum(X.^2,1);
d = repmat(sqNorm,numPts,1) + repmat(sqNorm',1,numPts) - 2*X'*X;
112 | 
113 | 
114 | 
115 | %//////////////////////////////////////////////////////////
116 | % d = cluster_distance(c1,c2,point_dist,linkage)
117 | %    Computes the pairwise distances between clusters c1
118 | %    and c2, using the point distance info in point_dist.
119 | %----------------------------------------------------------
function d = cluster_distance(c1,c2,point_dist,version)
% Distance between clusters c1 and c2 (index vectors into point_dist),
% multiplied by (size of the larger cluster)^0.04.
%   version == 1 : smallest pairwise distance
%   version == 2 : mean pairwise distance
%   otherwise    : largest pairwise distance

sizeFactor = max([length(c1),length(c2)])^0.04;
pairwise = point_dist(c1,c2);
pairwise = pairwise(:);

if version == 1
    d = min(pairwise)*sizeFactor;
elseif version == 2
    d = mean(pairwise)*sizeFactor;
else
    d = max(pairwise)*sizeFactor;
end
134 | 
function d = cluster_distance_maj(c1,c2,point_dist,version)
% d = cluster_distance_maj(c1,c2,point_dist,version)
%    Unweighted linkage value between the index sets c1 and c2, taken
%    from the block point_dist(c1,c2):
%      version == 1 : minimum of the block
%      version == 2 : mean of the block
%      otherwise    : maximum of the block
pairValues = point_dist(c1,c2);
pairValues = pairValues(:);

if version == 1
    d = min(pairValues);
elseif version == 2
    d = mean(pairValues);
else
    d = max(pairValues);
end
%//////////////////////////////////////////////////////////
% clusters = merge_clusters(clusters, indicies)
%   Appends the contents of cell indicies(2) of 'clusters' to
%   the contents of cell indicies(1), then deletes cell
%   indicies(2), so the returned cell array is one shorter.
%----------------------------------------------------------
function clusters = merge_clusters(clusters, indicies)
keepCell = indicies(1);
dropCell = indicies(2);

% Members are concatenated horizontally: kept cluster first.
clusters{keepCell} = horzcat(clusters{keepCell}, clusters{dropCell});

% Remove the absorbed cell; later cells shift down by one.
clusters(dropCell) = [];
153 | 
154 | 


--------------------------------------------------------------------------------
/nearestneighbour.m:
--------------------------------------------------------------------------------
  1 | function [idx, tri] = nearestneighbour(varargin)
  2 | %NEARESTNEIGHBOUR    find nearest neighbours
  3 | %   IDX = NEARESTNEIGHBOUR(X) finds the nearest neighbour by Euclidean
  4 | %   distance to each point (column) in X from X. X is a matrix with points
  5 | %   as columns. IDX is a vector of indices into X, such that X(:, IDX) are
  6 | %   the nearest neighbours to X. e.g. the nearest neighbour to X(:, 2) is
  7 | %   X(:, IDX(2))
  8 | %
  9 | %   IDX = NEARESTNEIGHBOUR(P, X) finds the nearest neighbour by Euclidean
 10 | %   distance to each point in P from X. P and X are both matrices with the
 11 | %   same number of rows, and points are the columns of the matrices. Output
 12 | %   is a vector of indices into X such that X(:, IDX) are the nearest
 13 | %   neighbours to P
 14 | %
 15 | %   IDX = NEARESTNEIGHBOUR(I, X) where I is a logical vector or vector of
 16 | %   indices, and X has at least two rows, finds the nearest neighbour in X
 17 | %   to each of the points X(:, I).
 18 | %   I must be a row vector to distinguish it from a single point.
 19 | %   If X has only one row, the first input is treated as a set of 1D points
 20 | %   rather than a vector of indices
 21 | %
 22 | %   IDX = NEARESTNEIGHBOUR(..., Property, Value)
 23 | %   Calls NEARESTNEIGHBOUR with the indicated parameters set. Property
 24 | %   names can be supplied as just the first letters of the property name if
 25 | %   this is unambiguous, e.g. NEARESTNEIGHBOUR(..., 'num', 5) is equivalent
 26 | %   to NEARESTNEIGHBOUR(..., 'NumberOfNeighbours', 5). Properties are case
 27 | %   insensitive, and are as follows:
 28 | %      Property:                         Value:
 29 | %      ---------                         ------
 30 | %         NumberOfNeighbours             natural number, default 1
 31 | %            NEARESTNEIGHBOUR(..., 'NumberOfNeighbours', K) finds the closest
 32 | %            K points in ascending order to each point, rather than the
 33 | %            closest point. If Radius is specified and there are not
 34 | %            sufficient numbers, fewer than K neighbours may be returned
 35 | %
 36 | %         Radius                         positive, default +inf
 37 | %            NEARESTNEIGHBOUR(..., 'Radius', R) finds neighbours within
 38 | %            radius R. If NumberOfNeighbours is not set, it will find all
 39 | %            neighbours within R, otherwise it will find at most
 40 | %            NumberOfNeighbours. The IDX matrix is padded with zeros if not
 41 | %            all points have the same number of neighbours returned. Note
 42 | %            that specifying a radius means that the Delaunay method will
 43 | %            not be used.
 44 | %
 45 | %         DelaunayMode                   {'on', 'off', |'auto'|}
 46 | %            DelaunayMode being set to 'on' means NEARESTNEIGHBOUR uses the
 47 | %            a Delaunay triangulation with dsearchn to find the points, if
 48 | %            possible. Setting it to 'auto' means NEARESTNEIGHBOUR decides
 49 | %            whether to use the triangulation, based on efficiency. Note
 50 | %            that the Delaunay triangulation will not be used if a radius
 51 | %            is specified.
 52 | %
 53 | %         Triangulation                  Valid triangulation produced by
 54 | %                                        delaunay or delaunayn
 55 | %            If a triangulation is supplied, NEARESTNEIGHBOUR will attempt
 56 | %            to use it (in conjunction with dsearchn) to find the
 57 | %            neighbours.
 58 | %
 59 | %   [IDX, TRI] = NEARESTNEIGHBOUR( ... )
 60 | %   If the Delaunay Triangulation is used, TRI is the triangulation of X'.
 61 | %   Otherwise, TRI is an empty matrix
 62 | %
 63 | %   Example:
 64 | %
 65 | %     % Find the nearest neighbour in X to each column of X
 66 | %     x = rand(2, 10);
 67 | %     idx = nearestneighbour(x);
 68 | %
 69 | %     % Find the nearest neighbours to each point in p
 70 | %     p = rand(2, 5);
 71 | %     x = rand(2, 20);
 72 | %     idx = nearestneighbour(p, x)
 73 | %
 74 | %     % Find the five nearest neighbours to points x(:, [1 6 20]) in x
 75 | %     x = rand(4, 1000)
 76 | %     idx = nearestneighbour([1 6 20], x, 'NumberOfNeighbours', 5)
 77 | %
 78 | %     % Find all neighbours within radius of 0.1 of the points in p
 79 | %     p = rand(2, 10);
 80 | %     x = rand(2, 100);
 81 | %     idx = nearestneighbour(p, x, 'r', 0.1)
 82 | %
 83 | %     % Find at most 10 nearest neighbours to point p from x within a
 84 | %     % radius of 0.2
 85 | %     p = rand(1, 2);
 86 | %     x = rand(2, 30);
 87 | %     idx = nearestneighbour(p, x, 'n', 10, 'r', 0.2)
 88 | %
 89 | %
 90 | %   See also DELAUNAYN, DSEARCHN, TSEARCH
 91 | 
 92 | %TODO    Allow other metrics than Euclidean distance
 93 | %TODO    Implement the Delaunay mode for multiple neighbours
 94 | 
 95 | % Copyright 2006 Richard Brown. This code may be freely used and
 96 | % distributed, so long as it maintains this copyright line
 97 | error(nargchk(1, Inf, nargin, 'struct'));
 98 | 
 99 | % Default parameters
100 | userParams.NumberOfNeighbours = []    ; % Finds one
101 | userParams.DelaunayMode       = 'auto'; % {'on', 'off', |'auto'|}
102 | userParams.Triangulation      = []    ;
103 | userParams.Radius             = inf   ;
104 | 
105 | % Parse inputs
106 | [P, X, fIndexed, userParams] = parseinputs(userParams, varargin{:});
107 | 
108 | % Special case uses Delaunay triangulation for speed.
109 | 
110 | % Determine whether to use Delaunay - set fDelaunay true or false
111 | nX  = size(X, 2);
112 | nP  = size(P, 2);
113 | dim = size(X, 1);
114 | 
115 | switch lower(userParams.DelaunayMode)
116 |     case 'on'
117 |         %TODO Delaunay can't currently be used for finding more than one
118 |         %neighbour
119 |         fDelaunay = userParams.NumberOfNeighbours == 1 && ...
120 |             size(X, 2) > size(X, 1)                    && ...
121 |             ~fIndexed                                  && ...
122 |             userParams.Radius == inf;
123 |     case 'off'
124 |         fDelaunay = false;
125 |     case 'auto'
126 |         fDelaunay = userParams.NumberOfNeighbours == 1 && ...
127 |             ~fIndexed                                  && ...
128 |             size(X, 2) > size(X, 1)                    && ...
129 |             userParams.Radius == inf                   && ...
130 |             ( ~isempty(userParams.Triangulation) || delaunaytest(nX, nP, dim) );
131 | end
132 | 
133 | % Try doing Delaunay, if fDelaunay.
134 | fDone = false;
135 | if fDelaunay
136 |     tri = userParams.Triangulation;
137 |     if isempty(tri)
138 |         try
139 |             tri   = delaunayn(X');
140 |         catch
141 |             msgId = 'NearestNeighbour:DelaunayFail';
142 |             msg = ['Unable to compute delaunay triangulation, not using it. ',...
143 |                 'Set the DelaunayMode parameter to ''off'''];
144 |             warning(msgId, msg);
145 |         end
146 |     end
147 |     if ~isempty(tri)
148 |         try
149 |             idx = dsearchn(X', tri, P')';
150 |             fDone = true;
151 |         catch
152 |             warning('NearestNeighbour:DSearchFail', ...
153 |                 'dsearchn failed on triangulation, not using Delaunay');
154 |         end
155 |     end
156 | else % if fDelaunay
157 |     tri = [];
158 | end
159 | 
160 | % If it didn't use Delaunay triangulation, find the neighbours directly by
161 | % finding minimum distances
162 | if ~fDone
163 |     idx = zeros(userParams.NumberOfNeighbours, size(P, 2));
164 | 
165 |     % Loop through the set of points P, finding the neighbours
166 |     Y = zeros(size(X));
167 |     for iPoint = 1:size(P, 2)
168 |         x = P(:, iPoint);
169 | 
170 |         % This is the faster than using repmat based techniques such as
171 |         % Y = X - repmat(x, 1, size(X, 2))
172 |         for i = 1:size(Y, 1)
173 |             Y(i, :) = X(i, :) - x(i);
174 |         end
175 | 
176 |         % Find the closest points, and remove matches beneath a radius
177 |         dSq = sum(abs(Y).^2, 1);
178 |         iRad = find(dSq < userParams.Radius^2);
179 |         if ~fIndexed
180 |             iSorted = iRad(minn(dSq(iRad), userParams.NumberOfNeighbours));
181 |         else
182 |             iSorted = iRad(minn(dSq(iRad), userParams.NumberOfNeighbours + 1));
183 |             iSorted = iSorted(2:end);
184 |         end
185 | 
186 |         % Remove any bad ones
187 |         idx(1:length(iSorted), iPoint) = iSorted';
188 |     end
189 |     %while ~isempty(idx) && isequal(idx(end, :), zeros(1, size(idx, 2)))
190 |     %    idx(end, :) = [];
191 |     %end
192 |     idx( all(idx == 0, 2), :) = [];
193 | end % if ~fDone
194 | if isvector(idx)
195 |     idx = idx(:)';
196 | end
197 | end % nearestneighbour
198 | 
199 | 
200 | 
201 | 
%DELAUNAYTEST   Decide whether a Delaunay triangulation combined with
%dsearchn is expected to be the faster search strategy.
%   TF = DELAUNAYTEST(NX, NP, DIM) compares the number of query points NP
%   with a threshold that depends on the number of reference points NX and
%   on the dimension DIM. The thresholds were determined empirically by the
%   original author on a Pentium M 1.6G / WinXP / 512MB / Matlab R14SP3
%   platform; their precision is not particularly important.
function tf = delaunaytest(nx, np, dim)
if dim == 2
    npThreshold = min(1.5 * nx, 400);
elseif dim == 3
    npThreshold = min(4 * nx  , 1200);
elseif dim == 4
    npThreshold = min(40 * nx , 5000);
else
    % For any other dimension (in particular above 4) it is almost
    % invariably better not to try to use the Delaunay triangulation
    tf = false;
    return
end

tf = np > npThreshold;
end % delaunaytest
222 | 
223 | 
224 | 
225 | 
%MINN   find the n smallest elements in x, and return their indices
%  ordered by ascending value
%   I = MINN(X, N) returns the indices of the N smallest entries of the
%   vector X, smallest first. Equal values keep their original relative
%   order. If N exceeds the number of elements, all indices are returned.
%   NaN entries are ranked after every other value, so they are only
%   returned when fewer than N non-NaN values exist. I has the same
%   orientation as X.
function I = minn(x, n)

% Make sure n is no larger than length(x)
n = min(n, length(x));

% The built-in sort is stable and places NaNs last, so the first n entries
% of its permutation are exactly the wanted indices. This replaces a
% hand-rolled insertion loop that never displaced a NaN sitting in its
% initial block and that failed on column vectors.
[~, order] = sort(x);
I = order(1:n);

end %minn
252 | 
253 | 
%PARSEINPUTS    Support function for nearestneighbour
%   [P, X, FINDEXED, USERPARAMS] = PARSEINPUTS(USERPARAMS, ARG1, ...)
%   separates the leading data argument(s) from the trailing
%   Property/Value pairs and writes the validated values into USERPARAMS.
%
%   Inputs:
%     userParams - struct of defaults with fields NumberOfNeighbours,
%                  DelaunayMode, Triangulation and Radius
%     varargin   - either (X, pairs...) or (P, X, pairs...), where P may
%                  also be a row vector of indices into X when X has more
%                  than one row
%
%   Outputs:
%     P          - query points, one per column
%     X          - reference points, one per column
%     fIndexed   - true when the query points are columns of X itself
%                  (single data argument, or index-vector form)
%     userParams - the input struct updated from the Property/Value pairs;
%                  NumberOfNeighbours is set to 1 if still empty
%
%   Errors (all with identifier prefix 'NearestNeighbour:'):
%     InvalidIndexVector, DimensionMismatch, propertyValueNotPair,
%     InvalidProperty, AmbiguousProperty, InvalidNumberOfNeighbours,
%     InvalidRadius, InvalidTriangulation
function [P, X, fIndexed, userParams] = parseinputs(userParams, varargin)
if length(varargin) == 1 || ~isnumeric(varargin{2})
    % Only one data argument: search X against itself
    P           = varargin{1};
    X           = varargin{1};
    fIndexed    = true;
    varargin(1) = [];
else
    P             = varargin{1};
    X             = varargin{2};
    varargin(1:2) = [];

    % Check the dimensions of X and P
    if size(X, 1) ~= 1
        % Check to see whether P is in fact a vector of indices
        if size(P, 1) == 1
            try
                P = X(:, P);
            catch
                error('NearestNeighbour:InvalidIndexVector', ...
                    'Unable to index matrix using index vector');
            end
            fIndexed = true;
        else
            fIndexed = false;
        end % if size(P, 1) == 1
    else % if size(X, 1) ~= 1
        % 1D data: a single-row P is a set of points, not indices
        fIndexed = false;
    end

    if ~fIndexed && size(P, 1) ~= size(X, 1)
        error('NearestNeighbour:DimensionMismatch', ...
            'No. of rows of input arrays doesn''t match');
    end
end
% Parse the Property/Value pairs
if rem(length(varargin), 2) ~= 0
    error('NearestNeighbour:propertyValueNotPair', ...
        'Additional arguments must take the form of Property/Value pairs');
end

propertyNames = {'numberofneighbours', 'delaunaymode', 'triangulation', ...
    'radius'};
while ~isempty(varargin)
    property = varargin{1};
    value    = varargin{2};

    % If the property has been supplied in a shortened form, lengthen it
    iProperty = find(strncmpi(property, propertyNames, length(property)));
    if isempty(iProperty)
        error('NearestNeighbour:InvalidProperty', 'Invalid Property');
    elseif length(iProperty) > 1
        error('NearestNeighbour:AmbiguousProperty', ...
            'Supplied shortened property name is ambiguous');
    end
    property = propertyNames{iProperty};

    switch property
        case 'numberofneighbours'
            % Points are the columns of X, so the number available is
            % size(X, 2) (not length(X), which is the largest dimension and
            % over-counts when X has more rows than columns). In indexed
            % mode one candidate is discarded per query, hence the minus
            % one.
            nAvailable = size(X, 2) - double(fIndexed);
            if ~isnumeric(value) || ~isscalar(value) || ...
                    rem(value, 1) ~= 0 || ...
                    value > nAvailable || ...
                    value < 1
                error('NearestNeighbour:InvalidNumberOfNeighbours', ...
                    'Number of Neighbours must be an integer, and smaller than the no. of points in X');
            end
            userParams.NumberOfNeighbours = value;

        case 'delaunaymode'
            fOn = strcmpi(value, 'on');
            if strcmpi(value, 'off')
                userParams.DelaunayMode = 'off';
            elseif fOn || strcmpi(value, 'auto')
                % Each condition below silently downgrades 'auto' to 'off'
                % and warns only when the user explicitly asked for 'on'
                if userParams.NumberOfNeighbours ~= 1
                    if fOn
                        warning('NearestNeighbour:TooMuchForDelaunay', ...
                            'Delaunay Triangulation method works only for one neighbour');
                    end
                    userParams.DelaunayMode = 'off';
                elseif size(X, 2) < size(X, 1) + 1
                    if fOn
                        warning('NearestNeighbour:TooFewDelaunayPoints', ...
                            'Insufficient points to compute Delaunay triangulation');
                    end
                    userParams.DelaunayMode = 'off';

                elseif size(X, 1) == 1
                    if fOn
                        warning('NearestNeighbour:DelaunayDimensionOne', ...
                            'Cannot compute Delaunay triangulation for 1D input');
                    end
                    userParams.DelaunayMode = 'off';
                else
                    userParams.DelaunayMode = value;
                end
            else
                warning('NearestNeighbour:InvalidOption', ...
                    'Invalid Option');
            end % if strcmpi(value, 'off')

        case 'radius'
            if isscalar(value) && isnumeric(value) && isreal(value) && value > 0
                userParams.Radius = value;
                % With a radius but no explicit count, allow every
                % available point to be returned
                if isempty(userParams.NumberOfNeighbours)
                    userParams.NumberOfNeighbours = size(X, 2) - double(fIndexed);
                end
            else
                error('NearestNeighbour:InvalidRadius', ...
                    'Radius must be a positive real number');
            end


        case 'triangulation'
            % Accept only a simplex list of the right width that
            % references every point of X
            if isnumeric(value) && size(value, 2) == size(X, 1) + 1 && ...
                    all(ismember(1:size(X, 2), value))
                userParams.Triangulation = value;
            else
                error('NearestNeighbour:InvalidTriangulation', ...
                    'Triangulation not a valid Delaunay Triangulation');
            end
    end % switch property

    varargin(1:2) = [];
end % while
if isempty(userParams.NumberOfNeighbours)
    userParams.NumberOfNeighbours = 1;
end
end %parseinputs


--------------------------------------------------------------------------------