├── ED.m
├── dtw.mexa64
├── SPIRAL
    ├── dtw_c.mexa64
    ├── exactCDmex.mexa64
    ├── README.md
    ├── matrix_completion_sparse_mex.m
    ├── construct_sparse.m
    ├── exactCDmex.c
    └── dtw_c.c
├── RWS
    ├── utilities
    │   ├── svm-scale
    │   ├── dtw_c.mexa64
    │   ├── train.mexa64
    │   ├── dtw_c.mexmaci64
    │   ├── predict.mexa64
    │   ├── svmtrain.mexa64
    │   ├── train.mexmaci64
    │   ├── libsvmread.mexa64
    │   ├── predict.mexmaci64
    │   ├── svmpredict.mexa64
    │   ├── train_omp.mexa64
    │   ├── libsvmread.mexmaci64
    │   ├── libsvmwrite.mexa64
    │   ├── predict_omp.mexa64
    │   ├── svmpredict.mexmaci64
    │   ├── svmtrain.mexmaci64
    │   ├── libsvmwrite.mexmaci64
    │   ├── demo_dtw.m
    │   ├── dtw_m.m
    │   ├── svm-scale-README
    │   └── dtw_c.c
    ├── Gun_Point_rws_R128_10fold_CV.mat
    ├── dtw_similarity_cell_mulvar.m
    ├── dtw_similarity_cell.m
    ├── rws_GenFea_example_mulvar.m
    ├── rws_GenFea_example.m
    ├── rws_GenFea_mulvar.m
    ├── rws_VaryingR_CV_R128.m
    ├── rws_VaryingR_CV_R128_mulvar.m
    ├── README.md
    └── rws_gridsearch_CV_mulvar.m
├── SINK.m
├── DM2KM.m
├── OriginalKPCA.m
├── SINKCompressed.m
├── NystromKPCA.m
├── NCCc.m
├── SBD.m
├── KernelKmeansClustering.m
├── normalizedata.m
├── cDTW.m
├── KMCompSINK.m
├── KMCompSINK_TestToTrain.m
├── KMCompGAK_TestToTrain.m
├── RandIndex.m
├── KMCompSINKCompressed.m
├── NystromMatrixGivenWandE.m
├── DMComp.m
├── KernelSCApprox.m
├── GibbsDPP.m
├── KMCompGAK.m
├── KMCompSINK_TrainToTrain.m
├── OneNNClassifierED.m
├── OneNNClassifierSBD.m
├── OneNNClassifierDTW.m
├── OneNNClassifierGAK.m
├── NCCcCompressed.m
├── OneNNClassifierSBDCompressed.m
├── LoadUCRdataset.m
├── LeaveOneOutClassifierZREP.m
├── TestVarianceExact.m
├── kMeans.m
├── OneNNClassifierZREP.m
├── SPIRALRepLearning.m
├── LOOClassifierDTW.m
├── SIDL
    ├── README
    ├── LICENSE
    ├── op_shift.m
    ├── unsup_obj.m
    ├── update_S.m
    ├── USIDL.m
    ├── main_example.m
    └── update_A_par.m
├── RunOneNNED.m
├── RunOneNNGAKTiming.m
├── RunOneNNSBD.m
├── RunSPIRALRepLearning.m
├── NystromMatrixDictionary.m
├── RunKMCompSINK.m
├── RunOneNNDTW.m
├── RunDMComp.m
├── FrequentDirections.m
├── RunClusteringKShape.m
├── RunSIDLRepLearning.m
├── RunKMCompSINKCompressed.m
├── RunClusteringKShapeORIGINAL.m
├── RunClusteringKMeans.m
├── kShape.m
├── SIDLRepLearning.m
├── RWSRepLearning.m
├── RunRWSRepLearning.m
├── RunOneNNSBDCompressed.m
├── RunClusteringSPIRAL.m
├── CollectStatistics.m
├── RepLearnKM.m
├── RunClusteringSIDL.m
├── RunKMCompSINKSPLIT.m
├── RunLOOCandOneNNDTW.m
├── TestVarianceApproximate.m
├── RunClusteringRWS.m
├── DatasetToFourier.m
├── RunTestVarianceApproximate.m
├── RepLearnFINAL.m
├── RepLearnFINALSINKComp.m
├── RunKMCompGAK.m
├── RunOneNNSINKCompressed.m
├── RunTestVarianceExact.m
├── RunRepLearningKM.m
├── mySVD.m
├── OneNNClassifierLB.m
├── RunLinearSVMRWS.m
├── RunLinearSVMSPIRAL.m
├── dtw.c
├── RunDictEvaluation.m
├── kShapeCentroids.m
├── RunVisualization.m
└── RunDictLearning.m


/ED.m:
--------------------------------------------------------------------------------
1 | function Dist = ED(x,y)
2 | % ED  Euclidean distance between two equally sized vectors x and y.
3 | 
4 | delta = x - y;
5 | Dist = sqrt(sum(delta.^2));
6 | 
7 | end


--------------------------------------------------------------------------------
/dtw.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/dtw.mexa64


--------------------------------------------------------------------------------
/SPIRAL/dtw_c.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/SPIRAL/dtw_c.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/svm-scale:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svm-scale


--------------------------------------------------------------------------------
/SPIRAL/exactCDmex.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/SPIRAL/exactCDmex.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/dtw_c.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/dtw_c.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/train.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/train.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/dtw_c.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/dtw_c.mexmaci64


--------------------------------------------------------------------------------
/RWS/utilities/predict.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/predict.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/svmtrain.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmtrain.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/train.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/train.mexmaci64


--------------------------------------------------------------------------------
/RWS/utilities/libsvmread.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmread.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/predict.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/predict.mexmaci64


--------------------------------------------------------------------------------
/RWS/utilities/svmpredict.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmpredict.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/train_omp.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/train_omp.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/libsvmread.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmread.mexmaci64


--------------------------------------------------------------------------------
/RWS/utilities/libsvmwrite.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmwrite.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/predict_omp.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/predict_omp.mexa64


--------------------------------------------------------------------------------
/RWS/utilities/svmpredict.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmpredict.mexmaci64


--------------------------------------------------------------------------------
/RWS/utilities/svmtrain.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmtrain.mexmaci64


--------------------------------------------------------------------------------
/RWS/Gun_Point_rws_R128_10fold_CV.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/Gun_Point_rws_R128_10fold_CV.mat


--------------------------------------------------------------------------------
/RWS/utilities/libsvmwrite.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmwrite.mexmaci64


--------------------------------------------------------------------------------
/SINK.m:
--------------------------------------------------------------------------------
 1 | function sim = SINK(x,y,gamma)
 2 | % SINK  Shift-INvariant Kernel between the time series x and y.
 3 | %   The raw kernel value of (x,y) is divided by the geometric mean of the
 4 | %   self-kernel values of x and y; gamma scales NCCc inside the exponential.
 5 | 
 6 | crossTerm = SumExpNCCc(x,y,gamma);
 7 | selfX = SumExpNCCc(x,x,gamma);
 8 | selfY = SumExpNCCc(y,y,gamma);
 9 | 
10 | sim = crossTerm/sqrt(selfX * selfY);
11 | 
12 | end
13 | 
14 | function sim = SumExpNCCc(x,y,gamma)
15 | % Sum of exp(gamma * NCCc(x,y)) over every entry of the NCCc sequence.
16 | 
17 | ccSeq = NCCc(x,y);
18 | sim = sum(exp(gamma*ccSeq));
19 | 
20 | end


--------------------------------------------------------------------------------
/DM2KM.m:
--------------------------------------------------------------------------------
 1 | function DM = DM2KM(DM)
 2 | % DM2KM  Convert a distance matrix into a Gaussian (RBF) kernel matrix.
 3 | %   DM is an nXn distance matrix, where n is the # of time series.
 4 | %   Each entry d becomes exp(-d^2/(2*sigma^2)), with the bandwidth sigma
 5 | %   set to the mean of all entries of the input matrix.
 6 | 
 7 | % Bandwidth: grand mean of the distances (taken before DM is overwritten).
 8 | sigma = mean(mean(DM));
 9 | 
10 | % Element-wise on the whole matrix; replaces the former double loop.
11 | DM = exp(-DM.^2/(2*sigma^2));
12 | 
13 | end


--------------------------------------------------------------------------------
/OriginalKPCA.m:
--------------------------------------------------------------------------------
 1 | function [U,ProjData] = OriginalKPCA(K)
 2 | % OriginalKPCA  Kernel PCA on a precomputed kernel matrix.
 3 | %   K        : NxN kernel (Gram) matrix
 4 | %   U        : eigenvectors of the centered kernel, one per column, sorted
 5 | %              by decreasing eigenvalue
 6 | %   ProjData : centered kernel matrix multiplied by U
 7 | 
 8 | N = size(K,1);
 9 | OneN = ones(N,N)/N;
10 | 
11 | % Double centering in feature space: K - 1N*K - K*1N + 1N*K*1N.
12 | % Subtracting 2*1N*K instead removes the column means twice and the row
13 | % means never, which leaves a non-symmetric, incorrectly centered matrix.
14 | K_Centered = K - OneN*K - K*OneN + OneN*K*OneN;
15 | 
16 | [U,L] = eig(K_Centered);
17 | 
18 | % Order the eigenvectors by decreasing eigenvalue.
19 | [~, dex] = sort(diag(L),'descend');
20 | U = real(U(:, dex));
21 | 
22 | ProjData = K_Centered*U;
23 | 
24 | end


--------------------------------------------------------------------------------
/SINKCompressed.m:
--------------------------------------------------------------------------------
 1 | function sim = SINKCompressed(x,y, gamma, k)
 2 | % SINKCompressed  Shift-INvariant Kernel built on NCCcCompressed.
 3 | %   Same normalization as a ratio of the cross term to the geometric mean
 4 | %   of the two self terms; k is forwarded to NCCcCompressed.
 5 | 
 6 | numer = SumExpNCCcCompressed(x,y,gamma,k);
 7 | normX = SumExpNCCcCompressed(x,x,gamma,k);
 8 | normY = SumExpNCCcCompressed(y,y,gamma,k);
 9 | 
10 | sim = numer/sqrt(normX * normY);
11 | 
12 | end
13 | 
14 | function sim = SumExpNCCcCompressed(x,y,gamma,k)
15 | % Sum of exp(gamma * NCCcCompressed(x,y,k)) over every entry of the sequence.
16 | 
17 | ccSeq = NCCcCompressed(x,y,k);
18 | sim = sum(exp(gamma*ccSeq));
19 | 
20 | end


--------------------------------------------------------------------------------
/NystromKPCA.m:
--------------------------------------------------------------------------------
 1 | function [V,ProjData] = NystromKPCA(Z)
 2 | % NystromKPCA  Eigen-decomposition of a FrequentDirections sketch of the mean-centered matrix Z; returns V and the projection Z*U.
 3 | Z = Z - repmat(mean(Z), size(Z,1), 1); % subtract mean(Z) from every row (column means when Z has more than one row)
 4 | 
 5 | [BSketch, ~] = FrequentDirections(Z, ceil(0.5*size(Z,2))); % second argument: half the number of columns of Z, rounded up
 6 | NewL = BSketch'*BSketch;
 7 | 
 8 | [U,L] = eig(NewL);
 9 | 
10 | V = Z * U * L^(-1/2); % NOTE(review): a zero eigenvalue in L yields Inf/NaN here -- confirm NewL is always full rank
11 | [va, dex] = sort(diag(L),'descend'); % va is never used; dex orders the eigenvalues largest first
12 | V = real(V(:, dex));
13 | 
14 | ProjData = Z*U; % NOTE(review): columns stay in eig's order, not reordered by dex like V -- confirm this is intended
15 | 
16 | 
17 | 
18 | end


--------------------------------------------------------------------------------
/NCCc.m:
--------------------------------------------------------------------------------
 1 | function cc_sequence = NCCc(x,y)
 2 | % NCCc  Coefficient-normalized cross-correlation sequence of x and y.
 3 | %   Returns a column with 2*len-1 entries (len = length of the longer
 4 | %   input), computed through the FFT and divided by norm(x)*norm(y).
 5 | 
 6 | % Work on column vectors.
 7 | if isrow(x), x = x'; end
 8 | if isrow(y), y = y'; end
 9 | 
10 | len = max(length(x),length(y));
11 | nfft = 2^nextpow2(2*len-1);
12 | 
13 | % Cross-correlation evaluated in the frequency domain.
14 | specX = fft(x,nfft);
15 | specY = fft(y,nfft);
16 | r = ifft( specX .* conj(specY) );
17 | 
18 | % Tail of r (negative lags) first, then the leading len entries.
19 | negLags = r(end-len+2:end);
20 | posLags = r(1:len);
21 | 
22 | cc_sequence = [negLags ; posLags]./(norm(x)*norm(y));
23 | 
24 | end


--------------------------------------------------------------------------------
/SBD.m:
--------------------------------------------------------------------------------
 1 | function [dist shift yshift]= SBD(x,y)
 2 | % SBD  Shape-based distance between x and y.
 3 | %   dist   : 1 minus the maximum of NCCc(x,y)
 4 | %   shift  : position of that maximum relative to the longer input length
 5 | %   yshift : y moved by shift positions, zero padded to keep its length
 6 | 
 7 | % Work on row vectors.
 8 | if iscolumn(x), x = x'; end
 9 | if iscolumn(y), y = y'; end
10 | 
11 | ccSeq = NCCc(x,y);
12 | [peak,peakPos] = max(ccSeq);
13 | 
14 | shift = peakPos - max(length(x),length(y));
15 | 
16 | if shift >= 0
17 |     % Delay y: zeros in front, drop the tail.
18 |     yshift = [zeros(1,shift) y(1:end-shift) ];
19 | else
20 |     % Advance y: drop the head, zeros at the back.
21 |     yshift = [y(-shift + 1:end) zeros(1, -shift)];
22 | end
23 | 
24 | dist = 1-peak;
25 | 
26 | end


--------------------------------------------------------------------------------
/KernelKmeansClustering.m:
--------------------------------------------------------------------------------
 1 | function mem = KernelKmeansClustering(ZExact, ZReduced, k)
 2 |     % Run kmeans with k clusters, seeded from DualDPP_FD; ZReduced is tried first and ZExact is the fallback whenever a step errors.
 3 |     try
 4 |         SmplPoints = DualDPP_FD(ZReduced, k); % result is used below as row indices of the kmeans start points
 5 |     catch
 6 |         SmplPoints = DualDPP_FD(ZExact, k); % the error above is discarded and sampling is retried on ZExact
 7 |     end
 8 |     
 9 |     
10 |     try
11 |         mem = kmeans(ZReduced,k,'Start',ZReduced(SmplPoints,:));
12 |     catch
13 |         mem = kmeans(ZExact,k,'Start',ZExact(SmplPoints,:)); % fallback: cluster the rows of ZExact instead
14 |     end
15 |     
16 |     
17 | end


--------------------------------------------------------------------------------
/normalizedata.m:
--------------------------------------------------------------------------------
 1 | function norm_data = normalizedata(test,scaling)
 2 | % normalizedata  Row-wise inverted min-max scaling.
 3 | %   For each row, the range is widened to
 4 | %   [min - min*scaling, max + max*scaling], the row is mapped linearly
 5 | %   onto that range, and the result is flipped as 1 - value.
 6 | 
 7 | numRows = size(test,1);
 8 | norm_data = zeros(numRows,size(test,2));
 9 | 
10 | for r = 1:numRows
11 |     row = test(r,:);
12 | 
13 |     lo = min(row)-min(row)*scaling;
14 |     hi = max(row)+max(row)*scaling;
15 | 
16 |     scaled = (row - lo) / ( hi - lo );
17 |     norm_data(r,:) = 1-scaled;
18 | end
19 | 
20 | end


--------------------------------------------------------------------------------
/cDTW.m:
--------------------------------------------------------------------------------
 1 | function Dist=cDTW(t,r,W)
 2 |     % cDTW  Dynamic time warping distance restricted to a band of half-width W.
 3 |     %   t, r : vectors to align
 4 |     %   W    : largest allowed |i-j| between matched positions
 5 |     %   Dist : square root of the accumulated squared differences along the
 6 |     %          cheapest path (Inf when no path fits inside the band)
 7 | 
 8 |     if iscolumn(t), t = t'; end
 9 |     if iscolumn(r), r = r'; end
10 | 
11 |     N = size(t,2);
12 |     M = size(r,2);
13 | 
14 |     % Cumulative-cost table; everything starts at Inf except the origin.
15 |     D = inf(N+1,M+1);
16 |     D(1,1) = 0;
17 | 
18 |     for i = 2:N+1
19 |         jFirst = max(2, i-W);
20 |         jLast = min(M+1, i+W);
21 |         for j = jFirst:jLast
22 |             gap = t(i-1)-r(j-1);
23 |             bestPrev = min([D(i-1,j),D(i-1,j-1),D(i,j-1)]);
24 |             D(i,j) = gap^2+bestPrev;
25 |         end
26 |     end
27 | 
28 |     Dist = sqrt(D(N+1, M+1));
29 | end


--------------------------------------------------------------------------------
/KMCompSINK.m:
--------------------------------------------------------------------------------
 1 | function [KM, DistComp] = KMCompSINK(X,gamma)
 2 |     % KMCompSINK  Pairwise SINK kernel matrix over the rows of X.
 3 |     %   KM       : m-by-m symmetric matrix; the diagonal is left at 1
 4 |     %   DistComp : number of SINK evaluations performed
 5 | 
 6 |     m = size(X,1);
 7 |     KM = ones(m,m);
 8 |     DistComp = 0;
 9 | 
10 |     for a = 1:m-1
11 |         disp(a)
12 |         seriesA = X(a,:);
13 |         for b = a+1:m
14 |             val = SINK(seriesA,X(b,:),gamma);
15 |             DistComp = DistComp+1;
16 |             % Fill both triangles from the single evaluation.
17 |             KM(a,b) = val;
18 |             KM(b,a) = val;
19 |         end
20 |     end
21 | 
22 | end


--------------------------------------------------------------------------------
/KMCompSINK_TestToTrain.m:
--------------------------------------------------------------------------------
 1 | function [KM,DistComp] = KMCompSINK_TestToTrain(X,Y,sigma)
 2 |     % KMCompSINK_TestToTrain  SINK kernel between every row of X and every row of Y.
 3 |     %   KM(i,j)  : SINK(X(i,:), Y(j,:), sigma)
 4 |     %   DistComp : number of SINK evaluations performed
 5 | 
 6 |     numX = size(X,1);
 7 |     numY = size(Y,1);
 8 | 
 9 |     KM = zeros(numX,numY);
10 |     DistComp = 0;
11 | 
12 |     for r = 1:numX
13 |         disp(r);
14 |         seriesX = X(r,:);
15 |         for c = 1:numY
16 |             seriesY = Y(c,:);
17 |             KM(r,c) = SINK(seriesX,seriesY,sigma);
18 |             DistComp = DistComp+1;
19 |         end
20 |     end
21 | end


--------------------------------------------------------------------------------
/KMCompGAK_TestToTrain.m:
--------------------------------------------------------------------------------
 1 | function [KM,DistComp] = KMCompGAK_TestToTrain(X,Y,sigma)
 2 |     % Fill KM(i,j) with logGAK of row i of X and row j of Y; DistComp counts the logGAK calls.
 3 |     [nrowsX, ~]=size(X);
 4 |     [nrowsY, ~]=size(Y);
 5 | 
 6 |     KM = zeros(nrowsX,nrowsY);
 7 | 
 8 |     DistComp = 0;
 9 |     for i=1:nrowsX
10 |             disp(i); % progress output: index of the X row being processed
11 |             tmpX = X(i,:);
12 |             parfor j=1:nrowsY % the loop over the rows of Y is the parallel one
13 |                     KM(i,j) = logGAK(tmpX',Y(j,:)',sigma,0); % both rows are transposed before the call; the last argument is fixed at 0
14 |                     DistComp = DistComp+1;
15 |             end    
16 |     end
17 | end


--------------------------------------------------------------------------------
/RandIndex.m:
--------------------------------------------------------------------------------
 1 | function RI=RandIndex(c1,c2)
 2 |   % RandIndex  Rand index between the label vectors c1 and c2.
 3 |   %   Labels are used as indices into the contingency table, so they must
 4 |   %   be positive integers.
 5 |   table = Contingency(c1,c2);
 6 | 
 7 |   total = sum(sum(table));
 8 |   rowSq = sum(sum(table,2).^2);
 9 |   colSq = sum(sum(table,1).^2);
10 | 
11 |   allPairs = nchoosek(total,2);
12 |   cellSq = sum(sum(table.^2));
13 |   marginTerm = .5*(rowSq+colSq);
14 | 
15 |   agreements = allPairs+cellSq-marginTerm;
16 | 
17 |   RI = agreements/allPairs;
18 | end
19 | 
20 | function Cont=Contingency(Mem1,Mem2)
21 |   % Count how often each (Mem1 label, Mem2 label) pair occurs.
22 |   Cont = zeros(max(Mem1),max(Mem2));
23 | 
24 |   for p = 1:length(Mem1)
25 |     a = Mem1(p);
26 |     b = Mem2(p);
27 |     Cont(a,b) = Cont(a,b)+1;
28 |   end
29 | end
30 | 


--------------------------------------------------------------------------------
/KMCompSINKCompressed.m:
--------------------------------------------------------------------------------
 1 | function [KM, DistComp] = KMCompSINKCompressed(X,gamma,k)
 2 |     % KMCompSINKCompressed  Pairwise SINKCompressed kernel matrix over the rows of X.
 3 |     %   KM       : m-by-m symmetric matrix; the diagonal is left at 1
 4 |     %   DistComp : number of SINKCompressed evaluations performed
 5 |     %   k is forwarded unchanged to SINKCompressed.
 6 | 
 7 |     numSeries = size(X,1);
 8 |     KM = ones(numSeries,numSeries);
 9 |     DistComp = 0;
10 | 
11 |     for p = 1:numSeries-1
12 |         disp(p)
13 |         seriesP = X(p,:);
14 |         for q = p+1:numSeries
15 |             seriesQ = X(q,:);
16 |             KM(p,q) = SINKCompressed(seriesP,seriesQ, gamma, k);
17 |             KM(q,p) = KM(p,q);
18 |             DistComp = DistComp+1;
19 |         end
20 |     end
21 | 
22 | end


--------------------------------------------------------------------------------
/NystromMatrixGivenWandE.m:
--------------------------------------------------------------------------------
 1 | function [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, C, Winv)
 2 | % NystromMatrixGivenWandE  Frobenius-norm errors of the approximation C*Winv*C'.
 3 | %   KM   : nXn kernel matrix, where n is the # of time series
 4 | %   C    : left factor of the approximation (n rows)
 5 | %   Winv : middle factor of the approximation
 6 | %
 7 | %   AbsFroError  : ||KM - C*Winv*C'||_F
 8 | %   RelFroError  : AbsFroError / ||KM||_F
 9 | %   NormFroError : AbsFroError / n^2
10 | 
11 | n = size(KM,1);
12 | 
13 | KMtilde = C*Winv*C';
14 | 
15 | % The residual norm is shared by all three outputs, so compute it once.
16 | AbsFroError = norm(KM-KMtilde,'fro');
17 | RelFroError = AbsFroError/norm(KM,'fro');
18 | NormFroError = AbsFroError/n^2;
19 | 
20 | end
21 | 


--------------------------------------------------------------------------------
/DMComp.m:
--------------------------------------------------------------------------------
 1 | function [DM, DistComp] = DMComp(X, DistanceIndex)
 2 | % DMComp  Pairwise distance matrix over the rows of X.
 3 | %   X             : mXn matrix, m is the # of time series
 4 | %   DistanceIndex : 1 -> ED(a,b)
 5 | %                   2 -> 1 - max(NCCc(a,b))
 6 | %   DM            : mXm symmetric matrix with zero diagonal
 7 | %   DistComp      : number of distance evaluations performed
 8 | %
 9 | %   Raises DMComp:unknownDistance for any other DistanceIndex.
10 | 
11 |     % Resolve the measure once instead of re-testing it for every pair.
12 |     % An unknown index used to leave DM all zeros while still counting
13 |     % evaluations, so it is rejected explicitly.
14 |     switch DistanceIndex
15 |         case 1
16 |             distFun = @(a,b) ED(a,b);
17 |         case 2
18 |             distFun = @(a,b) 1-max( NCCc(a,b) );
19 |         otherwise
20 |             error('DMComp:unknownDistance', ...
21 |                   'DistanceIndex must be 1 (ED) or 2 (1 - max NCCc).');
22 |     end
23 | 
24 |     m = size(X,1);
25 |     DM = zeros(m,m);
26 |     DistComp = 0;
27 | 
28 |     for i=1:m-1
29 |         for j=i+1:m
30 |             DM(i,j) = distFun(X(i,:),X(j,:));
31 |             DM(j,i) = DM(i,j);
32 |             DistComp = DistComp+1;
33 |         end
34 |     end
35 | 
36 | end


--------------------------------------------------------------------------------
/KernelSCApprox.m:
--------------------------------------------------------------------------------
 1 | function mem = KernelSCApprox(Z, k)
 2 | % Cluster the rows of Z into k groups: degree-normalize Z, take vectors from mySVD, scale their rows to unit length, run kmeans.
 3 | if size(Z,2)<k
 4 |     Z = padarray(Z,[0,abs(size(Z,2)-k-2)],0.0001*rand, 'post'); % append columns, all equal to one small random constant, until Z has k+2 columns
 5 | end
 6 | 
 7 | %Degree comp
 8 | ColumnSum = sum(Z,1);
 9 | DegMatrix = Z*ColumnSum'; % equals the row sums of Z*Z' without forming Z*Z'
10 | DegMatrixinv = spdiags(DegMatrix.^(-0.5), 0, size(Z,1), size(Z,1));  % sparse diagonal matrix holding DegMatrix.^(-1/2)
11 | ZDegNorm = (DegMatrixinv'*Z);
12 | disp('Degree Normalization Done..');
13 | 
14 | U = mySVD(ZDegNorm,k+1); % NOTE(review): presumably the leading k+1 left singular vectors -- confirm against mySVD
15 | U(:,1) = []; % discard the first returned column
16 | disp('Approximation of EigVector Done..');
17 | 
18 | U=U./repmat(sqrt(sum(U.^2,2)),1,size(U,2)); % scale every row of U to unit Euclidean length
19 | 
20 | mem = kmeans(U,k);
21 | 
22 | disp('Done with kmeans');
23 | 
24 | end
25 | 


--------------------------------------------------------------------------------
/GibbsDPP.m:
--------------------------------------------------------------------------------
 1 | function [C,DistComp] = GibbsDPP(L, mixStep, k)
 2 | % sampling subsets from (Gibbs) Markov chain k-DPP with Gauss quadrature
 3 | %
 4 | % -input
 5 | %   L: data kernel matrix, N*N where N is number of samples
 6 | %   mixStep: number of burn-in iterations
 7 | %   k: the size of sampled subset
 8 | %
 9 | % -output
10 | %   C: sampled subset, sorted in ascending order
11 | %   DistComp: second output of gauss_kdpp, returned unchanged
12 | % sample usage:
13 | %   C = GibbsDPP(L,1000,5)
14 | 
15 | addpath(genpath('GibbsDPP/.')); % path is relative to the current folder and is added on every call
16 | 
17 | permed_index = randperm(size(L,1));
18 | sample_data = permed_index(1:k); % k distinct random indices passed to gauss_kdpp as its last argument
19 | 
20 | [C,DistComp] = gauss_kdpp(L, k, @gershgorin, mixStep, sample_data);
21 | C = sort(C, 'ascend');
22 | end
23 | 
24 | 


--------------------------------------------------------------------------------
/SPIRAL/README.md:
--------------------------------------------------------------------------------
 1 | # SPIRAL
 2 | 
 3 | ## Synopsis
 4 | We convert a set of time series with equal or unequal lengths to a matrix format. The matrix format can be used for data clustering or classification using existing machine learning models. 
 5 | 
 6 | ## Testing Data
 7 | The test dataset "50words" in ./data/ comes from the UCR Archive: http://www.cs.ucr.edu/~eamonn/time_series_data/
 8 | 
 9 | The data is an n x m matrix consisting of n rows of samples. In each row, the first value is the label and the remaining values are the time series data.
10 | 
11 | ## Sample Run
12 | * need to run "mex dtw_c.c" and "mex exactCDmex.c" in matlab
13 | * sample run:
14 | 	* open Matlab
15 | 	* runme('50words')
16 | 
17 | 


--------------------------------------------------------------------------------
/KMCompGAK.m:
--------------------------------------------------------------------------------
 1 | function [KM, DistComp] = KMCompGAK(X, sigma)
 2 |     % Symmetric m-by-m matrix of logGAK values between all rows of X; DistComp counts only the off-diagonal evaluations.
 3 |     [m, ~] = size(X);
 4 | 
 5 |     KM = zeros(m,m);
 6 | 
 7 |     DistComp = 0;
 8 | 
 9 |     for i=1:m-1
10 |         disp(i); % progress output: index of the row being processed
11 |         rowi = X(i,:);
12 |         tmpVector = zeros(1,m); % row buffer filled inside the parfor; entries 1..i stay 0
13 |            parfor j=i+1:m
14 |                 rowj = X(j,:); 
15 |                 tmpVector(j) = logGAK(rowi',rowj',sigma,0); % rows are transposed before the call; the last argument is fixed at 0
16 |                 DistComp = DistComp+1;
17 |            end    
18 |         KM(i,:) = tmpVector;   % writes the whole row i; only columns i+1..m are non-zero
19 |     end
20 | 
21 |     for i=1:m-1 % mirror the upper triangle into the lower triangle
22 |            for j=i+1:m
23 |                 KM(j,i) = KM(i,j);
24 |            end    
25 |     end
26 | 
27 |     for i=1:m % diagonal: logGAK of each row with itself (not added to DistComp)
28 |         KM(i,i) = logGAK(X(i,:)',X(i,:)',sigma,0);
29 |     end
30 | 
31 | end
32 | 


--------------------------------------------------------------------------------
/KMCompSINK_TrainToTrain.m:
--------------------------------------------------------------------------------
 1 | function [KM, DistComp] = KMCompSINK_TrainToTrain(X, sigma)
 2 |     % KMCompSINK_TrainToTrain  Full SINK kernel matrix over the rows of X.
 3 |     %   KM       : m-by-m symmetric matrix, diagonal computed explicitly
 4 |     %   DistComp : number of off-diagonal SINK evaluations
 5 | 
 6 |     m = size(X,1);
 7 |     KM = zeros(m,m);
 8 |     DistComp = 0;
 9 | 
10 |     % Off-diagonal entries: evaluate the upper triangle, mirror to the lower.
11 |     for a = 1:m-1
12 |         disp(a);
13 |         seriesA = X(a,:);
14 |         for b = a+1:m
15 |             KM(a,b) = SINK(seriesA,X(b,:),sigma);
16 |             KM(b,a) = KM(a,b);
17 |             DistComp = DistComp+1;
18 |         end
19 |     end
20 | 
21 |     % Diagonal entries (not counted in DistComp).
22 |     for a = 1:m
23 |         seriesA = X(a,:);
24 |         KM(a,a) = SINK(seriesA,seriesA,sigma);
25 |     end
26 | 
27 | end
28 | 


--------------------------------------------------------------------------------
/OneNNClassifierED.m:
--------------------------------------------------------------------------------
 1 | function acc = OneNNClassifierED(DS)
 2 |     % OneNNClassifierED  1-nearest-neighbour accuracy under ED.
 3 |     %   Each row of DS.Test takes the label of the closest row of DS.Train
 4 |     %   (first one wins on ties); acc is the fraction labelled correctly.
 5 | 
 6 |     numTest = DS.TestInstancesCount;
 7 |     numTrain = DS.TrainInstancesCount;
 8 |     correct = 0;
 9 | 
10 |     for q = 1:numTest
11 |         disp(q);
12 |         query = DS.Test(q,:);
13 | 
14 |         nearestDist = inf;
15 |         for t = 1:numTrain
16 |             d = ED(DS.Train(t,:), query);
17 |             if d < nearestDist
18 |                 nearestDist = d;
19 |                 predicted = DS.TrainClassLabels(t);
20 |             end
21 |         end
22 | 
23 |         if DS.TestClassLabels(q) == predicted
24 |             correct = correct + 1;
25 |         end
26 |     end
27 | 
28 |     acc = correct / numTest;
29 | end
30 | 


--------------------------------------------------------------------------------
/OneNNClassifierSBD.m:
--------------------------------------------------------------------------------
 1 | function acc = OneNNClassifierSBD(DS)
 2 |     % OneNNClassifierSBD  1-nearest-neighbour accuracy under 1 - max(NCCc).
 3 |     %   Each row of DS.Test takes the label of the closest row of DS.Train
 4 |     %   (first one wins on ties); acc is the fraction labelled correctly.
 5 | 
 6 |     testCount = DS.TestInstancesCount;
 7 |     trainCount = DS.TrainInstancesCount;
 8 |     hits = 0;
 9 | 
10 |     for q = 1:testCount
11 |         disp(q);
12 |         query = DS.Test(q,:);
13 | 
14 |         smallest = inf;
15 |         for t = 1:trainCount
16 |             ccSeq = NCCc(DS.Train(t,:), query);
17 |             d = 1-max( ccSeq );
18 |             if d < smallest
19 |                 smallest = d;
20 |                 predicted = DS.TrainClassLabels(t);
21 |             end
22 |         end
23 | 
24 |         if DS.TestClassLabels(q) == predicted
25 |             hits = hits + 1;
26 |         end
27 |     end
28 |     acc = hits / testCount;
29 | end
30 | 


--------------------------------------------------------------------------------
/OneNNClassifierDTW.m:
--------------------------------------------------------------------------------
 1 | function acc = OneNNClassifierDTW(DS,window)
 2 |     % OneNNClassifierDTW  1-nearest-neighbour accuracy under dtw(.,.,window).
 3 |     %   Each row of DS.Test takes the label of the closest row of DS.Train
 4 |     %   (first one wins on ties); acc is the fraction labelled correctly.
 5 | 
 6 |     nTest = DS.TestInstancesCount;
 7 |     nTrain = DS.TrainInstancesCount;
 8 |     numCorrect = 0;
 9 | 
10 |     for q = 1:nTest
11 |         disp(q);
12 |         query = DS.Test(q,:);
13 | 
14 |         minDist = inf;
15 |         for t = 1:nTrain
16 |             d = dtw(DS.Train(t,:), query, window);
17 |             if d < minDist
18 |                 minDist = d;
19 |                 predicted = DS.TrainClassLabels(t);
20 |             end
21 |         end
22 | 
23 |         if DS.TestClassLabels(q) == predicted
24 |             numCorrect = numCorrect + 1;
25 |         end
26 |     end
27 | 
28 |     acc = numCorrect / nTest;
29 | end
30 | 


--------------------------------------------------------------------------------
/OneNNClassifierGAK.m:
--------------------------------------------------------------------------------
 1 | function acc = OneNNClassifierGAK(DS,sigma)
 2 |     % OneNNClassifierGAK  1-nearest-neighbour accuracy using logGAK as a similarity.
 3 |     %   Each row of DS.Test takes the label of the DS.Train row with the
 4 |     %   largest logGAK value (first one wins on ties).
 5 |     %   acc is the fraction of test rows labelled correctly.
 6 | 
 7 |     acc = 0;
 8 | 
 9 |     for id = 1 : DS.TestInstancesCount
10 |         disp(id);
11 |         classify_this = DS.Test(id,:);
12 | 
13 |         % Start the maximum search at -inf. Starting at 0 skipped every
14 |         % candidate whose similarity is <= 0, so the label could stay unset
15 |         % or be carried over from the previous test row.
16 |         best_so_far = -inf;
17 | 
18 |         for i = 1 : DS.TrainInstancesCount
19 |             similarity = logGAK(DS.Train(i,:)',classify_this',sigma,0);
20 | 
21 |             if similarity > best_so_far
22 |                 predicted = DS.TrainClassLabels(i);
23 |                 best_so_far = similarity;
24 |             end
25 |         end
26 | 
27 |         if (DS.TestClassLabels(id) == predicted)
28 |             acc = acc + 1;
29 |         end
30 |     end
31 | 
32 |     acc = acc / DS.TestInstancesCount;
33 | end
34 | 


--------------------------------------------------------------------------------
/NCCcCompressed.m:
--------------------------------------------------------------------------------
 1 | function cc_sequence = NCCcCompressed(x, y, k)
 2 | % x is a time series
 3 | % y is a time series
 4 | % k is the # of Fourier coefficients to keep
 5 | % Returns a column of 2*len-1 cross-correlation values (len = longer input length), divided by norm(x)*norm(y).
 6 | if isrow(x)
 7 |     x=x';
 8 | end
 9 | if isrow(y)
10 |     y=y';
11 | end
12 | 
13 | len = max(length(x),length(y));
14 | 
15 | fftlength = 2^nextpow2(2*len-1);
16 | 
17 | FFTx = leading_fourier(fft(x',fftlength),k); % x' turns the column back into a row, so the spectrum is a row vector
18 | FFTy = leading_fourier(fft(y',fftlength),k);
19 | 
20 | r = ifft( FFTx.' .* conj(FFTy.') ); % .' makes the truncated spectra columns again before the product
21 | 
22 | r = [r(end-len+2:end) ; r(1:len)]; % last len-1 entries of r first, then the first len entries
23 | 
24 | cc_sequence = r./(norm(x)*norm(y)); % the norms come from the original series, not from the truncated spectra
25 | 
26 | end
27 | 
28 | function x = leading_fourier(x, k)
29 | % leading_fourier(x,k) returns leading k and trailing k-1 (real is symmetric) coeffs
30 | %   by zeroing out the middle window; no renormalization is applied here
31 | m = floor(size(x, 2) / 2) + 1; % size(x,2) is the spectrum length because x arrives as a row
32 | x((k+1):(m - 1 + m - k)) = 0; % zero entries k+1 through 2*m-1-k
33 | end


--------------------------------------------------------------------------------
/SPIRAL/matrix_completion_sparse_mex.m:
--------------------------------------------------------------------------------
 1 | function X=matrix_completion_sparse_mex(A,d,Omega,X0,options)
 2 | % matrix completion: 
 3 | % A- given matrix, each row only has the nonzeros indices;
 4 | % d-diagonal indices of A
 5 | % Omega- visible indices:consists of n vectors, must be symmetric;
 6 | % X0 initial-all zeros
 7 | % options- struct; only options.maxiter is read here
 8 | % preprocessing:
 9 | %mex exactCDmex.c
10 | fprintf('Step 2: matrix factorization...\n');
11 | n=size(A,2); % A is indexed as a cell array below, so n is its number of columns of cells
12 | m=0; % overwritten by max(lenA) below
13 | lenA=zeros(n,1);
14 | for i=1:n
15 | 	%if (length(A{i})>m)
16 | 	%	m=length(A{i});
17 | 	%end
18 | 	lenA(i)=length(A{i}); % number of stored entries in cell i
19 | end
20 | 
21 | m=max(lenA);
22 | 
23 | nA=zeros(n,m); % dense n-by-m copy of A, rows zero padded on the right
24 | nO=nA;
25 | for i=1:n
26 | 	nA(i,1:length(A{i}))=A{i};
27 | 	nO(i,1:length(A{i}))=Omega{i}-1; % indices shifted down by one -- presumably to 0-based for the MEX code, confirm in exactCDmex.c
28 | end
29 | d=d-1; % same shift by one for the diagonal indices
30 | nR=nA;
31 | k=size(X0,2); % k is not used afterwards
32 | X=exactCDmex(nA,nR,nO,X0,lenA,d,norm(nA,'fro'),options.maxiter);
33 | 
34 | end
35 | 
36 | 


--------------------------------------------------------------------------------
/OneNNClassifierSBDCompressed.m:
--------------------------------------------------------------------------------
 1 | function acc = OneNNClassifierSBDCompressed(DS,numofcoeffs)
 2 |     % OneNNClassifierSBDCompressed  1-NN accuracy under 1 - max(NCCcCompressed).
 3 |     %   numofcoeffs is forwarded to NCCcCompressed as its third argument.
 4 |     %   Ties keep the first training row; acc is the fraction labelled correctly.
 5 | 
 6 |     testTotal = DS.TestInstancesCount;
 7 |     trainTotal = DS.TrainInstancesCount;
 8 |     matched = 0;
 9 | 
10 |     for q = 1:testTotal
11 |         disp(q);
12 |         query = DS.Test(q,:);
13 | 
14 |         closest = inf;
15 |         for t = 1:trainTotal
16 |             ccSeq = NCCcCompressed(DS.Train(t,:), query, numofcoeffs);
17 |             d = 1-max( ccSeq );
18 |             if d < closest
19 |                 closest = d;
20 |                 predicted = DS.TrainClassLabels(t);
21 |             end
22 |         end
23 | 
24 |         if DS.TestClassLabels(q) == predicted
25 |             matched = matched + 1;
26 |         end
27 |     end
28 |     acc = matched / testTotal;
29 | end
30 | 


--------------------------------------------------------------------------------
/RWS/utilities/demo_dtw.m:
--------------------------------------------------------------------------------
 1 | % Copyright (C) 2013 Quan Wang <wangq10@rpi.edu>,
 2 | % Signal Analysis and Machine Perception Laboratory,
 3 | % Department of Electrical, Computer, and Systems Engineering,
 4 | % Rensselaer Polytechnic Institute, Troy, NY 12180, USA
 5 | 
 6 | % this is a demo showing the use of our dynamic time warping package 
 7 | % we provide both Matlab version and C/MEX version
 8 | % the C/MEX version is much faster and highly recommended
 9 | 
10 | % start from a clean workspace, console and figure set
11 | clear;
12 | clc;
13 | close all;
14 | 
15 | % build the C/MEX implementation from source
16 | mex dtw_c.c;
17 | 
18 | % two random 3-channel signals of different length and the warping window
19 | a = rand(500,3);
20 | b = rand(520,3);
21 | w = 50;
22 | 
23 | % pure-Matlab implementation from this package
24 | timerM = tic;
25 | d1 = dtw_m(a,b,w);
26 | t1 = toc(timerM);
27 | 
28 | % C/MEX implementation from this package
29 | timerC = tic;
30 | d2 = dtw_c(a,b,w);
31 | t2 = toc(timerC);
32 | 
33 | % whichever "dtw" is on the path, called on the transposed signals
34 | timerI = tic;
35 | d3 = dtw(a',b',w);
36 | t3 = toc(timerI);
37 | 
38 | fprintf('Using Matlab dtw: distance=%f, running time=%f\n',d1,t1);
39 | fprintf('Using C/MEX dtw: distance=%f, running time=%f\n',d2,t2);
40 | fprintf('Using Matlab Internal dtw: distance=%f, running time=%f\n',d3,t3);


--------------------------------------------------------------------------------
/LoadUCRdataset.m:
--------------------------------------------------------------------------------
 1 | function DS = LoadUCRdataset(datasetname, datadir)
 2 | % LOADUCRDATASET Load the TRAIN/TEST split of one UCR dataset into a struct.
 3 | %   DS = LoadUCRdataset(datasetname) reads
 4 | %   <root>/<datasetname>/<datasetname>_TRAIN and ..._TEST with load(), using
 5 | %   the default archive root below.
 6 | %   DS = LoadUCRdataset(datasetname, datadir) reads from datadir instead, so
 7 | %   the loader also works outside the original cluster file system.
 8 | %
 9 | %   In both files the first column is taken as the class label and the
10 | %   remaining columns as the series values.
11 | %
12 | %   Fields of DS: TrainClassLabels, TestClassLabels, DataClassLabels,
13 | %   Train, Test, Data (train rows followed by test rows), ClassNames
14 | %   (unique training labels) and the three *InstancesCount row counts.
15 | 
16 |     if nargin < 2 || isempty(datadir)
17 |         datadir = '/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/';
18 |     end
19 | 
20 |     TRAIN = load(fullfile(datadir, datasetname, [datasetname, '_TRAIN']));
21 |     TEST  = load(fullfile(datadir, datasetname, [datasetname, '_TEST']));
22 | 
23 |     % Optional shuffling of the training rows (disabled in the original):
24 |     %rng('default');
25 |     %Train_numSamples = size(TRAIN,1);
26 |     %TRAIN = TRAIN(randperm(Train_numSamples),:); % shuffle the data
27 | 
28 |     % split label column from the series values
29 |     TRAIN_labels = TRAIN(:,1);
30 |     TRAIN(:,1) = [];
31 |     TEST_labels = TEST(:,1);
32 |     TEST(:,1) = [];
33 | 
34 |     DS.TrainClassLabels = TRAIN_labels;
35 |     DS.TestClassLabels = TEST_labels;
36 |     DS.DataClassLabels = [TRAIN_labels;TEST_labels];
37 | 
38 |     DS.Train = TRAIN;
39 |     DS.Test = TEST;
40 |     DS.Data = [TRAIN;TEST];
41 | 
42 |     DS.ClassNames = unique(TRAIN_labels);
43 | 
44 |     DS.TrainInstancesCount = size(DS.Train,1);
45 |     DS.TestInstancesCount = size(DS.Test,1);
46 |     DS.DataInstancesCount = size(DS.Data,1);
47 | end


--------------------------------------------------------------------------------
/LeaveOneOutClassifierZREP.m:
--------------------------------------------------------------------------------
 1 | function acc = LeaveOneOutClassifierZREP(DS,ZRepresentation)
 2 | % LEAVEONEOUTCLASSIFIERZREP Leave-one-out 1-NN accuracy on the training rows.
 3 | %   The first DS.TrainInstancesCount rows of ZRepresentation are used as the
 4 | %   training representations. Each row is labelled with the label of its
 5 | %   nearest other row under ED(.,.)^2, and acc is the fraction of rows
 6 | %   whose label matches DS.TrainClassLabels.
 7 | 
 8 |     ZRepTrain = ZRepresentation(1:DS.TrainInstancesCount,:);
 9 | 
10 |     correct = 0;
11 | 
12 |     for id = 1 : DS.TrainInstancesCount
13 | 
14 |         classify_this = ZRepTrain(id,:);
15 | 
16 |         best_so_far = inf;
17 |         % Reset the prediction for every held-out row. Previously the label
18 |         % lived in a variable named "class" (shadowing the builtin) that was
19 |         % only assigned inside the loop, so a row with no valid neighbour
20 |         % (single training row, or all distances NaN/Inf) reused the
21 |         % previous row's label or failed on an unassigned variable.
22 |         % NaN never compares equal to a label, so such a row is a miss.
23 |         predicted = NaN;
24 | 
25 |         for i = 1 : DS.TrainInstancesCount
26 | 
27 |             if (i == id)
28 |                 continue;   % leave the held-out row itself out
29 |             end
30 | 
31 |             compare_to_this = ZRepTrain(i,:);
32 |             distance = ED(compare_to_this, classify_this)^2;
33 | 
34 |             if distance < best_so_far
35 |                 predicted = DS.TrainClassLabels(i);
36 |                 best_so_far = distance;
37 |             end
38 | 
39 |         end
40 | 
41 |         if (DS.TrainClassLabels(id) == predicted)
42 |             correct = correct + 1;
43 |         end
44 | 
45 |     end
46 |     acc = correct / DS.TrainInstancesCount;
47 | end


--------------------------------------------------------------------------------
/TestVarianceExact.m:
--------------------------------------------------------------------------------
 1 | function [Variance,VarExplainedTop5,VarExplainedTop10,VarExplainedTop20,DimFor98,DimFor95,DimFor90,DimFor85,DimFor80,VarExplainedCumSum]=TestVarianceExact(KM)
 2 | % TESTVARIANCEEXACT Spectrum statistics of a (non-empty, square) matrix KM.
 3 | %   Variance            : var() over all entries of KM.
 4 | %   VarExplainedCumSum  : cumulative sum of the eigenvalues of KM, sorted in
 5 | %                         descending order, divided by their total.
 6 | %   VarExplainedTopK    : VarExplainedCumSum at K = 5, 10, 20. When KM has
 7 | %                         fewer than K eigenvalues the last available entry
 8 | %                         is returned instead of raising an index error.
 9 | %   DimForP             : first index at which VarExplainedCumSum reaches
 10 | %                         P/100 for P = 98, 95, 90, 85, 80 (empty if never).
11 | 
12 |     % Flatten row by row in one step; the original grew a vector inside a
13 |     % loop, which copies the data repeatedly. Same element order as before.
14 |     Variance = var(reshape(KM.', 1, []));
15 | 
16 |     % Only the eigenvalues are needed, so skip computing the eigenvectors.
17 |     eigValue = sort(eig(KM), 'descend');
18 | 
19 |     VarExplainedCumSum = cumsum(eigValue)/sum(eigValue);
20 | 
21 |     % Clamp K so that matrices smaller than 20-by-20 are supported.
22 |     nEig = numel(VarExplainedCumSum);
23 |     VarExplainedTop5  = VarExplainedCumSum(min(5, nEig));
24 |     VarExplainedTop10 = VarExplainedCumSum(min(10, nEig));
25 |     VarExplainedTop20 = VarExplainedCumSum(min(20, nEig));
26 | 
27 |     DimFor98 = find(VarExplainedCumSum>=0.98,1);
28 |     DimFor95 = find(VarExplainedCumSum>=0.95,1);
29 |     DimFor90 = find(VarExplainedCumSum>=0.90,1);
30 |     DimFor85 = find(VarExplainedCumSum>=0.85,1);
31 |     DimFor80 = find(VarExplainedCumSum>=0.80,1);
32 | 
33 | end


--------------------------------------------------------------------------------
/kMeans.m:
--------------------------------------------------------------------------------
 1 | function [mem,cent] = kMeans(A, K)
 2 | % KMEANS k-means clustering of the rows of A using the project ED distance.
 3 | %   A    : m-by-d matrix, one observation per row.
 4 | %   K    : number of clusters.
 5 | %   mem  : m-by-1 cluster index of every row.
 6 | %   cent : K-by-d matrix of cluster centroids.
 7 | %   Memberships start from a random assignment; at most 100 iterations run
 8 | %   and the loop stops early once the assignment no longer changes.
 9 | 
10 | m = size(A, 1);
11 | mem = ceil(K*rand(m, 1));
12 | cent = zeros(K, size(A, 2));
13 | 
14 | for iter = 1:100
15 |     disp(iter);   % progress output
16 |     prev_mem = mem;
17 | 
18 |     % recompute every centroid from the current assignment
19 |     for k = 1:K
20 |         cent(k,:) = kmeans_centroid(mem, A, k, cent(k,:));
21 |     end
22 | 
23 |     % distance of every row to every centroid
24 |     D = zeros(m,K);
25 |     for i = 1:m
26 |         for k = 1:K
27 |             D(i,k) = ED(A(i,:),cent(k,:));
28 |         end
29 |     end
30 | 
31 |     % reassign each row to its closest centroid
32 |     [~, mem] = min(D,[],2);
33 | 
34 |     if isequal(prev_mem, mem)
35 |         break;
36 |     end
37 | end
38 | 
39 | end
40 | 
41 | function ksc = kmeans_centroid(mem, A, k, cur_center) %#ok<INUSD>
42 | % Centroid of the rows of A currently assigned to cluster k.
43 | % An empty cluster gets an all-zero centroid. cur_center is accepted for
44 | % interface compatibility but is not used.
45 | 
46 | members = A(mem==k,:);
47 | 
48 | if size(members,1) == 0
49 |     ksc = zeros(1, size(A,2));
50 |     return;
51 | end
52 | 
53 | % Average over rows explicitly. mean(members) without a dimension averages
54 | % ACROSS a single remaining row, returning one scalar that would then be
55 | % broadcast over the whole centroid for every single-member cluster.
56 | ksc = mean(members, 1);
57 | 
58 | end


--------------------------------------------------------------------------------
/OneNNClassifierZREP.m:
--------------------------------------------------------------------------------
 1 | function acc = OneNNClassifierZREP(DS,ZRepresentation)
 2 | % ONENNCLASSIFIERZREP 1-NN test accuracy on learned representations.
 3 | %   The first DS.TrainInstancesCount rows of ZRepresentation are the
 4 | %   training representations and the remaining rows the test ones. Every
 5 | %   test row is labelled with the label of its nearest training row under
 6 | %   ED(.,.)^2; acc is the fraction of test rows whose label matches
 7 | %   DS.TestClassLabels.
 8 | 
 9 |     ZRepTrain = ZRepresentation(1:DS.TrainInstancesCount,:);
10 |     ZRepTest = ZRepresentation(DS.TrainInstancesCount+1:end,:);
11 | 
12 |     correct = 0;
13 | 
14 |     for id = 1 : DS.TestInstancesCount
15 | 
16 |         classify_this = ZRepTest(id,:);
17 | 
18 |         best_so_far = inf;
19 |         % Reset the prediction for every query. Previously the label lived
20 |         % in a variable named "class" (shadowing the builtin) that was only
21 |         % assigned inside the loop, so a query with no valid distance
22 |         % (all NaN/Inf, or an empty training set) reused the previous
23 |         % query's label or failed on an unassigned variable.
24 |         % NaN never compares equal to a label, so such a query is a miss.
25 |         predicted = NaN;
26 | 
27 |         for i = 1 : DS.TrainInstancesCount
28 | 
29 |             compare_to_this = ZRepTrain(i,:);
30 |             distance = ED(compare_to_this, classify_this)^2;
31 | 
32 |             if distance < best_so_far
33 |                 predicted = DS.TrainClassLabels(i);
34 |                 best_so_far = distance;
35 |             end
36 |         end
37 | 
38 |         if (DS.TestClassLabels(id) == predicted)
39 |             correct = correct + 1;
40 |         end
41 |     end
42 | 
43 |     acc = correct / DS.TestInstancesCount;
44 | end


--------------------------------------------------------------------------------
/SPIRALRepLearning.m:
--------------------------------------------------------------------------------
 1 | function ZRep = SPIRALRepLearning(DS,coeffs)
 2 | % SPIRALREPLEARNING Learn a coeffs-column representation with SPIRAL.
 3 | %   The rows of DS.Train followed by the rows of DS.Test are turned into a
 4 | %   cell array of column vectors, passed to construct_sparse with a sample
 5 | %   budget of n*coeffs (capped at floor(n*n/2)), and the result is
 6 | %   completed by matrix_completion_sparse_mex (20 iterations, zero start).
 7 | %   ZRep holds the output rows, training rows first and test rows after.
 8 | %   The elapsed time of the completion step is printed by toc.
 9 | 
10 | nTrain = size(DS.Train,1);
11 | nTest = size(DS.Test,1);
12 | 
13 | % Preallocate instead of growing the cell array inside the loops.
14 | X = cell(1, nTrain + nTest);
15 | for i = 1:nTrain
16 | 	X{i} = DS.Train(i,:)';
17 | end
18 | for i = 1:nTest
19 | 	X{nTrain+i} = DS.Test(i,:)';
20 | end
21 | 
22 | n = size(X,2);
23 | %m=n*20*ceil(log(n));
24 | % so that it's comparable to our method
25 | m = n*coeffs;
26 | if (2*m > n*n)
27 | 	m = floor(n*n/2);
28 | end
29 | 
30 | [D,Omega,d] = construct_sparse(X,n,m);
31 | X0 = zeros(n,coeffs);
32 | options.maxiter = 20;
33 | tic;X_all=matrix_completion_sparse_mex(D,d,Omega,X0,options);toc
34 | 
35 | % training rows first, then every remaining row
36 | ZRep = [X_all(1:nTrain,:); X_all(nTrain+1:size(X_all,1),:)];
37 | 
38 | end


--------------------------------------------------------------------------------
/LOOClassifierDTW.m:
--------------------------------------------------------------------------------
 1 | function acc = LOOClassifierDTW(DS,window)
 2 | % LOOCLASSIFIERDTW Leave-one-out 1-NN accuracy on DS.Train using dtw().
 3 | %   Every training row is labelled with the label of its nearest other
 4 | %   training row under dtw(row, other, window); acc is the fraction of rows
 5 | %   whose label matches DS.TrainClassLabels.
 6 | 
 7 |     correct = 0;
 8 |     nTrain = DS.TrainInstancesCount;
 9 | 
10 |     for id = 1 : nTrain
11 | 
12 |         disp(id);   % progress output
13 |         classify_this = DS.Train(id,:);
14 | 
15 |         % distance to every other row; the held-out row stays at Inf
16 |         distances = inf(nTrain,1);
17 |         for i = 1 : nTrain
18 |             if (i ~= id)
19 |                 distances(i) = dtw(classify_this,DS.Train(i,:),window);
20 |             end
21 |         end
22 | 
23 |         % min() skips NaN and returns the first minimum, matching the
24 |         % strict "<" scan it replaces.
25 |         [best_so_far, nearest] = min(distances);
26 | 
27 |         % Reset the prediction for every held-out row. Previously the label
28 |         % lived in a variable named "class" (shadowing the builtin) that was
29 |         % only assigned when a finite distance was found, so a row without
30 |         % a valid neighbour reused the previous row's label or failed on an
31 |         % unassigned variable. NaN never compares equal to a label.
32 |         predicted = NaN;
33 |         if best_so_far < inf
34 |             predicted = DS.TrainClassLabels(nearest);
35 |         end
36 | 
37 |         if (DS.TrainClassLabels(id) == predicted)
38 |             correct = correct + 1;
39 |         end
40 | 
41 |     end
42 | 
43 |     acc = correct / nTrain;
44 | 
45 | end


--------------------------------------------------------------------------------
/SIDL/README:
--------------------------------------------------------------------------------
 1 | Code for Efficient Shift-Invariant Dictionary Learning
 2 | 
 3 | Guoqing Zheng, 2016
 4 | 
 5 | 1. This package provides a sample implementation of the SIDL model proposed in [1]. The main entry for the model
 6 |    is the function defined in "USIDL.m" (See the comment in the file for detailed parameter explanations);
 7 | 
 8 | 2. An example main file "main_example.m" to run the model on the Trace data set is also provided. 
 9 |    The Trace data set is kindly contributed by [2];
10 | 
11 | 3. This package (excluding the Trace data set) is released under the MIT license (See LICENSE for details); 
12 | 
13 | 4. If you find this package useful, please cite [1] in your work.
14 | 
15 | Reference:
16 | 
17 | [1] Efficient Shift-Invariant Dictionary Learning
18 |     Guoqing Zheng, Yiming Yang, Jaime Carbonell 
19 |     In proceedings of the 22nd ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2016), San Francisco, CA.
20 | 
21 | [2] The UCR Time Series Classification Archive
22 |     Yanping Chen, Eamonn Keogh, Bing Hu, Nurjahan Begum, Anthony Bagnall, Abdullah Mueen and Gustavo Batista.
23 |     http://www.cs.ucr.edu/~eamonn/time_series_data/
24 | 


--------------------------------------------------------------------------------
/RWS/utilities/dtw_m.m:
--------------------------------------------------------------------------------
 1 | % Copyright (C) 2013 Quan Wang <wangq10@rpi.edu>,
 2 | % Signal Analysis and Machine Perception Laboratory,
 3 | % Department of Electrical, Computer, and Systems Engineering,
 4 | % Rensselaer Polytechnic Institute, Troy, NY 12180, USA
 5 | 
 6 | % dynamic time warping of two signals
 7 | 
 8 | function d=dtw_m(s,t,w)
 9 | % Dynamic time warping distance between two multi-channel signals.
10 | % s: signal 1, size ns*k, rows are time steps, columns are channels
11 | % t: signal 2, size nt*k, rows are time steps, columns are channels
12 | % w: window parameter (default Inf, i.e. unconstrained);
13 | %      s(i) may only be matched with t(j) when |i-j|<=w
14 | % d: resulting distance
15 | 
16 | if nargin<3, w=Inf; end
17 | 
18 | if size(s,2)~=size(t,2)
19 |     error('Error in dtw(): the dimensions of the two input signals do not match.');
20 | end
21 | 
22 | lenS=size(s,1);
23 | lenT=size(t,1);
24 | 
25 | % the window can never be narrower than the length difference
26 | w=max(w, abs(lenS-lenT));
27 | 
28 | %% cumulative cost table, shifted by one so that acc(1,1) is the empty match
29 | acc=Inf(lenS+1,lenT+1);
30 | acc(1,1)=0;
31 | 
32 | %% dynamic programming over the cells inside the window
33 | for row=1:lenS
34 |     firstCol=max(row-w,1);
35 |     lastCol=min(row+w,lenT);
36 |     for col=firstCol:lastCol
37 |         stepCost=norm(s(row,:)-t(col,:));
38 |         cheapestPrev=min( [acc(row,col+1), acc(row+1,col), acc(row,col)] );
39 |         acc(row+1,col+1)=stepCost+cheapestPrev;
40 |     end
41 | end
42 | 
43 | d=acc(lenS+1,lenT+1);


--------------------------------------------------------------------------------
/RunOneNNED.m:
--------------------------------------------------------------------------------
 1 | function RunOneNNED(DataSetStartIndex, DataSetEndIndex)
 2 | % RUNONENNED Run the 1-NN ED classifier on a range of UCR datasets.
 3 | %   Datasets whose position in the sorted listing lies in
 4 | %   [DataSetStartIndex, DataSetEndIndex] are loaded and classified with
 5 | %   OneNNClassifierED; accuracy and elapsed time are written, tab-delimited,
 6 | %   to one result file per dataset.
 7 | 
 8 |     % entries 3..130 of the listing are used: the first two are assumed to
 9 |     % be '.' and '..', and the UCR Archive 2018 has 128 datasets
10 |     listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
11 |     Datasets = sort({listing(3:130).name});
12 |     numDatasets = length(Datasets);
13 | 
14 |     for i = 1:numDatasets
15 | 
16 |         if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
17 |             continue;   % outside the requested range
18 |         end
19 | 
20 |         datasetName = char(Datasets(i));
21 | 
22 |         % only row i is filled; all other rows stay zero
23 |         Results = zeros(numDatasets,2);
24 | 
25 |         disp(['Dataset being processed: ', datasetName]);
26 |         DS = LoadUCRdataset(datasetName);
27 | 
28 |         tic;
29 |         Results(i,1) = OneNNClassifierED(DS);
30 |         Results(i,2) = toc;
31 | 
32 |         outFile = strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunOneNNED/', 'RunOneNNED_Dataset_', num2str(i));
33 |         dlmwrite( outFile, Results, 'delimiter', '\t');
34 | 
35 |     end
36 | 
37 | end


--------------------------------------------------------------------------------
/RunOneNNGAKTiming.m:
--------------------------------------------------------------------------------
 1 | function RunOneNNGAKTiming(DataSetStartIndex, DataSetEndIndex)
 2 | % RUNONENNGAKTIMING Time the 1-NN GAK classifier on a range of UCR datasets.
 3 | %   Datasets whose position in the sorted listing lies in
 4 | %   [DataSetStartIndex, DataSetEndIndex] are loaded and classified with
 5 | %   OneNNClassifierGAK(DS,10); accuracy and elapsed time are written,
 6 | %   tab-delimited, to one result file per dataset.
 7 | 
 8 |     % entries 3..130 of the listing are used: the first two are assumed to
 9 |     % be '.' and '..', and the UCR Archive 2018 has 128 datasets
10 |     listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
11 |     Datasets = sort({listing(3:130).name});
12 |     numDatasets = length(Datasets);
13 | 
14 |     for i = 1:numDatasets
15 | 
16 |         if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
17 |             continue;   % outside the requested range
18 |         end
19 | 
20 |         datasetName = char(Datasets(i));
21 | 
22 |         % only row i is filled; all other rows stay zero
23 |         Results = zeros(numDatasets,2);
24 | 
25 |         disp(['Dataset being processed: ', datasetName]);
26 |         DS = LoadUCRdataset(datasetName);
27 | 
28 |         tic;
29 |         Results(i,1) = OneNNClassifierGAK(DS,10);
30 |         Results(i,2) = toc;
31 | 
32 |         outFile = strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunOneNNGAKTiming/', 'RunOneNNGAKTiming_', num2str(i));
33 |         dlmwrite( outFile, Results, 'delimiter', '\t');
34 | 
35 |     end
36 | 
37 | end


--------------------------------------------------------------------------------
/RWS/dtw_similarity_cell_mulvar.m:
--------------------------------------------------------------------------------
 1 | % This script computes the dissimilarity between random series and raw 
 2 | % time-series. We use dynamic time warping to compute the distance between 
 3 | % a pair of time-series. Other distance measure can be used as well. 
 4 | %
 5 | % Author: Lingfei Wu
 6 | % Date: 01/20/2019
 7 | 
 8 | function [KMat, user_dtw_runtime] = dtw_similarity_cell_mulvar(newX, baseX)
 9 | % newX, baseX      : cell arrays indexed as newX{i} / baseX{j}; the number of
10 | %                    series is taken from size(.,1) and each series' length
11 | %                    from the second dimension of its cell.
12 | % KMat             : size(newX,1)-by-size(baseX,1) matrix of dtw_c distances.
13 | % user_dtw_runtime : summed toc() time of the individual dtw_c calls.
14 | % The wall-clock time of the whole parfor loop is printed by toc.
15 | 
16 |     numNew = size(newX,1);
17 |     numBase = size(baseX,1);
18 |     KMat = zeros(numNew,numBase);
19 |     user_dtw_runtime = 0;
20 | 
21 |     tic;
22 |     parfor i = 1 : numNew
23 |         rowDist = zeros(1,numBase);
24 |         lenNew = size(newX{i},2);
25 |         seriesNew = newX{i}';
26 |         for j = 1 : numBase
27 |             lenBase = size(baseX{j},2);
28 |             seriesBase = baseX{j}';
29 |             % window: a tenth of the longer series, at most 40, but never
30 |             % narrower than the difference in length
31 |             wSize = max(min(40, ceil(max(lenNew,lenBase)/10)), abs(lenNew - lenBase));
32 |             callTimer = tic;
33 |             dist = dtw_c(seriesNew, seriesBase, wSize);% window constraints
34 |             user_dtw_runtime = user_dtw_runtime + toc(callTimer);
35 |             rowDist(j) = dist;
36 |         end
37 |         KMat(i,:) = rowDist;
38 |     end
39 |     toc;
40 | 
41 | end


--------------------------------------------------------------------------------
/SIDL/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2016 Guoqing Zheng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies of the Software, including modified versions of the software,
14 | and substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/RunOneNNSBD.m:
--------------------------------------------------------------------------------
 1 | function RunOneNNSBD(DataSetStartIndex, DataSetEndIndex)
 2 | % RUNONENNSBD Run the 1-NN SBD classifier on a range of UCR datasets.
 3 | %   Datasets whose position in the sorted listing lies in
 4 | %   [DataSetStartIndex, DataSetEndIndex] are loaded and classified with
 5 | %   OneNNClassifierSBD; accuracy and elapsed time are written,
 6 | %   tab-delimited, to one result file per dataset.
 7 | 
 8 |     % entries 3..130 of the listing are used: the first two are assumed to
 9 |     % be '.' and '..', and the UCR Archive 2018 has 128 datasets
10 |     listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
11 |     Datasets = sort({listing(3:130).name});
12 |     numDatasets = length(Datasets);
13 | 
14 |     for i = 1:numDatasets
15 | 
16 |         if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
17 |             continue;   % outside the requested range
18 |         end
19 | 
20 |         datasetName = char(Datasets(i));
21 | 
22 |         % only row i is filled; all other rows stay zero
23 |         Results = zeros(numDatasets,2);
24 | 
25 |         display(['Dataset being processed: ', datasetName]);
26 |         DS = LoadUCRdataset(datasetName);
27 | 
28 |         tic;
29 |         Results(i,1) = OneNNClassifierSBD(DS);
30 |         Results(i,2) = toc;
31 | 
32 |         outFile = strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunOneNNSBD/', 'RunOneNNSBD_Dataset_', num2str(i));
33 |         dlmwrite( outFile, Results, 'delimiter', '\t');
34 | 
35 |     end
36 | 
37 | end


--------------------------------------------------------------------------------
/RunSPIRALRepLearning.m:
--------------------------------------------------------------------------------
 1 | function RunSPIRALRepLearning(DataSetStartIndex, DataSetEndIndex)
 2 | % RUNSPIRALREPLEARNING Learn SPIRAL representations for a range of datasets.
 3 | %   Datasets whose position in the sorted listing lies in
 4 | %   [DataSetStartIndex, DataSetEndIndex] are loaded, passed to
 5 | %   SPIRALRepLearning and the resulting matrix is written, tab-delimited,
 6 | %   to a .Zrep file under SPIRALREPRESENTATIONS/<dataset>/.
 7 | 
 8 |     % entries 3..130 of the listing are used: the first two are assumed to
 9 |     % be '.' and '..', and the UCR Archive 2018 has 128 datasets
10 |     listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
11 |     Datasets = sort({listing(3:130).name});
12 | 
13 |     addpath(genpath('SPIRAL/.'));
14 | 
15 |     for i = 1:length(Datasets)
16 | 
17 |         if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
18 |             continue;   % outside the requested range
19 |         end
20 | 
21 |         datasetName = char(Datasets(i));
22 |         display(['Dataset being processed: ', datasetName]);
23 |         DS = LoadUCRdataset(datasetName);
24 | 
25 |         % largest of: 4 per class, 40% of all instances, 20 -- capped at 100
26 |         lowerBounds = [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount), 20];
27 |         NumOfSamples = min(max(lowerBounds), 100);
28 | 
29 |         ZRep = SPIRALRepLearning(DS, NumOfSamples);
30 | 
31 |         % NOTE(review): the file stem says SIDL although this is the SPIRAL
32 |         % runner -- looks like a copy/paste leftover; confirm with whatever
33 |         % reads these files before renaming.
34 |         outFile = strcat( 'SPIRALREPRESENTATIONS','/',datasetName,'/','SIDLREPRESENTATIONS', '.Zrep');
35 |         dlmwrite( outFile, ZRep, 'delimiter', '\t');
36 | 
37 |     end
38 | 
39 | end


--------------------------------------------------------------------------------
/RWS/utilities/svm-scale-README:
--------------------------------------------------------------------------------
 1 | `svm-scale' Usage
 2 | =================
 3 | 
 4 | Usage: svm-scale [options] data_filename
 5 | options:
 6 | -l lower : x scaling lower limit (default -1)
 7 | -u upper : x scaling upper limit (default +1)
 8 | -y y_lower y_upper : y scaling limits (default: no y scaling)
 9 | -s save_filename : save scaling parameters to save_filename
10 | -r restore_filename : restore scaling parameters from restore_filename
11 | 
12 | See 'Examples' in this file for examples.
13 | 
14 | Tips on Practical Use
15 | =====================
16 | 
17 | * Scale your data. For example, scale each attribute to [0,1] or [-1,+1].
18 | * For C-SVC, consider using the model selection tool in the tools directory.
19 | * nu in nu-SVC/one-class-SVM/nu-SVR approximates the fraction of training
20 | errors and support vectors.
21 | * If data for classification are unbalanced (e.g. many positive and
22 | few negative), try different penalty parameters C by -wi (see
23 | examples below).
24 | * Specify larger cache size (i.e., larger -m) for huge problems.
25 | 
26 | Examples
27 | ========
28 | 
29 | svm-scale -l -1 -u 1 -s range train > train.scale
30 | svm-scale -r range test > test.scale
31 | 
32 | Scale each feature of the training data to be in [-1,1]. Scaling
33 | factors are stored in the file range and then used for scaling the
34 | test data.
35 | 
36 | 


--------------------------------------------------------------------------------
/NystromMatrixDictionary.m:
--------------------------------------------------------------------------------
 1 | function [AbsFroError,RelFroError,NormFroError] = NystromMatrixDictionary(KM, X, Dictionary, gamma)
 2 | % Errors of a Nystrom approximation of KM built from dictionary atoms.
 3 | % KM         : n-by-n kernel matrix, n = number of time series (rows of X)
 4 | % X          : n-by-m matrix of time series
 5 | % Dictionary : k-by-m matrix containing the dictionary atoms
 6 | % gamma      : passed unchanged to SINK
 7 | % Returns the Frobenius norm of KM - Z*Z' (absolute), the same divided by
 8 | % the Frobenius norm of KM (relative) and divided by n^2 (normalized).
 9 | numSeries = size(X,1);
10 | numAtoms = size(Dictionary,1);
11 | 
12 | % atom-to-atom kernel
13 | W = zeros(numAtoms);
14 | for a = 1:numAtoms
15 |     for b = 1:numAtoms
16 |         W(a,b) = SINK(Dictionary(a,:),Dictionary(b,:),gamma);
17 |     end
18 | end
19 | 
20 | % series-to-atom kernel
21 | E = zeros(numSeries,numAtoms);
22 | for r = 1:numSeries
23 |     for b = 1:numAtoms
24 |         E(r,b) = SINK(X(r,:),Dictionary(b,:),gamma);
25 |     end
26 | end
27 | 
28 | % Z = E * V * Lambda^(-1/2); entries that come out NaN, Inf or complex
29 | % are zeroed by the helper below
30 | [Ve, Va] = eig(W);
31 | inVa = diag(diag(Va).^(-0.5));
32 | Zexact = CheckNaNInfComplex( E * Ve * inVa );
33 | 
34 | KMtilde = Zexact*Zexact';
35 | 
36 | froError = norm(KM-KMtilde,'fro');
37 | AbsFroError = froError;
38 | RelFroError = froError/norm(KM,'fro');
39 | NormFroError = froError/numSeries^2;
40 | 
41 | end
42 | 
43 | function Z = CheckNaNInfComplex(Z)
44 | % Replace every entry of Z that is NaN, Inf or not real by 0.
45 | 
46 |     for idx = 1:numel(Z)
47 |         entry = Z(idx);
48 |         if (isnan(entry) || isinf(entry) || ~isreal(entry))
49 |             Z(idx) = 0;
50 |         end
51 |     end
52 | 
53 | end


--------------------------------------------------------------------------------
/RunKMCompSINK.m:
--------------------------------------------------------------------------------
 1 | function RunKMCompSINK(DataSetStartIndex, DataSetEndIndex, gamma)
 2 | % RUNKMCOMPSINK Compute SINK kernel matrices for a range of UCR datasets.
 3 | %   For every dataset whose position in the sorted listing lies in
 4 | %   [DataSetStartIndex, DataSetEndIndex], KMCompSINK(DS.Data,gamma) is run;
 5 | %   the kernel matrix and a [DistComp, elapsed time] result row are written,
 6 | %   tab-delimited, to separate files.
 7 | 
 8 |     % entries 3..130 of the listing are used: the first two are assumed to
 9 |     % be '.' and '..', and the UCR Archive 2018 has 128 datasets
10 |     listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
11 |     Datasets = sort({listing(3:130).name});
12 |     numDatasets = length(Datasets);
13 | 
14 |     disp(gamma);
15 | 
16 |     for i = 1:numDatasets
17 | 
18 |         if (i>=DataSetStartIndex & i<=DataSetEndIndex)
19 | 
20 |             datasetName = char(Datasets(i));
21 | 
22 |             % only row i is filled; all other rows stay zero
23 |             Results = zeros(numDatasets,2);
24 | 
25 |             disp(['Dataset being processed: ', datasetName]);
26 |             DS = LoadUCRdataset(datasetName);
27 | 
28 |             tic;
29 |             [KM, DistComp] = KMCompSINK(DS.Data,gamma);
30 |             Results(i,1) = DistComp;
31 |             Results(i,2) = toc;
32 | 
33 |             gammaStr = num2str(gamma);
34 |             kernelFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesSINK/',datasetName,'/', datasetName, '_SINK_Gamma_', gammaStr ,'.kernelmatrix');
35 |             resultFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompSINK/', 'RunKMCompSINK_Gamma_', gammaStr, '_Dataset_' , num2str(i));
36 | 
37 |             dlmwrite( kernelFile, KM, 'delimiter', '\t');
38 |             dlmwrite( resultFile, Results, 'delimiter', '\t');
39 | 
40 |         end
41 | 
42 |     end
43 | 
44 | end


--------------------------------------------------------------------------------
/RunOneNNDTW.m:
--------------------------------------------------------------------------------
 1 | function RunOneNNDTW(DataSetStartIndex, DataSetEndIndex,WindowPercent)
 2 | % RUNONENNDTW Run the 1-NN DTW classifier on a range of UCR datasets.
 3 | %   Datasets whose position in the sorted listing lies in
 4 | %   [DataSetStartIndex, DataSetEndIndex] are loaded and classified with
 5 | %   OneNNClassifierDTW using a warping window of WindowPercent percent of
 6 | %   the series length (rounded down); accuracy and elapsed time are
 7 | %   written, tab-delimited, to one result file per dataset.
 8 | 
 9 |     % entries 3..130 of the listing are used: the first two are assumed to
10 |     % be '.' and '..', and the UCR Archive 2018 has 128 datasets
11 |     listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
12 |     Datasets = sort({listing(3:130).name});
13 |     numDatasets = length(Datasets);
14 | 
15 |     for i = 1:numDatasets
16 | 
17 |         if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
18 |             continue;   % outside the requested range
19 |         end
20 | 
21 |         datasetName = char(Datasets(i));
22 | 
23 |         % only row i is filled; all other rows stay zero
24 |         Results = zeros(numDatasets,2);
25 | 
26 |         disp(['Dataset being processed: ', datasetName]);
27 |         DS = LoadUCRdataset(datasetName);
28 | 
29 |         % warping window, derived from the length of the first series
30 |         TSLength = numel(DS.Data(1,:));
31 |         window = floor(WindowPercent/100 * TSLength);
32 | 
33 |         tic;
34 |         Results(i,1) = OneNNClassifierDTW(DS,window);
35 |         Results(i,2) = toc;
36 | 
37 |         outFile = strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunOneNNDTW/', 'RunOneNNDTW_Dataset_', num2str(i), '_WindowPercent_', num2str(WindowPercent));
38 |         dlmwrite( outFile, Results, 'delimiter', '\t');
39 | 
40 |     end
41 | 
42 | end


--------------------------------------------------------------------------------
/RWS/dtw_similarity_cell.m:
--------------------------------------------------------------------------------
 1 | % This script computes the dissimilarity between random series and raw 
 2 | % time-series. We use dynamic time warping to compute the distance between 
 3 | % a pair of time-series. Other distance measure can be used as well. 
 4 | %
 5 | % Author: Lingfei Wu
 6 | % Date: 01/20/2019
 7 | 
 8 | function [KMat, user_dtw_runtime] = dtw_similarity_cell(newX, baseX)
 9 | % newX             : matrix, one series per row (indexed as newX(i,:)).
10 | % baseX            : cell array, one series per cell (indexed as baseX{j});
11 | %                    the number of series is size(baseX,1).
12 | % KMat             : size(newX,1)-by-size(baseX,1) matrix of dtw_c distances.
13 | % user_dtw_runtime : summed toc() time of the individual dtw_c calls.
14 | % Two elapsed times are printed by toc: one for the norm pass, one for the
15 | % distance matrix.
16 | 
17 |     numNew = size(newX,1);
18 |     numBase = size(baseX,1);
19 | 
20 |     % DTW of every series against a single zero.
21 |     % NOTE(review): neither vector is used afterwards in this function;
22 |     % the pass only costs time -- confirm it can be removed.
23 |     tic;
24 |     nrm_newX = zeros(numNew,1); %#ok<NASGU>
25 |     for i = 1:numNew
26 |         nrm_newX(i) = dtw_c(newX(i,:)',zeros(1));
27 |     end
28 |     nrm_baseX = zeros(numBase,1); %#ok<NASGU>
29 |     for i = 1:numBase
30 |         nrm_baseX(i) = dtw_c(baseX{i}',zeros(1));
31 |     end
32 |     toc
33 | 
34 |     % The original computes an adaptive window
35 |     %   max(min(40, ceil(max(l1,l2)/10)), abs(l1 - l2))
36 |     % and then immediately overrides it with 0, so 0 is what dtw_c receives.
37 |     % NOTE(review): the override looks like an experiment leftover --
38 |     % confirm which window is intended.
39 |     wSize = 0;
40 | 
41 |     KMat = zeros(numNew,numBase);
42 |     user_dtw_runtime = 0;
43 |     tic;
44 |     for i = 1 : numNew
45 |         rowDist = zeros(1,numBase);
46 |         seriesNew = newX(i,:)';
47 |         for j = 1 : numBase
48 |             seriesBase = baseX{j}';
49 |             callTimer = tic;
50 |             dist = dtw_c(seriesNew, seriesBase, wSize);% window constraints
51 |             user_dtw_runtime = user_dtw_runtime + toc(callTimer);
52 |             rowDist(j) = dist;
53 |         end
54 |         KMat(i,:) = rowDist;
55 |     end
56 |     toc;
57 | 
58 | end


--------------------------------------------------------------------------------
/RunDMComp.m:
--------------------------------------------------------------------------------
 1 | function RunDMComp(DataSetStartIndex, DataSetEndIndex, DistanceIndex)
 2 | % RUNDMCOMP Compute distance matrices for a range of UCR datasets.
 3 | %   For every dataset whose position in the sorted listing lies in
 4 | %   [DataSetStartIndex, DataSetEndIndex], DMComp(DS.Data, DistanceIndex) is
 5 | %   run; the distance matrix and a [DistComp, elapsed time] result row are
 6 | %   written, tab-delimited, to separate files. DistanceIndex also selects
 7 | %   the method name used in the file names (1 = 'ED', 2 = 'SBD').
 8 | 
 9 |     % Distance Matrices for ED and SBD
10 |     Methods = {'ED', 'SBD'};
11 | 
12 |     % entries 3..130 of the listing are used: the first two are assumed to
13 |     % be '.' and '..', and the UCR Archive 2018 has 128 datasets
14 |     listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
15 |     Datasets = sort({listing(3:130).name});
16 |     numDatasets = length(Datasets);
17 | 
18 |     for i = 1:numDatasets
19 | 
20 |         if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
21 |             continue;   % outside the requested range
22 |         end
23 | 
24 |         datasetName = char(Datasets(i));
25 | 
26 |         % only row i is filled; all other rows stay zero
27 |         Results = zeros(numDatasets,2);
28 | 
29 |         disp(['Dataset being processed: ', datasetName]);
30 |         DS = LoadUCRdataset(datasetName);
31 | 
32 |         tic;
33 |         [DM, DistComp] = DMComp(DS.Data, DistanceIndex);
34 |         Results(i,1) = DistComp;
35 |         Results(i,2) = toc;
36 | 
37 |         methodName = char(Methods(DistanceIndex));
38 |         matrixFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/DistanceMatrices/',datasetName,'/', datasetName,'_',methodName,'.distmatrix');
39 |         resultFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunDMComp/', 'RunDMComp_', methodName, '_Dataset_', num2str(i) );
40 | 
41 |         dlmwrite( matrixFile, DM, 'delimiter', '\t');
42 |         dlmwrite( resultFile, Results, 'delimiter', '\t');
43 | 
44 |     end
45 | 
46 | end


--------------------------------------------------------------------------------
/FrequentDirections.m:
--------------------------------------------------------------------------------
 1 | % Outputs sketch of input matrix
 2 | % Author: Terence Lim
 3 | % Original paper/code by Liberty "Simple and Deterministic Matrix Sketching"
 4 | 
 5 | function [sketch, vout] = FrequentDirections(A, ell)
 6 | % input A is n rows x d columns; ell is the sketch size
 7 | % output sketch:
 8 | %   - when n <= 2*ell: S*V' from the economy SVD of A
 9 | %   - otherwise: the first ell rows of the 2*ell-row working buffer
10 | % output vout:
11 | %   - when n <= 2*ell: the first right singular vector of A (column)
12 | %   - otherwise: the full V matrix from the last rotation
13 | % NOTE(review): the two vout shapes differ -- confirm what callers expect.
14 | 
15 | numRows = size(A, 1);
16 | numCols = size(A, 2);
17 | bufRows = 2 * ell;
18 | 
19 | % small input: a single SVD of A, no streaming needed
20 | if (numRows <= bufRows)
21 |   [~, S, V] = svd(A, 0);
22 |   vout = V(:,1);
23 |   sketch = S * V';
24 |   return
25 | end
26 | 
27 | sketch = zeros(bufRows, numCols);
28 | nextFree = 1;      % index of the next unused buffer row
29 | 
30 | for i=1:numRows
31 | 
32 |   if (nextFree > bufRows)    % buffer full: rotate
33 |     [~, S, V] = svd(sketch,0);  % economy SVD: sketch = U S V'
34 |     disp(i);                    % progress output at every rotation
35 |     vout = V;
36 |     sv = diag(S);
37 |     numSv = length(sv);
38 |     if (numSv >= ell)
39 |       % shrink: subtract the ell-th squared singular value from the top ell
40 |       shrunk = sqrt(sv(1:ell).^2 - sv(ell).^2);
41 |       keep = ell;
42 |       sketch(1:keep,:) = diag(shrunk) * V(:,1:keep)';
43 |     else
44 |       % fewer than ell singular values: keep them all unshrunk
45 |       keep = numSv;
46 |       sketch(1:keep,:) = S * V(:,1:keep)';
47 |     end
48 |     sketch((keep+1):end,:) = 0;
49 |     nextFree = keep + 1;
50 |   end
51 | 
52 |   % append the current row of A
53 |   sketch(nextFree,:) = A(i,:);
54 |   nextFree = nextFree + 1;
55 | end
56 | 
57 | sketch = sketch(1:ell, :);
58 | return;


--------------------------------------------------------------------------------
/RWS/rws_GenFea_example_mulvar.m:
--------------------------------------------------------------------------------
 1 | % This script generates low-rank approximation of latent kernel matrix using 
 2 | % random features approach based on dtw like distance for multi-variate 
 3 | % time-series datasets. Note: the default low-rank R = 512. 
 4 | 
 5 | clear,clc
 6 | 
 7 | addpath(genpath('utilities'));
 8 | file_dir = './datasets/';
 9 | filename = 'auslan';
10 | disp(filename);
11 | sigma = 0.79;
12 | R = 512; % Generally, Large R, Better Accuracy.
13 | DMin = 1;
14 | DMax = 25;
15 | 
16 | timer_start = tic;
17 | [trainData, testData] = rws_GenFea_mulvar(file_dir,filename,sigma,R,DMin,DMax);
18 | trainy = trainData(:,1);
19 | testy = testData(:,1);
20 | % convert user labels to uniform format binary(-1,1) & multiclasses (1,2,..)
21 | labels = unique(trainy);
22 | numClasses = length(labels);
23 | if numClasses > 2
24 |     for i=1:numClasses
25 |         ind = (trainy == labels(i));
26 |         trainy(ind) = i;
27 |     end
28 |     for i=1:numClasses
29 |         ind = (testy == labels(i));
30 |         testy(ind) = i;
31 |     end
32 | else
33 |     ind = (trainy == labels(1));
34 |     trainy(ind) = -1;
35 |     ind = (trainy == labels(2));
36 |     trainy(ind) = 1;
37 |     ind = (testy == labels(1));
38 |     testy(ind) = -1;
39 |     ind = (testy == labels(2));
40 |     testy(ind) = 1;
41 | end
42 | trainData(:,1) = trainy;
43 | testData(:,1) = testy;
44 | telapsed_features_dtw_random = toc(timer_start)
45 | csvwrite(strcat(file_dir,filename,'/',filename,'_rws_Train'), trainData);
46 | csvwrite(strcat(file_dir,filename,'/',filename,'_rws_Test'), testData);


--------------------------------------------------------------------------------
/SPIRAL/construct_sparse.m:
--------------------------------------------------------------------------------
 1 | %generating the kernel matrix
 2 | function [D,Omega,d]=construct_sparse(X,n,m)
 3 | 	% use the first n users, and generate approximately m pairs among them
 4 | 	% mex dtw_c.c;
 5 | 	fprintf('Step 1: sample and calculate dtw distance...\n')
 6 | 	D={};
 7 | 	Omega={};
 8 | 	d=zeros(n,1);
 9 |     length=size(X{1},1);
10 | 	wsize=ceil(length/30);
11 | 	if wsize>40
12 | 		wsize=40;
13 | 	end
14 | 	if wsize<1
15 | 		wsize=1;
16 | 	end
17 | 	id2d=randsample(n*n,2*m,'false');
18 | 	idi=floor((id2d-1)/n)+1;
19 | 	idj=id2d-n*(idi-1);
20 | 	id=find(idi<idj);
21 | 	idi=idi(id);
22 | 	idi=idi(1:floor((m-n)/2));
23 | 	idj=idj(id);
24 | 	idj=idj(1:floor((m-n)/2));
25 | 	
26 | 	v=zeros(floor((m-n)/2),1);
27 |     nrm=zeros(n,1);
28 | 	tic;
29 | 	for i=1:n
30 | 		nrm(i)=dtw_c(X{i},zeros(1,size(X{i},2)),2);
31 | 	end
32 |     	
33 | 	for k=1:floor((m-n)/2)
34 | 		%v(i)=0;i
35 | 		i=idi(k);
36 | 		j=idj(k);
37 | 		v(k)=(nrm(i)^2+nrm(j)^2-dtw_c(X{i},X{j},wsize)^2)/2/(nrm(i)*nrm(j));
38 | 		%v(k)=(nrm(i)^2+nrm(j)^2-dtw_c(X{i},X{j},15)^2)/(nrm(i)^2+nrm(j)^2);
39 | 	end
40 |     toc
41 | 	col=[idi;idj;(1:n)'];
42 | 	row=[idj;idi;(1:n)'];
43 | 	v=[v;v;ones(n,1)];
44 | 	m=size(col);
45 | 	[col,Index]=sort(col);
46 | 	row=row(Index);
47 | 	v=v(Index);
48 | 	start=1;
49 | 	nd=1;
50 | 
51 | 	for i=1:n
52 | 	    while (true)
53 | 			if (nd>m)
54 | 				break;
55 | 			end
56 | 			if (col(nd)~=i)
57 | 				break;
58 | 			end
59 | 			nd=nd+1;
60 | 	    end
61 | 	    Omega{i}=row(start:nd-1);
62 | 	    D{i}=v(start:nd-1);
63 | 	    d(i)=find(Omega{i}==i);
64 | 	    start=nd;
65 | 	end
66 | 
67 | end
68 | 
69 | 


--------------------------------------------------------------------------------
/RWS/rws_GenFea_example.m:
--------------------------------------------------------------------------------
 1 | % This script generates low-rank approximation of latent kernel matrix using 
 2 | % random features approach based on dtw like distance for multi-variate 
 3 | % time-series datasets. Note: the default low-rank R = 512. 
 4 | %
 5 | % Author: Lingfei Wu
 6 | % Date: 01/20/2019
 7 | 
 8 | clear,clc
 9 | 
10 | addpath(genpath('utilities'));
11 | file_dir = './datasets/';
12 | filename = 'Gun_Point';
13 | disp(filename);
14 | sigma = 4.46; 
15 | R = 512; % Generally, Large R, Better Accuracy.
16 | DMin = 1;
17 | DMax = 25;
18 | 
19 | timer_start = tic;
20 | [trainData, testData] = rws_GenFea(file_dir,filename,sigma,R,DMin,DMax);
21 | trainy = trainData(:,1);
22 | testy = testData(:,1);
23 | % convert user labels to uniform format binary(-1,1) & multiclasses (1,2,..)
24 | labels = unique(trainy);
25 | numClasses = length(labels);
26 | if numClasses > 2
27 |     for i=1:numClasses
28 |         ind = (trainy == labels(i));
29 |         trainy(ind) = i;
30 |     end
31 |     for i=1:numClasses
32 |         ind = (testy == labels(i));
33 |         testy(ind) = i;
34 |     end
35 | else
36 |     ind = (trainy == labels(1));
37 |     trainy(ind) = -1;
38 |     ind = (trainy == labels(2));
39 |     trainy(ind) = 1;
40 |     ind = (testy == labels(1));
41 |     testy(ind) = -1;
42 |     ind = (testy == labels(2));
43 |     testy(ind) = 1;
44 | end
45 | trainData(:,1) = trainy;
46 | testData(:,1) = testy;
47 | telapsed_features_dtw_random = toc(timer_start)
48 | csvwrite(strcat(file_dir,filename,'/',filename,'_rws_Train'), trainData);
49 | csvwrite(strcat(file_dir,filename,'/',filename,'_rws_Test'), testData);


--------------------------------------------------------------------------------
/SIDL/op_shift.m:
--------------------------------------------------------------------------------
 1 | %{
 2 | The MIT License (MIT)
 3 | Copyright (c) 2016 Guoqing Zheng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies of the Software, including modified versions of the software,
14 | and substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | %}
24 | 
function shifted_S = op_shift(S, offsets, target_dim)
% Place every row of S into a zero row of length target_dim, starting at a
% per-row offset.
%
%   S          - K x q matrix, one pattern per row.
%   offsets    - 1 x K row vector (must be a row vector); pattern k is
%                written to columns offsets(k)+1 .. offsets(k)+q.
%   target_dim - number of columns of the result.
%
% Returns shifted_S, K x target_dim.

  [numPatterns, patternLen] = size(S);

  % Linear indices into a target_dim x K buffer (one column per pattern):
  % start row of each pattern, plus position inside the pattern, plus the
  % column jump of target_dim elements per pattern.
  startRows = repmat(offsets + 1, patternLen, 1);
  withinPattern = (0:patternLen-1)';
  columnJump = (0:(numPatterns-1)) * target_dim;
  linearIdx = bsxfun(@plus, bsxfun(@plus, startRows, withinPattern), columnJump);

  buffer = zeros(target_dim, numPatterns);
  buffer(linearIdx) = S';

  shifted_S = buffer';
end
38 | 


--------------------------------------------------------------------------------
/SIDL/unsup_obj.m:
--------------------------------------------------------------------------------
 1 | %{
 2 | The MIT License (MIT)
 3 | Copyright (c) 2016 Guoqing Zheng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies of the Software, including modified versions of the software,
14 | and substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | %}
24 | 
function F = unsup_obj(X, S, A, Offsets, lambda)
% Objective value summed over all rows of X:
%   0.5 * ||x_i - A(i,:) * shifted_S_i||^2 + lambda * ||A(i,:)||_1
% where shifted_S_i = op_shift(S, Offsets(i,:), p).
%
% X: n x p
% S: K x q
% A: n x K
% Offsets: n x K

  numSeries = size(X, 1);
  seriesLen = size(X, 2);

  F = 0;

  for row = 1:numSeries
    codes = A(row,:);
    dictionary = op_shift(S, Offsets(row,:), seriesLen);
    residual = X(row,:) - codes * dictionary;
    F = F + 0.5 * norm(residual)^2 + lambda * norm(codes, 1);
  end

end
44 | 


--------------------------------------------------------------------------------
/RunClusteringKShape.m:
--------------------------------------------------------------------------------
 1 | function RunClusteringKShape(DataSetStartIndex, DataSetEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);    
10 |     
11 |     for i = 1:length(Datasets)
12 | 
13 |             if (i>=DataSetStartIndex & i<=DataSetEndIndex)
14 | 
15 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
16 |                     DS = LoadUCRdataset(char(Datasets(i)));
17 |                     
18 |                     Results = zeros(length(Datasets),2);
19 |                     
20 |                     for rep = 1 : 10
21 |                         rep
22 |                         rng(rep);
23 |                         
24 |                         tic;
25 |                         [mem cent] = kShape(DS.Data, length(DS.ClassNames));
26 |                         ClusteringTime = toc;
27 |                         
28 |                         RI = RandIndex(mem, DS.DataClassLabels);
29 |                         
30 |                         ResultsTmp = [RI,ClusteringTime];
31 |                            
32 |                         %
33 |                         Results(i,:) = Results(i,:) + ResultsTmp;
34 |                     end
35 |                     Results(i,:) = Results(i,:) ./ 10;
36 |                     dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunClusteringKShape/','RunClusteringKShape_Dataset_', num2str(i)), Results, 'delimiter', '\t');
37 |   
38 |             end
39 |             
40 |    
41 |     end
42 |     
43 | end


--------------------------------------------------------------------------------
/RunSIDLRepLearning.m:
--------------------------------------------------------------------------------
 1 | function RunSIDLRepLearning(DataSetStartIndex, DataSetEndIndex, lambda, r)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);  
10 |     
11 |     addpath(genpath('SIDL/.'));
12 |     
13 |     for i = 1:length(Datasets)
14 | 
15 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
16 | 
17 |                     display(['Dataset being processed: ', char(Datasets(i))]);
18 |                     DS = LoadUCRdataset(char(Datasets(i)));
19 |                     
20 |                     NumOfSamples = min(max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] ),100);
21 |                     
22 |                     %for lambda = [0.1, 1, 10]
23 |                     %    for r = [0.1, 0.25, 0.5]
24 |                             
25 |                             lambda
26 |                             r
27 |                     
28 |                             [ZRep,~,~]= SIDLRepLearning(char(Datasets(i)), DS, NumOfSamples, lambda, r); 
29 |                             dlmwrite( strcat( 'SIDLREPRESENTATIONS','/',char(Datasets(i)),'/','SIDLREPRESENTATIONS', '_L_', num2str(lambda), '_R_', num2str(r) ,'.Zrep'), ZRep, 'delimiter', '\t');
30 |                             
31 |                     %    end
32 |                     %end
33 |                                 
34 | 
35 |                                     
36 |                                     
37 |             end
38 |             
39 |             
40 |     end
41 |     
42 |     
43 | end
44 | 


--------------------------------------------------------------------------------
/RunKMCompSINKCompressed.m:
--------------------------------------------------------------------------------
 1 | function RunKMCompSINKCompressed(DataSetStartIndex, DataSetEndIndex, gamma, FourierEnergy, DatasetPercentile)
 2 | 
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, ~] = sort(Datasets);
10 | 
11 |     disp(gamma);
12 |     
13 |     for i = 1:length(Datasets)
14 |         
15 |         if (i>=DataSetStartIndex & i<=DataSetEndIndex)
16 |             
17 |             Results = zeros(length(Datasets),2);
18 |             
19 |             disp(['Dataset being processed: ', char(Datasets(i))]);
20 | 
21 |             DS = LoadUCRdataset(char(Datasets(i)));
22 |             
23 |             % Estimating required number of coefficients to
24 |             % guarantee energy level between comparisons
25 |             DSFourier = DatasetToFourier(DS, FourierEnergy, DatasetPercentile);
26 |                     
27 |             tic;
28 |             
29 |             [KM, DistComp] = KMCompSINKCompressed(DS.Data,gamma,DSFourier.NumCoeffs);
30 | 
31 |             Results(i,1) = DistComp;
32 |             Results(i,2) = toc;
33 |             
34 |             dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesSINKCompressed/',char(Datasets(i)),'/', char(Datasets(i)), '_SINKComp_Gamma_', num2str(gamma) ,'.kernelmatrix'), KM, 'delimiter', '\t');
35 |             dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompSINKCompressed/', 'RunKMCompSINKCompE99D100_Gamma_', num2str(gamma), '_Dataset_' , num2str(i)), Results, 'delimiter', '\t');
36 |    
37 |         end
38 |         
39 |     end
40 |     
41 | end


--------------------------------------------------------------------------------
/RunClusteringKShapeORIGINAL.m:
--------------------------------------------------------------------------------
 1 | function RunClusteringKShapeORIGINAL(DataSetStartIndex, DataSetEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);    
10 |     
11 |     for i = 1:length(Datasets)
12 | 
13 |             if (i>=DataSetStartIndex & i<=DataSetEndIndex)
14 | 
15 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
16 |                     DS = LoadUCRdataset(char(Datasets(i)));
17 |                     
18 |                     Results = zeros(length(Datasets),2);
19 |                     
20 |                     for rep = 1 : 10
21 |                         rep
22 |                         rng(rep);
23 |                         
24 |                         tic;
25 |                         [mem cent] = kShapeORIGINAL(DS.Data, length(DS.ClassNames));
26 |                         ClusteringTime = toc;
27 |                         
28 |                         RI = RandIndex(mem, DS.DataClassLabels);
29 |                         
30 |                         ResultsTmp = [RI,ClusteringTime];
31 |                            
32 |                         %
33 |                         Results(i,:) = Results(i,:) + ResultsTmp;
34 |                     end
35 |                     Results(i,:) = Results(i,:) ./ 10;
36 |                     dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunClusteringKShapeORIGINAL/','RunClusteringKShapeORIGINAL_Dataset_', num2str(i)), Results, 'delimiter', '\t');
37 |   
38 |             end
39 |             
40 |    
41 |     end
42 |     
43 | end


--------------------------------------------------------------------------------
/RunClusteringKMeans.m:
--------------------------------------------------------------------------------
 1 | function RunClusteringKMeans(DataSetStartIndex, DataSetEndIndex)  
 2 | 
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets); 
10 |     
11 |     for i = 1:length(Datasets)
12 | 
13 |             if (i>=DataSetStartIndex & i<=DataSetEndIndex)
14 | 
15 |                 
16 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
17 |                     DS = LoadUCRdataset(char(Datasets(i)));
18 |                     
19 |                     Results = zeros(length(Datasets),2);
20 |                     
21 |                     for rep = 1 : 10
22 |                         rep
23 |                         rng(rep);
24 |                         
25 |                         tic;
26 |                         [mem cent] = kMeans(DS.Data, length(DS.ClassNames));
27 |                         
28 |                         ClusteringTime = toc;
29 |                         
30 |                         RI = RandIndex(mem, DS.DataClassLabels);
31 |                         
32 |                         ResultsTmp = [RI,ClusteringTime];
33 |                            
34 |                         %
35 |                         Results(i,:) = Results(i,:) + ResultsTmp;
36 |                     end
37 |                     Results(i,:) = Results(i,:) ./ 10;
38 |                     dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunClusteringKMeans/','RunClusteringKMeans_Dataset_', num2str(i)), Results, 'delimiter', '\t');
39 |    
40 |             end
41 |             
42 |             
43 |     end
44 |     
45 | end


--------------------------------------------------------------------------------
/kShape.m:
--------------------------------------------------------------------------------
 1 | function [mem cent] = kShape(A, K)
 2 | 
 3 | m=size(A, 1);
 4 | mem = ceil(K*rand(m, 1));
 5 | cent = zeros(K, size(A, 2));
 6 | 
 7 | for iter = 1:100
 8 |     disp(iter);
 9 |     prev_mem = mem;
10 | 
11 |     for k = 1:K
12 |         cent(k,:) = kshape_centroid(mem, A, k, cent(k,:));    
13 |         cent(k,:) = zscore(cent(k,:));
14 |     end
15 |     
16 |     for i = 1:m
17 |         
18 |         %x = A(i,:);
19 |         for k = 1:K
20 |             %y = cent(k,:);
21 |             dist = 1-max( NCCc( A(i,:), cent(k,:)) );
22 |             D(i,k) = dist;
23 |         end
24 |     end
25 | 
26 |     [val mem] = min(D,[],2);
27 |     if norm(prev_mem-mem) == 0
28 |         break;
29 |     end
30 | end
31 | 
32 | end
33 | 
function ksc = kshape_centroid(mem, A, k, cur_center)
% Compute the new centroid of cluster k.
%
% Inputs:
%   mem        - cluster index of every row of A.
%   A          - one series per row.
%   k          - cluster whose centroid is computed.
%   cur_center - current centroid of cluster k (all zeros before the first
%                refinement).
%
% Output:
%   ksc - 1 x n zeros when the cluster is empty; otherwise the leading
%         eigenvector (n x 1) of the centred matrix built from the members,
%         with its sign chosen to be closer to the first member.

a = A(mem==k,:);

% Empty cluster: nothing to align or decompose.
if size(a,1) == 0
    ksc = zeros(1, size(A,2));
    return;
end

% Align every member to the current centroid via SBD, unless the centroid
% is still the all-zero initial value. The test is "any non-zero element"
% rather than "sum differs from zero": a z-normalised centroid sums to
% (almost) zero by construction, so the sum only detected it thanks to
% rounding error.
if any(cur_center ~= 0)
    for i = 1:size(a,1)
        [~, ~, opt_a] = SBD(cur_center, a(i,:));
        a(i,:) = opt_a;
    end
end

ncolumns = size(a,2);
Y = zscore(a,[],2);                                  % z-normalise each member
S = Y' * Y;
P = (eye(ncolumns) - 1 / ncolumns * ones(ncolumns));  % centring matrix
M = P*S*P;
[V, ~] = eigs(M,1);                                   % two outputs: V holds the eigenvector
ksc = V(:,1);

% The eigenvector's sign is arbitrary; keep the orientation closer to the
% first (aligned) member.
finddistance1 = sqrt(sum((a(1,:) - ksc').^2));
finddistance2 = sqrt(sum((a(1,:) - (-ksc')).^2));

if ~(finddistance1 < finddistance2)
    ksc = -ksc;
end

end
82 | 


--------------------------------------------------------------------------------
/SIDLRepLearning.m:
--------------------------------------------------------------------------------
 1 | function [ZRep,learn_time,fit_time] = SIDLRepLearning(dataset_name, DS, K, lambda, r)  
 2 | % dataset_name is dataset name
 3 | % DS is the dataset structure
 4 | % K is number of coefficients [10, 20, 50]
 5 | % lambda is the regularizer [0.1, 1, 10]
 6 | % r is length of time series [0.1, 0.25, 0.5]
 7 | 
 8 | 
 9 | rng(1);
10 | 
11 | train_X = DS.Train;
12 | test_X  = DS.Test;
13 | 
14 | train_y = DS.TrainClassLabels;
15 | test_y  = DS.TestClassLabels;
16 | 
17 | [n_train, p] = size(train_X);
18 | [n_test, p] = size(test_X);
19 | 
20 | c = 100;
21 | epsilon = 1e-3;
22 | maxIter = 10;
23 | maxInnerIter = 3;
24 | 
25 | %epsilon = 1e-3;
26 | %maxIter = 50;
27 | %maxInnerIter = 5;
28 | 
29 | 
30 |     A_rand_init = randn(n_test, K);
31 | 
32 |       q = ceil(p*r);
33 |       % run id 
34 |       runid = strcat(dataset_name, '_l_', num2str(lambda), '_K_', num2str(K), '_q_', num2str(q));
35 | 
36 |       % train SIDL on training set
37 |       tic;
38 |       [S, A, Offsets] = USIDL(train_X, train_y, lambda, K, q, c, epsilon, maxIter, maxInnerIter, runid);
39 | 
40 |       learn_time = toc;
41 |       %fprintf('\n##### TRAINING TIME on TRAIN SET (K=%f, lambda=%f, r=%f): %f secs.\n\n', K, lambda, r, learn_time);
42 | 
43 |       % learn sparse coding on test set with dictionary learned from training set
44 |       A_test = A_rand_init;
45 |       Offsets_test = randi([0, p-q], n_test, K);
46 |       tic;
47 |       [A_test, Offsets_test, F_all] = update_A_par(test_X, S, A_test, Offsets_test, lambda, maxIter, epsilon);
48 |       fit_time = toc;
49 |       
50 |       % get reconstruciton for SIDL
51 |       test_recons_error_sidl = unsup_obj(test_X, S, A_test, Offsets_test, 0) / n_test;
52 |       
53 |       %fprintf('\n\n##### RECONS ERROR on TEST SET (K=%f, lambda=%f) SIDL (r=%f): %f\n\n', K, lambda, r, test_recons_error_sidl);
54 | 
55 |     
56 | 
57 | ZRep = [A;A_test];
58 |   
59 |   
60 | end
61 | 
62 | 


--------------------------------------------------------------------------------
/RWSRepLearning.m:
--------------------------------------------------------------------------------
 1 | % This script generates the feature representation of each time series by 
 2 | % computing random features between random series and raw time-series. We
 3 | % use dynamic time warping to compute the distance between a pair of
 4 | % time-series. 
 5 | %
 6 | % Author: Lingfei Wu
 7 | % Date: 01/20/2019
 8 | 
 9 | function ZRep = RWSRepLearning(DS,sigma,R,DMin,DMax)
10 |     
11 |     % load data and generate corresponding train and test data
12 |    
13 | 
14 |     trainX = DS.Train;
15 |     %trainy = trainData(:,1);
16 |     testX = DS.Test;
17 |     %testy = testData(:,1);    
18 |     
19 |     
20 | 
21 | 
22 |     % generate random time series with variable length, where each value in
23 |     % random series is sampled from Gaussian distribution parameterized by sigma. 
24 |     timer_start = tic;
25 |     rng('default')
26 |     sampleX = cell(R,1);
27 |     for i=1:R
28 |         D = randi([DMin, DMax],1);
29 |         sampleX{i} = randn(1, D)./sigma; % gaussian
30 |     end
31 |     [trainFeaX_random, train_dtw_time] = dtw_similarity_cell(trainX, sampleX);
32 |     trainFeaX_random = trainFeaX_random/sqrt(R); 
33 |     [testFeaX_random, test_dtw_time] = dtw_similarity_cell(testX, sampleX);
34 |     testFeaX_random = testFeaX_random/sqrt(R); 
35 |     Train = trainFeaX_random;
36 |     Test = testFeaX_random;
37 |     
38 |     ZRep = [Train;Test];
39 |     
40 |     %telapsed_random_fea_gen = toc(timer_start);
41 |     
42 |     % Note: real_total_end_time is the real total time, including both dtw
43 |     % and ground distance, of generating both train and test features using 
44 |     % multithreads. user_dtw_time is the real time that accounts for 
45 |     % computation of dtw with one thread. 
46 |     %Runtime.real_total_dtw_time = telapsed_random_fea_gen;
47 |     %Runtime.user_dtw_time = train_dtw_time + test_dtw_time;
48 |     %Runtime.user_train_dtw_time = train_dtw_time;
49 |     %Runtime.user_test_dtw_time = test_dtw_time;
50 | end
51 | 


--------------------------------------------------------------------------------
/RunRWSRepLearning.m:
--------------------------------------------------------------------------------
 1 | function RunRWSRepLearning(DataSetStartIndex, DataSetEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);  
10 |     
11 |     addpath(genpath('RWS/.'));
12 |     addpath(genpath('RWS/utilities/.'));
13 |     
14 |     for i = 1:length(Datasets)
15 | 
16 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
17 | 
18 |                     display(['Dataset being processed: ', char(Datasets(i))]);
19 |                     DS = LoadUCRdataset(char(Datasets(i)));
20 |                     
21 |                     NumOfSamples = min(max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] ),100);
22 | 
23 |                     %ZRep = SPIRALRepLearning(DS, NumOfSamples); 
24 |                     
25 |                     % Supervised Tuning
26 |                     info = RWSTuneParameters(DS,NumOfSamples);
27 |                     ZRepSup = RWSRepLearning(DS,info.sigma,NumOfSamples,1,info.DMax);
28 |                     
29 |                     % Without Tuning for Clustering
30 |                     ZRepUNSup = RWSRepLearning(DS,1,NumOfSamples,1,25);
31 |                     %ZRepUNSup = RWSRepLearning(DS,1000,NumOfSamples,1,25);
32 |                     
33 |                     dlmwrite( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_Supervised', '.Zrep'), ZRepSup, 'delimiter', '\t');
34 |                     dlmwrite( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma1000_DMax25', '.Zrep'), ZRepUNSup, 'delimiter', '\t');
35 |                             
36 |                                    
37 |                                     
38 |             end
39 |             
40 |             
41 |     end
42 |     
43 |     
44 | end
45 | 


--------------------------------------------------------------------------------
/RunOneNNSBDCompressed.m:
--------------------------------------------------------------------------------
 1 | function RunOneNNSBDCompressed(DataSetStartIndex, DataSetEndIndex, FourierEnergy, DatasetPercentile)  
 2 | % FourierEnergy is like 0.9    
 3 | % DatasetPercentile is like 99
 4 | 
 5 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 6 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 7 |     Datasets = {dir_struct(3:130).name};
 8 |                      
 9 |     % Sort Datasets
10 |     
11 |     [Datasets, DSOrder] = sort(Datasets);  
12 |     
13 |     for i = 1:length(Datasets)
14 | 
15 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
16 | 
17 |                     Results = zeros(length(Datasets),7);
18 |                 
19 |                     display(['Dataset being processed: ', char(Datasets(i))]);
20 |                     DS = LoadUCRdataset(char(Datasets(i)));
21 |                     
22 |                     % Estimating required number of coefficients to
23 |                     % guarantee energy level between comparisons
24 |                     DSFourier = DatasetToFourier(DS, FourierEnergy, DatasetPercentile);
25 |                     
26 |                     tic;
27 |                     OneNNAcc = OneNNClassifierSBDCompressed(DS,DSFourier.NumCoeffs);
28 |                     
29 |                     Results(i,1) = FourierEnergy;
30 |                     Results(i,2) = DatasetPercentile;
31 |                     Results(i,3) = DSFourier.len;
32 |                     Results(i,4) = DSFourier.fftlength;
33 |                     Results(i,5) = DSFourier.NumCoeffs;
34 |                     Results(i,6) = OneNNAcc;
35 |                     Results(i,7) = toc;
36 |                     
37 |                     dlmwrite( strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunOneNNSBDCompressed/', 'RunOneNNSBD_Dataset_', num2str(i),'_DatasetPercentile_',num2str(DatasetPercentile),'_FourierEnergy_',num2str(FourierEnergy) ), Results, 'delimiter', '\t');
38 |    
39 |             end
40 |             
41 |             
42 |     end
43 |     
44 | end


--------------------------------------------------------------------------------
/RWS/rws_GenFea_mulvar.m:
--------------------------------------------------------------------------------
 1 | % This script generates the feature representation of each time series by 
 2 | % computing random features between random series and raw time-series. We
 3 | % use dynamic time warping to compute the distance between a pair of
 4 | % time-series. 
 5 | %
 6 | % Author: Lingfei Wu
 7 | % Date: 01/20/2019
 8 | 
 9 | function [Train,Test,Runtime] = rws_GenFea_mulvar(file_dir,filename,sigma,R,DMin,DMax)
10 |     
11 |     % load data and generate corresponding train and test data
12 |     timer_start = tic;
13 |     Data = load(strcat(file_dir,filename,'/',filename,'.mat'));
14 |     trainX = Data.train_X;
15 |     trainy = Data.train_Y;
16 |     testX = Data.test_X;
17 |     testy = Data.test_Y;
18 |     telapsed_data_load = toc(timer_start)
19 | 
20 |     % generate random time series with variable length, where each value in
21 |     % random series is sampled from Gaussian distribution parameterized by sigma. 
22 |     timer_start = tic;
23 |     rng('default')
24 |     sampleX = cell(R,1);
25 |     d = size(trainX{1},1); % number of variates
26 |     for i=1:R
27 |         D = randi([DMin, DMax],1);
28 |         sampleX{i} = randn(d, D)./sigma; % gaussian
29 |     end
30 |     [trainFeaX_random, train_dtw_time] = dtw_similarity_cell_mulvar(trainX,sampleX);
31 |     trainFeaX_random = trainFeaX_random/sqrt(R); 
32 |     [testFeaX_random, test_dtw_time] = dtw_similarity_cell_mulvar(testX,sampleX);
33 |     testFeaX_random = testFeaX_random/sqrt(R);
34 |     Train = [trainy, trainFeaX_random];
35 |     Test = [testy, testFeaX_random];
36 |     telapsed_random_fea_gen = toc(timer_start);
37 |     
38 |     % Note: real_total_end_time is the real total time, including both dtw
39 |     % and ground distance, of generating both train and test features using 
40 |     % multithreads. user_dtw_time is the real time that accounts for 
41 |     % computation of dtw with one thread. 
42 |     Runtime.real_total_dtw_time = telapsed_random_fea_gen;
43 |     Runtime.user_dtw_time = train_dtw_time + test_dtw_time;
44 |     Runtime.user_train_dtw_time = train_dtw_time;
45 |     Runtime.user_test_dtw_time = test_dtw_time;
46 | end
47 | 


--------------------------------------------------------------------------------
/RunClusteringSPIRAL.m:
--------------------------------------------------------------------------------
 1 | function RunClusteringSPIRAL(DataSetStartIndex, DataSetEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);  
10 | 	
11 |     Results = zeros(length(Datasets),2);
12 |     
13 |     for i = 1:length(Datasets)
14 | 
15 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
16 | 
17 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
18 |                     DS = LoadUCRdataset(char(Datasets(i)));
19 |                     
20 |                     for rep = 1 : 10
21 |                         rep
22 |                         rng(rep);
23 |                         
24 |                         
25 |                         % Extract Sample Points
26 | 
27 |                         ZRep = dlmread( strcat( 'SPIRALREPRESENTATIONS','/',char(Datasets(i)),'/','SIDLREPRESENTATIONS', '.Zrep')  );
28 | 
29 |                         tic;
30 |                         
31 |                         [mem cent] = kmeans(ZRep, length(DS.ClassNames),'Replicates',1);
32 |                         
33 |                         ClusteringTime = toc;
34 | 
35 |                         RI = RandIndex(mem, DS.DataClassLabels);
36 |                         
37 |                         % Evaluate SmplPoints in terms of clustering
38 |                         % measures (e.g., SSE, RandIndex, NystromAppx)
39 |                         
40 |                         ResultsTmp = [RI,ClusteringTime];
41 |                            
42 |                         %
43 |                         Results(i,:) = Results(i,:) + ResultsTmp;
44 |                     end
45 |                     Results(i,:) = Results(i,:) ./ 10;
46 |                     
47 |                     dlmwrite( strcat( 'RunClusteringSPIRAL/','RunClusteringSPIRAL_Dataset_', num2str(i)), Results, 'delimiter', '\t');
48 |    
49 |             end
50 |             
51 |     end
52 |     
53 | end


--------------------------------------------------------------------------------
/CollectStatistics.m:
--------------------------------------------------------------------------------
 1 | function CollectStatistics(DataSetStartIndex, DataSetEndIndex)  
 2 | 
 3 |     Methods = [cellstr('Random'), 'KShape'];
 4 |     Types = [cellstr('Zexact'), 'Ztop5', 'Ztop10', 'Ztop20', 'Z99per', 'Z95per', 'Z90per', 'Z85per', 'Z80per'];
 5 | 
 6 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 7 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 8 |     Datasets = {dir_struct(3:130).name};
 9 |                      
10 |     % Sort Datasets
11 |     
12 |     [Datasets, DSOrder] = sort(Datasets);
13 |     
14 |     %FourierEnergy = 1;
15 |     %DatasetPercentile = 100;
16 |     
17 |     for i = 1:length(Datasets)
18 | 
19 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
20 | 
21 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
22 |                     %DS = LoadUCRdataset(char(Datasets(i)));
23 |                     %disp([char(Datasets(i)),',',num2str(length(DS.ClassNames)),',',num2str(DS.TrainInstancesCount),',',num2str(DS.TestInstancesCount),',',num2str(length(DS.Train(1,:)))]);
24 |                     
25 |                     ResultsTmp = dlmread( strcat('RunLinearSVMRWS/','RunLinearSVMRWS', '_Dataset_', num2str(i)) );
26 |                     
27 |                     %ResultsTmp = dlmread( strcat( 'RunClassificationZREP/RunClassificationZREP_FULLKM_Z20_KShape_', num2str(i),'.results') );
28 |                     %ResultsTmp = dlmread( strcat('RunOneNNTOPFFTED/', 'RunOneNNTOPFFTED_Dataset_', num2str(i), '_NumOfCoeff_',num2str(10)) );
29 |                                         
30 |                     Results(i,:) = ResultsTmp(i,:);
31 |                     
32 |             end
33 |                     
34 |            
35 |     end
36 |             
37 |     dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RESULTS_RunLinearSVMRWS_', num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex)), Results, 'delimiter', ',');
38 |     
39 |     %dlmwrite( strcat( '/rigel/dsi/users/ikp2103/JOPA/GRAIL2/RESULTS/RunOneNNTOPFFTED_NumOfCoeff_10_', num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex)), Results, 'delimiter', ',');
40 |     
41 | end
42 | 


--------------------------------------------------------------------------------
/RepLearnKM.m:
--------------------------------------------------------------------------------
 1 | function [Z99per,Z98per,Z97per,Z95per,Z90per,Z85per,Z80per,Ztop20,Ztop10,Ztop5,RepLearnTime]=RepLearnKM(KM)
 2 | % Input
 3 | % KM: Kernel matrix (nxn)
 4 | % Dim: Dimensions to keep in the end over the learned representation
 5 | % Output
 6 | % Ktilde: Approximated kernel matrix (nxn)
 7 | % Z: New learned representation (nxDim)
 8 | 
 9 | tic;
10 | [Q,L]=eig(KM);
11 | 
12 | eigValue=diag(L);
13 | [~,IX]=sort(eigValue,'descend');
14 | eigVector=Q(:,IX);
15 | eigValue=eigValue(IX);
16 | 
17 | VarExplainedCumSum = cumsum(eigValue)/sum(eigValue);
18 | 
19 | DimFor99 = find(VarExplainedCumSum>=0.99,1);
20 | DimFor98 = find(VarExplainedCumSum>=0.98,1);
21 | DimFor97 = find(VarExplainedCumSum>=0.97,1);
22 | DimFor95 = find(VarExplainedCumSum>=0.95,1);
23 | DimFor90 = find(VarExplainedCumSum>=0.90,1);
24 | DimFor85 = find(VarExplainedCumSum>=0.85,1);
25 | DimFor80 = find(VarExplainedCumSum>=0.80,1);
26 | 
27 | RepLearnTime = toc;
28 | 
29 | Z99per = CheckNaNInfComplex( eigVector(:,1:DimFor99)*sqrt(diag(eigValue(1:DimFor99))) );
30 | Z98per = CheckNaNInfComplex( eigVector(:,1:DimFor98)*sqrt(diag(eigValue(1:DimFor98))) );
31 | Z97per = CheckNaNInfComplex( eigVector(:,1:DimFor97)*sqrt(diag(eigValue(1:DimFor97))) );
32 | Z95per = CheckNaNInfComplex( eigVector(:,1:DimFor95)*sqrt(diag(eigValue(1:DimFor95))) );
33 | Z90per = CheckNaNInfComplex( eigVector(:,1:DimFor90)*sqrt(diag(eigValue(1:DimFor90))) );
34 | Z85per = CheckNaNInfComplex( eigVector(:,1:DimFor85)*sqrt(diag(eigValue(1:DimFor85))) );
35 | Z80per = CheckNaNInfComplex( eigVector(:,1:DimFor80)*sqrt(diag(eigValue(1:DimFor80))) );
36 | 
37 | Ztop20 = CheckNaNInfComplex( eigVector(:,1:20)*sqrt(diag(eigValue(1:20))) );
38 | Ztop10 = CheckNaNInfComplex( eigVector(:,1:10)*sqrt(diag(eigValue(1:10))) );
39 | Ztop5 = CheckNaNInfComplex( eigVector(:,1:5)*sqrt(diag(eigValue(1:5))) );
40 | 
41 | end
42 | 
function Z = CheckNaNInfComplex(Z)
% Replace every entry of Z that is NaN, Inf or not real with 0 and print
% 'ERROR ON REPRESENTATION' once for each replaced entry.

    nRows = size(Z,1);
    nCols = size(Z,2);

    for c=1:nCols
        for r=1:nRows
            entry = Z(r,c);
            isUsable = ~isnan(entry) && ~isinf(entry) && isreal(entry);
            if ~isUsable
                Z(r,c)=0;
                disp('ERROR ON REPRESENTATION');
            end
        end
    end

end
55 | 


--------------------------------------------------------------------------------
/RunClusteringSIDL.m:
--------------------------------------------------------------------------------
 1 | function RunClusteringSIDL(DataSetStartIndex, DataSetEndIndex, lambda, r)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);  
10 | 	
11 |     Results = zeros(length(Datasets),2);
12 |     
13 |     for i = 1:length(Datasets)
14 | 
15 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
16 | 
17 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
18 |                     DS = LoadUCRdataset(char(Datasets(i)));
19 |                     
20 |                     for rep = 1 : 10
21 |                         rep
22 |                         rng(rep);
23 |                         
24 |                         
25 |                         % Extract Sample Points
26 | 
27 |                         ZRep = dlmread( strcat( 'SIDLREPRESENTATIONS','/',char(Datasets(i)),'/','SIDLREPRESENTATIONS', '_L_', num2str(lambda), '_R_', num2str(r) ,'.Zrep')  );
28 | 
29 |                         tic;
30 |                         
31 |                         [mem cent] = kmeans(ZRep, length(DS.ClassNames),'Replicates',1);
32 |                         
33 |                         ClusteringTime = toc;
34 | 
35 |                         RI = RandIndex(mem, DS.DataClassLabels);
36 |                         
37 |                         % Evaluate SmplPoints in terms of clustering
38 |                         % measures (e.g., SSE, RandIndex, NystromAppx)
39 |                         
40 |                         ResultsTmp = [RI,ClusteringTime];
41 |                            
42 |                         %
43 |                         Results(i,:) = Results(i,:) + ResultsTmp;
44 |                     end
45 |                     Results(i,:) = Results(i,:) ./ 10;
46 |                     
47 |                     dlmwrite( strcat( 'RunClusteringSIDL/','RunClusteringSIDL','_L_', num2str(lambda), '_R_', num2str(r), '_Dataset_', num2str(i)), Results, 'delimiter', '\t');
48 |    
49 |             end
50 |             
51 |     end
52 |     
53 | end


--------------------------------------------------------------------------------
/RunKMCompSINKSPLIT.m:
--------------------------------------------------------------------------------
 1 | function RunKMCompSINKSPLIT(DataSetStartIndex, DataSetEndIndex, TrainKM, sigma)
 2 | 
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, ~] = sort(Datasets);
10 | 
11 |     for i = 1:length(Datasets)
12 |         
13 |         if (i>=DataSetStartIndex && i<=DataSetEndIndex)
14 |             
15 |             Results = zeros(length(Datasets),4);
16 |             
17 |             disp(['Dataset being processed: ', char(Datasets(i))]);
18 | 
19 |             DS = LoadUCRdataset(char(Datasets(i)));
20 |             
21 |             if (TrainKM==1)
22 |                 
23 |                 tic;
24 |                 [KMTrain, DistComp] = KMCompSINK_TrainToTrain(DS.Train, sigma);
25 | 
26 |                 Results(i,1) = DistComp; 
27 |                 Results(i,2) = toc;
28 | 
29 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesSINKSPLIT/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Sigma_', num2str(sigma) ,'_TRAIN.kernelmatrix'), KMTrain, 'delimiter', '\t');
30 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompSINKSPLIT/', 'RunKMCompSINKSPLIT_TrainToTrain_Sigma_', num2str(sigma), '_TrainToTrain_Dataset_' , num2str(i) ), Results, 'delimiter', '\t');
31 | 
32 | 
33 |             else
34 |                 tic;
35 |                 [KMTestToTrain, DistComp2] = KMCompSINK_TestToTrain(DS.Test,DS.Train,sigma);
36 | 
37 |                 Results(i,3) = DistComp2;
38 |                 Results(i,4) = toc;
39 |                 
40 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesSINKSPLIT/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Sigma_', num2str(sigma) ,'_TESTTOTRAIN.kernelmatrix'), KMTestToTrain, 'delimiter', '\t');          
41 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompSINKSPLIT/', 'RunKMCompSINKSPLIT_TestToTrain_Sigma_', num2str(sigma), '_TestToTrain_Dataset_' , num2str(i) ), Results, 'delimiter', '\t');
42 |  
43 |                 
44 |             end
45 | 
46 |         end
47 |         
48 |     end
49 |     
50 | end


--------------------------------------------------------------------------------
/RunLOOCandOneNNDTW.m:
--------------------------------------------------------------------------------
 1 | function RunLOOCandOneNNDTW(DataSetStartIndex, DataSetEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);
10 | 
11 |     for i = 1:length(Datasets)
12 | 
13 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
14 | 
15 |                     LeaveOneOutAccuracies = zeros(length(Datasets),20);
16 |                     LeaveOneOutRuntimes = zeros(length(Datasets),20);
17 |     
18 |                     Results = zeros(length(Datasets),6);
19 |                 
20 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
21 |                     DS = LoadUCRdataset(char(Datasets(i)));
22 |                     
23 |                     TSLength = length(DS.Data(1,:));
24 |                     
25 |                     for gamma=1:20
26 | 
27 |                         gammaTmp = gamma-1
28 |                         window = floor(gammaTmp/100 * TSLength); 
29 |                         tic;
30 |                         acc = LOOClassifierDTW(DS,window);
31 |                         LeaveOneOutRuntimes(i,gamma) = toc;
32 |                         LeaveOneOutAccuracies(i,gamma) = acc;
33 |                     end
34 | 
35 |                     [MaxLeaveOneOutAcc,MaxLeaveOneOutAccGamma] = max(LeaveOneOutAccuracies(i,:));
36 |                     
37 |                     tic;
38 |                     window = floor((MaxLeaveOneOutAccGamma-1)/100 * TSLength); 
39 |                     OneNNAcc = OneNNClassifierDTW(DS,window);
40 |                     
41 |                     Results(i,1) = MaxLeaveOneOutAccGamma-1;
42 |                     Results(i,2) = MaxLeaveOneOutAcc;
43 |                     Results(i,3) = LeaveOneOutRuntimes(i,MaxLeaveOneOutAccGamma);
44 |                     Results(i,4) = sum(LeaveOneOutRuntimes(i,:));
45 |                     Results(i,5) = OneNNAcc;
46 |                     Results(i,6) = toc;
47 |    
48 |                     dlmwrite( strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunLOOCandOneNNDTW/', 'RunLOOCandOneNNDTW_Dataset_', num2str(i)),  Results, 'delimiter', '\t');
49 |    
50 |             end
51 |             
52 |             
53 |     end
54 |     
55 | end


--------------------------------------------------------------------------------
/TestVarianceApproximate.m:
--------------------------------------------------------------------------------
 1 | function Results = TestVarianceApproximate(Dictionary)
 2 |               
 3 | [nrowsDic, ncolumnsDic] = size(Dictionary);
 4 | 
 5 | W = zeros(nrowsDic,nrowsDic);
 6 | 
 7 | Var4Gamma = zeros(1,20);
 8 | VarExplained20 = zeros(1,20);
 9 | 
10 | DimFor98 = zeros(1,20);
11 | DimFor95 = zeros(1,20);
12 | DimFor90 = zeros(1,20);
13 | DimFor85 = zeros(1,20);
14 | DimFor80 = zeros(1,20);
15 | 
16 | for g=1:20
17 |     g
18 |     Wtmp = [];
19 |     for i=1:nrowsDic
20 |         %disp(i);
21 |         for j=1:nrowsDic
22 |             W(i,j) = SINK(Dictionary(i,:),Dictionary(j,:),g);
23 |         end    
24 |         Wtmp = [Wtmp, W(i,:)];
25 |     end
26 | 
27 |     Var4Gamma(g)=var(Wtmp);
28 |     
29 |     [Q,L] = eig(W);
30 | 
31 |     eigValue=diag(L);
32 |     [~,IX]=sort(eigValue,'descend');
33 |     eigVector=Q(:,IX);
34 |     eigValue=eigValue(IX);
35 |     
36 |     VarExplainedCumSum = cumsum(eigValue)/sum(eigValue);
37 |     
38 |     VarExplained20(g) = VarExplainedCumSum(20);
39 | 
40 |     DimFor98(g) = find(VarExplainedCumSum>=0.98,1);
41 |     DimFor95(g) = find(VarExplainedCumSum>=0.95,1);
42 |     DimFor90(g) = find(VarExplainedCumSum>=0.90,1);
43 |     DimFor85(g) = find(VarExplainedCumSum>=0.85,1);
44 |     DimFor80(g) = find(VarExplainedCumSum>=0.80,1); 
45 | end
46 | 
47 | VarByVarExplained20 = Var4Gamma.*VarExplained20;
48 | 
49 | [~, GammaForMaxVariance] = max(Var4Gamma);
50 | [~, GammaForMaxVarByVarExplained20] = max(VarByVarExplained20);
51 | 
52 | Results = [];
53 | 
54 | MaxVarExpained20 = VarExplained20(GammaForMaxVariance);
55 | MaxVarDimFor98 = DimFor98(GammaForMaxVariance);
56 | MaxVarDimFor95 = DimFor95(GammaForMaxVariance);
57 | MaxVarDimFor90 = DimFor90(GammaForMaxVariance);
58 | MaxVarDimFor85 = DimFor85(GammaForMaxVariance);
59 | MaxVarDimFor80 = DimFor80(GammaForMaxVariance);
60 | 
61 | Results = [Results,GammaForMaxVariance,MaxVarExpained20,MaxVarDimFor98,MaxVarDimFor95,MaxVarDimFor90,MaxVarDimFor85,MaxVarDimFor80];
62 | 
63 | MaxVarByVarExpained20 = VarExplained20(GammaForMaxVarByVarExplained20);
64 | MaxVarByVarExpDimFor98 = DimFor98(GammaForMaxVarByVarExplained20);
65 | MaxVarByVarExpDimFor95 = DimFor95(GammaForMaxVarByVarExplained20);
66 | MaxVarByVarExpDimFor90 = DimFor90(GammaForMaxVarByVarExplained20);
67 | MaxVarByVarExpDimFor85 = DimFor85(GammaForMaxVarByVarExplained20);
68 | MaxVarByVarExpDimFor80 = DimFor80(GammaForMaxVarByVarExplained20);
69 | 
70 | Results = [Results,GammaForMaxVarByVarExplained20,MaxVarByVarExpained20,MaxVarByVarExpDimFor98,MaxVarByVarExpDimFor95,MaxVarByVarExpDimFor90,MaxVarByVarExpDimFor85,MaxVarByVarExpDimFor80];
71 | 
72 | end


--------------------------------------------------------------------------------
/SIDL/update_S.m:
--------------------------------------------------------------------------------
 1 | %{
 2 | The MIT License (MIT)
 3 | Copyright (c) 2016 Guoqing Zheng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies of the Software, including modified versions of the software,
14 | and substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | %}
24 | 
function S = update_S(X, S, A, Offsets, lambda, c, maxIter, epsilon)
% UPDATE_S  Update the rows of S one at a time with X, A and Offsets fixed.
%
% X: n x p
% S: K x q
% A: n x K
% Offsets: n x K, Offsets(i,k) is the shift t such that row k of S is
%          matched against X(i, 1+t:q+t)
% lambda: only forwarded to unsup_obj
% c: every updated row of S ends up with norm at most sqrt(c)
% maxIter: maximum number of sweeps over the K rows
% epsilon: a sweep whose relative change of the unsup_obj value is below
%          epsilon ends the update early

  [n, p] = size(X);
  [K, q] = size(S);

  radius = sqrt(c);
  prevObj = [];   % objective after the previous sweep, empty before the first one

  for iter = 1:maxIter
    for k=1:K
      colEnergy = norm(A(:,k))^2;
      if colEnergy == 0 % row k is not used by any sample, leave it as is
        continue
      end

      % Accumulate, over all samples, the coefficient-weighted residual
      % window that row k has to explain.
      acc = zeros(1, q);
      for i=1:n
        others = A(i,:);
        others(k) = 0;                       % drop the contribution of row k
        placed = op_shift(S, Offsets(i,:), p);
        residual = X(i,:) - others * placed;

        shift = Offsets(i,k);
        acc = acc + A(i,k) * residual(1+shift:q+shift);
      end

      % Use acc / colEnergy unless its norm would exceed sqrt(c); in that
      % case rescale acc to norm sqrt(c).
      accNorm = norm(acc);
      if colEnergy <= accNorm / radius
        S(k,:) = radius / accNorm * acc;
      else
        S(k,:) = acc / colEnergy;
      end

    end

    curObj = unsup_obj(X, S, A, Offsets, lambda);
    %fprintf('Current F_all: %f\n', curObj);
    if ~isempty(prevObj) && abs(curObj - prevObj) / prevObj < epsilon
      %fprintf('Updating S: Converged!\n\n');
      return
    end
    prevObj = curObj;
  end

  %fprintf('Updating S: Reached max iter.\n\n');
end
81 | 


--------------------------------------------------------------------------------
/RunClusteringRWS.m:
--------------------------------------------------------------------------------
 1 | function RunClusteringRWS(DataSetStartIndex, DataSetEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);  
10 | 	
11 |     Results = zeros(length(Datasets),2);
12 |     
13 |     for i = 1:length(Datasets)
14 | 
15 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
16 | 
17 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
18 |                     DS = LoadUCRdataset(char(Datasets(i)));
19 |                     
20 |                     for rep = 1 : 10
21 |                         rep
22 |                         rng(rep);
23 |                         
24 |                         
25 |                         % Extract Sample Points
26 | 
27 |                         %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_Supervised', '.Zrep')  );
28 |                         %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised', '.Zrep')  );
29 | 
30 |                         %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma0.001_DMax25', '.Zrep')  );
31 |                         %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma1_DMax100', '.Zrep')  );
32 |                         ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma1000_DMax25', '.Zrep')  );
33 | 
34 |                         tic;
35 |                         
36 |                         [mem cent] = kmeans(ZRep, length(DS.ClassNames),'Replicates',1);
37 |                         
38 |                         ClusteringTime = toc;
39 | 
40 |                         RI = RandIndex(mem, DS.DataClassLabels);
41 |                         
42 |                         % Evaluate SmplPoints in terms of clustering
43 |                         % measures (e.g., SSE, RandIndex, NystromAppx)
44 |                         
45 |                         ResultsTmp = [RI,ClusteringTime];
46 |                            
47 |                         %
48 |                         Results(i,:) = Results(i,:) + ResultsTmp;
49 |                     end
50 |                     Results(i,:) = Results(i,:) ./ 10;
51 |                     
52 |                     dlmwrite( strcat( 'RunClusteringRWS/','RunClusteringRWS_UNSupervised_Sigma1000_DMax25_Dataset_', num2str(i)), Results, 'delimiter', '\t');
53 |    
54 |             end
55 |             
56 |     end
57 |     
58 | end


--------------------------------------------------------------------------------
/DatasetToFourier.m:
--------------------------------------------------------------------------------
 1 | function DSFourier = DatasetToFourier(DS, FourierEnergy, DatasetPercentile)
 2 | % zscore and zeropad time-series to 2x length and replace Data, Train and
 3 | %   Test with dft coefficients.  Call after ds = LoadUCRDatasets
 4 | %   Optionally fills .F with number of coeffs needed for energy>eta/s
 5 | 
 6 | DS.Train = DS.Train ./ norm(DS.Train(1,:));
 7 | DS.Test = DS.Test ./ norm(DS.Test(1,:));
 8 | DS.Data = DS.Data ./ norm(DS.Data(1,:));
 9 | 
10 | DSFourier = DS;
11 | 
12 | 
13 | % Compute DFT of the data
14 | DSFourier.len = length(DS.Data(1,:));
15 | DSFourier.fftlength = 2^nextpow2(2*DSFourier.len-1);
16 | %TrainTemp = [zeros(DS.TrainInstancesCount,floor((DSFourier.fftlength-DSFourier.len)/2)),DS.Train,zeros(DS.TrainInstancesCount,ceil((DSFourier.fftlength-DSFourier.len)/2))];
17 | %TestTemp = [zeros(DS.TestInstancesCount,floor((DSFourier.fftlength-DSFourier.len)/2)),DS.Test,zeros(DS.TestInstancesCount,ceil((DSFourier.fftlength-DSFourier.len)/2))];
18 | %DSFourier.TrainFourier = fft(TrainTemp,[],2);
19 | %DSFourier.TestFourier = fft(TestTemp,[],2);
20 | DSFourier.fftlength = 2^nextpow2(2*DSFourier.len-1);
21 | DSFourier.TrainFourier = fft(DS.Train,DSFourier.fftlength,2);
22 | DSFourier.TestFourier = fft(DS.Test,DSFourier.fftlength,2);
23 | DSFourier.DataFourier = [DSFourier.TrainFourier; DSFourier.TestFourier];
24 | 
25 | % Preserve Percent of Energy in Fourier space of each time series
26 | 
27 | E = cumsum(abs(DSFourier.DataFourier) .^ 2, 2);    % Energy is squared abs
28 | E = bsxfun(@rdivide, E, E(:, end));
29 | DSFourier.DataCoeffsUntilEnergy = zeros(size(DSFourier.DataFourier,1),length(FourierEnergy));
30 | for i = 1:size(DSFourier.DataCoeffsUntilEnergy,1)
31 |   for j = 1:size(DSFourier.DataCoeffsUntilEnergy, 2)
32 |     % find first coefficient that exceeds eta/2 - due to symmetry as we
33 |     % give the full DFT and not half of it
34 |     DSFourier.DataCoeffsUntilEnergy(i, j) = find(E(i, :) >= FourierEnergy(j)/2, 1);   
35 |   end
36 | end
37 | 
38 | % Keep number of coefficients across all time series so that you preserve
39 | % at least FourierEnergy for DatasetPercentile specified
40 | 
41 | DSFourier.NumCoeffs = ceil(prctile(DSFourier.DataCoeffsUntilEnergy,DatasetPercentile));
42 | 
43 | DSFourier.TrainFourierCompressed = leading_fourier(DSFourier.TrainFourier, DSFourier.NumCoeffs);
44 | DSFourier.TestFourierCompressed = leading_fourier(DSFourier.TestFourier, DSFourier.NumCoeffs);
45 | DSFourier.DataFourierCompressed = [DSFourier.TrainFourierCompressed; DSFourier.TestFourierCompressed];
46 | 
47 | end
48 | 
function x = leading_fourier(x, k)
% leading_fourier(x,k) zeroes the middle columns of x so that only the
%   leading k and the trailing k-1 columns (real input is symmetric) keep
%   their values. Nothing is zeroed when the middle window is empty.
half = floor(size(x, 2) / 2) + 1;
firstZeroed = k + 1;
lastZeroed = 2*half - 1 - k;
x(:, firstZeroed:lastZeroed) = 0;
end


--------------------------------------------------------------------------------
/RunTestVarianceApproximate.m:
--------------------------------------------------------------------------------
 1 | function RunTestVarianceApproximate(DataSetStartIndex, DataSetEndIndex, RepStartIndex, RepEndIndex, Method)  
 2 |     
 3 |     Methods = [cellstr('Random'), 'KShape'];
 4 | 
 5 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 6 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 7 |     Datasets = {dir_struct(3:130).name};
 8 |                              
 9 |     % Sort Datasets
10 |     [Datasets, DSOrder] = sort(Datasets);    
11 |     
12 |     for i = 1:length(Datasets)
13 | 
14 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
15 | 
16 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
17 |                     DS = LoadUCRdataset(char(Datasets(i)));
18 |                     
19 |                     for rep = 1 : 10
20 |                         
21 |                         rep
22 |                         rng(rep);
23 |                         
24 |                         if (rep>=RepStartIndex && rep<=RepEndIndex)
25 | 
26 |                                 if Method==1
27 |                                     Dictionary = dlmread( strcat( 'DICTIONARIESRANDOM/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') );
28 |                                 elseif Method==2
29 |                                     Dictionary = dlmread( strcat( 'DICTIONARIESKSHAPE/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') );
30 |                                 end
31 |                                 
32 |                                 tic;
33 |                                 TestVarianceResults = TestVarianceApproximate(Dictionary);
34 |                                 RunTime = toc;
35 |                                 
36 |                                 % GammaForMaxVarByVarExplained20
37 |                                 gamma = TestVarianceResults(8);
38 |     
39 |                                 Results = [gamma,RunTime];
40 |                                 
41 |                                 dlmwrite( strcat( 'RunTestVarianceApproximate/', 'RESULTS_RunTestVarianceApproximate_', char(Datasets(i)), '_', char(Methods(Method)), '_',num2str(rep) ,'.Results'), Results, 'delimiter', '\t');      
42 |                                 dlmwrite( strcat( 'RunTestVarianceApproximate/', 'RESULTS_RunTestVarianceApproximate_', char(Datasets(i)), '_', char(Methods(Method)), '_',num2str(rep) ,'.TestVarianceResults'), TestVarianceResults, 'delimiter', '\t');      
43 |                             
44 |                             
45 |                             
46 |                         end
47 |                         
48 |                     end
49 |             end
50 |             
51 |             
52 |     end
53 |     
54 |     
55 | end


--------------------------------------------------------------------------------
/RepLearnFINAL.m:
--------------------------------------------------------------------------------
 1 | function [Zexact, Ztop5, Ztop10, Ztop20, Z99per, Z98per, Z97per, Z95per, Z90per, Z85per, Z80per, DistComp, RuntimeNystrom, RuntimeFD]=RepLearnFINAL(X, Dictionary, gamma)
 2 | % Input
 3 | % X: original data (nxm)
 4 | % Dictionary: kShape's centroids (cxm) or randomly chosen time series
 5 | % Dim: Dimensions to keep in the end over the learned representation
 6 | % gamma: kernel's parameter
 7 | % Output
 8 | % Ktilde: Approximated kernel matrix (nxn)
 9 | % Z: New learned representation (nxDim)
10 | tic;
11 | DistComp = 0;
12 | 
13 | [nrowsX, ncolumnsX] = size(X);
14 | [nrowsDic, ncolumnsDic] = size(Dictionary);
15 | 
16 | W = zeros(nrowsDic,nrowsDic);
17 | 
18 | for i=1:nrowsDic
19 |     for j=1:nrowsDic
20 |         W(i,j) = SINK(Dictionary(i,:),Dictionary(j,:),gamma);
21 |         DistComp = DistComp + 1;
22 |     end    
23 | end
24 |         
25 | E = zeros(nrowsX,nrowsDic);
26 | 
27 | for i=1:nrowsX
28 |     disp(i);
29 |        for j=1:nrowsDic
30 |            E(i,j) = SINK(X(i,:),Dictionary(j,:),gamma);
31 |            DistComp = DistComp + 1;
32 |        end    
33 | end
34 | 
35 | [Ve, Va] = eig(W);
36 | va = diag(Va);
37 | inVa = diag(va.^(-0.5));
38 | Zexact = E * Ve * inVa;
39 | 
40 | RuntimeNystrom = toc;
41 | 
42 | Zexact = CheckNaNInfComplex(Zexact);
43 | 
44 | tic;
45 | [BSketch, ~] = FrequentDirections(Zexact, ceil(0.5*size(Zexact,2)));
46 | 
47 | [V2, L2] = eig(BSketch'*BSketch);
48 | %[V2, L2] = eig(Zexact'*Zexact);
49 | eigvalue = diag(L2);     
50 | [dump, index] = sort(-eigvalue);
51 | eigvalue = eigvalue(index);
52 | V2 = V2(:, index);
53 | 
54 | RuntimeFD = toc;
55 | 
56 |     VarExplainedCumSum = cumsum(eigvalue)/sum(eigvalue);
57 | 
58 |     DimFor99 = find(VarExplainedCumSum>=0.99,1);
59 |     DimFor98 = find(VarExplainedCumSum>=0.98,1);
60 |     DimFor97 = find(VarExplainedCumSum>=0.97,1);
61 |     DimFor95 = find(VarExplainedCumSum>=0.95,1);
62 |     DimFor90 = find(VarExplainedCumSum>=0.90,1);
63 |     DimFor85 = find(VarExplainedCumSum>=0.85,1);
64 |     DimFor80 = find(VarExplainedCumSum>=0.80,1);
65 | 
66 |     Ztop5 = CheckNaNInfComplex( Zexact*V2(:,1:5) );
67 |     Ztop10 = CheckNaNInfComplex( Zexact*V2(:,1:10) );
68 |     Ztop20 = CheckNaNInfComplex( Zexact*V2(:,1:20) );
69 | 
70 |     Z99per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor99) );
71 |     Z98per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor98) );
72 |     Z97per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor97) );
73 |     Z95per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor95) );
74 |     Z90per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor90) );
75 |     Z85per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor85) );
76 |     Z80per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor80) );
77 | 
78 | end
79 | 
function Z = CheckNaNInfComplex(Z)
% Replace every entry of Z that is NaN, infinite or not real with 0.
% All other entries are returned unchanged.

    numRows = size(Z,1);
    numCols = size(Z,2);

    for r = 1:numRows
        for c = 1:numCols
            entry = Z(r,c);
            if ~isnan(entry) && ~isinf(entry) && isreal(entry)
                continue;   % finite real value: keep as is
            end
            Z(r,c) = 0;
        end
    end

end


--------------------------------------------------------------------------------
/SIDL/USIDL.m:
--------------------------------------------------------------------------------
 1 | %{
 2 | The MIT License (MIT)
 3 | Copyright (c) 2016 Guoqing Zheng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies of the Software, including modified versions of the software,
14 | and substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | %}
24 | 
function [S, A, Offsets, F_obj] = USIDL(X, y, lambda, K, q, c, epsilon, maxIter, maxInnerIter, runid)
% function [S, A, Offsets, F_obj] = USIDL(X, y, lambda, K, q, c, epsilon, maxIter, maxInnerIter, runid)
%
% Alternates between updating the coefficients/offsets (update_A_par) and
% the bases (update_S) until the relative change of the objective drops
% below epsilon or maxIter outer iterations have been run.
%
% X: n x p, training data, n time series of length p
% y: binary label (-1, 1); accepted for interface compatibility, not used here
% lambda: regularization parameter for the l1 norm
% K: number of bases
% q: length of each basis (must satisfy q <= p)
% c: squared L2-norm bound of a basis, i.e., ||s_k||^2 <= c
% epsilon: convergence tolerance on the relative objective change
% maxIter: maximum number of outer iterations
% maxInnerIter: maximum number of inner iterations passed to the sub-solvers
% runid: string prefix for plotting; accepted but not used here
%
% Returns: S: learned bases (K x q)
%          A: coefficients for the training data (n x K)
%          Offsets: matched zero-based location of each basis (n x K)
%          F_obj: array of objective values, one per outer iteration

  [n,p] = size(X);

  % randi below needs a non-empty range [0, p-q]; fail with a clear message.
  if q > p
    error('USIDL:basisTooLong', ...
          'Basis length q (%d) must not exceed the series length p (%d).', q, p);
  end

  S = randn(K, q);                 % initialize bases
  A = randn(n, K);                 % initialize coefficients
  Offsets = randi([0, p-q], n, K); % initialize offsets

  F_obj = [];

  for iter =1:maxIter
    % update coefficients and matching offsets
    [A, Offsets] = update_A_par(X, S, A, Offsets, lambda, maxInnerIter, epsilon);

    % update bases
    S = update_S(X, S, A, Offsets, lambda, c, maxInnerIter, epsilon);

    % check convergence on the relative change of the objective
    F_all = unsup_obj(X, S, A, Offsets, lambda);

    F_obj(end+1) = F_all;
    % && guarantees that F_obj(end-1) is only touched once two values exist
    if length(F_obj) > 1 && abs(F_obj(end) - F_obj(end-1)) / F_obj(end-1) < epsilon
      fprintf('Converged!\n');
      return
    end

  end
  fprintf('Maximum Iteration Reached!\n');
end
71 | 


--------------------------------------------------------------------------------
/RepLearnFINALSINKComp.m:
--------------------------------------------------------------------------------
 1 | function [Zexact, Ztop5, Ztop10, Ztop20, Z99per, Z98per, Z97per, Z95per, Z90per, Z85per, Z80per, DistComp, RuntimeNystrom, RuntimeFD]=RepLearnFINALSINKComp(X, Dictionary, gamma, coeffs)
 2 | % Input
 3 | % X: original data (nxm)
 4 | % Dictionary: kShape's centroids (cxm) or randomly chosen time series
 5 | % Dim: Dimensions to keep in the end over the learned representation
 6 | % gamma: kernel's parameter
 7 | % Output
 8 | % Ktilde: Approximated kernel matrix (nxn)
 9 | % Z: New learned representation (nxDim)
10 | tic;
11 | DistComp = 0;
12 | 
13 | [nrowsX, ncolumnsX] = size(X);
14 | [nrowsDic, ncolumnsDic] = size(Dictionary);
15 | 
16 | W = zeros(nrowsDic,nrowsDic);
17 | 
18 | for i=1:nrowsDic
19 |     for j=1:nrowsDic
20 |         W(i,j) = SINKCompressed(Dictionary(i,:),Dictionary(j,:),gamma,coeffs);
21 |         DistComp = DistComp + 1;
22 |     end    
23 | end
24 |         
25 | E = zeros(nrowsX,nrowsDic);
26 | 
27 | for i=1:nrowsX
28 |     disp(i);
29 |        for j=1:nrowsDic
30 |            E(i,j) = SINKCompressed(X(i,:),Dictionary(j,:),gamma,coeffs);
31 |            DistComp = DistComp + 1;
32 |        end    
33 | end
34 | 
35 | [Ve, Va] = eig(W);
36 | va = diag(Va);
37 | inVa = diag(va.^(-0.5));
38 | Zexact = E * Ve * inVa;
39 | 
40 | RuntimeNystrom = toc;
41 | 
42 | Zexact = CheckNaNInfComplex(Zexact);
43 | 
44 | tic;
45 | [BSketch, ~] = FrequentDirections(Zexact, ceil(0.5*size(Zexact,2)));
46 | 
47 | [V2, L2] = eig(BSketch'*BSketch);
48 | %[V2, L2] = eig(Zexact'*Zexact);
49 | eigvalue = diag(L2);     
50 | [dump, index] = sort(-eigvalue);
51 | eigvalue = eigvalue(index);
52 | V2 = V2(:, index);
53 | 
54 | RuntimeFD = toc;
55 | 
56 |     VarExplainedCumSum = cumsum(eigvalue)/sum(eigvalue);
57 | 
58 |     DimFor99 = find(VarExplainedCumSum>=0.99,1);
59 |     DimFor98 = find(VarExplainedCumSum>=0.98,1);
60 |     DimFor97 = find(VarExplainedCumSum>=0.97,1);
61 |     DimFor95 = find(VarExplainedCumSum>=0.95,1);
62 |     DimFor90 = find(VarExplainedCumSum>=0.90,1);
63 |     DimFor85 = find(VarExplainedCumSum>=0.85,1);
64 |     DimFor80 = find(VarExplainedCumSum>=0.80,1);
65 | 
66 |     Ztop5 = CheckNaNInfComplex( Zexact*V2(:,1:5) );
67 |     Ztop10 = CheckNaNInfComplex( Zexact*V2(:,1:10) );
68 |     Ztop20 = CheckNaNInfComplex( Zexact*V2(:,1:20) );
69 | 
70 |     Z99per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor99) );
71 |     Z98per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor98) );
72 |     Z97per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor97) );
73 |     Z95per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor95) );
74 |     Z90per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor90) );
75 |     Z85per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor85) );
76 |     Z80per = CheckNaNInfComplex( Zexact*V2(:,1:DimFor80) );
77 | 
78 | end
79 | 
function Z = CheckNaNInfComplex(Z)
% Sanitize a matrix: any entry that is NaN, +/-Inf or not real becomes 0,
% every other entry is left untouched.

    rowCount = size(Z,1);
    colCount = size(Z,2);

    for rowIdx = 1:rowCount
        for colIdx = 1:colCount
            value = Z(rowIdx,colIdx);
            keepValue = ~isnan(value) && ~isinf(value) && isreal(value);
            if ~keepValue
                Z(rowIdx,colIdx) = 0;
            end
        end
    end

end


--------------------------------------------------------------------------------
/RunKMCompGAK.m:
--------------------------------------------------------------------------------
 1 | function RunKMCompGAK(DataSetStartIndex, DataSetEndIndex, TrainKM, sigma)
 2 | 
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, ~] = sort(Datasets);
10 | 
11 |     disp(sigma);
12 |     
13 |     rng(DataSetStartIndex*sigma);
14 |     pause(180*rand);
15 |     
16 |     distcomp.feature( 'LocalUseMpiexec', false )
17 | 
18 |     poolobj = gcp('nocreate');
19 |     delete(poolobj);
20 |     
21 |     parpool(20);
22 | 
23 |     for i = 1:length(Datasets)
24 |         
25 |         if (i>=DataSetStartIndex && i<=DataSetEndIndex)
26 |             
27 |             Results = zeros(length(Datasets),4);
28 |             
29 |             disp(['Dataset being processed: ', char(Datasets(i))]);
30 | 
31 |             DS = LoadUCRdataset(char(Datasets(i)));
32 | 
33 |             % Sampling to estimate sigma appropriately
34 |             dists = [];
35 |             for l=1:20
36 |                 rng(l);
37 |                 x = DS.Train(ceil(rand*DS.TrainInstancesCount),:);
38 |                 y = DS.Train(ceil(rand*DS.TrainInstancesCount),:);
39 |                 w = [];
40 |                 for p=1:length(DS.Train(1,:))
41 |                     w(p)= ED(x(p),y(p));
42 |                 end
43 |                 dists=[dists,w];
44 |             end
45 | 
46 |             sigma2 = sigma*median(dists)*sqrt(length(DS.Train(1,:)));
47 | 
48 |             
49 |             if (TrainKM==1)
50 |                 
51 |                 tic;
52 |                 [KMTrain, DistComp] = KMCompGAK(DS.Train,sigma2);
53 |                 Results(i,1) = DistComp; 
54 |                 Results(i,2) = toc;
55 | 
56 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesGAK/',char(Datasets(i)),'/', char(Datasets(i)), '_GAK_Sigma_', num2str(sigma) ,'_TRAIN.kernelmatrix'), KMTrain, 'delimiter', '\t');
57 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompGAK/', 'RunKMCompGAK_GAK_TrainToTrain_Sigma_', num2str(sigma), '_TrainToTrain_Dataset_' , num2str(i) ), Results, 'delimiter', '\t');
58 | 
59 | 
60 |             else
61 |                 tic;
62 |                 [KMTestToTrain, DistComp2]= KMCompGAK_TestToTrain(DS.Test,DS.Train,sigma2);
63 | 
64 |                 Results(i,3) = DistComp2;
65 |                 Results(i,4) = toc;
66 |                 
67 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesGAK/',char(Datasets(i)),'/', char(Datasets(i)), '_GAK_Sigma_', num2str(sigma) ,'_TESTTOTRAIN.kernelmatrix'), KMTestToTrain, 'delimiter', '\t');          
68 |                 dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompGAK/', 'RunKMCompGAK_GAK_TestToTrain_Sigma_', num2str(sigma), '_TestToTrain_Dataset_' , num2str(i) ), Results, 'delimiter', '\t');
69 |  
70 |                 
71 |             end
72 | 
73 |         end
74 |         
75 |     end
76 |     
77 |     poolobj = gcp('nocreate');
78 |     delete(poolobj);
79 |     
80 | end


--------------------------------------------------------------------------------
/SIDL/main_example.m:
--------------------------------------------------------------------------------
 1 | %{
 2 | The MIT License (MIT)
 3 | Copyright (c) 2016 Guoqing Zheng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies of the Software, including modified versions of the software,
14 | and substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | %}
24 | 
% Example script: learn a dictionary with USIDL on the training split of one
% dataset, code the test split with that dictionary, report the test
% objective per series and save the whole workspace under the run id.

% Fix the random seed so the random initializations below are repeatable.
rng(1);

dataset_name = 'Trace';

train_file = strcat(dataset_name, '_TRAIN');
test_file  = strcat(dataset_name, '_TEST');

train_data = load(train_file);
test_data = load(test_file);


% Column 1 holds the label, the remaining columns hold the series values.
train_X = train_data(:, [2:end]);
test_X  = test_data(:, [2:end]);

train_y = train_data(:, 1);
test_y  = test_data(:, 1);

% NOTE: the second call overwrites p with the test series length; the rest
% of the script uses that value for both splits.
[n_train, p] = size(train_X);
[n_test, p] = size(test_X);

c = 100;            % passed to USIDL as its basis norm bound c
epsilon = 1e-5;     % convergence tolerance
maxIter = 1e3;      % outer iterations for USIDL; also the cap for test-set coding
maxInnerIter = 5;   % inner iterations for USIDL

% loop through a set of variables
Ks = [20];%, 20, 50, 100];  
lambdas = [1];%0.1, 1, 10, 100];
rs = [0.25];%, 0.5, 0.25]; 

for K = Ks
  for lambda = lambdas
    % One random coefficient initialization per (K, lambda), reused for every r.
    A_rand_init = randn(n_test, K);
    for r = rs
      % Basis length as a fraction r of the series length.
      q = ceil(p*r);
      % run id 
      runid = strcat(dataset_name, '_l_', num2str(lambda), '_K_', num2str(K), '_q_', num2str(q));

      % train SIDL on training set
      tic;
      [S, A, Offsets] = USIDL(train_X, train_y, lambda, K, q, c, epsilon, maxIter, maxInnerIter, runid);

      learn_time = toc;
      fprintf('\n##### TRAINING TIME on TRAIN SET (K=%f, lambda=%f, r=%f): %f secs.\n\n', K, lambda, r, learn_time);

      % learn sparse coding on test set with dictionary learned from training set
      A_test = A_rand_init;
      Offsets_test = randi([0, p-q], n_test, K);
      tic;
      % maxIter (not maxInnerIter) is passed as the iteration cap here.
      [A_test, Offsets_test, F_all] = update_A_par(test_X, S, A_test, Offsets_test, lambda, maxIter, epsilon);
      fit_time = toc;
      
      % get reconstruction for SIDL: the objective is evaluated with 0 in the
      % lambda position and divided by the number of test series
      test_recons_error_sidl = unsup_obj(test_X, S, A_test, Offsets_test, 0) / n_test;
      
      fprintf('\n\n##### RECONS ERROR on TEST SET (K=%f, lambda=%f) SIDL (r=%f): %f\n\n', K, lambda, r, test_recons_error_sidl);

      % Saves every variable currently in the workspace to a file named runid.
      save(runid);
    end
  end
end
86 | 
87 | 


--------------------------------------------------------------------------------
/RunOneNNSINKCompressed.m:
--------------------------------------------------------------------------------
 1 | function RunOneNNSINKCompressed(DataSetStartIndex, DataSetEndIndex, FourierEnergy, DatasetPercentile)  
 2 | % FourierEnergy is percentage e.g., 0.99
 3 | % DatasetPercentile is percentage in the form of 99, 95 etc.
 4 | 
 5 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 6 |     dir_struct = dir('/rigel/dsi/users/ikp2103/JOPA/GRAIL2/UCR2018/');
 7 |     Datasets = {dir_struct(3:130).name};
 8 |                      
 9 |     % Sort Datasets
10 |     
11 |     [Datasets, DSOrder] = sort(Datasets);  
12 |     
13 |     for i = 1:length(Datasets)
14 | 
15 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
16 | 
17 |                     LeaveOneOutAccuracies = zeros(length(Datasets),20);
18 |                     LeaveOneOutRuntimes = zeros(length(Datasets),20);
19 |                     
20 |                     Results = zeros(length(Datasets),10);
21 |                 
22 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
23 |                     DS = LoadUCRdataset(char(Datasets(i)));
24 |                     
25 |                     tic;
26 |                     
27 |                     DSFourier = DatasetToFourier(DS, FourierEnergy, DatasetPercentile);
28 |                     
29 |                     RTPreprocessing = toc;
30 |                     
31 |                     gammaValues = 1:20;
32 |                     
33 |                     %parfor gamma = 1:20
34 |                     for gammaIter = 1:20
35 | 
36 |                         gammaIter
37 |                         tic;
38 |                         acc = LOOCSINKCompressed(DSFourier,gammaValues(gammaIter));
39 |                         LeaveOneOutRuntimes(i,gammaIter) = toc;
40 |                         LeaveOneOutAccuracies(i,gammaIter) = acc;
41 |                     end
42 |                     
43 |                     [MaxLeaveOneOutAcc,MaxLeaveOneOutAccGamma] = max(LeaveOneOutAccuracies(i,:));
44 | 
45 |                     tic;
46 |                     OneNNAcc = OneNNClassifierSINKCompressed(DSFourier, gammaValues(MaxLeaveOneOutAccGamma));
47 |                     
48 |                     RTOneNN = toc;
49 |                     
50 |                     Results(i,1) = DSFourier.len;
51 |                     Results(i,2) = DSFourier.fftlength;
52 |                     Results(i,3) = DSFourier.NumCoeffs;
53 |                     
54 |                     Results(i,4) = RTPreprocessing;
55 |                     
56 |                     Results(i,5) = gammaValues(MaxLeaveOneOutAccGamma);
57 |                     Results(i,6) = MaxLeaveOneOutAcc;
58 |                     Results(i,7) = LeaveOneOutRuntimes(i,MaxLeaveOneOutAccGamma);
59 |                     Results(i,8) = sum(LeaveOneOutRuntimes(i,:));
60 |                     Results(i,9) = RTOneNN;
61 |                     Results(i,10) = OneNNAcc;
62 |                     
63 |                     dlmwrite( strcat('/rigel/dsi/users/ikp2103/JOPA/GRAIL2/RunOneNNSINKCompressed/', 'RESULTS_RunOneNNSINKCompressed_FourierEnergy_', num2str(FourierEnergy), '_DatasetPercentile_', num2str(DatasetPercentile), '_Dataset_' ,num2str(i)), Results, 'delimiter', '\t');
64 |    
65 |             end
66 |             
67 |             
68 |     end
69 |     
70 | end


--------------------------------------------------------------------------------
/SIDL/update_A_par.m:
--------------------------------------------------------------------------------
 1 | %{
 2 | The MIT License (MIT)
 3 | Copyright (c) 2016 Guoqing Zheng
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies of the Software, including modified versions of the software,
14 | and substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | %}
24 | 
function [A, Offsets, F_all] = update_A_par(X, S, A, Offsets, lambda, maxIter, epsilon)
% Update coefficients and matching offsets with the bases held fixed.
%
% X: n x p        data, one series per row
% S: K x q        bases
% A: n x K        coefficients (updated and returned)
% Offsets: n x K  zero-based offset of each basis in each series (updated)
% lambda          soft-threshold level on the best absolute dot product
% maxIter         maximum number of sweeps over all series
% epsilon         stop when the relative change of the objective is below it
%
% F_all is the objective value (unsup_obj) after the last completed sweep.

  [n,p] = size(X);
  [KK,q] = size(S);
  % Row t+1 of seg_idx holds the indices t+1 .. t+q, i.e. one row per
  % admissible zero-based offset t = 0 .. p-q.
  seg_idx = bsxfun(@plus, repmat([1:q], p-q+1, 1), [0:p-q]');

  F_obj = [];
  for iter = 1:maxIter
    for i=1:n % compute activation and matching offset for X_i
      x = X(i,:);
      % NOTE(review): op_shift presumably places every basis at its current
      % offset inside a length-p row - confirm against op_shift.m
      shifted_S = op_shift(S, Offsets(i,:), p);

      %for k=1:KK % compute for base k % RAND PERM DOESN'T HURT
      for k=randperm(KK) % compute for base k % RAND PERM DOESN'T HURT
        base = S(k,:);
        temp_a = A(i,:);
        temp_a(k) = 0; % exclude alpha_k

        % residual of x without the contribution of basis k
        x_residue = x - temp_a * shifted_S;
        base_norm2 = norm(base)^2; %||s_k||^2

        % dot product of the basis with every length-q window of the residual
        segs = x_residue(seg_idx);
        dot_prods = segs * base';

        [M_dp, M_idx] = max(abs(dot_prods));

        % soft-threshold the best match; t_k_star is only defined (and only
        % used below) when the coefficient is non-zero
        if M_dp <= lambda
          a_k_star = 0;
        else
          a_k_star = sign(dot_prods(M_idx)) * (M_dp - lambda) / base_norm2;
          t_k_star = M_idx -1;
        end

        A(i, k) = a_k_star;
        if a_k_star ~= 0
          % move basis k to its new offset for the remaining updates of X_i
          shifted_S(k,:) = 0;
          shifted_S(k, [t_k_star + 1: t_k_star + q]) = base;
          Offsets(i, k) = t_k_star;
        end
      end

    end

    F_all = unsup_obj(X, S, A, Offsets, lambda);
    %fprintf('Current F_all: %f\n', F_all);
    F_obj(end+1) = F_all;
    % && guarantees that F_obj(end-1) is only touched once two values exist
    if length(F_obj) > 1 && abs(F_obj(end) - F_obj(end-1)) / F_obj(end-1) < epsilon
      %fprintf('Updating A: Converged!\n\n');
      return
    end

  end

  %fprintf('Updating A: Reached max iter.\n\n');
end
88 | 
89 | 


--------------------------------------------------------------------------------
/RunTestVarianceExact.m:
--------------------------------------------------------------------------------
 1 | function RunTestVarianceExact(DataSetStartIndex, DataSetEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                              
 7 |     % Sort Datasets
 8 |     [Datasets, DSOrder] = sort(Datasets);   
 9 |     
10 |     Results = zeros(length(Datasets),55);
11 |     
12 |     %rng(ceil(DataSetStartIndex*100))
13 |     %pause(300*rand);
14 |     
15 |     poolobj = gcp('nocreate');
16 |     delete(poolobj);
17 |     
18 |     parpool(20);
19 |     
20 |     for i = 1:length(Datasets)
21 | 
22 |             if (i>=DataSetStartIndex & i<=DataSetEndIndex)
23 | 
24 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
25 |                     DS = LoadUCRdataset(char(Datasets(i)));
26 |                                        
27 |                     VarExplainedCumSumMatrix = zeros(20,DS.DataInstancesCount);
28 |                     StatisticsForGamma = zeros(20,14);
29 |                     
30 |                     parfor gamma = 1 : 20
31 |                         
32 |                         gamma
33 |                         rng(gamma);
34 |                   
35 |                         KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix'));
36 |                         
37 |                         KM = KM(1:7200,1:7200);
38 |                         
39 |                         [Variance,VarExplainedTop5,VarExplainedTop10,VarExplainedTop20,DimFor98,DimFor95,DimFor90,DimFor85,DimFor80,VarExplainedCumSum]=TestVarianceExact(KM);
40 |                         
41 |                         Z20 = dlmread( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z20') );
42 |                         Z90per = dlmread( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z90per') );
43 |                         
44 |                         LOOCAccuracyZ20 = LeaveOneOutClassifierZREP(DS,Z20);
45 |                         LOOCAccuracyZ90per = LeaveOneOutClassifierZREP(DS,Z90per);
46 |                         
47 |                         OneNNAccuracyZ20 = OneNNClassifierZREP(DS,Z20);
48 |                         OneNNAccuracyZ90per = OneNNClassifierZREP(DS,Z90per);
49 |                         
50 |                         VarExplainedCumSumMatrix(gamma,:) = VarExplainedCumSum;
51 |                         StatisticsForGamma(gamma,:) = [Variance,VarExplainedTop5,VarExplainedTop10,VarExplainedTop20,DimFor98,DimFor95,DimFor90,DimFor85,DimFor80,trapz(1:size(KM,1),VarExplainedCumSum),LOOCAccuracyZ20,LOOCAccuracyZ90per,OneNNAccuracyZ20,OneNNAccuracyZ90per];
52 | 
53 |                     end
54 | 
55 |                     dlmwrite( strcat('RunTestVarianceExactVarExplainedCumSum/','RESULTS_RunTestVarianceExactVarExplainedCumSum_', num2str(i)), VarExplainedCumSumMatrix, 'delimiter', '\t');
56 |                     dlmwrite( strcat('RunTestVarianceExactStatisticsForGamma/','RESULTS_RunTestVarianceExactStatisticsForGamma_', num2str(i)), StatisticsForGamma, 'delimiter', '\t');
57 |    
58 |                     
59 |             end
60 |             
61 |     end
62 |     
63 |     poolobj = gcp('nocreate');
64 |     delete(poolobj);
65 |     
66 | end


--------------------------------------------------------------------------------
/RunRepLearningKM.m:
--------------------------------------------------------------------------------
 1 | function RunRepLearningKM(DataSetStartIndex, DataSetEndIndex, GammaStartIndex, GammaEndIndex)  
 2 |     
 3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 5 |     Datasets = {dir_struct(3:130).name};
 6 |                      
 7 |     % Sort Datasets
 8 |     
 9 |     [Datasets, DSOrder] = sort(Datasets);   
10 |    
11 |     for i = 1:length(Datasets)
12 | 
13 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
14 | 
15 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
16 |                     DS = LoadUCRdataset(char(Datasets(i)));
17 |                     
18 |                     Results = zeros(length(Datasets),20);
19 |                     
20 |                     for gamma = 1 : 20
21 |                         
22 |                         if (gamma>=GammaStartIndex && gamma<=GammaEndIndex)
23 |                     
24 |                             gamma 
25 |                             
26 |                             KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix'));
27 | 
28 |                             [Z99per,Z98per,Z97per,Z95per,Z90per,Z85per,Z80per,Ztop20,Ztop10,Ztop5,RepLearnTime]=RepLearnKM(KM);
29 | 
30 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z99per'), Z99per, 'delimiter', '\t');
31 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z98per'), Z98per, 'delimiter', '\t');
32 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z97per'), Z97per, 'delimiter', '\t');
33 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z95per'), Z95per, 'delimiter', '\t');
34 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z90per'), Z90per, 'delimiter', '\t');
35 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z85per'), Z85per, 'delimiter', '\t');
36 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z80per'), Z80per, 'delimiter', '\t');
37 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z20'), Ztop20, 'delimiter', '\t');
38 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z10'), Ztop10, 'delimiter', '\t');
39 |                             dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z5'), Ztop5, 'delimiter', '\t');
40 | 
41 |                             Results(i,gamma)=RepLearnTime;
42 |                             
43 |                             dlmwrite( strcat('RunRepLearningKM/','RunRepLearningKM_Gamma_', num2str(gamma), '_Dataset_', num2str(i)), Results, 'delimiter', '\t');
44 |                     
45 |                             
46 |                         end
47 |                         
48 |                     end
49 |                     
50 |                     
51 |                     
52 |             end
53 |             
54 |     end
55 |     
56 | end
57 | 


--------------------------------------------------------------------------------
/RWS/rws_VaryingR_CV_R128.m:
--------------------------------------------------------------------------------
 1 | % This script generates low-rank approximation of latent kernel matrix using 
 2 | % random features approach based on dtw like distance for UCR time-series 
 3 | % datasets. Expts A: investigate performance changes when varying R using
 4 | % the parameters learned from 10-folds cross validation with R = 128.
 5 | %
 6 | % Author: Lingfei Wu
 7 | % Date: 01/20/2019
 8 | 
 9 | clear,clc
10 | nthreads = 12;
11 | parpool('local', nthreads);
12 | addpath(genpath('utilities'));
13 | file_dir = './datasets/';
14 | 
15 | % List all datasets
16 | filename_list = {'Gun_Point'};
17 | 
18 | DMin = 1;    
19 | R_list = [4 8 16 32 64]; % Generally, Large R, Better Accuracy.
20 | info = [];
21 | for jj = 1:length(filename_list)
22 |     filename = filename_list{jj};
23 |     if strcmp(filename, 'Gun_Point')
24 |         sigma = 4.46;
25 |         DMax = 25;
26 |         lambda_inverse = 10;
27 |     end 
28 |     
29 |     Accu_best = zeros(2,length(R_list));
30 |     telapsed_liblinear = zeros(1,length(R_list));
31 |     real_total_dtw_time = zeros(1,length(R_list));
32 |     real_user_dtw_time = zeros(1,length(R_list));
33 |     for j = 1:length(R_list)
34 |         R = R_list(j);
35 |         [trainData,testData,telapsed_fea_gen]=rws_GenFea(file_dir,...
36 |             filename,sigma,R,DMin,DMax);
37 |         trainy = trainData(:,1);
38 |         testy = testData(:,1);
39 |         % convert user labels to uniform format binary(-1,1) & multiclasses (1,2,..)
40 |         labels = unique(trainy);
41 |         numClasses = length(labels);
42 |         if numClasses > 2
43 |             for i=numClasses:-1:1
44 |                 ind = (trainy == labels(i));
45 |                 trainy(ind) = i;
46 |             end
47 |             for i=numClasses:-1:1
48 |                 ind = (testy == labels(i));
49 |                 testy(ind) = i;
50 |             end
51 |         else
52 |            ind = (trainy == labels(1));
53 |             trainy(ind) = -1;
54 |             ind = (trainy == labels(2));
55 |             trainy(ind) = 1;
56 |             ind = (testy == labels(1));
57 |             testy(ind) = -1;
58 |             ind = (testy == labels(2));
59 |             testy(ind) = 1;
60 |         end
61 | 
62 |         disp('------------------------------------------------------');
63 |         disp('LIBLinear performs basic grid search by varying lambda');
64 |         disp('------------------------------------------------------');
65 |         trainFeaX = trainData(:,2:end);
66 |         testFeaX = testData(:,2:end);
67 | 
68 |         % Linear Kernel
69 |         timer_start = tic;
70 |         s2 = num2str(lambda_inverse);
71 |         s1 = '-s 2 -e 0.0001 -q -c ';
72 |         s = [s1 s2];
73 |         model_linear = train(trainy, sparse(trainFeaX), s);
74 |         [train_predict_label, train_accuracy, train_dec_values] = ...
75 |             predict(trainy, sparse(trainFeaX), model_linear);
76 |         [test_predict_label, test_accuracy, test_dec_values] = ...
77 |             predict(testy, sparse(testFeaX), model_linear);
78 |         Accu_best(1,j) = train_accuracy(1);
79 |         Accu_best(2,j) = test_accuracy(1);
80 |         telapsed_liblinear(1,j) = toc(timer_start)
81 |         real_total_dtw_time(1,j) = telapsed_fea_gen.real_total_dtw_time;
82 |         real_user_dtw_time(1,j) = telapsed_fea_gen.user_dtw_time/nthreads;
83 |     end
84 |     info.Accu_best = Accu_best;
85 |     info.real_total_dtw_time = real_total_dtw_time;
86 |     info.real_user_dtw_time = real_user_dtw_time;
87 |     info.telapsed_liblinear = telapsed_liblinear;
88 |     info.R = R_list;
89 |     info.DMin = DMin;
90 |     info.DMax = DMax;
91 |     info.sigma = sigma;
92 |     info.lambda_inverse = lambda_inverse;
93 |     disp(info);
94 |     savefilename = [filename '_rws_VaryingR_CV_R128'];
95 |     save(savefilename,'info')
96 | end


--------------------------------------------------------------------------------
/RWS/rws_VaryingR_CV_R128_mulvar.m:
--------------------------------------------------------------------------------
 1 | % This script generates low-rank approximation of latent kernel matrix using 
 2 | % random features approach based on dtw like distance for UCR time-series 
 3 | % datasets. Expts A: investigate performance changes when varying R using
 4 | % the parameters learned from 10-folds cross validation with R = 128.
 5 | %
 6 | % Author: Lingfei Wu
 7 | % Date: 01/20/2019
 8 | 
 9 | clear,clc
10 | nthreads = 12;
11 | % Reuse an open pool: calling parpool while one is running raises an error
12 | if isempty(gcp('nocreate'))
13 |     parpool('local', nthreads);
14 | end
15 | addpath(genpath('utilities'));
16 | file_dir = './datasets/';
17 | 
18 | % List all datasets
19 | filename_list = {'auslan'};
20 | 
21 | DMin = 1;    
22 | R_list = [4 8 16 32 64 128 256 512];
23 | info = [];
24 | for jj = 1:length(filename_list)
25 |     filename = filename_list{jj};
26 |     if strcmp(filename, 'auslan')
27 |         sigma = 0.79;
28 |         DMax = 25;
29 |         lambda_inverse = 10;
30 |     end
31 |     
32 |     Accu_best = zeros(2,length(R_list)); % row 1: train accuracy, row 2: test
33 |     telapsed_liblinear = zeros(1,length(R_list));
34 |     real_total_dtw_time = zeros(1,length(R_list));
35 |     real_user_dtw_time = zeros(1,length(R_list));
36 |     for j = 1:length(R_list)
37 |         R = R_list(j);
38 |         [trainData,testData,telapsed_fea_gen]=rws_GenFea_mulvar(file_dir,...
39 |             filename,sigma,R,DMin,DMax);
40 |         trainy = trainData(:,1);
41 |         testy = testData(:,1);
42 |         % convert user labels to uniform format binary(-1,1) & multiclasses (1,2,..)
43 |         labels = unique(trainy);
44 |         numClasses = length(labels);
45 |         assert(numClasses >= 2, 'Training data must contain at least two classes.');
46 |         % Masks are taken from the untouched label vectors, so a value that was
47 |         % already rewritten can never be matched (and remapped) a second time.
48 |         trainy_raw = trainy;
49 |         testy_raw = testy;
50 |         if numClasses > 2
51 |             for i = 1:numClasses
52 |                 trainy(trainy_raw == labels(i)) = i;
53 |                 testy(testy_raw == labels(i)) = i;
54 |             end
55 |         else
56 |             trainy(trainy_raw == labels(1)) = -1;
57 |             trainy(trainy_raw == labels(2)) = 1;
58 |             testy(testy_raw == labels(1)) = -1;
59 |             testy(testy_raw == labels(2)) = 1;
60 |         end
61 | 
62 |         disp('------------------------------------------------------');
63 |         disp('LIBLinear performs basic grid search by varying lambda');
64 |         disp('------------------------------------------------------');
65 |         trainFeaX = trainData(:,2:end);
66 |         testFeaX = testData(:,2:end);
67 | 
68 |         % Linear Kernel
69 |         timer_start = tic;
70 |         s2 = num2str(lambda_inverse);
71 |         s1 = '-s 2 -e 0.0001 -q -c '; % for regular liblinear
72 | %         s1 = '-s 2 -e 0.0001 -n 8 -q -c '; % for omp version
73 |         s = [s1 s2];
74 |         model_linear = train(trainy, sparse(trainFeaX), s);
75 |         [train_predict_label, train_accuracy, train_dec_values] = ...
76 |             predict(trainy, sparse(trainFeaX), model_linear);
77 |         [test_predict_label, test_accuracy, test_dec_values] = ...
78 |             predict(testy, sparse(testFeaX), model_linear);
79 |         Accu_best(1,j) = train_accuracy(1);
80 |         Accu_best(2,j) = test_accuracy(1);
81 |         telapsed_liblinear(1,j) = toc(timer_start);
82 |         real_total_dtw_time(1,j) = telapsed_fea_gen.real_total_dtw_time;
83 |         real_user_dtw_time(1,j) = telapsed_fea_gen.user_dtw_time/nthreads;
84 |     end
85 |     info.Accu_best = Accu_best;
86 |     info.real_total_dtw_time = real_total_dtw_time;
87 |     info.real_user_dtw_time = real_user_dtw_time;
88 |     info.telapsed_liblinear = telapsed_liblinear;
89 |     info.R = R_list;
90 |     info.DMin = DMin;
91 |     info.DMax = DMax;
92 |     info.sigma = sigma;
93 |     info.lambda_inverse = lambda_inverse;
94 |     disp(info);
95 |     savefilename = [filename '_rws_VaryingR_CV_R128'];
96 |     save(savefilename,'info')
97 | end
98 | 


--------------------------------------------------------------------------------
/mySVD.m:
--------------------------------------------------------------------------------
  1 | function [U, S, V] = mySVD(X,ReducedDim)
  2 | %mySVD    Accelerated singular value decomposition.
  3 | %   [U,S,V] = mySVD(X) produces a diagonal matrix S, of the  
  4 | %   dimension as the rank of X and with nonnegative diagonal elements in
  5 | %   decreasing order, and unitary matrices U and V so that
  6 | %   X = U*S*V'.
  7 | %
  8 | %   [U,S,V] = mySVD(X,ReducedDim) produces a diagonal matrix S, of the  
  9 | %   dimension as ReducedDim and with nonnegative diagonal elements in
 10 | %   decreasing order, and unitary matrices U and V so that
 11 | %   Xhat = U*S*V' is the best approximation (with respect to F norm) of X
 12 | %   among all the matrices with rank no larger than ReducedDim.
 13 | %
 14 | %   Based on the size of X, mySVD computes the eigvectors of X*X^T or X^T*X
 15 | %   first, and then convert them to the eigenvectors of the other.  
 16 | %
 17 | %   See also SVD.
 18 | %
 19 | %   version 2.0 --Feb/2009 
 20 | %   version 1.0 --April/2004 
 21 | %
 22 | %   Written by Deng Cai (dengcai AT gmail.com)
 23 | %                                                   
 24 | 
 25 | MAX_MATRIX_SIZE = 10000; % You can change this number according your machine computational power
 26 | EIGVECTOR_RATIO = 0.1; % You can change this number according your machine computational power
 27 | 
 28 | 
 29 | if ~exist('ReducedDim','var')
 30 |     ReducedDim = 0;
 31 | end
 32 | 
 33 | [nSmp, mFea] = size(X);
 34 | if mFea/nSmp > 1.0713
 35 |     ddata = X*X';
 36 |     ddata = max(ddata,ddata');
 37 |     
 38 |     dimMatrix = size(ddata,1);
 39 |     if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO)
 40 |         option = struct('disp',0);
 41 |         [U, eigvalue] = eigs(ddata,ReducedDim,'la',option);
 42 |         eigvalue = diag(eigvalue);
 43 |     else
 44 |         if issparse(ddata)
 45 |             ddata = full(ddata);
 46 |         end
 47 |         
 48 |         [U, eigvalue] = eig(ddata);
 49 |         eigvalue = diag(eigvalue);
 50 |         [dump, index] = sort(-eigvalue);
 51 |         eigvalue = eigvalue(index);
 52 |         U = U(:, index);
 53 |     end
 54 |     clear ddata;
 55 |     
 56 |     maxEigValue = max(abs(eigvalue));
 57 |     eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10);
 58 |     eigvalue(eigIdx) = [];
 59 |     U(:,eigIdx) = [];
 60 |     
 61 |     if (ReducedDim > 0) && (ReducedDim < length(eigvalue))
 62 |         eigvalue = eigvalue(1:ReducedDim);
 63 |         U = U(:,1:ReducedDim);
 64 |     end
 65 |     
 66 |     eigvalue_Half = eigvalue.^.5;
 67 |     S =  spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half));
 68 | 
 69 |     if nargout >= 3
 70 |         eigvalue_MinusHalf = eigvalue_Half.^-1;
 71 |         V = X'*(U.*repmat(eigvalue_MinusHalf',size(U,1),1));
 72 |     end
 73 | else
 74 |     ddata = X'*X;
 75 |     ddata = max(ddata,ddata');
 76 |     
 77 |     dimMatrix = size(ddata,1);
 78 |     if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO)
 79 |         option = struct('disp',0);
 80 |         [V, eigvalue] = eigs(ddata,ReducedDim,'la',option);
 81 |         eigvalue = diag(eigvalue);
 82 |     else
 83 |         if issparse(ddata)
 84 |             ddata = full(ddata);
 85 |         end
 86 |         
 87 |         [V, eigvalue] = eig(ddata);
 88 |         eigvalue = diag(eigvalue);
 89 |         
 90 |         [dump, index] = sort(-eigvalue);
 91 |         eigvalue = eigvalue(index);
 92 |         V = V(:, index);
 93 |     end
 94 |     clear ddata;
 95 |     
 96 |     maxEigValue = max(abs(eigvalue));
 97 |     eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10);
 98 |     eigvalue(eigIdx) = [];
 99 |     V(:,eigIdx) = [];
100 |     
101 |     if (ReducedDim > 0) && (ReducedDim < length(eigvalue))
102 |         eigvalue = eigvalue(1:ReducedDim);
103 |         V = V(:,1:ReducedDim);
104 |     end
105 |     
106 |     eigvalue_Half = eigvalue.^.5;
107 |     S =  spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half));
108 |     
109 |     eigvalue_MinusHalf = eigvalue_Half.^-1;
110 |     U = X*(V.*repmat(eigvalue_MinusHalf',size(V,1),1));
111 | end
112 | 


--------------------------------------------------------------------------------
/RWS/README.md:
--------------------------------------------------------------------------------
 1 | # RandomWarpingSeries
 2 | RandomWarpingSeries (RWS) is a simple code for generating the vector representation of time-series for time-series classification, clustering, and regression.
 3 | This code is a simple implementation (mix of Matlab, Matlab MEX, and C) of the RWS method in (Wu et al, "Random Warping Series: A Random Features Method for Time-Series Embedding", AISTATS'18). For more information about RWS, please refer to the paper: http://proceedings.mlr.press/v84/wu18b/wu18b.pdf.
 4 | 
 5 | 
 6 | # Prerequisites
 7 | 
 8 | There are at least two required tool packages in order to run this code. You need to download DTW and either LibLinear or LibSVM, and compile the corresponding MEX files for your operating system (Mac, Linux, or Windows).
 9 | 
10 | For DTW: https://www.mathworks.com/matlabcentral/fileexchange/43156-dynamic-time-warping-dtw <br/>
11 | For LibSVM: https://github.com/cjlin1/libsvm or LibLinear: https://github.com/cjlin1/liblinear <br/>
12 | 
13 | 
14 | For single-variate time-series datasets, you can download some datasets from the UCR time-series collections (http://www.cs.ucr.edu/~eamonn/time_series_data/) or from the UEA time-series collection (http://www.timeseriesclassification.com/). <br/> 
15 | For multi-variate time-series datasets, you can download some datasets from UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/index.php) or from your favorite applications. <br/>
16 | It is generally advised to perform Z-normalization on data before feeding it to our time-series embedding codes. 
17 | 
18 | 
19 | # How To Run The Codes
20 | Note that, in order to achieve the best performance, the hyperparameters DMax, sigma, and even lambda_inverse (for classification using SVM) have to be searched (using cross validation or other techniques). This is a crucial step for RWS.  
21 | 
22 | To generate the RWS and use RWS for time-series classification or clustering tasks, you need:
23 | 
24 | (1) If you use Linux or Mac, you should be fine to skip compiling MEX for DTW, LibLinear, and LibSVM. Otherwise, you need to download them from the above links and compile them in their Matlab folders. Then you need to copy these MEX files into the utilities folder.
25 | 
26 | (2) Open Matlab terminal console and run rws_gridsearch_CV.m on single-variate time-series for performing K-fold cross validation for searching good hyperparameters 
27 |     The RWS embeddings that perform the best on the dev data will be saved.
28 | 
29 | (3) Open Matlab terminal console and run rws_gridsearch_CV_mulvar.m on multi-variate time-series for performing K-fold cross validation for searching good hyperparameters 
30 |     The RWS embeddings that perform the best on the dev data will be saved. 
31 | 
32 | (4) Test the model by running the following code rws_VaryingR_CV_R128.m on single-variate time-series and rws_VaryingR_CV_R128_mulvar.m on multi-variate time-series using best parameters from CV
33 |     The testing result on different data splits will be saved. 
34 | 
35 | (5) To generate RWS embedding only, please run this code rws_GenFea_example.m on single-variate time-series and rws_GenFea_example_mulvar.m on multi-variate time-series. <br/> 
36 | 
37 | Note that there are no default values for the hyperparameters DMax and sigma. You should search for the best values before generating RWS time-series embeddings for your applications. In general, the larger the parameter R is, the better the quality of the embedding is. 
38 | 
39 | 
40 | # How To Cite The Codes
41 | Please cite our work if you like or are using our codes for your projects! Let me know if you have any questions: lwu at email.wm.edu.
42 | 
43 | Lingfei Wu, Ian En-Hsu Yen, Jinfeng Yi, Fangli Xu, Qi Lei, and Michael Witbrock, "Random Warping Series: A Random Features Method for Time-Series Embedding", AISTATS'18.
44 | 
45 | @inproceedings{wu2018random,  <br/>
46 |   title={Random Warping Series: A Random Features Method for Time-Series Embedding},  <br/>
47 |   author={Wu, Lingfei and Yen, Ian En-Hsu and Yi, Jinfeng and Xu, Fangli and Lei, Qi and Witbrock, Michael},  <br/>
48 |   booktitle={International Conference on Artificial Intelligence and Statistics},  <br/>
49 |   pages={793--802},  <br/>
50 |   year={2018}  <br/>
51 | }
52 | 
53 | ------------------------------------------------------
54 | Contributors: Lingfei Wu <br/>
55 | Created date: January 20, 2019 <br/>
56 | Last update: January 20, 2019 <br/>
57 | 


--------------------------------------------------------------------------------
/OneNNClassifierLB.m:
--------------------------------------------------------------------------------
  1 | function [acc,pruningpower] = OneNNClassifierLB(DS,ZReduced,LBType,gamma)
  2 |     %OneNNClassifierLB  1-NN classification with lower-bound pruning.
  3 |     %   DS       - dataset struct; the fields read here are Train, Test,
  4 |     %              TrainClassLabels, TestClassLabels, TrainInstancesCount,
  5 |     %              TestInstancesCount and DTW_WindowPercentage
  6 |     %   ZReduced - reduced representations, train rows first, then test rows
  7 |     %   gamma    - forwarded to SINK (LBType 3 only)
  8 |     %   LBType   - lower bound used to order and prune the candidates:
  9 |     %     1 - LB with FFT using the first-k coefficients
 10 |     %     2 - LB with FFT using the best-k coefficients
 11 |     %     3 - Our approach
 12 |     %     4 - LBKeogh for DTW
 13 |     %   acc is the fraction of test instances labelled correctly.
 14 |     %   pruningpower is 1 - (exact distances computed / bounds computed).
 15 | 
 16 |     ZRepTrain = ZReduced(1:DS.TrainInstancesCount,:);
 17 |     ZRepTest = ZReduced(DS.TrainInstancesCount+1:end,:);
 18 |     Dim = size(ZReduced,2);
 19 |     acc = 0;
 20 | 
 21 |     for id = 1 : DS.TestInstancesCount
 22 |         query = DS.Test(id,:);
 23 |         best_so_far = inf;
 24 |         % Reset for every query: otherwise a query whose first bound is not
 25 |         % below inf (e.g. NaN) would be scored with the previous query's label.
 26 |         predicted = NaN;
 27 |         distance_lb = zeros(1, DS.TrainInstancesCount);
 28 | 
 29 |         if LBType==4
 30 |             u = upper_b(query, DS.DTW_WindowPercentage);
 31 |             l = lower_b(query, DS.DTW_WindowPercentage);
 32 |         end
 33 | 
 34 |         % Pass 1: lower bound against every training instance
 35 |         lbdistcomp = 0;
 36 |         for i = 1 : DS.TrainInstancesCount
 37 |             switch LBType
 38 |                 case 1
 39 |                     distance_lb(i) = FFTLBTopCoeff(DS.Train(i,:),query, Dim);
 40 |                 case 2
 41 |                     distance_lb(i) = FFTLBBestCoeff(DS.Train(i,:),query, Dim);
 42 |                 case 3
 43 |                     distance_lb(i) = sqrt(sum((ZRepTrain(i,:)-ZRepTest(id,:)).^2));
 44 |                 case 4
 45 |                     distance_lb(i) = lb_keogh(DS.Train(i,:),query, u, l);
 46 |             end
 47 |             lbdistcomp=lbdistcomp+1;
 48 |         end
 49 | 
 50 |         % Pass 2: visit candidates by increasing bound; stop at the first one
 51 |         % whose bound is not below the best exact distance found so far
 52 |         [distance_lb, ordering] = sort(distance_lb);
 53 |         traindata = DS.Train(ordering,:);
 54 |         trainclasses = DS.TrainClassLabels(ordering);
 55 | 
 56 |         actualdistcomp = 0;
 57 |         for i = 1 : DS.TrainInstancesCount
 58 |             if ~(distance_lb(i) < best_so_far)
 59 |                 break;
 60 |             end
 61 | 
 62 |             switch LBType
 63 |                 case {1, 2}
 64 |                     distance = sqrt(sum((traindata(i,:)-query).^2));
 65 |                 case 3
 66 |                     distance = 2*(1-SINK(traindata(i,:),query,gamma));
 67 |                 case 4
 68 |                     distance = dtw(traindata(i,:),query,DS.DTW_WindowPercentage);
 69 |             end
 70 |             actualdistcomp=actualdistcomp+1;
 71 | 
 72 |             if distance < best_so_far
 73 |                 predicted = trainclasses(i);
 74 |                 best_so_far = distance;
 75 |             end
 76 |         end
 77 | 
 78 |         if (DS.TestClassLabels(id) == predicted)
 79 |             acc = acc + 1;
 80 |         end
 81 | 
 82 |         % NOTE(review): overwritten on every iteration, so the value returned
 83 |         % is the pruning ratio of the LAST test instance only, not an average
 84 |         % over all queries -- confirm this is what callers expect.
 85 |         pruningpower = 1- (actualdistcomp/lbdistcomp);
 86 |     end
 87 |     acc = acc / DS.TestInstancesCount;
 88 | end
 89 | 
 90 | function lbdist = FFTLBTopCoeff(x, y, coeff)
 91 |     scale = sqrt(length(x));   % the same 1/sqrt(length(x)) scaling for both spectra
 92 |     fx = fft(x)/scale;   fy = fft(y)/scale;
 93 |     lbdist = sqrt(sum(abs(fx(1:coeff) - fy(1:coeff)).^2));   % first coeff bins only
 94 | end
 95 | 
 96 | function lbdist = FFTLBBestCoeff(x, y, coeff)
 97 |     % Distance between the scaled spectra of x and y after each one has been
 98 |     % reduced by BestCoeff to its coeff strongest bins.
 99 |     scale = sqrt(length(x));
100 |     Xkept = BestCoeff(fft(x)/scale, coeff);
101 |     Ykept = BestCoeff(fft(y)/scale, coeff);
102 |     
103 |     lbdist = sqrt(sum(abs(Xkept - Ykept).^2));
104 | end
105 | 
106 | function X = BestCoeff(X, coeff)
107 | % Keep the coeff entries of X with the largest squared magnitude and set
108 | % every other entry to zero. X is returned unchanged when it has no more
109 | % than coeff entries.
110 | 
111 | energy = abs(X).^2;
112 | [unused, order] = sort(-energy);       % negated, so the order is descending
113 | X(order((coeff+1):end)) = 0;
114 | end
115 | 
116 | function lb = lb_keogh(T, Q, U, L)
117 | 	T = T.';   % Q itself is not used in the computation, only the bounds U and L
118 | 	overshoot = [(T > U).*(T - U); (T < L).*(L - T)];   % amount above U, then below L
119 | 	lb = sqrt(sum(overshoot.^2));
120 | end
121 | 
122 | function b = lower_b(t, w)
123 | 	% Column vector whose k-th entry is the minimum of t over [k-w, k+w], clipped to t.
124 | 	n = length(t);   b = zeros(n,1);
125 | 	for k = 1 : n
126 | 		b(k) = min(t(max(1,k-w):min(n,k+w)));
127 | 	end
128 | end
129 | 
130 | function b = upper_b(t, w)
131 | 	% Column vector whose k-th entry is the maximum of t over [k-w, k+w], clipped to t.
132 | 	n = length(t);   b = zeros(n,1);
133 | 	for k = 1 : n
134 | 		b(k) = max(t(max(1,k-w):min(n,k+w)));
135 | 	end
136 | end
137 | 
138 | 
139 | 


--------------------------------------------------------------------------------
/SPIRAL/exactCDmex.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2013 Quan Wang <wangq10@rpi.edu>,
  3 |  * Signal Analysis and Machine Perception Laboratory,
  4 |  * Department of Electrical, Computer, and Systems Engineering,
  5 |  * Rensselaer Polytechnic Institute, Troy, NY 12180, USA
  6 |  */
  7 | 
  8 | /**
  9 |  * This is the C/MEX code of the coordinate-descent solver exactCDmex
 10 |  *
 11 |  * compile:
 12 |  *     mex exactCDmex.c
 13 |  *
 14 |  * usage:
 15 |  *     X=exactCDmex(nA,nR,nO,X0,lenA,d,normA,maxiter)
 16 |  *     (see the gateway function at the bottom of this file)
 17 |  */
 18 | 
 19 | #include "mex.h"
 20 | #include <stdlib.h>
 21 | #include <stdio.h>
 22 | #include <math.h>
 23 | 
 24 | double cubicRoot(double d)
 25 | {
 26 |     /* pow() rejects a negative base with a fractional exponent, so take
 27 |        the root of |d| and restore the sign afterwards. */
 28 |     double magnitude = pow(d < 0.0 ? -d : d, 1.0/3.0);
 29 |     return d < 0.0 ? -magnitude : magnitude;
 30 | }
 31 | 
 32 | /* Returns a minimiser of f(x) = x^4/4 + a*x^2/2 + b*x, chosen among the
 33 |  * real roots of f'(x) = x^3 + a*x + b (in the three-root case x = 0 is
 34 |  * returned when no root gives f < 0). */
 35 | double root_c(double a, double b)
 36 | {
 37 |     const double pi = 3.14159265358979323846; /* full double precision */
 38 |     double delta = 4*pow(a,3) + 27*pow(b,2);
 39 |     int k;
 40 |     if(delta<=0) /* 3 distinct real roots or 1 real multiple solution */
 41 |     {
 42 |         /* trigonometric form of the roots: r3*cos(th3 + k*pi/3), k = 0,2,4 */
 43 |         double r3  = 2*sqrt(-a/3);
 44 |         double th3 = atan2(sqrt(-delta/108),-b/2)/3;
 45 |         double fbest=0, xopt=0;
 46 |         for(k=0;k<=4;k=k+2)
 47 |         {
 48 |             double x = r3*cos(th3+((k*pi)/3));
 49 |             double y = pow(x,4)/4+a*pow(x,2)/2+b*x;
 50 |             if(y<fbest)
 51 |                 {fbest=y; xopt=x;}
 52 |         }
 53 |         return xopt;
 54 |     }
 55 |     else /* 1 real root and two complex */
 56 |     {
 57 |         /* closed form for the single real root, built from two cube roots */
 58 |         double z = sqrt(delta/27);
 59 |         return cubicRoot(0.5*(-b+z))+cubicRoot(0.5*(-b-z));
 60 |     }
 61 | }
 62 | 
 63 | 
 64 | 
 65 | double residue(double * nR, double normA, int n, int m, int * lenA)
 66 | {
 67 |     double sumsq=0;   /* squares of nR[col*n+row] for col < lenA[row] */
 68 |     int row,col;      /* note: m is not used */
 69 |     for (row=0;row<n;row++)
 70 |         for (col=0;col<lenA[row];col++)
 71 |             { double v=nR[col*n+row]; sumsq+=v*v; }
 72 |     return sumsq/normA/normA;
 73 | }
 74 | /* Runs n_iter coordinate-wise sweeps over the n-by-k column-major matrix X; X and the residual nR are both updated in place. */
 75 | void iterations(double * nA, double *nR, int* nO, double * X, int n, int m, int k, int * lenA, int n_iter, double normA,int * d){
 76 | 	int iter,i,j,t; /* nA is never read in this function */
 77 | 	double p,q; /* the two arguments handed to root_c for each entry */
 78 | 	printf("# 0: residue=1\n");
 79 | 	for (iter=0;iter<n_iter;++iter)
 80 | 	{
 81 | 	    for(i=0;i<k;i++)
 82 | 		{/*column of X*/
 83 |             int in=i*n; /* offset of column i inside X */
 84 | 			for(t=0;t<n;t++)/*row of x_i=X[:,i]=X[i*n+...]*/
 85 | 			{	double x=X[in+t];
 86 | 				for (j=0;j<lenA[t];j++)/*R{t}=R{t}+x(t)*x(Omega{t})*/
 87 | 		        	nR[j*n+t]+=x*X[in+nO[j*n+t]];
 88 | 			}
 89 | 			for(j=0;j<n;j++)
 90 | 			{/*coordinate-wise update for X[j,i]*/
 91 | 				/*     id=nO[j,:]
 92 | 				 *     p=norm(x(id))^2-x(j)^2-R{j}(d(j));
 93 |                        q=-x(id)'*R{j}+R{j}(d(j))*x(j); */
 94 | 	    	    p=q=0;
 95 | 		    	for (t=0;t<lenA[j];t++)
 96 | 				{
 97 | 					int tn=t*n;
 98 | 					p+=X[in+nO[tn+j]]*X[in+nO[tn+j]];
 99 | 					q-=X[in+nO[tn+j]]*nR[tn+j];
100 | 				}
101 | 				p-=X[in+j]*X[in+j]+nR[j+n*d[j]];
102 | 		    	q+=nR[d[j]*n+j]*X[in+j];
103 | 		    	X[in+j]=root_c(p,q); /* new value of the entry comes from root_c */
104 | 	    	}
105 | 		    for (t=0;t<n;t++) /* subtract the products again, now with the updated column */
106 | 			{
107 | 				double x=X[in+t];
108 | 		        for (j=0;j<lenA[t];j++)
109 | 			        nR[j*n+t]-=x*X[nO[j*n+t]+in];
110 | 			}
111 | 		}
112 | 		printf("# %d: residue=%f\n",iter+1,residue(nR,normA,n,m,lenA)); /* progress line after each sweep */
113 | 	}
114 | }
115 | 
116 | /* the gateway function
117 | X=exactCDmex(nA,nR,nO,X0,lenA,d,normA,options.maxiter);
118 | 
119 | prhs[0] nA    : n = its row count, m = its column count
120 | prhs[1] nR    : residual buffer, updated IN PLACE by iterations()
121 | prhs[2] nO    : n*m index values, truncated to int
122 | prhs[3] X0    : initial iterate, k = its column count (no longer modified)
123 | prhs[4] lenA  : n values, truncated to int
124 | prhs[5] d     : n values, truncated to int
125 | prhs[6] normA : scalar
126 | prhs[7]       : number of iterations
127 | plhs[0] X     : n-by-k result
128 | */
129 | 
130 | void mexFunction( int nlhs, mxArray *plhs[],
131 |         int nrhs, const mxArray *prhs[])
132 | {
133 | 	double *nA, *nR, *X0, *X, *values;
134 | 	double normA;
135 | 	int *nO, *lenA, *d;
136 | 	int n, k, m;
137 | 	int n_iter;
138 | 	int i;
139 | 
140 | 	/*  check for proper number of arguments */
141 | 	if(nrhs!=8)
142 | 	{
143 | 		mexErrMsgIdAndTxt( "MATLAB:exactCDmex:invalidNumInputs",
144 | 				"exactCDmex: Eight inputs required.");
145 | 	}
146 | 	if(nlhs>1)
147 | 	{
148 | 		mexErrMsgIdAndTxt( "MATLAB:exactCDmex:invalidNumOutputs",
149 | 				"exactCDmex: One output required.");
150 | 	}
151 | 
152 | 	/* inputs 2..8 are read through mxGetPr as dense double data */
153 | 	for (i=1;i<8;i++)
154 | 	{
155 | 		if( !mxIsDouble(prhs[i]) || mxIsSparse(prhs[i]) )
156 | 		{
157 | 			mexErrMsgIdAndTxt( "MATLAB:exactCDmex:inputNotDouble",
158 | 					"exactCDmex: Inputs 2 to 8 must be full double arrays.");
159 | 		}
160 | 	}
161 | 
162 | 	nA = mxGetPr(prhs[0]);
163 | 	n=(int)mxGetM(prhs[0]);
164 | 	m=(int)mxGetN(prhs[0]);
165 | 	k=(int)mxGetN(prhs[3]);
166 | 
167 | 	/* every buffer must hold at least as many values as are read below */
168 | 	if( mxGetNumberOfElements(prhs[2])<(size_t)n*(size_t)m ||
169 | 			mxGetNumberOfElements(prhs[3])<(size_t)n*(size_t)k ||
170 | 			mxGetNumberOfElements(prhs[4])<(size_t)n ||
171 | 			mxGetNumberOfElements(prhs[5])<(size_t)n ||
172 | 			mxGetNumberOfElements(prhs[6])<1 ||
173 | 			mxGetNumberOfElements(prhs[7])<1 )
174 | 	{
175 | 		mexErrMsgIdAndTxt( "MATLAB:exactCDmex:sizeMismatch",
176 | 				"exactCDmex: Input sizes are inconsistent.");
177 | 	}
178 | 
179 | 	nR = mxGetPr(prhs[1]);
180 | 
181 | 	/* mxMalloc raises a MATLAB error by itself when memory runs out, and
182 | 	   its blocks are reclaimed automatically if the MEX call is aborted */
183 | 	values=mxGetPr(prhs[2]);
184 | 	nO=(int *)mxMalloc(sizeof(int)*(size_t)n*(size_t)m);
185 | 	for (i=0;i<n*m;i++)
186 | 	    nO[i]=(int)(values[i]);
187 | 
188 | 	values=mxGetPr(prhs[4]);
189 | 	lenA=(int *)mxMalloc(sizeof(int)*(size_t)n);
190 | 	for (i=0;i<n;++i)
191 | 	    lenA[i]=(int)(values[i]);
192 | 
193 | 	values=mxGetPr(prhs[5]);
194 | 	d=(int *)mxMalloc(sizeof(int)*(size_t)n);
195 | 	for (i=0;i<n;++i)
196 | 	    d[i]=(int)(values[i]);
197 | 
198 | 	values=mxGetPr(prhs[6]);
199 | 	normA=values[0];
200 | 	values=mxGetPr(prhs[7]);
201 | 	n_iter=(int)values[0];
202 | 
203 | 	/* Iterate on the output buffer, seeded with X0, so that the caller's
204 | 	   X0 (an input, possibly shared with other variables) is not altered */
205 | 	plhs[0] = mxCreateDoubleMatrix( n, k, mxREAL);
206 | 	X = mxGetPr(plhs[0]);
207 | 	X0 = mxGetPr(prhs[3]);
208 | 	for(i=0; i<n*k; ++i)
209 | 		X[i]=X0[i];
210 | 
211 | 	iterations(nA,nR,nO,X,n,m,k,lenA,n_iter,normA,d);
212 | 
213 | 	mxFree(nO);
214 | 	mxFree(lenA);
215 | 	mxFree(d);
216 |     return;
217 | 
218 | }
219 | 


--------------------------------------------------------------------------------
/SPIRAL/dtw_c.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2013 Quan Wang <wangq10@rpi.edu>,
  3 |  * Signal Analysis and Machine Perception Laboratory,
  4 |  * Department of Electrical, Computer, and Systems Engineering,
  5 |  * Rensselaer Polytechnic Institute, Troy, NY 12180, USA
  6 |  */
  7 | 
  8 | /**
  9 |  * This is the C/MEX code of dynamic time warping of two signals
 10 |  *
 11 |  * compile:
 12 |  *     mex dtw_c.c
 13 |  *
 14 |  * usage:
 15 |  *     d=dtw_c(s,t)  or  d=dtw_c(s,t,w)
 16 |  *     where s is signal 1, t is signal 2, w is window parameter
 17 |  */
 18 | 
 19 | #include "mex.h"
 20 | #include <stdlib.h>
 21 | #include <stdio.h>
 22 | #include <math.h>
 23 | 
 24 | double vectorDistance(double *s, double *t, int ns, int nt, int k, int i, int j)
 25 | {
 26 |     /* Euclidean distance between row i of s and row j of t; element (r,c)
 27 |        of s is s[r+ns*c] and of t is t[r+nt*c], for c = 0..k-1. */
 28 |     double sumsq=0;
 29 |     int col;
 30 |     for(col=0;col<k;col++)
 31 |     {
 32 |         double diff=s[i+ns*col]-t[j+nt*col];
 33 |         sumsq+=diff*diff;
 34 |     }
 35 | 
 36 |     return sqrt(sumsq);
 37 | }
 38 | 
 39 | double dtw_c(double *s, double *t, int w, int ns, int nt, int k)
 40 | {
 41 |     /*
 42 |      * Dynamic-programming DTW cost between s (ns rows) and t (nt rows), both
 43 |      * with k columns; rows are compared with vectorDistance().
 44 |      * w is the half-width of the band around the diagonal; -1 disables it.
 45 |      * D is (ns+1) x (nt+1); a cell that was never filled in holds UNSET.
 46 |      * Returns D[ns][nt].
 47 |      */
 48 |     const double UNSET=-1;
 49 |     double **D;
 50 |     double total;
 51 |     int lengthGap=abs(ns-nt);
 52 |     int row;
 53 |     int col;
 54 |     int colFirst;
 55 |     int colLast;
 56 | 
 57 |     /* the band has to be wide enough to contain cell (ns,nt) */
 58 |     if(w!=-1 && w<lengthGap)
 59 |     {
 60 |         w=lengthGap;
 61 |     }
 62 | 
 63 |     /* allocate the table row by row, marking every cell as not computed */
 64 |     D=(double **)malloc((ns+1)*sizeof(double *));
 65 |     for(row=0;row<=ns;row++)
 66 |     {
 67 |         D[row]=(double *)malloc((nt+1)*sizeof(double));
 68 |         for(col=0;col<=nt;col++)
 69 |         {
 70 |             D[row][col]=UNSET;
 71 |         }
 72 |     }
 73 |     /* only the origin starts with a known cost */
 74 |     D[0][0]=0;
 75 | 
 76 |     /* fill the table one row at a time */
 77 |     for(row=1;row<=ns;row++)
 78 |     {
 79 |         /* columns of this row that fall inside the band */
 80 |         colFirst=1;
 81 |         colLast=nt;
 82 |         if(w!=-1)
 83 |         {
 84 |             if(row-w>1) colFirst=row-w;
 85 |             if(row+w<nt) colLast=row+w;
 86 |         }
 87 | 
 88 |         for(col=colFirst;col<=colLast;col++)
 89 |         {
 90 |             /* cheapest computed predecessor among up, left and diagonal */
 91 |             double up=D[row-1][col];
 92 |             double left=D[row][col-1];
 93 |             double diag=D[row-1][col-1];
 94 |             double best=up;
 95 | 
 96 |             if(left!=UNSET && (best==UNSET || left<best))
 97 |             {
 98 |                 best=left;
 99 |             }
100 |             if(diag!=UNSET && (best==UNSET || diag<best))
101 |             {
102 |                 best=diag;
103 |             }
104 | 
105 |             D[row][col]=vectorDistance(s,t,ns,nt,k,row-1,col-1)+best;
106 |         }
107 |     }
108 | 
109 |     total=D[ns][nt];
110 | 
111 |     /* release the table */
112 |     for(row=0;row<=ns;row++)
113 |     {
114 |         free(D[row]);
115 |     }
116 |     free(D);
117 | 
118 |     return total;
119 | }
120 | 
121 | /* the gateway function: d=dtw_c(s,t) or d=dtw_c(s,t,w) */
122 | void mexFunction( int nlhs, mxArray *plhs[],
123 |         int nrhs, const mxArray *prhs[])
124 | {
125 |     double *s,*t;
126 |     int w;
127 |     int ns,nt,k;
128 |     double *dp;
129 | 
130 |     /*  check for proper number of arguments */
131 |     if(nrhs!=2&&nrhs!=3)
132 |     {
133 |         mexErrMsgIdAndTxt( "MATLAB:dtw_c:invalidNumInputs",
134 |                 "Two or three inputs required.");
135 |     }
136 |     if(nlhs>1)
137 |     {
138 |         mexErrMsgIdAndTxt( "MATLAB:dtw_c:invalidNumOutputs",
139 |                 "dtw_c: One output required.");
140 |     }
141 | 
142 |     /* s and t are read below through mxGetPr as dense double data, so any
143 |        other storage class would be misinterpreted instead of rejected */
144 |     if( !mxIsDouble(prhs[0]) || mxIsSparse(prhs[0]) ||
145 |             !mxIsDouble(prhs[1]) || mxIsSparse(prhs[1]) )
146 |     {
147 |         mexErrMsgIdAndTxt( "MATLAB:dtw_c:inputNotDouble",
148 |                 "dtw_c: Inputs s and t must be full double matrices.");
149 |     }
150 | 
151 |     /* w stays -1 (no window) unless a third, scalar argument is given */
152 |     w=-1;
153 |     if(nrhs==3)
154 |     {
155 |         if( !mxIsDouble(prhs[2]) || mxIsComplex(prhs[2]) ||
156 |                 mxGetN(prhs[2])*mxGetM(prhs[2])!=1 )
157 |         {
158 |             mexErrMsgIdAndTxt( "MATLAB:dtw_c:wNotScalar",
159 |                     "dtw_c: Input w must be a scalar.");
160 |         }
161 | 
162 |         /*  get the scalar input w */
163 |         w = (int) mxGetScalar(prhs[2]);
164 |     }
165 | 
166 |     /*  pointers to the data of s and t */
167 |     s = mxGetPr(prhs[0]);
168 |     t = mxGetPr(prhs[1]);
169 | 
170 |     /*  ns and nt are the row counts, k the column count shared by s and t */
171 |     ns = (int) mxGetM(prhs[0]);
172 |     k = (int) mxGetN(prhs[0]);
173 |     nt = (int) mxGetM(prhs[1]);
174 |     if(mxGetN(prhs[1])!=(size_t)k)
175 |     {
176 |         mexErrMsgIdAndTxt( "MATLAB:dtw_c:dimNotMatch",
177 |                     "dtw_c: Dimensions of input s and t must match.");
178 |     }
179 | 
180 |     /*  create the 1x1 output and store the result in it */
181 |     plhs[0] = mxCreateDoubleMatrix( 1, 1, mxREAL);
182 |     dp = mxGetPr(plhs[0]);
183 |     dp[0]=dtw_c(s,t,w,ns,nt,k);
184 | 
185 |     return;
186 | 
187 | }
188 | 


--------------------------------------------------------------------------------
/RWS/utilities/dtw_c.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright (C) 2013 Quan Wang <wangq10@rpi.edu>,
  3 |  * Signal Analysis and Machine Perception Laboratory,
  4 |  * Department of Electrical, Computer, and Systems Engineering,
  5 |  * Rensselaer Polytechnic Institute, Troy, NY 12180, USA
  6 |  */
  7 | 
  8 | /** 
  9 |  * This is the C/MEX code of dynamic time warping of two signals
 10 |  *
 11 |  * compile: 
 12 |  *     mex dtw_c.c
 13 |  *
 14 |  * usage:
 15 |  *     d=dtw_c(s,t)  or  d=dtw_c(s,t,w)
 16 |  *     where s is signal 1, t is signal 2, w is window parameter 
 17 |  */
 18 | 
 19 | #include "mex.h"
 20 | #include <stdlib.h>
 21 | #include <stdio.h>
 22 | #include <math.h>
 23 | 
 24 | double vectorDistance(double *s, double *t, int ns, int nt, int k, int i, int j)
 25 | {
 26 |     /* Euclidean distance between row i of s and row j of t; element (r,c)
 27 |        of s is s[r+ns*c] and of t is t[r+nt*c], for c = 0..k-1. */
 28 |     double sumsq=0;
 29 |     int col;
 30 |     for(col=0;col<k;col++)
 31 |     {
 32 |         double diff=s[i+ns*col]-t[j+nt*col];
 33 |         sumsq+=diff*diff;
 34 |     }
 35 | 
 36 |     return sqrt(sumsq);
 37 | }
 38 | 
 39 | double dtw_c(double *s, double *t, int w, int ns, int nt, int k)
 40 | {
 41 |     /*
 42 |      * Dynamic-programming DTW cost between s (ns rows) and t (nt rows), both
 43 |      * with k columns; rows are compared with vectorDistance().
 44 |      * w is the half-width of the band around the diagonal; -1 disables it.
 45 |      * D is (ns+1) x (nt+1); a cell that was never filled in holds UNSET.
 46 |      * Returns D[ns][nt].
 47 |      * NOTE(review): the malloc results are not checked -- confirm that is OK.
 48 |      */
 49 | 
 50 |     const double UNSET=-1;
 51 |     double **D;
 52 |     double total;
 53 |     int lengthGap=abs(ns-nt);
 54 |     int row;
 55 |     int col;
 56 |     int colFirst;
 57 |     int colLast;
 58 | 
 59 |     /* adapt window size: the band has to contain cell (ns,nt) */
 60 |     if(w!=-1 && w<lengthGap)
 61 |     {
 62 |         w=lengthGap;
 63 |     }
 64 | 
 65 |     /* create D row by row, marking every cell as not computed */
 66 |     D=(double **)malloc((ns+1)*sizeof(double *));
 67 |     for(row=0;row<=ns;row++)
 68 |     {
 69 |         D[row]=(double *)malloc((nt+1)*sizeof(double));
 70 |         for(col=0;col<=nt;col++)
 71 |         {
 72 |             D[row][col]=UNSET;
 73 |         }
 74 |     }
 75 |     /* only the origin starts with a known cost */
 76 |     D[0][0]=0;
 77 | 
 78 |     /* dynamic programming, one row at a time */
 79 |     for(row=1;row<=ns;row++)
 80 |     {
 81 |         /* columns of this row that fall inside the band */
 82 |         colFirst=1;
 83 |         colLast=nt;
 84 |         if(w!=-1)
 85 |         {
 86 |             if(row-w>1) colFirst=row-w;
 87 |             if(row+w<nt) colLast=row+w;
 88 |         }
 89 | 
 90 |         for(col=colFirst;col<=colLast;col++)
 91 |         {
 92 |             /* cheapest computed predecessor among up, left and diagonal */
 93 |             double up=D[row-1][col];
 94 |             double left=D[row][col-1];
 95 |             double diag=D[row-1][col-1];
 96 |             double best=up;
 97 | 
 98 |             if(left!=UNSET && (best==UNSET || left<best))
 99 |             {
100 |                 best=left;
101 |             }
102 |             if(diag!=UNSET && (best==UNSET || diag<best))
103 |             {
104 |                 best=diag;
105 |             }
106 | 
107 |             /* local cost plus the cost of the chosen predecessor */
108 |             D[row][col]=vectorDistance(s,t,ns,nt,k,row-1,col-1)+best;
109 |         }
110 |     }
111 | 
112 |     /* the last cell holds the value that is returned */
113 |     total=D[ns][nt];
114 | 
115 |     /* free D */
116 |     for(row=0;row<=ns;row++)
117 |     {
118 |         free(D[row]);
119 |     }
120 |     free(D);
121 | 
122 |     return total;
123 | }
124 | 
125 | 
126 | /* the gateway function */
127 | void mexFunction( int nlhs, mxArray *plhs[],
128 |         int nrhs, const mxArray *prhs[])
129 | {
130 |     double *s,*t;
131 |     int w;
132 |     int ns,nt,k;
133 |     double *dp;
134 |     
135 |     /*  check for proper number of arguments */
136 |     if(nrhs!=2&&nrhs!=3)
137 |     {
138 |         mexErrMsgIdAndTxt( "MATLAB:dtw_c:invalidNumInputs",
139 |                 "Two or three inputs required.");
140 |     }
141 |     if(nlhs>1)
142 |     {
143 |         mexErrMsgIdAndTxt( "MATLAB:dtw_c:invalidNumOutputs",
144 |                 "dtw_c: One output required.");
145 |     }
146 |     
147 |     /* check to make sure w is a scalar */
148 |     if(nrhs==2)
149 |     {
150 |         w=-1;
151 |     }
152 |     else if(nrhs==3)
153 |     {
154 |         if( !mxIsDouble(prhs[2]) || mxIsComplex(prhs[2]) ||
155 |                 mxGetN(prhs[2])*mxGetM(prhs[2])!=1 )
156 |         {
157 |             mexErrMsgIdAndTxt( "MATLAB:dtw_c:wNotScalar",
158 |                     "dtw_c: Input w must be a scalar.");
159 |         }
160 |         
161 |         /*  get the scalar input w */
162 |         w = (int) mxGetScalar(prhs[2]);
163 |     }
164 |     
165 |     
166 |     /*  create a pointer to the input matrix s */
167 |     s = mxGetPr(prhs[0]);
168 |     
169 |     /*  create a pointer to the input matrix t */
170 |     t = mxGetPr(prhs[1]);
171 |     
172 |     /*  get the dimensions of the matrix input s */
173 |     ns = mxGetM(prhs[0]);
174 |     k = mxGetN(prhs[0]);
175 |     
176 |     /*  get the dimensions of the matrix input t */
177 |     nt = mxGetM(prhs[1]);
178 |     if(mxGetN(prhs[1])!=k)
179 |     {
180 |         mexErrMsgIdAndTxt( "MATLAB:dtw_c:dimNotMatch",
181 |                     "dtw_c: Dimensions of input s and t must match.");
182 |     }  
183 |     
184 |     /*  set the output pointer to the output matrix */
185 |     plhs[0] = mxCreateDoubleMatrix( 1, 1, mxREAL);
186 |     
187 |     /*  create a C pointer to a copy of the output matrix */
188 |     dp = mxGetPr(plhs[0]);
189 |     
190 |     /*  call the C subroutine */
191 |     dp[0]=dtw_c(s,t,w,ns,nt,k);
192 |     
193 |     return;
194 |     
195 | }
196 | 


--------------------------------------------------------------------------------
/RunLinearSVMRWS.m:
--------------------------------------------------------------------------------
  1 | function RunLinearSVMRWS(DataSetStartIndex, DataSetEndIndex)  
  2 |  
  3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
  4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
  5 |     Datasets = {dir_struct(3:130).name};
  6 |                      
  7 |     % Sort Datasets
  8 |     
  9 |     [Datasets, DSOrder] = sort(Datasets);    
 10 | 
 11 |     Results = zeros(length(Datasets),11);
 12 |     
 13 |     addpath(genpath('LibLinear/matlab/.'));
 14 |     
 15 |     distcomp.feature( 'LocalUseMpiexec', false )
 16 |     
 17 |     %rng(ceil(DataSetStartIndex*100))
 18 |     %pause(100*rand);
 19 |         
 20 |     poolobj = gcp('nocreate');
 21 |     delete(poolobj);
 22 |     
 23 |     parpool(22);
 24 |     
 25 |     rng('default')
 26 |     
 27 |     for i = 1:length(Datasets)
 28 | 
 29 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
 30 | 
 31 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
 32 |                     DS = LoadUCRdataset(char(Datasets(i)));
 33 | 
 34 |                     
 35 |                     [Thebestcost2,Thebestacc2,Thebestiming2] = GridSearchLinearSVM2(-10,0.1,20,DS.TrainInstancesCount,DS.TrainClassLabels,Datasets,i);
 36 |                     
 37 |                     ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_Supervised', '.Zrep')  );
 38 |                     
 39 |                     ZRepTrain = ZRep(1:DS.TrainInstancesCount,:);
 40 |                     ZRepTest = ZRep(DS.TrainInstancesCount+1:end,:);
 41 |                     
 42 |                     ZRepTrain = sparse(ZRepTrain);
 43 |                     ZRepTest = sparse(ZRepTest);
 44 |                       
 45 |                     tic;
 46 |                     cmd = ['-e 0.001 -s 2 -c ', num2str(2^Thebestcost2)];
 47 |                     model_precomputed = train(DS.TrainClassLabels, ZRepTrain, cmd);
 48 |                     
 49 |                     ModelTrainingRuntime = toc;
 50 |                     
 51 |                     tic;
 52 |                     
 53 |                     [predict_label_P, accuracy_P, dec_values_P] = predict(DS.TestClassLabels, ZRepTest, model_precomputed);
 54 |                     
 55 |                     PredictionRuntime = toc;
 56 |                     
 57 |                     Results(i,1) = 0;
 58 |                     
 59 |                     Results(i,2) = 0;
 60 |                     Results(i,3) = Thebestcost2;
 61 |                     %Results(i,4) = Thebestcost3;
 62 |                     Results(i,4) = 0;
 63 |                     
 64 |                     Results(i,5) = 0;
 65 |                     Results(i,6) = Thebestacc2*0.01;
 66 |                     %Results(i,7) = Thebestacc3*0.01;
 67 |                     Results(i,7) = 0;
 68 | 
 69 |                     %Results(i,8) = Thebestiming1+Thebestiming2+Thebestiming3;
 70 |                     Results(i,8) = Thebestiming2;
 71 |                     
 72 |                     Results(i,9) = accuracy_P(1)*0.01;
 73 |                     Results(i,10) = ModelTrainingRuntime;
 74 |                     Results(i,11) = PredictionRuntime;
 75 |                 
 76 |                     dlmwrite( strcat('RunLinearSVMRWS/','RunLinearSVMRWS', '_Dataset_', num2str(i)) , Results, 'delimiter', '\t');
 77 |            
 78 | 
 79 |             end
 80 |             
 81 |             
 82 |     end
 83 |     
 84 |     poolobj = gcp('nocreate');
 85 |     delete(poolobj);
 86 |     
 87 | end
 88 | 
 89 | function [Thebestcost,Thebestacc,Thebestiming] = GridSearchLinearSVM2(GridStart,GridStep,GridEnd,TrainInstancesCount,TrainClassLabels,Datasets,DatasetsNumber)
 90 | 
 91 |                     
 92 |                     % Tuning Parameters
 93 | 
 94 |                     log2cTmp = GridStart:GridStep:GridEnd; 
 95 | 
 96 |                     bestacc = zeros(1,length(log2cTmp));
 97 |                     bestcost = zeros(1,length(log2cTmp));
 98 |                     besttiming = zeros(1,length(log2cTmp));
 99 |                     
100 |                     ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(DatasetsNumber)),'/','RWS_Supervised', '.Zrep')  );
101 |                     
102 |                     ZRepTrain = ZRep(1:TrainInstancesCount,:);
103 |                     ZRepTrain = sparse(ZRepTrain);
104 |                       
105 |                       % grid search
106 |                       parfor log2cNEW = 1:length(log2cTmp)
107 |                         
108 |                         log2cNEW
109 |                         tic;
110 |                         log2c = log2cTmp(log2cNEW);
111 |                         cmd = ['-q -e 0.001 -s 2 -v ' num2str(10) ' -c ', num2str(2^log2c)];
112 |                         cv = train(TrainClassLabels, ZRepTrain, cmd);
113 |                           
114 |                         bestacc(log2cNEW) = cv;
115 |                         bestcost(log2cNEW) = log2c; 
116 |                         besttiming(log2cNEW) = toc;
117 | 
118 |                       end
119 | 
120 | 
121 |                     [Maxbestacc,~] = max(bestacc);
122 |                     Posbestacc = find(bestacc==Maxbestacc,1,'last');
123 |                     
124 |                     Thebestiming = sum(besttiming);
125 |                     Thebestcost = bestcost(Posbestacc);
126 |                     Thebestacc = Maxbestacc;
127 | 
128 | 
129 | end
130 | 
131 | 


--------------------------------------------------------------------------------
/RunLinearSVMSPIRAL.m:
--------------------------------------------------------------------------------
  1 | function RunLinearSVMSPIRAL(DataSetStartIndex, DataSetEndIndex)  
  2 |  
  3 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
  4 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
  5 |     Datasets = {dir_struct(3:130).name};
  6 |                      
  7 |     % Sort Datasets
  8 |     
  9 |     [Datasets, DSOrder] = sort(Datasets);    
 10 | 
 11 |     Results = zeros(length(Datasets),11);
 12 |     
 13 |     addpath(genpath('LibLinear/matlab/.'));
 14 |     
 15 |     distcomp.feature( 'LocalUseMpiexec', false )
 16 |     
 17 |     %rng(ceil(DataSetStartIndex*100))
 18 |     %pause(100*rand);
 19 |         
 20 |     poolobj = gcp('nocreate');
 21 |     delete(poolobj);
 22 |     
 23 |     parpool(22);
 24 |     
 25 |     rng('default')
 26 |     
 27 |     for i = 1:length(Datasets)
 28 | 
 29 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
 30 | 
 31 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
 32 |                     DS = LoadUCRdataset(char(Datasets(i)));
 33 | 
 34 |                     
 35 |                     [Thebestcost2,Thebestacc2,Thebestiming2] = GridSearchLinearSVM2(-10,0.1,20,DS.TrainInstancesCount,DS.TrainClassLabels,Datasets,i);
 36 |                     
 37 |                     
 38 |                     ZRep = dlmread( strcat( 'SPIRALREPRESENTATIONS','/',char(Datasets(i)),'/','SIDLREPRESENTATIONS', '.Zrep')  );
 39 |                     
 40 |                     ZRepTrain = ZRep(1:DS.TrainInstancesCount,:);
 41 |                     ZRepTest = ZRep(DS.TrainInstancesCount+1:end,:);
 42 |                     
 43 |                     ZRepTrain = sparse(ZRepTrain);
 44 |                     ZRepTest = sparse(ZRepTest);
 45 |                       
 46 |                     tic;
 47 |                     cmd = ['-e 0.001 -s 2 -c ', num2str(2^Thebestcost2)];
 48 |                     model_precomputed = train(DS.TrainClassLabels, ZRepTrain, cmd);
 49 |                     
 50 |                     ModelTrainingRuntime = toc;
 51 |                     
 52 |                     tic;
 53 |                     
 54 |                     [predict_label_P, accuracy_P, dec_values_P] = predict(DS.TestClassLabels, ZRepTest, model_precomputed);
 55 |                     
 56 |                     PredictionRuntime = toc;
 57 |                     
 58 |                     Results(i,1) = 0;
 59 |                     
 60 |                     Results(i,2) = 0;
 61 |                     Results(i,3) = Thebestcost2;
 62 |                     %Results(i,4) = Thebestcost3;
 63 |                     Results(i,4) = 0;
 64 |                     
 65 |                     Results(i,5) = 0;
 66 |                     Results(i,6) = Thebestacc2*0.01;
 67 |                     %Results(i,7) = Thebestacc3*0.01;
 68 |                     Results(i,7) = 0;
 69 | 
 70 |                     %Results(i,8) = Thebestiming1+Thebestiming2+Thebestiming3;
 71 |                     Results(i,8) = Thebestiming2;
 72 |                     
 73 |                     Results(i,9) = accuracy_P(1)*0.01;
 74 |                     Results(i,10) = ModelTrainingRuntime;
 75 |                     Results(i,11) = PredictionRuntime;
 76 |                 
 77 |                     dlmwrite( strcat('RunLinearSVMSPIRAL/','RunLinearSVMSPIRAL', '_Dataset_', num2str(i)) , Results, 'delimiter', '\t');
 78 |            
 79 | 
 80 |             end
 81 |             
 82 |             
 83 |     end
 84 |     
 85 |     poolobj = gcp('nocreate');
 86 |     delete(poolobj);
 87 |     
 88 | end
 89 | 
 90 | function [Thebestcost,Thebestacc,Thebestiming] = GridSearchLinearSVM2(GridStart,GridStep,GridEnd,TrainInstancesCount,TrainClassLabels,Datasets,DatasetsNumber)
 91 | 
 92 |                     
 93 |                     % Tuning Parameters
 94 | 
 95 |                     log2cTmp = GridStart:GridStep:GridEnd; 
 96 | 
 97 |                     bestacc = zeros(1,length(log2cTmp));
 98 |                     bestcost = zeros(1,length(log2cTmp));
 99 |                     besttiming = zeros(1,length(log2cTmp));
100 |                     
101 |                     ZRep = dlmread( strcat( 'SPIRALREPRESENTATIONS','/',char(Datasets(DatasetsNumber)),'/','SIDLREPRESENTATIONS', '.Zrep')  );
102 |                     
103 |                     ZRepTrain = ZRep(1:TrainInstancesCount,:);
104 |                     ZRepTrain = sparse(ZRepTrain);
105 |                       
106 |                       % grid search
107 |                       parfor log2cNEW = 1:length(log2cTmp)
108 |                         
109 |                         log2cNEW
110 |                         tic;
111 |                         log2c = log2cTmp(log2cNEW);
112 |                         cmd = ['-q -e 0.001 -s 2 -v ' num2str(10) ' -c ', num2str(2^log2c)];
113 |                         cv = train(TrainClassLabels, ZRepTrain, cmd);
114 |                           
115 |                         bestacc(log2cNEW) = cv;
116 |                         bestcost(log2cNEW) = log2c; 
117 |                         besttiming(log2cNEW) = toc;
118 | 
119 |                       end
120 | 
121 | 
122 |                     [Maxbestacc,~] = max(bestacc);
123 |                     Posbestacc = find(bestacc==Maxbestacc,1,'last');
124 |                     
125 |                     Thebestiming = sum(besttiming);
126 |                     Thebestcost = bestcost(Posbestacc);
127 |                     Thebestacc = Maxbestacc;
128 | 
129 | 
130 | end
131 | 
132 | 


--------------------------------------------------------------------------------
/dtw.c:
--------------------------------------------------------------------------------
  1 | #include "mex.h"
  2 | 
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <math.h>
  6 | #include <time.h>
  7 | 
  8 | #define min(x, y) ((x)<(y)?(x):(y))
  9 | #define max(x, y) ((x)>(y)?(x):(y))
 10 | #define dist(x, y)((x-y)*(x-y))
 11 | 
 12 | 
 13 | #define INF 1e20       /*Pseudo Infitinte number for this code */
 14 | 
 15 | 
 16 | 
 17 | /*
 18 |  Calculate Dynamic Time Wrapping distance
 19 |  A,B: data and query, respectively
 20 |  r  : size of Sakoe-Chiba warpping band */
 21 | double dtw(double* A, double* B, int m, double *radius, int *path1, int *path2, int *pLen) {
 22 |     
 23 |     double *cost;
 24 |     double *cost_prev;
 25 |     double *cost_tmp;
 26 |     int i, j, k;
 27 |     double x, y, z;
 28 |     double final_dtw ;
 29 |     int r;
 30 |     
 31 |     int **trace;
 32 |     
 33 |     r = (int)radius[0];
 34 |     
 35 |     /* Traceback matrix width m x 2r+1 */
 36 |     trace = (int **)malloc(sizeof(int *)*m);
 37 |     for (i=0;i<m;i++)
 38 |         trace[i] = (int *)malloc(sizeof(int)*(2*r+1));
 39 |     
 40 |     /* Instead of using matrix of size O(m^2) or O(mr), we will reuse two array of size O(r). */
 41 |     cost = (double*)malloc(sizeof(double)*(2*r+1));
 42 |     for(k=0; k<2*r+1; k++)    cost[k]=INF;
 43 |     
 44 |     cost_prev = (double*)malloc(sizeof(double)*(2*r+1));
 45 |     for(k=0; k<2*r+1; k++)    cost_prev[k]=INF;
 46 |     
 47 |     for (i=0; i<m; i++) {
 48 |         k = max(0, r-i);
 49 |         
 50 |         for(j=max(0, i-r); j<=min(m-1, i+r); j++, k++) {
 51 |             /* Initialize all row and column */
 52 |             if ((i==0)&&(j==0)) {
 53 |                 cost[k]=dist(A[0], B[0]);
 54 |                 trace[0][r] = 0; /* 0: left 1: diag 2:up */
 55 |                 continue;
 56 |             }
 57 |             
 58 |             /* Left */
 59 |             if ((j-1<0)||(k-1<0))     y = INF;
 60 |             else                      y = cost[k-1];
 61 |             /* Up */
 62 |             if ((i-1<0)||(k+1>2*r))   x = INF;
 63 |             else                      x = cost_prev[k+1];
 64 |             /* Diagonal */
 65 |             if ((i-1<0)||(j-1<0))     z = INF;
 66 |             else                      z = cost_prev[k];
 67 |             
 68 |             /* Classic DTW calculation */
 69 |             cost[k] = min( min( x, y) , z) + dist(A[i], B[j]);
 70 |             /* Let's store the path information */
 71 |             if      (x <= min(y, z))
 72 |                 trace[i][k]= 2; /* up */
 73 |             else if (y <= min(x, z))
 74 |                 trace[i][k]=0; /* left */
 75 |             else
 76 |                 trace[i][k]=1; /* diag */
 77 |         }
 78 |         
 79 |         
 80 |         
 81 |         /* Move current array to previous array. */
 82 |         cost_tmp = cost;
 83 |         cost = cost_prev;
 84 |         cost_prev = cost_tmp;
 85 |     }
 86 |     k--;
 87 |     
 88 |     /* the DTW distance is in the last cell in the matrix of size O(m^2) or at the middle of our array. */
 89 |     final_dtw = cost_prev[k];
 90 |     free(cost);
 91 |     free(cost_prev);
 92 |     
 93 |     /* Print trace matrix */
 94 |     /* for (i=0;i<m;i++)
 95 |      * { for (j=0;j<2*r+1;j++)
 96 |      * printf("%3d ",trace[i][j]);
 97 |      * printf("\n");
 98 |      * } */
 99 |     /* Trace back */
100 |     i = m - 1 ;
101 |     j = r ;
102 |     path1[0] = i; path2[0] = j + i - r ;
103 |     /* printf("Sim [%3d %3d] Trace [%3d,%3d] = %3d\n",i,j+i-r,i,j,trace[i][j]); */
104 |     for (k=1;  !(i == 0 && j == r) ; k++) {
105 |         if      (trace[i][j] == 0) { j--;} /* left */
106 |         else if (trace[i][j] == 1) { i--; } /* diag */
107 |         else                       { i--; j++;} /* up */
108 |         path1[k] = i ;
109 |         path2[k] = j + i - r ;
110 |         /*printf("Sim [%3d %3d] Trace [%3d,%3d] = %3d\n",i,j+i-r,i,j,trace[i][j]); */
111 |     }
112 |     *pLen = k--;
113 |     
114 |     for (i=0;i<m;i++)
115 |         free(trace[i]);
116 |     free(trace);
117 |     
118 |     return sqrt(final_dtw);
119 | }
120 | 
121 | void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
122 |     double *q, *c, *r, *d;
123 |     int ql, cl;
124 |     int *path1, *path2;
125 |     double *mxPath1, *mxPath2;
126 |     int pLen, i;
127 |     
128 |     /* check number of inputs and outputs */
129 |     if (nrhs != 3) {
130 |         mexErrMsgTxt("This function requires 3 input arguments.");
131 |     } else if (nlhs > 3) {
132 |         mexErrMsgTxt("This function only returns 3 output value.");
133 |     }
134 |     
135 |     /* retrieve input arguments */
136 |     q = mxGetPr(prhs[0]);    /* pointer to real values of first  argument  */
137 |     c = mxGetPr(prhs[1]);    /* pointer to real values of second argument */
138 |     r = mxGetPr(prhs[2]);    /* pointer to real value  of third  argument   */
139 |     
140 |     /* check series lengths */
141 |     ql = mxGetNumberOfElements(prhs[0]);
142 |     cl = mxGetNumberOfElements(prhs[1]);
143 |     if (abs(ql - cl) > r[0]) {
144 |         mexErrMsgTxt("Actual distance falls outside radius constraint.");
145 |     }
146 |     
147 |     /* allocate memory for the return value */
148 |     plhs[0] = mxCreateDoubleMatrix(1, 1,    mxREAL);
149 |     
150 |     path1 = (int *)malloc(ql*(2*((int)r[0])+1)* sizeof(int));
151 |     path2 = (int *)malloc(ql*(2*((int)r[0])+1)* sizeof(int));
152 |     
153 |     /* printf("Query Length:%d Path Length:%d\n", ql,ql*(2*((int)r[0])+1)); */
154 |     d = mxGetPr(plhs[0]);    /* pointer to Matlab managed memory for result */
155 |     
156 |     d[0]=dtw(q, c, ql, r, path1, path2, &pLen);
157 |     
158 |     /* printf("Path length %d\n",pLen); */
159 |     
160 |     plhs[1] = mxCreateDoubleMatrix(1, pLen, mxREAL);
161 |     plhs[2] = mxCreateDoubleMatrix(1, pLen, mxREAL);
162 |     
163 |     mxPath1 = mxGetPr(plhs[1]);
164 |     mxPath2 = mxGetPr(plhs[2]);
165 |     
166 |     for (i=0; i < pLen ; i++) {
167 |         mxPath1[i] = path1[i] + 1 ; /* 1 based indexing */
168 |         mxPath2[i] = path2[i] + 1 ; /* 1 based indexing */
169 |     }
170 |     
171 |     free(path1);
172 |     free(path2);
173 | }
174 | 


--------------------------------------------------------------------------------
/RunDictEvaluation.m:
--------------------------------------------------------------------------------
  1 | function RunDictEvaluation(DataSetStartIndex, DataSetEndIndex, Method, gamma)  
  2 |         
  3 |     Methods = [cellstr('Random'), 'KShape', 'AFKMC2', 'GibbsDPP','SRFT','LevScore','Gaussian'];
  4 | 
  5 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
  6 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
  7 |     Datasets = {dir_struct(3:130).name};
  8 |                              
  9 |     % Sort Datasets
 10 |     [Datasets, DSOrder] = sort(Datasets);   
 11 |     
 12 |     addpath(genpath('NystromBestiary/.'));
 13 |     
 14 |     for i = 1:length(Datasets)
 15 | 
 16 |             if (i>=DataSetStartIndex & i<=DataSetEndIndex)
 17 | 
 18 |                     Results = zeros(length(Datasets),4);
 19 |                 
 20 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
 21 |                     DS = LoadUCRdataset(char(Datasets(i)));
 22 |                     % Get Kernel Matrix
 23 |                     
 24 |                     KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix') );
 25 |                     
 26 |                     NumOfSamples = min(max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] ),100);
 27 |                     
 28 |                     Runtime = 0;
 29 |                     for rep = 1 : 10
 30 |                         rep
 31 |                         rng(rep);
 32 |                         
 33 |                         if Method==1
 34 |                             Dictionary = dlmread( strcat( 'DICTIONARIESRANDOM/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') );
 35 |                         elseif Method==2
 36 |                             Dictionary = dlmread( strcat( 'DICTIONARIESKSHAPE/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') );
 37 |                         elseif Method==3
 38 |                             Dictionary = dlmread( strcat( 'DICTIONARIESKSHAPE/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char('KShape'), '_', num2str(rep) ,'.KppCentroids') );                            
 39 |                         elseif Method==4
 40 |                             Dictionary = dlmread( strcat( 'DICTIONARIESGIBBSDPP/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') );
 41 |                         elseif Method==5
 42 |                             
 43 |                             tic;
 44 |                             in.A = KM;
 45 |                             in.linearkernelflag = 0;
 46 |                             in.k = 5;
 47 |                             in.l = NumOfSamples;
 48 |                             in.q = 1;
 49 |                             out = srft_Nystrom(in);
 50 |                             Runtime = Runtime + toc;
 51 |                             
 52 |                         elseif Method==6
 53 |                             
 54 |                             tic;
 55 |                             in.A = KM;
 56 |                             in.linearkernelflag = 0;
 57 |                             in.k = 5;
 58 |                             in.l = NumOfSamples;
 59 |                             in.q = 1;
 60 |                             
 61 |                             [U, Sigma] = orderedeigs(in.A, in.k+1);
 62 |                              U1t = U(:, 1:in.k)';
 63 |                              levscores = sum(U1t.*U1t);
 64 |                              in.levscorecomputationtime = 0;
 65 |                              in.levscoreprobs = levscores/in.k;
 66 |                             
 67 |                              out = levscore_Nystrom(in);
 68 |                              
 69 |                              Runtime = Runtime + toc;
 70 |                              
 71 |                          elseif Method==7
 72 |                             
 73 |                             tic;
 74 |                             in.A = KM;
 75 |                             in.linearkernelflag = 0;
 76 |                             in.k = 5;
 77 |                             in.l = NumOfSamples;
 78 |                             in.q = 1;
 79 |                             out = gaussian_Nystrom(in);    
 80 |                             Runtime = Runtime + toc;
 81 |                         end
 82 |                         
 83 |                         if Method==5
 84 |                             [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, out.C, out.Winv);
 85 |                         elseif Method==6
 86 |                             [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, out.C, out.Winv);
 87 |                         elseif Method==7
 88 |                             [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, out.C, out.Winv);
 89 |                         else
 90 |                             [AbsFroError,RelFroError,NormFroError] = NystromMatrixDictionary(KM, DS.Data, Dictionary, gamma);
 91 |                         end
 92 |                         ResultsTmp = [AbsFroError,RelFroError,NormFroError,Runtime];
 93 |                           
 94 |                         %
 95 |                         Results(i,:) = Results(i,:) + ResultsTmp;
 96 |                         %if rep==10
 97 |                         %    ResultsRep10 = Results(i,:) ./ 10;
 98 |                         %    dlmwrite( strcat( 'EvaluateDictionaries/','RESULTS_EvaluateDictionaries_10Rep_', char(Methods(Method)), '_', num2str(gamma), '_' ,num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex) ,'.results'), ResultsRep10, 'delimiter', '\t');
 99 |                         %
100 |                         %end
101 |                     end
102 |                     Results(i,:) = Results(i,:) ./ 10;
103 |                     
104 |                     dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunDictEvaluation/','RunDictEvaluation_10Rep_', char(Methods(Method)), '_', num2str(gamma), '_' ,num2str(i) ,'.results'), Results, 'delimiter', '\t');
105 |    
106 |             end
107 |             
108 |             
109 |     end
110 |     
111 | end


--------------------------------------------------------------------------------
/kShapeCentroids.m:
--------------------------------------------------------------------------------
  1 | function [mem,cent,iter,sumd, centKpp, centKppSmplPoints, DistValues, DistShifts,DistComp,RT1,DistComp2,RT2] = kShapeCentroids(A, K, Seeding)
  2 | % A = nXm : n # of time series; m length
  3 | % K clusters
  4 | 
  5 | DistComp=0;
  6 | DistComp2=0;
  7 | centKpp = [];
  8 | centKppSmplPoints = [];
  9 | 
 10 | n=size(A, 1);
 11 | 
 12 | if Seeding==1
 13 |     tic;
 14 |     [centKpp,centKppSmplPoints,DistComp2] = Seeding_SBD(A, K, 10);
 15 |     RT2 = toc;
 16 |     cent = centKpp;
 17 |     DistComp=DistComp+DistComp2;
 18 |     [~, ~, ~, mem] = Cent2Membership(A, cent, 2);
 19 | else
 20 |    mem = ceil(K*rand(n, 1)); 
 21 |    cent = zeros(K, size(A, 2));
 22 | end
 23 | 
 24 | %n=size(A, 1);
 25 | %mem = ceil(K*rand(m, 1));
 26 | %cent = zeros(K, size(A, 2));
 27 | 
 28 | DistValues = zeros(n,K);
 29 | DistShifts = zeros(n,K);
 30 | tic;
 31 | for iter = 1:100
 32 |     disp(iter);
 33 |     prev_mem = mem;
 34 |     
 35 |     for k = 1:K
 36 |         [centTmp,DistComp3] = kshape_centroid(mem, A, k, cent(k,:));  
 37 |         cent(k,:) = centTmp';
 38 |         %DistComp=DistComp+DistComp3; Computing it twice - this can be
 39 |         %optimized
 40 |     end
 41 |     
 42 |     for i = 1:n
 43 |         for k = 1:K
 44 |             
 45 |             [dist, shift, yshift]= SBD(A(i,:), zscore(cent(k,:)));
 46 |             DistComp=DistComp+1;
 47 |             DistValues(i,k) = dist;
 48 |             DistShifts(i,k) = shift;
 49 |             
 50 |         end
 51 |     end
 52 |     
 53 |     [val mem] = min(DistValues,[],2);
 54 |     sumd = sum(val);
 55 |     if norm(prev_mem-mem) == 0
 56 |         break;
 57 |     end
 58 | end
 59 | RT1 = toc;
 60 | end
 61 | 
 62 | function [ksc,DistComp] = kshape_centroid(mem, A, k, cur_center)
 63 | %Computes ksc centroid
 64 | a = [];
 65 | DistComp=0;
 66 | for i=1:length(mem)
 67 |     if mem(i) == k
 68 |         if sum(cur_center) == 0
 69 |             opt_a = A(i,:);
 70 |         else
 71 |              [~, ~, opt_a] = SBD(zscore(cur_center), A(i,:));
 72 |              DistComp=DistComp+1;
 73 |         end
 74 |         a = [a; opt_a];
 75 |     end
 76 | end
 77 | 
 78 | if size(a,1) == 0;
 79 |     %ksc = zeros(1, size(A,2));     
 80 |     permed_index = randperm(size(A,1));
 81 |     ksc = A(permed_index(1),:);
 82 |     return;
 83 | elseif size(a,1) == 1;
 84 |     ksc = a;
 85 |     return;
 86 | end
 87 | 
 88 | [~, ncolumns]=size(a);
 89 | [Y,~,~] = zscore(a,[],2);
 90 | P = (eye(ncolumns) - 1 / ncolumns * ones(ncolumns));
 91 | ksc = (sum(Y)*P)/norm(sum(Y)*P);
 92 | 
 93 | ksc = zscore(ksc);
 94 | 
 95 | end
 96 | 
 97 | function  [C,SmplPoints,DistComp] = Seeding_SBD(A, k, m)
 98 | % SEEDING_SBD  AFK-MC2 style seeding of k centers with the SBD distance.
 99 | % Usage: [C,SmplPoints,DistComp] = Seeding_SBD(A, k, m)
100 | %   A is d x n data matrix, where d is #objects and n is #timeperiods
101 | %   k is desired number of centers
102 | %   m is chain length (if <1, then expressed as a fraction of the d objects)
103 | %   C holds the k chosen centers, one row of A per center
104 | %   SmplPoints holds the row index in A of every chosen center
105 | %   DistComp counts the object-to-center distance evaluations requested
106 | % Author: Terence Lim
107 | % Original paper/code by Bachem, Lucic, Hassani and Krause "Fast and
108 | %   Provably Good Seedings for k-Means"
109 | 
110 |   DistComp = 0;
111 |   d = size(A,1);  % d rows of objects
112 |   if (m < 1)      % chain length expressed as a fraction of the objects
113 |     m = ceil(m * d);
114 |   end
115 |   % Draw the first center ONCE so that SmplPoints(1) and C(1,:) refer to the
116 |   % same row of A (two independent draws used to leave them out of sync).
117 |   first_ind = ceil(d * rand);
118 |   SmplPoints = first_ind;
119 |   C = A(first_ind, :);    % sample first center
120 |   q = Data2Centroids_SBD(A, C);   % proposal: SBD distance of every object to its closest center
121 |   DistComp = DistComp + d*size(C,1);
122 |   q(isnan(q)) = 1;
123 |   if (sum(q) == 0)
124 |     q = repmat(1/d, size(q,1),size(q,2));
125 |   else
126 |     q = (q / sum(q)) + (1 / d);   % add a uniform term to the normalised distances
127 |   end
128 |   q = q / sum(q);
129 | 
130 |   for i=1:(k-1)  % sequentially pick centers
131 |     cand_ind = randsample(d, m, true, q);
132 |     q_cand = q(cand_ind);                  % extract proposal probability
133 |     p_cand = Data2Centroids_SBD(A(cand_ind,:), C);  % compute potentials
134 |     DistComp = DistComp + numel(cand_ind)*size(C,1);
135 |     rand_a = random('unif',0,1,m,1);       % compute acceptance probabilities
136 |     for j=1:m                              % mix up to chain length m
137 |       cand_prob = p_cand(j)/q_cand(j);
138 |       % j == 1 is tested first: curr_prob does not exist before the first candidate
139 |       if (j == 1 || curr_prob == 0.0 || cand_prob/curr_prob > rand_a(j))
140 |         curr_ind = j;
141 |         curr_prob = cand_prob;
142 |       end
143 |     end
144 |     SmplPoints(i+1) = cand_ind(curr_ind);
145 |     C(i+1,:) = A(cand_ind(curr_ind),:);
146 |   end
147 | end
147 | 
148 | function [vals, classes, distances, sumd] = Data2Centroids_SBD(A, c)
149 | % DATA2CENTROIDS_SBD  Assign each row of A to its closest centroid.
150 | %   A         - d x n data matrix (one object per row)
151 | %   c         - k x n centroids (one centroid per row)
152 | %   vals      - d x 1 distance from each object to its closest centroid
153 | %   classes   - d x 1 index of that closest centroid
154 | %   distances - d x k distances, each computed as 1 - max(NCCc(object, centroid))
155 | %   sumd      - k x 1 sum of vals over the objects assigned to each centroid
156 | %  Author: Terence Lim
157 | 
158 | numObjects = size(A,1);
159 | numCenters = size(c,1);
160 | distances = zeros(numObjects, numCenters);
161 | 
162 | for obj = 1:numObjects
163 |   series = A(obj,:);
164 |   for cen = 1:numCenters
165 |     distances(obj,cen) = 1 - max( NCCc(series, c(cen,:)) );
166 |   end
167 | end
168 | [vals, classes] = min(distances,[],2);
169 | sumd = zeros(numCenters,1);
170 | for cen = 1:numCenters
171 |   sumd(cen) = sum(vals(classes==cen));
172 | end
173 | end
174 | 
175 | function [SSError, MSError, STDError, labels] = Cent2Membership(A, Centroids, DistanceIndex)
176 | % CENT2MEMBERSHIP  Assign every row of A to its nearest centroid.
177 | %   A is d x n data matrix
178 | %   Centroids is k x n centroids
179 | %   DistanceIndex is 1 for ED and 2 for SBD; any other value raises an error
180 | %   SSError, MSError, STDError are sum, mean and std of each object's distance to its nearest centroid
181 | %   labels is the cluster membership (row index into Centroids)
182 | 
183 | % Other codes used to leave every distance at zero and put all objects in cluster 1 without warning.
184 | if ~isequal(DistanceIndex,1) && ~isequal(DistanceIndex,2)
185 |     error('Cent2Membership:invalidDistanceIndex', 'DistanceIndex must be 1 (ED) or 2 (SBD).');
186 | end
187 | d = size(A,1);          % number of data objects
188 | k = size(Centroids,1);  % number of clusters
189 | distances = zeros(d,k);
190 | for i=1:d
191 |   for j=1:k
192 |     if DistanceIndex==1
193 |         distances(i,j) = ED(A(i,:),Centroids(j,:));
194 |     else
195 |         distances(i,j) = 1-max(NCCc(A(i,:),Centroids(j,:)));
196 |     end
197 |   end
198 | end
199 | [vals, labels] = min(distances,[],2);   % nearest centroid per object
200 | SSError = sum(vals);
201 | MSError = mean(vals);
202 | STDError = std(vals);
203 | end
204 | 


--------------------------------------------------------------------------------
/RWS/rws_gridsearch_CV_mulvar.m:
--------------------------------------------------------------------------------
  1 | % This script generates low-rank approximation of latent kernel matrix using 
  2 | % random features approach based on dtw like distance for multi-variate 
  3 | % time-series datasets. Use LIBLINEAR to perform grid search with 
  4 | % K-fold cross-validation!
  5 | %
  6 | % The grid covers DMax (upper bound on the random series length), sigma
  7 | % and lambda_inverse (LIBLINEAR -c). A setting replaces the stored best
  8 | % one only when its mean validation accuracy is more than 0.1 higher.
  9 | %
 10 | % Author: Lingfei Wu
 11 | % Date: 01/20/2019
 12 | 
 13 | 
 14 | clear,clc
 15 | % Start a pool only when none is running: parpool errors out if a pool is
 16 | % already open (e.g. when the script is re-run in the same session).
 17 | if isempty(gcp('nocreate'))
 18 |     parpool('local');
 19 | end
 20 | addpath(genpath('utilities'));
 21 | file_dir = './datasets/';
 22 | 
 23 | % List all datasets
 24 | filename_list = {'auslan'};
 25 | 
 26 | DMin = 1;
 27 | DMax_list = [5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100];
 28 | sigma_list = [1e-3 3e-3 1e-2 3e-2 0.10 0.14 0.19 0.28 0.39 0.56 ...
 29 |     0.79 1.12 1.58 2.23 3.16 4.46 6.30 8.91 10 31.62 1e2 3e2 1e3];
 30 | 
 31 | R = 32; % number of random time-series generated
 32 | CV = 10; % number of folds of cross validation
 33 | for jjj = 1:length(filename_list)
 34 |     filename = filename_list{jjj};
 35 |     disp(filename);
 36 |     
 37 |     info.aveAccu_best = 0;
 38 |     info.valAccuHist = [];
 39 |     info.DMaxHist = [];
 40 |     info.sigmaHist = [];
 41 |     info.lambda_invHist = [];
 42 |     for jj = 1:length(DMax_list)
 43 |     for j = 1:length(sigma_list)
 44 |         DMax = DMax_list(jj)
 45 |         sigma = sigma_list(j)
 46 |         
 47 |         % load, shuffle, and prepare the training data
 48 |         timer_start = tic;
 49 |         Data = load(strcat(file_dir,filename,'/',filename,'.mat'));
 50 |         trainX = Data.train_X;
 51 |         trainy = Data.train_Y;
 52 |         N = size(trainX,1);
 53 |         trainData = zeros(N, R+1);
 54 |         shuffle_index = randperm(N);
 55 |         trainX = trainX(shuffle_index); % shuffle the data
 56 |         trainy = trainy(shuffle_index);
 57 |         % generate random time series with variable length, where each
 58 |         % value in random series is sampled from Gaussian distribution
 59 |         % parameterized by sigma. 
 60 |         rng('default')
 61 |         sampleX = cell(R,1);
 62 |         d = size(trainX{1},1); % number of variates
 63 |         for i=1:R
 64 |             D = randi([DMin, DMax],1);
 65 |             sampleX{i} = randn(d, D)./sigma; % gaussian
 66 |         end
 67 |         trainFeaX_random = dtw_similarity_cell_mulvar(trainX, sampleX);
 68 |         trainFeaX_random = trainFeaX_random/sqrt(R); 
 69 |         trainData(:,2:end) = trainFeaX_random;
 70 |         % convert user labels to uniform format binary(-1,1) & 
 71 |         % multiclasses (1,2,..,k). The codes come from the index output
 72 |         % of unique(): relabelling trainy in place collides whenever a
 73 |         % new code equals a raw label that is still to be converted
 74 |         % (e.g. raw labels {2,3,5} all ended up as class 1).
 75 |         [labels, ~, label_idx] = unique(trainy);
 76 |         numClasses = length(labels);
 77 |         if numClasses > 2
 78 |             trainy = label_idx;           % i-th smallest label -> i
 79 |         else
 80 |             trainy = 2*label_idx - 3;     % smaller label -> -1, larger -> 1
 81 |         end
 82 |         trainData(:,1) = trainy;
 83 |         telapsed_fea_gen = toc(timer_start)
 84 | 
 85 |         disp('------------------------------------------------------');
 86 |         disp('LIBLinear performs basic grid search by varying lambda');
 87 |         disp('------------------------------------------------------');
 88 |         % Linear Kernel
 89 |         lambda_inverse = [1e-5 1e-4 1e-3 1e-2 1e-1 1 1e1 1e2 1e3 1e4 1e5];
 90 |         for i=1:length(lambda_inverse)
 91 |             valAccu = zeros(1, CV);
 92 |             for cv = 1:CV
 93 |                 % contiguous test fold; the first mod(N,CV) folds get
 94 |                 % one extra sample
 95 |                 subgroup_start = (cv-1) * floor(N/CV);
 96 |                 mod_remain = mod(N, CV);
 97 |                 div_remain = floor(N/CV);
 98 |                 if  mod_remain >= cv
 99 |                     subgroup_start = subgroup_start + cv;
100 |                     subgroup_end = subgroup_start + div_remain;
101 |                 else
102 |                     subgroup_start = subgroup_start + mod_remain + 1;
103 |                     subgroup_end = subgroup_start + div_remain -1;
104 |                 end
105 |                 test_indRange = subgroup_start:subgroup_end;
106 |                 train_indRange = setdiff(1:N, test_indRange);
107 |                 trainFeaX = trainData(train_indRange,2:end);
108 |                 trainy = trainData(train_indRange,1);
109 |                 testFeaX = trainData(test_indRange,2:end);
110 |                 testy = trainData(test_indRange,1);
111 |                 
112 |                 s2 = num2str(lambda_inverse(i));
113 |                 s1 = '-s 2 -e 0.0001 -q -c '; % for regular liblinear
114 | %                 s1 = '-s 2 -e 0.0001 -n 8 -q -c '; % for omp version
115 |                 s = [s1 s2];
116 |                 timer_start = tic;
117 |                 model_linear = train(trainy, sparse(trainFeaX), s);
118 |                 [test_predict_label, test_accuracy, test_dec_values] = ...
119 |                     predict(testy, sparse(testFeaX), model_linear);
120 |                 telapsed_liblinear = toc(timer_start);
121 |                 valAccu(cv) = test_accuracy(1);
122 |             end
123 |             ave_valAccu = mean(valAccu);
124 |             std_valAccu = std(valAccu);
125 |             if(info.aveAccu_best+0.1 < ave_valAccu)
126 |                 info.DMaxHist = [info.DMaxHist;DMax];
127 |                 info.sigmaHist = [info.sigmaHist;sigma];
128 |                 info.lambda_invHist = [info.lambda_invHist;lambda_inverse(i)];
129 |                 info.valAccuHist = [info.valAccuHist;valAccu];
130 |                 info.valAccu = valAccu;
131 |                 info.aveAccu_best = ave_valAccu;
132 |                 info.stdAccu = std_valAccu;
133 |                 info.telapsed_fea_gen = telapsed_fea_gen;
134 |                 info.telapsed_liblinear = telapsed_liblinear; % time of the last fold only
135 |                 info.runtime = telapsed_fea_gen + telapsed_liblinear;
136 |                 info.sigma = sigma;
137 |                 info.R = R;
138 |                 info.DMin = DMin;
139 |                 info.DMax = DMax;
140 |                 info.lambda_inverse = lambda_inverse(i);
141 |             end
142 |         end
143 |     end
144 |     end
145 |     disp(info);
146 |     savefilename = [filename '_rws_R' num2str(R) '_' num2str(CV) 'fold_CV'];
147 |     save(savefilename,'info')
148 | end
149 | % close the pool if one is open, without creating one just to delete it
150 | delete(gcp('nocreate'));
151 | 


--------------------------------------------------------------------------------
/RunVisualization.m:
--------------------------------------------------------------------------------
 1 | function RunVisualization(DataSetStartIndex, DataSetEndIndex, Method, RepType)  
 2 | % RUNVISUALIZATION  Compare exact KPCA with Nystrom KPCA on a stored representation.
 3 | %   DataSetStartIndex, DataSetEndIndex - inclusive range of sorted dataset indices to process
 4 | %   Method  - index into Methods (1 = 'Random', 2 = 'KShape')
 5 | %   RepType - index into Types/RepExtensions (1..9) selecting the representation file
 6 | %
 7 | %   Each processed dataset gets a row [AppxError, approximate KPCA runtime,
 8 | %   exact KPCA runtime] averaged over NumReps repetitions; the table is
 9 | %   written to 'RunVisualization/' and the approximate eigenvectors to
10 | %   'RunVisualizationVectors/'.
11 |     
12 |     Methods = [cellstr('Random'), 'KShape'];
13 |     Types = [cellstr('ZExact'), 'Z5', 'Z10', 'Z20', 'Z99per', 'Z95per', 'Z90per', 'Z85per', 'Z80per'];
14 |     % Extension of the stored representation that belongs to each RepType.
15 |     % NOTE(review): RepType 5 loads '.Z98per' while Types labels it 'Z99per'
16 |     % in the output file names - confirm which of the two is intended.
17 |     RepExtensions = {'.Zexact', '.Ztop5', '.Ztop10', '.Ztop20', '.Z98per', '.Z95per', '.Z90per', '.Z85per', '.Z80per'};
18 |     NumReps = 10;
19 | 
20 |     % Fail early with a clear message instead of an undefined-variable error later on.
21 |     if ~isscalar(RepType) || ~any(RepType == 1:numel(RepExtensions))
22 |         error('RunVisualization:invalidRepType', 'RepType must be an integer from 1 to %d.', numel(RepExtensions));
23 |     end
24 | 
25 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
26 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
27 |     Datasets = {dir_struct(3:130).name};
28 |                              
29 |     % Sort Datasets
30 |     Datasets = sort(Datasets);    
31 | 	
32 |     Results = zeros(length(Datasets),3);
33 |     
34 |     for i = 1:length(Datasets)
35 | 
36 |             if (i>=DataSetStartIndex && i<=DataSetEndIndex)
37 | 
38 |                     disp(['Dataset being processed: ', char(Datasets(i))]);
39 |                     % NOTE(review): DS is not used below; the call is kept in case LoadUCRdataset has side effects.
40 |                     DS = LoadUCRdataset(char(Datasets(i)));
41 |                     % Get Kernel Matrix
42 |                     
43 |                     gamma = 10;
44 |                     
45 |                     KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix') );
46 |                     
47 |                     tic;
48 |                     [~,ProjDataOriginal] = OriginalKPCA(KM);
49 |                     RTOriginalKPCA = toc;
50 |                     
51 |                     for rep = 1 : NumReps
52 |                         rep
53 |                         rng(rep);
54 |                         
55 |                         % Load only the representation selected by RepType (the other
56 |                         % eight files used to be read as well and then discarded).
57 |                         Z = dlmread( strcat( 'REPRESENTATIONSGamma', num2str(gamma),'/',char(Datasets(i)),'/','RepLearningFixedSamples', '_', char(Methods(Method)), '_', num2str(rep), RepExtensions{RepType} ) );
58 |                         
59 |                         tic;
60 |                         [ApproxEigVectors,ProjDataApprox] = NystromKPCA(Z);
61 |                         RTApproximatelKPCA = toc;
62 |                         
63 |                         % NOTE(review): the file name has no 'rep' component, so every repetition overwrites the previous one.
64 |                         dlmwrite( strcat( 'RunVisualizationVectors/','RESULTS_RunVisualization_', num2str(i), '_', num2str(i), '_', char(Methods(Method)), '_', char(Types(RepType)) ,'.Vectors'), ApproxEigVectors, 'delimiter', '\t');
65 |    
66 |                         % Frobenius norm of the difference between the Gram matrices
67 |                         % of the exact and the approximate projections
68 |                         AppxError = norm(ProjDataOriginal*ProjDataOriginal'-ProjDataApprox*ProjDataApprox','fro');
69 |                         
70 |                         Results(i,:) = Results(i,:) + [AppxError,RTApproximatelKPCA,RTOriginalKPCA];
71 |                     end
72 |                     Results(i,:) = Results(i,:) ./ NumReps;
73 |            
74 |             end
75 |             
76 |             % rewritten after every dataset index, so partial results are always on disk
77 |             dlmwrite( strcat( 'RunVisualization/','RESULTS_RunVisualization_', num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex), '_', char(Methods(Method)), '_', char(Types(RepType)) ,'.results'), Results, 'delimiter', '\t');
78 |    
79 |     end
80 |     
81 | end


--------------------------------------------------------------------------------
/RunDictLearning.m:
--------------------------------------------------------------------------------
  1 | function RunDictLearning(DataSetStartIndex, DataSetEndIndex, Method, RepStartIndex, RepEndIndex)  
  2 | % RUNDICTLEARNING  Build one dictionary of time series per dataset and repetition.
  3 | %   DataSetStartIndex, DataSetEndIndex - inclusive range of sorted dataset indices
  4 | %   Method - 1 'Random'  : NumOfSamples randomly permuted rows of DS.Data
  5 | %            2 'KShape'  : kShapeCentroids output with the lowest sumd of 3 runs
  6 | %            3 'GibbsDPP': rows picked by GibbsDPP on the DM2KM-converted SBD distance matrix
  7 | %   RepStartIndex, RepEndIndex - inclusive range of repetitions within 1..10
  8 | %   Dictionaries and statistics are written tab-delimited with dlmwrite;
  9 | %   any other Method value writes nothing.
 10 |     
 11 |     Methods = [cellstr('Random'), 'KShape', 'GibbsDPP'];
 12 |     
 13 |     % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
 14 |     dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
 15 |     Datasets = {dir_struct(3:130).name};
 16 |                      
 17 |     % Sort Datasets
 18 |     Datasets = sort(Datasets); 
 19 |     
 20 |     for i = 1:length(Datasets)
 21 |         if ~(i>=DataSetStartIndex & i<=DataSetEndIndex)
 22 |             continue;
 23 |         end
 24 | 
 25 |         DatasetName = char(Datasets(i));
 26 |         disp(['Dataset being processed: ', DatasetName]);
 27 |         DS = LoadUCRdataset(DatasetName);
 28 |         
 29 |         for rep = 1 : 10
 30 |             if ~(rep>=RepStartIndex & rep<=RepEndIndex)
 31 |                 continue;
 32 |             end
 33 |             
 34 |             rep
 35 |             rng(rep);
 36 |             
 37 |             NumOfSamples = min(max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] ),100);
 38 |             
 39 |             if ~any(Method == [1 2 3])
 40 |                 continue;   % unknown method: nothing to build or write
 41 |             end
 42 |             
 43 |             % pieces of the output paths shared by all methods
 44 |             MethodName = char(Methods(Method));
 45 |             RunTag = strcat('RunDLFixedSamples', '_', MethodName, '_', num2str(rep));
 46 |             StatsFile = strcat('RunDictLearning/', 'RunDLFixedSamples', '_', MethodName, '_', num2str(i), '_', num2str(rep), '.Statistics');
 47 |             
 48 |             if Method==1
 49 |                 % random rows of the data; only the sampling is timed
 50 |                 OutDir = strcat('DICTIONARIESRANDOM/', DatasetName, '/');
 51 |                 tic;
 52 |                 shuffled = randperm(DS.DataInstancesCount);
 53 |                 Dictionary = DS.Data(shuffled(1:NumOfSamples),:);
 54 |                 ClustRuntime = toc;
 55 |                 
 56 |                 dlmwrite(strcat(OutDir, RunTag, '.Dictionary'), Dictionary, 'delimiter', '\t');
 57 |                 dlmwrite(StatsFile, ClustRuntime, 'delimiter', '\t');
 58 |                 
 59 |             elseif Method==2
 60 |                 % keep the run with the smallest sumd out of three
 61 |                 OutDir = strcat('DICTIONARIESKSHAPE/', DatasetName, '/');
 62 |                 BestSumd = Inf;
 63 |                 for Repetion = 1:3
 64 |                     [~,Dictionary,~,sumd,centKpp,centKppSmplPoints,~,~,DistComp,RuntimekShape,DistCompSeed,RuntimeSeed] = kShapeCentroids(DS.Data, NumOfSamples, 1);
 65 |                     if sumd<BestSumd
 66 |                         Best.Dictionary = Dictionary;
 67 |                         Best.centKpp = centKpp;
 68 |                         Best.centKppSmplPoints = centKppSmplPoints;
 69 |                         Best.RuntimekShape = RuntimekShape;
 70 |                         Best.DistComp = DistComp;
 71 |                         Best.DistCompSeed = DistCompSeed;
 72 |                         Best.RuntimeSeed = RuntimeSeed;
 73 |                         BestSumd = sumd;
 74 |                     end
 75 |                 end
 76 |                 
 77 |                 dlmwrite(strcat(OutDir, RunTag, '.Dictionary'), Best.Dictionary, 'delimiter', '\t');
 78 |                 dlmwrite(strcat(OutDir, RunTag, '.KppCentroids'), Best.centKpp, 'delimiter', '\t');
 79 |                 dlmwrite(strcat(OutDir, RunTag, '.KppSmplPoints'), Best.centKppSmplPoints, 'delimiter', '\t');
 80 |                 dlmwrite(StatsFile, [Best.RuntimekShape,Best.RuntimeSeed,Best.DistComp,Best.DistCompSeed], 'delimiter', '\t');
 81 |                 
 82 |             elseif Method==3
 83 |                 % reading and converting the distance matrix is not timed
 84 |                 OutDir = strcat('DICTIONARIESGIBBSDPP/', DatasetName, '/');
 85 |                 KM = dlmread( strcat( 'DistanceMatrices/',DatasetName,'/', DatasetName, '_SBD.distmatrix'));
 86 |                 KM = DM2KM(KM);
 87 |                 tic;
 88 |                 [SmplPoints,DistComp] = GibbsDPP(KM, 1000, NumOfSamples);
 89 |                 Dictionary = DS.Data(SmplPoints,:);
 90 |                 ClustRuntime = toc;
 91 |                 
 92 |                 dlmwrite(strcat(OutDir, RunTag, '.Dictionary'), Dictionary, 'delimiter', '\t');
 93 |                 dlmwrite(StatsFile, [ClustRuntime,DistComp], 'delimiter', '\t');
 94 |             end
 95 |             
 96 |         end
 97 |             
 98 |     end
 99 |     
100 | end


--------------------------------------------------------------------------------