├── ED.m ├── dtw.mexa64 ├── SPIRAL ├── dtw_c.mexa64 ├── exactCDmex.mexa64 ├── README.md ├── matrix_completion_sparse_mex.m ├── construct_sparse.m ├── exactCDmex.c └── dtw_c.c ├── RWS ├── utilities │ ├── svm-scale │ ├── dtw_c.mexa64 │ ├── train.mexa64 │ ├── dtw_c.mexmaci64 │ ├── predict.mexa64 │ ├── svmtrain.mexa64 │ ├── train.mexmaci64 │ ├── libsvmread.mexa64 │ ├── predict.mexmaci64 │ ├── svmpredict.mexa64 │ ├── train_omp.mexa64 │ ├── libsvmread.mexmaci64 │ ├── libsvmwrite.mexa64 │ ├── predict_omp.mexa64 │ ├── svmpredict.mexmaci64 │ ├── svmtrain.mexmaci64 │ ├── libsvmwrite.mexmaci64 │ ├── demo_dtw.m │ ├── dtw_m.m │ ├── svm-scale-README │ └── dtw_c.c ├── Gun_Point_rws_R128_10fold_CV.mat ├── dtw_similarity_cell_mulvar.m ├── dtw_similarity_cell.m ├── rws_GenFea_example_mulvar.m ├── rws_GenFea_example.m ├── rws_GenFea_mulvar.m ├── rws_VaryingR_CV_R128.m ├── rws_VaryingR_CV_R128_mulvar.m ├── README.md └── rws_gridsearch_CV_mulvar.m ├── SINK.m ├── DM2KM.m ├── OriginalKPCA.m ├── SINKCompressed.m ├── NystromKPCA.m ├── NCCc.m ├── SBD.m ├── KernelKmeansClustering.m ├── normalizedata.m ├── cDTW.m ├── KMCompSINK.m ├── KMCompSINK_TestToTrain.m ├── KMCompGAK_TestToTrain.m ├── RandIndex.m ├── KMCompSINKCompressed.m ├── NystromMatrixGivenWandE.m ├── DMComp.m ├── KernelSCApprox.m ├── GibbsDPP.m ├── KMCompGAK.m ├── KMCompSINK_TrainToTrain.m ├── OneNNClassifierED.m ├── OneNNClassifierSBD.m ├── OneNNClassifierDTW.m ├── OneNNClassifierGAK.m ├── NCCcCompressed.m ├── OneNNClassifierSBDCompressed.m ├── LoadUCRdataset.m ├── LeaveOneOutClassifierZREP.m ├── TestVarianceExact.m ├── kMeans.m ├── OneNNClassifierZREP.m ├── SPIRALRepLearning.m ├── LOOClassifierDTW.m ├── SIDL ├── README ├── LICENSE ├── op_shift.m ├── unsup_obj.m ├── update_S.m ├── USIDL.m ├── main_example.m └── update_A_par.m ├── RunOneNNED.m ├── RunOneNNGAKTiming.m ├── RunOneNNSBD.m ├── RunSPIRALRepLearning.m ├── NystromMatrixDictionary.m ├── RunKMCompSINK.m ├── RunOneNNDTW.m ├── RunDMComp.m ├── FrequentDirections.m ├── 
RunClusteringKShape.m ├── RunSIDLRepLearning.m ├── RunKMCompSINKCompressed.m ├── RunClusteringKShapeORIGINAL.m ├── RunClusteringKMeans.m ├── kShape.m ├── SIDLRepLearning.m ├── RWSRepLearning.m ├── RunRWSRepLearning.m ├── RunOneNNSBDCompressed.m ├── RunClusteringSPIRAL.m ├── CollectStatistics.m ├── RepLearnKM.m ├── RunClusteringSIDL.m ├── RunKMCompSINKSPLIT.m ├── RunLOOCandOneNNDTW.m ├── TestVarianceApproximate.m ├── RunClusteringRWS.m ├── DatasetToFourier.m ├── RunTestVarianceApproximate.m ├── RepLearnFINAL.m ├── RepLearnFINALSINKComp.m ├── RunKMCompGAK.m ├── RunOneNNSINKCompressed.m ├── RunTestVarianceExact.m ├── RunRepLearningKM.m ├── mySVD.m ├── OneNNClassifierLB.m ├── RunLinearSVMRWS.m ├── RunLinearSVMSPIRAL.m ├── dtw.c ├── RunDictEvaluation.m ├── kShapeCentroids.m ├── RunVisualization.m └── RunDictLearning.m

/ED.m:
--------------------------------------------------------------------------------
function Dist = ED(x,y)
% ED  Euclidean distance between two equally sized inputs.
%   Dist = ED(x,y) returns sqrt(sum((x-y).^2)). The sum uses MATLAB's
%   default dimension, so vector inputs give a scalar.

delta = x - y;
sq    = delta.^2;
Dist  = sqrt(sum(sq));

end

--------------------------------------------------------------------------------
/dtw.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/dtw.mexa64

--------------------------------------------------------------------------------
/SPIRAL/dtw_c.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/SPIRAL/dtw_c.mexa64

--------------------------------------------------------------------------------
/RWS/utilities/svm-scale:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svm-scale

--------------------------------------------------------------------------------
/SPIRAL/exactCDmex.mexa64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/SPIRAL/exactCDmex.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/dtw_c.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/dtw_c.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/train.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/train.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/dtw_c.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/dtw_c.mexmaci64 -------------------------------------------------------------------------------- /RWS/utilities/predict.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/predict.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/svmtrain.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmtrain.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/train.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/train.mexmaci64 -------------------------------------------------------------------------------- /RWS/utilities/libsvmread.mexa64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmread.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/predict.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/predict.mexmaci64 -------------------------------------------------------------------------------- /RWS/utilities/svmpredict.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmpredict.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/train_omp.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/train_omp.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/libsvmread.mexmaci64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmread.mexmaci64 -------------------------------------------------------------------------------- /RWS/utilities/libsvmwrite.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmwrite.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/predict_omp.mexa64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/predict_omp.mexa64 -------------------------------------------------------------------------------- /RWS/utilities/svmpredict.mexmaci64: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmpredict.mexmaci64

--------------------------------------------------------------------------------
/RWS/utilities/svmtrain.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/svmtrain.mexmaci64

--------------------------------------------------------------------------------
/RWS/Gun_Point_rws_R128_10fold_CV.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/Gun_Point_rws_R128_10fold_CV.mat

--------------------------------------------------------------------------------
/RWS/utilities/libsvmwrite.mexmaci64:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/grail-matlab/HEAD/RWS/utilities/libsvmwrite.mexmaci64

--------------------------------------------------------------------------------
/SINK.m:
--------------------------------------------------------------------------------
function sim = SINK(x,y,gamma)
% SINK  Shift INvariant Kernel between two time series.
%   sim = SINK(x,y,gamma) sums exp(gamma*NCCc(x,y)) over every entry of the
%   cross-correlation sequence and divides by the geometric mean of the
%   same quantity computed for (x,x) and (y,y), so SINK(x,x,gamma) is 1.
%
%   x, y  : time series passed straight to NCCc
%   gamma : scale applied to the correlation values before exponentiation

crossTerm = SumExpNCCc(x,y,gamma);
selfX     = SumExpNCCc(x,x,gamma);
selfY     = SumExpNCCc(y,y,gamma);

sim = crossTerm/sqrt(selfX * selfY);

end

function sim = SumExpNCCc(x,y,gamma)
% Sum of the exponentiated cross-correlation sequence of x and y.

sim = sum(exp(gamma*NCCc(x,y)));

end

--------------------------------------------------------------------------------
/DM2KM.m:
--------------------------------------------------------------------------------
function DM = DM2KM(DM)
% DM2KM  Turn a distance matrix into a Gaussian kernel matrix.
%   DM is nXn distance matrix: n are # of time series.
%   Every entry d is mapped to exp(-d^2/(2*sigma^2)), where the bandwidth
%   sigma is the mean of all entries of the input matrix.

sigma = mean(mean(DM));

% Element-wise transform; replaces the original per-element double loop.
DM = exp(-DM.^2/(2*sigma^2));

end
--------------------------------------------------------------------------------
/OriginalKPCA.m:
--------------------------------------------------------------------------------
function [U,ProjData] = OriginalKPCA(K)
% OriginalKPCA  Kernel PCA on a full kernel matrix.
%   K        : N-by-N kernel matrix
%   U        : eigenvectors of the centered kernel matrix, columns ordered
%              by descending eigenvalue (real part only)
%   ProjData : centered kernel matrix multiplied by U
N = size(K,1);
oneN = ones(N,N)/N;

% Double centering: K - 1K - K1 + 1K1. The previous form subtracted
% (2/N)*ones*K, which is not the same as 1K + K1 and left the matrix
% non-symmetric and not centered.
K_Centered = K - oneN*K - K*oneN + oneN*K*oneN;

[U,L] = eig(K_Centered);

[~, dex] = sort(diag(L),'descend');
U = real(U(:, dex));

ProjData = K_Centered*U;

end

--------------------------------------------------------------------------------
/SINKCompressed.m:
--------------------------------------------------------------------------------
function sim = SINKCompressed(x,y, gamma, k)
% SINKCompressed  Shift INvariant Kernel computed from truncated spectra.
%   Same normalization as SINK, but the correlation sequences come from
%   NCCcCompressed(.,.,k).

crossTerm = SumExpNCCcCompressed(x,y,gamma,k);
selfX     = SumExpNCCcCompressed(x,x,gamma,k);
selfY     = SumExpNCCcCompressed(y,y,gamma,k);

sim = crossTerm/sqrt(selfX * selfY);

end

function sim = SumExpNCCcCompressed(x,y,gamma,k)
% Sum of the exponentiated compressed cross-correlation sequence.

sim = sum(exp(gamma*NCCcCompressed(x,y,k)));

end

--------------------------------------------------------------------------------
/NystromKPCA.m:
--------------------------------------------------------------------------------
function [V,ProjData] = NystromKPCA(Z)
% NystromKPCA  PCA of a representation matrix Z through a FrequentDirections sketch.
%   Z        : one row per instance
%   V        : Z*U*L^(-1/2), columns ordered by descending eigenvalue
%   ProjData : Z*U

% mean(Z,1) keeps the column-wise mean even when Z has a single row
% (mean(Z) would collapse a row vector to a scalar).
Z = Z - repmat(mean(Z,1), size(Z,1), 1);

[BSketch, ~] = FrequentDirections(Z, ceil(0.5*size(Z,2)));
NewL = BSketch'*BSketch;

[U,L] = eig(NewL);

V = Z * U * L^(-1/2);
[~, dex] = sort(diag(L),'descend');
V = real(V(:, dex));

% NOTE(review): ProjData keeps the column order returned by eig, while V is
% re-ordered by descending eigenvalue - confirm callers expect this.
ProjData = Z*U;

end

--------------------------------------------------------------------------------
/NCCc.m:
--------------------------------------------------------------------------------
function cc_sequence = NCCc(x,y)
% NCCc  Coefficient-normalized cross-correlation sequence of x and y.
%   Inputs are converted to column vectors. With len = max(length(x),length(y)),
%   the result is a column of 2*len-1 values (len-1 entries taken from the
%   end of the circular correlation, then its first len entries), divided
%   by norm(x)*norm(y).

if isrow(y)
    y = y';
end
if isrow(x)
    x = x';
end

len = max(length(x),length(y));

% Zero-pad to a power of two that is at least 2*len-1.
fftlength = 2^nextpow2(2*len-1);

r = ifft( fft(x,fftlength) .* conj(fft(y,fftlength)) );

r = [r(end-len+2:end) ; r(1:len)];

cc_sequence = r./(norm(x)*norm(y));

end

--------------------------------------------------------------------------------
/SBD.m:
--------------------------------------------------------------------------------
function [dist, shift, yshift] = SBD(x,y)
% SBD  Shape-based distance derived from NCCc.
%   dist   : 1 - max(NCCc(x,y))
%   shift  : position of that maximum minus max(length(x),length(y))
%   yshift : y shifted by 'shift' positions and zero-padded to its
%            original length

if iscolumn(x)
    x = x';
end
if iscolumn(y)
    y = y';
end

X1 = NCCc(x,y);

[m,d] = max(X1);

shift = d - max(length(x),length(y));

if shift < 0
    yshift = [y(-shift + 1:end) zeros(1, -shift)];
else
    yshift = [zeros(1,shift) y(1:end-shift) ];
end

dist = 1-m;

end

--------------------------------------------------------------------------------
/KernelKmeansClustering.m:
--------------------------------------------------------------------------------
function mem = KernelKmeansClustering(ZExact, ZReduced, k)
% KernelKmeansClustering  k-means on a learned representation.
%   Seeds come from DualDPP_FD and the clustering from kmeans. Both steps
%   try ZReduced first and fall back to ZExact if the call throws.

try
    SmplPoints = DualDPP_FD(ZReduced, k);
catch
    SmplPoints = DualDPP_FD(ZExact, k);
end


try
    mem = kmeans(ZReduced,k,'Start',ZReduced(SmplPoints,:));
catch
    mem = kmeans(ZExact,k,'Start',ZExact(SmplPoints,:));
end


end

--------------------------------------------------------------------------------
/normalizedata.m:
--------------------------------------------------------------------------------
function norm_data = normalizedata(test,scaling)
% normalizedata  Row-wise inverted min-max scaling.
%   For each row, the minimum is lowered by min*scaling and the maximum
%   raised by max*scaling, the row is mapped linearly onto that range, and
%   the result is flipped (1 - value).

norm_data = zeros(size(test,1),size(test,2));

for i = 1:size(test,1)

    row = test(i,:);

    minvalue = min(row) - min(row)*scaling;
    maxvalue = max(row) + max(row)*scaling;

    norm_data(i,:) = (row - minvalue) / ( maxvalue - minvalue );
    norm_data(i,:) = 1 - norm_data(i,:);
end


end

--------------------------------------------------------------------------------
/cDTW.m:
--------------------------------------------------------------------------------
function Dist=cDTW(t,r,W)
% cDTW  Dynamic time warping distance with a band constraint.
%   t, r : time series (column inputs are transposed to rows)
%   W    : band half-width; optional, unconstrained when omitted
%   Dist : square root of the accumulated squared differences along the
%          best warping path

if iscolumn(t)
    t = t';
end
if iscolumn(r)
    r = r';
end
N = size(t,2);
M = size(r,2);

if nargin < 3
    W = Inf;
end
% A band narrower than the length difference can never reach cell
% (N+1,M+1) and the result would be Inf; widen it, as dtw_m does.
W = max(W, abs(N-M));

D = ones(N+1,M+1)*inf;

D(1,1) = 0;
for i = 2:N+1
    for j = max(2, i-W):min(M+1, i+W)
        cost = (t(i-1)-r(j-1))^2;
        D(i,j) = cost + min([D(i-1,j),D(i-1,j-1),D(i,j-1)]);
    end
end
Dist = sqrt(D(N+1, M+1));
end

--------------------------------------------------------------------------------
/KMCompSINK.m:
--------------------------------------------------------------------------------
function [KM, DistComp] = KMCompSINK(X,gamma)
% KMCompSINK  Pairwise SINK kernel matrix of the rows of X.
%   KM       : m-by-m matrix; the diagonal stays 1, off-diagonal pairs are
%              evaluated once and mirrored
%   DistComp : number of SINK evaluations performed

[m, ~] = size(X);

KM = ones(m,m);

DistComp = 0;

for i = 1:m-1
    disp(i)   % progress output
    rowi = X(i,:);
    for j = i+1:m
        rowj = X(j,:);
        KM(i,j) = SINK(rowi,rowj,gamma);
        DistComp = DistComp+1;
        KM(j,i) = KM(i,j);
    end
end

end

--------------------------------------------------------------------------------
/KMCompSINK_TestToTrain.m:
--------------------------------------------------------------------------------
function [KM,DistComp] = KMCompSINK_TestToTrain(X,Y,sigma)
% KMCompSINK_TestToTrain  SINK kernel between every row of X and every row of Y.
%   KM(i,j) = SINK(X(i,:), Y(j,:), sigma); DistComp counts the evaluations.

[nrowsX, ~] = size(X);
[nrowsY, ~] = size(Y);

KM = zeros(nrowsX,nrowsY);

DistComp = 0;
for i = 1:nrowsX
    disp(i);   % progress output
    tmpX = X(i,:);
    for j = 1:nrowsY
        KM(i,j) = SINK(tmpX,Y(j,:),sigma);
        DistComp = DistComp+1;
    end
end
end

--------------------------------------------------------------------------------
/KMCompGAK_TestToTrain.m:
--------------------------------------------------------------------------------
function [KM,DistComp] = KMCompGAK_TestToTrain(X,Y,sigma)
% KMCompGAK_TestToTrain  logGAK values between every row of X and every row of Y.
%   The inner loop runs under parfor; DistComp counts the evaluations.

[nrowsX, ~] = size(X);
[nrowsY, ~] = size(Y);

KM = zeros(nrowsX,nrowsY);

DistComp = 0;
for i = 1:nrowsX
    disp(i);   % progress output
    tmpX = X(i,:);
    parfor j = 1:nrowsY
        KM(i,j) = logGAK(tmpX',Y(j,:)',sigma,0);
        DistComp = DistComp+1;
    end
end
end

--------------------------------------------------------------------------------
/RandIndex.m:
--------------------------------------------------------------------------------
function RI=RandIndex(c1,c2)
% RandIndex  Rand index between two labelings.
%   c1, c2 : label vectors of equal length; the labels are used as matrix
%            indices, so they must be positive integers
%   RI     : (number of agreeing pairs) / nchoosek(n,2)
C = Contingency(c1,c2);

n   = sum(sum(C));
nis = sum(sum(C,2).^2);   % sum of squared row totals
njs = sum(sum(C,1).^2);   % sum of squared column totals

t1 = nchoosek(n,2);
t2 = sum(sum(C.^2));
t3 = .5*(nis+njs);

A = t1+t2-t3;

RI = A/t1;
end

function Cont=Contingency(Mem1,Mem2)
% Contingency table: Cont(a,b) counts items labelled a in Mem1 and b in Mem2.
Cont = zeros(max(Mem1),max(Mem2));

for i = 1:length(Mem1)
    Cont(Mem1(i),Mem2(i)) = Cont(Mem1(i),Mem2(i))+1;
end
end

--------------------------------------------------------------------------------
/KMCompSINKCompressed.m:
--------------------------------------------------------------------------------
function [KM, DistComp] = KMCompSINKCompressed(X,gamma,k)
% KMCompSINKCompressed  Pairwise SINKCompressed kernel matrix of the rows of X.
%   KM       : m-by-m matrix; the diagonal stays 1, off-diagonal pairs are
%              evaluated once and mirrored
%   DistComp : number of kernel evaluations performed

[m, ~] = size(X);

KM = ones(m,m);

DistComp = 0;

for i = 1:m-1
    disp(i)   % progress output
    rowi = X(i,:);
    for j = i+1:m
        rowj = X(j,:);
        KM(i,j) = SINKCompressed(rowi,rowj, gamma, k);
        DistComp = DistComp+1;
        KM(j,i) = KM(i,j);
    end
end

end

--------------------------------------------------------------------------------
/NystromMatrixGivenWandE.m:
--------------------------------------------------------------------------------
function [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, C, Winv)
% NystromMatrixGivenWandE  Frobenius-norm errors of the approximation C*Winv*C'.
%   KM   : nXn kernel matrix, where n is the # of time series
%   C    : left factor of the approximation (n rows)
%   Winv : middle factor of the approximation
%   AbsFroError  : ||KM - C*Winv*C'||_F
%   RelFroError  : AbsFroError / ||KM||_F
%   NormFroError : AbsFroError / n^2
nrowsX = size(KM,1);

KMtilde = C*Winv*C';

% The residual norm is shared by all three outputs; compute it once.
residual = norm(KM-KMtilde,'fro');

AbsFroError  = residual;
RelFroError  = residual/norm(KM,'fro');
NormFroError = residual/nrowsX^2;

end

--------------------------------------------------------------------------------
/DMComp.m:
--------------------------------------------------------------------------------
function [DM, DistComp] = DMComp(X, DistanceIndex)
% DMComp  Pairwise distance matrix of the rows of X.
%   X is mXn matrix: m are # of time series
%   DistanceIndex : 1 -> ED, 2 -> 1 - max(NCCc); any other value leaves
%                   the entries at zero
%   DistComp      : number of pairs visited

[m, ~] = size(X);

DM = zeros(m,m);

DistComp = 0;

for i = 1:m-1
    for j = i+1:m
        if DistanceIndex==1
            DM(i,j) = ED(X(i,:),X(j,:));
        elseif DistanceIndex==2
            DM(i,j) = 1-max( NCCc(X(i,:),X(j,:)) );
        end
        DistComp = DistComp+1;

        DM(j,i) = DM(i,j);
    end
end

end

--------------------------------------------------------------------------------
/KernelSCApprox.m:
--------------------------------------------------------------------------------
1 | function mem = KernelSCApprox(Z, k) 2 | 3 | if size(Z,2) best_so_far 18 | class = DS.TrainClassLabels(i); 19 | best_so_far = distance; 20 | end 21 | end 22 | 23 | if (DS.TestClassLabels(id) == class) 24 | acc = acc + 1; 25 | end 26 | end 27 | 28 | acc = acc / DS.TestInstancesCount; 29 | end 30 |

--------------------------------------------------------------------------------
/NCCcCompressed.m:
--------------------------------------------------------------------------------
function cc_sequence = NCCcCompressed(x, y, k)
% NCCcCompressed  NCCc computed from truncated Fourier spectra.
%   x is a time series
%   y is a time series
%   k is the # of Fourier coefficients to keep

if isrow(x)
    x = x';
end
if isrow(y)
    y = y';
end

len = max(length(x),length(y));

fftlength = 2^nextpow2(2*len-1);

% leading_fourier measures the spectrum length along dimension 2, so the
% FFTs are taken of row vectors and transposed back afterwards.
FFTx = leading_fourier(fft(x',fftlength),k);
FFTy = leading_fourier(fft(y',fftlength),k);

r = ifft( FFTx.' .* conj(FFTy.') );

r = [r(end-len+2:end) ; r(1:len)];

cc_sequence = r./(norm(x)*norm(y));

end

function x = leading_fourier(x, k)
% leading_fourier(x,k) keeps the leading k and trailing k-1 coefficients
% (the spectrum of a real signal is symmetric) by zeroing out the middle
% window. No rescaling is applied.
m = floor(size(x, 2) / 2) + 1;
x((k+1):(m - 1 + m - k)) = 0;
end

--------------------------------------------------------------------------------
/SPIRAL/matrix_completion_sparse_mex.m:
--------------------------------------------------------------------------------
function X=matrix_completion_sparse_mex(A,d,Omega,X0,options)
% matrix completion:
% A- given matrix, each row only has the nonzeros indices;
% d-diagonal indices of A
% Omega- visible indices:consists of n vectors, must be symmetric;
% X0 initial-all zeros
% options.maxiter is passed to the solver as the iteration limit
%
% exactCDmex must already be compiled (mex exactCDmex.c).

fprintf('Step 2: matrix factorization...\n');
n = size(A,2);

% Length of every cell of A; the longest one sets the padded width.
lenA = zeros(n,1);
for i = 1:n
    lenA(i) = length(A{i});
end
m = max(lenA);

% Pack the ragged cells into zero-padded n-by-m matrices. One is subtracted
% from Omega (and from d below) before handing them to the mex routine.
nA = zeros(n,m);
nO = nA;
for i = 1:n
    nA(i,1:length(A{i})) = A{i};
    nO(i,1:length(A{i})) = Omega{i}-1;
end
d = d-1;
nR = nA;
X = exactCDmex(nA,nR,nO,X0,lenA,d,norm(nA,'fro'),options.maxiter);

end

--------------------------------------------------------------------------------
/OneNNClassifierSBDCompressed.m:
--------------------------------------------------------------------------------
function acc = OneNNClassifierSBDCompressed(DS,numofcoeffs)
% OneNNClassifierSBDCompressed  1-NN accuracy with the compressed SBD distance.
%   DS          : dataset struct (fields Train, Test, TrainClassLabels,
%                 TestClassLabels, TrainInstancesCount, TestInstancesCount)
%   numofcoeffs : # of Fourier coefficients given to NCCcCompressed
%   acc         : fraction of test instances whose nearest training
%                 instance carries the same label

acc = 0;

for id = 1 : DS.TestInstancesCount
    disp(id);   % progress output
    classify_this = DS.Test(id,:);

    best_so_far = inf;
    % Reset for every query. Without this, a query whose distances are all
    % NaN or Inf silently reused the label predicted for the previous query.
    predicted = NaN;

    for i = 1 : DS.TrainInstancesCount

        compare_to_this = DS.Train(i,:);

        distance = 1-max( NCCcCompressed(compare_to_this, classify_this, numofcoeffs));

        if distance < best_so_far
            predicted = DS.TrainClassLabels(i);
            best_so_far = distance;
        end
    end

    if (DS.TestClassLabels(id) == predicted)
        acc = acc + 1;
    end
end

acc = acc / DS.TestInstancesCount;
end

--------------------------------------------------------------------------------
/RWS/utilities/demo_dtw.m:
--------------------------------------------------------------------------------
% Copyright (C) 2013 Quan Wang ,
% Signal Analysis and Machine Perception Laboratory,
% Department of Electrical, Computer, and Systems Engineering,
% Rensselaer Polytechnic Institute, Troy, NY 12180, USA

% this is a demo showing the use of our dynamic time warping package
% we provide both Matlab version and C/MEX version
% the C/MEX version is much faster and highly recommended

clear;clc;close all;

mex dtw_c.c;

a=rand(500,3);
b=rand(520,3);
w=50;

tic;
d1=dtw_m(a,b,w);
t1=toc;

tic;
d2=dtw_c(a,b,w);
t2=toc;

tic;
d3=dtw(a',b',w);
t3=toc;

fprintf('Using Matlab dtw: distance=%f, running time=%f\n',d1,t1);
fprintf('Using C/MEX dtw: distance=%f, running time=%f\n',d2,t2);
fprintf('Using Matlab Internal dtw: distance=%f, running time=%f\n',d3,t3);

--------------------------------------------------------------------------------
/LoadUCRdataset.m:
--------------------------------------------------------------------------------
function DS = LoadUCRdataset(datasetname, rootdir)
% LoadUCRdataset  Load the TRAIN/TEST split of one dataset into a struct.
%   datasetname : dataset folder name; the files <name>/<name>_TRAIN and
%                 <name>/<name>_TEST are read with load()
%   rootdir     : optional archive root; defaults to the original cluster path
%   DS          : struct with Train/Test/Data matrices, the matching
%                 *ClassLabels vectors (first column of each file),
%                 ClassNames and the three *InstancesCount fields

if nargin < 2 || isempty(rootdir)
    rootdir = '/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/';
end
if rootdir(end) ~= '/'
    rootdir = [rootdir, '/'];
end

TRAIN = load([rootdir,datasetname,'/',datasetname,'_TRAIN']);
TEST = load([rootdir,datasetname,'/',datasetname,'_TEST']);

%rng('default');
%Train_numSamples = size(TRAIN,1);
%TRAIN = TRAIN(randperm(Train_numSamples),:); % shuffle the data

% First column holds the class label, the rest is the series.
TRAIN_labels = TRAIN(:,1);
TRAIN(:,1) = [];
TEST_labels = TEST(:,1);
TEST(:,1) = [];

DS.TrainClassLabels = TRAIN_labels;
DS.TestClassLabels = TEST_labels;
DS.DataClassLabels = [TRAIN_labels;TEST_labels];

DS.Train = TRAIN;
DS.Test = TEST;
DS.Data = [TRAIN;TEST];

DS.ClassNames = unique(TRAIN_labels);

DS.TrainInstancesCount = size(DS.Train,1);
DS.TestInstancesCount = size(DS.Test,1);
DS.DataInstancesCount = size(DS.Data,1);
end

--------------------------------------------------------------------------------
/LeaveOneOutClassifierZREP.m:
--------------------------------------------------------------------------------
function acc = LeaveOneOutClassifierZREP(DS,ZRepresentation)
% LeaveOneOutClassifierZREP  Leave-one-out 1-NN accuracy on the training rows.
%   The first DS.TrainInstancesCount rows of ZRepresentation are compared
%   with squared Euclidean distance; each row is classified by its nearest
%   other row.

ZRepTrain = ZRepresentation(1:DS.TrainInstancesCount,:);

acc = 0;

for id = 1 : DS.TrainInstancesCount

    classify_this = ZRepTrain(id,:);

    best_so_far = inf;
    % Reset per query so a query with no usable neighbour is never scored
    % with the previous query's label.
    predicted = NaN;

    for i = 1 : DS.TrainInstancesCount

        if (i ~= id)

            compare_to_this = ZRepTrain(i,:);

            distance = ED(compare_to_this, classify_this)^2;

            if distance < best_so_far
                predicted = DS.TrainClassLabels(i);
                best_so_far = distance;
            end
        end

    end

    if (DS.TrainClassLabels(id) == predicted)
        acc = acc + 1;
    end

end
acc = acc / DS.TrainInstancesCount;
end

--------------------------------------------------------------------------------
/TestVarianceExact.m:
--------------------------------------------------------------------------------
function [Variance,VarExplainedTop5,VarExplainedTop10,VarExplainedTop20,DimFor98,DimFor95,DimFor90,DimFor85,DimFor80,VarExplainedCumSum]=TestVarianceExact(KM)
% TestVarianceExact  Spectrum statistics of a kernel matrix.
%   Variance             : variance of all entries of KM
%   VarExplainedCumSum   : cumulative eigenvalue share, eigenvalues sorted
%                          in descending order
%   VarExplainedTop5/10/20 : cumulative share at 5/10/20 components
%                          (clamped to the number of eigenvalues)
%   DimFor98..DimFor80   : first index where the cumulative share reaches
%                          0.98/0.95/0.90/0.85/0.80

% Same element order as concatenating the rows one by one, without
% re-allocating a growing vector on every iteration.
Variance = var(reshape(KM.',1,[]));

[~,L] = eig(KM);

eigValue = sort(diag(L),'descend');

VarExplainedCumSum = cumsum(eigValue)/sum(eigValue);

% Matrices with fewer than 5/10/20 rows used to fail with an index error.
nEig = numel(VarExplainedCumSum);
VarExplainedTop5 = VarExplainedCumSum(min(5,nEig));
VarExplainedTop10 = VarExplainedCumSum(min(10,nEig));
VarExplainedTop20 = VarExplainedCumSum(min(20,nEig));

DimFor98 = find(VarExplainedCumSum>=0.98,1);
DimFor95 = find(VarExplainedCumSum>=0.95,1);
DimFor90 = find(VarExplainedCumSum>=0.90,1);
DimFor85 = find(VarExplainedCumSum>=0.85,1);
DimFor80 = find(VarExplainedCumSum>=0.80,1);

end

--------------------------------------------------------------------------------
/kMeans.m:
--------------------------------------------------------------------------------
function [mem,cent] = kMeans(A, K)
% kMeans  k-means with Euclidean distance and random initial memberships.
%   A    : one row per instance
%   K    : number of clusters
%   mem  : cluster index of every row
%   cent : K centroids, one per row
%   Runs at most 100 iterations and stops when the memberships no longer change.

m = size(A, 1);
mem = ceil(K*rand(m, 1));
cent = zeros(K, size(A, 2));

for iter = 1:100
    disp(iter);   % progress output
    prev_mem = mem;

    for k = 1:K
        cent(k,:) = kmeans_centroid(mem, A, k, cent(k,:));
    end

    D = zeros(m,K);

    for i = 1:m
        for k = 1:K
            D(i,k) = ED(A(i,:),cent(k,:));
        end
    end

    [~, mem] = min(D,[],2);

    if norm(prev_mem-mem) == 0
        break;
    end
end

end

function ksc = kmeans_centroid(mem, A, k, cur_center)
% Mean of the rows currently assigned to cluster k; an empty cluster gets
% an all-zero centroid. cur_center is accepted but not used.

a = A(mem==k,:);

if size(a,1) == 0
    ksc = zeros(1, size(A,2));
    return;
end

% mean(a,1): with a single member, mean(a) would average across the
% columns and return a scalar instead of the row itself.
ksc = mean(a,1);

end

--------------------------------------------------------------------------------
/OneNNClassifierZREP.m:
--------------------------------------------------------------------------------
function acc = OneNNClassifierZREP(DS,ZRepresentation)
% OneNNClassifierZREP  1-NN accuracy of test rows against training rows.
%   ZRepresentation holds the training rows first (DS.TrainInstancesCount
%   of them) followed by the test rows; rows are compared with squared
%   Euclidean distance.

ZRepTrain = ZRepresentation(1:DS.TrainInstancesCount,:);
ZRepTest = ZRepresentation(DS.TrainInstancesCount+1:end,:);

acc = 0;

for id = 1 : DS.TestInstancesCount

    classify_this = ZRepTest(id,:);

    best_so_far = inf;
    % Reset per query so a query with no usable neighbour is never scored
    % with the previous query's label.
    predicted = NaN;

    for i = 1 : DS.TrainInstancesCount

        compare_to_this = ZRepTrain(i,:);

        distance = ED(compare_to_this, classify_this)^2;

        if distance < best_so_far
            predicted = DS.TrainClassLabels(i);
            best_so_far = distance;
        end
    end

    if (DS.TestClassLabels(id) == predicted)
        acc = acc + 1;
    end
end

acc = acc / DS.TestInstancesCount;
end

--------------------------------------------------------------------------------
/SPIRALRepLearning.m:
--------------------------------------------------------------------------------
function ZRep = SPIRALRepLearning(DS,coeffs)
% SPIRALRepLearning  Learn a SPIRAL representation for train and test series.
%   DS     : dataset struct (fields Train and Test are used)
%   coeffs : number of columns of the learned representation
%   ZRep   : one row per series, training rows first, then test rows

Train = DS.Train;
Test = DS.Test;
nTrain = size(Train,1);
nTest = size(Test,1);

% Every series becomes one column-vector cell: train first, then test.
X = cell(1,nTrain+nTest);
for i = 1:nTrain
    X{i} = Train(i,:)';
end
for i = 1:nTest
    X{nTrain+i} = Test(i,:)';
end

n = size(X,2);
%m=n*20*ceil(log(n));
% so that it's comparable to our method
m = n*coeffs;
if (2*m>n*n)
    m = floor(n*n/2);
end
[D,Omega,d] = construct_sparse(X,n,m);
X0 = zeros(n,coeffs);
options.maxiter = 20;
tic;X_train=matrix_completion_sparse_mex(D,d,Omega,X0,options);toc

ZRep = [X_train(1:nTrain,:); X_train(nTrain+1:size(X_train,1),:)];

end

--------------------------------------------------------------------------------
/LOOClassifierDTW.m:
--------------------------------------------------------------------------------
function acc = LOOClassifierDTW(DS,window)
% LOOClassifierDTW  Leave-one-out 1-NN accuracy on the training set with dtw.
%   DS     : dataset struct (Train, TrainClassLabels, TrainInstancesCount)
%   window : third argument handed to dtw

acc = 0;

for id = 1 : DS.TrainInstancesCount

    disp(id);   % progress output
    classify_this = DS.Train(id,:);

    best_so_far = inf;
    % Reset per query so a query with no usable neighbour is never scored
    % with the previous query's label.
    predicted = NaN;

    distances = ones(DS.TrainInstancesCount,1)*inf;

    % Distances to every other training instance.
    for i = 1 : DS.TrainInstancesCount

        if (i ~= id)

            compare_to_this = DS.Train(i,:);

            distances(i) = dtw(classify_this,compare_to_this,window);

        end

    end

    % Nearest neighbour among them.
    for i = 1 : DS.TrainInstancesCount

        if (i ~= id)

            if distances(i) < best_so_far
                predicted = DS.TrainClassLabels(i);
                best_so_far = distances(i);
            end

        end

    end

    if (DS.TrainClassLabels(id) == predicted)
        acc = acc + 1;
    end

end

acc = acc / DS.TrainInstancesCount;

end

--------------------------------------------------------------------------------
/SIDL/README:
--------------------------------------------------------------------------------
Code for Efficient Shift-Invariant Dictionary Learning

Guoqing Zheng, 2016

1. This package provides a sample implementation of the SIDL model proposed in [1]. The main entry for the model
is the function defined in "USIDL.m" (See the comment in the file for detailed parameter explanations);

2. An example main file "main_example.m" to run the model on the Trace data set is also provided.
The Trace data set is kindly contributed by [2];

3. This package (excluding the Trace data set) is released under the MIT license (See LICENSE for details);

4. If you find this package useful, please cite [1] in your work.
14 | 15 | Reference: 16 | 17 | [1] Efficient Shift-Invariant Dictionary Learning 18 | Guoqing Zheng, Yiming Yang, Jaime Carbonell 19 | In proceedings of the 22nd ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD 2016), San Francisco, CA. 20 | 21 | [2] The UCR Time Series Classification Archive 22 | Yanping Chen, Eamonn Keogh, Bing Hu, Nurjahan Begum, Anthony Bagnall, Abdullah, Mueen and Gustavo, Batista. 23 | http://www.cs.ucr.edu/~eamonn/time_series_data/ 24 | -------------------------------------------------------------------------------- /RWS/utilities/dtw_m.m: -------------------------------------------------------------------------------- 1 | % Copyright (C) 2013 Quan Wang , 2 | % Signal Analysis and Machine Perception Laboratory, 3 | % Department of Electrical, Computer, and Systems Engineering, 4 | % Rensselaer Polytechnic Institute, Troy, NY 12180, USA 5 | 6 | % dynamic time warping of two signals 7 | 8 | function d=dtw_m(s,t,w) 9 | % s: signal 1, size is ns*k, row for time, colume for channel 10 | % t: signal 2, size is nt*k, row for time, colume for channel 11 | % w: window parameter 12 | % if s(i) is matched with t(j) then |i-j|<=w 13 | % d: resulting distance 14 | 15 | if nargin<3 16 | w=Inf; 17 | end 18 | 19 | ns=size(s,1); 20 | nt=size(t,1); 21 | if size(s,2)~=size(t,2) 22 | error('Error in dtw(): the dimensions of the two input signals do not match.'); 23 | end 24 | w=max(w, abs(ns-nt)); % adapt window size 25 | 26 | %% initialization 27 | D=zeros(ns+1,nt+1)+Inf; % cache matrix 28 | D(1,1)=0; 29 | 30 | %% begin dynamic programming 31 | for i=1:ns 32 | for j=max(i-w,1):min(i+w,nt) 33 | oost=norm(s(i,:)-t(j,:)); 34 | D(i+1,j+1)=oost+min( [D(i,j+1), D(i+1,j), D(i,j)] ); 35 | 36 | end 37 | end 38 | d=D(ns+1,nt+1); 39 | -------------------------------------------------------------------------------- /RunOneNNED.m: -------------------------------------------------------------------------------- 1 | function RunOneNNED(DataSetStartIndex, 
function RunOneNNGAKTiming(DataSetStartIndex, DataSetEndIndex)
% RunOneNNGAKTiming  Run and time the 1-NN GAK classifier on a dataset range.
%   Datasets are taken from the UCR2018 directory listing (entries 3..130,
%   i.e. skipping what are expected to be '.' and '..'), sorted by name,
%   and those whose position lies in [DataSetStartIndex, DataSetEndIndex]
%   are processed. For each one a tab-delimited file is written whose row i
%   holds [accuracy, elapsed seconds]; all other rows are zero.

    % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
    listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
    Datasets = sort({listing(3:130).name});
    numDatasets = numel(Datasets);

    for i = 1:numDatasets

        if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
            continue;
        end

        name = Datasets{i};
        Results = zeros(numDatasets,2);

        disp(['Dataset being processed: ', name]);
        DS = LoadUCRdataset(name);

        tic;
        Results(i,1) = OneNNClassifierGAK(DS,10);
        Results(i,2) = toc;

        outFile = strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunOneNNGAKTiming/', 'RunOneNNGAKTiming_', num2str(i));
        dlmwrite(outFile, Results, 'delimiter', '\t');

    end

end
4 | % 5 | % Author: Lingfei Wu 6 | % Date: 01/20/2019 7 | 8 | function [KMat, user_dtw_runtime] = dtw_similarity_cell_mulvar(newX, baseX) 9 | 10 | m = size(newX,1); 11 | n = size(baseX,1); 12 | KMat = zeros(m,n); 13 | user_dtw_runtime = 0; 14 | tic; 15 | parfor i = 1 : m 16 | Ei = zeros(1,n); 17 | l1 = size(newX{i},2); 18 | data1 = newX{i}'; 19 | for j = 1 : n 20 | l2 = size(baseX{j},2); 21 | data2 = baseX{j}'; 22 | wSize = min(40, ceil(max(l1,l2)/10)); 23 | wSize = max(wSize, abs(l1 - l2)); 24 | dtw_telapsed = tic; 25 | dist = dtw_c(data1, data2, wSize);% window constraints 26 | % dist = dtw_c(newX(i,:)', baseX(j,:)');% no constraints 27 | user_dtw_runtime = user_dtw_runtime + toc(dtw_telapsed); 28 | Ei(j) = dist; 29 | end 30 | KMat(i,:) = Ei; 31 | end 32 | toc; 33 | 34 | end 35 | -------------------------------------------------------------------------------- /SIDL/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Guoqing Zheng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies of the Software, including modified versions of the software, 14 | and substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
function RunSPIRALRepLearning(DataSetStartIndex, DataSetEndIndex)
% RunSPIRALRepLearning  Learn and store SPIRAL representations for a dataset range.
%   Datasets come from the UCR2018 directory listing (entries 3..130),
%   sorted by name; those at positions DataSetStartIndex..DataSetEndIndex
%   are processed and their representation is written as a tab-delimited
%   .Zrep file below SPIRALREPRESENTATIONS/<dataset>/.

    % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
    listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
    Datasets = sort({listing(3:130).name});

    addpath(genpath('SPIRAL/.'));

    for i = 1:numel(Datasets)

        if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
            continue;
        end

        name = Datasets{i};

        display(['Dataset being processed: ', name]);
        DS = LoadUCRdataset(name);

        % sample budget: largest of the three candidates, capped at 100
        budget = max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] );
        NumOfSamples = min(budget,100);

        ZRep = SPIRALRepLearning(DS, NumOfSamples);

        % NOTE(review): the file stem below says SIDL although this is the
        % SPIRAL runner - left untouched because readers of these files
        % may depend on the exact name; confirm before renaming.
        outFile = strcat( 'SPIRALREPRESENTATIONS','/',name,'/','SIDLREPRESENTATIONS', '.Zrep');
        dlmwrite(outFile, ZRep, 'delimiter', '\t');

    end

end
function [AbsFroError,RelFroError,NormFroError] = NystromMatrixDictionary(KM, X, Dictionary, gamma)
% NystromMatrixDictionary  Frobenius errors of a dictionary-based Nystrom approximation.
%   KM         : n-by-n kernel matrix to be approximated
%   X          : n-by-m matrix, one time series per row
%   Dictionary : k-by-m matrix, one dictionary atom per row
%   gamma      : parameter passed through to SINK
%
%   AbsFroError  : ||KM - KMtilde||_F
%   RelFroError  : ||KM - KMtilde||_F / ||KM||_F
%   NormFroError : ||KM - KMtilde||_F / n^2

numSeries = size(X,1);
numAtoms = size(Dictionary,1);

% atom-vs-atom kernel block
W = zeros(numAtoms,numAtoms);
for a = 1:numAtoms
    for b = 1:numAtoms
        W(a,b) = SINK(Dictionary(a,:),Dictionary(b,:),gamma);
    end
end

% series-vs-atom kernel block
E = zeros(numSeries,numAtoms);
for r = 1:numSeries
    for b = 1:numAtoms
        E(r,b) = SINK(X(r,:),Dictionary(b,:),gamma);
    end
end

% Z = E * V * Lambda^(-1/2); entries that are not finite real numbers
% are replaced by zero before the approximation is formed
[eigVecs, eigVals] = eig(W);
invSqrtVals = diag(diag(eigVals).^(-0.5));
Zexact = CheckNaNInfComplex( E * eigVecs * invSqrtVals );

KMtilde = Zexact*Zexact';

residual = norm(KM-KMtilde,'fro');
AbsFroError = residual;
RelFroError = residual/norm(KM,'fro');
NormFroError = residual/numSeries^2;

end

function Z = CheckNaNInfComplex(Z)
% Zero out every entry that is NaN, infinite, or has a non-zero imaginary part.

bad = isnan(Z) | isinf(Z) | (imag(Z) ~= 0);
Z(bad) = 0;

end
function RunKMCompSINK(DataSetStartIndex, DataSetEndIndex, gamma)
% RunKMCompSINK  Compute and store SINK kernel matrices for a dataset range.
%   For every dataset whose sorted position lies in
%   [DataSetStartIndex, DataSetEndIndex] the full kernel matrix is written
%   to KernelMatricesSINK/<dataset>/ and a statistics file is written to
%   RunKMCompSINK/; row i of the statistics holds the second output of
%   KMCompSINK and the elapsed seconds, all other rows are zero.

    % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
    listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
    Datasets = sort({listing(3:130).name});
    numDatasets = numel(Datasets);

    disp(gamma);

    for i = 1:numDatasets

        if (i>=DataSetStartIndex & i<=DataSetEndIndex)

            name = Datasets{i};
            gammaStr = num2str(gamma);
            Results = zeros(numDatasets,2);

            disp(['Dataset being processed: ', name]);

            DS = LoadUCRdataset(name);

            tic;
            [KM, DistComp] = KMCompSINK(DS.Data,gamma);
            Results(i,1) = DistComp;
            Results(i,2) = toc;

            kernelFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesSINK/',name,'/', name, '_SINK_Gamma_', gammaStr ,'.kernelmatrix');
            statsFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompSINK/', 'RunKMCompSINK_Gamma_', gammaStr, '_Dataset_' , num2str(i));

            dlmwrite(kernelFile, KM, 'delimiter', '\t');
            dlmwrite(statsFile, Results, 'delimiter', '\t');

        end

    end

end
% This script computes the dissimilarity between random series and raw
% time-series. We use dynamic time warping to compute the distance between
% a pair of time-series. Other distance measure can be used as well.
%
% Author: Lingfei Wu
% Date: 01/20/2019

function [KMat, user_dtw_runtime] = dtw_similarity_cell(newX, baseX)
% DTW_SIMILARITY_CELL  DTW distances between rows of newX and cells of baseX.
%   newX  : numeric matrix, indexed row-wise (newX(i,:)); m rows, l1 columns
%   baseX : cell array, indexed as baseX{j}; n = size(baseX,1) entries
%   KMat  : m-by-n matrix, KMat(i,j) = dtw_c(newX(i,:)', baseX{j}', wSize)
%   user_dtw_runtime : summed wall-clock seconds spent inside the pairwise
%                      dtw_c calls of the main loop only
%
%   Elapsed times of the two phases are printed via toc.

    [m, l1] = size(newX);
    n = size(baseX,1);

    % DTW of every series against the single value 0.
    % NOTE(review): nrm_newX / nrm_baseX are not read anywhere below in
    % this function - looks like leftover normalisation code; confirm
    % before removing (the toc that follows prints the time of this phase).
    nrm_newX = zeros(m,1);
    tic;
    for i=1:m
        nrm_newX(i)=dtw_c(newX(i,:)',zeros(1));
    end
    nrm_baseX = zeros(n,1);
    for i=1:n
        nrm_baseX(i)=dtw_c(baseX{i}',zeros(1));
    end
    toc

    KMat = zeros(m,n);
    user_dtw_runtime = 0;
    tic;
    for i = 1 : m
        Ei = zeros(1,n);    % row i of KMat, filled in the inner loop
        data1 = newX(i,:)';
        for j = 1 : n
            l2 = length(baseX{j});
            % Window heuristic: a tenth of the longer series, at most 40,
            % but never smaller than the length difference ...
            wSize = min(40, ceil(max(l1,l2)/10));
            wSize = max(wSize, abs(l1 - l2));
            % ... which is then unconditionally overridden: dtw_c is always
            % called with a window argument of 0.
            % NOTE(review): presumably an experiment switch; how dtw_c
            % treats 0 is not visible here - confirm intent.
            wSize = 0;
            data2 = baseX{j}';
            dtw_telapsed = tic;    % per-call timer, independent of the outer tic
            dist = dtw_c(data1, data2, wSize);% window constraints
            % dist = dtw_c(newX(i,:)', baseX(j,:)');% no constraints
            user_dtw_runtime = user_dtw_runtime + toc(dtw_telapsed);
            Ei(j) = dist;
        end
        KMat(i,:) = Ei;
    end
    toc;

end
% Outputs sketch of input matrix
% Author: Terence Lim
% Original paper/code by Liberty "Simple and Deterministric Matrix Sketching"

function [sketch, vout] = FrequentDirections(A, ell)
% FREQUENTDIRECTIONS  Row-streaming sketch of a matrix.
%   A   : input matrix, one observation per row
%   ell : sketch size; the working buffer holds 2*ell rows
%
%   sketch : when A has more than 2*ell rows, the first ell rows of the
%            working buffer after all rows have been streamed in;
%            otherwise S*V' of the economy SVD of A
%   vout   : when A has at most 2*ell rows, the first right singular
%            vector of A; otherwise the complete right-singular-vector
%            matrix obtained in the most recent buffer rotation

numRows = size(A, 1);
numCols = size(A, 2);
capacity = 2 * ell;

% Small input: no streaming needed, decompose A directly.
if numRows <= capacity
    [~, S, V] = svd(A, 0);
    vout = V(:,1);
    sketch = S * V';
    return
end

sketch = zeros(capacity, numCols);
writeRow = 1;   % index of the next free (all-zero) buffer row

for i = 1:numRows

    if writeRow > capacity
        % Buffer full: rotate onto singular directions and shrink.
        [~, S, V] = svd(sketch, 0);
        disp(i);
        vout = V;
        sv = diag(S);
        numSv = length(sv);

        if numSv < ell
            % fewer than ell directions available: keep them unshrunk
            sketch(1:numSv,:) = S * V(:,1:numSv)';
            kept = numSv;
        else
            % subtract the ell-th squared singular value from the top ell
            shrunk = sqrt(sv(1:ell).^2 - sv(ell).^2);
            sketch(1:ell,:) = diag(shrunk) * V(:,1:ell)';
            kept = ell;
        end

        sketch((kept+1):end,:) = 0;
        writeRow = kept + 1;
    end

    sketch(writeRow,:) = A(i,:);   % append current row
    writeRow = writeRow + 1;
end

sketch = sketch(1:ell, :);
% This script generates low-rank approximation of latent kernel matrix using
% random features approach based on dtw like distance for time-series
% datasets (this example feeds the Gun_Point data set to rws_GenFea).
% Note: the default low-rank R = 512.
%
% Author: Lingfei Wu
% Date: 01/20/2019

clear,clc

addpath(genpath('utilities'));
file_dir = './datasets/';
filename = 'Gun_Point';
disp(filename);
sigma = 4.46;
R = 512; % Generally, Large R, Better Accuracy.
DMin = 1;
DMax = 25;

timer_start = tic;
[trainData, testData] = rws_GenFea(file_dir,filename,sigma,R,DMin,DMax);
trainy = trainData(:,1);
testy = testData(:,1);

% convert user labels to uniform format binary(-1,1) & multiclasses (1,2,..)
%
% The mapping is computed in one shot from the ORIGINAL label values.
% Relabelling in place, one class after another, is wrong whenever a new
% label equals an original label that has not been processed yet: e.g.
% original labels {0,1,2} would all end up as 3, and binary labels
% {-2,-1} would all end up as 1.
labels = unique(trainy);
numClasses = length(labels);
[trainKnown, trainPos] = ismember(trainy, labels);  % position of each label in "labels"
[testKnown, testPos] = ismember(testy, labels);
if numClasses > 2
    target = (1:numClasses)';
else
    % first sorted label -> -1, second -> +1 (a single class maps to -1)
    target = [-1; 1];
end
% Labels without a match in the training labels are left unchanged.
trainy(trainKnown) = target(trainPos(trainKnown));
testy(testKnown) = target(testPos(testKnown));

trainData(:,1) = trainy;
testData(:,1) = testy;
telapsed_features_dtw_random = toc(timer_start)
csvwrite(strcat(file_dir,filename,'/',filename,'_rws_Train'), trainData);
csvwrite(strcat(file_dir,filename,'/',filename,'_rws_Test'), testData);
function F = unsup_obj(X, S, A, Offsets, lambda)
% UNSUP_OBJ  Objective value: reconstruction error plus L1 penalty.
%   X       : n x p, one signal per row
%   S       : K x q, one basis per row
%   A       : n x K, coefficients
%   Offsets : n x K, shift applied to each basis for each signal
%   lambda  : weight of the L1 term
%
%   F = sum_i ( 0.5*||X(i,:) - A(i,:)*op_shift(S,Offsets(i,:),p)||^2
%               + lambda*||A(i,:)||_1 )

    [n, p] = size(X);

    F = 0;

    for row = 1:n
        coeffs = A(row,:);
        % bases of S placed at this signal's offsets inside length-p rows
        basesAtOffsets = op_shift(S, Offsets(row,:), p);

        residual = X(row,:) - coeffs * basesAtOffsets;

        F = F + 0.5 * norm(residual)^2;
        F = F + lambda * norm(coeffs, 1);
    end

end
function RunSIDLRepLearning(DataSetStartIndex, DataSetEndIndex, lambda, r)
% RunSIDLRepLearning  Learn and store SIDL representations for a dataset range.
%   Datasets come from the UCR2018 directory listing (entries 3..130),
%   sorted by name; those at positions DataSetStartIndex..DataSetEndIndex
%   are processed with the given lambda and r, and the representation is
%   written as a tab-delimited .Zrep file below
%   SIDLREPRESENTATIONS/<dataset>/.

    % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
    listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
    Datasets = sort({listing(3:130).name});

    addpath(genpath('SIDL/.'));

    for i = 1:numel(Datasets)

        if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
            continue;
        end

        name = Datasets{i};

        display(['Dataset being processed: ', name]);
        DS = LoadUCRdataset(name);

        % sample budget: largest of the three candidates, capped at 100
        budget = max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] );
        NumOfSamples = min(budget,100);

        % parameter sweep formerly done here:
        %for lambda = [0.1, 1, 10]
        %    for r = [0.1, 0.25, 0.5]

        % echo the parameters in use (intentionally unsuppressed)
        lambda
        r

        [ZRep,~,~]= SIDLRepLearning(name, DS, NumOfSamples, lambda, r);

        outFile = strcat( 'SIDLREPRESENTATIONS','/',name,'/','SIDLREPRESENTATIONS', '_L_', num2str(lambda), '_R_', num2str(r) ,'.Zrep');
        dlmwrite(outFile, ZRep, 'delimiter', '\t');

        %    end
        %end

    end

end
function RunClusteringKShapeORIGINAL(DataSetStartIndex, DataSetEndIndex)
% RunClusteringKShapeORIGINAL  Average Rand index / runtime of kShapeORIGINAL.
%   For every dataset whose sorted position lies in
%   [DataSetStartIndex, DataSetEndIndex], clustering is repeated 10 times
%   with seeds 1..10. Row i of the written tab-delimited matrix holds the
%   mean [Rand index, seconds] over the repetitions; all other rows are
%   zero.

    % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
    listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
    Datasets = sort({listing(3:130).name});
    numDatasets = numel(Datasets);

    for i = 1:numDatasets

        if (i>=DataSetStartIndex & i<=DataSetEndIndex)

            name = Datasets{i};

            disp(['Dataset being processed: ', name]);
            DS = LoadUCRdataset(name);

            Results = zeros(numDatasets,2);
            numClusters = length(DS.ClassNames);

            for rep = 1 : 10
                rep         % echo repetition number (intentionally unsuppressed)
                rng(rep);   % one fixed seed per repetition

                tic;
                % NOTE(review): kShapeORIGINAL must be provided on the
                % MATLAB path - confirm it is available.
                [mem, ~] = kShapeORIGINAL(DS.Data, numClusters);
                ClusteringTime = toc;

                RI = RandIndex(mem, DS.DataClassLabels);

                Results(i,:) = Results(i,:) + [RI,ClusteringTime];
            end

            Results(i,:) = Results(i,:) ./ 10;

            outFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunClusteringKShapeORIGINAL/','RunClusteringKShapeORIGINAL_Dataset_', num2str(i));
            dlmwrite(outFile, Results, 'delimiter', '\t');

        end

    end

end
- UCR Archive 2018 version has 128 datasets 4 | dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/'); 5 | Datasets = {dir_struct(3:130).name}; 6 | 7 | % Sort Datasets 8 | 9 | [Datasets, DSOrder] = sort(Datasets); 10 | 11 | for i = 1:length(Datasets) 12 | 13 | if (i>=DataSetStartIndex & i<=DataSetEndIndex) 14 | 15 | 16 | disp(['Dataset being processed: ', char(Datasets(i))]); 17 | DS = LoadUCRdataset(char(Datasets(i))); 18 | 19 | Results = zeros(length(Datasets),2); 20 | 21 | for rep = 1 : 10 22 | rep 23 | rng(rep); 24 | 25 | tic; 26 | [mem cent] = kMeans(DS.Data, length(DS.ClassNames)); 27 | 28 | ClusteringTime = toc; 29 | 30 | RI = RandIndex(mem, DS.DataClassLabels); 31 | 32 | ResultsTmp = [RI,ClusteringTime]; 33 | 34 | % 35 | Results(i,:) = Results(i,:) + ResultsTmp; 36 | end 37 | Results(i,:) = Results(i,:) ./ 10; 38 | dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunClusteringKMeans/','RunClusteringKMeans_Dataset_', num2str(i)), Results, 'delimiter', '\t'); 39 | 40 | end 41 | 42 | 43 | end 44 | 45 | end -------------------------------------------------------------------------------- /kShape.m: -------------------------------------------------------------------------------- 1 | function [mem cent] = kShape(A, K) 2 | 3 | m=size(A, 1); 4 | mem = ceil(K*rand(m, 1)); 5 | cent = zeros(K, size(A, 2)); 6 | 7 | for iter = 1:100 8 | disp(iter); 9 | prev_mem = mem; 10 | 11 | for k = 1:K 12 | cent(k,:) = kshape_centroid(mem, A, k, cent(k,:)); 13 | cent(k,:) = zscore(cent(k,:)); 14 | end 15 | 16 | for i = 1:m 17 | 18 | %x = A(i,:); 19 | for k = 1:K 20 | %y = cent(k,:); 21 | dist = 1-max( NCCc( A(i,:), cent(k,:)) ); 22 | D(i,k) = dist; 23 | end 24 | end 25 | 26 | [val mem] = min(D,[],2); 27 | if norm(prev_mem-mem) == 0 28 | break; 29 | end 30 | end 31 | 32 | end 33 | 34 | function ksc = kshape_centroid(mem, A, k, cur_center) 35 | % Slower version 36 | %Computes ksc centroid 37 | %a = []; 38 | %for i=1:length(mem) 39 | % if mem(i) == k 40 | % if 
sum(cur_center) == 0 41 | % opt_a = A(i,:); 42 | % else 43 | % [tmp tmps opt_a] = SBD(zscore(cur_center), A(i,:)); 44 | % end 45 | % a = [a; opt_a]; 46 | % end 47 | %end 48 | 49 | a = A(mem==k,:); 50 | 51 | if sum(cur_center) ~= 0 52 | for i=1:size(a,1) 53 | [tmp tmps opt_a] = SBD(cur_center, a(i,:)); 54 | a(i,:) = opt_a; 55 | end 56 | 57 | end 58 | 59 | if size(a,1) == 0 60 | ksc = zeros(1, size(A,2)); 61 | return; 62 | end 63 | 64 | [m, ncolumns]=size(a); 65 | [Y mean2 std2] = zscore(a,[],2); 66 | S = Y' * Y; 67 | P = (eye(ncolumns) - 1 / ncolumns * ones(ncolumns)); 68 | M = P*S*P; 69 | [V D] = eigs(M,1); 70 | ksc = V(:,1); 71 | 72 | finddistance1 = sqrt(sum((a(1,:) - ksc').^2)); 73 | finddistance2 = sqrt(sum((a(1,:) - (-ksc')).^2)); 74 | 75 | if (finddistance1=DataSetStartIndex && i<=DataSetEndIndex) 17 | 18 | display(['Dataset being processed: ', char(Datasets(i))]); 19 | DS = LoadUCRdataset(char(Datasets(i))); 20 | 21 | NumOfSamples = min(max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] ),100); 22 | 23 | %ZRep = SPIRALRepLearning(DS, NumOfSamples); 24 | 25 | % Supervised Tuning 26 | info = RWSTuneParameters(DS,NumOfSamples); 27 | ZRepSup = RWSRepLearning(DS,info.sigma,NumOfSamples,1,info.DMax); 28 | 29 | % Without Tuning for Clustering 30 | ZRepUNSup = RWSRepLearning(DS,1,NumOfSamples,1,25); 31 | %ZRepUNSup = RWSRepLearning(DS,1000,NumOfSamples,1,25); 32 | 33 | dlmwrite( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_Supervised', '.Zrep'), ZRepSup, 'delimiter', '\t'); 34 | dlmwrite( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma1000_DMax25', '.Zrep'), ZRepUNSup, 'delimiter', '\t'); 35 | 36 | 37 | 38 | end 39 | 40 | 41 | end 42 | 43 | 44 | end 45 | -------------------------------------------------------------------------------- /RunOneNNSBDCompressed.m: -------------------------------------------------------------------------------- 1 | function RunOneNNSBDCompressed(DataSetStartIndex, 
DataSetEndIndex, FourierEnergy, DatasetPercentile) 2 | % FourierEnergy is like 0.9 3 | % DatasetPercentile is like 99 4 | 5 | % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets 6 | dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/'); 7 | Datasets = {dir_struct(3:130).name}; 8 | 9 | % Sort Datasets 10 | 11 | [Datasets, DSOrder] = sort(Datasets); 12 | 13 | for i = 1:length(Datasets) 14 | 15 | if (i>=DataSetStartIndex && i<=DataSetEndIndex) 16 | 17 | Results = zeros(length(Datasets),7); 18 | 19 | display(['Dataset being processed: ', char(Datasets(i))]); 20 | DS = LoadUCRdataset(char(Datasets(i))); 21 | 22 | % Estimating required number of coefficients to 23 | % guarantee energy level between comparisons 24 | DSFourier = DatasetToFourier(DS, FourierEnergy, DatasetPercentile); 25 | 26 | tic; 27 | OneNNAcc = OneNNClassifierSBDCompressed(DS,DSFourier.NumCoeffs); 28 | 29 | Results(i,1) = FourierEnergy; 30 | Results(i,2) = DatasetPercentile; 31 | Results(i,3) = DSFourier.len; 32 | Results(i,4) = DSFourier.fftlength; 33 | Results(i,5) = DSFourier.NumCoeffs; 34 | Results(i,6) = OneNNAcc; 35 | Results(i,7) = toc; 36 | 37 | dlmwrite( strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunOneNNSBDCompressed/', 'RunOneNNSBD_Dataset_', num2str(i),'_DatasetPercentile_',num2str(DatasetPercentile),'_FourierEnergy_',num2str(FourierEnergy) ), Results, 'delimiter', '\t'); 38 | 39 | end 40 | 41 | 42 | end 43 | 44 | end -------------------------------------------------------------------------------- /RWS/rws_GenFea_mulvar.m: -------------------------------------------------------------------------------- 1 | % This script generates the feature representation of each time series by 2 | % computing random features between random series and raw time-series. We 3 | % use dynamic time warping to compute the distance between a pair of 4 | % time-series. 
5 | % 6 | % Author: Lingfei Wu 7 | % Date: 01/20/2019 8 | 9 | function [Train,Test,Runtime] = rws_GenFea_mulvar(file_dir,filename,sigma,R,DMin,DMax) 10 | 11 | % load data and generate corresponding train and test data 12 | timer_start = tic; 13 | Data = load(strcat(file_dir,filename,'/',filename,'.mat')); 14 | trainX = Data.train_X; 15 | trainy = Data.train_Y; 16 | testX = Data.test_X; 17 | testy = Data.test_Y; 18 | telapsed_data_load = toc(timer_start) 19 | 20 | % generate random time series with variable length, where each value in 21 | % random series is sampled from Gaussian distribution parameterized by sigma. 22 | timer_start = tic; 23 | rng('default') 24 | sampleX = cell(R,1); 25 | d = size(trainX{1},1); % number of variates 26 | for i=1:R 27 | D = randi([DMin, DMax],1); 28 | sampleX{i} = randn(d, D)./sigma; % gaussian 29 | end 30 | [trainFeaX_random, train_dtw_time] = dtw_similarity_cell_mulvar(trainX,sampleX); 31 | trainFeaX_random = trainFeaX_random/sqrt(R); 32 | [testFeaX_random, test_dtw_time] = dtw_similarity_cell_mulvar(testX,sampleX); 33 | testFeaX_random = testFeaX_random/sqrt(R); 34 | Train = [trainy, trainFeaX_random]; 35 | Test = [testy, testFeaX_random]; 36 | telapsed_random_fea_gen = toc(timer_start); 37 | 38 | % Note: real_total_end_time is the real total time, including both dtw 39 | % and ground distance, of generating both train and test features using 40 | % multithreads. user_dtw_time is the real time that accounts for 41 | % computation of dtw with one thread. 
42 | Runtime.real_total_dtw_time = telapsed_random_fea_gen; 43 | Runtime.user_dtw_time = train_dtw_time + test_dtw_time; 44 | Runtime.user_train_dtw_time = train_dtw_time; 45 | Runtime.user_test_dtw_time = test_dtw_time; 46 | end 47 | -------------------------------------------------------------------------------- /RunClusteringSPIRAL.m: -------------------------------------------------------------------------------- 1 | function RunClusteringSPIRAL(DataSetStartIndex, DataSetEndIndex) 2 | 3 | % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets 4 | dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/'); 5 | Datasets = {dir_struct(3:130).name}; 6 | 7 | % Sort Datasets 8 | 9 | [Datasets, DSOrder] = sort(Datasets); 10 | 11 | Results = zeros(length(Datasets),2); 12 | 13 | for i = 1:length(Datasets) 14 | 15 | if (i>=DataSetStartIndex && i<=DataSetEndIndex) 16 | 17 | disp(['Dataset being processed: ', char(Datasets(i))]); 18 | DS = LoadUCRdataset(char(Datasets(i))); 19 | 20 | for rep = 1 : 10 21 | rep 22 | rng(rep); 23 | 24 | 25 | % Extract Sample Points 26 | 27 | ZRep = dlmread( strcat( 'SPIRALREPRESENTATIONS','/',char(Datasets(i)),'/','SIDLREPRESENTATIONS', '.Zrep') ); 28 | 29 | tic; 30 | 31 | [mem cent] = kmeans(ZRep, length(DS.ClassNames),'Replicates',1); 32 | 33 | ClusteringTime = toc; 34 | 35 | RI = RandIndex(mem, DS.DataClassLabels); 36 | 37 | % Evaluate SmplPoints in terms of clustering 38 | % measures (e.g., SSE, RandIndex, NystromAppx) 39 | 40 | ResultsTmp = [RI,ClusteringTime]; 41 | 42 | % 43 | Results(i,:) = Results(i,:) + ResultsTmp; 44 | end 45 | Results(i,:) = Results(i,:) ./ 10; 46 | 47 | dlmwrite( strcat( 'RunClusteringSPIRAL/','RunClusteringSPIRAL_Dataset_', num2str(i)), Results, 'delimiter', '\t'); 48 | 49 | end 50 | 51 | end 52 | 53 | end -------------------------------------------------------------------------------- /CollectStatistics.m: 
--------------------------------------------------------------------------------
function CollectStatistics(DataSetStartIndex, DataSetEndIndex)
% COLLECTSTATISTICS Gather per-dataset result rows into a single summary file.
%
% Input
%   DataSetStartIndex, DataSetEndIndex: inclusive range of positions in the
%       alphabetically sorted UCR2018 directory listing to collect
%
% For every dataset index i in the range, reads
% 'RunLinearSVMRWS/RunLinearSVMRWS_Dataset_<i>', copies row i of that file
% into row i of the summary matrix and, after the loop, writes the summary
% as a comma-delimited file
% '/rigel/dsi/users/ikp2103/VLDBGRAIL/RESULTS_RunLinearSVMRWS_<start>_<end>'.
% Rows that are never assigned stay zero.

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = {dir_struct(3:130).name};

% Sort Datasets
Datasets = sort(Datasets);

% Start from an empty matrix so that the final dlmwrite is well defined even
% when no dataset index falls inside the requested range. The matrix grows
% on first assignment and takes its width from the result file that is read.
Results = [];

for i = 1:length(Datasets)

    if (i>=DataSetStartIndex && i<=DataSetEndIndex)

        disp(['Dataset being processed: ', char(Datasets(i))]);
        %DS = LoadUCRdataset(char(Datasets(i)));
        %disp([char(Datasets(i)),',',num2str(length(DS.ClassNames)),',',num2str(DS.TrainInstancesCount),',',num2str(DS.TestInstancesCount),',',num2str(length(DS.Train(1,:)))]);

        ResultsTmp = dlmread( strcat('RunLinearSVMRWS/','RunLinearSVMRWS', '_Dataset_', num2str(i)) );

        % Alternative inputs used for other experiments:
        %ResultsTmp = dlmread( strcat( 'RunClassificationZREP/RunClassificationZREP_FULLKM_Z20_KShape_', num2str(i),'.results') );
        %ResultsTmp = dlmread( strcat('RunOneNNTOPFFTED/', 'RunOneNNTOPFFTED_Dataset_', num2str(i), '_NumOfCoeff_',num2str(10)) );

        % Each per-dataset file holds a full matrix; only row i is taken.
        Results(i,:) = ResultsTmp(i,:);

    end

end

dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RESULTS_RunLinearSVMRWS_', num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex)), Results, 'delimiter', ',');

%dlmwrite( strcat( '/rigel/dsi/users/ikp2103/JOPA/GRAIL2/RESULTS/RunOneNNTOPFFTED_NumOfCoeff_10_', num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex)), Results, 'delimiter', ',');

end
--------------------------------------------------------------------------------
/RepLearnKM.m:
--------------------------------------------------------------------------------
function [Z99per,Z98per,Z97per,Z95per,Z90per,Z85per,Z80per,Ztop20,Ztop10,Ztop5,RepLearnTime]=RepLearnKM(KM)
% REPLEARNKM Build representations from a kernel matrix by eigendecomposition.
%
% Input
%   KM: Kernel matrix (nxn)
% Output
%   Z99per ... Z80per: eigVector(:,1:d)*sqrt(diag(eigValue(1:d))) where d is
%       the smallest number of leading eigenvalues whose cumulative share of
%       the eigenvalue sum reaches 99/98/97/95/90/85/80 percent
%   Ztop20, Ztop10, Ztop5: the same product for the leading 20/10/5
%       eigenvalues (all of them when KM has fewer rows than that)
%   RepLearnTime: seconds spent on the eigendecomposition and the dimension
%       selection; building the Z matrices is not included

tic;
[Q,L]=eig(KM);

% Order the spectrum from the largest to the smallest eigenvalue.
[eigValue,IX]=sort(diag(L),'descend');
eigVector=Q(:,IX);

VarExplainedCumSum = cumsum(eigValue)/sum(eigValue);
DimReaching = @(share) find(VarExplainedCumSum>=share,1);

DimFor99 = DimReaching(0.99);
DimFor98 = DimReaching(0.98);
DimFor97 = DimReaching(0.97);
DimFor95 = DimReaching(0.95);
DimFor90 = DimReaching(0.90);
DimFor85 = DimReaching(0.85);
DimFor80 = DimReaching(0.80);

RepLearnTime = toc;

% Representation restricted to the d leading eigen-directions.
Embed = @(d) CheckNaNInfComplex( eigVector(:,1:d)*sqrt(diag(eigValue(1:d))) );

Z99per = Embed(DimFor99);
Z98per = Embed(DimFor98);
Z97per = Embed(DimFor97);
Z95per = Embed(DimFor95);
Z90per = Embed(DimFor90);
Z85per = Embed(DimFor85);
Z80per = Embed(DimFor80);

% Clamp the fixed-size variants so that kernel matrices with fewer than
% 20/10/5 rows do not raise an index-out-of-bounds error.
NumDims = numel(eigValue);
Ztop20 = Embed(min(20,NumDims));
Ztop10 = Embed(min(10,NumDims));
Ztop5 = Embed(min(5,NumDims));

end

function Z = CheckNaNInfComplex(Z)
% Replace every NaN, Inf or complex entry of Z by 0 and report each one.

for i=1:size(Z,1)
    for j=1:size(Z,2)
        if (isnan(Z(i,j)) || isinf(Z(i,j)) || ~isreal(Z(i,j)))
            Z(i,j)=0;
            disp('ERROR ON REPRESENTATION');
        end
    end
end

end
--------------------------------------------------------------------------------
/RunClusteringSIDL.m:
--------------------------------------------------------------------------------
function RunClusteringSIDL(DataSetStartIndex, DataSetEndIndex, lambda, r)
% RUNCLUSTERINGSIDL k-means clustering on stored SIDL representations.
%
% Input
%   DataSetStartIndex, DataSetEndIndex: inclusive range of positions in the
%       alphabetically sorted UCR2018 directory listing to process
%   lambda, r: values used only to build the representation and result
%       file names ('_L_<lambda>_R_<r>')
%
% For every selected dataset, runs kmeans 10 times (seeds 1..10) on the
% representation read from SIDLREPRESENTATIONS/<dataset>/, averages the Rand
% index and the clustering time over the runs and writes the Results matrix
% to RunClusteringSIDL/.

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = {dir_struct(3:130).name};

% Sort Datasets
Datasets = sort(Datasets);

% One row per dataset: [mean Rand index, mean clustering time]
Results = zeros(length(Datasets),2);

for i = 1:length(Datasets)

    if (i>=DataSetStartIndex && i<=DataSetEndIndex)

        disp(['Dataset being processed: ', char(Datasets(i))]);
        DS = LoadUCRdataset(char(Datasets(i)));

        % The stored representation is the same for every repetition, so it
        % is read once per dataset instead of once per repetition.
        ZRep = dlmread( strcat( 'SIDLREPRESENTATIONS','/',char(Datasets(i)),'/','SIDLREPRESENTATIONS', '_L_', num2str(lambda), '_R_', num2str(r) ,'.Zrep') );

        for rep = 1 : 10
            rep
            rng(rep);

            tic;
            [mem, ~] = kmeans(ZRep, length(DS.ClassNames),'Replicates',1);
            ClusteringTime = toc;

            RI = RandIndex(mem, DS.DataClassLabels);

            % Accumulate; the sums are turned into means after the loop.
            Results(i,:) = Results(i,:) + [RI,ClusteringTime];
        end
        Results(i,:) = Results(i,:) ./ 10;

        dlmwrite( strcat( 'RunClusteringSIDL/','RunClusteringSIDL','_L_', num2str(lambda), '_R_', num2str(r), '_Dataset_', num2str(i)), Results, 'delimiter', '\t');

    end

end

end
--------------------------------------------------------------------------------
/RunKMCompSINKSPLIT.m:
--------------------------------------------------------------------------------
function RunKMCompSINKSPLIT(DataSetStartIndex, DataSetEndIndex, TrainKM, sigma)
% RUNKMCOMPSINKSPLIT Compute and store SINK kernel matrices per dataset split.
%
% Input
%   DataSetStartIndex, DataSetEndIndex: inclusive range of positions in the
%       alphabetically sorted UCR2018 directory listing to process
%   TrainKM: 1 computes the train-to-train kernel matrix, any other value
%       computes the test-to-train kernel matrix
%   sigma: parameter forwarded to the SINK kernel-matrix routines
%
% For every selected dataset the kernel matrix and a Results matrix are
% written as tab-delimited files. Results has one row per dataset and four
% columns: [train comparisons, train seconds, test comparisons, test seconds];
% only the two columns of the chosen mode are filled in row i.

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = sort({dir_struct(3:130).name});

for i = 1:length(Datasets)

    % Skip everything outside the requested index range.
    if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
        continue;
    end

    Results = zeros(length(Datasets),4);
    DatasetName = char(Datasets(i));

    disp(['Dataset being processed: ', DatasetName]);

    DS = LoadUCRdataset(DatasetName);

    if (TrainKM==1)

        tic;
        [KernelMatrix, NumComparisons] = KMCompSINK_TrainToTrain(DS.Train, sigma);

        Results(i,1) = NumComparisons;
        Results(i,2) = toc;

        KernelFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesSINKSPLIT/',DatasetName,'/', DatasetName, '_SINK_Sigma_', num2str(sigma) ,'_TRAIN.kernelmatrix');
        ResultsFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompSINKSPLIT/', 'RunKMCompSINKSPLIT_TrainToTrain_Sigma_', num2str(sigma), '_TrainToTrain_Dataset_' , num2str(i) );

    else

        tic;
        [KernelMatrix, NumComparisons] = KMCompSINK_TestToTrain(DS.Test,DS.Train,sigma);

        Results(i,3) = NumComparisons;
        Results(i,4) = toc;

        KernelFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesSINKSPLIT/',DatasetName,'/', DatasetName, '_SINK_Sigma_', num2str(sigma) ,'_TESTTOTRAIN.kernelmatrix');
        ResultsFile = strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompSINKSPLIT/', 'RunKMCompSINKSPLIT_TestToTrain_Sigma_', num2str(sigma), '_TestToTrain_Dataset_' , num2str(i) );

    end

    % Kernel matrix first, then the bookkeeping row for this dataset.
    dlmwrite( KernelFile, KernelMatrix, 'delimiter', '\t');
    dlmwrite( ResultsFile, Results, 'delimiter', '\t');

end

end
--------------------------------------------------------------------------------
function RunLOOCandOneNNDTW(DataSetStartIndex, DataSetEndIndex)
% RUNLOOCANDONENNDTW Tune the DTW window by leave-one-out, then run 1-NN.
%
% Input
%   DataSetStartIndex, DataSetEndIndex: inclusive range of positions in the
%       alphabetically sorted UCR2018 directory listing to process
%
% For every selected dataset, windows of 0..19 percent of the series length
% are scored with LOOClassifierDTW; the best-scoring window is then passed
% to OneNNClassifierDTW. Row i of the tab-delimited Results file holds
%   [best percent, its LOO accuracy, its LOO seconds, total LOO seconds,
%    1-NN accuracy, 1-NN seconds].

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = sort({dir_struct(3:130).name});

NumWindows = 20;

for i = 1:length(Datasets)

    % Skip everything outside the requested index range.
    if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
        continue;
    end

    % Per-dataset scratch: accuracy and runtime of each candidate window.
    LooAccuracy = zeros(1,NumWindows);
    LooSeconds = zeros(1,NumWindows);

    Results = zeros(length(Datasets),6);

    disp(['Dataset being processed: ', char(Datasets(i))]);
    DS = LoadUCRdataset(char(Datasets(i)));

    TSLength = numel(DS.Data(1,:));

    for gamma=1:NumWindows

        % Candidate index gamma stands for a window of (gamma-1) percent.
        gammaTmp = gamma-1
        window = floor(gammaTmp/100 * TSLength);

        tic;
        acc = LOOClassifierDTW(DS,window);
        LooSeconds(gamma) = toc;
        LooAccuracy(gamma) = acc;

    end

    % max returns the first maximum, i.e. the smallest window among ties.
    [BestAcc,BestGamma] = max(LooAccuracy);

    tic;
    window = floor((BestGamma-1)/100 * TSLength);
    OneNNAcc = OneNNClassifierDTW(DS,window);

    Results(i,1) = BestGamma-1;
    Results(i,2) = BestAcc;
    Results(i,3) = LooSeconds(BestGamma);
    Results(i,4) = sum(LooSeconds);
    Results(i,5) = OneNNAcc;
    Results(i,6) = toc;

    dlmwrite( strcat('/rigel/dsi/users/ikp2103/VLDBGRAIL/RunLOOCandOneNNDTW/', 'RunLOOCandOneNNDTW_Dataset_', num2str(i)), Results, 'delimiter', '\t');

end

end
--------------------------------------------------------------------------------
/TestVarianceApproximate.m:
--------------------------------------------------------------------------------
function Results = TestVarianceApproximate(Dictionary)
% TESTVARIANCEAPPROXIMATE Scan SINK gamma = 1..20 on a dictionary.
%
% Input
%   Dictionary: one time series per row
% Output
%   Results: 1x14 row vector
%     (1)     gamma with the largest variance over the entries of W
%     (2)     share of the eigenvalue sum held by the 20 leading eigenvalues
%             of W at that gamma
%     (3:7)   number of leading eigenvalues needed to reach
%             98/95/90/85/80 percent of the eigenvalue sum at that gamma
%     (8)     gamma with the largest product of the variance and the
%             leading-20 share
%     (9)     leading-20 share at that gamma
%     (10:14) as (3:7), at that gamma
%   where W(i,j) = SINK(Dictionary(i,:), Dictionary(j,:), gamma). For a
%   dictionary with fewer than 20 rows the "leading 20" share is taken over
%   all available eigenvalues.

nrowsDic = size(Dictionary,1);
NumGammas = 20;

% Clamp so that small dictionaries do not index past the spectrum.
TopK = min(20,nrowsDic);

% Shares of the eigenvalue sum for which the required dimension is reported.
Shares = [0.98 0.95 0.90 0.85 0.80];

W = zeros(nrowsDic,nrowsDic);

Var4Gamma = zeros(1,NumGammas);
VarExplained20 = zeros(1,NumGammas);
DimFor = zeros(numel(Shares),NumGammas);   % row s <-> Shares(s)

for g=1:NumGammas
    g
    for i=1:nrowsDic
        for j=1:nrowsDic
            W(i,j) = SINK(Dictionary(i,:),Dictionary(j,:),g);
        end
    end

    % Variance over all entries of W, taken row by row (no growing buffer).
    Var4Gamma(g) = var(reshape(W.',1,[]));

    [~,L] = eig(W);
    eigValue = sort(diag(L),'descend');

    VarExplainedCumSum = cumsum(eigValue)/sum(eigValue);

    VarExplained20(g) = VarExplainedCumSum(TopK);

    for s=1:numel(Shares)
        DimFor(s,g) = find(VarExplainedCumSum>=Shares(s),1);
    end
end

VarByVarExplained20 = Var4Gamma.*VarExplained20;

[~, GammaForMaxVariance] = max(Var4Gamma);
[~, GammaForMaxVarByVarExplained20] = max(VarByVarExplained20);

Results = [ GammaForMaxVariance, ...
            VarExplained20(GammaForMaxVariance), ...
            DimFor(:,GammaForMaxVariance).', ...
            GammaForMaxVarByVarExplained20, ...
            VarExplained20(GammaForMaxVarByVarExplained20), ...
            DimFor(:,GammaForMaxVarByVarExplained20).' ];

end
--------------------------------------------------------------------------------
/SIDL/update_S.m:
--------------------------------------------------------------------------------
%{
The MIT License (MIT)
Copyright (c) 2016 Guoqing Zheng

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies of the Software, including modified versions of the software,
and substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
%}

function S = update_S(X, S, A, Offsets, lambda, c, maxIter, epsilon)
% UPDATE_S Update the bases S with coefficients and offsets held fixed.
%
% X: n x p
% S: K x q
% A: n x K
% Offsets: n x K
% lambda: forwarded unchanged to unsup_obj
% c: norm bound; an update whose norm would exceed sqrt(c) is rescaled to
%    norm sqrt(c)
% maxIter: maximum number of sweeps over the K bases
% epsilon: stop once the relative change of the objective between two
%    consecutive sweeps falls below this value
%
% Returns the updated S (K x q).

[n, p] = size(X);

[K, q] = size(S);

PrevObj = [];   % objective after the previous sweep; empty before the first

for iter = 1:maxIter
    for k=1:K % optimize s_k
        M_k = norm(A(:,k))^2;
        if M_k == 0 % inactive bases, no need to update
            continue
        end

        s_k = 0;

        for i=1:n
            % Residual of series i with the contribution of basis k removed.
            temp_a = A(i,:);
            temp_a(k) = 0;
            shifted_S = op_shift(S, Offsets(i,:), p);
            xi_residue = X(i,:) - temp_a * shifted_S;

            % Window of the residual that basis k is aligned with.
            t_ik = Offsets(i,k);
            s_k = s_k + A(i,k) * xi_residue(1+t_ik:q+t_ik);
        end

        % compute s_k
        if M_k <= norm(s_k) / sqrt(c)
            s_k = sqrt(c) / norm(s_k) * s_k;
        else
            s_k = s_k / M_k;
        end

        S(k,:) = s_k;

    end

    F_all = unsup_obj(X, S, A, Offsets, lambda);
    %fprintf('Current F_all: %f\n', F_all);
    if ~isempty(PrevObj) && abs(F_all - PrevObj) / PrevObj < epsilon
        %fprintf('Updating S: Converged!\n\n');
        return
    end
    PrevObj = F_all;
end


%fprintf('Updating S: Reached max iter.\n\n');
end
--------------------------------------------------------------------------------
/RunClusteringRWS.m:
--------------------------------------------------------------------------------
function RunClusteringRWS(DataSetStartIndex, DataSetEndIndex)
% RUNCLUSTERINGRWS k-means clustering on stored RWS representations.
%
% Input
%   DataSetStartIndex, DataSetEndIndex: inclusive range of positions in the
%       alphabetically sorted UCR2018 directory listing to process
%
% For every selected dataset, runs kmeans 10 times (seeds 1..10) on the
% representation read from RWSREPRESENTATIONS/<dataset>/, averages the Rand
% index and the clustering time over the runs and writes the Results matrix
% to RunClusteringRWS/.

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = {dir_struct(3:130).name};

% Sort Datasets
Datasets = sort(Datasets);

% One row per dataset: [mean Rand index, mean clustering time]
Results = zeros(length(Datasets),2);

for i = 1:length(Datasets)

    if (i>=DataSetStartIndex && i<=DataSetEndIndex)

        disp(['Dataset being processed: ', char(Datasets(i))]);
        DS = LoadUCRdataset(char(Datasets(i)));

        % The stored representation is the same for every repetition, so it
        % is read once per dataset instead of once per repetition.
        % Alternative representations used for other experiments:
        %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_Supervised', '.Zrep') );
        %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised', '.Zrep') );
        %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma0.001_DMax25', '.Zrep') );
        %ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma1_DMax100', '.Zrep') );
        ZRep = dlmread( strcat( 'RWSREPRESENTATIONS','/',char(Datasets(i)),'/','RWS_UNSupervised_Sigma1000_DMax25', '.Zrep') );

        for rep = 1 : 10
            rep
            rng(rep);

            tic;
            [mem, ~] = kmeans(ZRep, length(DS.ClassNames),'Replicates',1);
            ClusteringTime = toc;

            RI = RandIndex(mem, DS.DataClassLabels);

            % Accumulate; the sums are turned into means after the loop.
            Results(i,:) = Results(i,:) + [RI,ClusteringTime];
        end
        Results(i,:) = Results(i,:) ./ 10;

        dlmwrite( strcat( 'RunClusteringRWS/','RunClusteringRWS_UNSupervised_Sigma1000_DMax25_Dataset_', num2str(i)), Results, 'delimiter', '\t');

    end

end

end
--------------------------------------------------------------------------------
/DatasetToFourier.m:
--------------------------------------------------------------------------------
function DSFourier = DatasetToFourier(DS, FourierEnergy, DatasetPercentile)
% DATASETTOFOURIER Add DFT coefficients and a compressed spectrum to a dataset.
%
% Input
%   DS: dataset struct with fields Train, Test and Data (one series per row)
%   FourierEnergy: energy share(s) to preserve, e.g. 0.9
%   DatasetPercentile: percentile over all series, e.g. 99
% Output
%   DSFourier: copy of the rescaled DS with the extra fields
%     len, fftlength            series length and zero-padded FFT length
%     TrainFourier, TestFourier, DataFourier     full DFTs (Data = [Train; Test])
%     DataCoeffsUntilEnergy     per series, the first coefficient index at
%                               which the cumulative energy reaches
%                               FourierEnergy/2
%     NumCoeffs                 ceil of the DatasetPercentile-th percentile
%                               of DataCoeffsUntilEnergy
%     TrainFourierCompressed, TestFourierCompressed, DataFourierCompressed
%                               DFTs with the middle coefficients zeroed
%
% NOTE(review): each matrix is divided by the norm of its FIRST row only,
% not row by row - confirm that this is the intended scaling.

DS.Train = DS.Train ./ norm(DS.Train(1,:));
DS.Test = DS.Test ./ norm(DS.Test(1,:));
DS.Data = DS.Data ./ norm(DS.Data(1,:));

DSFourier = DS;

% Compute DFT of the data; fft pads every row with zeros up to fftlength.
DSFourier.len = length(DS.Data(1,:));
DSFourier.fftlength = 2^nextpow2(2*DSFourier.len-1);
%TrainTemp = [zeros(DS.TrainInstancesCount,floor((DSFourier.fftlength-DSFourier.len)/2)),DS.Train,zeros(DS.TrainInstancesCount,ceil((DSFourier.fftlength-DSFourier.len)/2))];
%TestTemp = [zeros(DS.TestInstancesCount,floor((DSFourier.fftlength-DSFourier.len)/2)),DS.Test,zeros(DS.TestInstancesCount,ceil((DSFourier.fftlength-DSFourier.len)/2))];
%DSFourier.TrainFourier = fft(TrainTemp,[],2);
%DSFourier.TestFourier = fft(TestTemp,[],2);
DSFourier.TrainFourier = fft(DS.Train,DSFourier.fftlength,2);
DSFourier.TestFourier = fft(DS.Test,DSFourier.fftlength,2);
DSFourier.DataFourier = [DSFourier.TrainFourier; DSFourier.TestFourier];

% Preserve Percent of Energy in Fourier space of each time series

E = cumsum(abs(DSFourier.DataFourier) .^ 2, 2); % Energy is squared abs
E = bsxfun(@rdivide, E, E(:, end));
DSFourier.DataCoeffsUntilEnergy = zeros(size(DSFourier.DataFourier,1),length(FourierEnergy));
for i = 1:size(DSFourier.DataCoeffsUntilEnergy,1)
    for j = 1:size(DSFourier.DataCoeffsUntilEnergy, 2)
        % find first coefficient that exceeds eta/2 - due to symmetry as we
        % give the full DFT and not half of it
        DSFourier.DataCoeffsUntilEnergy(i, j) = find(E(i, :) >= FourierEnergy(j)/2, 1);
    end
end

% Keep number of coefficients across all time series so that you preserve
% at least FourierEnergy for DatasetPercentile specified

DSFourier.NumCoeffs = ceil(prctile(DSFourier.DataCoeffsUntilEnergy,DatasetPercentile));

DSFourier.TrainFourierCompressed = leading_fourier(DSFourier.TrainFourier, DSFourier.NumCoeffs);
DSFourier.TestFourierCompressed = leading_fourier(DSFourier.TestFourier, DSFourier.NumCoeffs);
DSFourier.DataFourierCompressed = [DSFourier.TrainFourierCompressed; DSFourier.TestFourierCompressed];

end

function x = leading_fourier(x, k)
% leading_fourier(x,k) keeps the leading k and trailing k-1 coefficients of
% every row (the spectrum of a real series is symmetric) and zeroes the
% window in between
m = floor(size(x, 2) / 2) + 1;
x(:, (k+1):(m - 1 + m - k)) = 0;
end
--------------------------------------------------------------------------------
/RunTestVarianceApproximate.m:
--------------------------------------------------------------------------------
function RunTestVarianceApproximate(DataSetStartIndex, DataSetEndIndex, RepStartIndex, RepEndIndex, Method)
% RUNTESTVARIANCEAPPROXIMATE Run TestVarianceApproximate on stored dictionaries.
%
% Input
%   DataSetStartIndex, DataSetEndIndex: inclusive range of positions in the
%       alphabetically sorted UCR2018 directory listing to process
%   RepStartIndex, RepEndIndex: inclusive range of repetitions (out of 1..10)
%   Method: 1 reads dictionaries from DICTIONARIESRANDOM/, 2 from
%       DICTIONARIESKSHAPE/
%
% For every selected dataset and repetition two tab-delimited files are
% written to RunTestVarianceApproximate/: '.Results' with
% [gamma, seconds], where gamma is element 8 of the TestVarianceApproximate
% output, and '.TestVarianceResults' with that full output.

Methods = {'Random', 'KShape'};

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = {dir_struct(3:130).name};

% Sort Datasets
Datasets = sort(Datasets);

for i = 1:length(Datasets)

    if (i>=DataSetStartIndex && i<=DataSetEndIndex)

        disp(['Dataset being processed: ', char(Datasets(i))]);
        DS = LoadUCRdataset(char(Datasets(i)));

        for rep = 1 : 10

            rep
            rng(rep);

            if (rep>=RepStartIndex && rep<=RepEndIndex)

                if Method==1
                    Dictionary = dlmread( strcat( 'DICTIONARIESRANDOM/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') );
                elseif Method==2
                    Dictionary = dlmread( strcat( 'DICTIONARIESKSHAPE/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') );
                end

                tic;
                TestVarianceResults = TestVarianceApproximate(Dictionary);
                RunTime = toc;

                % GammaForMaxVarByVarExplained20
                gamma = TestVarianceResults(8);

                Results = [gamma,RunTime];

                dlmwrite( strcat( 'RunTestVarianceApproximate/', 'RESULTS_RunTestVarianceApproximate_', char(Datasets(i)), '_', char(Methods(Method)), '_',num2str(rep) ,'.Results'), Results, 'delimiter', '\t');
                dlmwrite( strcat( 'RunTestVarianceApproximate/', 'RESULTS_RunTestVarianceApproximate_', char(Datasets(i)), '_', char(Methods(Method)), '_',num2str(rep) ,'.TestVarianceResults'), TestVarianceResults, 'delimiter', '\t');

            end

        end
    end

end

end
--------------------------------------------------------------------------------
/RepLearnFINAL.m:
--------------------------------------------------------------------------------
function [Zexact, Ztop5, Ztop10, Ztop20, Z99per, Z98per, Z97per, Z95per, Z90per, Z85per, Z80per, DistComp, RuntimeNystrom, RuntimeFD]=RepLearnFINAL(X, Dictionary, gamma)
% REPLEARNFINAL Learn representations of X from a dictionary with the SINK kernel.
%
% Input
%   X: original data (nxm)
%   Dictionary: kShape's centroids (cxm) or randomly chosen time series
%   gamma: kernel's parameter
% Output
%   Zexact: E*Ve*diag(va.^(-0.5)), where W = Ve*diag(va)*Ve' is the
%       dictionary-to-dictionary kernel matrix and E the data-to-dictionary
%       kernel matrix (nxc)
%   Ztop5, Ztop10, Ztop20: Zexact projected on the 5/10/20 leading
%       eigenvectors of the sketch covariance (all of them when fewer exist)
%   Z99per ... Z80per: Zexact projected on the smallest number of leading
%       eigenvectors whose cumulative eigenvalue share reaches
%       99/98/97/95/90/85/80 percent
%   DistComp: number of SINK evaluations performed
%   RuntimeNystrom: seconds spent building Zexact
%   RuntimeFD: seconds spent on the FrequentDirections sketch and its
%       eigendecomposition
tic;
DistComp = 0;

nrowsX = size(X,1);
nrowsDic = size(Dictionary,1);

% Kernel matrix between dictionary entries.
W = zeros(nrowsDic,nrowsDic);

for i=1:nrowsDic
    for j=1:nrowsDic
        W(i,j) = SINK(Dictionary(i,:),Dictionary(j,:),gamma);
        DistComp = DistComp + 1;
    end
end

% Kernel matrix between the data and the dictionary entries.
E = zeros(nrowsX,nrowsDic);

for i=1:nrowsX
    disp(i);
    for j=1:nrowsDic
        E(i,j) = SINK(X(i,:),Dictionary(j,:),gamma);
        DistComp = DistComp + 1;
    end
end

[Ve, Va] = eig(W);
va = diag(Va);
inVa = diag(va.^(-0.5));
Zexact = E * Ve * inVa;

RuntimeNystrom = toc;

Zexact = CheckNaNInfComplex(Zexact);

tic;
[BSketch, ~] = FrequentDirections(Zexact, ceil(0.5*size(Zexact,2)));

[V2, L2] = eig(BSketch'*BSketch);
%[V2, L2] = eig(Zexact'*Zexact);
eigvalue = diag(L2);
[~, index] = sort(-eigvalue);   % largest eigenvalue first
eigvalue = eigvalue(index);
V2 = V2(:, index);

RuntimeFD = toc;

VarExplainedCumSum = cumsum(eigvalue)/sum(eigvalue);
DimReaching = @(share) find(VarExplainedCumSum>=share,1);

% Zexact projected on the d leading eigenvectors.
Project = @(d) CheckNaNInfComplex( Zexact*V2(:,1:d) );

% Clamp the fixed-size variants so that dictionaries with fewer than
% 5/10/20 entries do not raise an index-out-of-bounds error.
NumDims = size(V2,2);
Ztop5 = Project(min(5,NumDims));
Ztop10 = Project(min(10,NumDims));
Ztop20 = Project(min(20,NumDims));

Z99per = Project(DimReaching(0.99));
Z98per = Project(DimReaching(0.98));
Z97per = Project(DimReaching(0.97));
Z95per = Project(DimReaching(0.95));
Z90per = Project(DimReaching(0.90));
Z85per = Project(DimReaching(0.85));
Z80per = Project(DimReaching(0.80));

end

function Z = CheckNaNInfComplex(Z)
% Replace every NaN, Inf or complex entry of Z by 0.

for i=1:size(Z,1)
    for j=1:size(Z,2)
        if (isnan(Z(i,j)) || isinf(Z(i,j)) || ~isreal(Z(i,j)))
            Z(i,j)=0;
        end
    end
end

end
--------------------------------------------------------------------------------
/SIDL/USIDL.m:
--------------------------------------------------------------------------------
%{
The MIT License (MIT)
Copyright (c) 2016 Guoqing Zheng

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies of the Software, including modified versions of the software,
and substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | %} 24 | 25 |
function [S, A, Offsets, F_obj] = USIDL(X, y, lambda, K, q, c, epsilon, maxIter, maxInnerIter, runid)
%USIDL Unsupervised shift-invariant dictionary learning by alternating updates.
%   [S, A, Offsets, F_obj] = USIDL(X, y, lambda, K, q, c, epsilon, maxIter,
%   maxInnerIter, runid) alternates between update_A_par (coefficients and
%   matching offsets) and update_S (bases). After every outer iteration the
%   objective returned by unsup_obj is recorded; the loop stops as soon as its
%   relative change falls below epsilon, or after maxIter outer iterations.
%
%   Inputs
%     X            : n x p, training data, n time series of length p
%     y            : labels (binary -1/1 per the original notes); not
%                    referenced in this function, kept for the interface
%     lambda       : regularization parameter for the l1 norm
%     K            : number of bases
%     q            : length of each basis (must satisfy q <= p)
%     c            : bound on the squared L2-norm of a basis, ||s_k||^2 <= c
%                    (forwarded to update_S)
%     epsilon      : relative-change tolerance, also forwarded to the inner solvers
%     maxIter      : maximum number of outer iterations
%     maxInnerIter : maximum number of inner iterations (forwarded)
%     runid        : run identifier string; not referenced in this function
%
%   Outputs
%     S       : K x q learned bases
%     A       : n x K coefficients for the training data
%     Offsets : n x K zero-based matched locations of the bases
%     F_obj   : objective value after each completed outer iteration

[n, p] = size(X);

% randi([0, p-q], ...) below would fail with an unrelated-looking message.
if q > p
    error('USIDL:basisTooLong', ...
        'Basis length q (%d) must not exceed the series length p (%d).', q, p);
end

% Random initialisation. The order of the random draws is unchanged so that
% seeded runs keep producing the same starting point.
S = randn(K, q);                  % bases
A = randn(n, K);                  % coefficients
Offsets = randi([0, p-q], n, K);  % offsets

F_obj = [];

for iter = 1:maxIter
    % update coefficients and matching offsets
    [A, Offsets] = update_A_par(X, S, A, Offsets, lambda, maxInnerIter, epsilon);

    % update bases
    S = update_S(X, S, A, Offsets, lambda, c, maxInnerIter, epsilon);

    % record the objective
    F_all = unsup_obj(X, S, A, Offsets, lambda);
    F_obj(end+1) = F_all; %#ok<AGROW>

    % Convergence check on the relative change. The magnitude of the previous
    % value is used as the scale (floored at realmin), so a zero or negative
    % previous objective can neither produce NaN nor a spurious "converged".
    if numel(F_obj) > 1
        F_prev = F_obj(end-1);
        rel_change = abs(F_all - F_prev) / max(abs(F_prev), realmin);
        if rel_change < epsilon
            fprintf('Converged!\n');
            return
        end
    end

end
fprintf('Maximum Iteration Reached!\n');
end
--------------------------------------------------------------------------------
/RepLearnFINALSINKComp.m: --------------------------------------------------------------------------------
function [Zexact, Ztop5, Ztop10, Ztop20, Z99per, Z98per, Z97per, Z95per, Z90per, Z85per, Z80per, DistComp, RuntimeNystrom, RuntimeFD]=RepLearnFINALSINKComp(X, Dictionary, gamma, coeffs)
% Learns a representation of X from a dictionary with the compressed SINK
% kernel, then derives reduced representations from it.
%
% Input
%   X: original data (nxm)
%   Dictionary: kShape's centroids (cxm) or randomly chosen time series
%   gamma: kernel's parameter (forwarded to SINKCompressed)
%   coeffs: forwarded unchanged to SINKCompressed
%           (presumably the number of Fourier coefficients kept - confirm there)
% Output
%   Zexact: n x c representation E * Ve * diag(va.^(-0.5)), where W is the
%           dictionary-to-dictionary kernel, E the data-to-dictionary kernel
%           and [Ve, va] the eigen-decomposition of W; NaN/Inf/complex
%           entries are replaced by 0
%   Ztop5, Ztop10, Ztop20: Zexact projected on the 5/10/20 leading
%           eigenvectors of BSketch'*BSketch (fewer if fewer exist)
%   Z99per ... Z80per: Zexact projected on the smallest number of leading
%           eigenvectors whose cumulative eigenvalue share reaches
%           99% ... 80%
%   DistComp: number of SINKCompressed evaluations (c*c + n*c)
%   RuntimeNystrom: seconds spent building W, E and Zexact
%   RuntimeFD: seconds spent on the FrequentDirections sketch and its
%           eigen-decomposition
tic;
DistComp = 0;

nrowsX = size(X, 1);
nrowsDic = size(Dictionary, 1);

% Kernel between every pair of dictionary atoms.
W = zeros(nrowsDic,nrowsDic);

for i=1:nrowsDic
    for j=1:nrowsDic
        W(i,j) = SINKCompressed(Dictionary(i,:),Dictionary(j,:),gamma,coeffs);
        DistComp = DistComp + 1;
    end
end

% Kernel between every time series and every dictionary atom.
E = zeros(nrowsX,nrowsDic);

for i=1:nrowsX
    disp(i);
    for j=1:nrowsDic
        E(i,j) = SINKCompressed(X(i,:),Dictionary(j,:),gamma,coeffs);
        DistComp = DistComp + 1;
    end
end

% Non-positive eigenvalues of W give Inf or complex scaling factors here;
% the affected entries are zeroed by CheckNaNInfComplex below.
[Ve, Va] = eig(W);
va = diag(Va);
inVa = diag(va.^(-0.5));
Zexact = E * Ve * inVa;

RuntimeNystrom = toc;

Zexact = CheckNaNInfComplex(Zexact);

tic;
[BSketch, ~] = FrequentDirections(Zexact, ceil(0.5*size(Zexact,2)));

[V2, L2] = eig(BSketch'*BSketch);
%[V2, L2] = eig(Zexact'*Zexact);
eigvalue = diag(L2);
[~, index] = sort(-eigvalue);   % largest eigenvalue first
eigvalue = eigvalue(index);
V2 = V2(:, index);

RuntimeFD = toc;

VarExplainedCumSum = cumsum(eigvalue)/sum(eigvalue);

Ztop5  = ProjectLeading(Zexact, V2, 5);
Ztop10 = ProjectLeading(Zexact, V2, 10);
Ztop20 = ProjectLeading(Zexact, V2, 20);

Z99per = ProjectLeading(Zexact, V2, DimForVariance(VarExplainedCumSum, 0.99));
Z98per = ProjectLeading(Zexact, V2, DimForVariance(VarExplainedCumSum, 0.98));
Z97per = ProjectLeading(Zexact, V2, DimForVariance(VarExplainedCumSum, 0.97));
Z95per = ProjectLeading(Zexact, V2, DimForVariance(VarExplainedCumSum, 0.95));
Z90per = ProjectLeading(Zexact, V2, DimForVariance(VarExplainedCumSum, 0.90));
Z85per = ProjectLeading(Zexact, V2, DimForVariance(VarExplainedCumSum, 0.85));
Z80per = ProjectLeading(Zexact, V2, DimForVariance(VarExplainedCumSum, 0.80));

end

function Zk = ProjectLeading(Zexact, V2, k)
% Projects Zexact on the first k columns of V2 and scrubs the result.
% k is clipped to the number of available columns, so a dictionary with
% fewer than k atoms no longer triggers an index-out-of-range error.
k = min(k, size(V2, 2));
Zk = CheckNaNInfComplex( Zexact*V2(:,1:k) );
end

function d = DimForVariance(VarExplainedCumSum, threshold)
% Smallest number of leading components whose cumulative share reaches
% threshold. If no entry reaches it (e.g. NaN shares), every component is
% kept instead of returning an empty, unusable representation.
d = find(VarExplainedCumSum>=threshold,1);
if isempty(d)
    d = numel(VarExplainedCumSum);
end
end

function Z = CheckNaNInfComplex(Z)
% Replaces every NaN, Inf or non-real entry of Z by 0.
% The element-wise test is kept exactly as before; only the traversal uses
% linear indexing.
for idx=1:numel(Z)
    if (isnan(Z(idx)) || isinf(Z(idx)) || ~isreal(Z(idx)))
        Z(idx)=0;
    end
end

end
-------------------------------------------------------------------------------- /RunKMCompGAK.m: -------------------------------------------------------------------------------- 1 | function RunKMCompGAK(DataSetStartIndex, DataSetEndIndex, TrainKM, sigma) 2 | 3 | % first 2 values are '.' and '..' 
- UCR Archive 2018 version has 128 datasets 4 | dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/'); 5 | Datasets = {dir_struct(3:130).name}; 6 | 7 | % Sort Datasets 8 | 9 | [Datasets, ~] = sort(Datasets); 10 | 11 | disp(sigma); 12 | 13 | rng(DataSetStartIndex*sigma); 14 | pause(180*rand); 15 | 16 | distcomp.feature( 'LocalUseMpiexec', false ) 17 | 18 | poolobj = gcp('nocreate'); 19 | delete(poolobj); 20 | 21 | parpool(20); 22 | 23 | for i = 1:length(Datasets) 24 | 25 | if (i>=DataSetStartIndex && i<=DataSetEndIndex) 26 | 27 | Results = zeros(length(Datasets),4); 28 | 29 | disp(['Dataset being processed: ', char(Datasets(i))]); 30 | 31 | DS = LoadUCRdataset(char(Datasets(i))); 32 | 33 | % Sampling to estimate sigma appropriately 34 | dists = []; 35 | for l=1:20 36 | rng(l); 37 | x = DS.Train(ceil(rand*DS.TrainInstancesCount),:); 38 | y = DS.Train(ceil(rand*DS.TrainInstancesCount),:); 39 | w = []; 40 | for p=1:length(DS.Train(1,:)) 41 | w(p)= ED(x(p),y(p)); 42 | end 43 | dists=[dists,w]; 44 | end 45 | 46 | sigma2 = sigma*median(dists)*sqrt(length(DS.Train(1,:))); 47 | 48 | 49 | if (TrainKM==1) 50 | 51 | tic; 52 | [KMTrain, DistComp] = KMCompGAK(DS.Train,sigma2); 53 | Results(i,1) = DistComp; 54 | Results(i,2) = toc; 55 | 56 | dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesGAK/',char(Datasets(i)),'/', char(Datasets(i)), '_GAK_Sigma_', num2str(sigma) ,'_TRAIN.kernelmatrix'), KMTrain, 'delimiter', '\t'); 57 | dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompGAK/', 'RunKMCompGAK_GAK_TrainToTrain_Sigma_', num2str(sigma), '_TrainToTrain_Dataset_' , num2str(i) ), Results, 'delimiter', '\t'); 58 | 59 | 60 | else 61 | tic; 62 | [KMTestToTrain, DistComp2]= KMCompGAK_TestToTrain(DS.Test,DS.Train,sigma2); 63 | 64 | Results(i,3) = DistComp2; 65 | Results(i,4) = toc; 66 | 67 | dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/KernelMatricesGAK/',char(Datasets(i)),'/', char(Datasets(i)), '_GAK_Sigma_', num2str(sigma) 
,'_TESTTOTRAIN.kernelmatrix'), KMTestToTrain, 'delimiter', '\t'); 68 | dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunKMCompGAK/', 'RunKMCompGAK_GAK_TestToTrain_Sigma_', num2str(sigma), '_TestToTrain_Dataset_' , num2str(i) ), Results, 'delimiter', '\t'); 69 | 70 | 71 | end 72 | 73 | end 74 | 75 | end 76 | 77 | poolobj = gcp('nocreate'); 78 | delete(poolobj); 79 | 80 | end -------------------------------------------------------------------------------- /SIDL/main_example.m: -------------------------------------------------------------------------------- 1 | %{ 2 | The MIT License (MIT) 3 | Copyright (c) 2016 Guoqing Zheng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies of the Software, including modified versions of the software, 14 | and substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | %} 24 | 25 |
% Example driver. Trains USIDL on the 'Trace' training split, sparse-codes
% the test split against the learned bases with update_A_par, prints the
% training time and the per-series test objective (evaluated with lambda = 0),
% and saves the whole workspace under the run id.
% Variable names are part of what save(runid) persists, so they are kept.

rng(1);

dataset_name = 'Trace';

train_file = [dataset_name, '_TRAIN'];
test_file = [dataset_name, '_TEST'];

train_data = load(train_file);
test_data = load(test_file);

% column 1 holds the label, the remaining columns the series
train_y = train_data(:, 1);
test_y = test_data(:, 1);

train_X = train_data(:, 2:end);
test_X = test_data(:, 2:end);

n_train = size(train_X, 1);
[n_test, p] = size(test_X);   % p is taken from the test split

% solver settings
c = 100;
epsilon = 1e-5;
maxIter = 1e3;
maxInnerIter = 5;

% loop through a set of variables
Ks = 20;%, 20, 50, 100];
lambdas = 1;%0.1, 1, 10, 100];
rs = 0.25;%, 0.5, 0.25];

for K = Ks
    for lambda = lambdas
        % one random start per (K, lambda), reused for every r
        A_rand_init = randn(n_test, K);
        for r = rs
            % basis length as a fraction r of the series length
            q = ceil(p*r);

            % run id
            runid = [dataset_name, '_l_', num2str(lambda), '_K_', num2str(K), '_q_', num2str(q)];

            % train SIDL on training set
            tic;
            [S, A, Offsets] = USIDL(train_X, train_y, lambda, K, q, c, epsilon, maxIter, maxInnerIter, runid);
            learn_time = toc;

            fprintf('\n##### TRAINING TIME on TRAIN SET (K=%f, lambda=%f, r=%f): %f secs.\n\n', K, lambda, r, learn_time);

            % learn sparse coding on test set with dictionary learned from training set
            A_test = A_rand_init;
            Offsets_test = randi([0, p-q], n_test, K);
            tic;
            [A_test, Offsets_test, F_all] = update_A_par(test_X, S, A_test, Offsets_test, lambda, maxIter, epsilon);
            fit_time = toc;

            % reconstruction error for SIDL: objective with lambda = 0, averaged over the test series
            test_recons_error_sidl = unsup_obj(test_X, S, A_test, Offsets_test, 0) / n_test;

            fprintf('\n\n##### RECONS ERROR on TEST SET (K=%f, lambda=%f) SIDL (r=%f): %f\n\n', K, lambda, r, test_recons_error_sidl);

            % persist the complete workspace for this configuration
            save(runid);
        end
    end
end

-------------------------------------------------------------------------------- /RunOneNNSINKCompressed.m:
--------------------------------------------------------------------------------
function RunOneNNSINKCompressed(DataSetStartIndex, DataSetEndIndex, FourierEnergy, DatasetPercentile)
% For every dataset whose position in the sorted dataset list lies in
% [DataSetStartIndex, DataSetEndIndex]: converts the dataset with
% DatasetToFourier, evaluates LOOCSINKCompressed for gamma = 1..20, runs
% OneNNClassifierSINKCompressed with the gamma of highest leave-one-out
% accuracy, and writes one tab-delimited results file.
%
% FourierEnergy is percentage e.g., 0.99
% DatasetPercentile is percentage in the form of 99, 95 etc.
%
% Columns of the row written for dataset i:
%   1 DSFourier.len        2 DSFourier.fftlength   3 DSFourier.NumCoeffs
%   4 preprocessing time   5 selected gamma        6 best leave-one-out accuracy
%   7 leave-one-out time at the selected gamma     8 total leave-one-out time
%   9 1-NN time            10 1-NN accuracy

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
listing = dir('/rigel/dsi/users/ikp2103/JOPA/GRAIL2/UCR2018/');
Datasets = sort({listing(3:130).name});

numDatasets = length(Datasets);
gammaValues = 1:20;

for i = 1:numDatasets

    % only the requested slice of datasets is processed
    if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
        continue;
    end

    % Tables are recreated for each dataset; only row i is filled before
    % the results file is written.
    LeaveOneOutAccuracies = zeros(numDatasets,20);
    LeaveOneOutRuntimes = zeros(numDatasets,20);
    Results = zeros(numDatasets,10);

    datasetName = char(Datasets(i));
    disp(['Dataset being processed: ', datasetName]);
    DS = LoadUCRdataset(datasetName);

    tic;
    DSFourier = DatasetToFourier(DS, FourierEnergy, DatasetPercentile);
    RTPreprocessing = toc;

    %parfor gamma = 1:20
    for gammaIter = 1:20

        % left without a semicolon: the value is echoed to the console
        gammaIter
        tic;
        acc = LOOCSINKCompressed(DSFourier,gammaValues(gammaIter));
        LeaveOneOutRuntimes(i,gammaIter) = toc;
        LeaveOneOutAccuracies(i,gammaIter) = acc;
    end

    % max returns the first gamma reaching the highest accuracy
    [MaxLeaveOneOutAcc,MaxLeaveOneOutAccGamma] = max(LeaveOneOutAccuracies(i,:));
    bestGamma = gammaValues(MaxLeaveOneOutAccGamma);

    tic;
    OneNNAcc = OneNNClassifierSINKCompressed(DSFourier, bestGamma);
    RTOneNN = toc;

    Results(i,1) = DSFourier.len;
    Results(i,2) = DSFourier.fftlength;
    Results(i,3) = DSFourier.NumCoeffs;
    Results(i,4) = RTPreprocessing;
    Results(i,5) = bestGamma;
    Results(i,6) = MaxLeaveOneOutAcc;
    Results(i,7) = LeaveOneOutRuntimes(i,MaxLeaveOneOutAccGamma);
    Results(i,8) = sum(LeaveOneOutRuntimes(i,:));
    Results(i,9) = RTOneNN;
    Results(i,10) = OneNNAcc;

    outFile = strcat('/rigel/dsi/users/ikp2103/JOPA/GRAIL2/RunOneNNSINKCompressed/', 'RESULTS_RunOneNNSINKCompressed_FourierEnergy_', num2str(FourierEnergy), '_DatasetPercentile_', num2str(DatasetPercentile), '_Dataset_' ,num2str(i));
    dlmwrite( outFile, Results, 'delimiter', '\t');

end

end
-------------------------------------------------------------------------------- /SIDL/update_A_par.m: -------------------------------------------------------------------------------- 1 | %{ 2 | The MIT License (MIT) 3 | Copyright (c) 2016 Guoqing Zheng 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies of the Software, including modified versions of the software, 14 | and substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | %} 24 | 25 |
function [A, Offsets, F_all] = update_A_par(X, S, A, Offsets, lambda, maxIter, epsilon)
%UPDATE_A_PAR Updates coefficients and matching offsets with the bases fixed.
%   For every series X(i,:) and, in random order, every basis S(k,:):
%     1. the residual of X(i,:) is formed without the contribution of basis k,
%     2. the basis is correlated with every length-q window of that residual,
%     3. the window with the largest absolute correlation M is selected,
%     4. the coefficient is soft-thresholded: 0 if M <= lambda, otherwise
%        sign(corr) * (M - lambda) / ||s_k||^2, and the offset of the
%        selected window is stored.
%   After each sweep over all series the objective unsup_obj is evaluated;
%   the sweeps stop when its relative change drops below epsilon or after
%   maxIter sweeps.
%
%   Inputs
%     X       : n x p data
%     S       : K x q bases
%     A       : n x K initial coefficients
%     Offsets : n x K initial zero-based offsets
%     lambda  : l1 threshold
%     maxIter : maximum number of sweeps
%     epsilon : relative-change tolerance on the objective
%
%   Outputs
%     A, Offsets : updated coefficients and offsets
%     F_all      : objective after the last completed sweep
%                  ([] when maxIter < 1, i.e. no sweep was run)

[n,p] = size(X);
[KK,q] = size(S);

% Row t+1 of seg_idx lists the positions of the window that starts at
% zero-based offset t, for t = 0 .. p-q.
seg_idx = bsxfun(@plus, repmat(1:q, p-q+1, 1), (0:p-q)');

% S is not modified in this function, so ||s_k||^2 is computed once per
% basis instead of once per series, basis and sweep.
base_norm2 = zeros(KK, 1);
for k = 1:KK
    base_norm2(k) = norm(S(k,:))^2;
end

F_all = [];
F_prev = [];
for iter = 1:maxIter
    for i=1:n % compute activation and matching offset for X_i
        x = X(i,:);
        shifted_S = op_shift(S, Offsets(i,:), p);

        %for k=1:KK % compute for base k % RAND PERM DOESN'T HURT
        for k=randperm(KK) % compute for base k % RAND PERM DOESN'T HURT
            base = S(k,:);
            temp_a = A(i,:);
            temp_a(k) = 0; % exclude alpha_k

            x_residue = x - temp_a * shifted_S;

            % One window per row. The reshape pins the (p-q+1) x q layout
            % also when seg_idx is a vector (q == 1 or p == q), where plain
            % indexing would follow the orientation of x_residue.
            segs = reshape(x_residue(seg_idx), size(seg_idx));
            dot_prods = segs * base';

            [M_dp, M_idx] = max(abs(dot_prods));

            if M_dp <= lambda
                % correlation too weak: basis k is switched off for series i
                A(i, k) = 0;
            else
                t_k_star = M_idx - 1; % zero-based offset of the best window
                a_k_star = sign(dot_prods(M_idx)) * (M_dp - lambda) / base_norm2(k);

                A(i, k) = a_k_star;
                if a_k_star ~= 0
                    % move basis k to its new location
                    shifted_S(k,:) = 0;
                    shifted_S(k, t_k_star + 1 : t_k_star + q) = base;
                    Offsets(i, k) = t_k_star;
                end
            end
        end

    end

    F_all = unsup_obj(X, S, A, Offsets, lambda);
    %fprintf('Current F_all: %f\n', F_all);

    % Relative change against the previous sweep. The magnitude of the
    % previous value is used as the scale (floored at realmin), so a zero or
    % negative previous objective cannot yield NaN or a spurious stop.
    if ~isempty(F_prev)
        rel_change = abs(F_all - F_prev) / max(abs(F_prev), realmin);
        if rel_change < epsilon
            %fprintf('Updating A: Converged!\n\n');
            return
        end
    end
    F_prev = F_all;

end

%fprintf('Updating A: Reached max iter.\n\n');
end

-------------------------------------------------------------------------------- /RunTestVarianceExact.m: -------------------------------------------------------------------------------- 1 | function
RunTestVarianceExact(DataSetStartIndex, DataSetEndIndex) 2 | 3 | % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets 4 | dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/'); 5 | Datasets = {dir_struct(3:130).name}; 6 | 7 | % Sort Datasets 8 | [Datasets, DSOrder] = sort(Datasets); 9 | 10 | Results = zeros(length(Datasets),55); 11 | 12 | %rng(ceil(DataSetStartIndex*100)) 13 | %pause(300*rand); 14 | 15 | poolobj = gcp('nocreate'); 16 | delete(poolobj); 17 | 18 | parpool(20); 19 | 20 | for i = 1:length(Datasets) 21 | 22 | if (i>=DataSetStartIndex & i<=DataSetEndIndex) 23 | 24 | disp(['Dataset being processed: ', char(Datasets(i))]); 25 | DS = LoadUCRdataset(char(Datasets(i))); 26 | 27 | VarExplainedCumSumMatrix = zeros(20,DS.DataInstancesCount); 28 | StatisticsForGamma = zeros(20,14); 29 | 30 | parfor gamma = 1 : 20 31 | 32 | gamma 33 | rng(gamma); 34 | 35 | KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix')); 36 | 37 | KM = KM(1:7200,1:7200); 38 | 39 | [Variance,VarExplainedTop5,VarExplainedTop10,VarExplainedTop20,DimFor98,DimFor95,DimFor90,DimFor85,DimFor80,VarExplainedCumSum]=TestVarianceExact(KM); 40 | 41 | Z20 = dlmread( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z20') ); 42 | Z90per = dlmread( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z90per') ); 43 | 44 | LOOCAccuracyZ20 = LeaveOneOutClassifierZREP(DS,Z20); 45 | LOOCAccuracyZ90per = LeaveOneOutClassifierZREP(DS,Z90per); 46 | 47 | OneNNAccuracyZ20 = OneNNClassifierZREP(DS,Z20); 48 | OneNNAccuracyZ90per = OneNNClassifierZREP(DS,Z90per); 49 | 50 | VarExplainedCumSumMatrix(gamma,:) = VarExplainedCumSum; 51 | StatisticsForGamma(gamma,:) = 
[Variance,VarExplainedTop5,VarExplainedTop10,VarExplainedTop20,DimFor98,DimFor95,DimFor90,DimFor85,DimFor80,trapz(1:size(KM,1),VarExplainedCumSum),LOOCAccuracyZ20,LOOCAccuracyZ90per,OneNNAccuracyZ20,OneNNAccuracyZ90per]; 52 | 53 | end 54 | 55 | dlmwrite( strcat('RunTestVarianceExactVarExplainedCumSum/','RESULTS_RunTestVarianceExactVarExplainedCumSum_', num2str(i)), VarExplainedCumSumMatrix, 'delimiter', '\t'); 56 | dlmwrite( strcat('RunTestVarianceExactStatisticsForGamma/','RESULTS_RunTestVarianceExactStatisticsForGamma_', num2str(i)), StatisticsForGamma, 'delimiter', '\t'); 57 | 58 | 59 | end 60 | 61 | end 62 | 63 | poolobj = gcp('nocreate'); 64 | delete(poolobj); 65 | 66 | end -------------------------------------------------------------------------------- /RunRepLearningKM.m: -------------------------------------------------------------------------------- 1 | function RunRepLearningKM(DataSetStartIndex, DataSetEndIndex, GammaStartIndex, GammaEndIndex) 2 | 3 | % first 2 values are '.' and '..' 
- UCR Archive 2018 version has 128 datasets 4 | dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/'); 5 | Datasets = {dir_struct(3:130).name}; 6 | 7 | % Sort Datasets 8 | 9 | [Datasets, DSOrder] = sort(Datasets); 10 | 11 | for i = 1:length(Datasets) 12 | 13 | if (i>=DataSetStartIndex && i<=DataSetEndIndex) 14 | 15 | disp(['Dataset being processed: ', char(Datasets(i))]); 16 | DS = LoadUCRdataset(char(Datasets(i))); 17 | 18 | Results = zeros(length(Datasets),20); 19 | 20 | for gamma = 1 : 20 21 | 22 | if (gamma>=GammaStartIndex && gamma<=GammaEndIndex) 23 | 24 | gamma 25 | 26 | KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix')); 27 | 28 | [Z99per,Z98per,Z97per,Z95per,Z90per,Z85per,Z80per,Ztop20,Ztop10,Ztop5,RepLearnTime]=RepLearnKM(KM); 29 | 30 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z99per'), Z99per, 'delimiter', '\t'); 31 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z98per'), Z98per, 'delimiter', '\t'); 32 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z97per'), Z97per, 'delimiter', '\t'); 33 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z95per'), Z95per, 'delimiter', '\t'); 34 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z90per'), Z90per, 'delimiter', '\t'); 35 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z85per'), Z85per, 'delimiter', '\t'); 36 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z80per'), Z80per, 'delimiter', '\t'); 37 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', 
num2str(gamma) ,'.Z20'), Ztop20, 'delimiter', '\t'); 38 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z10'), Ztop10, 'delimiter', '\t'); 39 | dlmwrite( strcat( 'REPRESENTATIONSFULLKM/',char(Datasets(i)),'/','RepresentationFULLKM_', num2str(gamma) ,'.Z5'), Ztop5, 'delimiter', '\t'); 40 | 41 | Results(i,gamma)=RepLearnTime; 42 | 43 | dlmwrite( strcat('RunRepLearningKM/','RunRepLearningKM_Gamma_', num2str(gamma), '_Dataset_', num2str(i)), Results, 'delimiter', '\t'); 44 | 45 | 46 | end 47 | 48 | end 49 | 50 | 51 | 52 | end 53 | 54 | end 55 | 56 | end 57 | -------------------------------------------------------------------------------- /RWS/rws_VaryingR_CV_R128.m: --------------------------------------------------------------------------------
% This script generates low-rank approximation of latent kernel matrix using
% random features approach based on dtw like distance for UCR time-series
% datasets. Expts A: investigate performance changes when varying R using
% the parameters learned from 10-folds cross validation with R = 128.
%
% For every dataset and every R in R_list the script generates features with
% rws_GenFea, trains a LIBLINEAR model with the fixed cost lambda_inverse,
% records train/test accuracy and timings, and saves the summary struct
% 'info' to '<dataset>_rws_VaryingR_CV_R128'.
%
% Author: Lingfei Wu
% Date: 01/20/2019

clear,clc
nthreads = 12;
parpool('local', nthreads);
addpath(genpath('utilities'));
file_dir = './datasets/';

% List all datasets
filename_list = {'Gun_Point'};

DMin = 1;
R_list = [4 8 16 32 64]; % Generally, Large R, Better Accuracy.
info = [];
for jj = 1:length(filename_list)
    filename = filename_list{jj};

    % Per-dataset parameters. An unlisted dataset now stops the script
    % instead of running with undefined or stale values.
    if strcmp(filename, 'Gun_Point')
        sigma = 4.46;
        DMax = 25;
        lambda_inverse = 10;
    else
        error('rws_VaryingR_CV_R128:unknownDataset', ...
            'No parameters (sigma, DMax, lambda_inverse) defined for dataset "%s".', filename);
    end

    Accu_best = zeros(2,length(R_list));            % row 1: train, row 2: test
    telapsed_liblinear = zeros(1,length(R_list));
    real_total_dtw_time = zeros(1,length(R_list));
    real_user_dtw_time = zeros(1,length(R_list));
    for j = 1:length(R_list)
        R = R_list(j);
        [trainData,testData,telapsed_fea_gen]=rws_GenFea(file_dir,...
            filename,sigma,R,DMin,DMax);
        trainy = trainData(:,1);
        testy = testData(:,1);

        % convert user labels to uniform format binary(-1,1) & multiclasses (1,2,..)
        % The masks are computed on the untouched label vectors, so a code
        % that has just been written can never be mistaken for an original
        % label that still has to be converted (e.g. labels [3 4 7]).
        labels = unique(trainy);
        numClasses = length(labels);
        trainy_raw = trainy;
        testy_raw = testy;
        if numClasses > 2
            for i = 1:numClasses
                trainy(trainy_raw == labels(i)) = i;
                testy(testy_raw == labels(i)) = i;
            end
        else
            trainy(trainy_raw == labels(1)) = -1;
            trainy(trainy_raw == labels(2)) = 1;
            testy(testy_raw == labels(1)) = -1;
            testy(testy_raw == labels(2)) = 1;
        end

        disp('------------------------------------------------------');
        disp('LIBLinear performs basic grid search by varying lambda');
        disp('------------------------------------------------------');
        trainFeaX = trainData(:,2:end);
        testFeaX = testData(:,2:end);

        % Linear Kernel
        timer_start = tic;
        s2 = num2str(lambda_inverse);
        s1 = '-s 2 -e 0.0001 -q -c ';
        s = [s1 s2];
        model_linear = train(trainy, sparse(trainFeaX), s);
        [train_predict_label, train_accuracy, train_dec_values] = ...
            predict(trainy, sparse(trainFeaX), model_linear);
        [test_predict_label, test_accuracy, test_dec_values] = ...
            predict(testy, sparse(testFeaX), model_linear);
        Accu_best(1,j) = train_accuracy(1);
        Accu_best(2,j) = test_accuracy(1);
        % terminated with a semicolon so the vector is not echoed each pass
        telapsed_liblinear(1,j) = toc(timer_start);
        real_total_dtw_time(1,j) = telapsed_fea_gen.real_total_dtw_time;
        real_user_dtw_time(1,j) = telapsed_fea_gen.user_dtw_time/nthreads;
    end
    info.Accu_best = Accu_best;
    info.real_total_dtw_time = real_total_dtw_time;
    info.real_user_dtw_time = real_user_dtw_time;
    info.telapsed_liblinear = telapsed_liblinear;
    info.R = R_list;
    info.DMin = DMin;
    info.DMax = DMax;
    info.sigma = sigma;
    info.lambda_inverse = lambda_inverse;
    disp(info);
    savefilename = [filename '_rws_VaryingR_CV_R128'];
    save(savefilename,'info')
end
-------------------------------------------------------------------------------- /RWS/rws_VaryingR_CV_R128_mulvar.m: -------------------------------------------------------------------------------- 1 | % This script generates low-rank approximation of latent kernel matrix using 2 | % random features approach based on dtw like distance for UCR time-series 3 | % datasets. Expts A: investigate performance changes when varying R using 4 | % the parameters learned from 10-folds cross validation with R = 128. 
5 | % 6 | % Author: Lingfei Wu 7 | % Date: 01/20/2019 8 | 9 | clear,clc 10 | nthreads = 12; 11 | parpool('local', nthreads); 12 | addpath(genpath('utilities')); 13 | file_dir = './datasets/'; 14 | 15 | % List all datasets 16 | filename_list = {'auslan'}; 17 | 18 | DMin = 1; 19 | R_list = [4 8 16 32 64 128 256 512]; 20 | info = []; 21 | for jj = 1:length(filename_list) 22 | filename = filename_list{jj}; 23 | if strcmp(filename, 'auslan') 24 | sigma = 0.79; 25 | DMax = 25; 26 | lambda_inverse = 10; 27 | end 28 | 29 | Accu_best = zeros(2,length(R_list)); 30 | telapsed_liblinear = zeros(1,length(R_list)); 31 | real_total_dtw_time = zeros(1,length(R_list)); 32 | real_user_dtw_time = zeros(1,length(R_list)); 33 | for j = 1:length(R_list) 34 | R = R_list(j); 35 | [trainData,testData,telapsed_fea_gen]=rws_GenFea_mulvar(file_dir,... 36 | filename,sigma,R,DMin,DMax); 37 | trainy = trainData(:,1); 38 | testy = testData(:,1); 39 | % convert user labels to uniform format binary(-1,1) & multiclasses (1,2,..) 
40 | labels = unique(trainy); 41 | numClasses = length(labels); 42 | if numClasses > 2 43 | for i=numClasses:-1:1 44 | ind = (trainy == labels(i)); 45 | trainy(ind) = i; 46 | end 47 | for i=numClasses:-1:1 48 | ind = (testy == labels(i)); 49 | testy(ind) = i; 50 | end 51 | else 52 | ind = (trainy == labels(1)); 53 | trainy(ind) = -1; 54 | ind = (trainy == labels(2)); 55 | trainy(ind) = 1; 56 | ind = (testy == labels(1)); 57 | testy(ind) = -1; 58 | ind = (testy == labels(2)); 59 | testy(ind) = 1; 60 | end 61 | 62 | disp('------------------------------------------------------'); 63 | disp('LIBLinear performs basic grid search by varying lambda'); 64 | disp('------------------------------------------------------'); 65 | trainFeaX = trainData(:,2:end); 66 | testFeaX = testData(:,2:end); 67 | 68 | % Linear Kernel 69 | timer_start = tic; 70 | s2 = num2str(lambda_inverse); 71 | s1 = '-s 2 -e 0.0001 -q -c '; % for regular liblinear 72 | % s1 = '-s 2 -e 0.0001 -n 8 -q -c '; % for omp version 73 | s = [s1 s2]; 74 | model_linear = train(trainy, sparse(trainFeaX), s); 75 | [train_predict_label, train_accuracy, train_dec_values] = ... 76 | predict(trainy, sparse(trainFeaX), model_linear); 77 | [test_predict_label, test_accuracy, test_dec_values] = ... 
78 | predict(testy, sparse(testFeaX), model_linear); 79 | Accu_best(1,j) = train_accuracy(1); 80 | Accu_best(2,j) = test_accuracy(1); 81 | telapsed_liblinear(1,j) = toc(timer_start); 82 | real_total_dtw_time(1,j) = telapsed_fea_gen.real_total_dtw_time; 83 | real_user_dtw_time(1,j) = telapsed_fea_gen.user_dtw_time/nthreads; 84 | end 85 | info.Accu_best = Accu_best; 86 | info.real_total_dtw_time = real_total_dtw_time; 87 | info.real_user_dtw_time = real_user_dtw_time; 88 | info.telapsed_liblinear = telapsed_liblinear; 89 | info.R = R_list; 90 | info.DMin = DMin; 91 | info.DMax = DMax; 92 | info.sigma = sigma; 93 | info.lambda_inverse = lambda_inverse; 94 | disp(info); 95 | savefilename = [filename '_rws_VaryingR_CV_R128']; 96 | save(savefilename,'info') 97 | end 98 | -------------------------------------------------------------------------------- /mySVD.m: -------------------------------------------------------------------------------- 1 | function [U, S, V] = mySVD(X,ReducedDim) 2 | %mySVD Accelerated singular value decomposition. 3 | % [U,S,V] = mySVD(X) produces a diagonal matrix S, of the 4 | % dimension as the rank of X and with nonnegative diagonal elements in 5 | % decreasing order, and unitary matrices U and V so that 6 | % X = U*S*V'. 7 | % 8 | % [U,S,V] = mySVD(X,ReducedDim) produces a diagonal matrix S, of the 9 | % dimension as ReducedDim and with nonnegative diagonal elements in 10 | % decreasing order, and unitary matrices U and V so that 11 | % Xhat = U*S*V' is the best approximation (with respect to F norm) of X 12 | % among all the matrices with rank no larger than ReducedDim. 13 | % 14 | % Based on the size of X, mySVD computes the eigvectors of X*X^T or X^T*X 15 | % first, and then convert them to the eigenvectors of the other. 16 | % 17 | % See also SVD. 
18 | % 19 | % version 2.0 --Feb/2009 20 | % version 1.0 --April/2004 21 | % 22 | % Written by Deng Cai (dengcai AT gmail.com) 23 | % 24 | 25 | MAX_MATRIX_SIZE = 10000; % You can change this number according your machine computational power 26 | EIGVECTOR_RATIO = 0.1; % You can change this number according your machine computational power 27 | 28 | 29 | if ~exist('ReducedDim','var') 30 | ReducedDim = 0; 31 | end 32 | 33 | [nSmp, mFea] = size(X); 34 | if mFea/nSmp > 1.0713 35 | ddata = X*X'; 36 | ddata = max(ddata,ddata'); 37 | 38 | dimMatrix = size(ddata,1); 39 | if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO) 40 | option = struct('disp',0); 41 | [U, eigvalue] = eigs(ddata,ReducedDim,'la',option); 42 | eigvalue = diag(eigvalue); 43 | else 44 | if issparse(ddata) 45 | ddata = full(ddata); 46 | end 47 | 48 | [U, eigvalue] = eig(ddata); 49 | eigvalue = diag(eigvalue); 50 | [dump, index] = sort(-eigvalue); 51 | eigvalue = eigvalue(index); 52 | U = U(:, index); 53 | end 54 | clear ddata; 55 | 56 | maxEigValue = max(abs(eigvalue)); 57 | eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10); 58 | eigvalue(eigIdx) = []; 59 | U(:,eigIdx) = []; 60 | 61 | if (ReducedDim > 0) && (ReducedDim < length(eigvalue)) 62 | eigvalue = eigvalue(1:ReducedDim); 63 | U = U(:,1:ReducedDim); 64 | end 65 | 66 | eigvalue_Half = eigvalue.^.5; 67 | S = spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half)); 68 | 69 | if nargout >= 3 70 | eigvalue_MinusHalf = eigvalue_Half.^-1; 71 | V = X'*(U.*repmat(eigvalue_MinusHalf',size(U,1),1)); 72 | end 73 | else 74 | ddata = X'*X; 75 | ddata = max(ddata,ddata'); 76 | 77 | dimMatrix = size(ddata,1); 78 | if (ReducedDim > 0) && (dimMatrix > MAX_MATRIX_SIZE) && (ReducedDim < dimMatrix*EIGVECTOR_RATIO) 79 | option = struct('disp',0); 80 | [V, eigvalue] = eigs(ddata,ReducedDim,'la',option); 81 | eigvalue = diag(eigvalue); 82 | else 83 | if issparse(ddata) 84 | ddata = full(ddata); 85 | end 86 | 87 | [V, 
eigvalue] = eig(ddata); 88 | eigvalue = diag(eigvalue); 89 | 90 | [dump, index] = sort(-eigvalue); 91 | eigvalue = eigvalue(index); 92 | V = V(:, index); 93 | end 94 | clear ddata; 95 | 96 | maxEigValue = max(abs(eigvalue)); 97 | eigIdx = find(abs(eigvalue)/maxEigValue < 1e-10); 98 | eigvalue(eigIdx) = []; 99 | V(:,eigIdx) = []; 100 | 101 | if (ReducedDim > 0) && (ReducedDim < length(eigvalue)) 102 | eigvalue = eigvalue(1:ReducedDim); 103 | V = V(:,1:ReducedDim); 104 | end 105 | 106 | eigvalue_Half = eigvalue.^.5; 107 | S = spdiags(eigvalue_Half,0,length(eigvalue_Half),length(eigvalue_Half)); 108 | 109 | eigvalue_MinusHalf = eigvalue_Half.^-1; 110 | U = X*(V.*repmat(eigvalue_MinusHalf',size(V,1),1)); 111 | end 112 | -------------------------------------------------------------------------------- /RWS/README.md: -------------------------------------------------------------------------------- 1 | # RandomWarpingSeries 2 | RandomWarpingSeries (RWS) is a simple code for generating the vector representation of time-series for time-series classification, clustering, and regression. 3 | This code is a simple implementation (mix of Matlab, Matlab MEX, and C) of the WME in (Wu et al, "Random Warping Series: A Random Features Method for Time-Series Embedding", AISTATS'18). We refer more information about RWS to the following paper link: http://proceedings.mlr.press/v84/wu18b/wu18b.pdf. 4 | 5 | 6 | # Prerequisites 7 | 8 | There are at least two required tool packages in order to run this code. You need to download DTW, LibLinear, or LibSVM and compile the corresponding MEX files for your operating systems (Mac, Linux, or Windows). 9 | 10 | For DTW: https://www.mathworks.com/matlabcentral/fileexchange/43156-dynamic-time-warping-dtw
11 | For LibSVM: https://github.com/cjlin1/libsvm or LibLinear: https://github.com/cjlin1/liblinear
12 | 13 | 14 | For single-variate time-series datasets, you can download some datasets from the UCR time-series collections (http://www.cs.ucr.edu/~eamonn/time_series_data/) or from the UEA time-series collection (http://www.timeseriesclassification.com/).
15 | For multi-variate time-series datasets, you can download some datasets from the UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/index.php) or from your favorite applications.
16 | It is generally advised to perform Z-normalization on the data before feeding it to our time-series embedding codes. 17 | 18 | 19 | # How To Run The Codes 20 | Note that, in order to achieve the best performance, the hyperparameters DMax, sigma, and even lambda_inverse (for classification using SVM) have to be searched (using cross validation or other techniques). This is a crucial step for RWS. 21 | 22 | To generate the RWS and use RWS for time-series classification or clustering tasks, you need to: 23 | 24 | (1) If you use Linux or Mac, you should be fine to skip compiling MEX for DTW, LibLinear, and LibSVM. Otherwise, you need to download them from the above links and compile them in their Matlab folders. Then you need to copy these MEX files into the utilities folder. 25 | 26 | (2) Open Matlab terminal console and run rws_gridsearch_CV.m on single-variate time-series for performing K-fold cross validation for searching good hyperparameters. 27 | The RWS embeddings that perform the best on the dev data will be saved. 28 | 29 | (3) Open Matlab terminal console and run rws_gridsearch_CV_mulvar.m on multi-variate time-series for performing K-fold cross validation for searching good hyperparameters. 30 | The RWS embeddings that perform the best on the dev data will be saved. 31 | 32 | (4) Test the model by running rws_VaryingR_CV_R128.m on single-variate time-series and rws_VaryingR_CV_R128_mulvar.m on multi-variate time-series, using the best parameters from CV. 33 | The testing result on different data splits will be saved. 34 | 35 | (5) To generate RWS embedding only, please run rws_GenFea_example.m on single-variate time-series and rws_GenFea_example_mulvar.m on multi-variate time-series.
36 | 37 | Note that there are no default numbers for the hyperparameters DMax and sigma. You should search for the best numbers before generating RWS time-series embeddings for your applications. In general, the larger the parameter R is, the better the quality of the embedding is. 38 | 39 | 40 | # How To Cite The Codes 41 | Please cite our work if you like or are using our codes for your projects! Let me know if you have any questions: lwu at email.wm.edu. 42 | 43 | Lingfei Wu, Ian En-Hsu Yen, Jinfeng Yi, Fangli Xu, Qi Lei, and Michael Witbrock, "Random Warping Series: A Random Features Method for Time-Series Embedding", AISTATS'18. 44 | 45 | @inproceedings{wu2018random,
46 | title={Random Warping Series: A Random Features Method for Time-Series Embedding},
47 | author={Wu, Lingfei and Yen, Ian En-Hsu and Yi, Jinfeng and Xu, Fangli and Lei, Qi and Witbrock, Michael},
48 | booktitle={International Conference on Artificial Intelligence and Statistics},
49 | pages={793--802},
50 | year={2018}
51 | } 52 | 53 | ------------------------------------------------------ 54 | Contributors: Lingfei Wu
55 | Created date: January 20, 2019
56 | Last update: January 20, 2019
57 | -------------------------------------------------------------------------------- /OneNNClassifierLB.m: --------------------------------------------------------------------------------
function [acc,pruningpower] = OneNNClassifierLB(DS,ZReduced,LBType,gamma)
% OneNNClassifierLB  1-NN classification of DS.Test against DS.Train that
% uses a cheap lower bound (LB) to skip exact distance computations.
%
% Inputs
%   DS       - struct; fields read here: Train, Test, TrainClassLabels,
%              TestClassLabels, TrainInstancesCount, TestInstancesCount
%              and, for LBType 4 only, DTW_WindowPercentage.
%   ZReduced - reduced representations, training rows first, test rows
%              after them. Its column count is the number of FFT
%              coefficients kept for LBType 1 and 2; its rows are the
%              vectors compared for LBType 3.
%   LBType   - 1: FFT bound on the first-k coefficients (exact distance: ED)
%              2: FFT bound on the best-k coefficients  (exact distance: ED)
%              3: ED between ZReduced rows   (exact distance: 2*(1-SINK))
%              4: LB_Keogh                   (exact distance: dtw)
%   gamma    - forwarded to SINK; only used when LBType == 3.
%
% Outputs
%   acc          - fraction of test instances whose predicted label equals
%                  DS.TestClassLabels.
%   pruningpower - 1 - (exact distance computations / lower-bound
%                  computations), accumulated over ALL test instances.
%                  (Earlier versions overwrote it per query, so only the
%                  last test instance was reported.)

if ~isscalar(LBType) || ~ismember(LBType, 1:4)
    % Without this guard an unknown type left 'distance' undefined/stale.
    error('OneNNClassifierLB:invalidLBType', 'LBType must be 1, 2, 3 or 4.');
end

ZRepTrain = ZReduced(1:DS.TrainInstancesCount,:);
ZRepTest = ZReduced(DS.TrainInstancesCount+1:end,:);

Dim = size(ZReduced,2);

correct = 0;
totalLBComp = 0;      % lower-bound evaluations over all queries
totalExactComp = 0;   % exact-distance evaluations over all queries

for id = 1 : DS.TestInstancesCount

    query = DS.Test(id,:);
    best_so_far = inf;
    % NaN never equals a label, so a query for which no candidate improves
    % on inf is counted as misclassified instead of reusing the previous
    % query's prediction.
    predicted = NaN;

    if LBType==4
        u = upper_b(query, DS.DTW_WindowPercentage);
        l = lower_b(query, DS.DTW_WindowPercentage);
    end

    % Lower bound from the query to every training instance.
    distance_lb = zeros(1, DS.TrainInstancesCount);
    for i = 1 : DS.TrainInstancesCount
        switch LBType
            case 1
                distance_lb(i) = FFTLBTopCoeff(DS.Train(i,:), query, Dim);
            case 2
                distance_lb(i) = FFTLBBestCoeff(DS.Train(i,:), query, Dim);
            case 3
                distance_lb(i) = sqrt(sum((ZRepTrain(i,:)-ZRepTest(id,:)).^2));
            case 4
                distance_lb(i) = lb_keogh(DS.Train(i,:), query, u, l);
        end
    end
    totalLBComp = totalLBComp + DS.TrainInstancesCount;

    % Visit candidates in increasing lower-bound order so the scan can stop
    % at the first bound that is not below the best exact distance.
    [distance_lb, ordering] = sort(distance_lb);
    traindata = DS.Train(ordering,:);
    trainclasses = DS.TrainClassLabels(ordering);

    for i = 1 : DS.TrainInstancesCount
        if ~(distance_lb(i) < best_so_far)
            break;
        end

        switch LBType
            case {1, 2}
                distance = sqrt(sum((traindata(i,:)-query).^2));
            case 3
                distance = 2*(1-SINK(traindata(i,:),query,gamma));
            case 4
                distance = dtw(traindata(i,:),query,DS.DTW_WindowPercentage);
        end
        totalExactComp = totalExactComp + 1;

        if distance < best_so_far
            predicted = trainclasses(i);
            best_so_far = distance;
        end
    end

    if (DS.TestClassLabels(id) == predicted)
        correct = correct + 1;
    end
end

acc = correct / DS.TestInstancesCount;

if totalLBComp > 0
    pruningpower = 1 - (totalExactComp/totalLBComp);
else
    pruningpower = 0;   % nothing was compared, so nothing was pruned
end
end

function lbdist = FFTLBTopCoeff(x, y, coeff)
% Distance between the first 'coeff' scaled FFT coefficients of x and y.
% Both transforms are scaled by sqrt(length(x)).
fx = fft(x)/sqrt(length(x));
fy = fft(y)/sqrt(length(x));
lbdist = sqrt(sum(abs(fx(1:coeff) - fy(1:coeff)).^2));
end

function lbdist = FFTLBBestCoeff(x, y, coeff)
% Distance between x and y after each keeps only its own 'coeff'
% largest-magnitude scaled FFT coefficients.
% NOTE(review): x and y may keep different coefficient positions; confirm
% that the result is still a valid lower bound for the intended use.
fx = fft(x)/sqrt(length(x));
fy = fft(y)/sqrt(length(x));

Xred = BestCoeff(fx, coeff);
Yred = BestCoeff(fy, coeff);

lbdist = sqrt(sum(abs(Xred - Yred).^2));
end

function X = BestCoeff(X, coeff)
% Zero every entry of X except the 'coeff' entries of largest energy.
Y = abs(X).^2;
[~, Yorder] = sort(-Y);   % descending energy
X(Yorder((coeff+1):end)) = 0;
end

function lb = lb_keogh(T, Q, U, L) %#ok<INUSL>
% LB_Keogh-style bound: norm of the parts of T that leave the envelope
% [L, U]. Q is accepted for interface compatibility; only its envelope
% (U, L, both column vectors) is used.
T = T.';
above = (T > U) .* (T - U);
below = (T < L) .* (L - T);
lb = sqrt(sum([above; below].^2));
end

function b = lower_b(t, w)
% Running minimum of t over the window [i-w, i+w], clipped to the series.
% w is used directly as an index offset, so it must be a non-negative
% integer number of samples.
l = length(t);
b = zeros(l,1);
for i = 1 : l
    b(i) = min(t(max(1,i-w):min(l,i+w)));
end
end

function b = upper_b(t, w)
% Running maximum of t over the window [i-w, i+w], clipped to the series.
% w is used directly as an index offset, so it must be a non-negative
% integer number of samples.
l = length(t);
b = zeros(l,1);
for i = 1 : l
    b(i) = max(t(max(1,i-w):min(l,i+w)));
end
end

-------------------------------------------------------------------------------- /SPIRAL/exactCDmex.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 
(C) 2013 Quan Wang , 3 | * Signal Analysis and Machine Perception Laboratory, 4 | * Department of Electrical, Computer, and Systems Engineering, 5 | * Rensselaer Polytechnic Institute, Troy, NY 12180, USA 6 | */ 7 | 8 | /** 9 | * This is the C/MEX code of dynamic time warping of two signals 10 | * 11 | * compile: 12 | * mex dtw_c.c 13 | * 14 | * usage: 15 | * d=dtw_c(s,t) or d=dtw_c(s,t,w) 16 | * where s is signal 1, t is signal 2, w is window parameter 17 | */ 18 | 19 | #include "mex.h" 20 | #include 21 | #include 22 | #include 23 | 24 | double cubicRoot(double d) 25 | { 26 | if(d<0.0) 27 | return -cubicRoot(-d); 28 | else 29 | return pow(d,1.0/3.0); 30 | } 31 | 32 | /* This function solves the following problem: 33 | min_{x>=0} x^3+ax+b */ 34 | double root_c(double a, double b) 35 | { 36 | double x=0, y=0; 37 | double a3=4*pow(a,3), b2=27*pow(b,2); 38 | double delta = a3+b2; 39 | int k; 40 | if(delta<=0) /* 3 distinct real roots or 1 real multiple solution */ 41 | { 42 | double r3 = 2*sqrt(-a/3); 43 | double th3 = atan2(sqrt(-delta/108),-b/2)/3; 44 | double ymax=0, xopt=0; 45 | for(k=0;k<=4;k=k+2) 46 | { 47 | x = r3*cos(th3+((k*3.14159265)/3)); 48 | y=pow(x,4)/4+a*pow(x,2)/2+b*x; 49 | if(y, 3 | * Signal Analysis and Machine Perception Laboratory, 4 | * Department of Electrical, Computer, and Systems Engineering, 5 | * Rensselaer Polytechnic Institute, Troy, NY 12180, USA 6 | */ 7 | 8 | /** 9 | * This is the C/MEX code of dynamic time warping of two signals 10 | * 11 | * compile: 12 | * mex dtw_c.c 13 | * 14 | * usage: 15 | * d=dtw_c(s,t) or d=dtw_c(s,t,w) 16 | * where s is signal 1, t is signal 2, w is window parameter 17 | */ 18 | 19 | #include "mex.h" 20 | #include 21 | #include 22 | #include 23 | 24 | double vectorDistance(double *s, double *t, int ns, int nt, int k, int i, int j) 25 | { 26 | double result=0; 27 | double ss,tt; 28 | int x; 29 | for(x=0;x0 ? 
ns-nt : nt-ns; 43 | double ** D; 44 | int i,j; 45 | int j1,j2; 46 | double cost,temp; 47 | 48 | 49 | if(w!=-1 && w1 ? i-w : 1; 76 | j2= i+w1) 137 | { 138 | mexErrMsgIdAndTxt( "MATLAB:dtw_c:invalidNumOutputs", 139 | "dtw_c: One output required."); 140 | } 141 | 142 | /* check to make sure w is a scalar */ 143 | if(nrhs==2) 144 | { 145 | w=-1; 146 | } 147 | else if(nrhs==3) 148 | { 149 | if( !mxIsDouble(prhs[2]) || mxIsComplex(prhs[2]) || 150 | mxGetN(prhs[2])*mxGetM(prhs[2])!=1 ) 151 | { 152 | mexErrMsgIdAndTxt( "MATLAB:dtw_c:wNotScalar", 153 | "dtw_c: Input w must be a scalar."); 154 | } 155 | 156 | /* get the scalar input w */ 157 | w = (int) mxGetScalar(prhs[2]); 158 | } 159 | 160 | 161 | /* create a pointer to the input matrix s */ 162 | s = mxGetPr(prhs[0]); 163 | 164 | /* create a pointer to the input matrix t */ 165 | t = mxGetPr(prhs[1]); 166 | 167 | /* get the dimensions of the matrix input s */ 168 | ns = mxGetM(prhs[0]); 169 | k = mxGetN(prhs[0]); 170 | 171 | /* get the dimensions of the matrix input t */ 172 | nt = mxGetM(prhs[1]); 173 | if(mxGetN(prhs[1])!=k) 174 | { 175 | mexErrMsgIdAndTxt( "MATLAB:dtw_c:dimNotMatch", 176 | "dtw_c: Dimensions of input s and t must match."); 177 | } 178 | 179 | /* set the output pointer to the output matrix */ 180 | plhs[0] = mxCreateDoubleMatrix( 1, 1, mxREAL); 181 | 182 | /* create a C pointer to a copy of the output matrix */ 183 | dp = mxGetPr(plhs[0]); 184 | 185 | /* call the C subroutine */ 186 | dp[0]=dtw_c(s,t,w,ns,nt,k); 187 | 188 | return; 189 | 190 | } 191 | -------------------------------------------------------------------------------- /RWS/utilities/dtw_c.c: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013 Quan Wang , 3 | * Signal Analysis and Machine Perception Laboratory, 4 | * Department of Electrical, Computer, and Systems Engineering, 5 | * Rensselaer Polytechnic Institute, Troy, NY 12180, USA 6 | */ 7 | 8 | /** 9 | * This is the C/MEX 
code of dynamic time warping of two signals 10 | * 11 | * compile: 12 | * mex dtw_c.c 13 | * 14 | * usage: 15 | * d=dtw_c(s,t) or d=dtw_c(s,t,w) 16 | * where s is signal 1, t is signal 2, w is window parameter 17 | */ 18 | 19 | #include "mex.h" 20 | #include 21 | #include 22 | #include 23 | 24 | double vectorDistance(double *s, double *t, int ns, int nt, int k, int i, int j) 25 | { 26 | double result=0; 27 | double ss,tt; 28 | int x; 29 | for(x=0;x0 ? ns-nt : nt-ns; 43 | double ** D; 44 | int i,j; 45 | int j1,j2; 46 | double cost,temp; 47 | 48 | // printf("ns=%d, nt=%d, w=%d, s[0]=%f, t[0]=%f\n",ns,nt,w,s[0],t[0]); 49 | 50 | 51 | if(w!=-1 && w1 ? i-w : 1; 81 | j2= i+w1) 142 | { 143 | mexErrMsgIdAndTxt( "MATLAB:dtw_c:invalidNumOutputs", 144 | "dtw_c: One output required."); 145 | } 146 | 147 | /* check to make sure w is a scalar */ 148 | if(nrhs==2) 149 | { 150 | w=-1; 151 | } 152 | else if(nrhs==3) 153 | { 154 | if( !mxIsDouble(prhs[2]) || mxIsComplex(prhs[2]) || 155 | mxGetN(prhs[2])*mxGetM(prhs[2])!=1 ) 156 | { 157 | mexErrMsgIdAndTxt( "MATLAB:dtw_c:wNotScalar", 158 | "dtw_c: Input w must be a scalar."); 159 | } 160 | 161 | /* get the scalar input w */ 162 | w = (int) mxGetScalar(prhs[2]); 163 | } 164 | 165 | 166 | /* create a pointer to the input matrix s */ 167 | s = mxGetPr(prhs[0]); 168 | 169 | /* create a pointer to the input matrix t */ 170 | t = mxGetPr(prhs[1]); 171 | 172 | /* get the dimensions of the matrix input s */ 173 | ns = mxGetM(prhs[0]); 174 | k = mxGetN(prhs[0]); 175 | 176 | /* get the dimensions of the matrix input t */ 177 | nt = mxGetM(prhs[1]); 178 | if(mxGetN(prhs[1])!=k) 179 | { 180 | mexErrMsgIdAndTxt( "MATLAB:dtw_c:dimNotMatch", 181 | "dtw_c: Dimensions of input s and t must match."); 182 | } 183 | 184 | /* set the output pointer to the output matrix */ 185 | plhs[0] = mxCreateDoubleMatrix( 1, 1, mxREAL); 186 | 187 | /* create a C pointer to a copy of the output matrix */ 188 | dp = mxGetPr(plhs[0]); 189 | 190 | /* call the C subroutine 
*/ 191 | dp[0]=dtw_c(s,t,w,ns,nt,k); 192 | 193 | return; 194 | 195 | } 196 | -------------------------------------------------------------------------------- /RunLinearSVMRWS.m: --------------------------------------------------------------------------------
function RunLinearSVMRWS(DataSetStartIndex, DataSetEndIndex)
% RunLinearSVMRWS  Linear-SVM evaluation of the stored RWS representations.
%
% For every dataset whose position in the sorted dataset list lies in
% [DataSetStartIndex, DataSetEndIndex]: grid-search the cost parameter
% with 10-fold cross validation, train on the training rows of the stored
% representation, predict the test rows, and write the accumulated
% Results matrix to 'RunLinearSVMRWS/RunLinearSVMRWS_Dataset_<i>'.
%
% Results columns: 3 = selected log2(C), 6 = CV accuracy, 8 = grid-search
% time, 9 = test accuracy, 10 = training time, 11 = prediction time.
% Columns 1, 2, 4, 5 and 7 are written as 0.

% dir() lists '.' and '..' first; entries 3..130 are taken as the 128
% datasets of the UCR Archive 2018.
listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = sort({listing(3:130).name});
numDatasets = length(Datasets);

Results = zeros(numDatasets,11);

addpath(genpath('LibLinear/matlab/.'));

distcomp.feature( 'LocalUseMpiexec', false )

% Drop any existing pool and start a fresh one with 22 workers.
delete(gcp('nocreate'));
parpool(22);

rng('default')

for i = 1:numDatasets

    if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
        continue;
    end

    dsName = char(Datasets(i));
    disp(['Dataset being processed: ', dsName]);
    DS = LoadUCRdataset(dsName);

    [bestLog2C, cvAccuracy, gridSeconds] = GridSearchLinearSVM2(-10,0.1,20,DS.TrainInstancesCount,DS.TrainClassLabels,Datasets,i);

    Z = dlmread( strcat('RWSREPRESENTATIONS/', dsName, '/RWS_Supervised.Zrep') );

    nTrain = DS.TrainInstancesCount;
    trainFeatures = sparse(Z(1:nTrain,:));
    testFeatures = sparse(Z(nTrain+1:end,:));

    tic;
    trainOptions = ['-e 0.001 -s 2 -c ', num2str(2^bestLog2C)];
    model = train(DS.TrainClassLabels, trainFeatures, trainOptions);
    trainSeconds = toc;

    tic;
    [~, testAccuracy, ~] = predict(DS.TestClassLabels, testFeatures, model);
    predictSeconds = toc;

    % Accuracies are multiplied by 0.01 before they are stored.
    Results(i,:) = [0, 0, bestLog2C, 0, ...
                    0, cvAccuracy*0.01, 0, ...
                    gridSeconds, ...
                    testAccuracy(1)*0.01, trainSeconds, predictSeconds];

    dlmwrite( ['RunLinearSVMRWS/RunLinearSVMRWS_Dataset_', num2str(i)], Results, 'delimiter', '\t');

end

delete(gcp('nocreate'));

end

function [Thebestcost,Thebestacc,Thebestiming] = GridSearchLinearSVM2(GridStart,GridStep,GridEnd,TrainInstancesCount,TrainClassLabels,Datasets,DatasetsNumber)
% Grid search over log2(C) = GridStart:GridStep:GridEnd with 10-fold CV.
%
% Returns the log2(C) with the highest CV value (the last grid point on
% ties), that CV value, and the summed per-grid-point wall time.

log2cTmp = GridStart:GridStep:GridEnd;
gridSize = length(log2cTmp);

bestacc = zeros(1,gridSize);
bestcost = zeros(1,gridSize);
besttiming = zeros(1,gridSize);

ZRep = dlmread( strcat('RWSREPRESENTATIONS/', char(Datasets(DatasetsNumber)), '/RWS_Supervised.Zrep') );
ZRepTrain = sparse(ZRep(1:TrainInstancesCount,:));

parfor log2cNEW = 1:gridSize

    log2cNEW
    tic;
    log2c = log2cTmp(log2cNEW);
    cvOptions = ['-q -e 0.001 -s 2 -v 10 -c ', num2str(2^log2c)];

    % With -v, train returns the cross-validation result instead of a model.
    bestacc(log2cNEW) = train(TrainClassLabels, ZRepTrain, cvOptions);
    bestcost(log2cNEW) = log2c;
    besttiming(log2cNEW) = toc;

end

Thebestacc = max(bestacc);
Thebestcost = bestcost(find(bestacc==Thebestacc,1,'last'));
Thebestiming = sum(besttiming);

end

-------------------------------------------------------------------------------- /RunLinearSVMSPIRAL.m: 
--------------------------------------------------------------------------------
function RunLinearSVMSPIRAL(DataSetStartIndex, DataSetEndIndex)
% RunLinearSVMSPIRAL  Linear-SVM evaluation of the stored SPIRAL
% representations.
%
% For every dataset whose position in the sorted dataset list lies in
% [DataSetStartIndex, DataSetEndIndex]: grid-search the cost parameter
% with 10-fold cross validation, train on the training rows of the stored
% representation, predict the test rows, and write the accumulated
% Results matrix to 'RunLinearSVMSPIRAL/RunLinearSVMSPIRAL_Dataset_<i>'.
%
% Results columns: 3 = selected log2(C), 6 = CV accuracy, 8 = grid-search
% time, 9 = test accuracy, 10 = training time, 11 = prediction time.
% Columns 1, 2, 4, 5 and 7 are written as 0.
%
% NOTE(review): the representation file read below is named
% 'SIDLREPRESENTATIONS.Zrep' although it lives under
% 'SPIRALREPRESENTATIONS/' - confirm that this is the intended file.

% dir() lists '.' and '..' first; entries 3..130 are taken as the 128
% datasets of the UCR Archive 2018.
listing = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = sort({listing(3:130).name});
numDatasets = length(Datasets);

Results = zeros(numDatasets,11);

addpath(genpath('LibLinear/matlab/.'));

distcomp.feature( 'LocalUseMpiexec', false )

% Drop any existing pool and start a fresh one with 22 workers.
delete(gcp('nocreate'));
parpool(22);

rng('default')

for i = 1:numDatasets

    if ~(i>=DataSetStartIndex && i<=DataSetEndIndex)
        continue;
    end

    dsName = char(Datasets(i));
    disp(['Dataset being processed: ', dsName]);
    DS = LoadUCRdataset(dsName);

    [bestLog2C, cvAccuracy, gridSeconds] = GridSearchLinearSVM2(-10,0.1,20,DS.TrainInstancesCount,DS.TrainClassLabels,Datasets,i);

    Z = dlmread( strcat('SPIRALREPRESENTATIONS/', dsName, '/SIDLREPRESENTATIONS.Zrep') );

    nTrain = DS.TrainInstancesCount;
    trainFeatures = sparse(Z(1:nTrain,:));
    testFeatures = sparse(Z(nTrain+1:end,:));

    tic;
    trainOptions = ['-e 0.001 -s 2 -c ', num2str(2^bestLog2C)];
    model = train(DS.TrainClassLabels, trainFeatures, trainOptions);
    trainSeconds = toc;

    tic;
    [~, testAccuracy, ~] = predict(DS.TestClassLabels, testFeatures, model);
    predictSeconds = toc;

    % Accuracies are multiplied by 0.01 before they are stored.
    Results(i,:) = [0, 0, bestLog2C, 0, ...
                    0, cvAccuracy*0.01, 0, ...
                    gridSeconds, ...
                    testAccuracy(1)*0.01, trainSeconds, predictSeconds];

    dlmwrite( ['RunLinearSVMSPIRAL/RunLinearSVMSPIRAL_Dataset_', num2str(i)], Results, 'delimiter', '\t');

end

delete(gcp('nocreate'));

end

function [Thebestcost,Thebestacc,Thebestiming] = GridSearchLinearSVM2(GridStart,GridStep,GridEnd,TrainInstancesCount,TrainClassLabels,Datasets,DatasetsNumber)
% Grid search over log2(C) = GridStart:GridStep:GridEnd with 10-fold CV.
%
% Returns the log2(C) with the highest CV value (the last grid point on
% ties), that CV value, and the summed per-grid-point wall time.

log2cTmp = GridStart:GridStep:GridEnd;
gridSize = length(log2cTmp);

bestacc = zeros(1,gridSize);
bestcost = zeros(1,gridSize);
besttiming = zeros(1,gridSize);

ZRep = dlmread( strcat('SPIRALREPRESENTATIONS/', char(Datasets(DatasetsNumber)), '/SIDLREPRESENTATIONS.Zrep') );
ZRepTrain = sparse(ZRep(1:TrainInstancesCount,:));

parfor log2cNEW = 1:gridSize

    log2cNEW
    tic;
    log2c = log2cTmp(log2cNEW);
    cvOptions = ['-q -e 0.001 -s 2 -v 10 -c ', num2str(2^log2c)];

    % With -v, train returns the cross-validation result instead of a model.
    bestacc(log2cNEW) = train(TrainClassLabels, ZRepTrain, cvOptions);
    bestcost(log2cNEW) = log2c;
    besttiming(log2cNEW) = toc;

end

Thebestacc = max(bestacc);
Thebestcost = bestcost(find(bestacc==Thebestacc,1,'last'));
Thebestiming = sum(besttiming);

end

-------------------------------------------------------------------------------- /dtw.c: -------------------------------------------------------------------------------- 1 | #include "mex.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 
#include 7 | 8 | #define min(x, y) ((x)<(y)?(x):(y)) 9 | #define max(x, y) ((x)>(y)?(x):(y)) 10 | #define dist(x, y)((x-y)*(x-y)) 11 | 12 | 13 | #define INF 1e20 /*Pseudo Infitinte number for this code */ 14 | 15 | 16 | 17 | /* 18 | Calculate Dynamic Time Wrapping distance 19 | A,B: data and query, respectively 20 | r : size of Sakoe-Chiba warpping band */ 21 | double dtw(double* A, double* B, int m, double *radius, int *path1, int *path2, int *pLen) { 22 | 23 | double *cost; 24 | double *cost_prev; 25 | double *cost_tmp; 26 | int i, j, k; 27 | double x, y, z; 28 | double final_dtw ; 29 | int r; 30 | 31 | int **trace; 32 | 33 | r = (int)radius[0]; 34 | 35 | /* Traceback matrix width m x 2r+1 */ 36 | trace = (int **)malloc(sizeof(int *)*m); 37 | for (i=0;i2*r)) x = INF; 63 | else x = cost_prev[k+1]; 64 | /* Diagonal */ 65 | if ((i-1<0)||(j-1<0)) z = INF; 66 | else z = cost_prev[k]; 67 | 68 | /* Classic DTW calculation */ 69 | cost[k] = min( min( x, y) , z) + dist(A[i], B[j]); 70 | /* Let's store the path information */ 71 | if (x <= min(y, z)) 72 | trace[i][k]= 2; /* up */ 73 | else if (y <= min(x, z)) 74 | trace[i][k]=0; /* left */ 75 | else 76 | trace[i][k]=1; /* diag */ 77 | } 78 | 79 | 80 | 81 | /* Move current array to previous array. */ 82 | cost_tmp = cost; 83 | cost = cost_prev; 84 | cost_prev = cost_tmp; 85 | } 86 | k--; 87 | 88 | /* the DTW distance is in the last cell in the matrix of size O(m^2) or at the middle of our array. 
*/ 89 | final_dtw = cost_prev[k]; 90 | free(cost); 91 | free(cost_prev); 92 | 93 | /* Print trace matrix */ 94 | /* for (i=0;i 3) { 132 | mexErrMsgTxt("This function only returns 3 output value."); 133 | } 134 | 135 | /* retrieve input arguments */ 136 | q = mxGetPr(prhs[0]); /* pointer to real values of first argument */ 137 | c = mxGetPr(prhs[1]); /* pointer to real values of second argument */ 138 | r = mxGetPr(prhs[2]); /* pointer to real value of third argument */ 139 | 140 | /* check series lengths */ 141 | ql = mxGetNumberOfElements(prhs[0]); 142 | cl = mxGetNumberOfElements(prhs[1]); 143 | if (abs(ql - cl) > r[0]) { 144 | mexErrMsgTxt("Actual distance falls outside radius constraint."); 145 | } 146 | 147 | /* allocate memory for the return value */ 148 | plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL); 149 | 150 | path1 = (int *)malloc(ql*(2*((int)r[0])+1)* sizeof(int)); 151 | path2 = (int *)malloc(ql*(2*((int)r[0])+1)* sizeof(int)); 152 | 153 | /* printf("Query Length:%d Path Length:%d\n", ql,ql*(2*((int)r[0])+1)); */ 154 | d = mxGetPr(plhs[0]); /* pointer to Matlab managed memory for result */ 155 | 156 | d[0]=dtw(q, c, ql, r, path1, path2, &pLen); 157 | 158 | /* printf("Path length %d\n",pLen); */ 159 | 160 | plhs[1] = mxCreateDoubleMatrix(1, pLen, mxREAL); 161 | plhs[2] = mxCreateDoubleMatrix(1, pLen, mxREAL); 162 | 163 | mxPath1 = mxGetPr(plhs[1]); 164 | mxPath2 = mxGetPr(plhs[2]); 165 | 166 | for (i=0; i < pLen ; i++) { 167 | mxPath1[i] = path1[i] + 1 ; /* 1 based indexing */ 168 | mxPath2[i] = path2[i] + 1 ; /* 1 based indexing */ 169 | } 170 | 171 | free(path1); 172 | free(path2); 173 | } 174 | -------------------------------------------------------------------------------- /RunDictEvaluation.m: -------------------------------------------------------------------------------- 1 | function RunDictEvaluation(DataSetStartIndex, DataSetEndIndex, Method, gamma) 2 | 3 | Methods = [cellstr('Random'), 'KShape', 'AFKMC2', 
'GibbsDPP','SRFT','LevScore','Gaussian']; 4 | 5 | % first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets 6 | dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/'); 7 | Datasets = {dir_struct(3:130).name}; 8 | 9 | % Sort Datasets 10 | [Datasets, DSOrder] = sort(Datasets); 11 | 12 | addpath(genpath('NystromBestiary/.')); 13 | 14 | for i = 1:length(Datasets) 15 | 16 | if (i>=DataSetStartIndex & i<=DataSetEndIndex) 17 | 18 | Results = zeros(length(Datasets),4); 19 | 20 | disp(['Dataset being processed: ', char(Datasets(i))]); 21 | DS = LoadUCRdataset(char(Datasets(i))); 22 | % Get Kernel Matrix 23 | 24 | KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix') ); 25 | 26 | NumOfSamples = min(max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] ),100); 27 | 28 | Runtime = 0; 29 | for rep = 1 : 10 30 | rep 31 | rng(rep); 32 | 33 | if Method==1 34 | Dictionary = dlmread( strcat( 'DICTIONARIESRANDOM/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') ); 35 | elseif Method==2 36 | Dictionary = dlmread( strcat( 'DICTIONARIESKSHAPE/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') ); 37 | elseif Method==3 38 | Dictionary = dlmread( strcat( 'DICTIONARIESKSHAPE/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char('KShape'), '_', num2str(rep) ,'.KppCentroids') ); 39 | elseif Method==4 40 | Dictionary = dlmread( strcat( 'DICTIONARIESGIBBSDPP/',char(Datasets(i)),'/','RunDLFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) ,'.Dictionary') ); 41 | elseif Method==5 42 | 43 | tic; 44 | in.A = KM; 45 | in.linearkernelflag = 0; 46 | in.k = 5; 47 | in.l = NumOfSamples; 48 | in.q = 1; 49 | out = srft_Nystrom(in); 50 | Runtime = Runtime + toc; 51 | 52 | elseif Method==6 53 | 54 | tic; 55 | in.A = KM; 56 | in.linearkernelflag = 0; 57 | in.k = 5; 
58 | in.l = NumOfSamples; 59 | in.q = 1; 60 | 61 | [U, Sigma] = orderedeigs(in.A, in.k+1); 62 | U1t = U(:, 1:in.k)'; 63 | levscores = sum(U1t.*U1t); 64 | in.levscorecomputationtime = 0; 65 | in.levscoreprobs = levscores/in.k; 66 | 67 | out = levscore_Nystrom(in); 68 | 69 | Runtime = Runtime + toc; 70 | 71 | elseif Method==7 72 | 73 | tic; 74 | in.A = KM; 75 | in.linearkernelflag = 0; 76 | in.k = 5; 77 | in.l = NumOfSamples; 78 | in.q = 1; 79 | out = gaussian_Nystrom(in); 80 | Runtime = Runtime + toc; 81 | end 82 | 83 | if Method==5 84 | [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, out.C, out.Winv); 85 | elseif Method==6 86 | [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, out.C, out.Winv); 87 | elseif Method==7 88 | [AbsFroError,RelFroError,NormFroError] = NystromMatrixGivenWandE(KM, out.C, out.Winv); 89 | else 90 | [AbsFroError,RelFroError,NormFroError] = NystromMatrixDictionary(KM, DS.Data, Dictionary, gamma); 91 | end 92 | ResultsTmp = [AbsFroError,RelFroError,NormFroError,Runtime]; 93 | 94 | % 95 | Results(i,:) = Results(i,:) + ResultsTmp; 96 | %if rep==10 97 | % ResultsRep10 = Results(i,:) ./ 10; 98 | % dlmwrite( strcat( 'EvaluateDictionaries/','RESULTS_EvaluateDictionaries_10Rep_', char(Methods(Method)), '_', num2str(gamma), '_' ,num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex) ,'.results'), ResultsRep10, 'delimiter', '\t'); 99 | % 100 | %end 101 | end 102 | Results(i,:) = Results(i,:) ./ 10; 103 | 104 | dlmwrite( strcat( '/rigel/dsi/users/ikp2103/VLDBGRAIL/RunDictEvaluation/','RunDictEvaluation_10Rep_', char(Methods(Method)), '_', num2str(gamma), '_' ,num2str(i) ,'.results'), Results, 'delimiter', '\t'); 105 | 106 | end 107 | 108 | 109 | end 110 | 111 | end -------------------------------------------------------------------------------- /kShapeCentroids.m: -------------------------------------------------------------------------------- 1 | function [mem,cent,iter,sumd, centKpp, centKppSmplPoints, 
DistValues, DistShifts,DistComp,RT1,DistComp2,RT2] = kShapeCentroids(A, K, Seeding)
% kShapeCentroids  Shape-based clustering of the rows of A using SBD.
%
% Inputs
%   A       - n-by-m matrix: n time series of length m, one per row.
%   K       - number of clusters.
%   Seeding - 1: initialise the centroids with Seeding_SBD and derive the
%             initial memberships from them; any other value: random
%             memberships and all-zero centroids.
%
% Outputs
%   mem               - n-by-1 cluster index of every series.
%   cent              - K-by-m centroids.
%   iter              - number of refinement iterations executed (max 100).
%   sumd              - sum of every series' distance to its closest centroid.
%   centKpp           - seeded centroids ([] unless Seeding == 1).
%   centKppSmplPoints - rows of A used as seeded centroids ([] unless Seeding == 1).
%   DistValues        - n-by-K SBD distances of the last iteration.
%   DistShifts        - n-by-K shifts returned by SBD in the last iteration.
%   DistComp          - distance computations counted (seeding + assignment).
%   RT1               - seconds spent in the refinement loop.
%   DistComp2         - distance computations counted during seeding.
%   RT2               - seconds spent seeding (0 when Seeding ~= 1).

DistComp = 0;
DistComp2 = 0;
RT2 = 0;   % previously left unassigned when Seeding ~= 1
centKpp = [];
centKppSmplPoints = [];

n = size(A, 1);

if Seeding==1
    tic;
    [centKpp,centKppSmplPoints,DistComp2] = Seeding_SBD(A, K, 10);
    RT2 = toc;
    cent = centKpp;
    DistComp = DistComp + DistComp2;
    [~, ~, ~, mem] = Cent2Membership(A, cent, 2);
else
    mem = ceil(K*rand(n, 1));
    cent = zeros(K, size(A, 2));
end

DistValues = zeros(n,K);
DistShifts = zeros(n,K);
tic;
for iter = 1:100
    disp(iter);
    prev_mem = mem;

    % Refinement step: recompute every centroid from its current members.
    % The distance computations done here are intentionally not added to
    % DistComp (the original code left that accumulation disabled).
    for k = 1:K
        centTmp = kshape_centroid(mem, A, k, cent(k,:));
        cent(k,:) = centTmp';
    end

    % Assignment step: distance of every series to every centroid.
    for i = 1:n
        for k = 1:K
            [dist, shift, ~] = SBD(A(i,:), zscore(cent(k,:)));
            DistComp = DistComp + 1;
            DistValues(i,k) = dist;
            DistShifts(i,k) = shift;
        end
    end

    [val, mem] = min(DistValues,[],2);
    sumd = sum(val);
    if norm(prev_mem-mem) == 0
        break;   % memberships unchanged
    end
end
RT1 = toc;
end

function [ksc,DistComp] = kshape_centroid(mem, A, k, cur_center)
% Centroid of cluster k: members are aligned to the current centre with
% SBD, z-normalised, summed, mean-centred and scaled to unit norm.
% An empty cluster returns a random row of A; a single-member cluster
% returns that (aligned) member.
% DistComp counts the SBD calls made here.

members = find(mem == k);
DistComp = 0;

% One row per member; preallocated instead of growing inside the loop.
% Assumes the aligned series returned by SBD has size(A,2) samples.
a = zeros(numel(members), size(A,2));
for r = 1:numel(members)
    i = members(r);
    if sum(cur_center) == 0
        % No usable reference yet (e.g. the all-zero initial centroid).
        a(r,:) = A(i,:);
    else
        [~, ~, opt_a] = SBD(zscore(cur_center), A(i,:));
        DistComp = DistComp + 1;
        a(r,:) = opt_a;
    end
end

if size(a,1) == 0
    permed_index = randperm(size(A,1));
    ksc = A(permed_index(1),:);
    return;
elseif size(a,1) == 1
    ksc = a;
    return;
end

ncolumns = size(a,2);
Y = zscore(a,[],2);
P = (eye(ncolumns) - 1 / ncolumns * ones(ncolumns));   % centring matrix
summed = sum(Y,1)*P;
ksc = summed/norm(summed);

ksc = zscore(ksc);

end

function [C,SmplPoints,DistComp] = Seeding_SBD(A, k, m)
% Calculate AFK-MC2 centers and distances, with correlation distance
% Usage: [C, SmplPoints, DistComp] = Seeding_SBD(A, k, m)
% A is d x n data matrix, where d is #objects and n is #timeperiods
% k is desired numbered of centers
% m is chain length (if <1, then expressed as a fraction of the d objects)
% C holds the k chosen rows of A, SmplPoints their row indices in A and
% DistComp the number of distance computations counted.
% Author: Terence Lim
% Original paper/code by Bachem, Lucic, Hassani and Krause "Fast and
% Provably Good Seedings for k-Means"

DistComp = 0;
d = size(A,1); % d rows of objects
if (m < 1) % chain length expressed as a fraction of the objects
    m = ceil(m * d);
end

% The original drew two independent random indices here: the first was
% stored in SmplPoints, the second selected the centre, so SmplPoints(1)
% did not identify C(1,:). One draw is still discarded so that, for a
% given RNG seed, the same centres are selected as before.
skippedDraw = rand; %#ok<NASGU>
firstIdx = ceil(d * rand);
SmplPoints = firstIdx;
C = A(firstIdx, :); % sample first center

q = Data2Centroids_SBD(A, C); % compute proposal

DistComp = DistComp + size(A,1)*(size(C,1));

q(isnan(q)) = 1;
if (sum(q) == 0)
    q = repmat(1/d, size(q,1),size(q,2));
else
    q = (q / sum(q)) + (1 / d);
end
q = q / sum(q);

for i=1:(k-1) % sequentially pick centers
    cand_ind = randsample(d, m, true, q);
    q_cand = q(cand_ind); % extract proposal probability
    p_cand = Data2Centroids_SBD(A(cand_ind,:), C); % compute potentials

    DistComp = DistComp + numel(cand_ind)*(size(C,1));

    rand_a = random('unif',0,1,m,1); % compute acceptance probabilities
    for j=1:m % mix up to chain length m
        cand_prob = p_cand(j)/q_cand(j);
        % j == 1 is tested first: curr_prob is not defined before the
        % first candidate of the first chain has been accepted.
        if (j == 1 || curr_prob == 0.0 || cand_prob/curr_prob > rand_a(j))
            curr_ind = j;
            curr_prob = cand_prob;
        end
    end
    SmplPoints(i+1) = cand_ind(curr_ind);
    C(i+1,:) = A(cand_ind(curr_ind),:);
end
end

function [vals, classes, distances, sumd] = Data2Centroids_SBD(A, c)
% A is d x n data matrix
% c is k x n centroids
% Returns dx1 vals (distance to the closest centre), dx1 class labels,
% dxk distances to every center in c, kx1 sumd within-cluster sum of
% distances. The distance is 1 - max(NCCc(series, centre)).
% Author: Terence Lim

d = size(A,1); % number of data objects
k = size(c,1); % number of clusters
distances = zeros(d,k);
sumd = zeros(k,1);

for i=1:d
    for j=1:k
        r = max( NCCc(A(i,:),c(j,:)) );
        distances(i,j) = 1 - r;
    end
end
[vals, classes] = min(distances,[],2);
for i=1:k
    sumd(i,1) = sum(vals(classes==i));
end

end

function [SSError, MSError, STDError, labels] = Cent2Membership(A, Centroids, DistanceIndex)
% A is d x n data matrix
% Centroids is k x n centroids
% DistanceIndex is 1 for ED and 2 for SBD; for any other value the
% distances stay 0
% SSError, MSError and STDError are the sum, mean and standard deviation
% of the distances to the closest centroid
% labels is the cluster membership

d = size(A,1); % number of data objects
k = size(Centroids,1); % number of clusters

distances = zeros(d,k);

for i=1:d
    for j=1:k
        if DistanceIndex==1
            distances(i,j) = ED(A(i,:),Centroids(j,:));
        elseif DistanceIndex==2
            distances(i,j) = 1-max(NCCc(A(i,:),Centroids(j,:)));
        end
    end
end

[vals, labels] = min(distances,[],2);

SSError = sum(vals);
MSError = mean(vals);
STDError = std(vals);

end

-------------------------------------------------------------------------------- /RWS/rws_gridsearch_CV_mulvar.m: -------------------------------------------------------------------------------- 1 | % This script generates low-rank approximation of latent kernel matrix using 2 | % random features approach based on dtw like distance for multi-variate 3 | 
% time-series datasets. Use LIBLINEAR to perform grid search with
% K-fold cross-validation!
%
% Author: Lingfei Wu
% Date: 01/20/2019


clear,clc
% Reuse an already open pool; parpool errors if one exists.
if isempty(gcp('nocreate'))
    parpool('local');
end
addpath(genpath('utilities'));
file_dir = './datasets/';

% List all datasets
filename_list = {'auslan'};

DMin = 1;
DMax_list = [5 10 15 20 25 30 35 40 45 50 55 60 65 70 75 80 85 90 95 100];
sigma_list = [1e-3 3e-3 1e-2 3e-2 0.10 0.14 0.19 0.28 0.39 0.56 ...
    0.79 1.12 1.58 2.23 3.16 4.46 6.30 8.91 10 31.62 1e2 3e2 1e3];

R = 32;  % number of random time-series generated
CV = 10; % number of folds of cross validation
for jjj = 1:length(filename_list)
    filename = filename_list{jjj};
    disp(filename);

    % Start every dataset from an empty struct so that fields written for
    % a previous dataset cannot leak into this dataset's saved results.
    info = struct();
    info.aveAccu_best = 0;
    info.valAccuHist = [];
    info.DMaxHist = [];
    info.sigmaHist = [];
    info.lambda_invHist = [];
    for jj = 1:length(DMax_list)
        for j = 1:length(sigma_list)
            DMax = DMax_list(jj)
            sigma = sigma_list(j)

            % load, shuffle, and prepare the training data
            timer_start = tic;
            Data = load(strcat(file_dir,filename,'/',filename,'.mat'));
            trainX = Data.train_X;
            trainy = Data.train_Y;
            N = size(trainX,1);
            trainData = zeros(N, R+1);
            shuffle_index = randperm(N);
            trainX = trainX(shuffle_index); % shuffle the data
            trainy = trainy(shuffle_index);
            % generate random time series with variable length, where each
            % value in random series is sampled from Gaussian distribution
            % parameterized by sigma.
            rng('default')
            sampleX = cell(R,1);
            d = size(trainX{1},1); % number of variates
            for i=1:R
                D = randi([DMin, DMax],1);
                sampleX{i} = randn(d, D)./sigma; % gaussian
            end
            trainFeaX_random = dtw_similarity_cell_mulvar(trainX, sampleX);
            trainFeaX_random = trainFeaX_random/sqrt(R);
            trainData(:,2:end) = trainFeaX_random;
            % convert user labels to uniform format binary(-1,1) &
            % multiclasses (1,2,..,k)
            % The masks are taken from an untouched copy of the labels.
            % Remapping in place can merge classes when a new code equals
            % a not-yet-processed original label (e.g. labels [2 3 5]:
            % 5 -> 3 collides with the original 3).
            labels = unique(trainy);
            numClasses = length(labels);
            original_y = trainy;
            if numClasses > 2
                for i = 1:numClasses
                    trainy(original_y == labels(i)) = i;
                end
            elseif numClasses == 2
                trainy(original_y == labels(1)) = -1;
                trainy(original_y == labels(2)) = 1;
            else
                error('rws_gridsearch:tooFewClasses', ...
                    'Training labels must contain at least two classes.');
            end
            trainData(:,1) = trainy;
            telapsed_fea_gen = toc(timer_start)

            disp('------------------------------------------------------');
            disp('LIBLinear performs basic grid search by varying lambda');
            disp('------------------------------------------------------');
            % Linear Kernel
            lambda_inverse = [1e-5 1e-4 1e-3 1e-2 1e-1 1 1e1 1e2 1e3 1e4 1e5];
            % Fold sizes: the first mod(N,CV) folds get one extra sample.
            mod_remain = mod(N, CV);
            div_remain = floor(N/CV);
            for i=1:length(lambda_inverse)
                valAccu = zeros(1, CV);
                for cv = 1:CV
                    subgroup_start = (cv-1) * div_remain;
                    if mod_remain >= cv
                        subgroup_start = subgroup_start + cv;
                        subgroup_end = subgroup_start + div_remain;
                    else
                        subgroup_start = subgroup_start + mod_remain + 1;
                        subgroup_end = subgroup_start + div_remain -1;
                    end
                    test_indRange = subgroup_start:subgroup_end;
                    train_indRange = setdiff(1:N, test_indRange);
                    trainFeaX = trainData(train_indRange,2:end);
                    fold_trainy = trainData(train_indRange,1);
                    testFeaX = trainData(test_indRange,2:end);
                    testy = trainData(test_indRange,1);

                    s2 = num2str(lambda_inverse(i));
                    s1 = '-s 2 -e 0.0001 -q -c '; % for regular liblinear
                    % s1 = '-s 2 -e 0.0001 -n 8 -q -c '; % for omp version
                    s = [s1 s2];
                    timer_start = tic;
                    model_linear = train(fold_trainy, sparse(trainFeaX), s);
                    [test_predict_label, test_accuracy, test_dec_values] = ...
                        predict(testy, sparse(testFeaX), model_linear);
                    % Overwritten per fold: only the last fold's time survives.
                    telapsed_liblinear = toc(timer_start);
                    valAccu(cv) = test_accuracy(1);
                end
                ave_valAccu = mean(valAccu);
                std_valAccu = std(valAccu);
                % Keep a configuration only if it beats the best average
                % accuracy so far by more than 0.1.
                if(info.aveAccu_best+0.1 < ave_valAccu)
                    info.DMaxHist = [info.DMaxHist;DMax];
                    info.sigmaHist = [info.sigmaHist;sigma];
                    info.lambda_invHist = [info.lambda_invHist;lambda_inverse(i)];
                    info.valAccuHist = [info.valAccuHist;valAccu];
                    info.valAccu = valAccu;
                    info.aveAccu_best = ave_valAccu;
                    info.stdAccu = std_valAccu;
                    info.telapsed_fea_gen = telapsed_fea_gen;
                    info.telapsed_liblinear = telapsed_liblinear;
                    info.runtime = telapsed_fea_gen + telapsed_liblinear;
                    info.sigma = sigma;
                    info.R = R;
                    info.DMin = DMin;
                    info.DMax = DMax;
                    info.lambda_inverse = lambda_inverse(i);
                end
            end
        end
    end
    disp(info);
    savefilename = [filename '_rws_R' num2str(R) '_' num2str(CV) 'fold_CV'];
    save(savefilename,'info')
end
% 'nocreate' avoids starting a new pool just to delete it.
delete(gcp('nocreate'));
--------------------------------------------------------------------------------
/RunVisualization.m:
--------------------------------------------------------------------------------
function RunVisualization(DataSetStartIndex, DataSetEndIndex, Method, RepType)
% RunVisualization  Compare kernel PCA on the exact SINK kernel matrix with
% kernel PCA on a learned representation, for datasets whose (sorted)
% index lies in [DataSetStartIndex, DataSetEndIndex].
%
% Inputs:
%   DataSetStartIndex, DataSetEndIndex - inclusive range of dataset indices
%   Method  - index into Methods (1 = 'Random', 2 = 'KShape')
%   RepType - index 1..9 selecting which representation file is read
%
% For every selected dataset the results row holds, averaged over 10
% repetitions: the Frobenius norm of the difference between the two
% projected Gram matrices, the approximate KPCA runtime and the exact KPCA
% runtime. Results and the approximate eigenvectors are written with
% dlmwrite.

Methods = [cellstr('Random'), 'KShape'];
% NOTE(review): Types(5) is 'Z99per' while RepType 5 reads the '.Z98per'
% file. The label is kept so output file names do not change; confirm
% which one is intended.
Types = [cellstr('ZExact'), 'Z5', 'Z10', 'Z20', 'Z99per', 'Z95per', 'Z90per', 'Z85per', 'Z80per'];
% File extension of the representation read for each RepType.
RepSuffixes = {'.Zexact', '.Ztop5', '.Ztop10', '.Ztop20', '.Z98per', '.Z95per', '.Z90per', '.Z85per', '.Z80per'};

% Reject an unusable RepType before any file is read; previously it only
% surfaced later as an undefined-variable or index error.
if ~isscalar(RepType) || ~any(RepType == 1:numel(RepSuffixes))
    error('RunVisualization:invalidRepType', ...
        'RepType must be an integer between 1 and %d.', numel(RepSuffixes));
end

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = {dir_struct(3:130).name};

% Sort Datasets
Datasets = sort(Datasets);

Results = zeros(length(Datasets),3);

for i = 1:length(Datasets)

    if (i>=DataSetStartIndex && i<=DataSetEndIndex)

        disp(['Dataset being processed: ', char(Datasets(i))]);
        % DS is not referenced again in this function; the call is kept
        % as in the original.
        DS = LoadUCRdataset(char(Datasets(i)));
        % Get Kernel Matrix

        gamma = 10;

        KM = dlmread( strcat( 'KernelMatricesSINK/',char(Datasets(i)),'/', char(Datasets(i)), '_SINK_Gamma_', num2str(gamma) ,'.kernelmatrix') );

        tic;
        [EigenVectors,ProjDataOriginal] = OriginalKPCA(KM);
        RTOriginalKPCA = toc;


        for rep = 1 : 10
            rep
            rng(rep);

            % Read only the representation selected by RepType. The
            % original read all nine files on every repetition and then
            % used one of them.
            RepFile = strcat( 'REPRESENTATIONSGamma', num2str(gamma),'/',char(Datasets(i)),'/','RepLearningFixedSamples', '_', char(Methods(Method)), '_', num2str(rep) , RepSuffixes{RepType});
            Z = dlmread(RepFile);

            tic;
            [ApproxEigVectors,ProjDataApprox] = NystromKPCA(Z);
            RTApproximatelKPCA = toc;

            % The file name does not contain rep, so every repetition
            % overwrites the previous one.
            dlmwrite( strcat( 'RunVisualizationVectors/','RESULTS_RunVisualization_', num2str(i), '_', num2str(i), '_', char(Methods(Method)), '_', char(Types(RepType)) ,'.Vectors'), ApproxEigVectors, 'delimiter', '\t');

            % Evaluate SmplPoints in terms of clustering
            % measures (e.g., SSE, RandIndex, NystromAppx)

            %Error = Arccos dot(u,v)/(norm(u)*norm(v))
            %AppxError = acos(dot(EigenVectors(:,1),ApproxEigVectors(:,1))/(norm(EigenVectors(:,1))*norm(ApproxEigVectors(:,1))));
            AppxError = ( norm(ProjDataOriginal*ProjDataOriginal'-ProjDataApprox*ProjDataApprox','fro') );

            ResultsTmp = [AppxError,RTApproximatelKPCA,RTOriginalKPCA];

            Results(i,:) = Results(i,:) + ResultsTmp;
        end
        Results(i,:) = Results(i,:) ./ 10;

    end

    % Written on every pass of the dataset loop (also for skipped
    % indices), so partial results are always on disk.
    dlmwrite( strcat( 'RunVisualization/','RESULTS_RunVisualization_', num2str(DataSetStartIndex), '_', num2str(DataSetEndIndex), '_', char(Methods(Method)), '_', char(Types(RepType)) ,'.results'), Results, 'delimiter', '\t');

end

end
--------------------------------------------------------------------------------
/RunDictLearning.m:
--------------------------------------------------------------------------------
function RunDictLearning(DataSetStartIndex, DataSetEndIndex, Method, RepStartIndex, RepEndIndex)

Methods = [cellstr('Random'), 'KShape', 'GibbsDPP'];

% first 2 values are '.' and '..' - UCR Archive 2018 version has 128 datasets
dir_struct = dir('/rigel/dsi/users/ikp2103/VLDBGRAIL/UCR2018/');
Datasets = {dir_struct(3:130).name};

% Sort Datasets

[Datasets, DSOrder] = sort(Datasets);

for i = 1:length(Datasets)
    if (i>=DataSetStartIndex & i<=DataSetEndIndex)

        disp(['Dataset being processed: ', char(Datasets(i))]);
        DS = LoadUCRdataset(char(Datasets(i)));

        for rep = 1 : 10

            if (rep>=RepStartIndex & rep<=RepEndIndex)

                rep
                rng(rep);

                NumOfSamples = min(max( [4*length(DS.ClassNames), ceil(0.4*DS.DataInstancesCount),20] ),100);


                if Method==1
                    tic;
                    permed_index = randperm(DS.DataInstancesCount);
                    Dictionary = DS.Data(permed_index(1:NumOfSamples),:);
                    timing = toc;
                elseif Method==2
                    sumdtmp=Inf;
                    for Repetion=1:3

                        %tic;
                        [mem,Dictionary,iter,sumd,centKpp,centKppSmplPoints,DistValues,DistShifts,DistComp,RuntimekShape,DistCompSeed,RuntimeSeed] = kShapeCentroids(DS.Data, NumOfSamples, 1);
                        %timing = toc;

                        if sumd