├── README.md ├── matlab ├── ED_dep_multi.m ├── NCCc.m ├── SBD_dep_multi.m ├── cDTW_dep_multi.m ├── dba_centroid.m ├── dhat_shift_dep_multi.m ├── dipm.m ├── kmeans_centroid.m ├── ksc_centroid.m ├── kshape_centroid.m ├── multidim_KSC.m ├── multidim_kDBA.m ├── multidim_kMeans.m ├── multidim_kShape.m ├── qdwh.m └── qdwheig.m └── python ├── centroids.py ├── distances.py ├── kShape.py ├── ksc.py ├── mkShape.py ├── mksc.py ├── mvkShape.py └── util.py /README.md: -------------------------------------------------------------------------------- 1 | # mts-clustering 2 | Multivariate extensions to existing partitional time series clustering algorithms presented and implemented in; 3 | 4 | http://snap.stanford.edu/data/ksc.html 5 | 6 | http://www.cs.columbia.edu/~jopa/kshape.html 7 | 8 | Python and Matlab implementations of the algorithms presented in the paper titled "Discovering Patterns of Online Popularity from Time Series" 9 | 10 | https://arxiv.org/pdf/1904.04994 11 | 12 | https://www.public.asu.edu/~mozer/dipm-SC/supp_material.pdf 13 | -------------------------------------------------------------------------------- /matlab/ED_dep_multi.m: -------------------------------------------------------------------------------- 1 | function [Dist, optshift, opty] = ED_dep_multi(x,y,shift) 2 | [m,d] = size(x); 3 | if d~= size(y,2) 4 | display('This should not happen: ED_multi'); 5 | end 6 | Dist = 0; 7 | for m_i = 1:m 8 | for d_i = 1:d 9 | Dist = Dist + (x(m_i,d_i) - y(m_i,d_i))^2; 10 | end 11 | end 12 | Dist = sqrt(Dist); 13 | optshift = 0; 14 | opty = y; 15 | end -------------------------------------------------------------------------------- /matlab/NCCc.m: -------------------------------------------------------------------------------- 1 | function cc_sequence = NCCc(x,y) 2 | 3 | if isrow(x) 4 | x=x'; 5 | end 6 | if isrow(y) 7 | y=y'; 8 | end 9 | 10 | len = length(x); 11 | 12 | fftlength = 2^nextpow2(2*len-1); 13 | r = ifft( fft(x,fftlength) .* conj(fft(y,fftlength)) ); 14 | 15 | r = [r(end-len+2:end) ; r(1:len)]; 16 | 17 | cc_sequence = r./((norm(x)*norm(y))+eps); -------------------------------------------------------------------------------- /matlab/SBD_dep_multi.m: -------------------------------------------------------------------------------- 1 | function [dist, optshift, yshift]= SBD_dep_multi(x, y, maxshift) 2 | [m,d] = size(x); 3 | 4 | cc_ = 0; 5 | for d_i = 1:d 6 | cc_d_i = NCCc(x(:,d_i),y(:,d_i)); 7 | cc_ = cc_ + cc_d_i; 8 | end 9 | cc = zeros(size(cc_)); 10 | cc(length(x)) = cc_(length(x)); 11 | for i = 1:maxshift 12 | cc(length(x)+i) = cc_(length(x)+i); 13 | cc(length(x)-i) = cc_(length(x)-i); 14 | end 15 | [maxCC,maxCCI]=max(cc); 16 | 17 | shift = maxCCI - max(length(x),length(y)); 18 | 19 | if shift < 0 20 | yshift = [y(-shift + 1:end,:); zeros(-shift,d)]; 21 | else 22 | yshift = [zeros(shift,d); y(1:end-shift,:) ]; 23 | end 24 | optshift = ones(1,d)*shift; 25 | dist = d - maxCC; 26 | end -------------------------------------------------------------------------------- /matlab/cDTW_dep_multi.m: -------------------------------------------------------------------------------- 1 | function [Dist, optshift , opty] = cDTW_dep_multi(x,y,W) 2 | 3 | [m,d]=size(x); 4 | if d ~= size(y,2) 5 | display('This should not happen cDTW'); 6 | end 7 | 8 | Dist = 0; 9 | D=ones(m+1,m+1)*inf; 10 | 11 | D(1,1) = 0; 12 | for i=2:m+1 13 | for j=max(2, i-W):min(m+1, i+W) 14 | cost = 0; 15 | for d_i = 1:d 16 | cost = cost + (x(i-1,d_i)-y(j-1,d_i))^2; 17 | end 18 | D(i,j)=sqrt(cost)+min([D(i-1,j),D(i-1,j-1),D(i,j-1)]); 19 | end 20 | end 21 | Dist = D(m+1, m+1); 22 | opty = y; 23 | optshift = zeros(1,d); 24 | end -------------------------------------------------------------------------------- /matlab/dba_centroid.m: -------------------------------------------------------------------------------- 1 | function ksc = dba_centroid(mem, X, k, cur_center, shift) 2 | %Computes centroid 3 | 4 | a = []; 5 | [n,m,d] = size(X); 6 | cur_center = reshape(cur_center,m,d); 7 | ai = 1; 8 | for i=1:length(mem) 9 | if mem(i) == k 10 | opt_a = X(i,:,:); 11 | if d == 1 12 | a(ai,:) = opt_a; 13 | else 14 | a(ai,:,:) = opt_a; 15 | end 16 | ai = ai + 1; 17 | end 18 | end 19 | 20 | if size(a,1) == 0 21 | ksc = zeros(m,d); 22 | return; 23 | end 24 | 25 | for d_i = 1:d 26 | ksc(:,d_i) = DBA(a(:,:,d_i),cur_center(:,d_i)'); 27 | end 28 | end 29 | 30 | function average = DBA(sequences,cur_center) 31 | 32 | % Use previous centroid as reference 33 | average=cur_center; 34 | average=DBA_one_iteration(average,sequences); 35 | end 36 | 37 | function average = DBA_one_iteration(averageS,sequences) 38 | 39 | tupleAssociation = cell (1, size(averageS,2)); 40 | for t=1:size(averageS,2) 41 | tupleAssociation{t}=[]; 42 | end 43 | 44 | costMatrix = []; 45 | pathMatrix = []; 46 | 47 | for k=1:size(sequences,1) 48 | sequence = sequences(k,:); 49 | costMatrix(1,1) = distanceTo(averageS(1),sequence(1)); 50 | pathMatrix(1,1) = -1; 51 | for i=2:size(averageS,2) 52 | costMatrix(i,1) = costMatrix(i-1,1) + distanceTo(averageS(i),sequence(1)); 53 | pathMatrix(i,1) = 2; 54 | end 55 | 56 | for j=2:size(sequence,2) 57 | costMatrix(1,j) = costMatrix(1,j-1) + distanceTo(sequence(j),averageS(1)); 58 | pathMatrix(1,j) = 1; 59 | end 60 | 61 | for i=2:size(averageS,2) 62 | for j=2:size(sequence,2) 63 | indiceRes = ArgMin3(costMatrix(i-1,j-1),costMatrix(i,j-1),costMatrix(i-1,j)); 64 | pathMatrix(i,j)=indiceRes; 65 | 66 | if indiceRes==0 67 | res = costMatrix(i-1,j-1); 68 | elseif indiceRes==1 69 | res = costMatrix(i,j-1); 70 | elseif indiceRes==2 71 | res = costMatrix(i-1,j); 72 | end 73 | 74 | costMatrix(i,j) = res + distanceTo(averageS(i),sequence(j)); 75 | 76 | end 77 | end 78 | 79 | i=size(averageS,2); 80 | j=size(sequence,2); 81 | 82 | while(true) 83 | tupleAssociation{i}(end+1) = sequence(j); 84 | if pathMatrix(i,j)==0 85 | i=i-1; 86 | j=j-1; 87 | elseif pathMatrix(i,j)==1 88 | j=j-1; 89 | elseif pathMatrix(i,j)==2 90 | i=i-1; 91 | else 92 | break; 93 | end 94 | end 95 | 96 | end 97 | 98 | for t=1:size(averageS,2) 99 | averageS(t) = mean(tupleAssociation{t}); 100 | end 101 | 102 | average = averageS; 103 | 104 | end 105 | 106 | function value = ArgMin3(a,b,c) 107 | 108 | if (a maxscore || maxscore == 0 ) 170 | maxscore = scores(ki); 171 | maxscore_rindex = tnaC; %turn index of highest score cluster among non accepted 172 | maxscore_gindex = ki; % general index of highest score cluster 173 | end 174 | end 175 | end 176 | display(strcat('# of accepted clus this turn: ',num2str(taC),', # of non-accepted clus this turn: ',num2str(tnaC),... 177 | ', # of accepted clus overall: ',num2str(aC))); 178 | if ( ready == 0 ) 179 | k = k + 1 - taC; % increase the k, take out the accepted clusters from the data 180 | tX_ids = rX_ids; 181 | 182 | % split up the highest multimodal cluster with its best splitter 183 | [~,splitViewer] = max(rdip{maxscore_rindex}); 184 | view = reshape(rprojected{maxscore_rindex}(splitViewer,:),1,size(rprojected{maxscore_rindex},2)); 185 | err = intmax; 186 | for kmi = 1:10 187 | [ncm_, ~, err_] = kmeans(view',2,'Start',[min(view(view~=0));max(view(view~=0))]); 188 | err_ = sum(err_); 189 | if err_ < err 190 | ncm = ncm_; 191 | err = err_; 192 | end 193 | end 194 | 195 | sClus = tClus{maxscore_gindex}; 196 | rcenters(k,:,:) = averagingFunc(ones(nnz(ncm==2),1), X(sClus(ncm == 2),:,:),1,rcenters(maxscore_rindex,:,:),shift); 197 | rcenters(maxscore_rindex,:,:) = averagingFunc(ones(nnz(ncm==1),1), X(sClus(ncm == 1),:,:),1,rcenters(maxscore_rindex,:,:),shift); 198 | end 199 | end 200 | 201 | % create the Idx matrix for the resulting partition 202 | mem = zeros(n,1); 203 | for ki=1:length(clus) 204 | mem(clus{ki}) = ki; 205 | end 206 | 207 | 208 | 209 | 210 | -------------------------------------------------------------------------------- /matlab/kmeans_centroid.m: -------------------------------------------------------------------------------- 1 | function centroid = kmeans_centroid(mem, X, k, cur_center,shift) 2 | %Computes centroid 3 | a = []; 4 | [n,m,d] = size(X); 5 | ai = 1; 6 | for i=1:length(mem) 7 | if mem(i) == k 8 | opt_a = X(i,:,:); 9 | if d == 1 10 | a(ai,:) = opt_a; 11 | else 12 | a(ai,:,:) = opt_a; 13 | end 14 | ai = ai + 1; 15 | end 16 | end 17 | 18 | if size(a,1) == 0 19 | centroid = zeros(m, d); 20 | return; 21 | end 22 | 23 | for d_i = 1:d 24 | centroid(:,d_i) = mean(a(:,:,d_i),1); 25 | end 26 | 27 | end -------------------------------------------------------------------------------- /matlab/ksc_centroid.m: -------------------------------------------------------------------------------- 1 | function ksc = ksc_centroid(mem, X, k, cur_center, shift) 2 | %Computes ksc centroid 3 | 4 | [n,m,d] = size(X); 5 | if d == 1 6 | a = zeros(length(find(mem==k)),m); 7 | else 8 | a = zeros(length(find(mem==k)),m,d); 9 | end 10 | ai = 1; 11 | sum_cur_center = sum(cur_center(:)); 12 | for i=1:length(mem) 13 | if mem(i) == k 14 | if sum_cur_center == 0 15 | opt_a = X(i,:,:); 16 | else 17 | [~, ~, opt_a] = dhat_shift_dep_multi(reshape(cur_center,m,d), ... 18 | reshape(X(i,:,:),m,d),shift); 19 | end 20 | if d == 1 21 | a(ai,:) = opt_a; 22 | else 23 | a(ai,:,:) = opt_a; 24 | end 25 | ai = ai + 1; 26 | end 27 | end 28 | if size(a,1) == 0 29 | ksc = zeros(m, d); 30 | return; 31 | end 32 | for d_i = 1:d 33 | a_di = a(:,:,d_i); 34 | b = a_di ./ repmat(sqrt(sum(a_di.^2,2))+eps, [1 m]); 35 | M = b'*b - n * eye(m); 36 | [V, D] = eig(M); 37 | 38 | ksc_di = V(:,end); 39 | 40 | finddistance1 = sqrt(sum((a_di(1,:) - ksc_di').^2)); 41 | finddistance2 = sqrt(sum((a_di(1,:) - (-ksc_di')).^2)); 42 | 43 | if (finddistance1 2 51 | break 52 | end 53 | else 54 | prevErr = err_; 55 | try_ = 0; 56 | end 57 | end 58 | 59 | toc; 60 | disp(strcat('||PrevMem-CurMem||=',num2str(err_))); 61 | end 62 | finalNorm = norm(prev_mem-mem); 63 | sqe = 0; 64 | for i = 1:n 65 | sqe = sqe + Dist(i,mem(i)); 66 | end 67 | end 68 | -------------------------------------------------------------------------------- /matlab/multidim_kDBA.m: -------------------------------------------------------------------------------- 1 | function [mem, cent, finalNorm, sqe] = multidim_kDBA(X, K, shift, cent_init) 2 | 3 | n = size(X, 1); 4 | m = size(X, 2); 5 | d = size(X, 3); 6 | 7 | Dist = zeros(n,K); 8 | if nargin < 4 9 | mem = ceil(K*rand(n, 1)); 10 | cent = zeros(K, m, d); 11 | else 12 | cent = cent_init; 13 | for i = 1:n 14 | for k = 1:K 15 | Dist(i,k) = cDTW_dep_multi(... 16 | reshape(cent(k,:,:),m,d),... 17 | reshape(X(i,:,:),m,d),... 18 | shift... 19 | ); 20 | end 21 | end 22 | [~, mem] = min(Dist,[],2); 23 | end 24 | 25 | prevErr = -1; 26 | try_ = 0; 27 | 28 | for iter = 1:100 29 | tic; 30 | disp(strcat('Iteration-',num2str(iter))); 31 | prev_mem = mem; 32 | 33 | for k = 1:K 34 | cent(k,:,:) = dba_centroid(mem, X, k, reshape(cent(k,:,:),m,d),shift); 35 | end 36 | 37 | for i = 1:n 38 | for k = 1:K 39 | Dist(i,k) = cDTW_dep_multi(reshape(X(i,:,:),m,d),reshape(cent(k,:,:),m,d),shift); 40 | end 41 | end 42 | 43 | [~, mem] = min(Dist,[],2); 44 | err_ = norm(prev_mem-mem); 45 | if err_ == 0 46 | break; 47 | else 48 | if err_ == prevErr 49 | try_ = try_ + 1; 50 | if try_ > 2 51 | break 52 | end 53 | else 54 | prevErr = err_; 55 | try_ = 0; 56 | end 57 | end 58 | toc; 59 | disp(strcat('||PrevMem-CurMem||=',num2str(err_))); 60 | end 61 | finalNorm = norm(prev_mem-mem); 62 | sqe = 0; 63 | for i = 1:n 64 | sqe = sqe + Dist(i,mem(i)); 65 | end 66 | end 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /matlab/multidim_kMeans.m: -------------------------------------------------------------------------------- 1 | function [mem, cent, finalNorm, sqe] = multidim_kMeans(X, K, shift, cent_init) 2 | 3 | n = size(X, 1); 4 | m = size(X, 2); 5 | d = size(X, 3); 6 | Dist = zeros(n,K); 7 | if nargin < 4 8 | mem = ceil(K*rand(n, 1)); 9 | cent = zeros(K, m, d); 10 | else 11 | cent = cent_init; 12 | for i = 1:n 13 | for k = 1:K 14 | Dist(i,k) = ED_dep_multi(... 15 | reshape(cent(k,:,:),m,d),... 16 | reshape(X(i,:,:),m,d)); 17 | end 18 | end 19 | [~, mem] = min(Dist,[],2); 20 | end 21 | 22 | prevErr = -1; 23 | try_ = 0; 24 | 25 | for iter = 1:100 26 | tic; 27 | disp(iter); 28 | prev_mem = mem; 29 | 30 | for k = 1:K 31 | cent(k,:,:) = kmeans_centroid(mem, X, k, reshape(cent(k,:,:),m,d)); 32 | end 33 | 34 | for i = 1:n 35 | for k = 1:K 36 | Dist(i,k) = ED_dep_multi(reshape(X(i,:,:),m,d),reshape(cent(k,:,:),m,d)); 37 | end 38 | end 39 | 40 | [val mem] = min(Dist,[],2); 41 | err_ = norm(prev_mem-mem); 42 | if err_ == 0 43 | break; 44 | else 45 | if err_ == prevErr 46 | try_ = try_ + 1; 47 | if try_ > 2 48 | break 49 | end 50 | else 51 | prevErr = err_; 52 | try_ = 0; 53 | end 54 | end 55 | 56 | toc; 57 | disp(strcat('||PrevMem-CurMem||=',num2str(err_))); 58 | end 59 | finalNorm = norm(prev_mem-mem); 60 | sqe = 0; 61 | for i = 1:n 62 | sqe = sqe + Dist(i,mem(i)); 63 | end 64 | end 65 | 66 | 67 | -------------------------------------------------------------------------------- /matlab/multidim_kShape.m: -------------------------------------------------------------------------------- 1 | function [mem, cent, finalNorm, sqe] = multidim_kShape(X, K, shift,cent_init) 2 | 3 | n = size(X, 1); 4 | m = size(X, 2); 5 | d = size(X, 3); 6 | Dist = zeros(n,K); 7 | if nargin < 4 8 | mem = ceil(K*rand(n, 1)); 9 | cent = zeros(K, m, d); 10 | else 11 | cent = cent_init; 12 | for i = 1:n 13 | for k = 1:K 14 | Dist(i,k) = SBD_dep_multi(... 15 | reshape(cent(k,:,:),m,d),... 16 | reshape(X(i,:,:),m,d),... 17 | shift... 18 | ); 19 | end 20 | end 21 | [~, mem] = min(Dist,[],2); 22 | end 23 | 24 | prevErr = -1; 25 | try_ = 0; 26 | 27 | for iter = 1:100 28 | tic; 29 | disp(strcat('Iteration-',num2str(iter))); 30 | prev_mem = mem; 31 | 32 | for k = 1:K 33 | cent(k,:,:) = kshape_centroid(mem, X, k, reshape(cent(k,:,:), m, d),shift); 34 | end 35 | 36 | for i = 1:n 37 | for k = 1:K 38 | Dist(i,k) = SBD_dep_multi(reshape(cent(k,:,:),m,d), reshape(X(i,:,:),m,d),shift); 39 | end 40 | end 41 | 42 | [~, mem] = min(Dist,[],2); 43 | err_ = norm(prev_mem-mem); 44 | if err_ == 0 45 | break; 46 | else 47 | if err_ == prevErr 48 | try_ = try_ + 1; 49 | if try_ > 2 50 | break 51 | end 52 | else 53 | prevErr = err_; 54 | try_ = 0; 55 | end 56 | end 57 | toc; 58 | disp(strcat('||PrevMem-CurMem||=',num2str(err_))); 59 | end 60 | finalNorm = norm(prev_mem-mem); 61 | sqe = 0; 62 | for i = 1:n 63 | sqe = sqe + Dist(i,mem(i)); 64 | end 65 | end 66 | 67 | 68 | -------------------------------------------------------------------------------- /matlab/qdwh.m: -------------------------------------------------------------------------------- 1 | function [U,H,it] = qdwh(A,alpha,L,piv) 2 | %QDWH QR-based dynamically weighted Halley iteration for polar decomposition. 3 | % [U,H,it,res] = qdwh(A,alpha,L,PIV) computes the 4 | % polar decomposition A = U*H of a full rank M-by-N matrix A with 5 | % M >= N. Optional arguments: ALPHA: an estimate for norm(A,2), 6 | % L: a lower bound for the smallest singular value of A, and 7 | % PIV = 'rc' : column pivoting and row sorting, 8 | % PIV = 'c' : column pivoting only, 9 | % PIV = '' (default): no pivoting. 10 | % The third output argument IT is the number of iterations. 11 | 12 | [m,n] = size(A); 13 | 14 | tol1 = 10*eps/2; tol2 = 10*tol1; tol3 = tol1^(1/3); 15 | if m == n && norm(A-A','fro')/norm(A,'fro') < tol2; 16 | symm = 1; 17 | else 18 | symm = 0; 19 | end 20 | 21 | it = 0; 22 | 23 | if m < n, error('m >= n is required.'), end 24 | 25 | if nargin < 2 || isempty(alpha) % Estimate for largest singular value of A. 26 | alpha = normest(A,0.1); 27 | end 28 | 29 | % Scale original matrix to form X0. 30 | U = A/alpha; Uprev = U; 31 | 32 | if nargin < 3 || isempty(L) % Estimate for smallest singular value of U. 33 | Y = U; if m > n, [Q,Y] = qr(U,0); end 34 | smin_est = norm(Y,1)/condest(Y); % Actually an upper bound for smin. 35 | L = smin_est/sqrt(n); 36 | end 37 | 38 | if nargin < 4, piv = ''; end 39 | 40 | col_piv = strfind(piv,'c'); 41 | row_sort = strfind(piv,'r'); 42 | 43 | if row_sort 44 | row_norms = sum(abs(U),2); 45 | [ignore,rind] = sort(row_norms,1,'descend'); 46 | U = U(rind,:); 47 | end 48 | 49 | while norm(U-Uprev,'fro') > tol3 || it == 0 || abs(1-L) > tol1 50 | 51 | it = it + 1; 52 | Uprev = U; 53 | 54 | % Compute parameters L,a,b,c (second, equivalent way). 55 | L2 = L^2; 56 | dd = ( 4*(1-L2)/L2^2 )^(1/3); 57 | sqd = sqrt(1+dd); 58 | a = sqd + sqrt(8 - 4*dd + 8*(2-L2)/(L2*sqd))/2; 59 | a = real(a); 60 | b = (a-1)^2/4; 61 | c = a+b-1; 62 | % Update L. 63 | L = L*(a+b*L2)/(1+c*L2); 64 | 65 | if c > 100 % Use QR. 66 | B = [sqrt(c)*U; eye(n)]; 67 | 68 | if col_piv 69 | [Q,R,E] = qr(B,0,'vector'); 70 | else 71 | [Q,R] = qr(B,0); %E = 1:n; 72 | end 73 | 74 | Q1 = Q(1:m,:); Q2 = Q(m+1:end,:); 75 | U = b/c*U + (a-b/c)/sqrt(c)*Q1*Q2'; 76 | 77 | else % Use Cholesky when U is well conditioned; faster. 78 | C = chol(c*(U'*U)+eye(n)); 79 | % Utemp = (b/c)*U + (a-b/c)*(U/C)/C'; 80 | % Next three lines are slightly faster. 81 | opts1.UT = true; opts1.TRANSA = true; 82 | opts2.UT = true; opts2.TRANSA = false; 83 | U = (b/c)*U + (a-b/c)*(linsolve(C,linsolve(C,U',opts1),opts2))'; 84 | end 85 | if symm 86 | U = (U+U')/2; 87 | end 88 | end 89 | if row_sort 90 | U(rind,:) = U; 91 | end 92 | 93 | if nargout > 1 94 | H = U'*A; H = (H'+H)/2; 95 | end 96 | -------------------------------------------------------------------------------- /matlab/qdwheig.m: -------------------------------------------------------------------------------- 1 | function [Uout,eigvals] = qdwheig(H,normH,minlen,NS) 2 | %QDWH-EIG Eigendecomposition of symmetric matrix via QDWH. 3 | % [V,D] = QDWHEIG(A) computes the eigenvalues (the diagonal elements 4 | % of D) and an orthogonal matrix V of eigenvectors 5 | % of the symmetric matrix A. This function makes use of the function 6 | % QDWH that implements the QR-based dynamically weighted Halley 7 | % iteration for the polar decomposition. 8 | % [U,D] = QDWHEIG(A,normA,minlen,shift) includes the optional 9 | % input arguments 10 | % normA: norm(A,'fro'), which is used in the recursive calls. 11 | % minlen: the matrix size at which to stop the recursions (default 1). 12 | % NS: Newton-Schulz postprocessing for better accuracy 13 | % 1: do N-S (default), 0: don't N-S (slightly faster). 14 | 15 | backtol = 10*eps/2; % Tolerance for relative backward error. 16 | n = length(H); 17 | %if nargin < 2 || isempty(normH); normH = norm(H,'fro'); end 18 | %if nargin < 3 || isempty(minlen); minlen = 1; end 19 | %if nargin < 4 || isempty(NS); NS = 1; end 20 | 21 | normH = norm(H,'fro'); 22 | minlen = 1; 23 | NS = 1; 24 | 25 | [Uout,eigvals] = qdwheigrep(H,normH,minlen,backtol); 26 | 27 | if NS 28 | Uout = 3/2*Uout-Uout*(Uout'*Uout)/2; % Newton-Schulz postprocessing. 29 | end 30 | 31 | eigvals = diag(sort(eigvals,'ascend')); 32 | Uout = fliplr(Uout); % Order appropriately. 33 | 34 | if nargout == 1; Uout = diag(eigvals); end 35 | 36 | % Subfunctions. 37 | 38 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 39 | function [Uout,eigvals] = qdwheigrep(H,normH,minlen,backtol,a,b,shift) 40 | % Internal recursion. 41 | n = length(H); 42 | 43 | % If already almost diagonal, return trivial solution. 44 | if norm(H-diag(diag(H)),'fro')/normH < backtol 45 | [eigvals IX] = sort(diag(H),'descend'); eigvals = eigvals'; 46 | Uout = eye(n); Uout = Uout(:,IX); 47 | return 48 | end 49 | 50 | H = (H+H')/2; % Needed for recursive calls, due to roundoff. 51 | 52 | if nargin < 7 || isempty(shift) 53 | % Determine shift: approximation to median(eig(H)). 54 | shift = median(diag(H)); 55 | end 56 | 57 | % Estimate a,b. 58 | if nargin < 5 || isempty(a); a = normest(H-shift*eye(n),3e-1); end 59 | if nargin < 6 || isempty(b); b = .9/condest(H-shift*eye(n)); end 60 | 61 | % Compute polar decomposition via QDWH. 62 | U = qdwh(H-shift*eye(n),a,b); 63 | 64 | % Orthogonal projection matrix. 65 | U = (U+eye(n))/2; 66 | 67 | % Subspace iteration 68 | [U1,U2] = subspaceit(U); 69 | minoff = norm(U2'*H*U1,'fro')/normH; % backward error 70 | 71 | if minoff > backtol 72 | % 'Second subspace iteration'. 73 | [U1,U2] = subspaceit(U,0,U1); 74 | minoff = norm(U2'*H*U1,'fro')/normH; % backward error 75 | end 76 | 77 | if minoff > backtol 78 | for irand = 1:2 79 | % Redo subspace iteration with randomization. 80 | [U1b,U2b] = subspaceit(U,1); 81 | minoff2 = norm(U2b'*H*U1b,'fro')/normH; % backward error 82 | if minoff > minoff2; U1 = U1b; U2 = U2b; end % take better case 83 | end 84 | end 85 | 86 | % One step done; further blocks. 87 | eigvals = []; 88 | if length(U1(1,:)) == 1 89 | eigvals = [eigvals U1'*H*U1]; 90 | end 91 | if length(U2(1,:)) == 1 92 | eigvals = [eigvals U2'*H*U2]; 93 | end 94 | 95 | eigvals1 = []; eigvals2 = []; 96 | if length(U1(1,:)) > minlen 97 | [Ua eigvals1] = qdwheigrep(U1'*H*U1,normH,minlen,backtol); 98 | U1 = U1*Ua; 99 | end 100 | 101 | if length(U2(1,:)) > minlen 102 | [Ua eigvals2] = qdwheigrep(U2'*H*U2,normH,minlen,backtol); 103 | U2 = U2*Ua; 104 | end 105 | 106 | Uout = [U1 U2]; 107 | % Collect eigvals 108 | eigvals = [eigvals eigvals1 eigvals2]; 109 | 110 | 111 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 112 | function [U0,U1] = subspaceit(U,use_rand,U1) 113 | %SUBSPACEIT Subspace iteration for computing invariant subspace. 114 | % [U0,U1] = SUBSPACEIT(U,use_rand,U1) computes an orthogonal basis U0 115 | % for the column space of the square matrix U. 116 | % Normally one or two steps will yield convergence. 117 | % U1 is the orthogonal complement of U0. 118 | % Optional inputs: 119 | % use_rand: 1 to use randomization to form initial matrix (default 0). 120 | % U1: initial matrix (then use_rand becomes irrelevant). 121 | 122 | n = length(U); 123 | xsize = round(norm(U,'fro')^2); % (Accurate) estimate of norm of U0. 124 | 125 | if nargin < 2; use_rand = 0; end 126 | 127 | % Determine initial matrix. 128 | if nargin >= 3 % Initial guess given. 129 | UU = U*U1; 130 | elseif use_rand % Random initial guess. 131 | UU = U*randn(n,min(xsize+3,n)); 132 | else % Take large columns of U as initial guess. 133 | 134 | % normcols = zeros(1,n); 135 | % for ii = 1:n; normcols(ii) = norm(U(:,ii)); end; 136 | % [normc,IX] = sort(normcols,'descend'); 137 | % UU = U(:,IX(1:min(xsize+3,n))); % Take columns of large norm. 138 | UU = U(:,1:min(xsize+3,n)); % Take first columns. 139 | 140 | end 141 | 142 | [UU,R] = qr(UU,0); UU = U*UU;[UU,R] = qr(UU); % Subspace iteration. 143 | U0 = UU(:,1:xsize); U1 = UU(:,xsize+1:end); 144 | -------------------------------------------------------------------------------- /python/centroids.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | import sys 3 | 4 | from numpy.linalg import norm, eig, eigh 5 | from scipy.stats import zscore 6 | import numpy as np 7 | 8 | from distances import dhat_shift_dep,sbd_dep_multi 9 | 10 | 11 | def ksc_centroid(mem, ts, k, cent, shift): 12 | if len(ts.shape) == 2: 13 | n,m = ts.shape 14 | d = 1 15 | elif len(ts.shape) == 3: 16 | n,m,d = ts.shape 17 | else: 18 | print('Something Wrong in centroids.py ...') 19 | sys.exit() 20 | n_ = len(np.where(mem==k)[0]) 21 | if d==1: 22 | a = np.zeros((n_,m,1)) 23 | else: 24 | a = np.zeros((n_,m,d)) 25 | 26 | ai = 0 27 | 28 | for i in np.where(mem==k)[0]: 29 | if not cent.any(): 30 | opt_a = ts[i] 31 | else: 32 | _,_,opt_a = dhat_shift_dep(cent,ts[i],shift) 33 | 34 | a[ai,:,:] = opt_a 35 | ai += 1 36 | 37 | 38 | centroid = np.zeros((m,d)) 39 | if a.shape[0] == 0: 40 | return centroid 41 | 42 | for d_i in range(d): 43 | a_di = a[:,:,d_i] 44 | b = a_di/np.tile(norm(a_di,axis=1),(m,1)).T 45 | M = np.matmul(b.T,b) - (n_+1)*np.eye(m) 46 | 47 | w,v = eigh(M) 48 | 49 | ksc_di = v[:,np.where(w == max(w))[0][0]] 50 | 51 | 52 | dist1 = norm(a_di[0,:,] - ksc_di.T) 53 | dist2 = norm(a_di[0,:,] - (-ksc_di.T)) 54 | 55 | if dist1 > dist2: 56 | ksc_di = -ksc_di 57 | 58 | if np.sum(ksc_di) < 0: 59 | ksc_di = -ksc_di 60 | 61 | centroid[:,d_i] = ksc_di 62 | return centroid 63 | 64 | def kShape_centroid(mem, ts, k, cent, shift): 65 | if len(ts.shape) == 2: 66 | n,m = ts.shape 67 | d = 1 68 | elif len(ts.shape) == 3: 69 | n,m,d = ts.shape 70 | else: 71 | print('Something Wrong in centroids.py ...') 72 | sys.exit() 73 | n_ = len(np.where(mem==k)[0]) 74 | 75 | if d==1: 76 | a = np.zeros((n_,m,1)) 77 | else: 78 | a = np.zeros((n_,m,d)) 79 | 80 | ai = 0 81 | 82 | for i in np.where(mem==k)[0]: 83 | if not cent.any(): 84 | opt_a = ts[i] 85 | else: 86 | _,_,opt_a = sbd_dep_multi(cent,ts[i],shift) 87 | 88 | a[ai,:,:] = opt_a 89 | ai += 1 90 | 91 | centroid = np.zeros((m,d)) 92 | 93 | if a.shape[0] == 0: 94 | return centroid 95 | 96 | for d_i in range(d): 97 | #pdb.set_trace() 98 | ncolumns = a[:,:,d_i].shape[1] 99 | #pdb.set_trace() 100 | Y = zscore(a[:,:,d_i], axis=1, ddof = 1) 101 | Y = np.nan_to_num(Y) 102 | S = np.matmul(Y.T, Y) 103 | P = (np.eye(ncolumns) - 1.0 / ncolumns * np.ones((ncolumns,ncolumns))) 104 | M = np.matmul(np.matmul(P,S),P) 105 | if np.sum(M) == 0: 106 | centroid[:,d_i] = np.zeros((1, ts.shape[1])); 107 | 108 | 109 | w, v = eigh(M) 110 | centroid_di = v[:,np.where(w == max(w))[0][0]] 111 | 112 | finddistance1 = np.sqrt(np.sum((a[0,:,d_i] - centroid_di.T)**2)) 113 | finddistance2 = np.sqrt(np.sum((a[0,:,d_i] - (-centroid_di.T))**2)) 114 | #pdb.set_trace() 115 | if finddistance1 < finddistance2: 116 | centroid_di = centroid_di 117 | else: 118 | centroid_di = -centroid_di 119 | 120 | centroid_di = zscore(centroid_di,ddof=1) 121 | centroid_di = np.nan_to_num(centroid_di) 122 | centroid[:,d_i] = centroid_di 123 | 124 | return centroid 125 | 126 | def vkShape_centroid(mem, ts, k, cent, shift): 127 | if len(ts.shape) == 2: 128 | n,m = ts.shape 129 | d = 1 130 | elif len(ts.shape) == 3: 131 | n,m,d = ts.shape 132 | else: 133 | print('Something Wrong in centroids.py ...') 134 | sys.exit() 135 | n_ = len(np.where(mem==k)[0]) 136 | 137 | if d==1: 138 | a = np.zeros((n_,m,1)) 139 | else: 140 | a = np.zeros((n_,m,d)) 141 | 142 | ai = 0 143 | 144 | for i in np.where(mem==k)[0]: 145 | if not cent.any(): 146 | opt_a = ts[i] 147 | else: 148 | _,_,opt_a = sbd_dep_multi(cent,ts[i],shift) 149 | 150 | a[ai,:,:] = opt_a 151 | ai += 1 152 | 153 | centroid = np.zeros((m,d)) 154 | covar = np.zeros((d, d)); 155 | 156 | if a.shape[0] == 0: 157 | return centroid,covar 158 | 159 | for d_i in range(d): 160 | #pdb.set_trace() 161 | ncolumns = a[:,:,d_i].shape[1] 162 | #pdb.set_trace() 163 | Y = zscore(a[:,:,d_i], axis=1, ddof = 1) 164 | Y = np.nan_to_num(Y) 165 | S = np.matmul(Y.T, Y) 166 | P = (np.eye(ncolumns) - 1.0 / ncolumns * np.ones((ncolumns,ncolumns))) 167 | M = np.matmul(np.matmul(P,S),P) 168 | if np.sum(M) == 0: 169 | centroid[:,d_i] = np.zeros((1, ts.shape[1])); 170 | 171 | 172 | w, v = eigh(M) 173 | centroid_di = v[:,np.where(w == max(w))[0][0]] 174 | 175 | finddistance1 = np.sqrt(np.sum((a[0,:,d_i] - centroid_di.T)**2)) 176 | finddistance2 = np.sqrt(np.sum((a[0,:,d_i] - (-centroid_di.T))**2)) 177 | #pdb.set_trace() 178 | if finddistance1 < finddistance2: 179 | centroid_di = centroid_di 180 | else: 181 | centroid_di = -centroid_di 182 | 183 | centroid_di = zscore(centroid_di,ddof=1) 184 | centroid_di = np.nan_to_num(centroid_di) 185 | centroid[:,d_i] = centroid_di 186 | 187 | sum_ = 0.0; 188 | for i in range(a.shape[0]): 189 | x_i = a[i] 190 | covar_i = (np.matmul(x_i.T,x_i))/m; 191 | sum_ = sum_ + covar_i; 192 | 193 | covar = sum_/a.shape[0]; 194 | 195 | return centroid, covar 196 | -------------------------------------------------------------------------------- /python/distances.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | from numpy.fft import fft, ifft 4 | import pdb 5 | import sys 6 | 7 | def dhat_shift_dep(t1, t2, shift): 8 | sanity_check(t1,t2) 9 | 10 | d = t1.shape[1] 11 | 12 | min_d = 0 13 | for d_i in range(d): 14 | min_d += scale_d(t1[:,d_i],t2[:,d_i]) 15 | opt_t2 = t2 16 | optShift = 0 17 | for sh in range(-shift,shift+1): 18 | if sh == 0: 19 | continue 20 | elif sh < 0: 21 | shifted_t2 = np.append(t2[-sh:len(t2),:],np.zeros((-sh,d)),axis=0) 22 | else: 23 | shifted_t2 = np.append(np.zeros((sh,d)),t2[0:len(t2)-sh,:],axis=0) 24 | 25 | cur_d = 0 26 | for d_i in range(d): 27 | cur_d += scale_d(t1[:,d_i],shifted_t2[:,d_i]) 28 | 29 | if cur_d <= min_d: 30 | optShift = sh 31 | opt_t2 = shifted_t2 32 | min_d = cur_d 33 | 34 | optShift = np.ones((1,d))*optShift 35 | dist = min_d 36 | return dist, optShift, opt_t2 37 | 38 | 39 | def scale_d(t1,t2): 40 | alpha = np.matmul(t1,t2.T)/(np.matmul(t2,t2.T)+np.finfo(float).eps) 41 | dist = norm(t1 - t2*alpha)/(norm(t1)+np.finfo(float).eps) 42 | return dist 43 | 44 | 45 | 46 | def sbd_dep_multi(t1,t2,shift): 47 | sanity_check(t1,t2) 48 | 49 | d = t1.shape[1] 50 | 51 | cc_ = 0 52 | for d_i in range(d): 53 | cc_ += NCCc(t1[:,d_i],t2[:,d_i]) 54 | 55 | '''cc = np.zeros((cc_.shape)) 56 | cc[t1.shape[0]] = cc_[t1.shape[0]] 57 | 58 | for i in range(shift): 59 | cc[t1.shape[0] + i] = cc_[t1.shape[0] + i] 60 | cc[t1.shape[0] - i] = cc_[t1.shape[0] - i] 61 | ''' 62 | maxCC = np.max(cc_) 63 | maxCCI = np.argmax(cc_) 64 | 65 | sh = maxCCI - max(t1.shape[0]-1,t2.shape[0]-1) 66 | 67 | if sh < 0: 68 | shifted_t2 = np.append(t2[-sh:len(t2),:],np.zeros((-sh,d)),axis=0) 69 | else: 70 | shifted_t2 = np.append(np.zeros((sh,d)),t2[0:len(t2)-sh,:],axis=0) 71 | 72 | optShift = np.ones((1,d))*sh 73 | opt_t2 = shifted_t2 74 | dist = d - maxCC 75 | 76 | return dist, optShift, opt_t2 77 | 78 | def NCCc(t1,t2): 79 | len_ = len(t1) 80 | fftLen = int(2**np.ceil(np.log2(abs(2*len_ - 1)))) 81 | 82 | r = ifft(fft(t1, fftLen) * np.conj(fft(t2, fftLen))) 83 | r = np.concatenate((r[-(len_-1):], r[:len_])) 84 | 85 | return np.real(r)/((norm(t1) * norm(t2)) + np.finfo(float).eps) 86 | 87 | def sanity_check(t1,t2): 88 | ## sanity checks 89 | if len(t1.shape) == 2 and len(t2.shape) == 2: 90 | if not t1.shape[0] == t2.shape[0] or not t1.shape[1] == t2.shape[1]: 91 | print('Sth wrong with your time series shapes:',t1.shape,t2.shape) 92 | sys.exit() 93 | elif len(t1.shape) == 1 and len(t2.shape) == 1: 94 | if not t1.shape[0] == t2.shape[0]: 95 | print('Sth wrong with your time series shapes:',t1.shape,t2.shape) 96 | sys.exit() 97 | else: 98 | print('Sth wrong with your time series shapes:',t1.shape,t2.shape) 99 | sys.exit() 100 | -------------------------------------------------------------------------------- /python/kShape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | import pdb 4 | from distances import sbd_dep_multi 5 | from centroids import kShape_centroid 6 | 7 | def kShape(ts,K,shift,init = 0): 8 | iter_ = 100 9 | ndim = len(ts.shape) 10 | if ndim > 2: 11 | print('Warning: multivariate time series fed, running m-kShape instead...') 12 | from mkShape import multidim_kShape 13 | return multidim_kShape(ts,K,shift,init) 14 | n,m = ts.shape 15 | ts = np.reshape(ts,(ts.shape[0],ts.shape[1],1)) 16 | Dist = np.zeros((n,K)) 17 | 18 | 19 | if init == 0: 20 | mem = np.ceil(K*np.random.rand(n,1))-1 21 | cent = np.zeros((K,m,1)) 22 | else: 23 | cent = init 24 | for i in range(n): 25 | for k in range(K): 26 | Dist[i,k],_,_ = sbd_dep_multi(cent[k,:],ts[i,:],shift) 27 | mem = np.argmin(Dist,axis=1) 28 | 29 | prevErr = -1 30 | try_ = 0 31 | 32 | for it in range(iter_): 33 | print('Iteration',it) 34 | prev_mem = mem; 35 | for k in range(K): 36 | cent[k] = kShape_centroid(mem, ts, k, cent[k,:], shift) 37 | 38 | for i in range(n): 39 | for k in range(K): 40 | Dist[i,k],_,_ = sbd_dep_multi(cent[k,:],ts[i,:],shift) 41 | 42 | mem = np.argmin(Dist,axis=1) 43 | err = norm(prev_mem-mem) 44 | 45 | if err == 0: 46 | break 47 | else: 48 | if err == prevErr: 49 | try_ = try_ + 1 50 | if try_ > 2: 51 | break 52 | else: 53 | prevErr = err 54 | try_ = 0 55 | print('||PrevMem-CurMem||=',err) 56 | 57 | finalNorm = err 58 | return mem,Dist,cent,finalNorm 59 | -------------------------------------------------------------------------------- /python/ksc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | import pdb 4 | from distances import dhat_shift_dep 5 | from centroids import ksc_centroid 6 | 7 | def kSC(ts,K,shift,init = 0): 8 | iter_ = 100 9 | ndim = len(ts.shape) 10 | if ndim > 2: 11 | print('Warning: multivariate time series fed, running m-kSC instead...') 12 | from mksc import multidim_kSC 13 | return multidim_kSC(ts,K,shift,init) 14 | n,m = ts.shape 15 | ts = np.reshape(ts,(ts.shape[0],ts.shape[1],1)) 16 | Dist = np.zeros((n,K)) 17 | 18 | 19 | if init == 0: 20 | mem = np.ceil(K*np.random.rand(n,1))-1 21 | cent = np.zeros((K,m,1)) 22 | else: 23 | cent = init 24 | for i in range(n): 25 | for k in range(K): 26 | Dist[i,k],_,_ = dhat_shift_dep(cent[k,:],ts[i,:],shift) 27 | mem = np.argmin(Dist,axis=1) 28 | 29 | prevErr = -1 30 | try_ = 0 31 | 32 | for it in range(iter_): 33 | print('Iteration',it) 34 | prev_mem = mem; 35 | for k in range(K): 36 | cent[k] = ksc_centroid(mem, ts, k, cent[k,:], shift) 37 | 38 | for i in range(n): 39 | for k in range(K): 40 | Dist[i,k],_,_ = dhat_shift_dep(cent[k,:],ts[i,:],shift) 41 | 42 | mem = np.argmin(Dist,axis=1) 43 | err = norm(prev_mem-mem) 44 | 45 | if err == 0: 46 | break 47 | else: 48 | if err == prevErr: 49 | try_ = try_ + 1 50 | if try_ > 2: 51 | break 52 | else: 53 | prevErr = err 54 | try_ = 0 55 | print('||PrevMem-CurMem||=',err) 56 | 57 | finalNorm = err 58 | return mem,Dist,cent,finalNorm 59 | -------------------------------------------------------------------------------- /python/mkShape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | import pdb 4 | from distances import sbd_dep_multi 5 | from centroids import kShape_centroid 6 | 7 | def multidim_kShape(ts,K,shift,init = 0, verbose = 1): 8 | iter_ = 100 9 | ndim = len(ts.shape) 10 | if ndim == 2: 11 | print('Warning: univariate time series input found, running kShape instead...') 12 | from kShape import kShape 13 | return kShape(ts,K,shift,init) 14 | n,m,d = ts.shape 15 | Dist = np.zeros((n,K)) 16 | 17 | #np.random.seed(42) 18 | if init == 0: 19 | mem = np.ceil(K*np.random.rand(n)) - 1 20 | #mem = np.zeros((n)) 21 | #ii = 0 22 | #with open('compareMatlab/memInit') as fi: 23 | # for line in fi: 24 | # mem[ii] = int(line.rstrip('\n'))-1 25 | # ii += 1 26 | cent = np.zeros((K,m,d)) 27 | else: 28 | cent = init 29 | for i in range(n): 30 | for k in range(K): 31 | Dist[i,k],_,_ = sbd_dep_multi(cent[k,:,:],ts[i,:,:],shift) 32 | mem = np.argmin(Dist,axis=1) 33 | 34 | prevErr = -1 35 | try_ = 0 36 | for it in range(iter_): 37 | if verbose: 38 | print('Iteration',it) 39 | prev_mem = mem; 40 | #pdb.set_trace() 41 | for k in range(K): 42 | cent[k,:,:] = kShape_centroid(mem, ts, k, cent[k,:,:], shift) 43 | 44 | for i in range(n): 45 | for k in range(K): 46 | dist_,_,_ = sbd_dep_multi(cent[k,:,:],ts[i,:,:],shift) 47 | Dist[i,k] = dist_ 48 | 49 | mem = np.argmin(Dist,axis=1) 50 | 51 | err = norm(prev_mem-mem) 52 | 53 | if err == 0: 54 | break 55 | else: 56 | if err == prevErr: 57 | try_ = try_ + 1 58 | if try_ > 2: 59 | break 60 | else: 61 | prevErr = err 62 | try_ = 0 63 | if verbose: 64 | print('||PrevMem-CurMem||=',err) 65 | finalNorm = err 66 | 67 | return mem,Dist,cent,finalNorm 68 | -------------------------------------------------------------------------------- /python/mksc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | import pdb 4 | from distances import dhat_shift_dep 5 | from centroids import ksc_centroid 6 | 7 | def multidim_kSC(ts,K,shift,init = 0, verbose = 1): 8 | iter_ = 100 9 | ndim = len(ts.shape) 10 | if ndim == 2: 11 | print('Warning: univariate time series input found, running kSC instead...') 12 | from ksc import kSC 13 | return kSC(ts,K,shift,init) 14 | n,m,d = ts.shape 15 | Dist = np.zeros((n,K)) 16 | 17 | np.random.seed(42) 18 | if init == 0: 19 | mem = np.ceil(K*np.random.rand(n,1))-1 20 | cent = np.zeros((K,m,d)) 21 | else: 22 | cent = init 23 | for i in range(n): 24 | for k in range(K): 25 | Dist[i,k],_,_ = dhat_shift_dep(cent[k,:,:],ts[i,:,:],shift) 26 | mem = np.argmin(Dist,axis=1) 27 | 28 | prevErr = -1 29 | try_ = 0 30 | 31 | for it in range(iter_): 32 | if verbose: 33 | print('Iteration',it) 34 | prev_mem = mem; 35 | for k in range(K): 36 | cent[k,:,:] = ksc_centroid(mem, ts, k, cent[k,:,:], shift) 37 | 38 | for i in range(n): 39 | for k in range(K): 40 | Dist[i,k],_,_ = dhat_shift_dep(cent[k,:,:],ts[i,:,:],shift) 41 | 42 | mem = np.argmin(Dist,axis=1) 43 | err = norm(prev_mem-mem) 44 | 45 | if err == 0: 46 | break 47 | else: 48 | if err == prevErr: 49 | try_ = try_ + 1 50 | if try_ > 2: 51 | break 52 | else: 53 | prevErr = err 54 | try_ = 0 55 | if verbose: 56 | print('||PrevMem-CurMem||=',err) 57 | 58 | finalNorm = err 59 | return mem,Dist,cent,finalNorm 60 | -------------------------------------------------------------------------------- /python/mvkShape.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.linalg import norm 3 | import pdb 4 | from distances import sbd_dep_multi 5 | from centroids import vkShape_centroid 6 | from joblib import Parallel 7 | import time 8 | 9 | def multidim_vkShape(ts,K,shift,init = 0, verbose = 1, alpha=0): 10 | iter_ = 100 11 | ndim = len(ts.shape) 12 | if ndim == 2: 13 | print('Warning: univariate time series input found, running kShape instead...') 14 | from kShape import kShape 15 | return kShape(ts,K,shift,init) 16 | n,m,d = ts.shape 17 | Dist = np.zeros((n,K)) 18 | covar = np.zeros((K, d, d)); 19 | 20 | #np.random.seed(42) 21 | if init == 0: 22 | mem = np.ceil(K*np.random.rand(n)) - 1 23 | #mem = np.zeros((n)) 24 | #ii = 0 25 | #with open('compareMatlab/memInit') as fi: 26 | # for line in fi: 27 | # mem[ii] = int(line.rstrip('\n'))-1 28 | # ii += 1 29 | cent = np.zeros((K,m,d)) 30 | else: 31 | cent = init 32 | for i in range(n): 33 | for k in range(K): 34 | Dist[i,k],_,_ = sbd_dep_multi(cent[k,:,:],ts[i,:,:],shift) 35 | mem = np.argmin(Dist,axis=1) 36 | 37 | prevErr = -1 38 | try_ = 0 39 | for it in range(iter_): 40 | if verbose: 41 | print('Iteration',it) 42 | start_time = time.time() 43 | prev_mem = mem; 44 | #pdb.set_trace() 45 | with Parallel(n_jobs=15) as parallel: 46 | for k in range(K): 47 | cent[k,:,:],covar[k,:,:] = vkShape_centroid(mem, ts, k, cent[k,:,:], shift) 48 | with Parallel(n_jobs=15) as parallel: 49 | for i in range(n): 50 | x_i = ts[i]; 51 | 52 | covar_i = np.matmul(x_i.T,x_i)/m; 53 | for k in range(K): 54 | dist_,_,_ = sbd_dep_multi(cent[k,:,:],ts[i,:,:],shift) 55 | dist_ += alpha*norm(covar[k,:,:]-covar_i,'fro')**2; 56 | 57 | Dist[i,k] = dist_ 58 | 59 | mem = np.argmin(Dist,axis=1) 60 | 61 | err = norm(prev_mem-mem) 62 | 63 | if err == 0: 64 | break 65 | else: 66 | if err == prevErr: 67 | try_ = try_ + 1 68 | if try_ > 2: 69 | break 70 | else: 71 | prevErr = err 72 | try_ = 0 73 | if verbose: 74 | print('||PrevMem-CurMem||=',err) 75 | print(time.time()-start_time,'secs') 76 | finalNorm = err 77 | 78 | return mem,Dist,cent,covar,finalNorm 79 | -------------------------------------------------------------------------------- /python/util.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | from scipy.misc import comb 4 | 5 | def rand_index(yhat, y): 6 | tp_plus_fp = comb(np.bincount(yhat), 2).sum() 7 | tp_plus_fn = comb(np.bincount(y), 2).sum() 8 | A = np.c_[(yhat, y)] 9 | tp = sum(comb(np.bincount(A[A[:, 0] == i, 1]), 2).sum() 10 | for i in set(yhat)) 11 | fp = tp_plus_fp - tp 12 | fn = tp_plus_fn - tp 13 | tn = comb(len(A), 2) - tp - fp - fn 14 | return (tp + tn) / (tp + fp + fn + tn) 15 | 16 | def toy_dataset(type_): 17 | toy_dataset_single = np.array( 18 | [ 19 | [1,2,3,4,5], 20 | [2,3,4,5,6], 21 | [3,4,5,6,7], 22 | [7,6,5,4,3], 23 | [6,5,4,3,2], 24 | [5,4,3,2,1] 25 | ] 26 | ) 27 | toy_dataset = np.array( 28 | [ 29 | [ 30 | [1,5], 31 | [2,4], 32 | [3,3], 33 | [4,2], 34 | [5,1] 35 | ], 36 | [ 37 | [2,6], 38 | [3,5], 39 | [4,4], 40 | [5,3], 41 | [6,2] 42 | ], 43 | [ 44 | [3,7], 45 | [4,6], 46 | [5,5], 47 | [6,4], 48 | [7,3] 49 | ], 50 | [ 51 | [5,1], 52 | [4,2], 53 | [3,3], 54 | [2,4], 55 | [1,5] 56 | ], 57 | [ 58 | [6,2], 59 | [5,3], 60 | [4,4], 61 | [3,5], 62 | [2,6] 63 | ], 64 | [ 65 | [7,3], 66 | [6,4], 67 | [5,5], 68 | [4,6], 69 | [3,7] 70 | ] 71 | ] 72 | ) 73 | if type_ == 'single': 74 | return toy_dataset_single 75 | elif type_ == 'multi': 76 | return toy_dataset 77 | else: 78 | print('Warning choose input parameter as single or multi...') 79 | --------------------------------------------------------------------------------