├── .gitignore ├── MATLAB ├── README.txt ├── demo.m ├── extract_k_motif.m ├── guide_serach.m ├── mstamp.m ├── mstamp_any.m ├── mstamp_any_par.m ├── mstamp_par.m ├── plot_motif_on_data.m ├── toy_data.mat └── unconstrain_search.m ├── Python ├── README.txt ├── demo.py ├── mstamp_stamp.py ├── mstamp_stomp.py └── toy_data.mat └── README.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vscode/ 3 | -------------------------------------------------------------------------------- /MATLAB/README.txt: -------------------------------------------------------------------------------- 1 | See demo.m for examples of using the functions 2 | 3 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 4 | Multidimensional Motif Discovery," IEEE ICDM 2017. 5 | https://sites.google.com/view/mstamp/ 6 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 7 | -------------------------------------------------------------------------------- /MATLAB/demo.m: -------------------------------------------------------------------------------- 1 | %% 2 | % Chin-Chia Michael Yeh 3 | % 4 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 5 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 6 | % https://sites.google.com/view/mstamp/ 7 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 8 | % 9 | 10 | clear 11 | clc 12 | 13 | load('toy_data.mat'); 14 | 15 | %% compute the multidimensional matrix profile 16 | % here we provided three variation of the mSTAMP algorithm 17 | % The script will only run when only one of the alternatives is uncomment 18 | 19 | %% alternative 1.a: the basic version 20 | 21 | must_dim = []; 22 | exc_dim = []; 23 | [pro_mul, pro_idx] = ... 24 | mstamp(data, sub_len, must_dim, exc_dim); 25 | 26 | %% alternative 1.b: the inclusion 27 | % in the toy data, the first dimension only consist of random walk. 28 | % Forcing the algorithm to consider the first dimension worsen the result. 29 | 30 | % must_dim = [1]; 31 | % exc_dim = []; 32 | % [pro_mul, pro_idx] = ... 33 | % mstamp(data, sub_len, must_dim, exc_dim); 34 | 35 | %% alternative 1.c: the exclusion 36 | % We can also do exclusion. By blacklist one of the dimension that contains 37 | % meaningful motif, we no longer can find a meaningful 2-dimensional motif. 38 | % However, the MDL-based unconstrained search method will correctly provide 39 | % us the 1-dimensional motif 40 | 41 | % must_dim = []; 42 | % exc_dim = [3]; 43 | % [pro_mul, pro_idx] = ... 44 | % mstamp(data, sub_len, must_dim, exc_dim); 45 | % pro_mul = pro_mul(:, 1:2); 46 | % pro_idx = pro_idx(:, 1:2); 47 | % data = data(:, 1:2); 48 | 49 | %% alternative 2: using Parallel Computing Toolbox 50 | 51 | % n_work = 4; 52 | % [pro_mul, pro_idx] = ... 53 | % mstamp_par(data, sub_len, n_work); 54 | 55 | %% alternative 3: using the anytime version stop at 10% 56 | % the guided search is able to find the motif mostly 57 | % however, the MDL-based method's output is less stable due to both method 58 | % are approximated method 59 | 60 | % pct_stop = 0.1; 61 | % [pro_mul, pro_idx] = mstamp_any(data, sub_len, pct_stop); 62 | 63 | 64 | %% guided search for 2-dimensional motif 65 | n_dim = 2; % we want the top 2-dimensional motif 66 | [motif_idx, motif_dim] = guide_serach(... 67 | data, sub_len, pro_mul, pro_idx, n_dim); 68 | plot_motif_on_data(data, sub_len, motif_idx, motif_dim); 69 | 70 | 71 | %% extract motif using the MDL-based unconstrained search method 72 | n_bit = 4; % number of bit for discretization 73 | k = 2; % number of motif to retrieve 74 | [motif_idx, motif_dim] = unconstrain_search(... 75 | data, sub_len, pro_mul, pro_idx, n_bit, k); 76 | plot_motif_on_data(data, sub_len, motif_idx, motif_dim); 77 | 78 | %% the function can also be used to compute the 1D matrix profile 79 | [pro_mul_2, ~] = ... 80 | mstamp(data(:, 2), sub_len, must_dim, exc_dim); 81 | figure(); 82 | plot(pro_mul_2); 83 | -------------------------------------------------------------------------------- /MATLAB/extract_k_motif.m: -------------------------------------------------------------------------------- 1 | % MDL Based Motif Discovery for Multidimensional Matrix Profile 2 | % Chin-Chia Michael Yeh 3 | % 4 | % [motif_idx, motif_dim] = extract_k_motif(... 5 | % data, sub_len, pro_mul, pro_idx, n_bit, k) 6 | % 7 | % Output: 8 | % motif_idx: the index for the founded motifs (matrix) 9 | % motif_dim: the dimensions spanned by the found motifs (cell) 10 | % Input: 11 | % data: input time series (matrix) 12 | % sub_len: interested subsequence length (scalar) 13 | % pro_mul: multidimensional matrix profile (matrix) 14 | % pro_idx: matrix profile index (matrix) 15 | % n_bit: number of bit for discretization (scalar) 16 | % k: number of motif wish to retrieve, set to inf for retrieving 17 | % all possible k-motifs (scalar) 18 | % 19 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 20 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 21 | % https://sites.google.com/view/mstamp/ 22 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 23 | % 24 | 25 | function [motif_idx, motif_dim] = extract_k_motif(... 26 | data, sub_len, pro_mul, pro_idx, n_bit, k) 27 | exc_zone = round(0.5 * sub_len); 28 | tot_dim = size(data, 2); 29 | if isinf(k) 30 | k = size(pro_mul, 1); 31 | end 32 | motif_idx = zeros(k, 1); 33 | motif_dim = cell(k, 1); 34 | base_bit = n_bit * tot_dim * sub_len * 2; 35 | for i = 1:k 36 | fprintf('finding motif %d ... \n', i); 37 | [val, idx_1] = min(pro_mul, [], 1); 38 | if any(isinf(val)) 39 | motif_idx = motif_idx(1:k-1); 40 | motif_dim = motif_dim(1:k-1); 41 | break; 42 | end 43 | 44 | bit_sz = zeros(tot_dim, 1); 45 | idx_2 = zeros(tot_dim, 1); 46 | dim = cell(tot_dim, 1); 47 | for j = 1:tot_dim 48 | idx_2(j) = pro_idx(idx_1(j), j); 49 | motif_1 = data(idx_1(j):idx_1(j) + sub_len - 1, :); 50 | motif_2 = data(idx_2(j):idx_2(j) + sub_len - 1, :); 51 | [bit_sz(j), dim{j}] = get_bit_save(motif_1, motif_2, j, n_bit); 52 | end 53 | [best_bit, min_idx] = min(bit_sz); 54 | if best_bit > base_bit 55 | motif_idx = motif_idx(1:k-1); 56 | motif_dim = motif_dim(1:k-1); 57 | break; 58 | end 59 | motif_idx(i, 1) = idx_1(min_idx); 60 | motif_dim{i} = dim{min_idx}; 61 | 62 | st_idx = max(1, motif_idx(i, 1) - exc_zone); 63 | ed_idx = min(size(pro_mul, 1), motif_idx(i, 1) + exc_zone); 64 | pro_mul(st_idx:ed_idx, :) = inf; 65 | end 66 | motif_dim = motif_dim(motif_idx ~= 0); 67 | motif_idx = motif_idx(motif_idx ~= 0); 68 | 69 | 70 | function [bit_sz, dim_id] = get_bit_save(motif_1, motif_2, n_dim, n_bit) 71 | tot_dim = size(motif_1, 2); 72 | sub_len = size(motif_1, 1); 73 | split_pt = get_desc_split_pt(n_bit); 74 | disc_1 = discretization(motif_1, split_pt); 75 | disc_2 = discretization(motif_2, split_pt); 76 | 77 | [~, dim_id] = sort(sum(abs(disc_1 - disc_2), 1), 'ascend'); 78 | dim_id = dim_id(1:n_dim); 79 | motif_diff = disc_1(:, dim_id) - disc_2(:, dim_id); 80 | n_val = length(unique(motif_diff)); 81 | 82 | bit_sz = n_bit * (tot_dim * sub_len * 2 - n_dim * sub_len); 83 | bit_sz = bit_sz + n_dim * sub_len * log2(n_val) + n_val * n_bit; 84 | 85 | 86 | function disc = discretization(motif, split_pt) 87 | for i = 1:size(motif, 2) 88 | motif(:, i) = (motif(:, i) - mean(motif(:, i))) / ... 89 | std(motif(:, i), 1); 90 | end 91 | disc = zeros(size(motif)); 92 | for i = 1:length(split_pt) 93 | disc(motif < split_pt(i) & disc == 0) = i; 94 | end 95 | disc(disc == 0) = length(split_pt) + 1; 96 | 97 | 98 | function split_pt = get_desc_split_pt(n_bit) 99 | split_pt = norminv((1:(2^n_bit)-1)/(2^n_bit), 0, 1); -------------------------------------------------------------------------------- /MATLAB/guide_serach.m: -------------------------------------------------------------------------------- 1 | % Guided Motif Discovery for Multidimensional Matrix Profile 2 | % Chin-Chia Michael Yeh 3 | % 4 | % [motif_idx, motif_dim] = guide_serach(... 5 | % data, sub_len, pro_mul, pro_idx, n_dim) 6 | % 7 | % Output: 8 | % motif_idx: the index for the founded motifs (matrix) 9 | % motif_dim: the dimensions spanned by the found motifs (cell) 10 | % Input: 11 | % data: input time series (matrix) 12 | % sub_len: interested subsequence length (scalar) 13 | % pro_mul: multidimensional matrix profile (matrix) 14 | % pro_idx: matrix profile index (matrix) 15 | % n_dim: the dimensionality of the motif that you wish to find (scalar) 16 | % 17 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 18 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 19 | % https://sites.google.com/view/mstamp/ 20 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 21 | % 22 | 23 | function [motif_idx, motif_dim] = guide_serach(... 24 | data, sub_len, pro_mul, pro_idx, n_dim) 25 | pro_mul = pro_mul(:, n_dim); 26 | pro_idx = pro_idx(:, n_dim); 27 | [~, motif_idx] = min(pro_mul); 28 | motif_idx = sort([motif_idx, pro_idx(motif_idx)]); 29 | 30 | motif_1 = data(motif_idx(1):motif_idx(1)+sub_len - 1, :); 31 | motif_2 = data(motif_idx(2):motif_idx(2)+sub_len - 1, :); 32 | 33 | [~, motif_dim] = sort(sum(abs(motif_1 - motif_2), 1), 'ascend'); 34 | motif_dim = sort(motif_dim(1:n_dim)); 35 | motif_dim = {motif_dim; motif_dim;}; -------------------------------------------------------------------------------- /MATLAB/mstamp.m: -------------------------------------------------------------------------------- 1 | % STOMP Based mSTAMP with Constrained Search Implemented 2 | % Chin-Chia Michael Yeh 3 | % 4 | % [pro_mul, pro_idx] = mstamp(data, sub_len, must_dim, exc_dim) 5 | % 6 | % Output: 7 | % pro_mul: multidimensional matrix profile (matrix) 8 | % pro_idx: matrix profile index (matrix) 9 | % Input: 10 | % data: input time series (matrix) 11 | % sub_len: interested subsequence length (scalar) 12 | % must_dim: the dimension which must be included (vector) 13 | % exc_dim: the dimension which must be excluded (vector) 14 | % 15 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 16 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 17 | % https://sites.google.com/view/mstamp/ 18 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 19 | % 20 | 21 | function [pro_mul, pro_idx] = ... 22 | mstamp(data, sub_len, must_dim, exc_dim) 23 | %% get various length 24 | exc_zone = round(sub_len / 2); 25 | data_len = size(data, 1); 26 | pro_len = data_len - sub_len + 1; 27 | n_dim = size(data, 2); 28 | 29 | %% check input 30 | if sub_len > data_len / 2 31 | error(['Error: Time series is too short relative ', ... 32 | 'to desired subsequence length']); 33 | end 34 | if sub_len < 4 35 | error('Error: Subsequence length must be at least 4'); 36 | end 37 | if any(must_dim > n_dim) 38 | error(['Error: The must have dimension must be less ', ... 39 | 'then the total dimension']); 40 | end 41 | if any(exc_dim > n_dim) 42 | error(['Error: The exclusion dimension must be less ', ... 43 | 'then the total dimension']); 44 | end 45 | if ~isempty(intersect(must_dim, exc_dim)) 46 | error(['Error: The same dimension is presented in both ', ... 47 | 'the exclusion dimension and must have dimension']); 48 | end 49 | 50 | %% check skip position 51 | n_exc = length(exc_dim); 52 | n_must = length(must_dim); 53 | mask_exc = false(n_dim, 1); 54 | mask_exc(exc_dim) = true; 55 | skip_loc = false(pro_len, 1); 56 | for i = 1:pro_len 57 | if any(isnan(reshape(data(i:i+sub_len-1, ~mask_exc), 1, []))) ... 58 | || any(isinf(reshape(data(i:i+sub_len-1, ~mask_exc), 1, []))) 59 | skip_loc(i) = true; 60 | end 61 | end 62 | data(isnan(data)) = 0; 63 | data(isinf(data)) = 0; 64 | 65 | %% initialization 66 | data_freq = zeros((sub_len + data_len), n_dim); 67 | data_mu = zeros(pro_len, n_dim); 68 | data_sig = zeros(pro_len, n_dim); 69 | first_prod = zeros(pro_len, n_dim); 70 | for i = 1:n_dim 71 | [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ... 72 | mass_pre(data(:, i), data_len, sub_len); 73 | [~, first_prod(:, i)] = mass(... 74 | data_freq(:, i), data(1:sub_len, i), data_len, ... 75 | sub_len, data_mu(:, i), data_sig(:, i), ... 76 | data_mu(1, i), data_sig(1, i)); 77 | end 78 | 79 | %% compute the matrix profile 80 | pro_mul = zeros(pro_len, n_dim); 81 | pro_idx = zeros(pro_len, n_dim); 82 | dist_pro = zeros(pro_len, n_dim); 83 | last_prod = zeros(pro_len, n_dim); 84 | drop_val = zeros(1, n_dim); 85 | for i = 1:pro_len 86 | % compute the distance profile 87 | fprintf('%d %d\n', i, pro_len); 88 | query = data(i:i+sub_len-1, :); 89 | if i==1 90 | for j = 1:n_dim 91 | [dist_pro(:, j), last_prod(:, j)] = ... 92 | mass(data_freq(:, j), query(:, j), ... 93 | data_len, sub_len, data_mu(:, j), ... 94 | data_sig(:, j), data_mu(i, j), ... 95 | data_sig(i, j)); 96 | end 97 | else 98 | last_prod(2:data_len - sub_len + 1, :) = ... 99 | last_prod(1:data_len - sub_len, :) ... 100 | - data(1:data_len - sub_len, :) ... 101 | .* repmat(drop_val, pro_len - 1, 1) ... 102 | + data(sub_len + 1:data_len, :) ... 103 | .* repmat(query(sub_len, :), pro_len - 1, 1); 104 | last_prod(1, :) = first_prod(i, :); 105 | dist_pro = 2 * (sub_len - (last_prod ... 106 | - sub_len * data_mu .* repmat(data_mu(i, :), pro_len, 1)) ... 107 | ./ (data_sig .* repmat(data_sig(i, :), pro_len, 1))); 108 | end 109 | dist_pro = real(dist_pro); 110 | dist_pro = max(dist_pro, 0); 111 | dist_pro = sqrt(dist_pro); 112 | drop_val(:) = query(1, :); 113 | 114 | % apply exclusion zone 115 | exc_st = max(1, i - exc_zone); 116 | exc_ed = min(pro_len, i+exc_zone); 117 | dist_pro(exc_st:exc_ed, :) = inf; 118 | dist_pro(data_sig < eps) = inf; 119 | if skip_loc(i) || any(data_sig(i, ~mask_exc) < eps) 120 | dist_pro = inf(size(dist_pro)); 121 | end 122 | dist_pro(skip_loc, :) = inf; 123 | 124 | % apply dimension "must have" and "exclusion" 125 | dist_pro(:, exc_dim) = inf; 126 | mask_must = false(n_must, 1); 127 | mask_must(must_dim) = true; 128 | dist_pro_must = dist_pro(:, mask_must); 129 | dist_pro(:, mask_must) = -inf; 130 | dist_pro_sort = sort(dist_pro, 2); 131 | dist_pro_sort(:, 1:n_must) = dist_pro_must; 132 | 133 | % figure out and store the nearest neighbor 134 | dist_pro_cum = zeros(pro_len, 1); 135 | dist_pro_merg = zeros(pro_len, 1); 136 | for j = max(1, n_must):(n_dim - n_exc) 137 | dist_pro_cum = dist_pro_cum + dist_pro_sort(:, j); 138 | dist_pro_merg(:) = dist_pro_cum / j; 139 | [min_val, min_idx] = min(dist_pro_merg); 140 | pro_mul(i, j) = min_val; 141 | pro_idx(i, j) = min_idx; 142 | end 143 | end 144 | 145 | %% remove bad k setting in the returned matrix 146 | % pro_mul = sqrt(pro_mul); 147 | pro_mul(:, 1:(n_must - 1)) = nan; 148 | pro_mul(:, (n_dim - n_exc + 1):end) = nan; 149 | pro_idx(:, 1:(n_must - 1)) = nan; 150 | pro_idx(:, (n_dim - n_exc + 1):end) = nan; 151 | 152 | 153 | %% The following two functions are modified from the code provided in the following URL 154 | % http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 155 | function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len) 156 | data(data_len+1:(sub_len+data_len)) = 0; 157 | data_freq = fft(data); 158 | data_cum = cumsum(data); 159 | data2_cum = cumsum(data.^2); 160 | data2_sum = data2_cum(sub_len:data_len) - ... 161 | [0; data2_cum(1:data_len-sub_len)]; 162 | data_sum = data_cum(sub_len:data_len) - ... 163 | [0; data_cum(1:data_len-sub_len)]; 164 | data_mu = data_sum./sub_len; 165 | data_sig2 = (data2_sum./sub_len)-(data_mu.^2); 166 | data_sig2 = real(data_sig2); 167 | data_sig2 = max(data_sig2, 0); 168 | data_sig = sqrt(data_sig2); 169 | 170 | function [dist_pro, last_prod] = mass(data_freq, query, ... 171 | data_len, sub_len, data_mu, data_sig, query_mu, query_sig) 172 | % pre-process query for fft 173 | query = query(end:-1:1); 174 | query(sub_len+1:(sub_len+data_len)) = 0; 175 | 176 | % compute the product 177 | query_freq = fft(query); 178 | product_freq = data_freq.*query_freq; 179 | product = ifft(product_freq); 180 | 181 | % compute the distance profile 182 | dist_pro = 2 * (sub_len - ... 183 | (product(sub_len:data_len) - sub_len*data_mu*query_mu)./... 184 | (data_sig * query_sig)); 185 | last_prod = real(product(sub_len:data_len)); -------------------------------------------------------------------------------- /MATLAB/mstamp_any.m: -------------------------------------------------------------------------------- 1 | % STAMP Based mSTAMP Implemented as an Anytime Algorithm 2 | % Chin-Chia Michael Yeh 3 | % 4 | % [pro_mul, pro_idx] = mstamp_any(data, sub_len, pct_stop) 5 | % 6 | % Output: 7 | % pro_mul: multidimensional matrix profile (matrix) 8 | % pro_idx: matrix profile index (matrix) 9 | % Input: 10 | % data: input time series (matrix) 11 | % sub_len: interested subsequence length (scalar) 12 | % pct_stop: stop percentage, a number from 0 to 1 (scalar) 13 | % 14 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 15 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 16 | % https://sites.google.com/view/mstamp/ 17 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 18 | % 19 | 20 | function [pro_mul, pro_idx] = mstamp_any(data, sub_len, pct_stop) 21 | %% get various length 22 | exc_zone = round(sub_len / 2); 23 | data_len = size(data, 1); 24 | n_dim = size(data, 2); 25 | pro_len = data_len - sub_len + 1; 26 | itr_stop = round(pro_len * pct_stop); 27 | if itr_stop < 1 28 | itr_stop = 1; 29 | end 30 | if itr_stop > pro_len 31 | itr_stop = pro_len; 32 | end 33 | 34 | %% check input 35 | if sub_len > data_len / 2 36 | error(['Error: Time series is too short relative to desired ' ... 37 | 'subsequence length']); 38 | end 39 | if sub_len < 4 40 | error('Error: Subsequence length must be at least 4'); 41 | end 42 | if pct_stop < 0 43 | error('Error: Stop percentage must be greater than 0'); 44 | end 45 | if pct_stop > 1 46 | error('Error: Stop percentage must be less than or equal to 1'); 47 | end 48 | 49 | %% check skip position 50 | skip_loc = false(pro_len, 1); 51 | for i = 1:pro_len 52 | if any(isnan(reshape(data(i:i+sub_len-1, :), 1, []))) ... 53 | || any(isinf(reshape(data(i:i+sub_len-1, :), 1, []))) 54 | skip_loc(i) = true; 55 | end 56 | end 57 | data(isnan(data)) = 0; 58 | data(isinf(data)) = 0; 59 | 60 | %% initialization 61 | data_freq = zeros((sub_len + data_len), n_dim); 62 | data_mu = zeros(pro_len, n_dim); 63 | data_sig = zeros(pro_len, n_dim); 64 | first_prod = zeros(pro_len, n_dim); 65 | for i = 1:n_dim 66 | [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ... 67 | mass_pre(data(:, i), data_len, sub_len); 68 | [~, first_prod(:, i)] = mass(... 69 | data_freq(:, i), data(1:sub_len, i), data_len, ... 70 | sub_len, data_mu(:, i), data_sig(:, i), ... 71 | data_mu(1, i), data_sig(1, i)); 72 | end 73 | 74 | %% compute the matrix profile 75 | dist_pro = zeros(pro_len, n_dim); 76 | last_prod = zeros(pro_len, n_dim); 77 | pro_mul = inf(pro_len, n_dim); 78 | pro_idx = zeros(pro_len, n_dim); 79 | idxs = randperm(pro_len); 80 | idxs = idxs(1:itr_stop); 81 | for j = 1:length(idxs) 82 | idx = idxs(j); 83 | fprintf('%d %d\n', j, pro_len); 84 | query = data(idx:idx+sub_len-1, :); 85 | for k = 1:n_dim 86 | [dist_pro(:, k), last_prod(:, k)] = ... 87 | mass(data_freq(:, k), query(:, k), ... 88 | data_len, sub_len, data_mu(:, k), ... 89 | data_sig(:, k), data_mu(idx, k), ... 90 | data_sig(idx, k)); 91 | end 92 | dist_pro = real(dist_pro); 93 | dist_pro = max(dist_pro, 0); 94 | dist_pro = sqrt(dist_pro); 95 | 96 | % apply exclusion zone 97 | exc_zone_st = max(1, idx - exc_zone); 98 | exc_zone_ed = min(pro_len, idx + exc_zone); 99 | dist_pro(exc_zone_st:exc_zone_ed, :) = inf; 100 | dist_pro(data_sig < eps) = inf; 101 | if skip_loc(idx) 102 | dist_pro = inf(size(dist_pro)); 103 | end 104 | dist_pro(skip_loc, :) = inf; 105 | 106 | % figure out and store the nearest neighbor 107 | dist_pro_sort = sort(dist_pro, 2); 108 | dist_pro_cum = zeros(pro_len, 1); 109 | dist_pro_merg = zeros(pro_len, 1); 110 | for k = 1:n_dim 111 | dist_pro_cum = dist_pro_cum + dist_pro_sort(:, k); 112 | dist_pro_merg(:) = dist_pro_cum / k; 113 | update_idx = dist_pro_merg < pro_mul(:, k); 114 | pro_mul(update_idx, k) = dist_pro_merg(update_idx); 115 | pro_idx(update_idx, k) = idx; 116 | end 117 | end 118 | % pro_mul = sqrt(pro_mul); 119 | 120 | 121 | %% The following two functions are modified from the code provided in the following URL 122 | % http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 123 | function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len) 124 | data(data_len+1:(sub_len+data_len)) = 0; 125 | data_freq = fft(data); 126 | data_cum = cumsum(data); 127 | data2_cum = cumsum(data.^2); 128 | data2_sum = data2_cum(sub_len:data_len) - ... 129 | [0; data2_cum(1:data_len-sub_len)]; 130 | data_sum = data_cum(sub_len:data_len) - ... 131 | [0; data_cum(1:data_len-sub_len)]; 132 | data_mu = data_sum./sub_len; 133 | data_sig2 = (data2_sum./sub_len)-(data_mu.^2); 134 | data_sig2 = real(data_sig2); 135 | data_sig2 = max(data_sig2, 0); 136 | data_sig = sqrt(data_sig2); 137 | 138 | function [dist_pro, last_prod] = mass(data_freq, query, ... 139 | data_len, sub_len, data_mu, data_sig, query_mu, query_sig) 140 | % pre-process query for fft 141 | query = query(end:-1:1); 142 | query(sub_len+1:(sub_len+data_len)) = 0; 143 | 144 | % compute the product 145 | query_freq = fft(query); 146 | product_freq = data_freq.*query_freq; 147 | product = ifft(product_freq); 148 | 149 | % compute the distance profile 150 | dist_pro = 2 * (sub_len - ... 151 | (product(sub_len:data_len) - sub_len*data_mu*query_mu)./... 152 | (data_sig * query_sig)); 153 | last_prod = real(product(sub_len:data_len)); -------------------------------------------------------------------------------- /MATLAB/mstamp_any_par.m: -------------------------------------------------------------------------------- 1 | % STOMP Based mSTAMP with Parallelization (Parallel Computing Toolbox) 2 | % Chin-Chia Michael Yeh 3 | % 4 | % [pro_mul, pro_idx] = mstamp_par(data, sub_len, n_work) 5 | % 6 | % Output: 7 | % pro_mul: multidimensional matrix profile (matrix) 8 | % pro_idx: matrix profile index (matrix) 9 | % Input: 10 | % data: input time series (matrix) 11 | % sub_len: interested subsequence length (scalar) 12 | % n_work: number of walker for parfor (scalar) 13 | % 14 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 15 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 16 | % https://sites.google.com/view/mstamp/ 17 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 18 | % 19 | 20 | function [pro_mul, pro_idx] = ... 21 | mstamp_any_par(data, sub_len, pct_stop, n_work) 22 | %% setup pool 23 | if isempty(which('parpool')) 24 | if matlabpool('size') <= 0 %#ok<*DPOOL> 25 | matlabpool(n_work); 26 | elseif matlabpool('size')~= n_work 27 | matlabpool('close'); 28 | matlabpool(n_work); 29 | end 30 | else 31 | pool = gcp('nocreate'); 32 | if isempty(gcp('nocreate')) 33 | parpool(n_work); 34 | elseif pool.NumWorkers ~= n_work 35 | delete(gcp('nocreate')); 36 | parpool(n_work); 37 | end 38 | end 39 | 40 | %% get various length 41 | exc_zone = round(sub_len / 2); 42 | data_len = size(data, 1); 43 | n_dim = size(data, 2); 44 | pro_len = data_len - sub_len + 1; 45 | 46 | %% check input 47 | if sub_len > data_len / 2 48 | error(['Error: Time series is too short relative to desired ' ... 49 | 'subsequence length']); 50 | end 51 | if sub_len < 4 52 | error('Error: Subsequence length must be at least 4'); 53 | end 54 | 55 | %% check skip position 56 | skip_loc = false(pro_len, 1); 57 | for i = 1:pro_len 58 | if any(isnan(reshape(data(i:i+sub_len-1, :), 1, []))) ... 59 | || any(isinf(reshape(data(i:i+sub_len-1, :), 1, []))) 60 | skip_loc(i) = true; 61 | end 62 | end 63 | data(isnan(data)) = 0; 64 | data(isinf(data)) = 0; 65 | 66 | %% initialization 67 | data_freq = zeros((sub_len + data_len), n_dim); 68 | data_mu = zeros(pro_len, n_dim); 69 | data_sig = zeros(pro_len, n_dim); 70 | for i = 1:n_dim 71 | [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ... 72 | mass_pre(data(:, i), data_len, sub_len); 73 | end 74 | 75 | %% initialize variable 76 | idx = 1:pro_len; 77 | idx(skip_loc) = []; 78 | idx = idx(randperm(length(idx))); 79 | itr_stop = round(length(idx) * pct_stop); 80 | idx = idx(1:itr_stop); 81 | per_work = round(length(idx) / n_work); 82 | idx_work = cell(n_work, 1); 83 | pro_muls = cell(n_work, 1); 84 | pro_idxs = cell(n_work, 1); 85 | for i = 1:n_work 86 | idx_st = (i - 1) * per_work + 1; 87 | if i == n_work 88 | idx_ed = length(idx); 89 | else 90 | idx_ed = i * per_work; 91 | end 92 | idx_work{i} = idx(idx_st:idx_ed); 93 | pro_muls{i} = inf(pro_len, n_dim); 94 | pro_idxs{i} = inf(pro_len, n_dim); 95 | end 96 | 97 | %% compute the matrix profile 98 | parfor i = 1:n_work 99 | dist_pro = zeros(pro_len, n_dim); 100 | 101 | for j = 1:length(idx_work{i}) 102 | idx = idx_work{i}(j); 103 | fprintf('%d-%d %d\n', i, j, length(idx_work{i})); 104 | query = data(idx:idx+sub_len-1, :); 105 | for k = 1:n_dim 106 | [dist_pro(:, k), ~] = ... 107 | mass(data_freq(:, k), query(:, k), ... 108 | data_len, sub_len, data_mu(:, k), ... 109 | data_sig(:, k), data_mu(idx, k), ... 110 | data_sig(idx, k)); 111 | end 112 | dist_pro = real(dist_pro); 113 | dist_pro = max(dist_pro, 0); 114 | dist_pro = sqrt(dist_pro); 115 | 116 | % apply exclusion zone 117 | exc_zone_st = max(1, idx - exc_zone); 118 | exc_zone_ed = min(pro_len, idx + exc_zone); 119 | dist_pro(exc_zone_st:exc_zone_ed, :) = inf; 120 | dist_pro(data_sig < eps) = inf; 121 | if skip_loc(idx) 122 | dist_pro = inf(size(dist_pro)); 123 | end 124 | dist_pro(skip_loc, :) = inf; 125 | 126 | % figure out and store the nearest neighbor 127 | dist_pro_sort = sort(dist_pro, 2); 128 | dist_pro_cum = zeros(pro_len, 1); 129 | dist_pro_merg = zeros(pro_len, 1); 130 | for k = 1:n_dim 131 | dist_pro_cum = dist_pro_cum + dist_pro_sort(:, k); 132 | dist_pro_merg(:) = dist_pro_cum / k; 133 | update_idx = dist_pro_merg < pro_muls{i}(:, k); 134 | pro_muls{i}(update_idx, k) = dist_pro_merg(update_idx); 135 | pro_idxs{i}(update_idx, k) = idx; 136 | end 137 | end 138 | % pro_muls{i} = sqrt(pro_muls{i}); 139 | end 140 | 141 | %% merge workers' result 142 | pro_mul = inf(pro_len, n_dim); 143 | pro_idx = inf(pro_len, n_dim); 144 | for i = 1:n_work 145 | for j = 1:n_dim 146 | update_idx = pro_muls{i}(:, j) < pro_mul(:, j); 147 | pro_mul(update_idx, j) = pro_muls{i}(update_idx, j); 148 | pro_idx(update_idx, j) = pro_idxs{i}(update_idx, j); 149 | end 150 | end 151 | 152 | %% The following two functions are modified from the code provided in the following URL 153 | % http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 154 | function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len) 155 | data(data_len+1:(sub_len+data_len)) = 0; 156 | data_freq = fft(data); 157 | data_cum = cumsum(data); 158 | data2_cum = cumsum(data.^2); 159 | data2_sum = data2_cum(sub_len:data_len) - ... 160 | [0; data2_cum(1:data_len-sub_len)]; 161 | data_sum = data_cum(sub_len:data_len) - ... 162 | [0; data_cum(1:data_len-sub_len)]; 163 | data_mu = data_sum./sub_len; 164 | data_sig2 = (data2_sum./sub_len)-(data_mu.^2); 165 | data_sig2 = real(data_sig2); 166 | data_sig2 = max(data_sig2, 0); 167 | data_sig = sqrt(data_sig2); 168 | 169 | function [dist_pro, last_prod] = mass(data_freq, query, ... 170 | data_len, sub_len, data_mu, data_sig, query_mu, query_sig) 171 | % proprocess query for fft 172 | query = query(end:-1:1); 173 | query(sub_len+1:(sub_len+data_len)) = 0; 174 | 175 | % compute the product 176 | query_freq = fft(query); 177 | product_freq = data_freq.*query_freq; 178 | product = ifft(product_freq); 179 | 180 | % compute the distance profile 181 | dist_pro = 2 * (sub_len - ... 182 | (product(sub_len:data_len) - sub_len*data_mu*query_mu)./... 183 | (data_sig * query_sig)); 184 | last_prod = real(product(sub_len:data_len)); -------------------------------------------------------------------------------- /MATLAB/mstamp_par.m: -------------------------------------------------------------------------------- 1 | % STOMP Based mSTAMP with Parallelization (Parallel Computing Toolbox) 2 | % Chin-Chia Michael Yeh 3 | % 4 | % [pro_mul, pro_idx] = mstamp_par(data, sub_len, n_work) 5 | % 6 | % Output: 7 | % pro_mul: multidimensional matrix profile (matrix) 8 | % pro_idx: matrix profile index (matrix) 9 | % Input: 10 | % data: input time series (matrix) 11 | % sub_len: interested subsequence length (scalar) 12 | % n_work: number of walker for parfor (scalar) 13 | % 14 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 15 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 16 | % https://sites.google.com/view/mstamp/ 17 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 18 | % 19 | 20 | function [pro_mul, pro_idx] = ... 21 | mstamp_par(data, sub_len, n_work) 22 | %% setup pool 23 | if isempty(which('parpool')) 24 | if matlabpool('size') <= 0 %#ok<*DPOOL> 25 | matlabpool(n_work); 26 | elseif matlabpool('size')~= n_work 27 | matlabpool('close'); 28 | matlabpool(n_work); 29 | end 30 | else 31 | pool = gcp('nocreate'); 32 | if isempty(gcp('nocreate')) 33 | parpool(n_work); 34 | elseif pool.NumWorkers ~= n_work 35 | delete(gcp('nocreate')); 36 | parpool(n_work); 37 | end 38 | end 39 | 40 | %% get various length 41 | exc_zone = round(sub_len / 2); 42 | data_len = size(data, 1); 43 | n_dim = size(data, 2); 44 | pro_len = data_len - sub_len + 1; 45 | 46 | %% check input 47 | if sub_len > data_len / 2 48 | error(['Error: Time series is too short relative to desired ' ... 49 | 'subsequence length']); 50 | end 51 | if sub_len < 4 52 | error('Error: Subsequence length must be at least 4'); 53 | end 54 | 55 | %% check skip position 56 | skip_loc = false(pro_len, 1); 57 | for i = 1:pro_len 58 | if any(isnan(reshape(data(i:i+sub_len-1, :), 1, []))) ... 59 | || any(isinf(reshape(data(i:i+sub_len-1, :), 1, []))) 60 | skip_loc(i) = true; 61 | end 62 | end 63 | data(isnan(data)) = 0; 64 | data(isinf(data)) = 0; 65 | 66 | %% initialization 67 | data_freq = zeros((sub_len + data_len), n_dim); 68 | data_mu = zeros(pro_len, n_dim); 69 | data_sig = zeros(pro_len, n_dim); 70 | first_prod = zeros(pro_len, n_dim); 71 | for i = 1:n_dim 72 | [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ... 73 | mass_pre(data(:, i), data_len, sub_len); 74 | [~, first_prod(:, i)] = mass(... 75 | data_freq(:, i), data(1:sub_len, i), data_len, ... 76 | sub_len, data_mu(:, i), data_sig(:, i), ... 77 | data_mu(1, i), data_sig(1, i)); 78 | end 79 | 80 | %% initialize variable 81 | per_work = round(pro_len / n_work); 82 | idx_work = cell(n_work, 1); 83 | pro_muls = cell(n_work, 1); 84 | pro_idxs = cell(n_work, 1); 85 | for i = 1:n_work 86 | idx_st = (i - 1) * per_work + 1; 87 | if i == n_work 88 | idx_ed = pro_len; 89 | else 90 | idx_ed = i * per_work; 91 | end 92 | idx_work{i} = idx_st:idx_ed; 93 | pro_muls{i} = zeros(length(idx_work{i}), n_dim); 94 | pro_idxs{i} = zeros(length(idx_work{i}), n_dim); 95 | end 96 | 97 | %% compute the matrix profile 98 | parfor i = 1:n_work 99 | dist_pro = zeros(pro_len, n_dim); 100 | last_prod = zeros(pro_len, n_dim); 101 | drop_val = zeros(1, n_dim); 102 | 103 | for j = 1:length(idx_work{i}) 104 | idx = idx_work{i}(j); 105 | fprintf('%d-%d %d\n', i, j, length(idx_work{i})); 106 | query = data(idx:idx+sub_len-1, :); 107 | if j == 1 108 | for k = 1:n_dim 109 | [dist_pro(:, k), last_prod(:, k)] = ... 110 | mass(data_freq(:, k), query(:, k), ... 111 | data_len, sub_len, data_mu(:, k), ... 112 | data_sig(:, k), data_mu(idx, k), ... 113 | data_sig(idx, k)); 114 | end 115 | else 116 | last_prod(2:data_len - sub_len + 1, :) = ... 117 | last_prod(1:data_len - sub_len, :) ... 118 | - data(1:data_len - sub_len, :) ... 119 | .* repmat(drop_val, pro_len - 1, 1) ... 120 | + data(sub_len + 1:data_len, :) ... 121 | .* repmat(query(sub_len, :), pro_len - 1, 1); 122 | last_prod(1, :) = first_prod(idx, :); 123 | dist_pro = 2 * (sub_len - (last_prod ... 124 | - sub_len * data_mu .* repmat(data_mu(idx, :), pro_len, 1)) ... 125 | ./ (data_sig .* repmat(data_sig(idx, :), pro_len, 1))); 126 | end 127 | dist_pro = real(dist_pro); 128 | dist_pro = max(dist_pro, 0); 129 | dist_pro = sqrt(dist_pro); 130 | drop_val = query(1, :); 131 | 132 | % apply exclusion zone 133 | exc_zone_st = max(1, idx - exc_zone); 134 | exc_zone_ed = min(pro_len, idx + exc_zone); 135 | dist_pro(exc_zone_st:exc_zone_ed, :) = inf; 136 | dist_pro(data_sig < eps) = inf; 137 | if skip_loc(idx) 138 | dist_pro = inf(size(dist_pro)); 139 | end 140 | dist_pro(skip_loc, :) = inf; 141 | 142 | % figure out and store the nearest neighbor 143 | dist_pro_sort = sort(dist_pro, 2); 144 | dist_pro_cum = zeros(pro_len, 1); 145 | dist_pro_merg = zeros(pro_len, 1); 146 | for k = 1:n_dim 147 | dist_pro_cum = dist_pro_cum + dist_pro_sort(:, k); 148 | dist_pro_merg(:) = dist_pro_cum / k; 149 | [min_val, min_idx] = min(dist_pro_merg); 150 | pro_muls{i}(j, k) = min_val; 151 | pro_idxs{i}(j, k) = min_idx; 152 | end 153 | end 154 | % pro_muls{i} = sqrt(pro_muls{i}); 155 | end 156 | 157 | %% merge workers' result 158 | pro_mul = zeros(pro_len, n_dim); 159 | pro_idx = zeros(pro_len, n_dim); 160 | for i = 1:n_work 161 | pro_idx(idx_work{i}, :) = pro_idxs{i}; 162 | pro_mul(idx_work{i}, :) = pro_muls{i}; 163 | end 164 | 165 | %% The following two functions are modified from the code provided in the following URL 166 | % http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 167 | function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len) 168 | data(data_len+1:(sub_len+data_len)) = 0; 169 | data_freq = fft(data); 170 | data_cum = cumsum(data); 171 | data2_cum = cumsum(data.^2); 172 | data2_sum = data2_cum(sub_len:data_len) - ... 173 | [0; data2_cum(1:data_len-sub_len)]; 174 | data_sum = data_cum(sub_len:data_len) - ... 175 | [0; data_cum(1:data_len-sub_len)]; 176 | data_mu = data_sum./sub_len; 177 | data_sig2 = (data2_sum./sub_len)-(data_mu.^2); 178 | data_sig2 = real(data_sig2); 179 | data_sig2 = max(data_sig2, 0); 180 | data_sig = sqrt(data_sig2); 181 | 182 | function [dist_pro, last_prod] = mass(data_freq, query, ... 183 | data_len, sub_len, data_mu, data_sig, query_mu, query_sig) 184 | % pre-process query for fft 185 | query = query(end:-1:1); 186 | query(sub_len+1:(sub_len+data_len)) = 0; 187 | 188 | % compute the product 189 | query_freq = fft(query); 190 | product_freq = data_freq.*query_freq; 191 | product = ifft(product_freq); 192 | 193 | % compute the distance profile 194 | dist_pro = 2 * (sub_len - ... 195 | (product(sub_len:data_len) - sub_len*data_mu*query_mu)./... 196 | (data_sig * query_sig)); 197 | last_prod = real(product(sub_len:data_len)); -------------------------------------------------------------------------------- /MATLAB/plot_motif_on_data.m: -------------------------------------------------------------------------------- 1 | % Plot the Motifs on the data 2 | % Chin-Chia Michael Yeh 3 | % 4 | % plot_motif_on_data(data, sub_len, motif_idx, motif_dim) 5 | % 6 | % Input: 7 | % data: input time series (matrix) 8 | % sub_len: interested subsequence length (scalar) 9 | % motif_idx: the index for the founded motifs (matrix) 10 | % motif_dim: the dimensions spanned by the found motifs (cell) 11 | % 12 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 13 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 14 | % https://sites.google.com/view/mstamp/ 15 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 16 | % 17 | 18 | function plot_motif_on_data(data, sub_len, motif_idx, motif_dim) 19 | figure(); 20 | ax = axes(); 21 | hold(ax, 'on'); 22 | 23 | %% plot the data 24 | for i = 1:size(data, 2) 25 | data(:, i) = data(:, i) - min(data(:, i)); 26 | data(:, i) = data(:, i) / max(data(:, i)); 27 | data(:, i) = data(:, i) + (i - 1) * 1.1; 28 | plot(data(:, i), 'color', 'k'); 29 | end 30 | 31 | 32 | for i = 1:length(motif_idx) 33 | for k = 1:length(motif_dim{i}) 34 | motif_location = motif_idx(i):motif_idx(i) + sub_len - 1; 35 | motif = data(motif_location, motif_dim{i}(k)); 36 | plot(motif_location, motif, 'color', 'r'); 37 | end 38 | end 39 | 40 | hold(ax, 'off'); -------------------------------------------------------------------------------- /MATLAB/toy_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcyeh/mstamp/b40961528493d369bf03d8b0fbc4be48201b2594/MATLAB/toy_data.mat -------------------------------------------------------------------------------- /MATLAB/unconstrain_search.m: -------------------------------------------------------------------------------- 1 | % MDL Based Motif Discovery for Multidimensional Matrix Profile 2 | % Chin-Chia Michael Yeh 3 | % 4 | % [motif_idx, motif_dim] = unconstrain_search(... 5 | % data, sub_len, pro_mul, pro_idx, n_bit, k) 6 | % 7 | % Output: 8 | % motif_idx: the index for the founded motifs (matrix) 9 | % motif_dim: the dimensions spanned by the found motifs (cell) 10 | % Input: 11 | % data: input time series (matrix) 12 | % sub_len: interested subsequence length (scalar) 13 | % pro_mul: multidimensional matrix profile (matrix) 14 | % pro_idx: matrix profile index (matrix) 15 | % n_bit: number of bit for discretization (scalar) 16 | % k: number of motif wish to retrieve, set to inf for retrieving 17 | % all possible k-motifs (scalar) 18 | % 19 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 20 | % Multidimensional Motif Discovery," IEEE ICDM 2017. 21 | % https://sites.google.com/view/mstamp/ 22 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 23 | % 24 | 25 | function [motif_idx, motif_dim] = unconstrain_search(... 26 | data, sub_len, pro_mul, pro_idx, n_bit, k) 27 | exc_zone = round(0.5 * sub_len); 28 | tot_dim = size(data, 2); 29 | if isinf(k) 30 | k = size(pro_mul, 1); 31 | end 32 | motif_idx = zeros(k, 1); 33 | motif_dim = cell(k, 1); 34 | base_bit = n_bit * tot_dim * sub_len * 2; 35 | for i = 1:k 36 | fprintf('finding motif %d ... \n', i); 37 | [val, idx_1] = min(pro_mul, [], 1); 38 | if any(isinf(val)) 39 | motif_idx = motif_idx(1:k-1); 40 | motif_dim = motif_dim(1:k-1); 41 | break; 42 | end 43 | 44 | bit_sz = zeros(tot_dim, 1); 45 | idx_2 = zeros(tot_dim, 1); 46 | dim = cell(tot_dim, 1); 47 | for j = 1:tot_dim 48 | idx_2(j) = pro_idx(idx_1(j), j); 49 | motif_1 = data(idx_1(j):idx_1(j) + sub_len - 1, :); 50 | motif_2 = data(idx_2(j):idx_2(j) + sub_len - 1, :); 51 | [bit_sz(j), dim{j}] = get_bit_save(motif_1, motif_2, j, n_bit); 52 | end 53 | [best_bit, min_idx] = min(bit_sz); 54 | if best_bit > base_bit 55 | motif_idx = motif_idx(1:k-1); 56 | motif_dim = motif_dim(1:k-1); 57 | break; 58 | end 59 | motif_idx(i, 1) = idx_1(min_idx); 60 | motif_dim{i} = dim{min_idx}; 61 | 62 | st_idx = max(1, motif_idx(i, 1) - exc_zone); 63 | ed_idx = min(size(pro_mul, 1), motif_idx(i, 1) + exc_zone); 64 | pro_mul(st_idx:ed_idx, :) = inf; 65 | end 66 | motif_dim = motif_dim(motif_idx ~= 0); 67 | motif_idx = motif_idx(motif_idx ~= 0); 68 | 69 | 70 | function [bit_sz, dim_id] = get_bit_save(motif_1, motif_2, n_dim, n_bit) 71 | tot_dim = size(motif_1, 2); 72 | sub_len = size(motif_1, 1); 73 | split_pt = get_desc_split_pt(n_bit); 74 | disc_1 = discretization(motif_1, split_pt); 75 | disc_2 = discretization(motif_2, split_pt); 76 | 77 | [~, dim_id] = sort(sum(abs(disc_1 - disc_2), 1), 'ascend'); 78 | dim_id = dim_id(1:n_dim); 79 | motif_diff = disc_1(:, dim_id) - disc_2(:, dim_id); 80 | n_val = length(unique(motif_diff)); 81 | 82 | bit_sz = n_bit * (tot_dim * sub_len * 2 - n_dim * sub_len); 83 | bit_sz = bit_sz + n_dim * sub_len * log2(n_val) + n_val * n_bit; 84 | 85 | 86 | function disc = discretization(motif, split_pt) 87 | for i = 1:size(motif, 2) 88 | motif(:, i) = (motif(:, i) - mean(motif(:, i))) / ... 89 | std(motif(:, i), 1); 90 | end 91 | disc = zeros(size(motif)); 92 | for i = 1:length(split_pt) 93 | disc(motif < split_pt(i) & disc == 0) = i; 94 | end 95 | disc(disc == 0) = length(split_pt) + 1; 96 | 97 | 98 | function split_pt = get_desc_split_pt(n_bit) 99 | split_pt = norminv((1:(2^n_bit)-1)/(2^n_bit), 0, 1); -------------------------------------------------------------------------------- /Python/README.txt: -------------------------------------------------------------------------------- 1 | See demo.py for examples of using the functions 2 | 3 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 4 | Multidimensional Motif Discovery," IEEE ICDM 2017. 5 | https://sites.google.com/view/mstamp/ 6 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 7 | -------------------------------------------------------------------------------- /Python/demo.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: Michael Yeh 4 | 5 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 6 | Multidimensional Motif Discovery," IEEE ICDM 2017. 7 | https://sites.google.com/view/mstamp/ 8 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 9 | """ 10 | 11 | import scipy.io as sio 12 | import matplotlib.pyplot as plt 13 | from mstamp_stomp import mstamp as mstamp_stomp 14 | from mstamp_stamp import mstamp as mstamp_stamp 15 | 16 | 17 | def plot_motifs(matrix_profile, dimensionality=1): 18 | motif_at = matrix_profile[dimensionality - 1, :].argsort()[:2] 19 | 20 | plt.figure(figsize=(14, 7)) 21 | for i in range(3): 22 | plt.subplot(4, 1, i + 1) 23 | plt.plot(data.T[i, :]) 24 | plt.title('$T_{}$'.format(i + 1)) 25 | for m in motif_at: 26 | plt.plot(range(m, m + sub_len), data.T[i, :][m:m + sub_len], c='r') 27 | plt.xlim((0, matrix_profile.shape[1])) 28 | 29 | plt.subplot(414) 30 | plt.title('{}-dimensional Matrix Profile'.format(dimensionality)) 31 | plt.plot(matrix_profile[dimensionality - 1, :]) 32 | for m in motif_at: 33 | plt.axvline(m, c='r') 34 | plt.xlim((0, matrix_profile.shape[1])) 35 | plt.tight_layout() 36 | 37 | 38 | if __name__ == '__main__': 39 | mat = sio.loadmat('toy_data.mat') 40 | data = mat['data'] 41 | sub_len = mat['sub_len'][0][0] 42 | 43 | # using the stomp based method to compute the multidimensional matrix 44 | # profile 45 | mat_pro_1, pro_idx_1 = mstamp_stomp(data.T, sub_len, 46 | return_dimension=False) 47 | 48 | # plot the matrix profile as image 49 | plt.figure() 50 | plt.title('Matrix Profile (STOMP)') 51 | plt.imshow(mat_pro_1, extent=[0, 1, 0, 1]) 52 | 53 | # using the stamp based method to compute the multidimensional matrix 54 | # profile 55 | mat_pro_2, pro_idx_2 = mstamp_stamp(data.T, sub_len, 56 | return_dimension=False) 57 | 58 | # plot the matrix profile as image 59 | plt.figure() 60 | plt.title('Matrix Profile (STAMP)') 61 | plt.imshow(mat_pro_2, extent=[0, 1, 0, 1]) 62 | 63 | plot_motifs(mat_pro_2) 64 | 65 | # the function can also be used to compute the 1D matrix profile 66 | mat_pro_3, _ = mstamp_stomp(data[:, 1].T, sub_len, 67 | return_dimension=False) 68 | plt.figure() 69 | plt.plot(mat_pro_3[0, :]) 70 | 71 | mat_pro_4, _ = mstamp_stamp(data[:, 1].T, sub_len, 72 | return_dimension=False) 73 | plt.figure() 74 | plt.plot(mat_pro_4[0, :]) 75 | 76 | plt.show() 77 | -------------------------------------------------------------------------------- /Python/mstamp_stamp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: Michael Yeh 4 | """ 5 | 6 | from __future__ import print_function 7 | import time 8 | import numpy as np 9 | 10 | 11 | _EPS = 1e-14 12 | 13 | 14 | def mstamp(seq, sub_len, return_dimension=False): 15 | """ multidimensional matrix profile with mSTAMP (stamp based) 16 | 17 | Parameters 18 | ---------- 19 | seq : numpy matrix, shape (n_dim, seq_len) 20 | input sequence 21 | sub_len : int 22 | subsequence length 23 | return_dimension : bool 24 | if True, also return the matrix profile dimension. It takses O(d^2 n) 25 | to store and O(d^2 n^2) to compute. (default is False) 26 | 27 | Returns 28 | ------- 29 | matrix_profile : numpy matrix, shape (n_dim, sub_num) 30 | matrix profile 31 | profile_index : numpy matrix, shape (n_dim, sub_num) 32 | matrix profile index 33 | profile_dimension : list, optional, shape (n_dim) 34 | matrix profile dimension, this is only returned when return_dimension 35 | is True 36 | 37 | Notes 38 | ----- 39 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 40 | Multidimensional Motif Discovery," IEEE ICDM 2017. 41 | https://sites.google.com/view/mstamp/ 42 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 43 | """ 44 | if sub_len < 4: 45 | raise RuntimeError('Subsequence length (sub_len) must be at least 4') 46 | exc_zone = sub_len // 2 47 | seq = np.array(seq, dtype=float, copy=True) 48 | 49 | if seq.ndim == 1: 50 | seq = np.expand_dims(seq, axis=0) 51 | 52 | seq_len = seq.shape[1] 53 | sub_num = seq.shape[1] - sub_len + 1 54 | n_dim = seq.shape[0] 55 | skip_loc = np.zeros(sub_num, dtype=bool) 56 | for i in range(sub_num): 57 | if not np.all(np.isfinite(seq[:, i:i + sub_len])): 58 | skip_loc[i] = True 59 | seq[~np.isfinite(seq)] = 0 60 | 61 | matrix_profile = np.empty((n_dim, sub_num)) 62 | matrix_profile[:] = np.inf 63 | profile_index = -np.ones((n_dim, sub_num), dtype=int) 64 | seq_freq = np.empty((n_dim, seq_len * 2), dtype=np.complex128) 65 | seq_mu = np.empty((n_dim, sub_num)) 66 | seq_sig = np.empty((n_dim, sub_num)) 67 | if return_dimension: 68 | profile_dimension = [] 69 | for i in range(n_dim): 70 | profile_dimension.append(np.empty((i + 1, sub_num), dtype=int)) 71 | for i in range(n_dim): 72 | seq_freq[i, :], seq_mu[i, :], seq_sig[i, :] = \ 73 | _mass_pre(seq[i, :], sub_len) 74 | 75 | dist_profile = np.empty((n_dim, sub_num)) 76 | que_sig = np.empty(n_dim) 77 | tic = time.time() 78 | for i in range(sub_num): 79 | cur_prog = (i + 1) / sub_num 80 | time_left = ((time.time() - tic) / (i + 1)) * (sub_num - i - 1) 81 | print('\rProgress [{0:<50s}] {1:5.1f}% {2:8.1f} sec' 82 | .format('#' * int(cur_prog * 50), 83 | cur_prog * 100, time_left), end="") 84 | for j in range(n_dim): 85 | que = seq[j, i:i + sub_len] 86 | dist_profile[j, :], que_sig[j] = _mass( 87 | seq_freq[j, :], que, seq_len, sub_len, 88 | seq_mu[j, :], seq_sig[j, :]) 89 | 90 | if skip_loc[i] or np.any(que_sig < _EPS): 91 | continue 92 | 93 | exc_zone_st = max(0, i - exc_zone) 94 | exc_zone_ed = min(sub_num, i + exc_zone) 95 | dist_profile[:, exc_zone_st:exc_zone_ed] = np.inf 96 | dist_profile[:, skip_loc] = np.inf 97 | dist_profile[seq_sig < _EPS] = np.inf 98 | dist_profile = np.sqrt(dist_profile) 99 | 100 | dist_profile_dim = np.argsort(dist_profile, axis=0) 101 | dist_profile_sort = np.sort(dist_profile, axis=0) 102 | dist_profile_cumsum = np.zeros(sub_num) 103 | for j in range(n_dim): 104 | dist_profile_cumsum += dist_profile_sort[j, :] 105 | dist_profile_mean = dist_profile_cumsum / (j + 1) 106 | update_pos = dist_profile_mean < matrix_profile[j, :] 107 | profile_index[j, update_pos] = i 108 | matrix_profile[j, update_pos] = dist_profile_mean[update_pos] 109 | if return_dimension: 110 | profile_dimension[j][:, update_pos] = \ 111 | dist_profile_dim[:j + 1, update_pos] 112 | 113 | # matrix_profile = np.sqrt(matrix_profile) 114 | if return_dimension: 115 | return matrix_profile, profile_index, profile_dimension 116 | else: 117 | return matrix_profile, profile_index, 118 | 119 | 120 | def _mass_pre(seq, sub_len): 121 | """ pre-computation for iterative call to MASS 122 | 123 | Parameters 124 | ---------- 125 | seq : numpy array 126 | input sequence 127 | sub_len : int 128 | subsequence length 129 | 130 | Returns 131 | ------- 132 | seq_freq : numpy array 133 | sequence in frequency domain 134 | seq_mu : numpy array 135 | each subsequence's mu (mean) 136 | seq_sig : numpy array 137 | each subsequence's sigma (standard deviation) 138 | 139 | Notes 140 | ----- 141 | This functions is modified from the code provided in the following URL 142 | http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 143 | """ 144 | seq_len = len(seq) 145 | seq_pad = np.zeros(seq_len * 2) 146 | seq_pad[0:seq_len] = seq 147 | seq_freq = np.fft.fft(seq_pad) 148 | seq_cum = np.cumsum(seq_pad) 149 | seq_sq_cum = np.cumsum(np.square(seq_pad)) 150 | seq_sum = (seq_cum[sub_len - 1:seq_len] - 151 | np.concatenate(([0], seq_cum[0:seq_len - sub_len]))) 152 | seq_sq_sum = (seq_sq_cum[sub_len - 1:seq_len] - 153 | np.concatenate(([0], seq_sq_cum[0:seq_len - sub_len]))) 154 | seq_mu = seq_sum / sub_len 155 | seq_sig_sq = seq_sq_sum / sub_len - np.square(seq_mu) 156 | seq_sig = np.sqrt(seq_sig_sq) 157 | return seq_freq, seq_mu, seq_sig 158 | 159 | 160 | def _mass(seq_freq, que, seq_len, sub_len, seq_mu, seq_sig): 161 | """ iterative call of MASS 162 | 163 | Parameters 164 | ---------- 165 | seq_freq : numpy array 166 | sequence in frequency domain 167 | que : numpy array 168 | query 169 | seq_len : int 170 | sequence length 171 | sub_len : int 172 | subsequence length 173 | seq_mu : numpy array 174 | each subsequence's mu (mean) 175 | seq_sig : numpy array 176 | each subsequence's sigma (standard deviation) 177 | 178 | Returns 179 | ------- 180 | dist_profile : numpy array 181 | distance profile 182 | que_sig : float64 183 | query's sigma (standard deviation) 184 | 185 | Notes 186 | ----- 187 | This functions is modified from the code provided in the following URL 188 | http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 189 | """ 190 | que = que[::-1] 191 | que_pad = np.zeros(seq_len * 2) 192 | que_pad[0:sub_len] = que 193 | que_freq = np.fft.fft(que_pad) 194 | product_freq = seq_freq * que_freq 195 | product = np.fft.ifft(product_freq) 196 | product = np.real(product) 197 | 198 | que_sum = np.sum(que) 199 | que_sq_sum = np.sum(np.square(que)) 200 | que_mu = que_sum / sub_len 201 | que_sig_sq = que_sq_sum / sub_len - que_mu**2 202 | if que_sig_sq < _EPS: 203 | que_sig_sq = _EPS 204 | que_sig = np.sqrt(que_sig_sq) 205 | 206 | dist_profile = (2 * (sub_len - (product[sub_len - 1:seq_len] - 207 | sub_len * seq_mu * que_mu) / 208 | (seq_sig * que_sig))) 209 | return dist_profile, que_sig 210 | -------------------------------------------------------------------------------- /Python/mstamp_stomp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | @author: Michael Yeh 4 | """ 5 | 6 | from __future__ import print_function 7 | import time 8 | import numpy as np 9 | 10 | 11 | _EPS = 1e-14 12 | 13 | 14 | def mstamp(seq, sub_len, return_dimension=False): 15 | """ multidimensional matrix profile with mSTAMP (stomp based) 16 | 17 | Parameters 18 | ---------- 19 | seq : numpy matrix, shape (n_dim, seq_len) 20 | input sequence 21 | sub_len : int 22 | subsequence length 23 | return_dimension : bool 24 | if True, also return the matrix profile dimension. It takses O(d^2 n) 25 | to store and O(d^2 n^2) to compute. (default is False) 26 | 27 | Returns 28 | ------- 29 | matrix_profile : numpy matrix, shape (n_dim, sub_num) 30 | matrix profile 31 | profile_index : numpy matrix, shape (n_dim, sub_num) 32 | matrix profile index 33 | profile_dimension : list, optional, shape (n_dim) 34 | matrix profile dimension, this is only returned when return_dimension 35 | is True 36 | 37 | Notes 38 | ----- 39 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 40 | Multidimensional Motif Discovery," IEEE ICDM 2017. 41 | https://sites.google.com/view/mstamp/ 42 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 43 | """ 44 | if sub_len < 4: 45 | raise RuntimeError('Subsequence length (sub_len) must be at least 4') 46 | exc_zone = sub_len // 2 47 | seq = np.array(seq, dtype=float, copy=True) 48 | 49 | if seq.ndim == 1: 50 | seq = np.expand_dims(seq, axis=0) 51 | 52 | seq_len = seq.shape[1] 53 | sub_num = seq.shape[1] - sub_len + 1 54 | n_dim = seq.shape[0] 55 | skip_loc = np.zeros(sub_num, dtype=bool) 56 | for i in range(sub_num): 57 | if not np.all(np.isfinite(seq[:, i:i + sub_len])): 58 | skip_loc[i] = True 59 | seq[~np.isfinite(seq)] = 0 60 | 61 | drop_val = 0 62 | matrix_profile = np.empty((n_dim, sub_num)) 63 | matrix_profile[:] = np.inf 64 | profile_index = -np.ones((n_dim, sub_num), dtype=int) 65 | seq_freq = np.empty((n_dim, seq_len * 2), dtype=np.complex128) 66 | seq_mu = np.empty((n_dim, sub_num)) 67 | seq_sig = np.empty((n_dim, sub_num)) 68 | if return_dimension: 69 | profile_dimension = [] 70 | for i in range(n_dim): 71 | profile_dimension.append(np.empty((i + 1, sub_num), dtype=int)) 72 | for i in range(n_dim): 73 | seq_freq[i, :], seq_mu[i, :], seq_sig[i, :] = \ 74 | _mass_pre(seq[i, :], sub_len) 75 | 76 | dist_profile = np.empty((n_dim, sub_num)) 77 | last_product = np.empty((n_dim, sub_num)) 78 | first_product = np.empty((n_dim, sub_num)) 79 | drop_val = np.empty(n_dim) 80 | que_sum = np.empty(n_dim) 81 | que_sq_sum = np.empty(n_dim) 82 | que_sig = np.empty(n_dim) 83 | tic = time.time() 84 | for i in range(sub_num): 85 | cur_prog = (i + 1) / sub_num 86 | time_left = ((time.time() - tic) / (i + 1)) * (sub_num - i - 1) 87 | print('\rProgress [{0:<50s}] {1:5.1f}% {2:8.1f} sec' 88 | .format('#' * int(cur_prog * 50), 89 | cur_prog * 100, time_left), end="") 90 | for j in range(n_dim): 91 | que = seq[j, i:i + sub_len] 92 | if i == 0: 93 | (dist_profile[j, :], last_product[j, :], 94 | que_sum[j], que_sq_sum[j], que_sig[j]) = \ 95 | _mass(seq_freq[j, :], que, seq_len, sub_len, 96 | seq_mu[j, :], seq_sig[j, :]) 97 | first_product[j, :] = last_product[j, :].copy() 98 | else: 99 | que_sum[j] = que_sum[j] - drop_val[j] + que[-1] 100 | que_sq_sum[j] = que_sq_sum[j] - drop_val[j]**2 + que[-1]**2 101 | que_mu = que_sum[j] / sub_len 102 | que_sig_sq = que_sq_sum[j] / sub_len - que_mu**2 103 | if que_sig_sq < _EPS: 104 | que_sig_sq = _EPS 105 | que_sig[j] = np.sqrt(que_sig_sq) 106 | last_product[j, 1:] = (last_product[j, 0:-1] - 107 | seq[j, 0:seq_len - sub_len] * 108 | drop_val[j] + 109 | seq[j, sub_len:seq_len] * que[-1]) 110 | last_product[j, 0] = first_product[j, i] 111 | dist_profile[j, :] = \ 112 | (2 * (sub_len - (last_product[j, :] - 113 | sub_len * seq_mu[j, :] * que_mu) / 114 | (seq_sig[j, :] * que_sig[j]))) 115 | dist_profile[j, dist_profile[j, :] < _EPS] = 0 116 | drop_val[j] = que[0] 117 | 118 | if skip_loc[i] or np.any(que_sig < _EPS): 119 | continue 120 | 121 | exc_zone_st = max(0, i - exc_zone) 122 | exc_zone_ed = min(sub_num, i + exc_zone) 123 | dist_profile[:, exc_zone_st:exc_zone_ed] = np.inf 124 | dist_profile[:, skip_loc] = np.inf 125 | dist_profile[seq_sig < _EPS] = np.inf 126 | dist_profile = np.sqrt(dist_profile) 127 | 128 | dist_profile_dim = np.argsort(dist_profile, axis=0) 129 | dist_profile_sort = np.sort(dist_profile, axis=0) 130 | dist_profile_cumsum = np.zeros(sub_num) 131 | for j in range(n_dim): 132 | dist_profile_cumsum += dist_profile_sort[j, :] 133 | dist_profile_mean = dist_profile_cumsum / (j + 1) 134 | update_pos = dist_profile_mean < matrix_profile[j, :] 135 | profile_index[j, update_pos] = i 136 | matrix_profile[j, update_pos] = dist_profile_mean[update_pos] 137 | if return_dimension: 138 | profile_dimension[j][:, update_pos] = \ 139 | dist_profile_dim[:j + 1, update_pos] 140 | 141 | # matrix_profile = np.sqrt(matrix_profile) 142 | if return_dimension: 143 | return matrix_profile, profile_index, profile_dimension 144 | else: 145 | return matrix_profile, profile_index, 146 | 147 | 148 | def _mass_pre(seq, sub_len): 149 | """ pre-computation for iterative call to MASS 150 | 151 | Parameters 152 | ---------- 153 | seq : numpy array 154 | input sequence 155 | sub_len : int 156 | subsequence length 157 | 158 | Returns 159 | ------- 160 | seq_freq : numpy array 161 | sequence in frequency domain 162 | seq_mu : numpy array 163 | each subsequence's mu (mean) 164 | seq_sig : numpy array 165 | each subsequence's sigma (standard deviation) 166 | 167 | Notes 168 | ----- 169 | This functions is modified from the code provided in the following URL 170 | http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 171 | """ 172 | seq_len = len(seq) 173 | seq_pad = np.zeros(seq_len * 2) 174 | seq_pad[0:seq_len] = seq 175 | seq_freq = np.fft.fft(seq_pad) 176 | seq_cum = np.cumsum(seq_pad) 177 | seq_sq_cum = np.cumsum(np.square(seq_pad)) 178 | seq_sum = (seq_cum[sub_len - 1:seq_len] - 179 | np.concatenate(([0], seq_cum[0:seq_len - sub_len]))) 180 | seq_sq_sum = (seq_sq_cum[sub_len - 1:seq_len] - 181 | np.concatenate(([0], seq_sq_cum[0:seq_len - sub_len]))) 182 | seq_mu = seq_sum / sub_len 183 | seq_sig_sq = seq_sq_sum / sub_len - np.square(seq_mu) 184 | seq_sig = np.sqrt(seq_sig_sq) 185 | return seq_freq, seq_mu, seq_sig 186 | 187 | 188 | def _mass(seq_freq, que, seq_len, sub_len, seq_mu, seq_sig): 189 | """ iterative call of MASS 190 | 191 | Parameters 192 | ---------- 193 | seq_freq : numpy array 194 | sequence in frequency domain 195 | que : numpy array 196 | query 197 | seq_len : int 198 | sequence length 199 | sub_len : int 200 | subsequence length 201 | seq_mu : numpy array 202 | each subsequence's mu (mean) 203 | seq_sig : numpy array 204 | each subsequence's sigma (standard deviation) 205 | 206 | Returns 207 | ------- 208 | dist_profile : numpy array 209 | distance profile 210 | last_product : numpy array 211 | cross term 212 | que_sum : float64 213 | query's sum 214 | que_sq_sum : float64 215 | query's squre sum 216 | que_sig : float64 217 | query's sigma (standard deviation) 218 | 219 | Notes 220 | ----- 221 | This functions is modified from the code provided in the following URL 222 | http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html 223 | """ 224 | que = que[::-1] 225 | que_pad = np.zeros(seq_len * 2) 226 | que_pad[0:sub_len] = que 227 | que_freq = np.fft.fft(que_pad) 228 | product_freq = seq_freq * que_freq 229 | product = np.fft.ifft(product_freq) 230 | product = np.real(product) 231 | 232 | que_sum = np.sum(que) 233 | que_sq_sum = np.sum(np.square(que)) 234 | que_mu = que_sum / sub_len 235 | que_sig_sq = que_sq_sum / sub_len - que_mu**2 236 | if que_sig_sq < _EPS: 237 | que_sig_sq = _EPS 238 | que_sig = np.sqrt(que_sig_sq) 239 | 240 | dist_profile = (2 * (sub_len - (product[sub_len - 1:seq_len] - 241 | sub_len * seq_mu * que_mu) / 242 | (seq_sig * que_sig))) 243 | last_product = product[sub_len - 1:seq_len] 244 | return dist_profile, last_product, que_sum, que_sq_sum, que_sig 245 | -------------------------------------------------------------------------------- /Python/toy_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mcyeh/mstamp/b40961528493d369bf03d8b0fbc4be48201b2594/Python/toy_data.mat -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | MATLAB and Python code for the following paper: 2 | 3 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful 4 | Multidimensional Motif Discovery," IEEE ICDM 2017. 5 | https://sites.google.com/view/mstamp/ 6 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html 7 | --------------------------------------------------------------------------------