├── .gitignore
├── MATLAB
    ├── README.txt
    ├── demo.m
    ├── extract_k_motif.m
    ├── guide_serach.m
    ├── mstamp.m
    ├── mstamp_any.m
    ├── mstamp_any_par.m
    ├── mstamp_par.m
    ├── plot_motif_on_data.m
    ├── toy_data.mat
    └── unconstrain_search.m
├── Python
    ├── README.txt
    ├── demo.py
    ├── mstamp_stamp.py
    ├── mstamp_stomp.py
    └── toy_data.mat
└── README.txt


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | .vscode/
3 | 


--------------------------------------------------------------------------------
/MATLAB/README.txt:
--------------------------------------------------------------------------------
1 | See demo.m for examples of using the functions
2 | 
3 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
4 | Multidimensional Motif Discovery," IEEE ICDM 2017.
5 | https://sites.google.com/view/mstamp/
6 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
7 | 


--------------------------------------------------------------------------------
/MATLAB/demo.m:
--------------------------------------------------------------------------------
 1 | %%
 2 | % Chin-Chia Michael Yeh
 3 | %
 4 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 5 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
 6 | % https://sites.google.com/view/mstamp/
 7 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 8 | %
 9 | 
10 | clear
11 | clc
12 | 
13 | load('toy_data.mat'); % expected to provide data and sub_len (used below)
14 | 
15 | %% compute the multidimensional matrix profile
16 | % here we provide three variations of the mSTAMP algorithm
17 | % The script only runs when exactly one of the alternatives is uncommented
18 | 
19 | %% alternative 1.a: the basic version
20 | 
21 | must_dim = [];
22 | exc_dim = [];
23 | [pro_mul, pro_idx] = ...
24 |     mstamp(data, sub_len, must_dim, exc_dim);
25 | 
26 | %% alternative 1.b: the inclusion
27 | % in the toy data, the first dimension only consists of a random walk.
28 | % Forcing the algorithm to consider the first dimension worsens the result.
29 | 
30 | % must_dim = [1];
31 | % exc_dim = [];
32 | % [pro_mul, pro_idx] = ...
33 | %     mstamp(data, sub_len, must_dim, exc_dim);
34 | 
35 | %% alternative 1.c: the exclusion
36 | % We can also do exclusion. By blacklisting one of the dimensions that
37 | % contains a meaningful motif, we can no longer find a meaningful
38 | % 2-dimensional motif. However, the MDL-based unconstrained search method
39 | % will correctly provide us the 1-dimensional motif
40 | 
41 | % must_dim = [];
42 | % exc_dim = [3];
43 | % [pro_mul, pro_idx] = ...
44 | %     mstamp(data, sub_len, must_dim, exc_dim);
45 | % pro_mul = pro_mul(:, 1:2);
46 | % pro_idx = pro_idx(:, 1:2);
47 | % data = data(:, 1:2);
48 | 
49 | %% alternative 2: using Parallel Computing Toolbox
50 | 
51 | % n_work = 4;
52 | % [pro_mul, pro_idx] = ...
53 | %     mstamp_par(data, sub_len, n_work);
54 | 
55 | %% alternative 3: using the anytime version, stopping at 10%
56 | % the guided search is able to find the motif most of the time;
57 | % however, the MDL-based method's output is less stable because both
58 | % methods are approximate
59 | 
60 | % pct_stop = 0.1;
61 | % [pro_mul, pro_idx] = mstamp_any(data, sub_len, pct_stop);
62 | 
63 | 
64 | %% guided search for 2-dimensional motif
65 | n_dim = 2; % we want the top 2-dimensional motif
66 | [motif_idx, motif_dim] = guide_serach(...
67 |     data, sub_len, pro_mul, pro_idx, n_dim);
68 | plot_motif_on_data(data, sub_len, motif_idx, motif_dim);
69 | 
70 | 
71 | %% extract motif using the MDL-based unconstrained search method
72 | n_bit = 4; % number of bits for discretization
73 | k = 2; % number of motifs to retrieve
74 | [motif_idx, motif_dim] = unconstrain_search(...
75 |     data, sub_len, pro_mul, pro_idx, n_bit, k);
76 | plot_motif_on_data(data, sub_len, motif_idx, motif_dim);
77 | 
78 | %% the function can also be used to compute the 1D matrix profile
79 | [pro_mul_2, ~] = ... % no constraints: they may be unset or invalid for 1-D
80 |     mstamp(data(:, 2), sub_len, [], []);
81 | figure();
82 | plot(pro_mul_2);
83 | 


--------------------------------------------------------------------------------
/MATLAB/extract_k_motif.m:
--------------------------------------------------------------------------------
 1 | % MDL Based Motif Discovery for Multidimensional Matrix Profile
 2 | % Chin-Chia Michael Yeh
 3 | %
 4 | % [motif_idx, motif_dim] = extract_k_motif(...
 5 | %     data, sub_len, pro_mul, pro_idx, n_bit, k)
 6 | %
 7 | % Output:
 8 | %     motif_idx: the index for the found motifs (matrix)
 9 | %     motif_dim: the dimensions spanned by the found motifs (cell)
10 | % Input:
11 | %     data: input time series (matrix)
12 | %     sub_len: interested subsequence length (scalar)
13 | %     pro_mul: multidimensional matrix profile (matrix)
14 | %     pro_idx: matrix profile index (matrix)
15 | %     n_bit: number of bits for discretization (scalar)
16 | %     k: number of motifs to retrieve; set to inf to retrieve
17 | %        all possible k-motifs (scalar)
18 | %
19 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
20 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
21 | % https://sites.google.com/view/mstamp/
22 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
23 | %
24 | 
25 | function [motif_idx, motif_dim] = extract_k_motif(...
26 |     data, sub_len, pro_mul, pro_idx, n_bit, k)
27 | exc_zone = round(0.5 * sub_len); % rows blanked around each found motif
28 | tot_dim = size(data, 2);
29 | if isinf(k)
30 |     k = size(pro_mul, 1); % cannot find more motifs than profile rows
31 | end
32 | motif_idx = zeros(k, 1);
33 | motif_dim = cell(k, 1);
34 | base_bit = n_bit * tot_dim * sub_len * 2; % cost of two uncompressed motifs
35 | for i = 1:k
36 |     fprintf('finding motif %d ... \n', i);
37 |     [val, idx_1] = min(pro_mul, [], 1); % best row for each dimensionality
38 |     if any(isinf(val))
39 |         motif_idx = motif_idx(1:i-1); % keep only the motifs found so far
40 |         motif_dim = motif_dim(1:i-1);
41 |         break;
42 |     end
43 |     % score the best candidate of every dimensionality by its bit cost
44 |     bit_sz = zeros(tot_dim, 1);
45 |     idx_2 = zeros(tot_dim, 1);
46 |     dim = cell(tot_dim, 1);
47 |     for j = 1:tot_dim
48 |         idx_2(j) = pro_idx(idx_1(j), j); % nearest neighbor of candidate
49 |         motif_1 = data(idx_1(j):idx_1(j) + sub_len - 1, :);
50 |         motif_2 = data(idx_2(j):idx_2(j) + sub_len - 1, :);
51 |         [bit_sz(j), dim{j}] = get_bit_save(motif_1, motif_2, j, n_bit);
52 |     end
53 |     [best_bit, min_idx] = min(bit_sz);
54 |     if best_bit > base_bit % compression no longer saves bits: stop
55 |         motif_idx = motif_idx(1:i-1);
56 |         motif_dim = motif_dim(1:i-1);
57 |         break;
58 |     end
59 |     motif_idx(i, 1) = idx_1(min_idx);
60 |     motif_dim{i} = dim{min_idx};
61 |     % blank the neighborhood of the motif so it is not picked again
62 |     st_idx = max(1, motif_idx(i, 1) - exc_zone);
63 |     ed_idx = min(size(pro_mul, 1), motif_idx(i, 1) + exc_zone);
64 |     pro_mul(st_idx:ed_idx, :) = inf;
65 | end
66 | motif_dim = motif_dim(motif_idx ~= 0); % drop slots that were never filled
67 | motif_idx = motif_idx(motif_idx ~= 0);
68 | 
69 | 
70 | function [bit_sz, dim_id] = get_bit_save(motif_1, motif_2, n_dim, n_bit)
71 | % Bit cost of storing motif_2 as a difference from motif_1 on the n_dim
72 | % most similar (discretized) dimensions; dim_id lists those dimensions.
73 | [sub_len, tot_dim] = size(motif_1);
74 | cuts = get_desc_split_pt(n_bit);
75 | delta = discretization(motif_1, cuts) - discretization(motif_2, cuts);
76 | 
77 | [~, order] = sort(sum(abs(delta), 1), 'ascend');
78 | dim_id = order(1:n_dim);
79 | n_val = length(unique(delta(:, dim_id)));
80 | 
81 | raw_bit = n_bit * (tot_dim * sub_len * 2 - n_dim * sub_len);
82 | bit_sz = raw_bit + n_dim * sub_len * log2(n_val) ...
83 |     + n_val * n_bit;
84 | 
85 | 
86 | function disc = discretization(motif, split_pt)
87 | % Z-normalize each column and bin values by the first split point above them.
88 | for c = 1:size(motif, 2)
89 |     col = motif(:, c);
90 |     motif(:, c) = (col - mean(col)) / std(col, 1);
91 | end
92 | disc = (length(split_pt) + 1) * ones(size(motif)); % default: top bin
93 | for b = length(split_pt):-1:1 % descending, so the lowest matching bin wins
94 |     disc(motif < split_pt(b)) = b;
95 | end
96 | 
97 | 
98 | function split_pt = get_desc_split_pt(n_bit)
99 | split_pt = norminv((1:2^n_bit - 1) / 2^n_bit, 0, 1); % equiprobable N(0,1) cuts


--------------------------------------------------------------------------------
/MATLAB/guide_serach.m:
--------------------------------------------------------------------------------
 1 | % Guided Motif Discovery for Multidimensional Matrix Profile
 2 | % Chin-Chia Michael Yeh
 3 | %
 4 | % [motif_idx, motif_dim] = guide_serach(...
 5 | %     data, sub_len, pro_mul, pro_idx, n_dim)
 6 | %
 7 | % Output:
 8 | %     motif_idx: the index for the found motifs (matrix)
 9 | %     motif_dim: the dimensions spanned by the found motifs (cell)
10 | % Input:
11 | %     data: input time series (matrix)
12 | %     sub_len: interested subsequence length (scalar)
13 | %     pro_mul: multidimensional matrix profile (matrix)
14 | %     pro_idx: matrix profile index (matrix)
15 | %     n_dim: the dimensionality of the motif that you wish to find (scalar)
16 | %
17 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
18 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
19 | % https://sites.google.com/view/mstamp/
20 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
21 | %
22 | 
23 | function [motif_idx, motif_dim] = guide_serach(...
24 |     data, sub_len, pro_mul, pro_idx, n_dim)
25 | % Only the column holding the n_dim-dimensional profile/index is consulted.
26 | [~, best] = min(pro_mul(:, n_dim));
27 | motif_idx = sort([best, pro_idx(best, n_dim)]);
28 | 
29 | % Cut both occurrences (all dimensions) out of the data.
30 | win = 0:sub_len - 1;
31 | occ_a = data(motif_idx(1) + win, :);
32 | occ_b = data(motif_idx(2) + win, :);
33 | [~, order] = sort(sum(abs(occ_a - occ_b), 1), 'ascend'); % closest dims first
34 | motif_dim = sort(order(1:n_dim));
35 | motif_dim = {motif_dim; motif_dim;};


--------------------------------------------------------------------------------
/MATLAB/mstamp.m:
--------------------------------------------------------------------------------
  1 | % STOMP Based mSTAMP with Constrained Search Implemented
  2 | % Chin-Chia Michael Yeh
  3 | %
  4 | % [pro_mul, pro_idx] = mstamp(data, sub_len, must_dim, exc_dim)
  5 | %
  6 | % Output:
  7 | %     pro_mul: multidimensional matrix profile (matrix)
  8 | %     pro_idx: matrix profile index (matrix)
  9 | % Input:
 10 | %     data: input time series (matrix)
 11 | %     sub_len: interested subsequence length (scalar)
 12 | %     must_dim: the dimension which must be included (vector)
 13 | %     exc_dim: the dimension which must be excluded (vector)
 14 | %
 15 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 16 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
 17 | % https://sites.google.com/view/mstamp/
 18 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 19 | %
 20 | 
 21 | function [pro_mul, pro_idx] = ...
 22 |     mstamp(data, sub_len, must_dim, exc_dim)
 23 | %% get various length
 24 | exc_zone = round(sub_len / 2); % half-width of the trivial-match zone
 25 | data_len = size(data, 1);
 26 | pro_len = data_len - sub_len + 1; % number of subsequences
 27 | n_dim = size(data, 2);
 28 | 
 29 | %% check input
 30 | if sub_len > data_len / 2
 31 |     error(['Error: Time series is too short relative ', ...
 32 |         'to desired subsequence length']);
 33 | end
 34 | if sub_len < 4
 35 |     error('Error: Subsequence length must be at least 4');
 36 | end
 37 | if any(must_dim > n_dim)
 38 |     error(['Error: The must have dimension must be less ', ...
 39 |         'then the total dimension']);
 40 | end
 41 | if any(exc_dim > n_dim)
 42 |     error(['Error: The exclusion dimension must be less ', ...
 43 |         'then the total dimension']);
 44 | end
 45 | if ~isempty(intersect(must_dim, exc_dim))
 46 |     error(['Error: The same dimension is presented in both ', ...
 47 |         'the exclusion dimension and must have dimension']);
 48 | end
 49 | 
 50 | %% check skip position (windows with NaN/Inf in a non-excluded dimension)
 51 | n_exc = length(exc_dim);
 52 | n_must = length(must_dim);
 53 | mask_exc = false(n_dim, 1);
 54 | mask_exc(exc_dim) = true;
 55 | skip_loc = false(pro_len, 1);
 56 | for i = 1:pro_len
 57 |     if any(isnan(reshape(data(i:i+sub_len-1, ~mask_exc), 1, []))) ...
 58 |             || any(isinf(reshape(data(i:i+sub_len-1, ~mask_exc), 1, [])))
 59 |         skip_loc(i) = true;
 60 |     end
 61 | end
 62 | data(isnan(data)) = 0; % zero-fill so fft/cumsum stay finite; affected
 63 | data(isinf(data)) = 0; % windows are masked through skip_loc instead
 64 | 
 65 | %% initialization
 66 | data_freq = zeros((sub_len + data_len), n_dim);
 67 | data_mu = zeros(pro_len, n_dim);
 68 | data_sig = zeros(pro_len, n_dim);
 69 | first_prod = zeros(pro_len, n_dim); % dot products against window 1
 70 | for i = 1:n_dim
 71 |     [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ...
 72 |         mass_pre(data(:, i), data_len, sub_len);
 73 |     [~, first_prod(:, i)] = mass(...
 74 |         data_freq(:, i), data(1:sub_len, i), data_len, ...
 75 |         sub_len, data_mu(:, i), data_sig(:, i), ...
 76 |         data_mu(1, i), data_sig(1, i));
 77 | end
 78 | 
 79 | %% compute the matrix profile
 80 | pro_mul = zeros(pro_len, n_dim);
 81 | pro_idx = zeros(pro_len, n_dim);
 82 | dist_pro = zeros(pro_len, n_dim);
 83 | last_prod = zeros(pro_len, n_dim);
 84 | drop_val = zeros(1, n_dim); % first sample of the previous query
 85 | for i = 1:pro_len
 86 |     % compute the distance profile
 87 |     fprintf('%d %d\n', i, pro_len);
 88 |     query = data(i:i+sub_len-1, :);
 89 |     if i==1
 90 |         for j = 1:n_dim
 91 |             [dist_pro(:, j), last_prod(:, j)] = ...
 92 |                 mass(data_freq(:, j), query(:, j), ...
 93 |                 data_len, sub_len, data_mu(:, j), ...
 94 |                 data_sig(:, j), data_mu(i, j), ...
 95 |                 data_sig(i, j));
 96 |         end
 97 |     else
 98 |         last_prod(2:data_len - sub_len + 1, :) = ... % slide the dot products:
 99 |             last_prod(1:data_len - sub_len, :) ...   % previous value
100 |             - data(1:data_len - sub_len, :) ...      % minus the dropped term
101 |             .* repmat(drop_val, pro_len - 1, 1) ...
102 |             + data(sub_len + 1:data_len, :) ...      % plus the new term
103 |             .* repmat(query(sub_len, :), pro_len - 1, 1);
104 |         last_prod(1, :) = first_prod(i, :);
105 |         dist_pro = 2 * (sub_len - (last_prod ...
106 |             - sub_len * data_mu .* repmat(data_mu(i, :), pro_len, 1)) ...
107 |             ./ (data_sig .* repmat(data_sig(i, :), pro_len, 1)));
108 |     end
109 |     dist_pro = real(dist_pro);
110 |     dist_pro = max(dist_pro, 0); % clamp round-off negatives before sqrt
111 |     dist_pro = sqrt(dist_pro);
112 |     drop_val(:) = query(1, :);
113 | 
114 |     % apply exclusion zone
115 |     exc_st = max(1, i - exc_zone);
116 |     exc_ed = min(pro_len, i+exc_zone);
117 |     dist_pro(exc_st:exc_ed, :) = inf;
118 |     dist_pro(data_sig < eps) = inf; % flat windows cannot be z-normalized
119 |     if skip_loc(i) || any(data_sig(i, ~mask_exc) < eps)
120 |         dist_pro = inf(size(dist_pro));
121 |     end
122 |     dist_pro(skip_loc, :) = inf;
123 | 
124 |     % apply dimension "must have" and "exclusion"
125 |     dist_pro(:, exc_dim) = inf; % excluded columns sort to the end
126 |     mask_must = false(n_dim, 1); % one flag per dimension
127 |     mask_must(must_dim) = true;
128 |     dist_pro_must = dist_pro(:, mask_must);
129 |     dist_pro(:, mask_must) = -inf; % force must-have columns to the front
130 |     dist_pro_sort = sort(dist_pro, 2);
131 |     dist_pro_sort(:, 1:n_must) = dist_pro_must;
132 | 
133 |     % figure out and store the nearest neighbor; seed the running sum with
134 |     dist_pro_cum = sum(dist_pro_sort(:, 1:(max(1, n_must) - 1)), 2);
135 |     dist_pro_merg = zeros(pro_len, 1); % the must-have columns the loop skips
136 |     for j = max(1, n_must):(n_dim - n_exc)
137 |         dist_pro_cum = dist_pro_cum + dist_pro_sort(:, j);
138 |         dist_pro_merg(:) = dist_pro_cum / j; % mean over the j chosen dims
139 |         [min_val, min_idx] = min(dist_pro_merg);
140 |         pro_mul(i, j) = min_val;
141 |         pro_idx(i, j) = min_idx;
142 |     end
143 | end
144 | 
145 | %% remove bad k setting in the returned matrix
146 | % columns outside max(1, n_must)..(n_dim - n_exc) were never computed
147 | pro_mul(:, 1:(n_must - 1)) = nan;
148 | pro_mul(:, (n_dim - n_exc + 1):end) = nan;
149 | pro_idx(:, 1:(n_must - 1)) = nan;
150 | pro_idx(:, (n_dim - n_exc + 1):end) = nan;
151 | 
152 | 
153 | %% The following two functions are modified from the code provided in the following URL
154 | %  http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
155 | function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len)
156 | % Zero-padded FFT of a column series plus the mean and (population)
157 | % standard deviation of each of its length-sub_len windows.
158 | data(data_len+1:(sub_len+data_len)) = 0;
159 | data_freq = fft(data);
160 | head = sub_len:data_len;   % last sample of every window
161 | tail = 1:data_len-sub_len; % sample just before windows 2..end
162 | sum_1 = cumsum(data);
163 | sum_2 = cumsum(data.^2);
164 | win_sum = sum_1(head) - [0; sum_1(tail)];
165 | win_sum2 = sum_2(head) - [0; sum_2(tail)];
166 | data_mu = win_sum./sub_len;
167 | data_sig2 = max(real((win_sum2./sub_len)-(data_mu.^2)), 0);
168 | data_sig = sqrt(data_sig2);
169 | 
170 | function [dist_pro, last_prod] = mass(data_freq, query, ...
171 |     data_len, sub_len, data_mu, data_sig, query_mu, query_sig)
172 | % FFT-based sliding dot products of the query against the data, converted
173 | % to squared z-normalized distances (dist_pro is not forced to be real);
174 | % last_prod returns the real sliding dot products.
175 | % reverse the query and zero-pad it to sub_len + data_len samples
176 | query = query(end:-1:1);
177 | query(sub_len+1:(sub_len+data_len)) = 0;
178 | 
179 | % circular convolution; entries sub_len..data_len are the full overlaps
180 | product = ifft(data_freq.*fft(query));
181 | dot_prod = product(sub_len:data_len);
182 | 
183 | dist_pro = 2 * (sub_len - ...
184 |     (dot_prod - sub_len*data_mu*query_mu)./(data_sig * query_sig));
185 | last_prod = real(dot_prod);


--------------------------------------------------------------------------------
/MATLAB/mstamp_any.m:
--------------------------------------------------------------------------------
  1 | % STAMP Based mSTAMP Implemented as an Anytime Algorithm
  2 | % Chin-Chia Michael Yeh
  3 | %
  4 | % [pro_mul, pro_idx] = mstamp_any(data, sub_len, pct_stop)
  5 | %
  6 | % Output:
  7 | %     pro_mul: multidimensional matrix profile (matrix)
  8 | %     pro_idx: matrix profile index (matrix)
  9 | % Input:
 10 | %     data: input time series (matrix)
 11 | %     sub_len: interested subsequence length (scalar)
 12 | %     pct_stop: stop percentage, a number from 0 to 1 (scalar)
 13 | %
 14 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 15 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
 16 | % https://sites.google.com/view/mstamp/
 17 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 18 | %
 19 | 
 20 | function [pro_mul, pro_idx] = mstamp_any(data, sub_len, pct_stop)
 21 | % Evaluates a random sample of query subsequences and keeps, for every
 22 | % subsequence and every dimensionality k, the smallest averaged distance
 23 | % seen so far (pro_mul) and the query index that produced it (pro_idx).
 24 | % Queries are drawn with randperm, so the output can differ between runs.
 25 | %% get various length
 26 | exc_zone = round(sub_len / 2);
 27 | data_len = size(data, 1);
 28 | n_dim = size(data, 2);
 29 | pro_len = data_len - sub_len + 1;
 30 | 
 31 | %% check input (before anything is derived from pct_stop)
 32 | if sub_len > data_len / 2
 33 |     error(['Error: Time series is too short relative to desired ' ...
 34 |         'subsequence length']);
 35 | end
 36 | if sub_len < 4
 37 |     error('Error: Subsequence length must be at least 4');
 38 | end
 39 | if pct_stop < 0
 40 |     error('Error: Stop percentage must be greater than or equal to 0');
 41 | end
 42 | if pct_stop > 1
 43 |     error('Error: Stop percentage must be less than or equal to 1');
 44 | end
 45 | 
 46 | %% number of queries to evaluate, clamped to [1, pro_len]
 47 | itr_stop = min(max(round(pro_len * pct_stop), 1), pro_len);
 48 | 
 49 | %% check skip position
 50 | skip_loc = false(pro_len, 1);
 51 | for i = 1:pro_len
 52 |     if any(isnan(reshape(data(i:i+sub_len-1, :), 1, []))) ...
 53 |             || any(isinf(reshape(data(i:i+sub_len-1, :), 1, [])))
 54 |         skip_loc(i) = true;
 55 |     end
 56 | end
 57 | % zero-fill non-finite samples so fft/cumsum stay finite; the affected
 58 | % subsequences are excluded through skip_loc instead
 59 | data(isnan(data)) = 0;
 60 | data(isinf(data)) = 0;
 61 | 
 62 | %% initialization
 63 | data_freq = zeros((sub_len + data_len), n_dim);
 64 | data_mu = zeros(pro_len, n_dim);
 65 | data_sig = zeros(pro_len, n_dim);
 66 | for i = 1:n_dim
 67 |     [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ...
 68 |         mass_pre(data(:, i), data_len, sub_len);
 69 | end
 70 | 
 71 | %% compute the matrix profile
 72 | dist_pro = zeros(pro_len, n_dim);
 73 | pro_mul = inf(pro_len, n_dim);
 74 | pro_idx = zeros(pro_len, n_dim);
 75 | % random evaluation order, truncated to the requested number of queries
 76 | idxs = randperm(pro_len);
 77 | idxs = idxs(1:itr_stop);
 78 | for j = 1:length(idxs)
 79 |     idx = idxs(j);
 80 |     fprintf('%d %d\n', j, length(idxs));
 81 |     query = data(idx:idx+sub_len-1, :);
 82 |     for k = 1:n_dim
 83 |         dist_pro(:, k) = ...
 84 |             mass(data_freq(:, k), query(:, k), ...
 85 |             data_len, sub_len, data_mu(:, k), ...
 86 |             data_sig(:, k), data_mu(idx, k), ...
 87 |             data_sig(idx, k));
 88 |     end
 89 |     dist_pro = real(dist_pro);
 90 |     dist_pro = max(dist_pro, 0);
 91 |     dist_pro = sqrt(dist_pro);
 92 |     
 93 |     % apply exclusion zone
 94 |     exc_zone_st = max(1, idx - exc_zone);
 95 |     exc_zone_ed = min(pro_len, idx + exc_zone);
 96 |     dist_pro(exc_zone_st:exc_zone_ed, :) = inf;
 97 |     % flat (zero-variance) windows cannot be z-normalized
 98 |     dist_pro(data_sig < eps) = inf;
 99 |     if skip_loc(idx)
100 |         dist_pro = inf(size(dist_pro));
101 |     end
102 |     dist_pro(skip_loc, :) = inf;
103 | 
104 |     % figure out and store the nearest neighbor
105 |     % (rows are sorted so column k of the running mean uses the k best dims)
106 |     dist_pro_sort = sort(dist_pro, 2);
107 |     dist_pro_cum = zeros(pro_len, 1);
108 |     dist_pro_merg = zeros(pro_len, 1);
109 |     for k = 1:n_dim
110 |         dist_pro_cum = dist_pro_cum + dist_pro_sort(:, k);
111 |         dist_pro_merg(:) = dist_pro_cum / k;
112 |         % keep the improvement and remember which query achieved it
113 |         update_idx = dist_pro_merg < pro_mul(:, k);
114 |         pro_mul(update_idx, k) = dist_pro_merg(update_idx);
115 |         pro_idx(update_idx, k) = idx;
116 |     end
117 | end
118 | % pro_mul = sqrt(pro_mul);
119 | 
120 | 
121 | %% The following two functions are modified from the code provided in the following URL
122 | %  http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
123 | function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len)
124 | % Zero-padded FFT of a column series plus the mean and (population)
125 | % standard deviation of each of its length-sub_len windows.
126 | data(data_len+1:(sub_len+data_len)) = 0;
127 | data_freq = fft(data);
128 | head = sub_len:data_len;   % last sample of every window
129 | tail = 1:data_len-sub_len; % sample just before windows 2..end
130 | sum_1 = cumsum(data);
131 | sum_2 = cumsum(data.^2);
132 | win_sum = sum_1(head) - [0; sum_1(tail)];
133 | win_sum2 = sum_2(head) - [0; sum_2(tail)];
134 | data_mu = win_sum./sub_len;
135 | data_sig2 = max(real((win_sum2./sub_len)-(data_mu.^2)), 0);
136 | data_sig = sqrt(data_sig2);
137 | 
138 | function [dist_pro, last_prod] = mass(data_freq, query, ...
139 |     data_len, sub_len, data_mu, data_sig, query_mu, query_sig)
140 | % FFT-based sliding dot products of the query against the data, converted
141 | % to squared z-normalized distances (dist_pro is not forced to be real);
142 | % last_prod returns the real sliding dot products.
143 | % reverse the query and zero-pad it to sub_len + data_len samples
144 | query = query(end:-1:1);
145 | query(sub_len+1:(sub_len+data_len)) = 0;
146 | 
147 | % circular convolution; entries sub_len..data_len are the full overlaps
148 | product = ifft(data_freq.*fft(query));
149 | dot_prod = product(sub_len:data_len);
150 | 
151 | dist_pro = 2 * (sub_len - ...
152 |     (dot_prod - sub_len*data_mu*query_mu)./(data_sig * query_sig));
153 | last_prod = real(dot_prod);


--------------------------------------------------------------------------------
/MATLAB/mstamp_any_par.m:
--------------------------------------------------------------------------------
  1 | % STAMP Based Anytime mSTAMP with Parallelization (Parallel Computing Toolbox)
  2 | % Chin-Chia Michael Yeh
  3 | %
  4 | % [pro_mul, pro_idx] = mstamp_any_par(data, sub_len, pct_stop, n_work)
  5 | %
  6 | % Output:
  7 | %     pro_mul: multidimensional matrix profile (matrix)
  8 | %     pro_idx: matrix profile index (matrix)
  9 | % Input:
 10 | %     data: input time series (matrix)
 11 | %     sub_len: interested subsequence length (scalar)
 12 | %     pct_stop: stop percentage, a number from 0 to 1 (scalar)
 13 | %     n_work: number of workers for parfor (scalar)
 14 | %
 15 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 16 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
 17 | % https://sites.google.com/view/mstamp/
 18 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 19 | 
 20 | function [pro_mul, pro_idx] = ...
 21 |     mstamp_any_par(data, sub_len, pct_stop, n_work)
 22 | % Anytime multidimensional matrix profile: a random pct_stop fraction of
 23 | % the valid query positions is split across n_work parfor workers and the
 24 | % per-worker profiles are merged by element-wise minimum.
 25 | %% setup pool
 26 | if isempty(which('parpool'))
 27 |     if matlabpool('size') <= 0 %#ok<*DPOOL>
 28 |         matlabpool(n_work);
 29 |     elseif matlabpool('size')~= n_work
 30 |         matlabpool('close');
 31 |         matlabpool(n_work);
 32 |     end
 33 | else
 34 |     pool = gcp('nocreate');
 35 |     if isempty(gcp('nocreate'))
 36 |         parpool(n_work);
 37 |     elseif pool.NumWorkers ~= n_work
 38 |         delete(gcp('nocreate'));
 39 |         parpool(n_work);
 40 |     end
 41 | end
 42 | 
 43 | %% get various length
 44 | exc_zone = round(sub_len / 2);
 45 | data_len = size(data, 1);
 46 | n_dim = size(data, 2);
 47 | pro_len = data_len - sub_len + 1;
 48 | 
 49 | %% check input
 50 | if sub_len > data_len / 2
 51 |     error(['Error: Time series is too short relative to desired ' ...
 52 |         'subsequence length']);
 53 | end
 54 | if sub_len < 4
 55 |     error('Error: Subsequence length must be at least 4');
 56 | end
 57 | if pct_stop < 0
 58 |     error('Error: Stop percentage must be greater than or equal to 0');
 59 | end
 60 | if pct_stop > 1
 61 |     error('Error: Stop percentage must be less than or equal to 1');
 62 | end
 63 | 
 64 | %% check skip position
 65 | skip_loc = false(pro_len, 1);
 66 | for i = 1:pro_len
 67 |     if any(isnan(reshape(data(i:i+sub_len-1, :), 1, []))) ...
 68 |             || any(isinf(reshape(data(i:i+sub_len-1, :), 1, [])))
 69 |         skip_loc(i) = true;
 70 |     end
 71 | end
 72 | data(isnan(data)) = 0;
 73 | data(isinf(data)) = 0;
 74 | 
 75 | %% initialization
 76 | data_freq = zeros((sub_len + data_len), n_dim);
 77 | data_mu = zeros(pro_len, n_dim);
 78 | data_sig = zeros(pro_len, n_dim);
 79 | for i = 1:n_dim
 80 |     [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ...
 81 |         mass_pre(data(:, i), data_len, sub_len);
 82 | end
 83 | 
 84 | %% initialize variable
 85 | idx = 1:pro_len;
 86 | idx(skip_loc) = []; % skipped positions are never used as queries
 87 | idx = idx(randperm(length(idx)));
 88 | itr_stop = min(max(round(length(idx) * pct_stop), 1), length(idx));
 89 | idx = idx(1:itr_stop);
 90 | % near-equal contiguous chunks; boundaries can never pass length(idx)
 91 | bound = round(linspace(0, length(idx), n_work + 1));
 92 | idx_work = cell(n_work, 1);
 93 | pro_muls = cell(n_work, 1);
 94 | pro_idxs = cell(n_work, 1);
 95 | for i = 1:n_work
 96 |     idx_work{i} = idx(bound(i) + 1:bound(i + 1));
 97 |     pro_muls{i} = inf(pro_len, n_dim);
 98 |     pro_idxs{i} = inf(pro_len, n_dim);
 99 | end
100 | 
101 | %% compute the matrix profile
102 | parfor i = 1:n_work
103 |     dist_pro = zeros(pro_len, n_dim);
104 | 
105 |     for j = 1:length(idx_work{i})
106 |         q_idx = idx_work{i}(j); % query position handled in this step
107 |         fprintf('%d-%d %d\n', i, j, length(idx_work{i}));
108 |         query = data(q_idx:q_idx+sub_len-1, :);
109 |         for k = 1:n_dim
110 |             dist_pro(:, k) = ...
111 |                 mass(data_freq(:, k), query(:, k), ...
112 |                 data_len, sub_len, data_mu(:, k), ...
113 |                 data_sig(:, k), data_mu(q_idx, k), ...
114 |                 data_sig(q_idx, k));
115 |         end
116 |         dist_pro = real(dist_pro);
117 |         dist_pro = max(dist_pro, 0);
118 |         dist_pro = sqrt(dist_pro);
119 |     
120 |         % apply exclusion zone
121 |         exc_zone_st = max(1, q_idx - exc_zone);
122 |         exc_zone_ed = min(pro_len, q_idx + exc_zone);
123 |         dist_pro(exc_zone_st:exc_zone_ed, :) = inf;
124 |         dist_pro(data_sig < eps) = inf;
125 |         dist_pro(skip_loc, :) = inf;
126 | 
127 |         % figure out and store the nearest neighbor
128 |         dist_pro_sort = sort(dist_pro, 2);
129 |         dist_pro_cum = zeros(pro_len, 1);
130 |         dist_pro_merg = zeros(pro_len, 1);
131 |         for k = 1:n_dim
132 |             dist_pro_cum = dist_pro_cum + dist_pro_sort(:, k);
133 |             dist_pro_merg(:) = dist_pro_cum / k;
134 |             update_idx = dist_pro_merg < pro_muls{i}(:, k);
135 |             pro_muls{i}(update_idx, k) = dist_pro_merg(update_idx);
136 |             pro_idxs{i}(update_idx, k) = q_idx;
137 |         end
138 |     end
139 | end
140 | 
141 | %% merge workers' result
142 | pro_mul = inf(pro_len, n_dim);
143 | pro_idx = inf(pro_len, n_dim);
144 | for i = 1:n_work
145 |     for j = 1:n_dim
146 |         % element-wise minimum across workers, carrying the index along
147 |         update_idx = pro_muls{i}(:, j) < pro_mul(:, j);
148 |         pro_mul(update_idx, j) = pro_muls{i}(update_idx, j);
149 |         pro_idx(update_idx, j) = pro_idxs{i}(update_idx, j);
150 |     end
151 | end
151 | 
152 | %% The following two functions are modified from the code provided in the following URL
153 | %  http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
154 | function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len)
155 | % Zero-padded FFT of a column series plus the mean and (population)
156 | % standard deviation of each of its length-sub_len windows.
157 | data(data_len+1:(sub_len+data_len)) = 0;
158 | data_freq = fft(data);
159 | head = sub_len:data_len;   % last sample of every window
160 | tail = 1:data_len-sub_len; % sample just before windows 2..end
161 | sum_1 = cumsum(data);
162 | sum_2 = cumsum(data.^2);
163 | win_sum = sum_1(head) - [0; sum_1(tail)];
164 | win_sum2 = sum_2(head) - [0; sum_2(tail)];
165 | data_mu = win_sum./sub_len;
166 | data_sig2 = max(real((win_sum2./sub_len)-(data_mu.^2)), 0);
167 | data_sig = sqrt(data_sig2);
168 | 
169 | function [dist_pro, last_prod] = mass(data_freq, query, ...
170 |     data_len, sub_len, data_mu, data_sig, query_mu, query_sig)
171 | % FFT-based sliding dot products of the query against the data, converted
172 | % to squared z-normalized distances (dist_pro is not forced to be real);
173 | % last_prod returns the real sliding dot products.
174 | % reverse the query and zero-pad it to sub_len + data_len samples
175 | query = query(end:-1:1);
176 | query(sub_len+1:(sub_len+data_len)) = 0;
177 | 
178 | % circular convolution; entries sub_len..data_len are the full overlaps
179 | product = ifft(data_freq.*fft(query));
180 | dot_prod = product(sub_len:data_len);
181 | 
182 | dist_pro = 2 * (sub_len - ...
183 |     (dot_prod - sub_len*data_mu*query_mu)./(data_sig * query_sig));
184 | last_prod = real(dot_prod);


--------------------------------------------------------------------------------
/MATLAB/mstamp_par.m:
--------------------------------------------------------------------------------
  1 | % STOMP Based mSTAMP with Parallelization (Parallel Computing Toolbox)
  2 | % Chin-Chia Michael Yeh
  3 | %
  4 | % [pro_mul, pro_idx] = mstamp_par(data, sub_len, n_work)
  5 | %
  6 | % Output:
  7 | %     pro_mul: multidimensional matrix profile (matrix)
  8 | %     pro_idx: matrix profile index (matrix)
  9 | % Input:
 10 | %     data: input time series (matrix)
 11 | %     sub_len: interested subsequence length (scalar)
 12 | %     n_work: number of workers for parfor (scalar)
 13 | %
 14 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 15 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
 16 | % https://sites.google.com/view/mstamp/
 17 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 18 | %
 19 | 
 20 | function [pro_mul, pro_idx] = ...
 21 |     mstamp_par(data, sub_len, n_work)
 22 | %% setup pool
 23 | if isempty(which('parpool'))
 24 |     if matlabpool('size') <= 0 %#ok<*DPOOL>
 25 |         matlabpool(n_work);
 26 |     elseif matlabpool('size')~= n_work
 27 |         matlabpool('close');
 28 |         matlabpool(n_work);
 29 |     end
 30 | else
 31 |     pool = gcp('nocreate');
 32 |     if isempty(gcp('nocreate'))
 33 |         parpool(n_work);
 34 |     elseif pool.NumWorkers ~= n_work
 35 |         delete(gcp('nocreate'));
 36 |         parpool(n_work);
 37 |     end
 38 | end
 39 | 
 40 | %% get various length
 41 | exc_zone = round(sub_len / 2);
 42 | data_len = size(data, 1);
 43 | n_dim = size(data, 2);
 44 | pro_len = data_len - sub_len + 1;
 45 | 
 46 | %% check input
 47 | if sub_len > data_len / 2
 48 |     error(['Error: Time series is too short relative to desired ' ...
 49 |         'subsequence length']);
 50 | end
 51 | if sub_len < 4
 52 |     error('Error: Subsequence length must be at least 4');
 53 | end
 54 | 
 55 | %% check skip position
 56 | skip_loc = false(pro_len, 1);
 57 | for i = 1:pro_len
 58 |     if any(isnan(reshape(data(i:i+sub_len-1, :), 1, []))) ...
 59 |             || any(isinf(reshape(data(i:i+sub_len-1, :), 1, [])))
 60 |         skip_loc(i) = true;
 61 |     end
 62 | end
 63 | data(isnan(data)) = 0;
 64 | data(isinf(data)) = 0;
 65 | 
 66 | %% initialization
 67 | data_freq = zeros((sub_len + data_len), n_dim);
 68 | data_mu = zeros(pro_len, n_dim);
 69 | data_sig = zeros(pro_len, n_dim);
 70 | first_prod = zeros(pro_len, n_dim);
 71 | for i = 1:n_dim
 72 |     [data_freq(:, i), data_mu(:, i), data_sig(:, i)] = ...
 73 |         mass_pre(data(:, i), data_len, sub_len);
 74 |     [~, first_prod(:, i)] = mass(...
 75 |         data_freq(:, i), data(1:sub_len, i), data_len, ...
 76 |         sub_len, data_mu(:, i), data_sig(:, i), ...
 77 |         data_mu(1, i), data_sig(1, i));
 78 | end
 79 | 
 80 | %% initialize variable
 81 | per_work = round(pro_len / n_work);
 82 | idx_work = cell(n_work, 1);
 83 | pro_muls = cell(n_work, 1);
 84 | pro_idxs = cell(n_work, 1);
 85 | for i = 1:n_work
 86 |     idx_st = (i - 1) * per_work + 1;
 87 |     if i == n_work
 88 |         idx_ed = pro_len;
 89 |     else
 90 |         idx_ed = i * per_work;
 91 |     end
 92 |     idx_work{i} = idx_st:idx_ed;
 93 |     pro_muls{i} = zeros(length(idx_work{i}), n_dim);
 94 |     pro_idxs{i} = zeros(length(idx_work{i}), n_dim);
 95 | end
 96 | 
 97 | %% compute the matrix profile
 98 | parfor i = 1:n_work
 99 |     dist_pro = zeros(pro_len, n_dim);
100 |     last_prod = zeros(pro_len, n_dim);
101 |     drop_val = zeros(1, n_dim);
102 | 
103 |     for j = 1:length(idx_work{i})
104 |         idx = idx_work{i}(j);
105 |         fprintf('%d-%d %d\n', i, j, length(idx_work{i}));
106 |         query = data(idx:idx+sub_len-1, :);
107 |         if j == 1
108 |             for k = 1:n_dim
109 |                 [dist_pro(:, k), last_prod(:, k)] = ...
110 |                     mass(data_freq(:, k), query(:, k), ...
111 |                     data_len, sub_len, data_mu(:, k), ...
112 |                     data_sig(:, k), data_mu(idx, k), ...
113 |                     data_sig(idx, k));
114 |             end
115 |         else
116 |             last_prod(2:data_len - sub_len + 1, :) = ...
117 |                 last_prod(1:data_len - sub_len, :) ...
118 |                 - data(1:data_len - sub_len, :) ...
119 |                 .* repmat(drop_val, pro_len - 1, 1) ...
120 |                 + data(sub_len + 1:data_len, :) ...
121 |                 .* repmat(query(sub_len, :), pro_len - 1, 1);
122 |             last_prod(1, :) = first_prod(idx, :);
123 |             dist_pro = 2 * (sub_len - (last_prod ...
124 |                 - sub_len * data_mu .* repmat(data_mu(idx, :), pro_len, 1)) ...
125 |                 ./ (data_sig .* repmat(data_sig(idx, :), pro_len, 1)));
126 |         end
127 |         dist_pro = real(dist_pro);
128 |         dist_pro = max(dist_pro, 0);
129 |         dist_pro = sqrt(dist_pro);
130 |         drop_val = query(1, :);
131 | 
132 |         % apply exclusion zone
133 |         exc_zone_st = max(1, idx - exc_zone);
134 |         exc_zone_ed = min(pro_len, idx + exc_zone);
135 |         dist_pro(exc_zone_st:exc_zone_ed, :) = inf;
136 |         dist_pro(data_sig < eps) = inf;
137 |         if skip_loc(idx)
138 |             dist_pro = inf(size(dist_pro));
139 |         end
140 |         dist_pro(skip_loc, :) = inf;
141 | 
142 |         % figure out and store the nearest neighbor
143 |         dist_pro_sort = sort(dist_pro, 2);
144 |         dist_pro_cum = zeros(pro_len, 1);
145 |         dist_pro_merg = zeros(pro_len, 1);
146 |         for k = 1:n_dim
147 |             dist_pro_cum = dist_pro_cum + dist_pro_sort(:, k);
148 |             dist_pro_merg(:) = dist_pro_cum / k;
149 |             [min_val, min_idx] = min(dist_pro_merg);
150 |             pro_muls{i}(j, k) = min_val;
151 |             pro_idxs{i}(j, k) = min_idx;
152 |         end
153 |     end
154 | %     pro_muls{i} = sqrt(pro_muls{i});
155 | end
156 | 
157 | %% merge workers' result
158 | pro_mul = zeros(pro_len, n_dim);
159 | pro_idx = zeros(pro_len, n_dim);
160 | for i = 1:n_work
161 |     pro_idx(idx_work{i}, :) = pro_idxs{i};
162 |     pro_mul(idx_work{i}, :) = pro_muls{i};
163 | end
164 | 
165 | %% The following two functions are modified from the code provided in the following URL
166 | %  http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
function [data_freq, data_mu, data_sig] = mass_pre(data, data_len, sub_len)
% Pre-compute what every later call to mass() needs for one dimension:
% the FFT of the series zero-padded by sub_len samples, and the mean and
% (population) standard deviation of every length-sub_len window.
% data is expected to be a column vector (the sums are built with [0; ...]).
pad_len = data_len + sub_len;
data(data_len + 1:pad_len) = 0;
data_freq = fft(data);

% window sums from running sums: sum(i..i+m-1) = cum(i+m-1) - cum(i-1)
win_last = sub_len:data_len;
win_prev = 1:(data_len - sub_len);
run_sum = cumsum(data);
run_sq = cumsum(data .^ 2);
win_sum = run_sum(win_last) - [0; run_sum(win_prev)];
win_sq = run_sq(win_last) - [0; run_sq(win_prev)];

data_mu = win_sum ./ sub_len;
% rounding can push the variance slightly below zero; clamp before sqrt
data_sig = sqrt(max(real(win_sq ./ sub_len - data_mu .^ 2), 0));
181 | 
function [dist_pro, last_prod] = mass(data_freq, query, ...
    data_len, sub_len, data_mu, data_sig, query_mu, query_sig)
% Distance profile of one query against every window of a series whose
% padded FFT (data_freq) and window statistics were prepared beforehand.
% dist_pro is the squared z-normalised distance (not clamped, not made
% real here); last_prod is the real part of the sliding dot products.

% reverse the query and zero-pad it to the length of data_freq
rev_query = query(end:-1:1);
rev_query(sub_len + 1:sub_len + data_len) = 0;

% sliding dot products through the frequency domain
conv_full = ifft(data_freq .* fft(rev_query));
dot_prod = conv_full(sub_len:data_len);

% turn dot products into distances using the window statistics
centered = dot_prod - sub_len * data_mu * query_mu;
dist_pro = 2 * (sub_len - centered ./ (data_sig * query_sig));
last_prod = real(dot_prod);


--------------------------------------------------------------------------------
/MATLAB/plot_motif_on_data.m:
--------------------------------------------------------------------------------
 1 | % Plot the Motifs on the data
 2 | % Chin-Chia Michael Yeh
 3 | %
 4 | % plot_motif_on_data(data, sub_len, motif_idx, motif_dim)
 5 | %
 6 | % Input:
 7 | %     data: input time series (matrix)
 8 | %     sub_len: interested subsequence length (scalar)
 9 | %     motif_idx: the index for the found motifs (matrix)
10 | %     motif_dim: the dimensions spanned by the found motifs (cell)
11 | %
12 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
13 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
14 | % https://sites.google.com/view/mstamp/
15 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
16 | %
17 | 
function plot_motif_on_data(data, sub_len, motif_idx, motif_dim)
% Draw every dimension of data as a black trace (each rescaled to [0, 1]
% and stacked with a vertical offset of 1.1 per dimension), then overlay
% in red the subsequence starting at each motif_idx(i) on the dimensions
% listed in motif_dim{i}.
figure();
ax = axes();
hold(ax, 'on');

%% plot the data
for i = 1:size(data, 2)
    col = data(:, i) - min(data(:, i));
    col_max = max(col);
    % a constant dimension has zero range; dividing would give NaN and the
    % trace would disappear, so leave it at 0 instead
    if col_max > 0
        col = col / col_max;
    end
    data(:, i) = col + (i - 1) * 1.1;
    plot(data(:, i), 'color', 'k');
end

%% overlay the motifs (taken from the rescaled, offset data)
for i = 1:length(motif_idx)
    motif_location = motif_idx(i):motif_idx(i) + sub_len - 1;
    for k = 1:length(motif_dim{i})
        motif = data(motif_location, motif_dim{i}(k));
        plot(motif_location, motif, 'color', 'r');
    end
end

hold(ax, 'off');


--------------------------------------------------------------------------------
/MATLAB/toy_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcyeh/mstamp/b40961528493d369bf03d8b0fbc4be48201b2594/MATLAB/toy_data.mat


--------------------------------------------------------------------------------
/MATLAB/unconstrain_search.m:
--------------------------------------------------------------------------------
 1 | % MDL Based Motif Discovery for Multidimensional Matrix Profile
 2 | % Chin-Chia Michael Yeh
 3 | %
 4 | % [motif_idx, motif_dim] = unconstrain_search(...
 5 | %     data, sub_len, pro_mul, pro_idx, n_bit, k)
 6 | %
 7 | % Output:
 8 | %     motif_idx: the index for the found motifs (matrix)
 9 | %     motif_dim: the dimensions spanned by the found motifs (cell)
10 | % Input:
11 | %     data: input time series (matrix)
12 | %     sub_len: interested subsequence length (scalar)
13 | %     pro_mul: multidimensional matrix profile (matrix)
14 | %     pro_idx: matrix profile index (matrix)
15 | %     n_bit: number of bit for discretization (scalar)
16 | %     k: number of motifs to retrieve, set to inf for retrieving
17 | %        all possible k-motifs (scalar)
18 | %
19 | % C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
20 | % Multidimensional Motif Discovery," IEEE ICDM 2017.
21 | % https://sites.google.com/view/mstamp/
22 | % http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
23 | %
24 | 
function [motif_idx, motif_dim] = unconstrain_search(...
    data, sub_len, pro_mul, pro_idx, n_bit, k)
% Repeatedly pick a motif from the multidimensional matrix profile.
% In each round the minimum of every column of pro_mul gives one candidate
% pair per dimensionality; get_bit_save scores each candidate and the one
% with the smallest bit size is kept. The search stops after k motifs,
% when any column minimum is inf, or when the best candidate costs more
% bits than base_bit. Rows within exc_zone of a selected motif are set to
% inf so they cannot be selected again.
exc_zone = round(0.5 * sub_len);
tot_dim = size(data, 2);
if isinf(k)
    k = size(pro_mul, 1);
end
motif_idx = zeros(k, 1);
motif_dim = cell(k, 1);
% cost of storing both subsequences in all dimensions without compression
base_bit = n_bit * tot_dim * sub_len * 2;
for i = 1:k
    fprintf('finding motif %d ... \n', i);
    [val, idx_1] = min(pro_mul, [], 1);
    if any(isinf(val))
        % only the first i-1 slots hold motifs (previously trimmed with
        % k-1, which relied on the zero filter below to drop the rest)
        motif_idx = motif_idx(1:i-1);
        motif_dim = motif_dim(1:i-1);
        break;
    end

    bit_sz = zeros(tot_dim, 1);
    idx_2 = zeros(tot_dim, 1);
    dim = cell(tot_dim, 1);
    for j = 1:tot_dim
        % candidate for dimensionality j: best row of column j and its
        % recorded nearest neighbor
        idx_2(j) = pro_idx(idx_1(j), j);
        motif_1 = data(idx_1(j):idx_1(j) + sub_len - 1, :);
        motif_2 = data(idx_2(j):idx_2(j) + sub_len - 1, :);
        [bit_sz(j), dim{j}] = get_bit_save(motif_1, motif_2, j, n_bit);
    end
    [best_bit, min_idx] = min(bit_sz);
    if best_bit > base_bit
        motif_idx = motif_idx(1:i-1);
        motif_dim = motif_dim(1:i-1);
        break;
    end
    motif_idx(i, 1) = idx_1(min_idx);
    motif_dim{i} = dim{min_idx};

    st_idx = max(1, motif_idx(i, 1) - exc_zone);
    ed_idx = min(size(pro_mul, 1), motif_idx(i, 1) + exc_zone);
    pro_mul(st_idx:ed_idx, :) = inf;
end
% drop any slot that was never filled
motif_dim = motif_dim(motif_idx ~= 0);
motif_idx = motif_idx(motif_idx ~= 0);
68 | 
69 | 
function [bit_sz, dim_id] = get_bit_save(motif_1, motif_2, n_dim, n_bit)
% Bit size of storing a motif pair when the n_dim dimensions on which the
% two discretized subsequences differ least are stored as a difference.
% dim_id lists those n_dim dimensions.
[sub_len, tot_dim] = size(motif_1);
split_pt = get_desc_split_pt(n_bit);
disc_1 = discretization(motif_1, split_pt);
disc_2 = discretization(motif_2, split_pt);

% rank dimensions by total absolute difference and keep the n_dim smallest
dim_cost = sum(abs(disc_1 - disc_2), 1);
[~, dim_order] = sort(dim_cost, 'ascend');
dim_id = dim_order(1:n_dim);

% number of distinct difference symbols on the kept dimensions
motif_diff = disc_1(:, dim_id) - disc_2(:, dim_id);
n_val = numel(unique(motif_diff));

raw_bit = n_bit * (tot_dim * sub_len * 2 - n_dim * sub_len);
diff_bit = n_dim * sub_len * log2(n_val);
table_bit = n_val * n_bit;
bit_sz = raw_bit + diff_bit + table_bit;
84 | 
85 | 
function disc = discretization(motif, split_pt)
% Z-normalise every column of motif (population standard deviation) and
% map each value to the index of the first split point it is strictly
% below; values below no split point get length(split_pt) + 1.
for c = 1:size(motif, 2)
    col = motif(:, c);
    motif(:, c) = (col - mean(col)) / std(col, 1);
end

n_split = length(split_pt);
disc = (n_split + 1) * ones(size(motif));
% walk the split points backwards so the lowest matching index wins
for s = n_split:-1:1
    disc(motif < split_pt(s)) = s;
end
96 | 
97 | 
function split_pt = get_desc_split_pt(n_bit)
% Split points that cut the standard normal distribution into 2^n_bit
% equal-probability bins (2^n_bit - 1 interior quantiles).
n_bin = 2^n_bit;
split_pt = norminv((1:n_bin - 1) / n_bin, 0, 1);


--------------------------------------------------------------------------------
/Python/README.txt:
--------------------------------------------------------------------------------
1 | See demo.py for examples of using the functions
2 | 
3 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
4 | Multidimensional Motif Discovery," IEEE ICDM 2017.
5 | https://sites.google.com/view/mstamp/
6 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
7 | 


--------------------------------------------------------------------------------
/Python/demo.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 | @author: Michael Yeh
 4 | 
 5 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 6 | Multidimensional Motif Discovery," IEEE ICDM 2017.
 7 | https://sites.google.com/view/mstamp/
 8 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 9 | """
10 | 
11 | import scipy.io as sio
12 | import matplotlib.pyplot as plt
13 | from mstamp_stomp import mstamp as mstamp_stomp
14 | from mstamp_stamp import mstamp as mstamp_stamp
15 | 
16 | 
def plot_motifs(matrix_profile, dimensionality=1):
    """Plot the first three series with the top motif pair highlighted.

    The two smallest entries of row ``dimensionality - 1`` of
    ``matrix_profile`` are taken as the motif locations. They are drawn in
    red on each of the first three series and marked with vertical lines on
    the matrix profile shown in the fourth subplot.

    Relies on the module-level names ``data`` and ``sub_len``.
    """
    motif_at = matrix_profile[dimensionality - 1, :].argsort()[:2]
    x_max = matrix_profile.shape[1]

    plt.figure(figsize=(14, 7))
    for row in range(3):
        plt.subplot(4, 1, row + 1)
        plt.plot(data.T[row, :])
        plt.title('$T_{}$'.format(row + 1))
        for start in motif_at:
            stop = start + sub_len
            plt.plot(range(start, stop), data.T[row, :][start:stop], c='r')
        plt.xlim((0, x_max))

    # bottom panel: the matrix profile itself
    plt.subplot(414)
    plt.title('{}-dimensional Matrix Profile'.format(dimensionality))
    plt.plot(matrix_profile[dimensionality - 1, :])
    for start in motif_at:
        plt.axvline(start, c='r')
    plt.xlim((0, x_max))
    plt.tight_layout()
36 | 
37 | 
if __name__ == '__main__':
    # ``data`` and ``sub_len`` are module globals read by plot_motifs
    mat = sio.loadmat('toy_data.mat')
    data, sub_len = mat['data'], mat['sub_len'][0][0]

    # multidimensional matrix profile, STOMP-based implementation,
    # shown as an image
    stomp_profile, stomp_index = mstamp_stomp(
        data.T, sub_len, return_dimension=False)
    plt.figure()
    plt.title('Matrix Profile (STOMP)')
    plt.imshow(stomp_profile, extent=[0, 1, 0, 1])

    # same computation with the STAMP-based implementation
    stamp_profile, stamp_index = mstamp_stamp(
        data.T, sub_len, return_dimension=False)
    plt.figure()
    plt.title('Matrix Profile (STAMP)')
    plt.imshow(stamp_profile, extent=[0, 1, 0, 1])

    plot_motifs(stamp_profile)

    # both functions also accept a single series (1D matrix profile)
    single_stomp, _ = mstamp_stomp(
        data[:, 1].T, sub_len, return_dimension=False)
    plt.figure()
    plt.plot(single_stomp[0, :])

    single_stamp, _ = mstamp_stamp(
        data[:, 1].T, sub_len, return_dimension=False)
    plt.figure()
    plt.plot(single_stamp[0, :])

    plt.show()
77 | 


--------------------------------------------------------------------------------
/Python/mstamp_stamp.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Michael Yeh
  4 | """
  5 | 
  6 | from __future__ import print_function
  7 | import time
  8 | import numpy as np
  9 | 
 10 | 
 11 | _EPS = 1e-14
 12 | 
 13 | 
 14 | def mstamp(seq, sub_len, return_dimension=False):
 15 |     """ multidimensional matrix profile with mSTAMP (stamp based)
 16 | 
 17 |     Parameters
 18 |     ----------
 19 |     seq : numpy matrix, shape (n_dim, seq_len)
 20 |         input sequence
 21 |     sub_len : int
 22 |         subsequence length
 23 |     return_dimension : bool
 24 |         if True, also return the matrix profile dimension. It takses O(d^2 n)
 25 |         to store and O(d^2 n^2) to compute. (default is False)
 26 | 
 27 |     Returns
 28 |     -------
 29 |     matrix_profile : numpy matrix, shape (n_dim, sub_num)
 30 |         matrix profile
 31 |     profile_index : numpy matrix, shape (n_dim, sub_num)
 32 |         matrix profile index
 33 |     profile_dimension : list, optional, shape (n_dim)
 34 |         matrix profile dimension, this is only returned when return_dimension
 35 |         is True
 36 | 
 37 |     Notes
 38 |     -----
 39 |     C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 40 |     Multidimensional Motif Discovery," IEEE ICDM 2017.
 41 |     https://sites.google.com/view/mstamp/
 42 |     http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 43 |     """
 44 |     if sub_len < 4:
 45 |         raise RuntimeError('Subsequence length (sub_len) must be at least 4')
 46 |     exc_zone = sub_len // 2
 47 |     seq = np.array(seq, dtype=float, copy=True)
 48 | 
 49 |     if seq.ndim == 1:
 50 |         seq = np.expand_dims(seq, axis=0)
 51 | 
 52 |     seq_len = seq.shape[1]
 53 |     sub_num = seq.shape[1] - sub_len + 1
 54 |     n_dim = seq.shape[0]
 55 |     skip_loc = np.zeros(sub_num, dtype=bool)
 56 |     for i in range(sub_num):
 57 |         if not np.all(np.isfinite(seq[:, i:i + sub_len])):
 58 |             skip_loc[i] = True
 59 |     seq[~np.isfinite(seq)] = 0
 60 | 
 61 |     matrix_profile = np.empty((n_dim, sub_num))
 62 |     matrix_profile[:] = np.inf
 63 |     profile_index = -np.ones((n_dim, sub_num), dtype=int)
 64 |     seq_freq = np.empty((n_dim, seq_len * 2), dtype=np.complex128)
 65 |     seq_mu = np.empty((n_dim, sub_num))
 66 |     seq_sig = np.empty((n_dim, sub_num))
 67 |     if return_dimension:
 68 |         profile_dimension = []
 69 |         for i in range(n_dim):
 70 |             profile_dimension.append(np.empty((i + 1, sub_num), dtype=int))
 71 |     for i in range(n_dim):
 72 |         seq_freq[i, :], seq_mu[i, :], seq_sig[i, :] = \
 73 |             _mass_pre(seq[i, :], sub_len)
 74 | 
 75 |     dist_profile = np.empty((n_dim, sub_num))
 76 |     que_sig = np.empty(n_dim)
 77 |     tic = time.time()
 78 |     for i in range(sub_num):
 79 |         cur_prog = (i + 1) / sub_num
 80 |         time_left = ((time.time() - tic) / (i + 1)) * (sub_num - i - 1)
 81 |         print('\rProgress [{0:<50s}] {1:5.1f}% {2:8.1f} sec'
 82 |               .format('#' * int(cur_prog * 50),
 83 |                       cur_prog * 100, time_left), end="")
 84 |         for j in range(n_dim):
 85 |             que = seq[j, i:i + sub_len]
 86 |             dist_profile[j, :], que_sig[j] = _mass(
 87 |                 seq_freq[j, :], que, seq_len, sub_len,
 88 |                 seq_mu[j, :], seq_sig[j, :])
 89 | 
 90 |         if skip_loc[i] or np.any(que_sig < _EPS):
 91 |             continue
 92 | 
 93 |         exc_zone_st = max(0, i - exc_zone)
 94 |         exc_zone_ed = min(sub_num, i + exc_zone)
 95 |         dist_profile[:, exc_zone_st:exc_zone_ed] = np.inf
 96 |         dist_profile[:, skip_loc] = np.inf
 97 |         dist_profile[seq_sig < _EPS] = np.inf
 98 |         dist_profile = np.sqrt(dist_profile)
 99 | 
100 |         dist_profile_dim = np.argsort(dist_profile, axis=0)
101 |         dist_profile_sort = np.sort(dist_profile, axis=0)
102 |         dist_profile_cumsum = np.zeros(sub_num)
103 |         for j in range(n_dim):
104 |             dist_profile_cumsum += dist_profile_sort[j, :]
105 |             dist_profile_mean = dist_profile_cumsum / (j + 1)
106 |             update_pos = dist_profile_mean < matrix_profile[j, :]
107 |             profile_index[j, update_pos] = i
108 |             matrix_profile[j, update_pos] = dist_profile_mean[update_pos]
109 |             if return_dimension:
110 |                 profile_dimension[j][:, update_pos] = \
111 |                     dist_profile_dim[:j + 1, update_pos]
112 | 
113 |     # matrix_profile = np.sqrt(matrix_profile)
114 |     if return_dimension:
115 |         return matrix_profile, profile_index, profile_dimension
116 |     else:
117 |         return matrix_profile, profile_index,
118 | 
119 | 
def _mass_pre(seq, sub_len):
    """ pre-computation for iterative call to MASS

    Parameters
    ----------
    seq : numpy array
        input sequence (1-D)
    sub_len : int
        subsequence length

    Returns
    -------
    seq_freq : numpy array
        FFT of the sequence zero-padded to twice its length
    seq_mu : numpy array
        each subsequence's mu (mean)
    seq_sig : numpy array
        each subsequence's sigma (population standard deviation)

    Notes
    -----
    This function is modified from the code provided in the following URL
    http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
    """
    seq_len = len(seq)
    seq_pad = np.zeros(seq_len * 2)
    seq_pad[0:seq_len] = seq
    seq_freq = np.fft.fft(seq_pad)

    # running sums with a leading zero: window i sums to cum[i + m] - cum[i]
    seq_cum = np.concatenate(([0.0], np.cumsum(seq_pad[0:seq_len])))
    seq_sq_cum = np.concatenate(
        ([0.0], np.cumsum(np.square(seq_pad[0:seq_len]))))
    seq_sum = seq_cum[sub_len:] - seq_cum[:-sub_len]
    seq_sq_sum = seq_sq_cum[sub_len:] - seq_sq_cum[:-sub_len]

    seq_mu = seq_sum / sub_len
    seq_sig_sq = seq_sq_sum / sub_len - np.square(seq_mu)
    # rounding can make the variance of a (near) constant window slightly
    # negative; clamp at zero so sigma is 0 instead of NaN
    seq_sig = np.sqrt(np.maximum(seq_sig_sq, 0))
    return seq_freq, seq_mu, seq_sig
158 | 
159 | 
def _mass(seq_freq, que, seq_len, sub_len, seq_mu, seq_sig):
    """ one MASS evaluation using the pre-computed sequence statistics

    Parameters
    ----------
    seq_freq : numpy array
        sequence in frequency domain (padded to ``2 * seq_len``)
    que : numpy array
        query
    seq_len : int
        sequence length
    sub_len : int
        subsequence length
    seq_mu : numpy array
        each subsequence's mu (mean)
    seq_sig : numpy array
        each subsequence's sigma (standard deviation)

    Returns
    -------
    dist_profile : numpy array
        squared z-normalised distance of the query to every subsequence
    que_sig : float64
        query's sigma (standard deviation); its square is floored at _EPS

    Notes
    -----
    This function is modified from the code provided in the following URL
    http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
    """
    # reversed, zero-padded query so the FFT product yields sliding
    # dot products
    flipped = que[::-1]
    padded = np.zeros(2 * seq_len)
    padded[:sub_len] = flipped
    sliding_dot = np.real(np.fft.ifft(seq_freq * np.fft.fft(padded)))
    window_dot = sliding_dot[sub_len - 1:seq_len]

    # query statistics
    que_mu = np.sum(flipped) / sub_len
    que_var = np.sum(np.square(flipped)) / sub_len - que_mu**2
    que_sig = np.sqrt(_EPS if que_var < _EPS else que_var)

    centered = window_dot - sub_len * seq_mu * que_mu
    dist_profile = 2 * (sub_len - centered / (seq_sig * que_sig))
    return dist_profile, que_sig
210 | 


--------------------------------------------------------------------------------
/Python/mstamp_stomp.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """
  3 | @author: Michael Yeh
  4 | """
  5 | 
  6 | from __future__ import print_function
  7 | import time
  8 | import numpy as np
  9 | 
 10 | 
 11 | _EPS = 1e-14
 12 | 
 13 | 
 14 | def mstamp(seq, sub_len, return_dimension=False):
 15 |     """ multidimensional matrix profile with mSTAMP (stomp based)
 16 | 
 17 |     Parameters
 18 |     ----------
 19 |     seq : numpy matrix, shape (n_dim, seq_len)
 20 |         input sequence
 21 |     sub_len : int
 22 |         subsequence length
 23 |     return_dimension : bool
 24 |         if True, also return the matrix profile dimension. It takses O(d^2 n)
 25 |         to store and O(d^2 n^2) to compute. (default is False)
 26 | 
 27 |     Returns
 28 |     -------
 29 |     matrix_profile : numpy matrix, shape (n_dim, sub_num)
 30 |         matrix profile
 31 |     profile_index : numpy matrix, shape (n_dim, sub_num)
 32 |         matrix profile index
 33 |     profile_dimension : list, optional, shape (n_dim)
 34 |         matrix profile dimension, this is only returned when return_dimension
 35 |         is True
 36 | 
 37 |     Notes
 38 |     -----
 39 |     C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
 40 |     Multidimensional Motif Discovery," IEEE ICDM 2017.
 41 |     https://sites.google.com/view/mstamp/
 42 |     http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
 43 |     """
 44 |     if sub_len < 4:
 45 |         raise RuntimeError('Subsequence length (sub_len) must be at least 4')
 46 |     exc_zone = sub_len // 2
 47 |     seq = np.array(seq, dtype=float, copy=True)
 48 | 
 49 |     if seq.ndim == 1:
 50 |         seq = np.expand_dims(seq, axis=0)
 51 | 
 52 |     seq_len = seq.shape[1]
 53 |     sub_num = seq.shape[1] - sub_len + 1
 54 |     n_dim = seq.shape[0]
 55 |     skip_loc = np.zeros(sub_num, dtype=bool)
 56 |     for i in range(sub_num):
 57 |         if not np.all(np.isfinite(seq[:, i:i + sub_len])):
 58 |             skip_loc[i] = True
 59 |     seq[~np.isfinite(seq)] = 0
 60 | 
 61 |     drop_val = 0
 62 |     matrix_profile = np.empty((n_dim, sub_num))
 63 |     matrix_profile[:] = np.inf
 64 |     profile_index = -np.ones((n_dim, sub_num), dtype=int)
 65 |     seq_freq = np.empty((n_dim, seq_len * 2), dtype=np.complex128)
 66 |     seq_mu = np.empty((n_dim, sub_num))
 67 |     seq_sig = np.empty((n_dim, sub_num))
 68 |     if return_dimension:
 69 |         profile_dimension = []
 70 |         for i in range(n_dim):
 71 |             profile_dimension.append(np.empty((i + 1, sub_num), dtype=int))
 72 |     for i in range(n_dim):
 73 |         seq_freq[i, :], seq_mu[i, :], seq_sig[i, :] = \
 74 |             _mass_pre(seq[i, :], sub_len)
 75 | 
 76 |     dist_profile = np.empty((n_dim, sub_num))
 77 |     last_product = np.empty((n_dim, sub_num))
 78 |     first_product = np.empty((n_dim, sub_num))
 79 |     drop_val = np.empty(n_dim)
 80 |     que_sum = np.empty(n_dim)
 81 |     que_sq_sum = np.empty(n_dim)
 82 |     que_sig = np.empty(n_dim)
 83 |     tic = time.time()
 84 |     for i in range(sub_num):
 85 |         cur_prog = (i + 1) / sub_num
 86 |         time_left = ((time.time() - tic) / (i + 1)) * (sub_num - i - 1)
 87 |         print('\rProgress [{0:<50s}] {1:5.1f}% {2:8.1f} sec'
 88 |               .format('#' * int(cur_prog * 50),
 89 |                       cur_prog * 100, time_left), end="")
 90 |         for j in range(n_dim):
 91 |             que = seq[j, i:i + sub_len]
 92 |             if i == 0:
 93 |                 (dist_profile[j, :], last_product[j, :],
 94 |                  que_sum[j], que_sq_sum[j], que_sig[j]) = \
 95 |                     _mass(seq_freq[j, :], que, seq_len, sub_len,
 96 |                           seq_mu[j, :], seq_sig[j, :])
 97 |                 first_product[j, :] = last_product[j, :].copy()
 98 |             else:
 99 |                 que_sum[j] = que_sum[j] - drop_val[j] + que[-1]
100 |                 que_sq_sum[j] = que_sq_sum[j] - drop_val[j]**2 + que[-1]**2
101 |                 que_mu = que_sum[j] / sub_len
102 |                 que_sig_sq = que_sq_sum[j] / sub_len - que_mu**2
103 |                 if que_sig_sq < _EPS:
104 |                     que_sig_sq = _EPS
105 |                 que_sig[j] = np.sqrt(que_sig_sq)
106 |                 last_product[j, 1:] = (last_product[j, 0:-1] -
107 |                                        seq[j, 0:seq_len - sub_len] *
108 |                                        drop_val[j] +
109 |                                        seq[j, sub_len:seq_len] * que[-1])
110 |                 last_product[j, 0] = first_product[j, i]
111 |                 dist_profile[j, :] = \
112 |                     (2 * (sub_len - (last_product[j, :] -
113 |                                      sub_len * seq_mu[j, :] * que_mu) /
114 |                           (seq_sig[j, :] * que_sig[j])))
115 |                 dist_profile[j, dist_profile[j, :] < _EPS] = 0
116 |             drop_val[j] = que[0]
117 | 
118 |         if skip_loc[i] or np.any(que_sig < _EPS):
119 |             continue
120 | 
121 |         exc_zone_st = max(0, i - exc_zone)
122 |         exc_zone_ed = min(sub_num, i + exc_zone)
123 |         dist_profile[:, exc_zone_st:exc_zone_ed] = np.inf
124 |         dist_profile[:, skip_loc] = np.inf
125 |         dist_profile[seq_sig < _EPS] = np.inf
126 |         dist_profile = np.sqrt(dist_profile)
127 | 
128 |         dist_profile_dim = np.argsort(dist_profile, axis=0)
129 |         dist_profile_sort = np.sort(dist_profile, axis=0)
130 |         dist_profile_cumsum = np.zeros(sub_num)
131 |         for j in range(n_dim):
132 |             dist_profile_cumsum += dist_profile_sort[j, :]
133 |             dist_profile_mean = dist_profile_cumsum / (j + 1)
134 |             update_pos = dist_profile_mean < matrix_profile[j, :]
135 |             profile_index[j, update_pos] = i
136 |             matrix_profile[j, update_pos] = dist_profile_mean[update_pos]
137 |             if return_dimension:
138 |                 profile_dimension[j][:, update_pos] = \
139 |                     dist_profile_dim[:j + 1, update_pos]
140 | 
141 |     # matrix_profile = np.sqrt(matrix_profile)
142 |     if return_dimension:
143 |         return matrix_profile, profile_index, profile_dimension
144 |     else:
145 |         return matrix_profile, profile_index,
146 | 
147 | 
def _mass_pre(seq, sub_len):
    """ pre-computation for iterative call to MASS

    Parameters
    ----------
    seq : numpy array
        input sequence (1-D)
    sub_len : int
        subsequence length

    Returns
    -------
    seq_freq : numpy array
        FFT of the sequence zero-padded to twice its length
    seq_mu : numpy array
        each subsequence's mu (mean)
    seq_sig : numpy array
        each subsequence's sigma (population standard deviation)

    Notes
    -----
    This function is modified from the code provided in the following URL
    http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
    """
    seq_len = len(seq)
    seq_pad = np.zeros(seq_len * 2)
    seq_pad[0:seq_len] = seq
    seq_freq = np.fft.fft(seq_pad)

    # running sums with a leading zero: window i sums to cum[i + m] - cum[i]
    seq_cum = np.concatenate(([0.0], np.cumsum(seq_pad[0:seq_len])))
    seq_sq_cum = np.concatenate(
        ([0.0], np.cumsum(np.square(seq_pad[0:seq_len]))))
    seq_sum = seq_cum[sub_len:] - seq_cum[:-sub_len]
    seq_sq_sum = seq_sq_cum[sub_len:] - seq_sq_cum[:-sub_len]

    seq_mu = seq_sum / sub_len
    seq_sig_sq = seq_sq_sum / sub_len - np.square(seq_mu)
    # rounding can make the variance of a (near) constant window slightly
    # negative; clamp at zero so sigma is 0 instead of NaN
    seq_sig = np.sqrt(np.maximum(seq_sig_sq, 0))
    return seq_freq, seq_mu, seq_sig
186 | 
187 | 
def _mass(seq_freq, que, seq_len, sub_len, seq_mu, seq_sig):
    """ one MASS evaluation using the pre-computed sequence statistics

    Parameters
    ----------
    seq_freq : numpy array
        sequence in frequency domain (padded to ``2 * seq_len``)
    que : numpy array
        query
    seq_len : int
        sequence length
    sub_len : int
        subsequence length
    seq_mu : numpy array
        each subsequence's mu (mean)
    seq_sig : numpy array
        each subsequence's sigma (standard deviation)

    Returns
    -------
    dist_profile : numpy array
        squared z-normalised distance of the query to every subsequence
    last_product : numpy array
        cross term (dot product of the query with every subsequence)
    que_sum : float64
        query's sum
    que_sq_sum : float64
        query's square sum
    que_sig : float64
        query's sigma (standard deviation); its square is floored at _EPS

    Notes
    -----
    This function is modified from the code provided in the following URL
    http://www.cs.unm.edu/~mueen/FastestSimilaritySearch.html
    """
    # reversed, zero-padded query so the FFT product yields sliding
    # dot products
    flipped = que[::-1]
    padded = np.zeros(2 * seq_len)
    padded[:sub_len] = flipped
    sliding_dot = np.real(np.fft.ifft(seq_freq * np.fft.fft(padded)))

    # query statistics
    que_sum = np.sum(flipped)
    que_sq_sum = np.sum(np.square(flipped))
    que_mu = que_sum / sub_len
    que_var = que_sq_sum / sub_len - que_mu**2
    que_sig = np.sqrt(_EPS if que_var < _EPS else que_var)

    last_product = sliding_dot[sub_len - 1:seq_len]
    centered = last_product - sub_len * seq_mu * que_mu
    dist_profile = 2 * (sub_len - centered / (seq_sig * que_sig))
    return dist_profile, last_product, que_sum, que_sq_sum, que_sig
245 | 


--------------------------------------------------------------------------------
/Python/toy_data.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mcyeh/mstamp/b40961528493d369bf03d8b0fbc4be48201b2594/Python/toy_data.mat


--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
1 | MATLAB and Python code for the following paper:
2 | 
3 | C.-C. M. Yeh, N. Kavantzas, and E. Keogh, "Matrix Profile VI: Meaningful
4 | Multidimensional Motif Discovery," IEEE ICDM 2017.
5 | https://sites.google.com/view/mstamp/
6 | http://www.cs.ucr.edu/~eamonn/MatrixProfile.html
7 | 


--------------------------------------------------------------------------------