├── .gitignore
├── AAAI24_appendix.pdf
├── LICENSE
├── README.md
└── src
├── augmentations.py
├── collator.py
├── datautils.py
├── lib.py
├── models
├── __init__.py
├── attention.py
├── backbone.py
├── dilated_conv.py
├── encoder.py
└── losses.py
├── tasks
├── __init__.py
├── _eval_protocols.py
├── anomaly_detection.py
├── classification.py
├── forecasting.py
└── imputation.py
├── timesurl.py
├── train.py
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/AAAI24_appendix.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alrash/TimesURL/d3533e45cb28efe8c986f13ce8d80926d0e9254e/AAAI24_appendix.pdf
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Alrash
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TimesURL
2 | The implementation of "TimesURL: Self-supervised Contrastive Learning for Universal Time Series Representation Learning"
3 |
4 |
5 | Paper: [Arxiv](https://arxiv.org/abs/2312.15709) or [AAAI](https://ojs.aaai.org/index.php/AAAI/article/view/29299/30450)
6 |
7 | Video: [Video](https://underline.io/lecture/93776-timesurl-self-supervised-contrastive-learning-for-universal-time-series-representation-learning-video)
8 |
9 | Appendix: [Appendix](https://github.com/Alrash/TimesURL/blob/main/AAAI24_appendix.pdf)
10 | ## Codes
11 | This code is based on [TS2Vec](https://github.com/yuezhihan/ts2vec).
12 |
13 | ## Citation
14 | ```
15 | @inproceedings{liu2024timesurl,
16 | title={Timesurl: Self-supervised contrastive learning for universal time series representation learning},
17 | author={Liu, Jiexi and Chen, Songcan},
18 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
19 | volume={38},
20 | number={12},
21 | pages={13918--13926},
22 | year={2024}
23 | }
24 | ```
25 | ## Acknowledgement
26 | [TS2Vec](https://github.com/yuezhihan/ts2vec)
27 |
28 | [FrAug](https://anonymous.4open.science/r/Fraug-more-results-1785/README.md)
29 |
30 | ## Email
31 | ```
32 | liujiexi@nuaa.edu.cn
33 | alrash@nuaa.edu.cn
34 | ```
35 |
--------------------------------------------------------------------------------
/src/augmentations.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
def one_hot_encoding(X):
    """Return a one-hot matrix for a sequence of integer-like labels.

    Every entry of ``X`` is cast with ``int``. The result has one row per
    entry and ``max(X) + 1`` columns, with a single 1.0 in the column given
    by the entry's value (rows are taken from ``np.eye``).
    """
    labels = np.array([int(value) for value in X])
    identity = np.eye(labels.max() + 1)
    return identity[labels]
9 |
def DataTransform(sample, config):
    """Build a weak and a strong augmentation of ``sample``.

    The weak view is ``scaling`` of the sample; the strong view is the
    sample passed through ``permutation`` and then ``jitter``. All ratios
    are read from ``config.augmentation``.

    Returns:
        Tuple ``(weak_aug, strong_aug)``.
    """
    weak_aug = scaling(sample, config.augmentation.jitter_scale_ratio)
    # weak_aug = permutation(sample, max_segments=config.augmentation.max_seg)
    shuffled = permutation(sample, max_segments=config.augmentation.max_seg)
    strong_aug = jitter(shuffled, config.augmentation.jitter_ratio)
    return weak_aug, strong_aug
17 |
18 | # def DataTransform_TD(sample, config):
19 | # """Weak and strong augmentations"""
20 | # weak_aug = sample
21 | # strong_aug = jitter(permutation(sample, max_segments=config.augmentation.max_seg), config.augmentation.jitter_ratio) #masking(sample)
22 | # return weak_aug, strong_aug
23 | #
24 | # def DataTransform_FD(sample, config):
25 | # """Weak and strong augmentations in Frequency domain """
26 | # # weak_aug = remove_frequency(sample, 0.1)
27 | # strong_aug = add_frequency(sample, 0.1)
28 | # return weak_aug, strong_aug
def DataTransform_TD(sample, config):
    """Apply one randomly chosen time-domain augmentation to each row.

    For every row of ``sample`` an integer in ``[0, 4)`` is drawn:
    0 keeps the ``jitter`` result, 1 the ``scaling`` result and 2 the
    ``permutation`` result. The other candidates are zeroed for that row and
    the three candidates are summed.

    Rows that draw 3 are zeroed in all three candidates, because the fourth
    augmentation slot is disabled.
    NOTE(review): confirm that an all-zero row is the intended outcome.

    Args:
        sample: Batch whose first axis indexes rows.
        config: Object exposing ``augmentation.jitter_ratio``,
            ``augmentation.jitter_scale_ratio`` and ``augmentation.max_seg``.

    Returns:
        The sum of the three row-masked augmentations.
    """
    def _zero_rows(aug, rows):
        # Boolean row mask; convert for torch tensors so the index type is
        # always valid (permutation returns a tensor, scaling an ndarray).
        if torch.is_tensor(aug):
            rows = torch.from_numpy(rows).to(aug.device)
        aug[rows] = 0

    aug_1 = jitter(sample, config.augmentation.jitter_ratio)
    aug_2 = scaling(sample, config.augmentation.jitter_scale_ratio)
    aug_3 = permutation(sample, max_segments=config.augmentation.max_seg)

    # One choice per row among four slots (only three are implemented).
    li = np.random.randint(0, 4, size=[sample.shape[0]])
    # The previous code indexed with ``1 - one_hot`` (a float array), which
    # raises IndexError, and failed when a slot was never drawn because the
    # one-hot matrix then lacked that column.
    _zero_rows(aug_1, li != 0)
    _zero_rows(aug_2, li != 1)
    _zero_rows(aug_3, li != 2)
    # aug_4 would cover li == 3
    # NOTE(review): the candidates mix NumPy arrays and a torch tensor; the
    # sum relies on NumPy/torch interop — confirm the resulting type.
    aug_T = aug_1 + aug_2 + aug_3  # + aug_4
    return aug_T
43 |
44 |
def DataTransform_FD(sample, config):
    """Apply one randomly chosen frequency-domain augmentation to each row.

    For every row of ``sample`` an integer in ``[0, 2)`` is drawn: 0 keeps
    the ``remove_frequency`` result, 1 keeps the ``add_frequency`` result.
    The other candidate is zeroed for that row and the two are summed.

    Args:
        sample: Batch whose first axis indexes rows.
        config: Unused; kept for a signature parallel to the time-domain
            transform.

    Returns:
        The sum of the two row-masked augmentations.
    """
    def _zero_rows(aug, rows):
        # Boolean row mask; convert for torch tensors so the index type is
        # always valid.
        if torch.is_tensor(aug):
            rows = torch.from_numpy(rows).to(aug.device)
        aug[rows] = 0

    aug_1 = remove_frequency(sample, 0.1)
    aug_2 = add_frequency(sample, 0.1)
    # One choice per row between the two frequency-domain augmentations.
    li = np.random.randint(0, 2, size=[sample.shape[0]])
    # The previous code indexed with ``1 - one_hot`` (a float array), which
    # raises IndexError, and failed when every row drew 0 because the
    # one-hot matrix then had a single column.
    _zero_rows(aug_1, li != 0)
    _zero_rows(aug_2, li != 1)
    aug_F = aug_1 + aug_2
    return aug_F
56 |
57 |
58 |
def generate_binomial_mask(B, T, D, p=0.5):
    """Draw a ``(B, T, D)`` boolean tensor whose entries are True with probability ``p``."""
    draws = np.random.binomial(1, p, size=(B, T, D))
    as_tensor = torch.from_numpy(draws)
    return as_tensor.to(torch.bool)
61 |
def masking(x, mask='binomial'):
    """Zero out NaN time steps and a random subset of entries of ``x`` in place.

    Args:
        x: Tensor indexed as ``x.size(0)``, ``x.size(1)``, ``x.size(2)``; it
            is modified in place and also returned.
        mask: Masking mode. Only ``'binomial'`` is implemented: each entry is
            kept with probability 0.9 and zeroed otherwise.

    Returns:
        The same tensor ``x`` after masking.

    Raises:
        ValueError: If ``mask`` is not a supported mode. Previously this
            surfaced as a NameError on an unbound local, after ``x`` had
            already been partially modified.
    """
    if mask != 'binomial':
        raise ValueError(f"unsupported mask mode: {mask!r} (only 'binomial' is implemented)")

    # Any step containing a NaN along the last axis is cleared entirely.
    has_nan = x.isnan().any(axis=-1)
    x[has_nan] = 0

    mask_id = generate_binomial_mask(x.size(0), x.size(1), x.size(2), p=0.9).to(x.device)
    # True entries are kept; the rest are zeroed.
    x[~mask_id] = 0
    return x
82 |
def jitter(x, sigma=0.8):
    """Add zero-mean Gaussian noise with standard deviation ``sigma`` to ``x``."""
    noise = np.random.normal(loc=0., scale=sigma, size=x.shape)
    return x + noise
85 |
86 |
def scaling(x, sigma=1.1):
    """Scale ``x`` by random factors drawn from ``N(2, sigma)``.

    One factor is drawn per (first-axis, last-axis) pair and reused for every
    index along the middle axis. The slices are multiplied one by one and
    concatenated back along axis 1.
    """
    factor = np.random.normal(loc=2., scale=sigma, size=(x.shape[0], x.shape[2]))
    scaled_steps = [
        np.multiply(x[:, step, :], factor[:, :])[:, np.newaxis, :]
        for step in range(x.shape[1])
    ]
    return np.concatenate(scaled_steps, axis=1)
94 |
def permutation(x, max_segments=5, seg_mode="random"):
    """Shuffle contiguous segments of the last axis independently for each row.

    For each row a segment count is drawn from ``[1, max_segments)``. With a
    count of 1 the row is copied unchanged; otherwise the last axis is cut
    into that many segments and the segments are reordered at random. The
    same reordering is applied to every channel of the row.

    Args:
        x: Array indexed as (row, channel, step); ``x.shape[2]`` is the
            number of steps.
        max_segments: Exclusive upper bound on the number of segments.
        seg_mode: ``"random"`` draws the cut points at random; any other
            value splits into near-equal parts via ``np.array_split``.

    Returns:
        A torch tensor built from the permuted NumPy array.
    """
    orig_steps = np.arange(x.shape[2])

    num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))

    ret = np.zeros_like(x)
    for i, pat in enumerate(x):
        if num_segs[i] > 1:
            if seg_mode == "random":
                split_points = np.random.choice(x.shape[2] - 2, num_segs[i] - 1, replace=False)
                split_points.sort()
                splits = np.split(orig_steps, split_points)
            else:
                splits = np.array_split(orig_steps, num_segs[i])
            # Permute segment positions rather than the ragged list itself:
            # recent NumPy refuses to turn unequal-length segments into an array.
            order = np.random.permutation(len(splits))
            warp = np.concatenate([splits[j] for j in order])
            # Index every channel; ``pat[0, warp]`` copied channel 0 into all
            # channels. Identical for single-channel input.
            ret[i] = pat[:, warp]
        else:
            ret[i] = pat
    return torch.from_numpy(ret)
114 |
def remove_frequency(x, maskout_ratio=0):
    """Zero out a random fraction of the entries of ``x``.

    Args:
        x: Input tensor.
        maskout_ratio: Approximate fraction of entries set to zero; an entry
            is kept when its uniform draw in ``[0, 1)`` exceeds this value.

    Returns:
        ``x`` multiplied by the boolean keep-mask.
    """
    # Draw directly on x's device so this also works without CUDA
    # (the legacy torch.cuda.FloatTensor constructor is GPU-only).
    keep = torch.rand(x.shape, device=x.device) > maskout_ratio
    return x * keep
119 |
def add_frequency(x, pertub_ratio=0,):
    """Add small random positive offsets to a random fraction of entries of ``x``.

    Args:
        x: Input tensor.
        pertub_ratio: Approximate fraction of entries that are perturbed; an
            entry is perturbed when its uniform draw exceeds
            ``1 - pertub_ratio``.

    Returns:
        ``x`` plus the perturbation, where each perturbed entry receives a
        uniform random amount scaled by ``0.1 * x.max()``.
    """
    # Both random tensors live on x's device: the previous code required CUDA
    # for the mask but drew the amplitudes on the CPU, which mismatched.
    mask = torch.rand(x.shape, device=x.device) > (1 - pertub_ratio)
    max_amplitude = x.max()
    random_am = torch.rand(mask.shape, device=x.device) * (max_amplitude * 0.1)
    pertub_matrix = mask * random_am
    return x + pertub_matrix
--------------------------------------------------------------------------------
/src/collator.py:
--------------------------------------------------------------------------------
1 | from argparse import Namespace
2 | import numpy as np, math
3 | import random
4 | import torch
5 | from dataclasses import dataclass
6 |
7 |
@dataclass
class CLDataCollator:
    """Collate ``(values, times, mask)`` instances into padded two-view batches.

    The time steps of every instance are split into two disjoint views: the
    steps chosen by ``_time_sensitive_cl`` and the remaining ones. When
    ``pretrain_tasks == 'full2'`` a segment of observed entries is also
    masked per feature, and the mask is widened to ``2 * D`` columns
    (remaining observations followed by reconstruction targets).

    Attributes:
        max_len: Padded length of each view in the produced batch.
        args: Namespace providing ``mask_ratio_per_seg`` and ``segment_num``.
    """

    max_len: int
    args: Namespace
    # Unannotated, so these are plain class attributes shared by all
    # instances rather than dataclass fields / constructor arguments.
    len_sampling_bound = [0.3, 0.7]
    dense_sampling_bound = [0.4, 0.6]
    pretrain_tasks = 'full2'

    def __call__(self, batch):
        """Pad and stack the two views of every instance in ``batch``.

        Args:
            batch: Sequence of ``(values, times, mask)`` triples; ``values``
                and ``mask`` are ``(T, D)`` tensors.

        Returns:
            Dict with zero-padded tensors ``'value'`` ``(B, 2, max_len, D)``,
            ``'time'`` ``(B, 2, max_len)``, ``'mask'``
            ``(B, 2, max_len, 2 * D)`` for ``'full2'`` (else ``D``) and
            ``'mask_origin'`` ``(B, 2, max_len, D)``.
        """
        batch_size = len(batch)
        D = batch[0][0].size(1)
        mask_width = 2 * D if self.pretrain_tasks == 'full2' else D

        time_batch = torch.zeros([batch_size, 2, self.max_len])
        value_batch = torch.zeros([batch_size, 2, self.max_len, D])
        mask_batch = torch.zeros([batch_size, 2, self.max_len, mask_width])
        mask_old_batch = torch.zeros([batch_size, 2, self.max_len, D])

        for idx, instance in enumerate(batch):
            views = self._per_seq_sampling(instance)
            for view, (v, t, m, m_old) in enumerate(views):
                length = v.size(0)
                value_batch[idx, view, :length] = v
                time_batch[idx, view, :length] = t
                mask_batch[idx, view, :length] = m
                mask_old_batch[idx, view, :length] = m_old

        return {'value': value_batch, 'time': time_batch, 'mask': mask_batch, 'mask_origin': mask_old_batch}

    def _per_seq_sampling(self, instance):
        '''
        Split one instance into two views and optionally mask segments.

        - times is a 1-dimensional tensor containing T time values of observations.
        - values is a (T, D) tensor containing observed values for D variables.
        - mask is a (T, D) tensor containing 1 where values were observed and 0 otherwise.

        Returns two tuples ``(values, times, mask, original_mask)``, one per view.
        '''
        values, times, mask = instance

        # selected_indices = self._random_sampling_cl(values)  # random anchor and positive
        selected_indices = self._time_sensitive_cl(times)  # anchor and positive based on sampling density

        first, second = ([], [], []), ([], [], [])
        for idx, step in enumerate(zip(values, times, mask)):
            target = first if idx in selected_indices else second
            for bucket, item in zip(target, step):
                bucket.append(item)

        v1, t1, m1 = (torch.stack(items, dim=0) for items in first)
        v2, t2, m2 = (torch.stack(items, dim=0) for items in second)

        # Keep the untouched observation masks; _seg_masking edits in place.
        m1_old, m2_old = m1.clone(), m2.clone()
        if self.pretrain_tasks == 'full2':
            m1 = self._seg_masking(mask=m1, timestamps=t1)
            m2 = self._seg_masking(mask=m2, timestamps=t2)

        return (v1, t1, m1, m1_old), (v2, t2, m2, m2_old)

    def _random_sampling_cl(self, values):
        """Return a random set of step indices covering 30-70% of ``values`` (at least one)."""
        indices = list(range(len(values)))
        random.shuffle(indices)

        length = int(np.random.uniform(self.len_sampling_bound[0], self.len_sampling_bound[1], 1)[0] * len(indices))
        length = max(length, 1)

        return set(indices[: length])

    def _time_sensitive_cl(self, timestamps):
        """Choose step indices for the first view based on sampling density.

        Steps are ranked by the average of the gaps to their neighbours (the
        first and last step use their single gap). The lower half of the
        ranking is the dense region, the upper half the sparse region. A
        random share of the selection comes from the dense region and the
        rest from the sparse region.

        Args:
            timestamps: Tensor of T time values; needs at least two steps.

        Returns:
            Set of selected step indices.
        """
        times = torch.clone(timestamps)
        times = times.reshape(times.shape[0])

        # average of pre- and post-interval time for each step, except the first and last
        avg_interval_times = [(((times[i] - times[i - 1]) + (times[i + 1] - times[i])) / 2) for i in
                              range(1, times.shape[0] - 1)]
        avg_interval_times.append(times[-1] - times[-2])  # last step: only a pre-interval exists
        avg_interval_times.insert(0, times[1] - times[0])  # first step: only a post-interval exists

        # Stable ascending sort: dense steps (small gaps) first, sparse steps last.
        indices = sorted(range(len(avg_interval_times)), key=lambda step: avg_interval_times[step])

        # length of the anchor/positive sample
        length = int(np.random.uniform(self.len_sampling_bound[0], self.len_sampling_bound[1], 1)[0] * times.shape[0])
        length = max(length, 1)

        # divide the ranked steps into dense (first 50%) and sparse (last 50%) regions
        half = len(indices) // 2
        dense_indices = indices[: half]
        random.shuffle(dense_indices)
        sparse_indices = indices[half:]
        random.shuffle(sparse_indices)

        # a random fraction of the selection comes from the dense region, the rest from the sparse one
        dense_length = int(np.random.uniform(self.dense_sampling_bound[0], self.dense_sampling_bound[1], 1)[0] * length)
        dense_length = max(dense_length, 1)
        sparse_length = length - dense_length

        return set(dense_indices[: dense_length]) | set(sparse_indices[: sparse_length])

    def _seg_masking(self, mask=None, timestamps=None):
        '''
        Mask one time segment per feature and record it as a reconstruction target.

        - mask is a (T, D) tensor; it is modified in place
        - timestamps is a (T, 1) tensor
        - return: (T, 2*D) tensor: remaining observations followed by the masked-out positions
        '''
        D = mask.size(1)
        interp_mask = torch.zeros_like(mask)

        for dim in range(D):
            # length of each masked segment is constant
            # seg_pos = self._constant_length_sampling(mask[ : , dim])

            # time of each masked segment is constant: the number of masked steps
            # varies with the sampling density inside the masked region
            seg_pos = self._time_sensitive_sampling(mask[:, dim], timestamps)

            if len(seg_pos) > 0:
                mask[seg_pos, dim] = 0.0
                interp_mask[seg_pos, dim] = 1.0

        return torch.cat([mask, interp_mask], dim=-1)

    def _constant_length_sampling(self, mask):
        """Pick up to ``args.segment_num`` runs of observed steps of equal length.

        The run length is ``args.mask_ratio_per_seg`` times the number of
        observed steps (at least 1). Runs are taken over the list of observed
        positions, so they may skip unobserved steps.

        Returns:
            List of unique step indices to mask.
        """
        count_ones = mask.sum().long().item()
        seg_seq_len = max(int(self.args.mask_ratio_per_seg * count_ones), 1)

        ones_indices_in_mask = torch.where(mask == 1)[0].tolist()

        seg_pos = []
        for _ in range(self.args.segment_num):
            # Number of valid start positions for a full-length run.
            n_starts = len(ones_indices_in_mask) - seg_seq_len + 1
            if len(ones_indices_in_mask) <= 1 or n_starts < 1:
                # Not enough observed steps left; previously the second case
                # crashed in random.choice on an empty list.
                break

            start_idx_in_mask = random.choice(ones_indices_in_mask[: n_starts])
            start = ones_indices_in_mask.index(start_idx_in_mask)
            sub_seg = ones_indices_in_mask[start: start + seg_seq_len]

            seg_pos.extend(sub_seg)
            ones_indices_in_mask = sorted(set(ones_indices_in_mask) - set(sub_seg))

        return list(set(seg_pos))

    def _time_sensitive_sampling(self, mask, timestamps):
        """Pick up to ``args.segment_num`` segments of equal duration to mask.

        The duration is ``args.mask_ratio_per_seg`` times the span between
        the first and last observed time of this feature. Each segment starts
        at a randomly chosen observed time and covers every observed time
        within that duration.

        Args:
            mask: Length-T tensor, 1 where the feature was observed.
            timestamps: Tensor with T time values.

        Returns:
            List of unique step indices to mask; empty when nothing is
            observed or no valid segment start exists.
        """
        timestamps = timestamps.reshape(timestamps.shape[0])
        observed_idx = [i for i in range(mask.shape[0]) if mask[i] == 1]
        # times at which this feature was sampled
        sampled_times = [timestamps[i].item() for i in observed_idx]

        if len(sampled_times) == 0:
            return []

        sampled_times_start, sampled_times_end = sampled_times[0], sampled_times[-1]

        # full time interval of the feature = last sampling time - first sampling time
        # time of masked segment = a fixed percentage of that interval
        time_of_masked_segment = (sampled_times_end - sampled_times_start) * self.args.mask_ratio_per_seg

        # A segment may only start where it still fits before the last sample.
        available_samples_to_sample = [time for time in sampled_times if
                                       time < sampled_times_end - time_of_masked_segment]

        if len(available_samples_to_sample) == 0:
            return []
        chosen_time = random.choice(available_samples_to_sample)

        masking_times = []
        for _ in range(self.args.segment_num):
            masked_segment_start_time = chosen_time
            masked_segment_end_time = masked_segment_start_time + time_of_masked_segment

            idx = sampled_times.index(chosen_time)
            chosen_times = [chosen_time]
            available_samples_to_sample.remove(chosen_time)

            # Collect later samples that fall inside the segment.
            for time in sampled_times[idx + 1:]:
                if time > masked_segment_end_time:
                    break

                if masked_segment_start_time < time and time <= masked_segment_end_time:
                    chosen_times.append(time)

                    if time in available_samples_to_sample:
                        available_samples_to_sample.remove(time)

            masking_times.extend(chosen_times)

            # Earlier samples closer than one segment length can no longer
            # start a segment.
            for time in sampled_times[: idx][::-1]:
                if time < chosen_time - time_of_masked_segment or time > chosen_time + time_of_masked_segment:
                    break

                if time > chosen_time - time_of_masked_segment and time < chosen_time + time_of_masked_segment and time in available_samples_to_sample:
                    available_samples_to_sample.remove(time)

            if len(available_samples_to_sample) == 0:
                # No further start candidates: keep the segments chosen so far
                # (returning [] here used to discard them).
                break
            chosen_time = random.choice(available_samples_to_sample)

        # Map times back to positions of *observed* steps only, so a duplicate
        # timestamp on an unobserved step is never selected.
        first_position = {}
        for position, time in zip(observed_idx, sampled_times):
            first_position.setdefault(time, position)
        seg_pos = [first_position[time] for time in masking_times]
        return list(set(seg_pos))
347 |
348 | '''
349 | def _seg_sampling(self, max_len):
350 | if max_len * self.args.mask_ratio_per_seg < 1:
351 | return []
352 | seg_pos = []
353 | seg_len = int(max_len * self.args.mask_ratio_per_seg)
354 | print('seg_len: ' + str(seg_len))
355 | start_pos = np.random.randint(max_len, size=self.args.segment_num)
356 | print('start_pos: ' + str(start_pos))
357 | for start in start_pos:
358 | seg_pos += list(range(start, min(start+seg_len, max_len)))
359 | print(seg_pos)
360 | return seg_pos
361 | '''
362 |
363 |
364 | # ---Test _time_sensitive_sampling function for reconstruction task---#
365 | '''
366 | m = torch.zeros((56), dtype = bool)
367 | l = [3, 8, 11, 13, 18, 19, 42, 45, 50, 52, 55]
368 | m[l] = 1
369 | t = torch.zeros((56), dtype = float)
370 | times = torch.tensor([1, 5, 8, 9, 12, 13, 17, 20, 23, 28, 31], dtype = float)
371 | t[l] = times
372 | # print(m)
373 | # print(t)
374 | train_cl_collator = CLDataCollator(max_len = 50)
375 | train_cl_collator._time_sensitive_sampling(m, t)
376 | '''
377 |
378 | # ----------Test _time_sensitive_cl function for CL task----------#
379 | '''
380 | times = torch.tensor([1, 2, 3, 4, 5, 15, 18, 25, 26, 27, 28, 29, 35, 45])
381 | times = times.reshape(times.shape[0], 1)
382 | train_cl_collator = CLDataCollator(max_len = 50)
383 | selected_indices = train_cl_collator._time_sensitive_cl(times)
384 | '''
385 |
386 | '''
387 | max_len = 50
388 | D = 4
389 | value, time, mask = torch.rand(max_len, D), torch.rand(max_len, 1), torch.randint(0, 2, (max_len, D))
390 | data = [value, time, mask]
391 | batch = [data]
392 | train_cl_collator = CLDataCollator(max_len = max_len)
393 | # (v1, t1, m1), (v2, t2, m2) = train_cl_collator._per_seq_sampling(data)
394 | # print(v1.shape, t1.shape, m1.shape, v2.shape, t2.shape, m2.shape)
395 | out = train_cl_collator.__call__(batch)
396 | '''
397 |
398 | '''
399 | print(out['value'].shape, out['time'].shape, out['mask'].shape)
400 | print('Value')
401 | print(value)
402 | print(out['value'][0, 0].shape)
403 | print(out['value'][0, 1].shape)
404 | print('Time')
405 | print(time)
406 | print(out['time'][0, 0].shape)
407 | print(out['time'][0, 1].shape)
408 | print('Mask')
409 | print(mask)
410 | print(out['mask'][0, 0])
411 | print(out['mask'][0, 1])
412 | print(torch.sum(mask, axis = 0))
413 | print(torch.sum(out['mask'][0, 0], axis = 0))
414 | print(torch.sum(out['mask'][0, 1], axis = 0))
415 | '''
--------------------------------------------------------------------------------
/src/datautils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 |
4 | import numpy as np
5 | import pandas as pd
6 | import math
7 | import random
8 | from datetime import datetime
9 | import pickle
10 | from utils import pkl_load, pad_nan_to_target
11 | from scipy.io.arff import loadarff
12 | from sklearn.preprocessing import StandardScaler, MinMaxScaler
13 | from utils import generate_mask
14 | from utils import interpolate_cubic_spline
15 | from utils import normalize_with_mask
16 |
17 |
def load_UCR(dataset, load_tp: bool = True):
    """Load a UCR dataset from ``datasets/UCR/<dataset>/`` TSV files.

    The first column of each file is the label, the rest is the series.
    Labels are remapped to ``0..L-1`` following ``np.unique`` of the training
    labels. Series get a trailing channel axis, and datasets in the
    non-normalized list are standardised over the whole dataset via
    ``normalize_with_mask``.

    Args:
        dataset: Name of the dataset directory and file prefix.
        load_tp: When True, a time-point channel (``linspace(0, 1)``) is
            appended as the last feature.

    Returns:
        ``({'x': train, 'mask': mask_tr}, train_labels,
        {'x': test, 'mask': mask_te}, test_labels)``.
    """
    root = 'datasets/UCR'
    train_path = os.path.join(root, dataset, dataset + "_TRAIN.tsv")
    test_path = os.path.join(root, dataset, dataset + "_TEST.tsv")
    train_array = np.array(pd.read_csv(train_path, sep='\t', header=None))
    test_array = np.array(pd.read_csv(test_path, sep='\t', header=None))

    # Move the labels to {0, ..., L-1}
    transform = {label: index for index, label in enumerate(np.unique(train_array[:, 0]))}
    to_index = np.vectorize(transform.get)

    train = train_array[:, 1:].astype(np.float64)
    train_labels = to_index(train_array[:, 0])
    test = test_array[:, 1:].astype(np.float64)
    test_labels = to_index(test_array[:, 0])

    # extend dim to NTC
    train = train[..., np.newaxis]
    test = test[..., np.newaxis]
    p = 1
    mask_tr = generate_mask(train, p)
    mask_te = generate_mask(test, p)

    # Datasets that are not normalized on disk. To keep the amplitude
    # information they are normalized over the whole dataset rather than
    # per individual series.
    non_normalized = (
        'AllGestureWiimoteX',
        'AllGestureWiimoteY',
        'AllGestureWiimoteZ',
        'BME',
        'Chinatown',
        'Crop',
        'EOGHorizontalSignal',
        'EOGVerticalSignal',
        'Fungi',
        'GestureMidAirD1',
        'GestureMidAirD2',
        'GestureMidAirD3',
        'GesturePebbleZ1',
        'GesturePebbleZ2',
        'GunPointAgeSpan',
        'GunPointMaleVersusFemale',
        'GunPointOldVersusYoung',
        'HouseTwenty',
        'InsectEPGRegularTrain',
        'InsectEPGSmallTrain',
        'MelbournePedestrian',
        'PickupGestureWiimoteZ',
        'PigAirwayPressure',
        'PigArtPressure',
        'PigCVP',
        'PLAID',
        'PowerCons',
        'Rock',
        'SemgHandGenderCh2',
        'SemgHandMovementCh2',
        'SemgHandSubjectCh2',
        'ShakeGestureWiimoteZ',
        'SmoothSubspace',
        'UMD',
    )
    if dataset in non_normalized or p != 1:
        scaler = StandardScaler()
        train, test = normalize_with_mask(train, mask_tr, test, mask_te, scaler)

    if load_tp:
        tp = np.linspace(0, 1, train.shape[1], endpoint=True).reshape(1, -1, 1)

        def _with_time_channel(series):
            # Append the shared time grid as an extra last-axis feature.
            return np.concatenate((series, np.repeat(tp, series.shape[0], axis=0)), axis=-1)

        train = _with_time_channel(train)
        test = _with_time_channel(test)

    train_part = {'x': train, 'mask': mask_tr}
    test_part = {'x': test, 'mask': mask_te}
    return train_part, train_labels, test_part, test_labels
95 |
96 |
def load_others(dataset, load_tp: bool = True):
    """Load a pickled dataset dict from ``datasets/Others/<dataset>.npy``.

    The dict provides ``tr_x/tr_mask/tr_y/tr_t`` and ``te_x/te_mask/te_y/te_t``.
    Values are min-max scaled through ``normalize_with_mask`` and labels are
    remapped to ``0..L-1`` following ``np.unique`` of the training labels.

    Args:
        dataset: File stem under ``datasets/Others``.
        load_tp: When True, the stored time points are appended as the last
            feature.

    Returns:
        ``({'x': train_X, 'mask': train_mask}, train_y,
        {'x': test_X, 'mask': test_mask}, test_y)``.
    """
    # NOTE: allow_pickle=True executes pickled content; only load trusted files.
    data = np.load(f'datasets/Others/{dataset}.npy', allow_pickle=True).item()
    train_X, train_mask, train_y = data["tr_x"], data["tr_mask"], data["tr_y"]
    test_X, test_mask, test_y = data["te_x"], data["te_mask"], data["te_y"]

    train_X, test_X = normalize_with_mask(train_X, train_mask, test_X, test_mask, MinMaxScaler())

    train_tp = data['tr_t']
    test_tp = data['te_t']
    if load_tp:
        train_tp_channel = train_tp.reshape(train_tp.shape[0], -1, 1)
        train_X = np.concatenate((train_X, train_tp_channel), axis=-1)
        test_tp_channel = test_tp.reshape(test_tp.shape[0], -1, 1)
        test_X = np.concatenate((test_X, test_tp_channel), axis=-1)

    transform = {}
    for index, label in enumerate(np.unique(train_y)):
        transform[label] = index
    to_index = np.vectorize(transform.get)
    train_y = to_index(train_y)
    test_y = to_index(test_y)

    train_part = {'x': train_X, 'mask': train_mask}
    test_part = {'x': test_X, 'mask': test_mask}
    return train_part, train_y, test_part, test_y
116 |
117 |
def load_UEA(dataset, load_tp: bool = False):
    """Load a UEA dataset, standardise it and integer-encode the labels.

    The ARFF pair ``datasets/UEA/<dataset>/<dataset>_{TRAIN,TEST}.arff`` is
    tried first; if reading or converting it fails, the pickled dict in
    ``datasets/UEA/<dataset>/<dataset>.npy`` (keys ``train_X``, ``train_y``,
    ``test_X``, ``test_y``) is used instead.

    Args:
        dataset: dataset name (directory and file stem).
        load_tp: when true, a linearly spaced time channel in ``[0, 1]`` is
            appended as the last feature of every series.

    Returns:
        ``({'x', 'mask'}, train_labels, {'x', 'mask'}, test_labels)``.
    """
    def extract_data(data):
        # Each ARFF record is a (per-dimension series, byte-string label) pair.
        res_data = []
        res_labels = []
        for t_data, t_label in data:
            t_data = np.array([d.tolist() for d in t_data])
            t_label = t_label.decode("utf-8")
            res_data.append(t_data)
            res_labels.append(t_label)
        # Swap axes 1 and 2 of the stacked array before returning it.
        return np.array(res_data).swapaxes(1, 2), np.array(res_labels)

    try:
        train_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TRAIN.arff')[0]
        test_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TEST.arff')[0]

        train_X, train_y = extract_data(train_data)
        test_X, test_y = extract_data(test_data)
    except Exception:
        # Best-effort fallback to the .npy dump. ``Exception`` (rather than a
        # bare ``except``) lets KeyboardInterrupt / SystemExit propagate.
        data = np.load(f'datasets/UEA/{dataset}/{dataset}.npy', allow_pickle=True).item()
        train_X, train_y, test_X, test_y = data["train_X"], data["train_y"], data["test_X"], data["test_y"]

    p = 1
    mask_tr, mask_te = generate_mask(train_X, p), generate_mask(test_X, p)
    scaler = StandardScaler()

    train_X, test_X = normalize_with_mask(train_X, mask_tr, test_X, mask_te, scaler)

    if load_tp:
        # One shared time axis, sized from the train series length and
        # repeated for every instance of both splits.
        tp = np.linspace(0, 1, train_X.shape[1], endpoint=True).reshape(1, -1, 1)
        train_X = np.concatenate((train_X, np.repeat(tp, train_X.shape[0], axis=0)), axis=-1)
        test_X = np.concatenate((test_X, np.repeat(tp, test_X.shape[0], axis=0)), axis=-1)

    # Encode labels as indices into the sorted unique train labels.
    labels = np.unique(train_y)
    transform = {k: i for i, k in enumerate(labels)}
    train_y = np.vectorize(transform.get)(train_y)
    test_y = np.vectorize(transform.get)(test_y)
    return {'x': train_X, 'mask': mask_tr}, train_y, {'x': test_X, 'mask': mask_te}, test_y
156 |
157 |
def load_forecast_npy(name, univar=False):
    """Load a forecasting array from ``datasets/<name>.npy`` with a 60/20/20 split.

    Args:
        name: file stem below ``datasets/``.
        univar: when true, only the last column of the array is kept.

    Returns:
        ``(data, train_slice, valid_slice, test_slice, scaler, pred_lens, 0)``
        where ``data`` is standardised with statistics of the train slice and
        gets a leading axis of size one.
    """
    data = np.load(f'datasets/{name}.npy')
    if univar:
        # Keep the last variable only. The previous ``data[: -1:]`` dropped
        # the final row instead of selecting a column.
        data = data[:, -1:]

    n_rows = len(data)
    train_slice = slice(None, int(0.6 * n_rows))
    valid_slice = slice(int(0.6 * n_rows), int(0.8 * n_rows))
    test_slice = slice(int(0.8 * n_rows), None)

    # Fit on the train rows only, then scale the whole series.
    scaler = StandardScaler().fit(data[train_slice])
    data = scaler.transform(data)
    data = np.expand_dims(data, 0)

    pred_lens = [24, 48, 96, 288, 672]
    return data, train_slice, valid_slice, test_slice, scaler, pred_lens, 0
173 |
174 |
175 | def _get_time_features(dt):
176 | return np.stack([
177 | dt.minute.to_numpy(),
178 | dt.hour.to_numpy(),
179 | dt.dayofweek.to_numpy(),
180 | dt.day.to_numpy(),
181 | dt.dayofyear.to_numpy(),
182 | dt.month.to_numpy(),
183 | dt.weekofyear.to_numpy(),
184 | ], axis=1).astype(np.float)
185 |
186 |
def load_forecast_csv(name, offset=0, univar=False, load_tp: bool = True):
    """Load a forecasting CSV and build the normalised data / mask tensors.

    The CSV at ``datasets/<name>.csv`` must have a ``date`` column, which is
    parsed and used as the index.

    Args:
        name: dataset name; the ETT datasets get fixed month-based splits,
            everything else a 60/20/20 split.
        offset: number of steps by which the validation and test slices of
            the ETT datasets start early. Calendar covariates are only added
            when ``offset == 0``.
        univar: keep a single target column (dataset specific, otherwise the
            last column).
        load_tp: append a min-max scaled POSIX timestamp as the last channel.

    Returns:
        ``({'x': data, 'mask': mask}, train_slice, valid_slice, test_slice,
        scaler, pred_lens, n_covariate_cols)``.
    """
    data = pd.read_csv(f'datasets/{name}.csv', index_col='date', parse_dates=True)
    dt_tp = data.index
    dt_embed = _get_time_features(data.index)
    n_covariate_cols = dt_embed.shape[-1] if offset == 0 else 0

    if univar:
        if name in ('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2'):
            data = data[['OT']]
        elif name == 'electricity':
            data = data[['MT_001']]
        elif name == 'WTH':
            data = data[['WetBulbCelsius']]
        else:
            data = data.iloc[:, -1:]

    data = data.to_numpy()
    if name == 'ETTh1' or name == 'ETTh2':
        train_slice = slice(None, 12 * 30 * 24)
        valid_slice = slice(12 * 30 * 24 - offset, 16 * 30 * 24)
        test_slice = slice(16 * 30 * 24 - offset, 20 * 30 * 24)
    elif name == 'ETTm1' or name == 'ETTm2':
        train_slice = slice(None, 12 * 30 * 24 * 4)
        valid_slice = slice(12 * 30 * 24 * 4 - offset, 16 * 30 * 24 * 4)
        test_slice = slice(16 * 30 * 24 * 4 - offset, 20 * 30 * 24 * 4)
    else:
        train_slice = slice(None, int(0.6 * len(data)))
        valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data)))
        test_slice = slice(int(0.8 * len(data)), None)

    def fixed_mask_timestamp(num, mask):
        # Build ``num`` covariate mask channels that are 0 only at time steps
        # where every value channel of ``mask`` is 0, and prepend them.
        mask_time = np.ones((mask.shape[0], mask.shape[1]))
        mask_time[np.where(mask.mean(axis=-1) == 0.)] = 0
        return np.concatenate((np.repeat(mask_time[..., np.newaxis], num, axis=-1), mask), axis=-1)

    # to N x T x C
    # ``name in ('electricity')`` was a substring test on a plain string
    # (the parentheses do not make a tuple); compare for equality instead.
    if name == 'electricity':
        data = np.expand_dims(data.T, -1)  # Each variable is an instance rather than a feature
    else:
        data = np.expand_dims(data, 0)

    p = 1
    mask_tr, mask_va, mask_te = generate_mask(data[:, train_slice], p), \
                                generate_mask(data[:, valid_slice], p), \
                                generate_mask(data[:, test_slice], p)
    scaler = StandardScaler()

    train_x, valid_x = normalize_with_mask(data[:, train_slice], mask_tr, data[:, valid_slice], mask_va, scaler)
    _, test_x = normalize_with_mask(data[:, train_slice], mask_tr, data[:, test_slice], mask_te, scaler)
    data = np.concatenate((train_x, valid_x, test_x), axis=1)
    mask = np.concatenate([mask_tr, mask_va, mask_te], axis=1)

    if n_covariate_cols > 0:
        dt_mask, dv_mask, d_mask = fixed_mask_timestamp(n_covariate_cols, mask_tr[:1]), \
                                   fixed_mask_timestamp(n_covariate_cols, mask_va[:1]), \
                                   fixed_mask_timestamp(n_covariate_cols, mask_te[:1])

        dt, dv, d = dt_embed[train_slice], dt_embed[valid_slice], dt_embed[test_slice]
        # Blank out covariates at masked time steps so they do not affect the
        # scaler statistics; they are reset to 0 after scaling.
        dt[dt_mask[0][:, :n_covariate_cols] == 0] = np.nan
        dv[dv_mask[0][:, :n_covariate_cols] == 0] = np.nan
        d[d_mask[0][:, :n_covariate_cols] == 0] = np.nan
        dt_embed = np.concatenate((dt, dv, d), axis=0)

        # NOTE(review): this re-fits the very ``scaler`` object that is
        # returned below, so callers receive a scaler fitted on the calendar
        # covariates rather than on the data columns -- confirm that this is
        # what downstream inverse transforms expect.
        dt_scaler = scaler.fit(dt)
        dt_embed = np.expand_dims(dt_scaler.transform(dt_embed), 0)
        dt_embed[np.isnan(dt_embed)] = 0
        data = np.concatenate([np.repeat(dt_embed, data.shape[0], axis=0), data], axis=-1)
        mask_tr, mask_va, mask_te = dt_mask, dv_mask, d_mask
        mask = np.concatenate([mask_tr, mask_va, mask_te], axis=1)

    if load_tp:
        dt_tp = [dt_tp[train_slice], dt_tp[valid_slice], dt_tp[test_slice]]
        tp = np.concatenate([[time.mktime(t.timetuple()) for t in tp] for tp in dt_tp])
        scaler_hat = MinMaxScaler().fit(tp.reshape(-1, 1))
        data = np.concatenate([data, np.expand_dims(scaler_hat.transform(tp.reshape(-1, 1)), 0)], axis=-1)

    if name in ('ETTh1', 'ETTh2', 'electricity', 'WTH'):
        pred_lens = [24, 48, 168, 336, 720]
    else:
        pred_lens = [24, 48, 96, 288, 672]

    return {'x': data, 'mask': mask}, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols
267 |
268 |
def load_anomaly(name, load_tp=False):
    """Load an anomaly-detection pickle and attach per-series masks.

    Reads ``datasets/<name>.pkl``. Every train/test series is multiplied by a
    freshly generated mask (the stored series are overwritten in place), and
    the masks are NaN-padded to the longest train / test series so they can be
    stacked.

    Args:
        name: file stem below ``datasets/``.
        load_tp: currently unused.

    Returns:
        ``({'x', 'mask'}, train_labels, train_timestamps,
        {'x', 'mask'}, test_labels, test_timestamps, delay)``.
    """
    res = pkl_load(f'datasets/{name}.pkl')
    train_series, test_series = res['all_train_data'], res['all_test_data']

    p = 1
    maxl = np.max([len(train_series[k]) for k in train_series])
    maxle = np.max([len(test_series[k]) for k in test_series])

    mask_tr, mask_te = [], []
    for k in train_series:
        # generate mask (train first, then test, for the same key)
        tr_mask = generate_mask(train_series[k].reshape(1, -1, 1), p, remain=1)
        te_mask = generate_mask(test_series[k].reshape(1, -1, 1), p, remain=1)

        # apply the mask to the stored series
        train_series[k] = (tr_mask * train_series[k].reshape(1, -1, 1)).reshape(-1)
        test_series[k] = (te_mask * test_series[k].reshape(1, -1, 1)).reshape(-1)

        # pad the mask with NaN up to the longest series of its split
        tr_pad = np.full((1, maxl - tr_mask.shape[1], 1), np.nan)
        te_pad = np.full((1, maxle - te_mask.shape[1], 1), np.nan)
        mask_tr.append(np.concatenate((tr_mask, tr_pad), axis=1))
        mask_te.append(np.concatenate((te_mask, te_pad), axis=1))

    mask_tr = np.concatenate(mask_tr, axis=0)
    mask_te = np.concatenate(mask_te, axis=0)

    train_set = {'x': res['all_train_data'], 'mask': mask_tr}
    test_set = {'x': res['all_test_data'], 'mask': mask_te}
    return train_set, res['all_train_labels'], res['all_train_timestamps'], \
           test_set, res['all_test_labels'], res['all_test_timestamps'], \
           res['delay']
299 |
300 |
def gen_ano_train_data(all_train_data, maxl=None, normal=False):
    """Stack the series of ``all_train_data`` into one NaN-padded array.

    Args:
        all_train_data: mapping from key to a 1-D series.
        maxl: target length; defaults to the longest series in the mapping.
        normal: when true, min-max scale the stacked array with its global
            (NaN-ignoring) minimum and maximum.

    Returns:
        Array with the series stacked along axis 0 and a new trailing axis
        of size one inserted at position 2.
    """
    if maxl is None:
        maxl = np.max([len(all_train_data[k]) for k in all_train_data])

    padded = [
        pad_nan_to_target(np.array(all_train_data[k]).astype(np.float64), maxl, axis=0)
        for k in all_train_data
    ]
    pretrain_data = np.expand_dims(np.stack(padded), 2)

    if not normal:
        return pretrain_data

    data_min, data_max = np.nanmin(pretrain_data), np.nanmax(pretrain_data)
    return (pretrain_data - data_min) / (data_max - data_min)
--------------------------------------------------------------------------------
/src/lib.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=E1101
2 | import torch
3 | import torch.nn as nn
4 | from torch.utils.data import Dataset, DataLoader, TensorDataset
5 |
6 | import numpy as np
7 | from sklearn import metrics
8 |
9 | from collator import CLDataCollator
10 |
11 |
class TimeDataset(Dataset):
    """Dataset of ``(values, times, mask)`` triples.

    Instances are kept only if the three parts have equal length and that
    length is at least two; everything else is silently dropped.
    """

    def __init__(self, data):
        super().__init__()
        kept = []
        for sample in data:
            values, times, mask = sample
            if len(values) != len(times) or len(times) != len(mask) or len(values) < 2:
                continue
            kept.append(sample)
        self.data = kept

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        return self.data[index]
26 |
27 |
def count_parameters(model):
    """Return the number of scalar entries in the trainable parameters of ``model``."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
30 |
31 |
def log_normal_pdf(x, mean, logvar, mask):
    """Element-wise Gaussian log-density of ``x``, multiplied by ``mask``.

    ``logvar`` is the log of the variance; entries where ``mask`` is 0
    contribute 0 to the result.
    """
    two_pi = torch.from_numpy(np.array([2. * np.pi])).float().to(x.device)
    log_two_pi = torch.log(two_pi)
    scaled_sq_dev = (x - mean) ** 2. / torch.exp(logvar)
    return -.5 * (log_two_pi + logvar + scaled_sq_dev) * mask
36 |
37 |
38 | def normal_kl(mu1, lv1, mu2, lv2):
39 | v1 = torch.exp(lv1)
40 | v2 = torch.exp(lv2)
41 | lstd1 = lv1 / 2.
42 | lstd2 = lv2 / 2.
43 |
44 | kl = lstd2 - lstd1 + ((v1 + (mu1 - mu2) ** 2.) / (2. * v2)) - .5
45 | return kl
46 |
47 |
def mean_squared_error(orig, pred, mask):
    """Squared error summed over entries weighted by ``mask``, divided by ``mask.sum()``."""
    masked_sq_error = ((orig - pred) ** 2) * mask
    return masked_sq_error.sum() / mask.sum()
52 |
53 |
def normalize_masked_data(data, mask, att_min, att_max):
    """Compute ``(data - att_min) / att_max`` and zero the masked-out entries.

    Zeros in ``att_max`` are replaced by 1 in place (the caller's tensor is
    modified) so the division is always defined.

    Returns:
        ``(data_norm, att_min, att_max)``.

    Raises:
        Exception: if the normalised data contains NaN.
    """
    # we don't want to divide by zero
    att_max[att_max == 0.] = 1.

    if not (att_max != 0.).all():
        raise Exception("Zero!")
    data_norm = (data - att_min) / att_max

    if torch.isnan(data_norm).any():
        raise Exception("nans!")

    # set masked out elements back to zero
    data_norm[mask == 0] = 0
    return data_norm, att_min, att_max
70 |
71 |
def evaluate(dim, rec, dec, test_loader, args, num_sample=10, device="cuda"):
    """Return the batch-weighted masked reconstruction MSE over ``test_loader``.

    Each batch is split along its last axis into ``dim`` value channels,
    ``dim`` mask channels and a final time-point channel. ``rec`` encodes the
    (optionally subsampled) observations into a latent mean / log-variance,
    ``num_sample`` latents are drawn, ``dec`` decodes them at the observed
    time points, and the sample mean is scored against the observations.

    Note: tensors are moved to ``args.device``; the ``device`` argument is
    not used.
    """
    mse, test_n = 0.0, 0.0
    with torch.no_grad():
        for test_batch in test_loader:
            test_batch = test_batch.to(args.device)
            observed_data = test_batch[:, :, :dim]
            observed_mask = test_batch[:, :, dim: 2 * dim]
            observed_tp = test_batch[:, :, -1]

            # Encoder inputs default to the full observations.
            enc_data, enc_tp, enc_mask = observed_data, observed_tp, observed_mask
            if args.sample_tp and args.sample_tp < 1:
                enc_data, enc_tp, enc_mask = subsample_timepoints(
                    observed_data.clone(), observed_tp.clone(), observed_mask.clone(), args.sample_tp)

            out = rec(torch.cat((enc_data, enc_mask), 2), enc_tp)
            qz0_mean = out[:, :, : args.latent_dim]
            qz0_logvar = out[:, :, args.latent_dim:]

            # Reparameterised samples, flattened to (num_sample * batch, ...).
            n_batch, n_ref, n_latent = qz0_mean.shape[0], qz0_mean.shape[1], qz0_mean.shape[2]
            epsilon = torch.randn(num_sample, n_batch, n_ref, n_latent).to(args.device)
            z0 = epsilon * torch.exp(0.5 * qz0_logvar) + qz0_mean
            z0 = z0.view(-1, n_ref, n_latent)

            batch, seqlen = observed_tp.size()
            time_steps = observed_tp[None, :, :].repeat(num_sample, 1, 1).view(-1, seqlen)

            pred_x = dec(z0, time_steps)
            pred_x = pred_x.view(num_sample, -1, pred_x.shape[1], pred_x.shape[2]).mean(0)

            mse += mean_squared_error(observed_data, pred_x, observed_mask) * batch
            test_n += batch
    return mse / test_n
108 |
109 |
def compute_losses(dim, dec_train_batch, qz0_mean, qz0_logvar, pred_x, args, device):
    """Return the per-sample reconstruction log-likelihood and analytic KL term.

    The observations and their mask are the first ``dim`` and the next
    ``dim`` channels of ``dec_train_batch``. The likelihood uses a fixed
    noise standard deviation ``args.std``; the KL is taken against a standard
    normal prior. With ``args.norm`` both terms are divided by the number of
    observed entries of each sample.
    """
    observed_data = dec_train_batch[:, :, :dim]
    observed_mask = dec_train_batch[:, :, dim:2 * dim]

    noise_std_ = torch.zeros(pred_x.size()).to(device) + args.std
    noise_logvar = 2. * torch.log(noise_std_).to(device)
    logpx = log_normal_pdf(observed_data, pred_x, noise_logvar, observed_mask).sum(-1).sum(-1)

    # Zero mean and zero log-variance: the same tensor serves as both.
    prior = torch.zeros(qz0_mean.size()).to(device)
    analytic_kl = normal_kl(qz0_mean, qz0_logvar, prior, prior).sum(-1).sum(-1)

    if args.norm:
        n_observed = observed_mask.sum(-1).sum(-1)
        logpx /= n_observed
        analytic_kl /= n_observed
    return logpx, analytic_kl
126 |
127 |
def evaluate_classifier(model, test_loader, dec=None, args=None, classifier=None,
                        dim=0, reconst=False, num_sample=1):
    """Evaluate a classifier on ``test_loader``.

    Each batch is split along its last axis into ``dim`` value channels,
    ``dim`` mask channels and a final time-point channel. With ``reconst``
    the model output is treated as a latent mean / log-variance, sampled
    ``num_sample`` times and passed through ``classifier`` (after ``dec``
    when ``args.classify_pertp`` is set).

    Returns:
        ``(mean_loss, accuracy, auc)``. ``auc`` is the ROC-AUC of the class-1
        score for the ``physionet`` and ``MIMIC-III`` datasets and ``0.`` for
        every other dataset.
    """
    pred = []
    true = []
    test_loss = 0
    for test_batch, label in test_loader:
        test_batch, label = test_batch.to(args.device), label.to(args.device)
        batch_len = test_batch.shape[0]
        observed_data, observed_mask, observed_tp \
            = test_batch[:, :, :dim], test_batch[:, :, dim:2 * dim], test_batch[:, :, -1]
        with torch.no_grad():
            out = model(
                torch.cat((observed_data, observed_mask), 2), observed_tp)
            if reconst:
                qz0_mean, qz0_logvar = out[:, :, :args.latent_dim], out[:, :, args.latent_dim:]
                epsilon = torch.randn(
                    num_sample, qz0_mean.shape[0], qz0_mean.shape[1], qz0_mean.shape[2]).to(args.device)
                z0 = epsilon * torch.exp(.5 * qz0_logvar) + qz0_mean
                z0 = z0.view(-1, qz0_mean.shape[1], qz0_mean.shape[2])
                if args.classify_pertp:
                    pred_x = dec(z0, observed_tp[None, :, :].repeat(
                        num_sample, 1, 1).view(-1, observed_tp.shape[1]))
                    out = classifier(pred_x)
                else:
                    out = classifier(z0)
        if args.classify_pertp:
            # Per-time-point labels are one-hot along the last axis.
            N = label.size(-1)
            out = out.view(-1, N)
            label = label.view(-1, N)
            _, label = label.max(-1)
            test_loss += nn.CrossEntropyLoss()(out, label.long()).item() * batch_len * 50.
        else:
            # Repeat the labels once per latent sample.
            label = label.unsqueeze(0).repeat_interleave(
                num_sample, 0).view(-1)
            test_loss += nn.CrossEntropyLoss()(out, label).item() * batch_len * num_sample
        pred.append(out.cpu().numpy())
        true.append(label.cpu().numpy())
    pred = np.concatenate(pred, 0)
    true = np.concatenate(true, 0)
    acc = np.mean(pred.argmax(1) == true)

    if args.dataset == 'physionet' or args.dataset == 'MIMIC-III':
        auc = metrics.roc_auc_score(true, pred[:, 1])
    else:
        # 'PersonActivity' reported 0. already; any other dataset used to
        # leave ``auc`` unbound and crash on the return below.
        auc = 0.

    return test_loss / pred.shape[0], acc, auc
181 |
182 |
def evaluate_regressor(model, test_loader, dec=None, args=None, classifier=None, dim=0):
    """Return the batch-weighted ``(MSE, MAE)`` of ``model`` over ``test_loader``.

    Only column 0 of the model output is compared with the labels. ``dec``
    and ``classifier`` are accepted but not used.
    """
    n_samples = 0
    mse_sum, mae_sum = 0, 0
    mse_fn, mae_fn = nn.MSELoss(), nn.L1Loss()
    for test_batch, label in test_loader:
        test_batch, label = test_batch.to(args.device), label.to(args.device)
        observed_data = test_batch[:, :, :dim]
        observed_mask = test_batch[:, :, dim:2 * dim]
        observed_tp = test_batch[:, :, -1]
        with torch.no_grad():
            out = model(torch.cat((observed_data, observed_mask), 2), observed_tp)
        batch_len = test_batch.shape[0]
        n_samples += batch_len
        prediction = out[:, 0]
        mse_sum += mse_fn(prediction, label).item() * batch_len
        mae_sum += mae_fn(prediction, label).item() * batch_len

    return mse_sum / n_samples, mae_sum / n_samples
200 |
201 |
def evaluate_interpolator(model, test_loader, dec=None, args=None, classifier=None, dim=0):
    """Return ``(MSE, MAE)`` over the target entries selected by the label mask.

    The label tensor carries the target values in its first ``dim`` channels
    and a target mask in the next ``dim`` channels; errors are averaged over
    all selected values of all batches. ``dec`` and ``classifier`` are
    accepted but not used.
    """
    n_values = 0
    mse_sum, mae_sum = 0, 0
    mse_fn, mae_fn = nn.MSELoss(), nn.L1Loss()

    for test_batch, label in test_loader:
        test_batch, label = test_batch.to(args.device), label.to(args.device)
        observed_data = test_batch[:, :, :dim]
        observed_mask = test_batch[:, :, dim:2 * dim]
        observed_tp = test_batch[:, :, -1]
        with torch.no_grad():
            out = model(torch.cat((observed_data, observed_mask), 2), observed_tp)

        target_data = label[:, :, :dim]
        target_mask = label[:, :, dim:2 * dim].bool()
        selected_pred, selected_true = out[target_mask], target_data[target_mask]

        # Weight each batch mean by its number of selected values.
        num_values = torch.sum(target_mask).item()
        mse_sum += mse_fn(selected_pred, selected_true).item() * num_values
        mae_sum += mae_fn(selected_pred, selected_true).item() * num_values
        n_values += num_values

    return mse_sum / n_values, mae_sum / n_values
222 |
223 |
def subsample_timepoints(data, time_steps, mask, percentage_tp_to_sample=None):
    """Randomly keep a fraction of the observed time points of each series.

    For every sample, the time points with at least one observed feature
    (mask sum over the last axis > 0) are found, a fraction of them is drawn
    without replacement, and the remaining ones are zeroed in both ``data``
    and ``mask``. The tensors are modified in place.

    Args:
        data: value tensor, indexed as ``data[i, t]``.
        time_steps: passed through unchanged.
        mask: observation mask matching ``data``; required.
        percentage_tp_to_sample: fraction of observed time points to keep.
            ``None`` (the default) disables subsampling.

    Returns:
        ``(data, time_steps, mask)``.
    """
    if percentage_tp_to_sample is None:
        # Nothing to subsample; the default used to fail with a TypeError in
        # ``int(n * None)``.
        return data, time_steps, mask

    for i in range(data.size(0)):
        # Time points that carry at least one measurement for this sample.
        current_mask = mask[i].sum(-1).cpu()
        non_missing_tp = np.where(current_mask > 0)[0]
        n_to_sample = int(len(non_missing_tp) * percentage_tp_to_sample)
        subsampled_idx = sorted(np.random.choice(
            non_missing_tp, n_to_sample, replace=False))
        tp_to_set_to_zero = np.setdiff1d(non_missing_tp, subsampled_idx)

        data[i, tp_to_set_to_zero] = 0.
        mask[i, tp_to_set_to_zero] = 0.

    return data, time_steps, mask
242 |
243 |
def generate_irregular_samples(data, input_dim):
    """Split a padded tensor into per-sample ``[values, times, mask]`` lists.

    The last channel of ``data`` holds the time points. A sample is cut at
    the first zero time value found after position 0; without such a zero
    the full length is kept.

    Returns:
        ``(samples, max_len)`` where ``max_len`` is the longest kept length.
    """
    samples = []
    longest = 0
    full_len = data.shape[1]
    for series in data:
        zero_positions = torch.where(series[:, -1][1:] == 0)[0]
        if len(zero_positions):
            length = zero_positions[0].item() + 1
        else:
            length = full_len
        longest = max(longest, length)

        trimmed = series[:length]
        samples.append([
            trimmed[:, : input_dim],
            trimmed[:, -1],
            trimmed[:, input_dim: 2 * input_dim],
        ])
    return samples, longest
257 |
258 |
def generate_batches(X_train, X_val, args):
    """Build the shuffled pre-training dataloader and its metadata.

    The number of value channels is derived from the last axis of
    ``X_train`` as ``(channels - 1) // 2``. ``X_val`` is accepted but not
    used. The batch size is capped by the dataset size and by 256.

    Returns:
        Dict with ``train_dataloader``, ``input_dim``, ``max_len`` and
        ``n_train_batches``.
    """
    input_dim = (X_train.shape[2] - 1) // 2

    samples, max_len = generate_irregular_samples(X_train, input_dim)
    pretrain_data = TimeDataset(samples)

    collator = CLDataCollator(max_len=max_len, args=args)
    batch_size = min(len(pretrain_data), args.batch_size, 256)
    train_dataloader = DataLoader(
        pretrain_data,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collator,
        num_workers=0,
    )

    return {
        "train_dataloader": train_dataloader,
        "input_dim": input_dim,
        "max_len": max_len,
        "n_train_batches": len(train_dataloader),
    }
289 |
290 |
def get_unlabeled_pretrain_data(X_train, args):
    """Wrap a NumPy training array into the pre-training data objects.

    Converts ``X_train`` to a tensor, prints its shape and delegates to
    ``generate_batches`` without a validation set.
    """
    train_tensor = torch.from_numpy(X_train)
    print(f'X_train: {train_tensor.shape}')
    return generate_batches(train_tensor, None, args)
302 |
303 |
def get_finetune_data(args):
    """Load the fine-tuning tensors from ``args.path`` and wrap them in dataloaders.

    Expects ``X_{train,val,test}.pt`` and ``y_{train,val,test}.pt`` directly
    under ``args.path`` (the path is used as a plain string prefix).

    Args:
        args: namespace providing ``path``, ``task`` and ``batch_size``.
            ``task`` must be ``'classification'``, ``'regression'`` or
            ``'interpolation'``.

    Returns:
        Dict with ``train_dataloader``, ``val_dataloader``,
        ``test_dataloader`` (none of them shuffled) and ``input_dim``.

    Raises:
        ValueError: if ``args.task`` is not one of the supported tasks.
    """
    if args.task == 'classification':
        # Class indices: integer, with singleton dimensions squeezed away.
        def as_target(y):
            return y.long().squeeze()
    elif args.task == 'regression' or args.task == 'interpolation':
        def as_target(y):
            return y.float()
    else:
        # An unknown task used to surface later as an UnboundLocalError.
        raise ValueError(f'unsupported task: {args.task!r}')

    X_train, y_train = torch.load(args.path + 'X_train.pt'), torch.load(args.path + 'y_train.pt')
    X_val, y_val = torch.load(args.path + 'X_val.pt'), torch.load(args.path + 'y_val.pt')
    X_test, y_test = torch.load(args.path + 'X_test.pt'), torch.load(args.path + 'y_test.pt')
    input_dim = (X_train.shape[2] - 1) // 2

    print('X_train: ' + str(X_train.shape) + ' y_train: ' + str(y_train.shape))
    print('X_val: ' + str(X_val.shape) + ' y_val: ' + str(y_val.shape))
    print('X_test: ' + str(X_test.shape) + ' y_test: ' + str(y_test.shape))

    train_data_combined = TensorDataset(X_train, as_target(y_train))
    val_data_combined = TensorDataset(X_val, as_target(y_val))
    test_data_combined = TensorDataset(X_test, as_target(y_test))

    train_dataloader = DataLoader(train_data_combined, batch_size=args.batch_size, shuffle=False)
    val_dataloader = DataLoader(val_data_combined, batch_size=args.batch_size, shuffle=False)
    test_dataloader = DataLoader(test_data_combined, batch_size=args.batch_size, shuffle=False)

    data_objects = {"train_dataloader": train_dataloader,
                    "test_dataloader": test_dataloader,
                    "val_dataloader": val_dataloader,
                    "input_dim": input_dim}

    return data_objects
333 |
--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
1 | # from .encoder_transformer import TSEncoder
2 | from .encoder import TSEncoder
3 |
--------------------------------------------------------------------------------
/src/models/attention.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from einops import rearrange, repeat
5 |
6 |
7 | ########################################################################################
8 |
class Residual(nn.Module):
    """Add the input back onto the output of the wrapped callable."""

    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, x, **kwargs):
        transformed = self.fn(x, **kwargs)
        return transformed + x
16 |
17 |
class PreNorm(nn.Module):
    """Apply ``LayerNorm(dim)`` to the input before calling the wrapped callable."""

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        normed = self.norm(x)
        return self.fn(normed, **kwargs)
26 |
27 |
class FeedForward(nn.Module):
    """Two-layer MLP ``dim -> hidden_dim -> dim`` with ReLU and dropout after each linear stage."""

    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)
41 |
42 |
class Attention(nn.Module):
    """Multi-head self-attention over a ``(batch, tokens, dim)`` input.

    ``dim`` must be divisible by ``heads``. The scores are scaled by
    ``dim ** -0.5`` (the full model width, not the per-head width).
    """

    def __init__(self, dim, heads=8, dropout=0.):
        super().__init__()
        self.heads = heads
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
        self.to_out = nn.Sequential(
            nn.Linear(dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask=None):
        """Attend over the tokens of ``x``.

        Args:
            x: tensor of shape ``(batch, tokens, dim)``.
            mask: optional boolean tensor; it is flattened per sample and a
                leading ``True`` is prepended, so it must cover every token
                except the first. Attention is allowed only between pairs of
                tokens that are both unmasked; a fully masked query row
                therefore yields NaN after the softmax.

        Returns:
            Tensor with the same shape as ``x``.
        """
        b, n, _, h = *x.shape, self.heads
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        # (b, n, h * d) -> (b, h, n, d)
        q, k, v = (t.reshape(b, n, h, -1).permute(0, 2, 1, 3) for t in qkv)

        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale

        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value=True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            # Pairwise mask with an explicit head axis, (b, 1, n, n), so that
            # it broadcasts over the heads. The previous (b, n, n) layout
            # only lined up with ``dots`` when batch == heads or batch == 1.
            mask = mask[:, None, :, None] & mask[:, None, None, :]
            dots.masked_fill_(~mask, float('-inf'))
            del mask

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        # (b, h, n, d) -> (b, n, h * d)
        out = out.permute(0, 2, 1, 3).reshape(b, n, -1)
        out = self.to_out(out)
        return out
75 |
76 |
class Transformer(nn.Module):
    """Stack of ``depth`` pre-norm residual attention / feed-forward layer pairs."""

    def __init__(self, dim, depth, heads, mlp_dim, dropout):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            attention = Residual(PreNorm(dim, Attention(dim, heads=heads, dropout=dropout)))
            feed_forward = Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)))
            self.layers.append(nn.ModuleList([attention, feed_forward]))

    def forward(self, x, mask=None):
        # The mask is only forwarded to the attention sub-layer.
        for layer in self.layers:
            attention, feed_forward = layer
            x = feed_forward(attention(x, mask=mask))
        return x
92 |
93 |
class Seq_Transformer(nn.Module):
    """Transformer encoder that summarises a sequence through a learned token.

    The input is linearly embedded, a learned token is prepended, the
    sequence is run through the transformer, and the output at the position
    of that token is returned.
    """

    def __init__(self, *, patch_size, dim, depth, heads, mlp_dim, channels=1, dropout=0.1):
        super().__init__()
        self.patch_to_embedding = nn.Linear(channels * patch_size, dim)
        self.c_token = nn.Parameter(torch.randn(1, 1, dim))
        self.transformer = Transformer(dim, depth, heads, mlp_dim, dropout)
        self.to_c_token = nn.Identity()

    def forward(self, forward_seq):
        embedded = self.patch_to_embedding(forward_seq)
        batch, _, _ = embedded.shape
        # One copy of the learned token per batch element, placed first.
        tokens = repeat(self.c_token, '() n d -> b n d', b=batch)
        encoded = self.transformer(torch.cat((tokens, embedded), dim=1))
        return self.to_c_token(encoded[:, 0])
112 |
--------------------------------------------------------------------------------
/src/models/backbone.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 |
3 | import math
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 | from models.encoder import generate_binomial_mask, generate_continuous_mask
8 |
class SimConv4(torch.nn.Module):
    """Four strided 1-D convolution stages producing one unit-norm vector per series.

    The input is projected to ``hidden_dims`` channels by a linear layer,
    optionally masked along time, passed through four
    Conv1d(kernel 4, stride 2, padding 1) + BatchNorm + ReLU stages, average
    pooled over time and L2-normalised.
    """

    def __init__(self, input_dims, output_dims, hidden_dims=64, mask_mode='binomial'):
        super(SimConv4, self).__init__()
        self.input_fc = nn.Linear(input_dims, hidden_dims)

        self.feature_size = output_dims
        self.name = "conv4"
        self.mask_mode = mask_mode

        def conv_stage(out_channels, *tail):
            # Conv (kernel 4, stride 2, padding 1, no bias) -> BatchNorm -> ReLU.
            return torch.nn.Sequential(
                nn.Conv1d(hidden_dims, out_channels, 4, 2, 1, bias=False),
                torch.nn.BatchNorm1d(out_channels),
                torch.nn.ReLU(),
                *tail,
            )

        self.layer1 = conv_stage(hidden_dims)
        self.layer2 = conv_stage(hidden_dims)
        self.layer3 = conv_stage(hidden_dims)
        # The last stage also collapses the time axis to length one.
        self.layer4 = conv_stage(output_dims, torch.nn.AdaptiveAvgPool1d(1))

        self.flatten = torch.nn.Flatten()

        for module in self.modules():
            if isinstance(module, torch.nn.Conv2d):
                fan = module.kernel_size[0] * module.kernel_size[1] * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(module, torch.nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            if isinstance(module, nn.Conv1d):
                nn.init.xavier_normal_(module.weight.data)
            elif isinstance(module, nn.BatchNorm1d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

    def forward(self, x, mask=None):
        """Encode ``x``; returns a tensor of shape ``(batch, 1, output_dims)``.

        Time steps containing NaN are zeroed in ``x`` in place. ``mask`` may
        be a mode name (``'binomial'``, ``'continuous'``, ``'all_true'``,
        ``'all_false'``, ``'mask_last'``) or a boolean tensor; when omitted,
        ``mask_mode`` is used in training and ``'all_true'`` otherwise.
        """
        nan_mask = ~x.isnan().any(axis=-1)
        x[~nan_mask] = 0
        x = self.input_fc(x)  # B x T x Ch

        # generate & apply mask
        if mask is None:
            mask = self.mask_mode if self.training else 'all_true'

        n_batch, n_steps = x.size(0), x.size(1)
        if mask == 'binomial':
            mask = generate_binomial_mask(n_batch, n_steps).to(x.device)
        elif mask == 'continuous':
            mask = generate_continuous_mask(n_batch, n_steps).to(x.device)
        elif mask == 'all_true':
            mask = x.new_full((n_batch, n_steps), True, dtype=torch.bool)
        elif mask == 'all_false':
            mask = x.new_full((n_batch, n_steps), False, dtype=torch.bool)
        elif mask == 'mask_last':
            mask = x.new_full((n_batch, n_steps), True, dtype=torch.bool)
            mask[:, -1] = False

        # A caller-supplied tensor mask is combined with the NaN mask in place.
        mask &= nan_mask
        x[~mask] = 0

        h = torch.permute(x, [0, 2, 1])  # B x Ch x T for the convolutions
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            h = stage(h)
        h = self.flatten(h)
        h = F.normalize(h, dim=1)
        return torch.unsqueeze(h, 1)
102 |
--------------------------------------------------------------------------------
/src/models/dilated_conv.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 |
6 |
class RelatedConv(nn.Module):
    """2-D convolution over a ``(B, Ch, T)`` input treated as a one-channel image.

    ``kernel_size`` must be a tuple or list ``(height, width)``. Padding is
    applied along the last axis only, and one trailing step is trimmed when
    the receptive field is even, so the time length is preserved.
    """

    def __init__(self, out_channel, kernel_size, dilation=1):
        super(RelatedConv, self).__init__()
        assert isinstance(kernel_size, tuple) or isinstance(kernel_size, list)
        receptive_field = (kernel_size[-1] - 1) * dilation + 1
        self.conv = nn.Conv2d(
            1, out_channel, kernel_size,
            padding=[0, receptive_field // 2],
            dilation=dilation,
        )
        self.remove = 0 if receptive_field % 2 else 1

    def forward(self, x):
        out = self.conv(x.unsqueeze(1))  # B * Ch * T => B * 1 * Ch * T => B * out * 1 * T
        if self.remove > 0:
            out = out[..., :-self.remove]
        return out.squeeze(2)  # B * out * 1 * T => B * out * T
26 |
27 |
class RelatedEncoder(nn.Module):
    """Chain of ``RelatedConv`` layers, one per entry of ``out_channels``.

    Layer ``i`` maps the output channels of the previous layer (``channel``
    for the first layer) to ``out_channels[i]``.

    Args:
        out_channels: sequence of output widths; it is not modified.
        channel: number of channels of the input.
        kernel_size: temporal kernel width, either one int shared by all
            layers or one width per layer.
    """

    def __init__(self, out_channels, channel, kernel_size: int):
        super(RelatedEncoder, self).__init__()

        if isinstance(kernel_size, int):
            kernel_size = [kernel_size] * len(out_channels)

        # Build a private width list instead of inserting into the caller's
        # list, which used to grow by one entry on every construction.
        widths = [channel, *out_channels]
        self.net = nn.Sequential(*[
            RelatedConv(widths[i], kernel_size=(widths[i - 1], kernel_size[i - 1]))
            for i in range(1, len(widths))
        ])

    def forward(self, x):
        return self.net(x)
43 |
44 |
class SamePadConv(nn.Module):
    """1-D convolution whose output has the same time length as its input.

    Padding of half the receptive field is applied on both sides; when the
    receptive field is even, the surplus trailing step is dropped.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation=1, groups=1):
        super().__init__()
        self.receptive_field = (kernel_size - 1) * dilation + 1
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            padding=self.receptive_field // 2,
            dilation=dilation,
            groups=groups,
        )
        self.remove = 0 if self.receptive_field % 2 else 1

    def forward(self, x):
        out = self.conv(x)
        if self.remove <= 0:
            return out
        return out[:, :, : -self.remove]
63 |
class ConvBlock(nn.Module):
    """Residual block of two GELU-activated same-length dilated convolutions.

    The skip path uses a 1x1 convolution when the channel count changes or
    when ``final`` is set, and the identity otherwise.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation, final=False):
        super().__init__()
        self.conv1 = SamePadConv(in_channels, out_channels, kernel_size, dilation=dilation)
        self.conv2 = SamePadConv(out_channels, out_channels, kernel_size, dilation=dilation)
        needs_projection = in_channels != out_channels or final
        self.projector = nn.Conv1d(in_channels, out_channels, 1) if needs_projection else None

    def forward(self, x):
        if self.projector is None:
            residual = x
        else:
            residual = self.projector(x)
        hidden = self.conv1(F.gelu(x))
        hidden = self.conv2(F.gelu(hidden))
        return hidden + residual
78 |
class DilatedConvEncoder(nn.Module):
    """Sequence of ``ConvBlock``s whose dilation doubles at every level.

    Block ``i`` maps ``channels[i - 1]`` (``in_channels`` for the first block)
    to ``channels[i]`` with dilation ``2 ** i``; the last block is built with
    ``final=True``.
    """

    def __init__(self, in_channels, channels, kernel_size):
        super().__init__()
        last_level = len(channels) - 1
        input_widths = [in_channels, *channels[:-1]]
        blocks = []
        for level, (width_in, width_out) in enumerate(zip(input_widths, channels)):
            blocks.append(
                ConvBlock(
                    width_in,
                    width_out,
                    kernel_size=kernel_size,
                    dilation=2 ** level,
                    final=(level == last_level),
                )
            )
        self.net = nn.Sequential(*blocks)

    def forward(self, x):
        return self.net(x)
--------------------------------------------------------------------------------
/src/models/encoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import copy
3 | from torch import nn
4 | import numpy as np
5 | from .dilated_conv import DilatedConvEncoder
6 |
7 |
def generate_continuous_mask(B, T, n=5, l=0.1):
    """Return a ``(B, T)`` boolean mask with random contiguous ``False`` runs.

    Args:
        B: Number of rows (series) in the mask.
        T: Number of time steps per row.
        n: Number of runs drawn per row. A float is interpreted as a fraction
            of ``T``. The value is clamped to ``[1, T // 2]`` (at least 1).
        l: Length of each run. A float is interpreted as a fraction of ``T``.
            The value is clamped to ``[1, T]``.

    Returns:
        A ``torch.bool`` tensor that is ``True`` everywhere except inside the
        drawn runs. Runs may overlap. Start positions come from NumPy's
        global random state.
    """
    res = torch.full((B, T), True, dtype=torch.bool)
    if T < 1:
        # Nothing to mask; without this guard randint() below would raise.
        return res

    if isinstance(n, float):
        n = int(n * T)
    n = max(min(n, T // 2), 1)

    if isinstance(l, float):
        l = int(l * T)
    # Clamp to T as well: a run longer than the series used to make
    # ``T - l + 1`` non-positive and crash np.random.randint.
    l = max(min(l, T), 1)

    for i in range(B):
        for _ in range(n):
            t = np.random.randint(T - l + 1)
            res[i, t:t + l] = False
    return res
23 |
24 |
class BertInterpHead(nn.Module):
    """Two-layer MLP head mapping ``hidden_dim`` features to ``input_dim``.

    The layers are applied along the last axis of the input:
    ``Linear(hidden_dim, 4 * hidden_dim) -> ReLU -> Linear(4 * hidden_dim, input_dim)``.
    No pooling is performed; every leading axis is preserved.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        expanded_dim = 4 * hidden_dim
        self.dense = nn.Linear(hidden_dim, expanded_dim)
        self.activation = nn.ReLU()
        self.project = nn.Linear(expanded_dim, input_dim)

    def forward(self, first_token_tensor):
        expanded = self.activation(self.dense(first_token_tensor))
        return self.project(expanded)
39 |
40 |
def generate_binomial_mask(B, T, p=0.5):
    """Return a ``(B, T)`` boolean mask whose entries are ``True`` with probability ``p``.

    Draws come from NumPy's global random state.
    """
    draws = np.random.binomial(1, p, size=(B, T))
    return torch.from_numpy(draws).to(torch.bool)
43 |
44 |
class TSEncoder(nn.Module):
    """Time-series encoder: linear projection, dilated conv stack, dropout.

    In training mode ``forward`` returns a pair (representation of the series
    multiplied by ``'mask_origin'``, reconstruction produced by ``interphead``
    from the series multiplied by ``'mask'``). In eval mode it returns a
    single representation of the series multiplied by the mask ``m``.
    """

    def __init__(self, input_dims, output_dims, hidden_dims=64, depth=10, mask_mode='binomial'):
        """Build the projection, the convolutional stack and the reconstruction head.

        Args:
            input_dims: Size of the last axis fed to ``input_fc``; also the
                output size of ``interphead``.
            output_dims: Channel count of the last convolutional block.
            hidden_dims: Channel count of the projection and of the first
                ``depth`` convolutional blocks.
            depth: Number of ``hidden_dims``-wide blocks before the output block.
            mask_mode: Stored on the instance; not read anywhere in this class.
        """
        super().__init__()
        self.input_dims = input_dims
        self.output_dims = output_dims
        self.hidden_dims = hidden_dims
        self.mask_mode = mask_mode
        self.input_fc = nn.Linear(input_dims, hidden_dims)

        # The conv stack runs on the projected features, hence hidden_dims
        # (not input_dims) as its input width.
        self.feature_extractor = DilatedConvEncoder(
            # input_dims,
            hidden_dims,
            [hidden_dims] * depth + [output_dims],
            kernel_size=3
        )
        self.repr_dropout = nn.Dropout(p=0.1)
        self.interphead = BertInterpHead(input_dims, output_dims)

    def forward(self, x, mask=None):  # x: B x T x input_dims
        """Encode a batch.

        Args:
            x: Either a dict holding a ``'mask'`` entry plus the values under
                ``'data'`` (preferred) or ``'x'``, or a single tensor whose
                trailing half of the last axis is the mask and whose leading
                half holds the values. The last value column is split off and
                the remaining columns are encoded.
                NOTE(review): in training mode ``'mask_origin'`` and ``'mask'``
                are looked up by key on the copied input, so a dict input
                appears to be required there -- confirm.
            mask: Only the string ``'mask_last'`` has an effect: the last time
                step and every step that contained a NaN are zeroed before the
                final encoding pass.

        Returns:
            Training: ``(x_whole, self.interphead(x_interp))``.
            Eval: the representation ``x`` (B x T x Co per the shape comments).
        """
        # Snapshot the input before the in-place NaN handling below, so the
        # masks read from ``input_all`` are the untouched originals.
        if isinstance(x, dict):
            input_all = copy.deepcopy(x)
            m = x['mask']
            x = x['data'] if 'data' in x.keys() else x['x']
        else:
            input_all = copy.deepcopy(x)
            m = x[..., -(x.shape[-1] // 2):]
            x = x[..., :-(x.shape[-1] // 2)]

        # Split off the last value column (presumably the timestamp -- confirm);
        # ``t`` is not used again in this method.
        t = x[..., -1]
        x = x[..., :-1]

        # Must be computed before NaNs are overwritten on the next statement.
        if mask == 'mask_last':
            nan_mask = ~x.isnan().any(axis=-1)

        # In-place writes: ``x`` and ``m`` are slices of the caller's tensors.
        x[torch.isnan(x)], m[torch.isnan(m)] = 0, 0

        # whole series without missing
        if self.training:
            x_whole = self.input_fc(x * input_all['mask_origin'])
            x_whole = x_whole.transpose(1, 2)
            x_whole = self.feature_extractor(x_whole)  # B x Ch x T
            x_whole = x_whole.transpose(1, 2)  # B x T x Co
            x_whole = self.repr_dropout(x_whole)

        # recon mask part
        if self.training:
            x_interp = self.input_fc(x * input_all['mask'])
            x_interp = x_interp.transpose(1, 2)
            x_interp = self.feature_extractor(x_interp)  # B x Ch x T
            x_interp = x_interp.transpose(1, 2)  # B x T x Co
            x_interp = self.repr_dropout(x_interp)

        # From here on ``mask`` is rebound from the mode string to a B x T
        # boolean tensor (False = zero this time step).
        if mask == 'mask_last':
            mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
            mask[:, -1] = False
            mask &= nan_mask
            x[~mask] = 0

        # This pass runs in both modes; its result is only returned in eval mode.
        x = self.input_fc(x * m)
        x = x.transpose(1, 2)
        x = self.feature_extractor(x)  # B x Ch x T
        x = x.transpose(1, 2)  # B x T x Co
        x = self.repr_dropout(x)

        if self.training:
            return x_whole, self.interphead(x_interp)
        else:
            return x
113 |
--------------------------------------------------------------------------------
/src/models/losses.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 |
5 |
def hierarchical_contrastive_loss(z1, z2, alpha=0.8, temporal_unit=0, temp=1.0):
    """Average the mixup contrastive losses over a max-pooling hierarchy.

    At every level the instance-wise loss (weight ``alpha``) and, once the
    level index reaches ``temporal_unit``, the temporal loss (weight
    ``1 - alpha``) are accumulated; the time axis (dim 1) is then halved with
    max pooling. A final instance-wise term is added when a single time step
    remains. The sum is divided by the number of levels visited.

    Args:
        z1, z2: Representations of the two views, time on dim 1.
        alpha: Weight of the instance-wise term; a zero weight skips the term.
        temporal_unit: First level at which the temporal term is included.
        temp: Temperature forwarded to both loss terms.
    """
    loss = torch.tensor(0., device=z1.device)
    d = 0
    use_instance = alpha != 0
    use_temporal = 1 - alpha != 0

    while z1.size(1) > 1:
        if use_instance:
            loss += alpha * instance_contrastive_loss_mixup(z1, z2, temp)
        if use_temporal and d >= temporal_unit:
            loss += (1 - alpha) * temporal_contrastive_loss_mixup(z1, z2, temp)
        d += 1

        # Halve the temporal resolution for the next level.
        z1 = F.max_pool1d(z1.transpose(1, 2), kernel_size=2).transpose(1, 2)
        z2 = F.max_pool1d(z2.transpose(1, 2), kernel_size=2).transpose(1, 2)

    if z1.size(1) == 1:
        if use_instance:
            loss += alpha * instance_contrastive_loss_mixup(z1, z2, temp)
        d += 1
    return loss / d
33 |
34 |
def temporal_contrastive_loss_mixup(z1, z2, temp=1.0):
    """Temporal contrastive loss with time-shuffled mixup samples as extra candidates.

    For every series, each time step of one view is contrasted against all
    time steps of both views plus two mixup sequences (0.2 of the original
    blended with 0.8 of a random time permutation of itself). The positive for
    step ``t`` of one view is step ``t`` of the other view.

    Returns a scalar tensor; ``0`` when there is a single time step.
    """
    T = z1.size(1)
    if T == 1:
        return z1.new_tensor(0.)

    alpha = 0.2
    beta = 0.2
    # Both permutations are drawn with z1's length, in this order.
    shuffled1 = z1[:, torch.randperm(z1.shape[1]), :].view(z1.size())
    uni_z1 = alpha * z1 + (1 - alpha) * shuffled1
    shuffled2 = z2[:, torch.randperm(z1.shape[1]), :].view(z2.size())
    uni_z2 = beta * z2 + (1 - beta) * shuffled2

    candidates = torch.cat([z1, z2, uni_z1, uni_z2], dim=1)  # B x 4T x C
    queries = candidates[:, : 2 * T, :]
    sim = torch.matmul(queries, candidates.transpose(1, 2)) / temp  # B x 2T x 4T
    # Drop the self-similarity diagonal: B x 2T x (4T-1)
    logits = torch.tril(sim, diagonal=-1)[:, :, :-1]
    logits += torch.triu(sim, diagonal=1)[:, :, 1:]

    if T > 1500:
        # Rebind the large intermediates to CPU copies and release cached GPU memory.
        candidates, sim = candidates.cpu(), sim.cpu()
        torch.cuda.empty_cache()

    logits = -F.log_softmax(logits, dim=-1)
    logits = logits[:, :2 * T, :(2 * T - 1)]

    steps = torch.arange(T, device=z1.device)
    first_to_second = logits[:, steps, T + steps - 1].mean()
    second_to_first = logits[:, T + steps, steps].mean()
    return (first_to_second + second_to_first) / 2
63 |
64 |
def instance_contrastive_loss_mixup(z1, z2, temp=1.0):
    """Instance-wise contrastive loss with batch-shuffled mixup samples as extra candidates.

    At every time step, each series of one view is contrasted against all
    series of both views plus two mixup batches (0.2 of the original blended
    with 0.8 of a random batch permutation of itself). The positive for series
    ``i`` of one view is series ``i`` of the other view.

    Returns a scalar tensor; ``0`` when the batch holds a single series.
    """
    B = z1.size(0)
    if B == 1:
        return z1.new_tensor(0.)

    alpha = 0.2
    beta = 0.2
    shuffled1 = z1[torch.randperm(z1.shape[0]), :, :].view(z1.size())
    uni_z1 = alpha * z1 + (1 - alpha) * shuffled1
    shuffled2 = z2[torch.randperm(z2.shape[0]), :, :].view(z2.size())
    uni_z2 = beta * z2 + (1 - beta) * shuffled2

    candidates = torch.cat([z1, z2, uni_z1, uni_z2], dim=0).transpose(0, 1)  # T x 4B x C
    queries = candidates[:, : 2 * B, :]
    sim = torch.matmul(queries, candidates.transpose(1, 2)) / temp  # T x 2B x 4B

    # Drop the self-similarity diagonal: T x 2B x (4B-1)
    logits = torch.tril(sim, diagonal=-1)[:, :, :-1]
    logits += torch.triu(sim, diagonal=1)[:, :, 1:]
    logits = -F.log_softmax(logits, dim=-1)
    logits = logits[:, :2 * B, :(2 * B - 1)]

    rows = torch.arange(B, device=z1.device)
    first_to_second = logits[:, rows, B + rows - 1].mean()
    second_to_first = logits[:, B + rows, rows].mean()
    return (first_to_second + second_to_first) / 2
89 |
--------------------------------------------------------------------------------
/src/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification import eval_classification
2 | from .forecasting import eval_forecasting
3 | from .anomaly_detection import eval_anomaly_detection, eval_anomaly_detection_coldstart
4 | from .imputation import eval_imputation
5 |
--------------------------------------------------------------------------------
/src/tasks/_eval_protocols.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.linear_model import Ridge
3 | from sklearn.svm import SVC
4 | from sklearn.linear_model import LogisticRegression
5 | from sklearn.neighbors import KNeighborsClassifier
6 | from sklearn.preprocessing import StandardScaler
7 | from sklearn.pipeline import make_pipeline
8 | from sklearn.model_selection import GridSearchCV, train_test_split
9 |
def fit_svm(features, y, MAX_SAMPLES=10000):
    """Fit an RBF SVM, grid-searching ``C`` when there is enough data.

    With fewer than 5 samples per class on average, or fewer than 50 samples
    overall, a single SVM with ``C=inf`` is fitted. Otherwise ``C`` is chosen
    by 5-fold cross-validation, on a stratified subsample of ``MAX_SAMPLES``
    rows if the training set is larger than that.

    Returns the fitted estimator.
    """
    n_classes = np.unique(y).shape[0]
    n_train = features.shape[0]
    base_svm = SVC(C=np.inf, gamma='scale')

    # Too little data for a meaningful cross-validation.
    if n_train // n_classes < 5 or n_train < 50:
        return base_svm.fit(features, y)

    param_grid = {
        'C': [
            0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000,
            np.inf
        ],
        'kernel': ['rbf'],
        'degree': [3],
        'gamma': ['scale'],
        'coef0': [0],
        'shrinking': [True],
        'probability': [False],
        'tol': [0.001],
        'cache_size': [200],
        'class_weight': [None],
        'verbose': [False],
        'max_iter': [10000000],
        'decision_function_shape': ['ovr'],
        'random_state': [None]
    }
    search = GridSearchCV(base_svm, param_grid, cv=5, n_jobs=5)

    if n_train > MAX_SAMPLES:
        features, _, y, _ = train_test_split(
            features, y,
            train_size=MAX_SAMPLES, random_state=0, stratify=y
        )

    search.fit(features, y)
    return search.best_estimator_
51 |
def fit_lr(features, y, MAX_SAMPLES=100000):
    """Fit a standardise-then-logistic-regression (one-vs-rest) pipeline.

    Training sets larger than ``MAX_SAMPLES`` rows are reduced to a stratified
    subsample of that size first. Returns the fitted pipeline.
    """
    if features.shape[0] > MAX_SAMPLES:
        features, _, y, _ = train_test_split(
            features, y,
            train_size=MAX_SAMPLES, random_state=0, stratify=y
        )

    classifier = LogisticRegression(
        random_state=0,
        max_iter=1000000,
        multi_class='ovr'
    )
    pipe = make_pipeline(StandardScaler(), classifier)
    pipe.fit(features, y)
    return pipe
72 |
def fit_knn(features, y):
    """Fit a standardise-then-1-nearest-neighbour pipeline and return it."""
    nearest = KNeighborsClassifier(n_neighbors=1)
    pipe = make_pipeline(StandardScaler(), nearest)
    pipe.fit(features, y)
    return pipe
80 |
def fit_ridge(train_features, train_y, valid_features, valid_y, MAX_SAMPLES=100000):
    """Fit a ridge regressor whose ``alpha`` is selected on the validation set.

    Each candidate ``alpha`` is scored by RMSE + MAE of its validation
    predictions; the one with the lowest score is refitted on the training
    data and returned. Training and validation sets larger than
    ``MAX_SAMPLES`` rows are randomly subsampled to that size first.
    """
    if train_features.shape[0] > MAX_SAMPLES:
        train_features, _, train_y, _ = train_test_split(
            train_features, train_y,
            train_size=MAX_SAMPLES, random_state=0
        )
    if valid_features.shape[0] > MAX_SAMPLES:
        valid_features, _, valid_y, _ = train_test_split(
            valid_features, valid_y,
            train_size=MAX_SAMPLES, random_state=0
        )

    alphas = [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]

    def validation_score(alpha):
        candidate = Ridge(alpha=alpha).fit(train_features, train_y)
        valid_pred = candidate.predict(valid_features)
        return np.sqrt(((valid_pred - valid_y) ** 2).mean()) + np.abs(valid_pred - valid_y).mean()

    valid_results = [validation_score(alpha) for alpha in alphas]
    best_alpha = alphas[np.argmin(valid_results)]

    return Ridge(alpha=best_alpha).fit(train_features, train_y)
110 |
--------------------------------------------------------------------------------
/src/tasks/anomaly_detection.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import time
3 | from sklearn.metrics import f1_score, precision_score, recall_score
4 | import bottleneck as bn
5 |
6 |
7 | # consider delay threshold and missing segments
def get_range_proba(predict, label, delay=7):
    """Apply segment-level, delay-aware adjustment to point predictions.

    ``label`` is cut into maximal runs of equal value. For every anomalous run
    (value 1), the whole run is marked as detected if ``predict`` contains a 1
    within the first ``delay + 1`` points of the run, and as missed otherwise.
    Predictions inside normal runs are left untouched.

    Args:
        predict: 1-D array of 0/1 predictions.
        label: 1-D array of 0/1 ground-truth labels, same length.
        delay: Maximum number of points after a run's start at which a
            detection still counts.

    Returns:
        A new array with the adjusted predictions.
    """
    boundaries = np.where(label[1:] != label[:-1])[0] + 1
    adjusted = np.array(predict)
    in_anomaly = label[0] == 1
    start = 0

    # Walk the runs [start, end); the last run ends at len(label).
    for end in list(boundaries) + [len(label)]:
        if in_anomaly:
            deadline = min(start + delay + 1, end)
            adjusted[start:end] = 1 if 1 in predict[start:deadline] else 0
        in_anomaly = not in_anomaly
        start = end

    return adjusted
31 |
32 |
33 | # set missing = 0
def reconstruct_label(timestamp, label):
    """Place labels on a regular time grid, filling missing points with 0.

    The grid step is the smallest gap between consecutive sorted timestamps.
    Every label is written to the grid cell of its timestamp; cells without an
    observation stay 0.

    Args:
        timestamp: 1-D sequence of integer timestamps (any order).
        label: 1-D sequence of integer labels aligned with ``timestamp``.

    Returns:
        ``np.int64`` array covering the first through the last timestamp at
        the detected step. With fewer than two timestamps no step can be
        derived, so the labels are returned as they are. Duplicate timestamps
        produce a zero step and are not supported.
    """
    timestamp = np.asarray(timestamp, np.int64)
    label = np.asarray(label, np.int64)

    order = np.argsort(timestamp)
    timestamp_sorted = timestamp[order]
    label = label[order]

    if timestamp_sorted.size < 2:
        # np.diff would be empty and np.min of it raises.
        return label

    interval = np.min(np.diff(timestamp_sorted))
    idx = (timestamp_sorted - timestamp_sorted[0]) // interval

    # np.int was removed in NumPy 1.24; use an explicit fixed-width dtype.
    new_label = np.zeros(shape=(idx[-1] + 1,), dtype=np.int64)
    new_label[idx] = label

    return new_label
50 |
51 |
def eval_ad_result(test_pred_list, test_labels_list, test_timestamps_list, delay):
    """Score anomaly predictions over several series.

    Each series' labels and predictions are placed on a regular time grid,
    the predictions are adjusted per anomaly segment with the given ``delay``,
    and F1 / precision / recall are computed on the concatenation of all
    series.

    Returns:
        dict with keys ``'f1'``, ``'precision'`` and ``'recall'``.
    """
    all_labels, all_preds = [], []
    series = zip(test_pred_list, test_labels_list, test_timestamps_list)
    for series_pred, series_labels, series_timestamps in series:
        assert series_pred.shape == series_labels.shape == series_timestamps.shape
        dense_labels = reconstruct_label(series_timestamps, series_labels)
        dense_pred = reconstruct_label(series_timestamps, series_pred)
        all_labels.append(dense_labels)
        all_preds.append(get_range_proba(dense_pred, dense_labels, delay))

    y_true = np.concatenate(all_labels)
    y_pred = np.concatenate(all_preds)
    return {
        'f1': f1_score(y_true, y_pred),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred)
    }
69 |
70 |
def np_shift(arr, num, fill_value=np.nan):
    """Shift ``arr`` by ``num`` positions along axis 0, padding with ``fill_value``.

    A positive ``num`` moves values towards higher indices, a negative one
    towards lower indices; ``num == 0`` yields a plain copy. The result has
    the shape and dtype of ``arr``.
    """
    shifted = np.empty_like(arr)
    if num == 0:
        shifted[:] = arr
        return shifted

    if num > 0:
        shifted[:num] = fill_value
        shifted[num:] = arr[:-num]
    else:
        shifted[num:] = fill_value
        shifted[:num] = arr[-num:]
    return shifted
82 |
83 |
def eval_anomaly_detection(model, all_train_data, all_train_labels, all_train_timestamps, all_test_data,
                           all_test_labels, all_test_timestamps, delay):
    """Evaluate anomaly detection with a threshold fitted on the training part.

    For every series the train and test parts are concatenated and encoded
    twice with ``model.encode``: once with ``mask='mask_last'`` and once
    without. The L1 distance between the two representations is the raw
    anomaly score, which is normalised by a shifted moving average. A point is
    flagged when its normalised score exceeds mean + 4 * std of the training
    scores.

    Args:
        model: Object exposing ``encode`` (defined elsewhere).
        all_train_data, all_test_data: Dicts with keys ``'x'`` (per-series
            values, keyed by series id) and ``'mask'`` (indexed by series
            position).
        all_train_labels, all_test_labels: Per-series labels keyed like ``'x'``.
        all_train_timestamps, all_test_timestamps: Per-series timestamps.
        delay: Detection delay; used both to suppress repeated alarms and for
            the segment-level scoring.

    Returns:
        ``(res_log, eval_res)``: the per-series 0/1 test predictions and a
        dict with ``'f1'``, ``'precision'``, ``'recall'`` and ``'infer_time'``.
    """
    t = time.time()

    train_mask, test_mask = all_train_data['mask'], all_test_data['mask']
    all_train_data, all_test_data = all_train_data['x'], all_test_data['x']
    # Global timestamp range, taken from the training timestamps only.
    ts = [[np.nanmin(all_train_timestamps[k]), np.nanmax(all_train_timestamps[k])] for k in all_train_timestamps]
    ts_max, ts_min = np.max(np.array(ts)), np.min(np.array(ts))

    all_train_repr = {}
    all_test_repr = {}
    all_train_repr_wom = {}
    all_test_repr_wom = {}
    for i, k in enumerate(all_train_data):
        train_data = all_train_data[k]
        test_data = all_test_data[k]

        # Min-max scale the timestamps and append them as an extra feature column.
        train_ts = (np.array(all_train_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min)
        test_ts = (np.array(all_test_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min)
        train_data = np.concatenate([train_data.reshape(1, -1, 1), train_ts.reshape(1, -1, 1)], axis=-1)
        test_data = np.concatenate([test_data.reshape(1, -1, 1), test_ts.reshape(1, -1, 1)], axis=-1)
        # Row i of the masks is cut to this series' length and joined train-then-test.
        data = {'x': np.concatenate([train_data, test_data], axis=1),
                'mask': np.concatenate(
                    [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)}

        # Representation with the last step of each window masked.
        full_repr = model.encode(
            # np.concatenate([train_data, test_data]).reshape(1, -1, 1),
            data,
            mask='mask_last',
            casual=True,
            sliding_length=1,
            sliding_padding=200,
            batch_size=256
        ).squeeze()
        all_train_repr[k] = full_repr[:train_data.shape[1]]
        all_test_repr[k] = full_repr[train_data.shape[1]:]

        # Same inputs rebuilt from scratch for the second pass
        # (NOTE(review): presumably to guard against encode() modifying them -- confirm).
        data = {'x': np.concatenate([train_data, test_data], axis=1),
                'mask': np.concatenate(
                    [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)}

        # Representation without masking ("wom" = without mask).
        full_repr_wom = model.encode(
            # np.concatenate([train_data, test_data]).reshape(1, -1, 1),
            data,
            casual=True,
            sliding_length=1,
            sliding_padding=200,
            batch_size=256
        ).squeeze()
        all_train_repr_wom[k] = full_repr_wom[:train_data.shape[1]]
        all_test_repr_wom[k] = full_repr_wom[train_data.shape[1]:]

    res_log = []
    labels_log = []
    timestamps_log = []
    for k in all_train_data:
        # Of the six locals below only test_labels and test_timestamps are used.
        train_data = all_train_data[k]
        train_labels = all_train_labels[k]
        train_timestamps = all_train_timestamps[k]

        test_data = all_test_data[k]
        test_labels = all_test_labels[k]
        test_timestamps = all_test_timestamps[k]

        # Raw score: L1 distance between unmasked and masked representations.
        train_err = np.abs(all_train_repr_wom[k] - all_train_repr[k]).sum(axis=1)
        test_err = np.abs(all_test_repr_wom[k] - all_test_repr[k]).sum(axis=1)

        # Relative deviation from the 21-point moving average of the preceding scores.
        ma = np_shift(bn.move_mean(np.concatenate([train_err, test_err]), 21), 1)
        train_err_adj = (train_err - ma[:len(train_err)]) / ma[:len(train_err)]
        test_err_adj = (test_err - ma[len(train_err):]) / ma[len(train_err):]
        # Drop the leading entries (presumably where the moving average is not yet defined).
        train_err_adj = train_err_adj[22:]

        thr = np.mean(train_err_adj) + 4 * np.std(train_err_adj)
        test_res = (test_err_adj > thr) * 1

        # Suppress an alarm when another alarm survives within the previous
        # `delay` points; sequential on purpose, earlier resets affect later checks.
        for i in range(len(test_res)):
            if i >= delay and test_res[i - delay:i].sum() >= 1:
                test_res[i] = 0

        res_log.append(test_res)
        labels_log.append(test_labels)
        timestamps_log.append(test_timestamps)
    # Elapsed time covers encoding and thresholding, not the scoring below.
    t = time.time() - t

    eval_res = eval_ad_result(res_log, labels_log, timestamps_log, delay)
    eval_res['infer_time'] = t
    return res_log, eval_res
171 |
172 |
def eval_anomaly_detection_coldstart(model, all_train_data, all_train_labels, all_train_timestamps, all_test_data,
                                     all_test_labels, all_test_timestamps, delay):
    """Evaluate anomaly detection without a separate training split.

    Train and test parts of every series are concatenated and treated as one
    stream. The stream is encoded with and without ``mask='mask_last'``; the
    L1 distance between the two representations, normalised by a shifted
    moving average, is compared against a running mean + 4 * std threshold.
    The first tenth of each stream is excluded from scoring.

    Args:
        model: Object exposing ``encode`` (defined elsewhere).
        all_train_data, all_test_data: Dicts with keys ``'x'`` (per-series
            values, keyed by series id) and ``'mask'`` (indexed by series
            position).
        all_train_labels, all_test_labels: Per-series labels keyed like ``'x'``.
        all_train_timestamps, all_test_timestamps: Per-series timestamps.
        delay: Detection delay; used both to suppress repeated alarms and for
            the segment-level scoring.

    Returns:
        ``(res_log, eval_res)``: the per-series 0/1 predictions (after the
        warm-up part) and a dict with ``'f1'``, ``'precision'``, ``'recall'``
        and ``'infer_time'``.
    """
    t = time.time()

    train_mask, test_mask = all_train_data['mask'], all_test_data['mask']
    all_train_data, all_test_data = all_train_data['x'], all_test_data['x']
    # Global timestamp range, taken from the training timestamps only.
    ts = [[np.nanmin(all_train_timestamps[k]), np.nanmax(all_train_timestamps[k])] for k in all_train_timestamps]
    ts_max, ts_min = np.max(np.array(ts)), np.min(np.array(ts))

    all_data = {}
    all_repr = {}
    all_repr_wom = {}
    for i, k in enumerate(all_train_data):
        train_data = all_train_data[k]
        test_data = all_test_data[k]

        # Min-max scale the timestamps and append them as an extra feature column.
        train_ts = (np.array(all_train_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min)
        test_ts = (np.array(all_test_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min)
        train_data = np.concatenate([train_data.reshape(1, -1, 1), train_ts.reshape(1, -1, 1)], axis=-1)
        test_data = np.concatenate([test_data.reshape(1, -1, 1), test_ts.reshape(1, -1, 1)], axis=-1)
        # Row i of the masks is cut to this series' length and joined train-then-test.
        data = {'x': np.concatenate([train_data, test_data], axis=1),
                'mask': np.concatenate(
                    [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)}

        # Raw values of the joined stream; only its length is used later.
        all_data[k] = np.concatenate([all_train_data[k], all_test_data[k]])
        # Representation with the last step of each window masked.
        all_repr[k] = model.encode(
            # all_data[k].reshape(1, -1, 1),
            data,
            mask='mask_last',
            casual=True,
            sliding_length=1,
            sliding_padding=200,
            batch_size=256
        ).squeeze()
        # Representation without masking ("wom" = without mask); same `data` object is reused.
        all_repr_wom[k] = model.encode(
            # all_data[k].reshape(1, -1, 1),
            data,
            casual=True,
            sliding_length=1,
            sliding_padding=200,
            batch_size=256
        ).squeeze()

    res_log = []
    labels_log = []
    timestamps_log = []
    for k in all_data:
        data = all_data[k]
        labels = np.concatenate([all_train_labels[k], all_test_labels[k]])
        timestamps = np.concatenate([all_train_timestamps[k], all_test_timestamps[k]])

        # Raw score and its relative deviation from the 21-point moving
        # average of the preceding scores.
        err = np.abs(all_repr_wom[k] - all_repr[k]).sum(axis=1)
        ma = np_shift(bn.move_mean(err, 21), 1)
        err_adj = (err - ma) / ma

        # Warm-up length: one tenth of the stream.
        MIN_WINDOW = len(data) // 10
        # Window = whole stream with MIN_WINDOW as minimum count
        # (presumably an expanding-window mean/std -- confirm against bottleneck docs).
        thr = bn.move_mean(err_adj, len(err_adj), MIN_WINDOW) + 4 * bn.move_std(err_adj, len(err_adj), MIN_WINDOW)
        res = (err_adj > thr) * 1

        # Suppress an alarm when another alarm survives within the previous
        # `delay` points; sequential on purpose, earlier resets affect later checks.
        for i in range(len(res)):
            if i >= delay and res[i - delay:i].sum() >= 1:
                res[i] = 0

        # Score only the part after the warm-up.
        res_log.append(res[MIN_WINDOW:])
        labels_log.append(labels[MIN_WINDOW:])
        timestamps_log.append(timestamps[MIN_WINDOW:])
    # Elapsed time covers encoding and thresholding, not the scoring below.
    t = time.time() - t

    eval_res = eval_ad_result(res_log, labels_log, timestamps_log, delay)
    eval_res['infer_time'] = t
    return res_log, eval_res
244 |
245 |
--------------------------------------------------------------------------------
/src/tasks/classification.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from . import _eval_protocols as eval_protocols
3 | from sklearn.preprocessing import label_binarize
4 | from sklearn.metrics import average_precision_score, roc_auc_score
5 |
6 |
def eval_classification(model, train_data, train_labels, test_data, test_labels, eval_protocol='linear'):
    """Fit a classifier on encoded representations and score it on the test set.

    Args:
        model: Object exposing ``encode`` (defined elsewhere).
        train_data, test_data: Inputs forwarded to ``model.encode``.
        train_labels, test_labels: 1-D labels (one per series; the whole
            series is encoded) or 2-D labels (one per time step; series and
            time axes are merged before fitting).
        eval_protocol: ``'linear'``, ``'svm'`` or ``'knn'``.

    Returns:
        ``(y_score, {'acc': ..., 'auprc': ...})`` where ``y_score`` holds the
        classifier's test scores. The SVM uses ``decision_function``; the
        linear and kNN pipelines use ``predict_proba``.
    """
    assert train_labels.ndim == 1 or train_labels.ndim == 2

    # Validate the protocol before the (expensive) encoding passes.
    fitters = {
        'linear': eval_protocols.fit_lr,
        'svm': eval_protocols.fit_svm,
        'knn': eval_protocols.fit_knn,
    }
    assert eval_protocol in fitters, 'unknown evaluation protocol'
    fit_clf = fitters[eval_protocol]

    encoding_window = 'full_series' if train_labels.ndim == 1 else None
    train_repr = model.encode(train_data, encoding_window=encoding_window)
    test_repr = model.encode(test_data, encoding_window=encoding_window)

    def merge_dim01(array):
        return array.reshape(array.shape[0]*array.shape[1], *array.shape[2:])

    if train_labels.ndim == 2:
        train_repr = merge_dim01(train_repr)
        train_labels = merge_dim01(train_labels)
        test_repr = merge_dim01(test_repr)
        test_labels = merge_dim01(test_labels)

    clf = fit_clf(train_repr, train_labels)

    acc = clf.score(test_repr, test_labels)
    if eval_protocol == 'svm':
        y_score = clf.decision_function(test_repr)
    else:
        # The kNN pipeline has no decision_function (the original code raised
        # AttributeError here for 'knn'); like the linear pipeline it exposes
        # predict_proba.
        y_score = clf.predict_proba(test_repr)
    test_labels_onehot = label_binarize(test_labels, classes=np.arange(train_labels.max()+1))
    auprc = average_precision_score(test_labels_onehot, y_score)

    return y_score, { 'acc': acc, 'auprc': auprc}
41 |
--------------------------------------------------------------------------------
/src/tasks/forecasting.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import time
3 | from . import _eval_protocols as eval_protocols
4 |
def generate_pred_samples(features, data, pred_len, drop=0):
    """Pair each time step's features with the next ``pred_len`` target values.

    Args:
        features: Array of shape (N, T, F).
        data: Array of shape (N, T, C) with the target values.
        pred_len: Forecast horizon.
        drop: Number of leading time steps to discard from every series.

    Returns:
        ``(X, Y)`` with ``X`` of shape (N * (T - pred_len - drop), F) and
        ``Y`` of shape (N * (T - pred_len - drop), pred_len * C). Row ``t`` of
        ``Y`` holds ``data[t + 1 : t + 1 + pred_len]`` flattened.
    """
    n = data.shape[1]
    # One shifted view of the targets per horizon step, stacked on a new axis.
    shifted_views = [data[:, step:1 + n + step - pred_len] for step in range(pred_len)]
    labels = np.stack(shifted_views, axis=2)[:, 1:][:, drop:]
    features = features[:, :-pred_len][:, drop:]

    flat_features = features.reshape(-1, features.shape[-1])
    flat_labels = labels.reshape(-1, labels.shape[2] * labels.shape[3])
    return flat_features, flat_labels
13 |
def cal_metrics(pred, target):
    """Return the mean squared and mean absolute error as ``{'MSE': ..., 'MAE': ...}``."""
    mse = ((pred - target) ** 2).mean()
    mae = np.abs(pred - target).mean()
    return {'MSE': mse, 'MAE': mae}
19 |
20 |
def eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols):
    """Evaluate forecasting with a ridge regressor on top of the representations.

    The whole series is encoded once; for every horizon in ``pred_lens`` a
    ridge model is tuned on the validation slice, fitted on the training slice
    and scored on the test slice. Targets are the columns of ``data['x']``
    after the first ``n_covariate_cols``, without the last column.

    ``scaler`` is accepted but not used: metrics are reported on the values as
    stored in ``data`` (key ``'norm'``) only.

    Returns:
        ``(out_log, eval_res)``. ``out_log[pred_len]`` holds the predictions
        (``'norm'``) and targets (``'norm_gt'``); ``eval_res`` holds the
        metrics under ``'ours'`` plus the timing entries.
    """
    padding = 200

    started = time.time()
    all_repr = model.encode(
        data,
        casual=True,
        sliding_length=1,
        sliding_padding=padding,
        batch_size=256
    )
    ts2vec_infer_time = time.time() - started

    def target_columns(time_slice):
        return data['x'][:, time_slice, n_covariate_cols:][..., :-1]

    train_repr, train_data = all_repr[:, train_slice], target_columns(train_slice)
    valid_repr, valid_data = all_repr[:, valid_slice], target_columns(valid_slice)
    test_repr, test_data = all_repr[:, test_slice], target_columns(test_slice)

    out_log = {}
    ours_result = {}
    lr_train_time = {}
    lr_infer_time = {}
    for pred_len in pred_lens:
        # The first `padding` training steps are dropped.
        train_features, train_labels = generate_pred_samples(train_repr, train_data, pred_len, drop=padding)
        valid_features, valid_labels = generate_pred_samples(valid_repr, valid_data, pred_len)
        test_features, test_labels = generate_pred_samples(test_repr, test_data, pred_len)

        started = time.time()
        lr = eval_protocols.fit_ridge(train_features, train_labels, valid_features, valid_labels)
        lr_train_time[pred_len] = time.time() - started

        started = time.time()
        test_pred = lr.predict(test_features)
        lr_infer_time[pred_len] = time.time() - started

        # Back to (series, time, horizon, target column).
        ori_shape = test_data.shape[0], -1, pred_len, test_data.shape[2]
        test_pred = test_pred.reshape(ori_shape)
        test_labels = test_labels.reshape(ori_shape)

        out_log[pred_len] = {
            'norm': test_pred,
            'norm_gt': test_labels,
        }
        ours_result[pred_len] = {
            'norm': cal_metrics(test_pred, test_labels),
        }

    eval_res = {
        'ours': ours_result,
        'ts2vec_infer_time': ts2vec_infer_time,
        'lr_train_time': lr_train_time,
        'lr_infer_time': lr_infer_time
    }
    return out_log, eval_res
91 |
--------------------------------------------------------------------------------
/src/tasks/imputation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.utils.data import TensorDataset, DataLoader
4 |
5 |
def metrics(true, pred, mask):
    """Return MSE, MAE and RMSE over the positions where ``mask`` is 0.

    ``mask`` is 1 for kept entries and 0 for the entries that were hidden;
    only the hidden entries contribute to the errors.
    """
    hidden = 1. - mask
    n_hidden = hidden.sum()
    hidden_error = (true - pred) * hidden
    mse = np.power(hidden_error, 2).sum() / n_hidden
    mae = np.abs(hidden_error).sum() / n_hidden
    return {'mse': mse, 'mae': mae, 'rmse': np.sqrt(mse)}
12 |
13 |
def split(data, test_slice, seq_len):
    """Cut the first series of the test slice into non-overlapping windows.

    Args:
        data: Dict with arrays under ``'x'`` and ``'mask'``, indexed as
            (series, time, feature).
        test_slice: Index applied to the time axis.
        seq_len: Window length; a trailing remainder shorter than this is dropped.

    Returns:
        ``(value, mask)`` float64 tensors of shape
        (n_windows, seq_len, features). Only series 0 is used.
    """
    x = data['x'][:, test_slice]
    m = data['mask'][:, test_slice]
    n_windows = x.shape[1] // seq_len

    value = np.zeros((n_windows, seq_len, x.shape[2]))
    mask = np.zeros((n_windows, seq_len, m.shape[2]))
    for w in range(n_windows):
        lo, hi = w * seq_len, (w + 1) * seq_len
        value[w] = x[0, lo:hi, :]
        mask[w] = m[0, lo:hi, :]
    return torch.from_numpy(value), torch.from_numpy(mask)
23 |
24 |
def eval_imputation(model, data, test_slice, missing_rate, n_covariate_cols, device):
    """Evaluate imputation on randomly hidden entries of the test slice.

    The test slice is cut into windows of 96 steps. For every batch a random
    keep-mask is drawn (an entry is hidden with probability ``missing_rate``),
    hidden entries are zeroed, and ``model.net`` is asked to impute them.
    Errors are measured on the hidden entries only.

    Args:
        model: Object whose ``net`` is called as
            ``net(values, mask, imputation=True)`` (defined elsewhere).
        data: Dict with ``'x'`` and ``'mask'`` arrays, see ``split``.
        test_slice: Index applied to the time axis.
        missing_rate: Probability in [0, 1] of hiding an entry.
        n_covariate_cols: Number of leading feature columns excluded from scoring.
        device: Torch device the batches are moved to.

    Returns:
        ``(None, {'mse': ..., 'mae': ..., 'rmse': ...})``.
    """
    value, mask = split(data, test_slice, 96)
    test_loader = DataLoader(TensorDataset(value, mask), batch_size=128, shuffle=False, num_workers=8)
    true, pred, m = [], [], []
    with torch.no_grad():
        for batch in test_loader:
            x = batch[0].float().to(device)
            # The last column is not part of the values to impute.
            values = x[..., :-1]

            # 1 = keep, 0 = hidden. Uniform draws make the hidden fraction
            # equal to missing_rate; the original thresholded *normal* draws
            # (randn), which hides ~50% at missing_rate=0.
            mask = (torch.rand_like(values) > missing_rate).to(values.dtype)

            out = model.net(values.masked_fill(mask == 0., 0.), mask, imputation=True)

            true.append(values.cpu().detach().numpy())
            pred.append(out.cpu().detach().numpy())
            m.append(mask.cpu().detach().numpy())

    # Slice the mask like the values so the shapes agree when covariate
    # columns are excluded.
    true = np.concatenate(true, axis=0)[..., n_covariate_cols:]
    pred = np.concatenate(pred, axis=0)[..., n_covariate_cols:]
    m = np.concatenate(m, axis=0)[..., n_covariate_cols:]
    return None, metrics(true, pred, m)
52 |
--------------------------------------------------------------------------------
/src/timesurl.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.utils.data import TensorDataset, DataLoader
5 | import numpy as np
6 | from models import TSEncoder
7 | from models.losses import hierarchical_contrastive_loss
8 | from utils import take_per_row, split_with_nan, centerize_vary_length_series, torch_pad_nan
9 | from utils import inter_cubic_sp_torch
10 | from utils import convert_coeff
11 | from lib import get_unlabeled_pretrain_data
12 |
13 |
def tp_noneffect(func, x, **kwargs):
    """Apply ``func`` to every column of ``x`` except the last one.

    The last column along the final axis is set aside, ``func`` is called on
    the remaining columns with ``**kwargs``, and the untouched last column is
    appended to the result.
    """
    values, tp = x[..., :-1], x[..., -1:]
    transformed = func(values, **kwargs)
    return torch.cat([transformed, tp], dim=-1)
18 |
19 |
def freq_mix(x, rate=0.5, dim=1):
    """Swap a random subset of frequency components with another series of the batch.

    The FFT of ``x`` is taken along ``dim``. Components selected by a random
    mask (probability ``rate``, restricted by ``dominant_mask``) are replaced
    by the corresponding components of a randomly chosen other series of the
    batch. The magnitude of the inverse FFT is returned.

    Args:
        x: Real tensor with the batch on axis 0.
        rate: Probability of selecting a component for replacement.
        dim: Axis along which the FFT is computed.

    Returns:
        Tensor with the shape of ``x``. Works on whatever device ``x`` lives on.
    """
    x_f = torch.fft.fft(x, dim=dim)

    # Draw the mask on x's own device; the original torch.cuda.FloatTensor
    # required CUDA and always used the default GPU.
    m = torch.rand(x_f.shape, device=x_f.device) < rate
    amp = abs(x_f)
    _, index = amp.sort(dim=dim, descending=True)
    # NOTE(review): `index` holds the source positions returned by sort, so
    # this protects entries by position value rather than by rank -- confirm
    # this is the intended "keep the dominant frequencies" rule.
    dominant_mask = index > 2
    m = torch.bitwise_and(m, dominant_mask)
    freal = x_f.real.masked_fill(m, 0)
    fimag = x_f.imag.masked_fill(m, 0)

    # Partner series: a random permutation of the batch.
    b_idx = np.arange(x.shape[0])
    np.random.shuffle(b_idx)
    x2 = x[b_idx]
    x2_f = torch.fft.fft(x2, dim=dim)

    # Keep from the partner exactly the components removed from x.
    m = torch.bitwise_not(m)
    freal2 = x2_f.real.masked_fill(m, 0)
    fimag2 = x2_f.imag.masked_fill(m, 0)

    freal += freal2
    fimag += fimag2

    x_f = torch.complex(freal, fimag)

    x = torch.abs(torch.fft.ifft(x_f, dim=dim))
    return x
47 |
48 |
def freq_dropout(x, dropout_rate=0.5, dim=1):
    """Zero a random subset of frequency components of ``x``.

    The FFT of ``x`` is taken along ``dim``; components selected by a random
    mask (probability ``dropout_rate``, restricted by ``dominant_mask``) are
    set to zero and the magnitude of the inverse FFT is returned.

    Args:
        x: Real tensor.
        dropout_rate: Probability of dropping a component.
        dim: Axis used for the forward FFT, the sort and the inverse FFT.
            Defaults to 1, the axis the original sort and inverse FFT used.

    Returns:
        Tensor with the shape of ``x``. Works on whatever device ``x`` lives on.
    """
    # Forward and inverse transforms must use the same axis; the original
    # forward FFT ran on the default last axis while the inverse used axis 1.
    x_aug_f = torch.fft.fft(x, dim=dim)

    # Draw the mask on x's own device; the original torch.cuda.FloatTensor
    # required CUDA and always used the default GPU.
    m = torch.rand(x_aug_f.shape, device=x_aug_f.device) < dropout_rate
    amp = torch.abs(x_aug_f)
    _, index = amp.sort(dim=dim, descending=True)
    # NOTE(review): `index` holds the source positions returned by sort, so
    # this protects entries by position value rather than by rank -- confirm
    # this is the intended "keep the dominant frequencies" rule.
    dominant_mask = index > 5
    m = torch.bitwise_and(m, dominant_mask)
    freal = x_aug_f.real.masked_fill(m, 0)
    fimag = x_aug_f.imag.masked_fill(m, 0)
    x_aug_f = torch.complex(freal, fimag)
    return torch.abs(torch.fft.ifft(x_aug_f, dim=dim))
62 |
63 |
class TimesURL:
    '''The TimesURL model.

    Trains a ``TSEncoder`` with a hierarchical contrastive loss plus a masked
    reconstruction loss, and keeps a parameter-averaged copy of it
    (``torch.optim.swa_utils.AveragedModel``) that is used for encoding,
    saving and loading.
    '''

    def __init__(
        self,
        input_dims,
        output_dims=320,
        hidden_dims=64,
        depth=10,
        device='cuda',
        lr=0.001,
        batch_size=16,
        sgd=False,
        max_train_length=None,
        temporal_unit=0,
        after_iter_callback=None,
        after_epoch_callback=None,
        args=None
    ):
        ''' Initialize a TimesURL model.

        Args:
            input_dims (int): The input dimension. For a univariate time series, this should be set to 1.
            output_dims (int): The representation dimension.
            hidden_dims (int): The hidden dimension of the encoder.
            depth (int): The number of hidden residual blocks in the encoder.
            device (Union[str, int, torch.device]): The device used for training and inference.
            lr (float): The learning rate.
            batch_size (int): The batch size used by ``encode`` when none is given.
            sgd (bool): Use SGD with momentum instead of AdamW in ``fit``.
            max_train_length (Union[int, NoneType]): The maximum allowed sequence length for training. Longer sequences are split into several shorter ones before training.
            temporal_unit (int): The minimum unit to perform temporal contrast. When training on a very long sequence, this param helps to reduce the cost of time and memory.
            after_iter_callback (Union[Callable, NoneType]): Called as ``callback(model, loss)`` after each iteration.
            after_epoch_callback (Union[Callable, NoneType]): Called as ``callback(model, loss)`` after each epoch.
            args: Extra options; ``fit`` reads ``args.lmd`` and forwards ``args`` to ``get_unlabeled_pretrain_data``.
        '''
        self.device = device
        self.lr = lr
        self.sgd = sgd
        self.batch_size = batch_size
        self.max_train_length = max_train_length
        self.temporal_unit = temporal_unit

        # `_net` is the network being optimised; `net` is its running average.
        self._net = TSEncoder(input_dims=input_dims, output_dims=output_dims, hidden_dims=hidden_dims, depth=depth).to(self.device)
        self.net = torch.optim.swa_utils.AveragedModel(self._net)
        self.net.update_parameters(self._net)

        self.after_iter_callback = after_iter_callback
        self.after_epoch_callback = after_epoch_callback
        self.args = args

        self.n_epochs = 0
        self.n_iters = 0

    @staticmethod
    def _masked_recon_loss(target, recon, weights):
        '''Return half the weighted mean squared error, or ``None`` when ``weights`` sums to zero.'''
        denom = torch.sum(weights)
        if not denom > 0:
            return None
        return torch.sum(torch.pow((target - recon) * weights, 2)) / (denom + 1e-10) / 2

    def fit(self, train_data, n_epochs=None, n_iters=None, verbose=False, is_scheduler=True, temp=1.0):
        ''' Training the TimesURL model.

        Args:
            train_data (dict): ``{'x': ndarray, 'mask': ndarray}``. ``x`` has shape (n_instance, n_timestamps, n_features); its last feature channel is split off and passed to the data pipeline separately from the other channels. All missing data should be set to NaN.
            n_epochs (Union[int, NoneType]): The number of epochs. When this reaches, the training stops.
            n_iters (Union[int, NoneType]): The number of iterations. When this reaches, the training stops. If both n_epochs and n_iters are not specified, n_iters defaults to 200 for a dataset with size <= 100000, 600 otherwise. It is enlarged by 20% when the learning rate is <= 1e-5.
            verbose (bool): Whether to print the training loss after each epoch.
            is_scheduler (bool): Whether to anneal the learning rate with a cosine schedule, stepped once per epoch.
            temp (float): Temperature forwarded to the contrastive loss.

        Returns:
            loss_log: a list containing the training losses on each epoch.
        '''
        train_data, mask = train_data['x'], train_data['mask']

        assert train_data.ndim == 3

        if n_iters is None and n_epochs is None:
            n_iters = 200 if train_data.size <= 100000 else 600  # default param for n_iters

        if self.lr <= 1e-5 and n_iters is not None:
            # Keep the budget an integer; ceil gives the same stopping point
            # as comparing the integer counter against the fractional budget.
            n_iters = int(np.ceil(n_iters * 1.2))

        if self.max_train_length is not None:
            sections = train_data.shape[1] // self.max_train_length
            if sections >= 2:
                train_data = np.concatenate(split_with_nan(train_data, sections, axis=1), axis=0)
                mask = np.concatenate(split_with_nan(mask, sections, axis=1), axis=0)

        temporal_missing = np.isnan(train_data).all(axis=-1).any(axis=0)
        if temporal_missing[0] or temporal_missing[-1]:
            train_data, mask = centerize_vary_length_series(train_data, mask)

        # Drop instances whose value channels (everything but the last one) are entirely NaN.
        keep = ~np.isnan(train_data[..., :-1]).all(axis=2).all(axis=1)
        mask = mask[keep]
        train_data = train_data[keep]
        mask[np.isnan(mask)] = 0
        x, t = train_data[..., :-1], train_data[..., -1:]
        obj = get_unlabeled_pretrain_data(np.concatenate([x, mask, t], axis=-1), self.args)
        train_loader = obj['train_dataloader']

        if self.sgd:
            optimizer = torch.optim.SGD(self._net.parameters(), lr=self.lr, weight_decay=5e-4, momentum=0.9)
        else:
            optimizer = torch.optim.AdamW(self._net.parameters(), lr=self.lr, weight_decay=5e-4)

        scheduler = None
        if is_scheduler:
            if n_iters is not None and n_epochs is None:
                max_epochs = n_iters // len(train_loader)
            else:
                max_epochs = n_epochs
            # T_max must be at least 1, otherwise the cosine schedule divides by zero.
            max_epochs = max(1, int(max_epochs))
            scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, max_epochs)

        loss_log = []

        while True:
            if n_epochs is not None and self.n_epochs >= n_epochs:
                break

            cum_loss = 0
            n_epoch_iters = 0

            interrupted = False
            for batch in train_loader:
                if n_iters is not None and self.n_iters >= n_iters:
                    interrupted = True
                    break

                value = batch['value'].to(self.device)
                batch_time = batch['time'].to(self.device)
                batch_mask = batch['mask'].to(self.device)
                mask_origin = batch['mask_origin'].to(self.device)

                optimizer.zero_grad()

                loss = torch.tensor([0.]).to(self.device)
                for seq in range(value.size(1)):
                    x, t, m, m_old = value[:, seq], batch_time[:, seq], batch_mask[:, seq], mask_origin[:, seq]
                    dim = x.size(-1)
                    x = torch.cat([x, t.unsqueeze(2)], dim=-1)

                    # Sample two overlapping crops; their overlap has length crop_l.
                    ts_l = x.size(1)
                    crop_l = np.random.randint(low=2 ** (self.temporal_unit + 1), high=ts_l + 1)
                    crop_left = np.random.randint(ts_l - crop_l + 1)
                    crop_right = crop_left + crop_l
                    crop_eleft = np.random.randint(crop_left + 1)
                    crop_eright = np.random.randint(low=crop_right, high=ts_l + 1)
                    crop_offset = np.random.randint(low=-crop_eleft, high=ts_l - crop_eright + 1, size=x.size(0))

                    left_start, left_len = crop_offset + crop_eleft, crop_right - crop_eleft
                    right_start, right_len = crop_offset + crop_left, crop_eright - crop_left

                    x_left = take_per_row(x, left_start, left_len)
                    x_right = tp_noneffect(freq_mix, take_per_row(x, right_start, right_len), rate=0.5)

                    # The first `dim` mask channels and the remaining ones are fed to the encoder separately.
                    mask1 = take_per_row(m[..., :dim], left_start, left_len)
                    mask2 = take_per_row(m[..., :dim], right_start, right_len)

                    mask1_inter = take_per_row(m[..., dim:], left_start, left_len)
                    mask2_inter = take_per_row(m[..., dim:], right_start, right_len)

                    mask1_origin = take_per_row(m_old, left_start, left_len)
                    mask2_origin = take_per_row(m_old, right_start, right_len)

                    out1, left_recon = self._net({'data': x_left, 'mask': mask1, 'mask_inter': mask1_inter, 'mask_origin': mask1_origin})
                    out2, right_recon = self._net({'data': x_right, 'mask': mask2, 'mask_inter': mask2_inter, 'mask_origin': mask2_origin})

                    # Keep only the part the two crops share.
                    out1, left_recon = out1[:, -crop_l:], left_recon[:, -crop_l:]
                    out2, right_recon = out2[:, :crop_l], right_recon[:, :crop_l]

                    x_left, x_right = x_left[:, -crop_l:], x_right[:, :crop_l]
                    mask1_inter, mask2_inter = mask1_inter[:, -crop_l:], mask2_inter[:, :crop_l]

                    loss += self.args.lmd * hierarchical_contrastive_loss(
                        out1,
                        out2,
                        temporal_unit=self.temporal_unit,
                        temp=temp
                    )

                    left_term = self._masked_recon_loss(x_left[..., :-1], left_recon, mask1_inter)
                    if left_term is not None:
                        loss += left_term
                    right_term = self._masked_recon_loss(x_right[..., :-1], right_recon, mask2_inter)
                    if right_term is not None:
                        loss += right_term

                loss.requires_grad_(True)
                loss.backward()
                optimizer.step()
                self.net.update_parameters(self._net)

                cum_loss += loss.item()
                n_epoch_iters += 1

                self.n_iters += 1

                if self.after_iter_callback is not None:
                    self.after_iter_callback(self, loss.item())

            if interrupted and n_epoch_iters == 0:
                # The iteration budget ran out exactly on an epoch boundary:
                # there is no partial epoch to report.
                break

            cum_loss /= n_epoch_iters if n_epoch_iters else 1
            loss_log.append(cum_loss)
            if verbose:
                print(f"Epoch #{self.n_epochs}: loss={cum_loss}")
            self.n_epochs += 1
            if scheduler is not None:
                scheduler.step()

            if self.after_epoch_callback is not None:
                self.after_epoch_callback(self, cum_loss)

            if interrupted:
                break

        return loss_log

    def _eval_with_pooling(self, x, mask=None, slicing=None, encoding_window=None):
        '''Run the averaged network on ``x`` and max-pool its output over time.

        ``encoding_window`` selects the pooling: ``'full_series'`` (one
        vector per series), an ``int`` kernel size, ``'multiscale'``
        (concatenation of growing kernels) or anything else for no pooling.
        ``slicing`` is applied along the time axis. The result is returned
        on the CPU.
        '''
        out = self.net(x.to(self.device, non_blocking=True), mask)
        if encoding_window == 'full_series':
            if slicing is not None:
                out = out[:, slicing]
            out = F.max_pool1d(
                out.transpose(1, 2),
                kernel_size=out.size(1),
            ).transpose(1, 2)

        elif isinstance(encoding_window, int):
            out = F.max_pool1d(
                out.transpose(1, 2),
                kernel_size=encoding_window,
                stride=1,
                padding=encoding_window // 2
            ).transpose(1, 2)
            if encoding_window % 2 == 0:
                # An even kernel with this padding yields one extra step.
                out = out[:, :-1]
            if slicing is not None:
                out = out[:, slicing]

        elif encoding_window == 'multiscale':
            p = 0
            reprs = []
            while (1 << p) + 1 < out.size(1):
                t_out = F.max_pool1d(
                    out.transpose(1, 2),
                    kernel_size=(1 << (p + 1)) + 1,
                    stride=1,
                    padding=1 << p
                ).transpose(1, 2)
                if slicing is not None:
                    t_out = t_out[:, slicing]
                reprs.append(t_out)
                p += 1
            out = torch.cat(reprs, dim=-1)

        else:
            if slicing is not None:
                out = out[:, slicing]

        return out.cpu()

    def encode(self, data, mask=None, encoding_window=None, casual=False, sliding_length=None, sliding_padding=0,
               batch_size=None):
        ''' Compute representations using the model.

        Args:
            data (Union[numpy.ndarray, dict]): An array of shape (n_instance, n_timestamps, n_features), or a dict ``{'x': ..., 'mask': ...}`` whose two arrays are concatenated along the feature axis. All missing data should be set to NaN.
            mask (str): The mask used by encoder can be specified with this parameter. This can be set to 'binomial', 'continuous', 'all_true', 'all_false' or 'mask_last'.
            encoding_window (Union[str, int]): When this param is specified, the computed representation would the max pooling over this window. This can be set to 'full_series', 'multiscale' or an integer specifying the pooling kernel size.
            casual (bool): When this param is set to True, the future informations would not be encoded into representation of each timestamp.
            sliding_length (Union[int, NoneType]): The length of sliding window. When this param is specified, a sliding inference would be applied on the time series.
            sliding_padding (int): This param specifies the contextual data length used for inference every sliding windows.
            batch_size (Union[int, NoneType]): The batch size used for inference. If not specified, this would be the same batch size as training.

        Returns:
            repr: The representations for data, as a numpy array.
        '''
        assert self.net is not None, 'please train or load a net first'
        assert isinstance(data, dict) or data.ndim == 3
        if batch_size is None:
            batch_size = self.batch_size
        n_samples, ts_l, _ = data.shape if not isinstance(data, dict) else data['x'].shape

        org_training = self.net.training
        self.net.eval()
        try:
            if isinstance(data, dict):
                data = np.concatenate((data['x'], data['mask']), axis=-1)
            dataset = TensorDataset(torch.from_numpy(data).to(torch.float))
            loader = DataLoader(dataset, batch_size=batch_size)

            # With fewer samples than the batch size, several sliding windows
            # are stacked into one forward pass.
            buffered = n_samples < batch_size

            with torch.no_grad():
                output = []
                for batch in loader:
                    x = batch[0]
                    if sliding_length is not None:
                        window = slice(sliding_padding, sliding_padding + sliding_length)
                        reprs = []
                        calc_buffer = []
                        calc_buffer_l = 0
                        for i in range(0, ts_l, sliding_length):
                            l = i - sliding_padding
                            r = i + sliding_length + (sliding_padding if not casual else 0)
                            x_sliding = torch_pad_nan(
                                x[:, max(l, 0): min(r, ts_l)],
                                left=-l if l < 0 else 0,
                                right=r - ts_l if r > ts_l else 0,
                                dim=1
                            )
                            if buffered:
                                if calc_buffer_l + n_samples > batch_size:
                                    out = self._eval_with_pooling(
                                        torch.cat(calc_buffer, dim=0),
                                        mask,
                                        slicing=window,
                                        encoding_window=encoding_window
                                    )
                                    reprs += torch.split(out, n_samples)
                                    calc_buffer = []
                                    calc_buffer_l = 0
                                calc_buffer.append(x_sliding)
                                calc_buffer_l += n_samples
                            else:
                                out = self._eval_with_pooling(
                                    x_sliding,
                                    mask,
                                    slicing=window,
                                    encoding_window=encoding_window
                                )
                                reprs.append(out)

                        if buffered and calc_buffer_l > 0:
                            out = self._eval_with_pooling(
                                torch.cat(calc_buffer, dim=0),
                                mask,
                                slicing=window,
                                encoding_window=encoding_window
                            )
                            reprs += torch.split(out, n_samples)

                        out = torch.cat(reprs, dim=1)
                        if encoding_window == 'full_series':
                            # Pool over the windows and drop the pooled axis so
                            # the shape matches the non-sliding branch.
                            out = F.max_pool1d(
                                out.transpose(1, 2).contiguous(),
                                kernel_size=out.size(1),
                            ).squeeze(-1)
                    else:
                        out = self._eval_with_pooling(x, mask, encoding_window=encoding_window)
                        if encoding_window == 'full_series':
                            out = out.squeeze(1)

                    output.append(out)

                output = torch.cat(output, dim=0)
        finally:
            # Restore the previous train/eval mode even if inference failed.
            self.net.train(org_training)
        return output.numpy()

    def save(self, fn):
        ''' Save the averaged model's state dict to a file.

        Args:
            fn (str): filename.
        '''
        torch.save(self.net.state_dict(), fn)

    def load(self, fn):
        ''' Load the averaged model's state dict from a file.

        Args:
            fn (str): filename.
        '''
        state_dict = torch.load(fn, map_location=self.device)
        self.net.load_state_dict(state_dict)
431 |
--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import torch
4 | import numpy as np
5 | import argparse
6 | import os
7 | import sys
8 | import time
9 | import datetime
10 | from timesurl import TimesURL
11 | import tasks
12 | import datautils
13 | from utils import init_dl_program, name_with_datetime, pkl_save, data_dropout
14 |
def save_checkpoint_callback(
    save_every=1,
    unit='epoch'
):
    '''Build a training callback that checkpoints the model periodically.

    The returned ``callback(model, loss)`` reads ``model.n_epochs`` (for
    ``unit='epoch'``) or ``model.n_iters`` (for ``unit='iter'``) and, when
    that counter is a multiple of ``save_every``, calls ``model.save`` with
    a path under the module-level ``run_dir``.
    '''
    assert unit in ('epoch', 'iter')
    counter_attr = 'n_epochs' if unit == 'epoch' else 'n_iters'

    def callback(model, loss):
        count = getattr(model, counter_attr)
        if count % save_every != 0:
            return
        # `run_dir` is resolved at call time from the module globals.
        model.save(f'{run_dir}/model_{count}.pkl')

    return callback
25 |
if __name__ == '__main__':
    # Command-line entry point: parse the options, load the requested
    # dataset, train a TimesURL model and optionally evaluate it.
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help='The dataset name')
    parser.add_argument('run_name', help='The folder name used to save model, output and evaluation metrics. This can be set to any word')
    parser.add_argument('--loader', type=str, required=True, help='The data loader used to load the experimental data. This can be set to UCR, Others, UEA, forecast_csv, forecast_csv_univar, forecast_npy, forecast_npy_univar, or anomaly')
    parser.add_argument('--gpu', type=int, default=0, help='The gpu no. used for training and inference (defaults to 0)')
    parser.add_argument('--batch-size', type=int, default=8, help='The batch size (defaults to 8)')
    parser.add_argument('--lr', type=float, default=0.0001, help='The learning rate (defaults to 0.0001)')
    parser.add_argument('--repr-dims', type=int, default=320, help='The representation dimension (defaults to 320)')
    parser.add_argument('--max-train-length', type=int, default=3000, help='For sequence with a length greater than <max_train_length>, it would be cropped into some sequences, each of which has a length less than <max_train_length> (defaults to 3000)')
    parser.add_argument('--iters', type=int, default=None, help='The number of iterations')
    parser.add_argument('--epochs', type=int, default=None, help='The number of epochs')
    parser.add_argument('--save-every', type=int, default=None, help='Save the checkpoint every <save_every> iterations/epochs')
    parser.add_argument('--seed', type=int, default=None, help='The random seed')
    parser.add_argument('--max-threads', type=int, default=None, help='The maximum allowed number of threads used by this process')
    parser.add_argument('--eval', action="store_true", help='Whether to perform evaluation after training')
    parser.add_argument('--sgd', action="store_true", help='Whether to train with SGD and a cosine learning-rate schedule instead of AdamW')
    parser.add_argument('--load_tp', action="store_true", help='Whether to load timestamps together with the data (currently always enabled)')
    parser.add_argument('--temp', type=float, default=1.0, help='The temperature passed to the contrastive loss (defaults to 1.0)')
    parser.add_argument('--lmd', type=float, default=0.01, help='The weight of the contrastive loss term (defaults to 0.01)')
    parser.add_argument('--irregular', type=float, default=0, help='The ratio of missing observations (defaults to 0)')
    parser.add_argument('--segment_num', type=int, default=3,
                        help='number of time interval segment to mask, default: 3 time intervals')
    parser.add_argument('--mask_ratio_per_seg', type=float, default=0.05,
                        help='fraction of the sequence length to mask for each time interval, default: 0.05 * seq_len to be masked for each of the time interval')
    # The imputation evaluation reads args.missing_rate, so it has to be a
    # declared option. There is no default: the value must be given
    # explicitly whenever that evaluation is requested.
    parser.add_argument('--missing-rate', '--missing_rate', dest='missing_rate', type=float, default=None,
                        help='The missing rate passed to the imputation evaluation (required with --eval on imputation runs)')
    args = parser.parse_args()

    print("Dataset:", args.dataset)
    print("Arguments:", str(args))

    device = init_dl_program(args.gpu, seed=args.seed, max_threads=args.max_threads, deterministic=False)

    # Timestamps are always loaded, regardless of the --load_tp flag.
    args.load_tp = True

    print('Loading data... ', end='')
    if args.loader == 'UCR':
        task_type = 'classification'
        train_data, train_labels, test_data, test_labels = datautils.load_UCR(args.dataset, load_tp=args.load_tp)

    elif args.loader == 'Others':
        task_type = 'classification'
        train_data, train_labels, test_data, test_labels = datautils.load_others(args.dataset, load_tp=args.load_tp)

    elif args.loader == 'UEA':
        task_type = 'classification'
        train_data, train_labels, test_data, test_labels = datautils.load_UEA(args.dataset, load_tp=args.load_tp)

    elif args.loader == 'forecast_csv':
        # The task is chosen from the run name: anything without 'forecast' in it is an imputation run.
        task_type = 'forecasting' if 'forecast' in args.run_name else 'imputation'
        offset = 0 if task_type == 'forecasting' else 96
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset, offset=offset, load_tp=args.load_tp)
        train_data = {'x': data['x'][:, train_slice], 'mask': data['mask'][:, train_slice]}

    elif args.loader == 'forecast_csv_univar':
        task_type = 'forecasting' if 'forecast' in args.run_name else 'imputation'
        offset = 0 if task_type == 'forecasting' else 96
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset, offset=offset, univar=True, load_tp=args.load_tp)
        train_data = {'x': data['x'][:, train_slice], 'mask': data['mask'][:, train_slice]}

    elif args.loader == 'forecast_npy':
        # NOTE(review): this branch produces a plain array, while the code
        # below indexes train_data['x'] -- confirm whether it is still supported.
        task_type = 'forecasting'
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset)
        train_data = data[:, train_slice]

    elif args.loader == 'forecast_npy_univar':
        # NOTE(review): same caveat as the 'forecast_npy' branch.
        task_type = 'forecasting'
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset, univar=True)
        train_data = data[:, train_slice]

    elif args.loader == 'anomaly':
        task_type = 'anomaly_detection'
        train_data_task, train_labels, train_timestamps, test_data, test_labels, test_timestamps, delay = datautils.load_anomaly(args.dataset, load_tp=args.load_tp)
        train_data = datautils.gen_ano_train_data(train_data_task['x'])
        train_data = {
            'x': np.concatenate([train_data, datautils.gen_ano_train_data(train_timestamps, train_data.shape[1], True)], axis=-1),
            'mask': train_data_task['mask']}

    else:
        raise ValueError(f"Unknown loader {args.loader}.")

    # Fail before training rather than after it when the evaluation cannot run.
    if args.eval and task_type == 'imputation' and args.missing_rate is None:
        parser.error('--missing-rate is required to evaluate an imputation run')

    args.task_type = task_type
    if args.irregular > 0:
        if task_type == 'classification':
            train_data = data_dropout(train_data, args.irregular)
            test_data = data_dropout(test_data, args.irregular)
        else:
            raise ValueError(f"Task type {task_type} is not supported when irregular>0.")
    print('done')
    print(train_data['x'].shape)

    config = dict(
        batch_size=args.batch_size,
        lr=args.lr,
        sgd=args.sgd,
        output_dims=args.repr_dims,
        max_train_length=args.max_train_length,
        args=args
    )

    if args.save_every is not None:
        unit = 'epoch' if args.epochs is not None else 'iter'
        config[f'after_{unit}_callback'] = save_checkpoint_callback(args.save_every, unit)

    # `run_dir` must stay a module-level name: the checkpoint callback reads it.
    run_dir = 'training/' + args.dataset + '__' + name_with_datetime(args.run_name)
    os.makedirs(run_dir, exist_ok=True)

    t = time.time()

    model = TimesURL(
        # The last channel of 'x' is the timestamp when load_tp is on, so it is not an input feature.
        input_dims=train_data['x'].shape[-1] - (1 if args.load_tp else 0),
        device=device,
        **config
    )
    loss_log = model.fit(
        train_data,
        n_epochs=args.epochs,
        n_iters=args.iters,
        verbose=True,
        is_scheduler=args.sgd,
        temp=args.temp
    )
    model.save(f'{run_dir}/model.pkl')

    t = time.time() - t
    print(f"\nTraining time: {datetime.timedelta(seconds=t)}\n")

    if args.eval:
        if task_type == 'classification':
            out, eval_res = tasks.eval_classification(model, train_data, train_labels, test_data, test_labels, eval_protocol='svm')
        elif task_type == 'forecasting':
            out, eval_res = tasks.eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols)
        elif task_type == 'anomaly_detection':
            out, eval_res = tasks.eval_anomaly_detection(model, train_data_task, train_labels, train_timestamps, test_data, test_labels, test_timestamps, delay)
        elif task_type == 'imputation':
            out, eval_res = tasks.eval_imputation(model, data, test_slice, args.missing_rate, n_covariate_cols, device)
        else:
            raise ValueError(f"Unknown task type {task_type}.")

        pkl_save(f'{run_dir}/out.pkl', out)
        pkl_save(f'{run_dir}/eval_res.pkl', eval_res)
        print('Evaluation result:', eval_res)

    print("Finished.")
170 |
--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pickle
4 | import torch
5 | import random
6 | from datetime import datetime
7 | from scipy.interpolate import CubicSpline
8 |
def pkl_save(name, var):
    '''Pickle ``var`` into the file at path ``name``, replacing any existing content.'''
    with open(name, 'wb') as sink:
        sink.write(pickle.dumps(var))
12 |
def pkl_load(name):
    '''Read the file at path ``name`` and return the object pickled in it.

    Unpickling can execute arbitrary code; only use this on trusted files.
    '''
    with open(name, 'rb') as source:
        payload = source.read()
    return pickle.loads(payload)
16 |
def torch_pad_nan(arr, left=0, right=0, dim=0):
    '''Pad a tensor with NaN along one dimension.

    Args:
        arr (torch.Tensor): Tensor to pad.
        left (int): Number of NaN entries to prepend along ``dim``.
        right (int): Number of NaN entries to append along ``dim``.
        dim (int): Dimension to pad.

    Returns:
        torch.Tensor: ``arr`` itself when nothing is padded, otherwise a new tensor.
    '''
    def _nan_block(length):
        shape = list(arr.shape)
        shape[dim] = length
        # Create the padding on the tensor's own device so that
        # concatenation also works for tensors that are not on the CPU.
        return torch.full(shape, np.nan, device=arr.device)

    if left > 0:
        arr = torch.cat((_nan_block(left), arr), dim=dim)
    if right > 0:
        arr = torch.cat((arr, _nan_block(right)), dim=dim)
    return arr
27 |
def pad_nan_to_target(array, target_length, axis=0, both_side=False):
    '''Pad a float array with NaN along ``axis`` until it has ``target_length`` entries.

    The array is returned unchanged when it is already long enough. With
    ``both_side`` the padding is split between the two ends (the extra
    element goes to the end); otherwise everything is appended.
    '''
    assert array.dtype in [np.float16, np.float32, np.float64]
    deficit = target_length - array.shape[axis]
    if deficit <= 0:
        return array

    if both_side:
        before = deficit // 2
        after = deficit - before
    else:
        before, after = 0, deficit

    widths = [(0, 0) for _ in range(array.ndim)]
    widths[axis] = (before, after)
    return np.pad(array, pad_width=widths, mode='constant', constant_values=np.nan)
39 |
def split_with_nan(x, sections, axis=0):
    '''Split a float array into ``sections`` pieces along ``axis``.

    Pieces shorter than the first one are padded with NaN at the end so
    that all returned pieces have the same length along ``axis``.
    '''
    assert x.dtype in [np.float16, np.float32, np.float64]
    pieces = np.array_split(x, sections, axis=axis)
    # np.array_split puts the longer pieces first, so the first piece sets the target.
    longest = pieces[0].shape[axis]
    return [pad_nan_to_target(piece, longest, axis=axis) for piece in pieces]
47 |
def take_per_row(A, indx, num_elem):
    '''Gather, for every row ``i`` of ``A``, the ``num_elem`` consecutive entries starting at ``indx[i]`` along dimension 1.'''
    offsets = np.arange(num_elem)
    columns = indx[:, None] + offsets
    rows = torch.arange(columns.shape[0])[:, None]
    return A[rows, columns]
51 |
def centerize_vary_length_series(x, mask):
    '''Cyclically shift each series so its leading and trailing all-NaN runs become equal in length.

    The same per-row shift is applied to ``mask``. Returns the shifted
    ``(x, mask)`` pair.
    '''
    observed = ~np.isnan(x).all(axis=-1)
    lead = np.argmax(observed, axis=1)
    trail = np.argmax(observed[:, ::-1], axis=1)

    shift = (lead + trail) // 2 - lead
    # Express a shift to the left as an equivalent cyclic shift to the right.
    shift[shift < 0] += x.shape[1]

    rows, cols = np.ogrid[:x.shape[0], :x.shape[1]]
    cols = cols - shift[:, np.newaxis]
    return x[rows, cols], mask[rows, cols]
60 |
def data_dropout(arr, p):
    '''Return a copy of ``arr`` with a random fraction of its time steps set to NaN.

    Args:
        arr (numpy.ndarray): Float array whose first two dimensions are indexed as (B, T).
        p (float): Fraction of the ``B * T`` positions to blank out; ``int(B * T * p)`` positions are chosen without replacement.

    Returns:
        numpy.ndarray: A new array; ``arr`` itself is not modified.
    '''
    B, T = arr.shape[0], arr.shape[1]
    # The builtin `bool` is used because the `np.bool` alias no longer exists in recent NumPy.
    mask = np.full(B * T, False, dtype=bool)
    ele_sel = np.random.choice(
        B * T,
        size=int(B * T * p),
        replace=False
    )
    mask[ele_sel] = True
    res = arr.copy()
    res[mask.reshape(B, T)] = np.nan
    return res
73 |
def name_with_datetime(prefix='default'):
    '''Return ``prefix`` followed by an underscore and the current local time as ``YYYYmmdd_HHMMSS``.'''
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    return '_'.join((prefix, stamp))
77 |
def init_dl_program(
    device_name,
    seed=None,
    use_cudnn=True,
    deterministic=False,
    benchmark=False,
    use_tf32=False,
    max_threads=None
):
    '''Configure threads, random seeds, devices and cuDNN flags for a run.

    Args:
        device_name: One device (str, int or ``torch.device``) or an iterable of them.
        seed (Union[int, NoneType]): Base seed. ``random`` gets ``seed``, NumPy ``seed + 1``, torch ``seed + 2``, and each CUDA device a further incremented value.
        use_cudnn (bool): Value for ``torch.backends.cudnn.enabled``.
        deterministic (bool): Value for ``torch.backends.cudnn.deterministic``.
        benchmark (bool): Value for ``torch.backends.cudnn.benchmark``.
        use_tf32 (bool): Value for the cuDNN and CUDA-matmul TF32 switches, when available.
        max_threads (Union[int, NoneType]): Thread limit applied to torch (and to MKL if the ``mkl`` module is installed).

    Returns:
        A single ``torch.device`` when one device was given, otherwise a list of them in the given order.
    '''
    if max_threads is not None:
        torch.set_num_threads(max_threads)  # intraop
        if torch.get_num_interop_threads() != max_threads:
            torch.set_num_interop_threads(max_threads)  # interop
        try:
            import mkl
        except ImportError:
            # mkl is optional; without it only the torch limits apply.
            pass
        else:
            mkl.set_num_threads(max_threads)

    if seed is not None:
        random.seed(seed)
        seed += 1
        np.random.seed(seed)
        seed += 1
        torch.manual_seed(seed)

    if isinstance(device_name, (str, int, torch.device)):
        device_name = [device_name]

    devices = []
    # Walk the devices back to front so that the first one is selected last
    # and ends up as the current CUDA device.
    for t in reversed(device_name):
        t_device = torch.device(t)
        devices.append(t_device)
        if t_device.type == 'cuda':
            assert torch.cuda.is_available()
            torch.cuda.set_device(t_device)
            if seed is not None:
                seed += 1
                torch.cuda.manual_seed(seed)
    devices.reverse()
    torch.backends.cudnn.enabled = use_cudnn
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = benchmark

    if hasattr(torch.backends.cudnn, 'allow_tf32'):
        torch.backends.cudnn.allow_tf32 = use_tf32
        torch.backends.cuda.matmul.allow_tf32 = use_tf32

    return devices if len(devices) > 1 else devices[0]
129 |
130 |
def convert_coeff(x, eps=1e-6):
    '''Split a complex tensor into ``(amplitude, phase)``.

    ``eps`` is added to both the real and the imaginary part for the
    amplitude, and to the real part only for the phase.
    '''
    real_shifted = x.real + eps
    imag_shifted = x.imag + eps
    amp = torch.sqrt(real_shifted.pow(2) + imag_shifted.pow(2))
    phase = torch.atan2(x.imag, x.real + eps)
    return amp, phase
135 |
136 |
def hierarchical_x(x, mask):
    '''Build a pyramid of ``x`` by repeatedly max-pooling pairs of time steps.

    ``x`` is indexed as (B, T, C). At every level, neighbouring time steps
    are paired per channel and the larger value is kept; ``mask`` is
    sampled at the position of that larger value. An odd length is first
    extended with ``-inf`` so the last step pairs with a value that never
    wins. Returns a list of ``{'x': ..., 'mask': ...}`` dicts, starting
    with the inputs and ending with a single time step.
    '''
    B, C = x.size(0), x.size(2)
    levels = [{'x': x, 'mask': mask}]
    while x.size(1) > 1:
        if x.size(1) % 2 != 0:
            filler = -np.inf * torch.ones(B, 1, C, device = x.device)
            x = torch.cat((x, filler), dim = 1)
        length = x.size(1)

        # Arrange as (B, 2, T/2, C) and find which element of each pair is larger.
        pairs = x.permute(0, 2, 1).reshape(B, C, -1, 2).permute(0, 3, 2, 1)
        _, winner = torch.max(pairs, dim = 1)

        # Turn the in-pair position (0 or 1) into an absolute time index.
        pair_starts = torch.arange(0, length, 2, device = x.device)
        t_index = (winner.transpose(1, 2) + pair_starts).transpose(1, 2).reshape(-1)

        # Matching batch and channel indices, flattened in (B, T/2, C) order.
        b_index = torch.arange(B, device = x.device).reshape(-1, 1).repeat(1, length // 2 * C).reshape(-1)
        c_index = torch.arange(C, device = x.device).repeat(B * length // 2)

        picker = (b_index, t_index, c_index)
        pooled_x = x[picker].reshape(B, -1, C)
        pooled_mask = mask[picker].reshape(B, -1, C)
        x, mask = pooled_x, pooled_mask
        levels.append({'x': x, 'mask': mask})
    return levels
155 |
156 |
def generate_mask(data, p = 0.5, remain = 0):
    '''Draw a random 0/1 mask for every series in ``data``.

    ``data`` is indexed as (B, T, C). A time step counts as observed when
    channel 0 is not NaN. Among the observed steps, the last ``remain``
    ones are always set to 1; over the others, ``round(n * C * p)`` entries
    are set to 1 at random and the rest to 0. Unobserved steps are NaN in
    the mask. A single draw is used per series; no re-sampling is attempted
    when a channel ends up with zero or one entries set.
    '''
    B, T, C = data.shape
    mask = np.empty_like(data)

    for b in range(B):
        ts = data[b, :, 0]
        observed = ~np.isnan(ts)
        et_num = ts[observed].size - remain
        total = et_num * C
        num = round(total * p)

        flat = np.zeros(total)
        flat[random.sample(range(total), num)] = 1
        i_mask = flat.reshape(et_num, C)

        i_mask = np.concatenate((i_mask, np.ones((remain, C))), axis = 0)
        mask[b, observed, :] = i_mask
        mask[b, ~observed, :] = np.nan

    return mask
184 |
185 |
def interpolate_cubic_spline(data, mask, p = 1):
    '''Overwrite masked-out entries of a 1-D series with cubic-spline estimates.

    The spline is fitted through the non-NaN entries where ``mask == 1``.
    Of the non-NaN entries where ``mask == 0``, a random fraction ``p`` is
    replaced by the spline value. ``data`` is modified in place and also
    returned.
    '''
    present = ~np.isnan(data)
    normal = np.where((mask == 1) & present)[0]
    missing = np.where((mask == 0) & present)[0]

    cs = CubicSpline(normal, data[normal])

    # Pick a random subset of the masked positions by shuffling them.
    num = int(missing.size * p)
    order = np.argsort(np.random.random(missing.size))
    missing = missing[order[:num]]

    data[missing] = cs(missing)
    return data
194 |
195 |
def inter_cubic_sp_torch(data, mask, p = 1):
    '''Tensor front end for ``interpolate_cubic_spline``.

    Both tensors are detached and converted to NumPy on the CPU, and the
    interpolated result is moved back to the device ``data`` came from.
    '''
    device = data.device
    data_np = data.cpu().detach().numpy()
    mask_np = mask.cpu().detach().numpy()
    filled = interpolate_cubic_spline(data_np, mask_np, p)
    return torch.from_numpy(filled).to(device)
199 |
200 |
def generate_uni(data, mask, alpha):
    '''Blend every step along dimension 1 with the mean of all the other steps.

    Returns ``alpha * data + (1 - alpha) * mean_of_others``. ``mask`` is
    accepted but not used.
    '''
    steps = data.size(1)
    total = data.sum(dim = 1).unsqueeze(1).repeat(1, steps, 1)
    others_mean = (total - data) / (steps - 1)
    return (1 - alpha) * others_mean + alpha * data
205 |
206 |
def generate_uni_p(data, mask, alpha):
    '''Variant of ``generate_uni`` that weights the data by the mean of ``mask`` along dimension 1.

    ``data`` is first scaled by that per-channel mean; each step is then
    blended with the weighted average of the other steps.
    '''
    steps = mask.size(1)
    p = mask.mean(dim = 1).unsqueeze(1).repeat(1, steps, 1)
    data = p * data
    data_total = data.sum(dim = 1).unsqueeze(1).repeat(1, steps, 1)
    weight_total = p.sum(dim = 1).unsqueeze(1).repeat(1, steps, 1)
    neg = (data_total - data) / (weight_total - p)
    return (1 - alpha) * neg + alpha * data
213 |
214 |
def normalize_with_mask(train, mask_tr, test, mask_te, scaler):
    '''Scale ``train`` and ``test`` with ``scaler`` while leaving masked-out entries out of the fit.

    Entries whose mask is 0 are set to NaN in the input arrays (in place)
    before ``scaler.fit`` is called on the flattened training data; after
    the transform those entries are set to 0 in the returned arrays.
    '''
    hidden_tr = mask_tr == 0
    hidden_te = mask_te == 0
    train[hidden_tr] = np.nan
    test[hidden_te] = np.nan

    scaler = scaler.fit(train.reshape(-1, train.shape[-1]))

    train_scaled = scaler.transform(train.reshape(-1, train.shape[-1])).reshape(train.shape)
    test_scaled = scaler.transform(test.reshape(-1, test.shape[-1])).reshape(test.shape)

    train_scaled[hidden_tr] = 0
    test_scaled[hidden_te] = 0
    return train_scaled, test_scaled
222 |
223 |
if __name__ == '__main__':
    # Smoke test: build the pyramid for a small random tensor, using it as its own mask.
    B, T, C = 3, 10, 3
    x = torch.randn((B, T, C))
    dict_x = hierarchical_x(x, mask=x)
    print('ok')
--------------------------------------------------------------------------------