├── .gitignore ├── AAAI24_appendix.pdf ├── LICENSE ├── README.md └── src ├── augmentations.py ├── collator.py ├── datautils.py ├── lib.py ├── models ├── __init__.py ├── attention.py ├── backbone.py ├── dilated_conv.py ├── encoder.py └── losses.py ├── tasks ├── __init__.py ├── _eval_protocols.py ├── anomaly_detection.py ├── classification.py ├── forecasting.py └── imputation.py ├── timesurl.py ├── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | -------------------------------------------------------------------------------- /AAAI24_appendix.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Alrash/TimesURL/d3533e45cb28efe8c986f13ce8d80926d0e9254e/AAAI24_appendix.pdf -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Alrash 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
def one_hot_encoding(X, n_values=None):
    """Return a one-hot matrix for the integer class ids in ``X``.

    Args:
        X: 1-D sequence of class ids (anything ``int`` can truncate).
        n_values: number of columns of the result. Defaults to
            ``max(X) + 1``; pass it explicitly when the caller needs a
            fixed width even if the largest id was not drawn.

    Returns:
        ``np.ndarray`` of shape ``(len(X), n_values)`` with float 0/1 entries.
    """
    ids = np.asarray(X).astype(int)
    if n_values is None:
        n_values = int(ids.max()) + 1
    return np.eye(n_values)[ids]


def DataTransform(sample, config):
    """Weak and strong augmentations.

    The weak view is ``scaling(sample)``; the strong view is a permuted copy
    with jitter added. Ratios are read from ``config.augmentation``.
    """
    weak_aug = scaling(sample, config.augmentation.jitter_scale_ratio)
    strong_aug = jitter(
        permutation(sample, max_segments=config.augmentation.max_seg),
        config.augmentation.jitter_ratio,
    )
    return weak_aug, strong_aug


def DataTransform_TD(sample, config):
    """Apply one randomly chosen time-domain augmentation to every row.

    For each row an id in ``{0, 1, 2, 3}`` is drawn: 0 -> jitter,
    1 -> scaling, 2 -> permutation. Rows that draw 3 are left as zeros.
    NOTE(review): id 3 looks like the slot of a fourth augmentation that is
    commented out upstream; confirm whether the draw range should be 3.

    Returns:
        float64 array shaped like ``sample``; a ``torch.Tensor`` when
        ``sample`` is a tensor, otherwise an ``np.ndarray``.
    """
    candidates = (
        jitter(sample, config.augmentation.jitter_ratio),
        scaling(sample, config.augmentation.jitter_scale_ratio),
        permutation(sample, max_segments=config.augmentation.max_seg),
    )
    choice = np.random.randint(0, 4, size=sample.shape[0])

    # Select rows with boolean masks. The previous code indexed with the
    # float array ``1 - onehot[:, k]``, which is not a valid index.
    aug_T = None
    for k, candidate in enumerate(candidates):
        candidate = np.asarray(candidate, dtype=np.float64)
        if aug_T is None:
            aug_T = np.zeros_like(candidate)
        rows = choice == k
        aug_T[rows] = candidate[rows]

    return torch.from_numpy(aug_T) if torch.is_tensor(sample) else aug_T


def DataTransform_FD(sample, config):
    """Apply one randomly chosen frequency-domain augmentation to every row.

    Each row of the tensor ``sample`` receives either ``remove_frequency``
    or ``add_frequency`` (both with ratio 0.1), chosen with equal
    probability. ``config`` is accepted for signature parity and not read.
    """
    aug_1 = remove_frequency(sample, 0.1)
    aug_2 = add_frequency(sample, 0.1)

    choice = torch.from_numpy(np.random.randint(0, 2, size=sample.shape[0]))
    # Broadcast the per-row decision over all trailing dimensions.
    use_first = (choice == 0).reshape([-1] + [1] * (sample.dim() - 1))
    return torch.where(use_first.to(sample.device), aug_1, aug_2)


def generate_binomial_mask(B, T, D, p=0.5):
    """Return a ``(B, T, D)`` bool tensor whose entries are True with probability ``p``."""
    return torch.from_numpy(np.random.binomial(1, p, size=(B, T, D))).to(torch.bool)


def masking(x, mask='binomial', p=0.9):
    """Zero out NaN timesteps and a random subset of entries of ``x``.

    ``x`` is modified in place and also returned.

    Args:
        x: 3-D float tensor (its three sizes are passed to
            ``generate_binomial_mask``).
        mask: masking mode; only ``'binomial'`` is implemented.
        p: probability of keeping an entry.

    Raises:
        ValueError: if ``mask`` is not ``'binomial'`` (this used to surface
            as an unbound-variable error after ``x`` had been modified).
    """
    if mask != 'binomial':
        raise ValueError(f"unsupported mask mode: {mask!r}")

    # Any timestep containing a NaN in its last axis is zeroed entirely.
    has_nan = x.isnan().any(dim=-1)
    x[has_nan] = 0

    keep = generate_binomial_mask(x.size(0), x.size(1), x.size(2), p=p).to(x.device)
    x[~keep] = 0
    return x


def jitter(x, sigma=0.8):
    """Add zero-mean Gaussian noise with standard deviation ``sigma`` to ``x``."""
    return x + np.random.normal(loc=0., scale=sigma, size=x.shape)


def scaling(x, sigma=1.1):
    """Multiply ``x`` by Gaussian factors drawn around 2.

    One factor is drawn per ``(axis-0, axis-2)`` position and shared along
    axis 1. The result is always an ``np.ndarray``.
    """
    factor = np.random.normal(loc=2., scale=sigma, size=(x.shape[0], x.shape[2]))
    # Broadcasting over axis 1 replaces the former per-slice loop + concatenate.
    return np.asarray(x) * factor[:, np.newaxis, :]


def permutation(x, max_segments=5, seg_mode="random"):
    """Shuffle segments along the last axis of every sample.

    For each sample a segment count in ``[1, max_segments)`` is drawn; the
    last axis is cut into that many pieces (random cut points when
    ``seg_mode == "random"``, near-equal pieces otherwise) and the pieces
    are re-ordered at random. All rows of a sample share the same
    re-ordering.

    Returns:
        ``torch.Tensor`` with the shape and dtype of ``x``.
    """
    data = np.asarray(x)
    n_steps = data.shape[2]
    orig_steps = np.arange(n_steps)

    num_segs = np.random.randint(1, max_segments, size=data.shape[0])

    ret = np.zeros_like(data)
    for i, pat in enumerate(data):
        if num_segs[i] > 1:
            if seg_mode == "random":
                split_points = np.random.choice(n_steps - 2, num_segs[i] - 1, replace=False)
                split_points.sort()
                splits = np.split(orig_steps, split_points)
            else:
                splits = np.array_split(orig_steps, num_segs[i])
            # Permute the segment order by index: the segments have unequal
            # lengths, and shuffling the ragged list itself fails on
            # recent NumPy releases.
            order = np.random.permutation(len(splits))
            warp = np.concatenate([splits[j] for j in order])
            # Index every row (previously only row 0 was read and then
            # broadcast over all rows of the sample).
            ret[i] = pat[:, warp]
        else:
            ret[i] = pat
    return torch.from_numpy(ret)


def remove_frequency(x, maskout_ratio=0):
    """Zero each entry of tensor ``x`` with probability ``maskout_ratio``."""
    # Draw on x's own device so the function also works without CUDA.
    keep = torch.rand(x.shape, device=x.device) > maskout_ratio
    return x * keep


def add_frequency(x, pertub_ratio=0,):
    """Add a random amplitude to a fraction ``pertub_ratio`` of the entries of ``x``.

    Perturbations are uniform in ``[0, 0.1 * x.max())``.
    """
    mask = torch.rand(x.shape, device=x.device) > (1 - pertub_ratio)
    max_amplitude = x.max()
    # Same device as the mask; a CPU draw would not combine with a GPU mask.
    random_am = torch.rand(mask.shape, device=x.device) * (max_amplitude * 0.1)
    pertub_matrix = mask * random_am
    return x + pertub_matrix
@dataclass
class CLDataCollator:
    """Collate ``(values, times, mask)`` instances into two padded views.

    Every instance is split into two disjoint sets of timesteps (view 0 and
    view 1). When ``pretrain_tasks == 'full2'`` each view's mask is further
    split into a kept part and a held-out part, concatenated along the last
    axis.

    Attributes:
        max_len: padded length of the time axis of every output tensor.
        args: namespace providing ``mask_ratio_per_seg`` and ``segment_num``.
    """

    max_len: int
    args: Namespace
    # Unannotated, hence class-level settings rather than dataclass fields.
    len_sampling_bound = [0.3, 0.7]      # fraction of timesteps given to view 0
    dense_sampling_bound = [0.4, 0.6]    # share of view 0 taken from the dense half
    pretrain_tasks = 'full2'

    def __call__(self, batch):
        """Build the padded batch.

        Args:
            batch: sequence of ``(values, times, mask)`` tuples; ``values``
                and ``mask`` are ``(T, D)`` tensors, ``times`` holds the T
                timestamps.

        Returns:
            dict with ``'value'`` ``(B, 2, max_len, D)``, ``'time'``
            ``(B, 2, max_len)``, ``'mask'`` ``(B, 2, max_len, 2*D)`` for
            ``'full2'`` (else ``D``) and ``'mask_origin'``
            ``(B, 2, max_len, D)``; unused tail positions stay zero.
        """
        batch_size = len(batch)
        D = batch[0][0].size(1)
        mask_width = 2 * D if self.pretrain_tasks == 'full2' else D

        time_batch = torch.zeros([batch_size, 2, self.max_len])
        value_batch = torch.zeros([batch_size, 2, self.max_len, D])
        mask_batch = torch.zeros([batch_size, 2, self.max_len, mask_width])
        mask_old_batch = torch.zeros([batch_size, 2, self.max_len, D])

        for idx, instance in enumerate(batch):
            views = self._per_seq_sampling(instance)
            for view_id, (v, t, m, m_old) in enumerate(views):
                n = v.size(0)
                value_batch[idx, view_id, :n] = v
                time_batch[idx, view_id, :n] = t
                mask_batch[idx, view_id, :n] = m
                mask_old_batch[idx, view_id, :n] = m_old

        return {'value': value_batch, 'time': time_batch, 'mask': mask_batch, 'mask_origin': mask_old_batch}

    def _per_seq_sampling(self, instance):
        """Split one instance into two views over disjoint timesteps.

        - times holds T time values of observations.
        - values is a (T, D) tensor containing observed values for D variables.
        - mask is a (T, D) tensor containing 1 where values were observed and 0 otherwise.

        Returns:
            ``((v1, t1, m1, m1_old), (v2, t2, m2, m2_old))`` where the
            ``*_old`` masks are the masks before segment masking.
        """
        values, times, mask = instance

        # Anchor and positive are chosen from the sampling density.
        selected_indices = self._time_sensitive_cl(times)

        n = min(len(values), len(times), len(mask))
        first = torch.tensor([i for i in range(n) if i in selected_indices], dtype=torch.long)
        second = torch.tensor([i for i in range(n) if i not in selected_indices], dtype=torch.long)

        # Index selection also yields well-formed empty tensors when one
        # view receives no timestep (stacking an empty list would raise).
        v1, t1, m1 = values[first], times[first], mask[first]
        v2, t2, m2 = values[second], times[second], mask[second]

        m1_old, m2_old = m1.clone(), m2.clone()
        if self.pretrain_tasks == 'full2':
            m1 = self._seg_masking(mask=m1, timestamps=t1)
            m2 = self._seg_masking(mask=m2, timestamps=t2)

        return (v1, t1, m1, m1_old), (v2, t2, m2, m2_old)

    def _random_sampling_cl(self, values):
        """Return a uniformly random set of timestep indices for view 0."""
        indices = list(range(len(values)))
        random.shuffle(indices)

        fraction = np.random.uniform(self.len_sampling_bound[0], self.len_sampling_bound[1], 1)[0]
        length = max(int(fraction * len(indices)), 1)

        return set(indices[: length])

    def _time_sensitive_cl(self, timestamps):
        """Choose view-0 timesteps from both densely and sparsely sampled regions.

        Timesteps are ranked by the mean of the gaps to their neighbours
        (end points use their single gap). The lower half of the ranking is
        the dense pool, the upper half the sparse pool, and a random share
        is drawn from each.

        Returns:
            set of selected indices. With fewer than two timestamps no gap
            exists and every index is returned.
        """
        times = torch.clone(timestamps).reshape(timestamps.shape[0])
        n = times.shape[0]
        if n < 2:
            return set(range(n))
        if not times.is_floating_point():
            times = times.to(torch.get_default_dtype())

        gaps = times[1:] - times[:-1]
        avg_interval_times = torch.cat([gaps[:1], (gaps[:-1] + gaps[1:]) / 2, gaps[-1:]]).tolist()

        # Stable ascending sort: smallest gaps (dense regions) first.
        indices = sorted(range(n), key=avg_interval_times.__getitem__)

        # length of the anchor/positive sample
        fraction = np.random.uniform(self.len_sampling_bound[0], self.len_sampling_bound[1], 1)[0]
        length = max(int(fraction * n), 1)

        half = int(len(indices) / 2)
        dense_indices = indices[:half]
        random.shuffle(dense_indices)
        sparse_indices = indices[half:]
        random.shuffle(sparse_indices)

        dense_fraction = np.random.uniform(self.dense_sampling_bound[0], self.dense_sampling_bound[1], 1)[0]
        dense_length = max(int(dense_fraction * length), 1)
        sparse_length = length - dense_length

        return set(dense_indices[:dense_length]) | set(sparse_indices[:sparse_length])

    def _seg_masking(self, mask=None, timestamps=None):
        """Hold out time segments of every feature.

        - mask is a (T, D) tensor
        - timestamps holds the T time values
        - return: (T, 2*D) tensor; the first D columns are the kept
          observations, the last D columns mark the held-out ones.

        The input ``mask`` is not modified.
        """
        mask = mask.clone()
        interp_mask = torch.zeros_like(mask)

        for dim in range(mask.size(1)):
            # The duration of each masked segment is constant, so the number
            # of masked points follows the local sampling density.
            seg_pos = self._time_sensitive_sampling(mask[:, dim], timestamps)
            if len(seg_pos) > 0:
                mask[seg_pos, dim] = 0.0
                interp_mask[seg_pos, dim] = 1.0

        return torch.cat([mask, interp_mask], dim=-1)

    def _constant_length_sampling(self, mask):
        """Pick up to ``segment_num`` runs of observed positions of equal length.

        Each run has ``max(1, int(mask_ratio_per_seg * n_observed))``
        positions taken consecutively from the still-unpicked observed
        positions. Sampling stops early when one or no observed position is
        left, or when fewer remain than a run needs.

        Returns:
            sorted list of picked positions in ``mask``.
        """
        count_ones = mask.sum().long().item()
        seg_seq_len = max(1, int(self.args.mask_ratio_per_seg * count_ones))

        remaining = torch.where(mask == 1)[0].tolist()

        seg_pos = []
        for _ in range(self.args.segment_num):
            if len(remaining) <= 1:
                break
            # Start positions that leave room for a full run.
            starts = remaining[: max(0, len(remaining) - seg_seq_len + 1)]
            if not starts:
                break

            start = remaining.index(random.choice(starts))
            sub_seg = remaining[start: start + seg_seq_len]

            seg_pos.extend(sub_seg)
            taken = set(sub_seg)
            remaining = [pos for pos in remaining if pos not in taken]

        return sorted(set(seg_pos))

    def _time_sensitive_sampling(self, mask, timestamps):
        """Pick observed positions covered by up to ``segment_num`` time windows.

        Every window spans ``mask_ratio_per_seg`` of the feature's observed
        time range and starts at a randomly chosen observed time that leaves
        room for the whole window. Observed points closer than one window
        length to a chosen start are excluded from later start choices.

        If fewer than ``segment_num`` windows fit, the positions of the
        windows that were placed are returned (previously the result was
        discarded and ``[]`` returned, also after the final window).

        Args:
            mask: 1-D tensor, 1 where the feature was observed.
            timestamps: the T time values (any shape reshapable to ``(T,)``).
                Assumed to be in ascending order — TODO confirm with callers.

        Returns:
            sorted list of positions in ``mask`` to hold out.
        """
        timestamps = timestamps.reshape(timestamps.shape[0])
        sampled_pos = [i for i in range(mask.shape[0]) if mask[i] == 1]
        sampled_times = [timestamps[i].item() for i in sampled_pos]

        if not sampled_times:
            return []

        sampled_times_start, sampled_times_end = sampled_times[0], sampled_times[-1]

        # time of masked segment = a fixed percentage of the observed time range
        time_of_masked_segment = (sampled_times_end - sampled_times_start) * self.args.mask_ratio_per_seg

        # Indices (into the observed points) that can still start a window.
        available = [k for k, time in enumerate(sampled_times)
                     if time < sampled_times_end - time_of_masked_segment]

        masked = []
        segments_left = self.args.segment_num
        while segments_left > 0 and available:
            segments_left -= 1
            chosen = random.choice(available)
            start_time = sampled_times[chosen]
            end_time = start_time + time_of_masked_segment

            available.remove(chosen)
            masked.append(chosen)

            # Later observations inside the window join the segment.
            for k in range(chosen + 1, len(sampled_times)):
                time = sampled_times[k]
                if time > end_time:
                    break
                if start_time < time:
                    masked.append(k)
                if k in available:
                    available.remove(k)

            # Earlier observations within one window length may no longer
            # start a window, otherwise it would overlap this one.
            for k in range(chosen - 1, -1, -1):
                time = sampled_times[k]
                if time < start_time - time_of_masked_segment or time > end_time:
                    break
                if start_time - time_of_masked_segment < time < end_time and k in available:
                    available.remove(k)

        # Map observed-point indices straight back to positions in ``mask``;
        # no lookup by timestamp value is needed.
        return sorted({sampled_pos[k] for k in masked})
| ''' -------------------------------------------------------------------------------- /src/datautils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import numpy as np 5 | import pandas as pd 6 | import math 7 | import random 8 | from datetime import datetime 9 | import pickle 10 | from utils import pkl_load, pad_nan_to_target 11 | from scipy.io.arff import loadarff 12 | from sklearn.preprocessing import StandardScaler, MinMaxScaler 13 | from utils import generate_mask 14 | from utils import interpolate_cubic_spline 15 | from utils import normalize_with_mask 16 | 17 | 18 | def load_UCR(dataset, load_tp: bool = True): 19 | train_file = os.path.join('datasets/UCR', dataset, dataset + "_TRAIN.tsv") 20 | test_file = os.path.join('datasets/UCR', dataset, dataset + "_TEST.tsv") 21 | train_df = pd.read_csv(train_file, sep='\t', header=None) 22 | test_df = pd.read_csv(test_file, sep='\t', header=None) 23 | train_array = np.array(train_df) 24 | test_array = np.array(test_df) 25 | 26 | # Move the labels to {0, ..., L-1} 27 | labels = np.unique(train_array[:, 0]) 28 | transform = {} 29 | for i, l in enumerate(labels): 30 | transform[l] = i 31 | 32 | train = train_array[:, 1:].astype(np.float64) 33 | train_labels = np.vectorize(transform.get)(train_array[:, 0]) 34 | test = test_array[:, 1:].astype(np.float64) 35 | test_labels = np.vectorize(transform.get)(test_array[:, 0]) 36 | 37 | # extend dim to NTC 38 | train, test = train[..., np.newaxis], test[..., np.newaxis] 39 | p = 1 40 | mask_tr, mask_te = generate_mask(train, p), generate_mask(test, p) 41 | 42 | # Normalization for non-normalized datasets 43 | # To keep the amplitude information, we do not normalize values over 44 | # individual time series, but on the whole dataset 45 | if dataset in [ 46 | 'AllGestureWiimoteX', 47 | 'AllGestureWiimoteY', 48 | 'AllGestureWiimoteZ', 49 | 'BME', 50 | 'Chinatown', 51 | 'Crop', 52 | 'EOGHorizontalSignal', 53 | 
'EOGVerticalSignal', 54 | 'Fungi', 55 | 'GestureMidAirD1', 56 | 'GestureMidAirD2', 57 | 'GestureMidAirD3', 58 | 'GesturePebbleZ1', 59 | 'GesturePebbleZ2', 60 | 'GunPointAgeSpan', 61 | 'GunPointMaleVersusFemale', 62 | 'GunPointOldVersusYoung', 63 | 'HouseTwenty', 64 | 'InsectEPGRegularTrain', 65 | 'InsectEPGSmallTrain', 66 | 'MelbournePedestrian', 67 | 'PickupGestureWiimoteZ', 68 | 'PigAirwayPressure', 69 | 'PigArtPressure', 70 | 'PigCVP', 71 | 'PLAID', 72 | 'PowerCons', 73 | 'Rock', 74 | 'SemgHandGenderCh2', 75 | 'SemgHandMovementCh2', 76 | 'SemgHandSubjectCh2', 77 | 'ShakeGestureWiimoteZ', 78 | 'SmoothSubspace', 79 | 'UMD' 80 | ] or p != 1: 81 | scaler = StandardScaler() 82 | train, test = normalize_with_mask(train, mask_tr, test, mask_te, scaler) 83 | # mean = np.nanmean(train) 84 | # std = np.nanstd(train) 85 | # train = (train - mean) / std 86 | # test = (test - mean) / std 87 | 88 | if load_tp: 89 | tp = np.linspace(0, 1, train.shape[1], endpoint=True).reshape(1, -1, 1) 90 | train = np.concatenate((train, np.repeat(tp, train.shape[0], axis=0)), axis=-1) 91 | test = np.concatenate((test, np.repeat(tp, test.shape[0], axis=0)), axis=-1) 92 | 93 | return {'x': train, 'mask': mask_tr}, train_labels, {'x': test, 'mask': mask_te}, test_labels 94 | # return train[..., np.newaxis], train_labels, test[..., np.newaxis], test_labels 95 | 96 | 97 | def load_others(dataset, load_tp: bool = True): 98 | data = np.load(f'datasets/Others/{dataset}.npy', allow_pickle=True).item() 99 | train_X, train_mask, train_y, test_X, test_mask, test_y = \ 100 | data["tr_x"], data["tr_mask"], data["tr_y"], data["te_x"], data["te_mask"], data["te_y"] 101 | 102 | scaler = MinMaxScaler() 103 | 104 | train_X, test_X = normalize_with_mask(train_X, train_mask, test_X, test_mask, scaler) 105 | 106 | train_tp, test_tp = data['tr_t'], data['te_t'] 107 | if load_tp: 108 | train_X = np.concatenate((train_X, train_tp.reshape(train_tp.shape[0], -1, 1)), axis=-1) 109 | test_X = np.concatenate((test_X, 
test_tp.reshape(test_tp.shape[0], -1, 1)), axis=-1) 110 | 111 | labels = np.unique(train_y) 112 | transform = {k: i for i, k in enumerate(labels)} 113 | train_y = np.vectorize(transform.get)(train_y) 114 | test_y = np.vectorize(transform.get)(test_y) 115 | return {'x': train_X, 'mask': train_mask}, train_y, {'x': test_X, 'mask': test_mask}, test_y 116 | 117 | 118 | def load_UEA(dataset, load_tp: bool = False): 119 | def extract_data(data): 120 | res_data = [] 121 | res_labels = [] 122 | for t_data, t_label in data: 123 | t_data = np.array([d.tolist() for d in t_data]) 124 | t_label = t_label.decode("utf-8") 125 | res_data.append(t_data) 126 | res_labels.append(t_label) 127 | return np.array(res_data).swapaxes(1, 2), np.array(res_labels) 128 | 129 | try: 130 | train_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TRAIN.arff')[0] 131 | test_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TEST.arff')[0] 132 | 133 | train_X, train_y = extract_data(train_data) 134 | test_X, test_y = extract_data(test_data) 135 | except: 136 | data = np.load(f'datasets/UEA/{dataset}/{dataset}.npy', allow_pickle=True).item() 137 | train_X, train_y, test_X, test_y = data["train_X"], data["train_y"], data["test_X"], data["test_y"] 138 | 139 | p = 1 140 | mask_tr, mask_te = generate_mask(train_X, p), generate_mask(test_X, p) 141 | # scaler = MinMaxScaler() 142 | scaler = StandardScaler() 143 | 144 | train_X, test_X = normalize_with_mask(train_X, mask_tr, test_X, mask_te, scaler) 145 | 146 | if load_tp: 147 | tp = np.linspace(0, 1, train_X.shape[1], endpoint=True).reshape(1, -1, 1) 148 | train_X = np.concatenate((train_X, np.repeat(tp, train_X.shape[0], axis=0)), axis=-1) 149 | test_X = np.concatenate((test_X, np.repeat(tp, test_X.shape[0], axis=0)), axis=-1) 150 | 151 | labels = np.unique(train_y) 152 | transform = {k: i for i, k in enumerate(labels)} 153 | train_y = np.vectorize(transform.get)(train_y) 154 | test_y = np.vectorize(transform.get)(test_y) 155 | return {'x': train_X, 
'mask': mask_tr}, train_y, {'x': test_X, 'mask': mask_te}, test_y 156 | 157 | 158 | def load_forecast_npy(name, univar=False): 159 | data = np.load(f'datasets/{name}.npy') 160 | if univar: 161 | data = data[: -1:] 162 | 163 | train_slice = slice(None, int(0.6 * len(data))) 164 | valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data))) 165 | test_slice = slice(int(0.8 * len(data)), None) 166 | 167 | scaler = StandardScaler().fit(data[train_slice]) 168 | data = scaler.transform(data) 169 | data = np.expand_dims(data, 0) 170 | 171 | pred_lens = [24, 48, 96, 288, 672] 172 | return data, train_slice, valid_slice, test_slice, scaler, pred_lens, 0 173 | 174 | 175 | def _get_time_features(dt): 176 | return np.stack([ 177 | dt.minute.to_numpy(), 178 | dt.hour.to_numpy(), 179 | dt.dayofweek.to_numpy(), 180 | dt.day.to_numpy(), 181 | dt.dayofyear.to_numpy(), 182 | dt.month.to_numpy(), 183 | dt.weekofyear.to_numpy(), 184 | ], axis=1).astype(np.float) 185 | 186 | 187 | def load_forecast_csv(name, offset=0 , univar=False, load_tp: bool = True): 188 | data = pd.read_csv(f'datasets/{name}.csv', index_col='date', parse_dates=True) 189 | dt_tp = data.index 190 | dt_embed = _get_time_features(data.index) 191 | n_covariate_cols = dt_embed.shape[-1] if offset == 0 else 0 192 | 193 | if univar: 194 | if name in ('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2'): 195 | data = data[['OT']] 196 | elif name == 'electricity': 197 | data = data[['MT_001']] 198 | elif name == 'WTH': 199 | data = data[['WetBulbCelsius']] 200 | else: 201 | data = data.iloc[:, -1:] 202 | 203 | data = data.to_numpy() 204 | if name == 'ETTh1' or name == 'ETTh2': 205 | train_slice = slice(None, 12 * 30 * 24) 206 | valid_slice = slice(12 * 30 * 24 - offset, 16 * 30 * 24) 207 | test_slice = slice(16 * 30 * 24 - offset, 20 * 30 * 24) 208 | elif name == 'ETTm1' or name == 'ETTm2': 209 | train_slice = slice(None, 12 * 30 * 24 * 4) 210 | valid_slice = slice(12 * 30 * 24 * 4 - offset, 16 * 30 * 24 * 4) 211 | test_slice = slice(16 
* 30 * 24 * 4 - offset, 20 * 30 * 24 * 4) 212 | else: 213 | train_slice = slice(None, int(0.6 * len(data))) 214 | valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data))) 215 | test_slice = slice(int(0.8 * len(data)), None) 216 | 217 | def fixed_mask_timestamp(num, mask): 218 | mask_time = np.ones((mask.shape[0], mask.shape[1])) 219 | mask_time[np.where(mask.mean(axis=-1) == 0.)] = 0 220 | return np.concatenate((np.repeat(mask_time[..., np.newaxis], num, axis=-1), mask), axis=-1) 221 | 222 | # to N x T x C 223 | if name in ('electricity'): 224 | data = np.expand_dims(data.T, -1) # Each variable is an instance rather than a feature 225 | else: 226 | data = np.expand_dims(data, 0) 227 | 228 | p = 1 229 | mask_tr, mask_va, mask_te = generate_mask(data[:, train_slice], p), \ 230 | generate_mask(data[:, valid_slice], p), \ 231 | generate_mask(data[:, test_slice], p) 232 | scaler = StandardScaler() 233 | 234 | train_x, valid_x = normalize_with_mask(data[:, train_slice], mask_tr, data[:, valid_slice], mask_va, scaler) 235 | _, test_x = normalize_with_mask(data[:, train_slice], mask_tr, data[:, test_slice], mask_te, scaler) 236 | data = np.concatenate((train_x, valid_x, test_x), axis=1) 237 | mask = np.concatenate([mask_tr, mask_va, mask_te], axis=1) 238 | 239 | if n_covariate_cols > 0: 240 | dt_mask, dv_mask, d_mask = fixed_mask_timestamp(n_covariate_cols, mask_tr[:1]), \ 241 | fixed_mask_timestamp(n_covariate_cols, mask_va[:1]), \ 242 | fixed_mask_timestamp(n_covariate_cols, mask_te[:1]) 243 | 244 | dt, dv, d = dt_embed[train_slice], dt_embed[valid_slice], dt_embed[test_slice] 245 | dt[dt_mask[0][:, :n_covariate_cols] == 0], dv[dv_mask[0][:, :n_covariate_cols] == 0], d[d_mask[0][:, :n_covariate_cols] == 0] = np.nan, np.nan, np.nan 246 | dt_embed = np.concatenate((dt, dv, d), axis=0) 247 | 248 | dt_scaler = scaler.fit(dt) 249 | dt_embed = np.expand_dims(dt_scaler.transform(dt_embed), 0) 250 | dt_embed[np.isnan(dt_embed)] = 0 251 | data = 
def load_anomaly(name, load_tp=False):
    """Load an anomaly-detection pickle and build per-series masks.

    Every series in ``all_train_data`` / ``all_test_data`` is multiplied by a
    mask obtained from ``generate_mask`` and written back into the loaded
    dict.  The masks are NaN-padded to the longest train / test series and
    stacked along the first axis.

    Args:
        name: File stem of ``datasets/{name}.pkl``.
        load_tp: Currently not used by this function.

    Returns:
        ``(train, train_labels, train_timestamps, test, test_labels,
        test_timestamps, delay)`` where ``train``/``test`` are dicts with the
        keys ``'x'`` and ``'mask'``.
    """
    res = pkl_load(f'datasets/{name}.pkl')
    train_series, test_series = res['all_train_data'], res['all_test_data']

    keep_ratio = 1
    longest_train = np.max([len(train_series[key]) for key in train_series])
    longest_test = np.max([len(test_series[key]) for key in test_series])

    def _pad_with_nan(series_mask, target_len):
        # Extend the time axis with NaN so that all masks can be stacked.
        filler = np.full((1, target_len - series_mask.shape[1], 1), np.nan)
        return np.concatenate((series_mask, filler), axis=1)

    train_masks, test_masks = [], []
    # Test series are looked up with the training keys.
    for key in train_series:
        train_col = train_series[key].reshape(1, -1, 1)
        test_col = test_series[key].reshape(1, -1, 1)

        # Masks are generated train first, then test.
        train_mask = generate_mask(train_col, keep_ratio, remain=1)
        test_mask = generate_mask(test_col, keep_ratio, remain=1)

        # Apply the mask and store the series back as a flat vector.
        train_series[key] = (train_mask * train_col).reshape(-1)
        test_series[key] = (test_mask * test_col).reshape(-1)

        train_masks.append(_pad_with_nan(train_mask, longest_train))
        test_masks.append(_pad_with_nan(test_mask, longest_test))

    mask_tr = np.concatenate(train_masks, axis=0)
    mask_te = np.concatenate(test_masks, axis=0)

    train_bundle = {'x': res['all_train_data'], 'mask': mask_tr}
    test_bundle = {'x': res['all_test_data'], 'mask': mask_te}
    return (
        train_bundle, res['all_train_labels'], res['all_train_timestamps'],
        test_bundle, res['all_test_labels'], res['all_test_timestamps'],
        res['delay'],
    )
def normalize_masked_data(data, mask, att_min, att_max):
    """Scale ``data`` as ``(data - att_min) / att_max`` and zero masked entries.

    Args:
        data: Tensor of values to normalise.
        mask: Tensor broadcastable to ``data``; positions equal to 0 are set
            to 0 in the result.
        att_min: Offset subtracted from ``data``.
        att_max: Divisor; entries equal to 0 are treated as 1 to avoid a
            division by zero.  The caller's tensor is left untouched.

    Returns:
        ``(data_norm, att_min, att_max)`` where the returned ``att_max`` is
        the divisor actually used (zeros replaced by ones).

    Raises:
        ValueError: If the normalised data contains NaN.
    """
    # Build the safe divisor out of place so the caller's statistics are not
    # silently rewritten.
    safe_max = torch.where(att_max == 0., torch.ones_like(att_max), att_max)

    data_norm = (data - att_min) / safe_max

    if torch.isnan(data_norm).any():
        raise ValueError("nans!")

    # set masked out elements back to zero
    data_norm[mask == 0] = 0

    return data_norm, att_min, safe_max
def evaluate_classifier(model, test_loader, dec=None, args=None, classifier=None,
                        dim=0, reconst=False, num_sample=1):
    """Evaluate a classifier on ``test_loader``.

    Each batch is split along its last axis into values (first ``dim``
    channels), mask (next ``dim`` channels) and time points (last channel).

    Args:
        model: Callable taking ``(values_and_mask, time_points)``.
        test_loader: Iterable of ``(batch, label)`` tensor pairs.
        dec: Decoder, used only when ``reconst`` and ``args.classify_pertp``.
        args: Namespace providing ``device``, ``classify_pertp``, ``dataset``
            and, when ``reconst`` is set, ``latent_dim``.
        classifier: Head applied to the latent sample / reconstruction when
            ``reconst`` is set.
        dim: Number of value channels.
        reconst: When true, ``model`` output is treated as the mean and
            log-variance of a latent distribution that is sampled
            ``num_sample`` times.
        num_sample: Number of latent samples per instance.

    Returns:
        ``(mean_loss, accuracy, auc)``.  ``auc`` is a ROC-AUC for the
        ``'physionet'`` and ``'MIMIC-III'`` datasets and ``0.`` otherwise.
    """
    pred = []
    true = []
    test_loss = 0
    criterion = nn.CrossEntropyLoss()
    for test_batch, label in test_loader:
        test_batch, label = test_batch.to(args.device), label.to(args.device)
        batch_len = test_batch.shape[0]
        observed_data = test_batch[:, :, :dim]
        observed_mask = test_batch[:, :, dim:2 * dim]
        observed_tp = test_batch[:, :, -1]
        with torch.no_grad():
            out = model(
                torch.cat((observed_data, observed_mask), 2), observed_tp)
            if reconst:
                qz0_mean = out[:, :, :args.latent_dim]
                qz0_logvar = out[:, :, args.latent_dim:]
                # Reparameterised samples, flattened to (num_sample * batch, ...).
                epsilon = torch.randn(
                    num_sample, qz0_mean.shape[0], qz0_mean.shape[1], qz0_mean.shape[2]
                ).to(args.device)
                z0 = epsilon * torch.exp(.5 * qz0_logvar) + qz0_mean
                z0 = z0.view(-1, qz0_mean.shape[1], qz0_mean.shape[2])
                if args.classify_pertp:
                    pred_x = dec(z0, observed_tp[None, :, :].repeat(
                        num_sample, 1, 1).view(-1, observed_tp.shape[1]))
                    out = classifier(pred_x)
                else:
                    out = classifier(z0)
        if args.classify_pertp:
            # Per-time-point labels are one-hot along the last axis.
            N = label.size(-1)
            out = out.view(-1, N)
            label = label.view(-1, N)
            _, label = label.max(-1)
            # NOTE(review): the factor 50. presumably is the number of time
            # points per series -- confirm against the dataset.
            test_loss += criterion(out, label.long()).item() * batch_len * 50.
        else:
            label = label.unsqueeze(0).repeat_interleave(
                num_sample, 0).view(-1)
            test_loss += criterion(out, label).item() * batch_len * num_sample
        pred.append(out.cpu().numpy())
        true.append(label.cpu().numpy())
    pred = np.concatenate(pred, 0)
    true = np.concatenate(true, 0)
    acc = np.mean(pred.argmax(1) == true)

    # Default to the same placeholder that 'PersonActivity' used, so that any
    # other dataset name no longer ends in an UnboundLocalError.
    auc = 0.
    if args.dataset == 'physionet' or args.dataset == 'MIMIC-III':
        auc = metrics.roc_auc_score(true, pred[:, 1])

    return test_loss / pred.shape[0], acc, auc
def subsample_timepoints(data, time_steps, mask, percentage_tp_to_sample=None):
    """Randomly keep a fraction of the observed time points of each series.

    ``data`` and ``mask`` are modified in place: for every series, the
    observed time points that are not drawn are set to 0.

    Args:
        data: Tensor indexed as ``[series, time, feature]``.
        time_steps: Returned unchanged.
        mask: Tensor shaped like ``data``; a time point counts as observed
            when its mask sums to more than 0 over the features.  ``None``
            treats every time point as observed.
        percentage_tp_to_sample: Fraction of observed time points to keep.
            ``None`` disables subsampling.

    Returns:
        ``(data, time_steps, mask)``.
    """
    if percentage_tp_to_sample is None:
        # Nothing to subsample; previously this crashed in ``int(n * None)``.
        return data, time_steps, mask

    for i in range(data.size(0)):
        if mask is None:
            observed_tp = np.arange(data.size(1))
        else:
            # Time points with at least one measured feature.
            per_step = mask[i].sum(-1).detach().cpu().numpy()
            observed_tp = np.where(per_step > 0)[0]

        n_to_sample = int(len(observed_tp) * percentage_tp_to_sample)
        kept_tp = np.random.choice(observed_tp, n_to_sample, replace=False)
        dropped_tp = torch.from_numpy(
            np.setdiff1d(observed_tp, kept_tp)).long().to(data.device)

        data[i, dropped_tp] = 0.
        if mask is not None:
            mask[i, dropped_tp] = 0.

    return data, time_steps, mask
+ 'X_train.pt') 293 | # X_val = torch.load(args.path + 'X_val.pt') 294 | X_train = torch.from_numpy(X_train) 295 | print('X_train: ' + str(X_train.shape)) 296 | # print('X_val: ' + str(X_val.shape)) 297 | 298 | # data_objects = generate_batches(X_train, X_val, args) 299 | data_objects = generate_batches(X_train, None, args) 300 | 301 | return data_objects 302 | 303 | 304 | def get_finetune_data(args): 305 | X_train, y_train = torch.load(args.path + 'X_train.pt'), torch.load(args.path + 'y_train.pt') 306 | X_val, y_val = torch.load(args.path + 'X_val.pt'), torch.load(args.path + 'y_val.pt') 307 | X_test, y_test = torch.load(args.path + 'X_test.pt'), torch.load(args.path + 'y_test.pt') 308 | input_dim = (X_train.shape[2] - 1) // 2 309 | 310 | print('X_train: ' + str(X_train.shape) + ' y_train: ' + str(y_train.shape)) 311 | print('X_val: ' + str(X_val.shape) + ' y_val: ' + str(y_val.shape)) 312 | print('X_test: ' + str(X_test.shape) + ' y_test: ' + str(y_test.shape)) 313 | 314 | if args.task == 'classification': 315 | train_data_combined = TensorDataset(X_train, y_train.long().squeeze()) 316 | val_data_combined = TensorDataset(X_val, y_val.long().squeeze()) 317 | test_data_combined = TensorDataset(X_test, y_test.long().squeeze()) 318 | elif args.task == 'regression' or args.task == 'interpolation': 319 | train_data_combined = TensorDataset(X_train, y_train.float()) 320 | val_data_combined = TensorDataset(X_val, y_val.float()) 321 | test_data_combined = TensorDataset(X_test, y_test.float()) 322 | 323 | train_dataloader = DataLoader(train_data_combined, batch_size=args.batch_size, shuffle=False) 324 | val_dataloader = DataLoader(val_data_combined, batch_size=args.batch_size, shuffle=False) 325 | test_dataloader = DataLoader(test_data_combined, batch_size=args.batch_size, shuffle=False) 326 | 327 | data_objects = {"train_dataloader": train_dataloader, 328 | "test_dataloader": test_dataloader, 329 | "val_dataloader": val_dataloader, 330 | "input_dim": input_dim} 331 | 332 
class Attention(nn.Module):
    """Multi-head self-attention with an optional key-padding mask.

    Args:
        dim: Width of the input and output features; must be divisible by
            ``heads``.
        heads: Number of attention heads.
        dropout: Dropout probability applied after the output projection.
    """

    def __init__(self, dim, heads=8, dropout=0.):
        super().__init__()
        self.heads = heads
        # NOTE(review): the scale is derived from the full width ``dim``, not
        # from the per-head width; kept as is to preserve trained behaviour.
        self.scale = dim ** -0.5

        self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
        self.to_out = nn.Sequential(
            nn.Linear(dim, dim),
            nn.Dropout(dropout)
        )

    def forward(self, x, mask=None):
        """Apply self-attention to ``x`` of shape ``(batch, tokens, dim)``.

        Args:
            x: Input tensor with three axes.
            mask: Optional boolean tensor, ``True`` for valid positions.  It is
                flattened per sample and one ``True`` entry is prepended, so it
                must describe ``tokens - 1`` positions.

        Returns:
            Tensor with the same shape as ``x``.
        """
        b, n, _ = x.shape
        h = self.heads
        # (b, n, h * d) -> (b, h, n, d) for each of q, k, v.
        q, k, v = (
            part.reshape(b, n, h, -1).permute(0, 2, 1, 3)
            for part in self.to_qkv(x).chunk(3, dim=-1)
        )

        dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale

        if mask is not None:
            mask = F.pad(mask.flatten(1), (1, 0), value=True)
            assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
            # Broadcast over heads and queries: (b, n) -> (b, 1, 1, n).  The
            # former (b, n, n) mask was aligned against (b, h, n, n) scores,
            # which only broadcast when b == h or b == 1.  Masking keys only
            # also keeps rows of padded queries finite, since the prepended
            # position is always valid.
            dots = dots.masked_fill(~mask[:, None, None, :], float('-inf'))

        attn = dots.softmax(dim=-1)

        out = torch.einsum('bhij,bhjd->bhid', attn, v)
        # (b, h, n, d) -> (b, n, h * d)
        out = out.permute(0, 2, 1, 3).reshape(b, n, -1)
        return self.to_out(out)
class SimConv4(torch.nn.Module):
    """Four-stage strided 1-D convolutional encoder.

    The input is projected by a linear layer, optionally masked per time
    step, and reduced by four ``Conv1d(kernel=4, stride=2, padding=1)``
    stages followed by global average pooling and L2 normalisation.

    Args:
        input_dims: Number of input features per time step.
        output_dims: Width of the returned representation.
        hidden_dims: Width of the projection and the first three stages.
        mask_mode: Mask used in training mode when ``forward`` gets no mask.
    """

    def __init__(self, input_dims, output_dims,hidden_dims=64, mask_mode='binomial'):
        super(SimConv4, self).__init__()
        self.input_fc = nn.Linear(input_dims, hidden_dims)

        self.feature_size = output_dims
        self.name = "conv4"
        self.mask_mode = mask_mode

        self.layer1 = self._conv_stage(hidden_dims, hidden_dims)
        self.layer2 = self._conv_stage(hidden_dims, hidden_dims)
        self.layer3 = self._conv_stage(hidden_dims, hidden_dims)
        self.layer4 = self._conv_stage(hidden_dims, output_dims, pool=True)

        self.flatten = torch.nn.Flatten()

        # The network contains no 2-D layers, so only the 1-D / linear
        # initialisers are needed.
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.xavier_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm1d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                # NOTE(review): a constant weight of 1 makes every hidden unit
                # of the projection start identical -- confirm this is intended.
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def _conv_stage(in_channels, out_channels, pool=False):
        """Build one conv / batch-norm / ReLU stage, optionally average-pooled."""
        layers = [
            nn.Conv1d(in_channels, out_channels, 4, 2, 1, bias=False),
            torch.nn.BatchNorm1d(out_channels),
            torch.nn.ReLU(),
        ]
        if pool:
            layers.append(torch.nn.AdaptiveAvgPool1d(1))
        return torch.nn.Sequential(*layers)

    def forward(self, x,mask=None):
        """Encode ``x`` of shape ``(B, T, input_dims)`` into ``(B, 1, output_dims)``.

        Args:
            x: Input tensor; time steps containing NaN are treated as zeros.
                The tensor itself is not modified.
            mask: ``None``, a boolean ``(B, T)`` tensor, or one of the strings
                ``'binomial'``, ``'continuous'``, ``'all_true'``,
                ``'all_false'``, ``'mask_last'``.  ``None`` selects
                ``mask_mode`` in training mode and ``'all_true'`` otherwise.

        Returns:
            L2-normalised representation with a singleton middle axis.
        """
        nan_mask = ~x.isnan().any(axis=-1)
        # Zero whole time steps that contain NaN, without writing into the
        # caller's tensor.
        x = torch.where(nan_mask.unsqueeze(-1), x, torch.zeros_like(x))
        x = self.input_fc(x)  # B x T x Ch

        # generate & apply mask
        if mask is None:
            mask = self.mask_mode if self.training else 'all_true'

        if mask == 'binomial':
            mask = generate_binomial_mask(x.size(0), x.size(1)).to(x.device)
        elif mask == 'continuous':
            mask = generate_continuous_mask(x.size(0), x.size(1)).to(x.device)
        elif mask == 'all_true':
            mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
        elif mask == 'all_false':
            mask = x.new_full((x.size(0), x.size(1)), False, dtype=torch.bool)
        elif mask == 'mask_last':
            mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
            mask[:, -1] = False

        # Out-of-place so that a caller-supplied mask tensor is left intact.
        mask = mask & nan_mask
        x = x.masked_fill(~mask.unsqueeze(-1), 0)

        h = x.transpose(1, 2)  # B x Ch x T
        h = self.layer1(h)
        h = self.layer2(h)
        h = self.layer3(h)
        h = self.layer4(h)  # B x output_dims x 1
        h = self.flatten(h)
        h = F.normalize(h, dim=1)
        return torch.unsqueeze(h, 1)
class RelatedConv(nn.Module):
    """2-D convolution that collapses the channel axis of a ``B x Ch x T`` input.

    Args:
        out_channel: Number of output channels.
        kernel_size: ``(channels, width)`` pair; the first entry has to equal
            the number of input channels so that this axis shrinks to 1.
        dilation: Dilation along the time axis.
    """

    def __init__(self, out_channel, kernel_size, dilation = 1):
        super(RelatedConv, self).__init__()
        assert isinstance(kernel_size, (tuple, list))
        in_channels = 1
        receptive_field = (kernel_size[-1] - 1) * dilation + 1
        padding = receptive_field // 2
        self.conv = nn.Conv2d(in_channels, out_channel, kernel_size,
                              padding = [0, padding],
                              # Dilate the time axis only: the kernel spans
                              # the whole channel axis, so dilating it as well
                              # would exceed the input height.
                              dilation = (1, dilation)
                              )
        # An even receptive field yields one surplus step at the end.
        self.remove = 1 if receptive_field % 2 == 0 else 0

    def forward(self, x):
        """Map ``B x Ch x T`` to ``B x out_channel x T``."""
        x = x.unsqueeze(1)      # B * Ch * T => B * 1 * Ch * T
        x = self.conv(x)        # B * 1 * Ch * T => B * out * 1 * T
        if self.remove > 0:
            x = x[..., :-self.remove]
        return x.squeeze(2)     # B * out * 1 * T => B * out * T


class RelatedEncoder(nn.Module):
    """Stack of ``RelatedConv`` layers.

    Args:
        out_channels: Output width of each layer, in order.  The list is not
            modified.
        channel: Number of channels of the encoder input.
        kernel_size: Temporal kernel width, either one int shared by all
            layers or one value per layer.
    """

    def __init__(self, out_channels, channel, kernel_size: int):
        super(RelatedEncoder, self).__init__()

        if isinstance(kernel_size, int):
            kernel_size = [kernel_size] * len(out_channels)

        # Work on a new list; inserting into ``out_channels`` would change the
        # caller's configuration on every construction.
        widths = [channel, *out_channels]
        self.net = nn.Sequential(*[
            RelatedConv(widths[i], kernel_size = (widths[i - 1], kernel_size[i - 1]))
            for i in range(1, len(widths))
        ])

    def forward(self, x):
        """Apply all layers to ``x`` of shape ``B x channel x T``."""
        return self.net(x)
def generate_continuous_mask(B, T, n=5, l=0.1):
    """Create a boolean ``(B, T)`` mask with randomly placed ``False`` spans.

    Args:
        B: Number of rows.
        T: Number of time steps per row.
        n: Number of spans per row; a float is read as a fraction of ``T``.
            The count is limited to the range ``[1, T // 2]`` (at least 1).
        l: Span length; a float is read as a fraction of ``T``.  The length is
            limited to the range ``[1, T]``.

    Returns:
        ``torch.bool`` tensor, ``True`` everywhere except inside the spans.
        Spans may overlap.
    """
    res = torch.full((B, T), True, dtype=torch.bool)
    if isinstance(n, float):
        n = int(n * T)
    n = max(min(n, T // 2), 1)

    if isinstance(l, float):
        l = int(l * T)
    # Cap at T: a longer span would make the start range below empty and
    # np.random.randint raise.
    l = min(max(l, 1), T)

    for i in range(B):
        for _ in range(n):
            t = np.random.randint(T - l + 1)
            res[i, t:t + l] = False
    return res
input_dim) 31 | 32 | def forward(self, first_token_tensor): 33 | # We "pool" the model by simply taking the hidden state corresponding 34 | # to the first token. 35 | pooled_output = self.dense(first_token_tensor) 36 | pooled_output = self.activation(pooled_output) 37 | pooled_output = self.project(pooled_output) 38 | return pooled_output 39 | 40 | 41 | def generate_binomial_mask(B, T, p=0.5): 42 | return torch.from_numpy(np.random.binomial(1, p, size=(B, T))).to(torch.bool) 43 | 44 | 45 | class TSEncoder(nn.Module): 46 | def __init__(self, input_dims, output_dims, hidden_dims=64, depth=10, mask_mode='binomial'): 47 | super().__init__() 48 | self.input_dims = input_dims 49 | self.output_dims = output_dims 50 | self.hidden_dims = hidden_dims 51 | self.mask_mode = mask_mode 52 | self.input_fc = nn.Linear(input_dims, hidden_dims) 53 | 54 | self.feature_extractor = DilatedConvEncoder( 55 | # input_dims, 56 | hidden_dims, 57 | [hidden_dims] * depth + [output_dims], 58 | kernel_size=3 59 | ) 60 | self.repr_dropout = nn.Dropout(p=0.1) 61 | self.interphead = BertInterpHead(input_dims, output_dims) 62 | 63 | def forward(self, x, mask=None): # x: B x T x input_dims 64 | if isinstance(x, dict): 65 | input_all = copy.deepcopy(x) 66 | m = x['mask'] 67 | x = x['data'] if 'data' in x.keys() else x['x'] 68 | else: 69 | input_all = copy.deepcopy(x) 70 | m = x[..., -(x.shape[-1] // 2):] 71 | x = x[..., :-(x.shape[-1] // 2)] 72 | 73 | t = x[..., -1] 74 | x = x[..., :-1] 75 | 76 | if mask == 'mask_last': 77 | nan_mask = ~x.isnan().any(axis=-1) 78 | 79 | x[torch.isnan(x)], m[torch.isnan(m)] = 0, 0 80 | 81 | # whole series without missing 82 | if self.training: 83 | x_whole = self.input_fc(x * input_all['mask_origin']) 84 | x_whole = x_whole.transpose(1, 2) 85 | x_whole = self.feature_extractor(x_whole) # B x Ch x T 86 | x_whole = x_whole.transpose(1, 2) # B x T x Co 87 | x_whole = self.repr_dropout(x_whole) 88 | 89 | # recon mask part 90 | if self.training: 91 | x_interp = 
def hierarchical_contrastive_loss(z1, z2, alpha=0.8, temporal_unit=0, temp=1.0):
    """Average the mixup contrastive losses over a max-pooling hierarchy.

    At every level the instance loss (weight ``alpha``) and, once the level
    index reaches ``temporal_unit``, the temporal loss (weight ``1 - alpha``)
    are accumulated; both inputs are then halved along the time axis with
    ``max_pool1d``. When a single time step remains, only the instance loss
    is added.

    Args:
        z1, z2: Representations of the two views, ``B x T x C``.
        alpha: Weight of the instance-wise term; ``1 - alpha`` weights the
            temporal term. A zero weight skips the corresponding term.
        temporal_unit: First hierarchy level at which the temporal term is used.
        temp: Temperature dividing the similarity logits.

    Returns:
        Scalar tensor: accumulated loss divided by the number of levels.
    """
    loss = torch.tensor(0., device=z1.device)
    d = 0

    while z1.size(1) > 1:
        if alpha != 0:
            loss += alpha * instance_contrastive_loss_mixup(z1, z2, temp)
        if d >= temporal_unit and 1 - alpha != 0:
            loss += (1 - alpha) * temporal_contrastive_loss_mixup(z1, z2, temp)
        d += 1

        z1 = F.max_pool1d(z1.transpose(1, 2), kernel_size=2).transpose(1, 2)
        z2 = F.max_pool1d(z2.transpose(1, 2), kernel_size=2).transpose(1, 2)

    if z1.size(1) == 1:
        if alpha != 0:
            loss += alpha * instance_contrastive_loss_mixup(z1, z2, temp)
        d += 1
    return loss / d


def _drop_self_similarity(sim):
    """Remove the main diagonal of ``sim`` (last two axes), shrinking the last axis by one."""
    logits = torch.tril(sim, diagonal=-1)[:, :, :-1]
    logits += torch.triu(sim, diagonal=1)[:, :, 1:]
    return logits


def temporal_contrastive_loss_mixup(z1, z2, temp=1.0):
    """Temporal contrastive loss with time-shuffled mixup samples as extra negatives.

    For every series, each time step of one view is contrasted against the
    same step of the other view (positive) and against all other steps of
    both views plus their mixup versions (negatives).

    Args:
        z1, z2: ``B x T x C`` representations of the two views.
        temp: Temperature dividing the similarity logits.

    Returns:
        Scalar loss tensor; zero when ``T == 1``.
    """
    T = z1.size(1)
    alpha = 0.2
    beta = 0.2

    if T == 1:
        return z1.new_tensor(0.)

    # Mixup: blend each view with a time-permuted copy of itself.
    uni_z1 = alpha * z1 + (1 - alpha) * z1[:, torch.randperm(z1.shape[1]), :].view(z1.size())
    uni_z2 = beta * z2 + (1 - beta) * z2[:, torch.randperm(z1.shape[1]), :].view(z2.size())

    z = torch.cat([z1, z2, uni_z1, uni_z2], dim=1)  # B x 4T x C

    sim = torch.matmul(z[:, : 2 * T, :], z.transpose(1, 2)) / temp  # B x 2T x 4T
    logits = _drop_self_similarity(sim)  # B x 2T x (4T-1)

    if T > 1500:
        # Long sequences: drop our references to the big intermediates before
        # the softmax. The previous code achieved this by rebinding the names
        # to CPU copies that were never used, paying for a device-to-host copy.
        del z, sim
        if z1.is_cuda:
            torch.cuda.empty_cache()

    logits = -F.log_softmax(logits, dim=-1)

    # Only the columns belonging to the two original views hold positives.
    logits = logits[:, :2 * T, :(2 * T - 1)]

    t = torch.arange(T, device=z1.device)
    # Row t pairs with column T + t (shifted by one after the diagonal was
    # removed); row T + t pairs with column t.
    loss = (logits[:, t, T + t - 1].mean() + logits[:, T + t, t].mean()) / 2
    return loss


def instance_contrastive_loss_mixup(z1, z2, temp=1.0):
    """Instance-wise contrastive loss with batch-shuffled mixup samples as extra negatives.

    For every time step, each series of one view is contrasted against the
    same series in the other view (positive) and against all other series of
    both views plus their mixup versions (negatives).

    Args:
        z1, z2: ``B x T x C`` representations of the two views.
        temp: Temperature dividing the similarity logits.

    Returns:
        Scalar loss tensor; zero when ``B == 1``.
    """
    B = z1.size(0)
    alpha = 0.2
    beta = 0.2

    if B == 1:
        return z1.new_tensor(0.)

    # Mixup: blend each view with a batch-permuted copy of itself.
    uni_z1 = alpha * z1 + (1 - alpha) * z1[torch.randperm(z1.shape[0]), :, :].view(z1.size())
    uni_z2 = beta * z2 + (1 - beta) * z2[torch.randperm(z2.shape[0]), :, :].view(z2.size())

    z = torch.cat([z1, z2, uni_z1, uni_z2], dim=0)  # 4B x T x C
    z = z.transpose(0, 1)  # T x 4B x C
    sim = torch.matmul(z[:, : 2 * B, :], z.transpose(1, 2)) / temp  # T x 2B x 4B

    logits = _drop_self_similarity(sim)  # T x 2B x (4B-1)
    logits = -F.log_softmax(logits, dim=-1)

    logits = logits[:, :2 * B, :(2 * B - 1)]

    i = torch.arange(B, device=z1.device)
    loss = (logits[:, i, B + i - 1].mean() + logits[:, B + i, i].mean()) / 2
    return loss
def fit_lr(features, y, MAX_SAMPLES=100000):
    """Fit a standardised one-vs-rest logistic-regression pipeline.

    When more than ``MAX_SAMPLES`` rows are supplied, a stratified subsample
    of exactly ``MAX_SAMPLES`` rows (fixed seed) is used for fitting.

    Returns:
        The fitted ``StandardScaler -> LogisticRegression`` pipeline.
    """
    n_samples = features.shape[0]
    if n_samples > MAX_SAMPLES:
        # train_test_split returns (X_train, X_test, y_train, y_test);
        # only the training halves are kept.
        features, _, y, _ = train_test_split(
            features, y,
            train_size=MAX_SAMPLES, random_state=0, stratify=y
        )

    classifier = LogisticRegression(
        random_state=0,
        max_iter=1000000,
        multi_class='ovr'
    )
    model = make_pipeline(StandardScaler(), classifier)
    model.fit(features, y)
    return model


def fit_knn(features, y):
    """Fit a standardised 1-nearest-neighbour pipeline and return it."""
    scaler = StandardScaler()
    neighbours = KNeighborsClassifier(n_neighbors=1)
    model = make_pipeline(scaler, neighbours)
    model.fit(features, y)
    return model
# consider delay threshold and missing segments
def get_range_proba(predict, label, delay=7):
    """Apply the delay-tolerant point adjustment to binary predictions.

    ``label`` is cut into maximal runs of equal value. For every anomalous
    run, the whole run is marked as detected (``1``) if ``predict`` contains a
    ``1`` within the first ``delay + 1`` points of the run, otherwise the
    whole run is marked as missed (``0``). Predictions in normal runs are
    left as they are.

    Args:
        predict: 1-D array of 0/1 predictions.
        label: 1-D array of 0/1 ground-truth labels, same length.
        delay: Maximum number of points after a run starts within which a
            detection still counts.

    Returns:
        A new array holding the adjusted predictions.
    """
    splits = np.where(label[1:] != label[:-1])[0] + 1
    is_anomaly = label[0] == 1
    new_predict = np.array(predict)
    pos = 0

    # Treat the end of the series as a final boundary so the trailing run is
    # handled by the same code as every other run.
    for sp in [*splits, len(label)]:
        if is_anomaly:
            if 1 in predict[pos:min(pos + delay + 1, sp)]:
                new_predict[pos:sp] = 1
            else:
                new_predict[pos:sp] = 0
        is_anomaly = not is_anomaly
        pos = sp

    return new_predict


# set missing = 0
def reconstruct_label(timestamp, label):
    """Place ``label`` on a regular time grid, filling missing steps with 0.

    The grid starts at the smallest timestamp and uses the smallest gap
    between consecutive sorted timestamps as its step.

    Args:
        timestamp: Integer-convertible timestamps, in any order.
        label: Integer-convertible values aligned with ``timestamp``.

    Returns:
        ``int64`` array covering the whole span from the first to the last
        timestamp. Inputs with fewer than two timestamps are returned as an
        ``int64`` copy, since no grid step can be derived from them.
    """
    timestamp = np.asarray(timestamp, np.int64)
    index = np.argsort(timestamp)

    timestamp_sorted = np.asarray(timestamp[index])
    label = np.asarray(label, np.int64)
    label = np.asarray(label[index])

    if timestamp_sorted.size < 2:
        # np.diff would be empty and np.min would raise.
        return label.copy()

    # NOTE(review): duplicate timestamps give interval == 0 and an integer
    # division by zero below; callers presumably pass unique timestamps.
    interval = np.min(np.diff(timestamp_sorted))

    idx = (timestamp_sorted - timestamp_sorted[0]) // interval

    # ``np.int`` was removed from NumPy; int64 matches the dtype ``label``
    # was just converted to.
    new_label = np.zeros(
        shape=((timestamp_sorted[-1] - timestamp_sorted[0]) // interval + 1,),
        dtype=np.int64,
    )
    new_label[idx] = label

    return new_label


def eval_ad_result(test_pred_list, test_labels_list, test_timestamps_list, delay):
    """Compute F1, precision and recall over a collection of series.

    Every series is regridded with ``reconstruct_label``, its predictions are
    adjusted with ``get_range_proba``, and the metrics are computed on the
    concatenation of all series.

    Returns:
        Dict with keys ``'f1'``, ``'precision'`` and ``'recall'``.
    """
    labels = []
    pred = []
    for test_pred, test_labels, test_timestamps in zip(test_pred_list, test_labels_list, test_timestamps_list):
        assert test_pred.shape == test_labels.shape == test_timestamps.shape
        test_labels = reconstruct_label(test_timestamps, test_labels)
        test_pred = reconstruct_label(test_timestamps, test_pred)
        test_pred = get_range_proba(test_pred, test_labels, delay)
        labels.append(test_labels)
        pred.append(test_pred)
    labels = np.concatenate(labels)
    pred = np.concatenate(pred)
    return {
        'f1': f1_score(labels, pred),
        'precision': precision_score(labels, pred),
        'recall': recall_score(labels, pred)
    }


def np_shift(arr, num, fill_value=np.nan):
    """Shift ``arr`` by ``num`` positions along axis 0, padding with ``fill_value``.

    A positive ``num`` shifts towards higher indices, a negative one towards
    lower indices; ``0`` returns a copy.
    """
    result = np.empty_like(arr)
    if num > 0:
        result[:num] = fill_value
        result[num:] = arr[:-num]
    elif num < 0:
        result[num:] = fill_value
        result[:num] = arr[-num:]
    else:
        result[:] = arr
    return result
all_train_repr = {} 94 | all_test_repr = {} 95 | all_train_repr_wom = {} 96 | all_test_repr_wom = {} 97 | for i, k in enumerate(all_train_data): 98 | train_data = all_train_data[k] 99 | test_data = all_test_data[k] 100 | 101 | train_ts = (np.array(all_train_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min) 102 | test_ts = (np.array(all_test_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min) 103 | train_data = np.concatenate([train_data.reshape(1, -1, 1), train_ts.reshape(1, -1, 1)], axis=-1) 104 | test_data = np.concatenate([test_data.reshape(1, -1, 1), test_ts.reshape(1, -1, 1)], axis=-1) 105 | data = {'x': np.concatenate([train_data, test_data], axis=1), 106 | 'mask': np.concatenate( 107 | [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)} 108 | 109 | full_repr = model.encode( 110 | # np.concatenate([train_data, test_data]).reshape(1, -1, 1), 111 | data, 112 | mask='mask_last', 113 | casual=True, 114 | sliding_length=1, 115 | sliding_padding=200, 116 | batch_size=256 117 | ).squeeze() 118 | all_train_repr[k] = full_repr[:train_data.shape[1]] 119 | all_test_repr[k] = full_repr[train_data.shape[1]:] 120 | 121 | data = {'x': np.concatenate([train_data, test_data], axis=1), 122 | 'mask': np.concatenate( 123 | [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)} 124 | 125 | full_repr_wom = model.encode( 126 | # np.concatenate([train_data, test_data]).reshape(1, -1, 1), 127 | data, 128 | casual=True, 129 | sliding_length=1, 130 | sliding_padding=200, 131 | batch_size=256 132 | ).squeeze() 133 | all_train_repr_wom[k] = full_repr_wom[:train_data.shape[1]] 134 | all_test_repr_wom[k] = full_repr_wom[train_data.shape[1]:] 135 | 136 | res_log = [] 137 | labels_log = [] 138 | timestamps_log = [] 139 | for k in all_train_data: 140 | train_data = all_train_data[k] 141 | train_labels = all_train_labels[k] 142 | train_timestamps = all_train_timestamps[k] 
143 | 144 | test_data = all_test_data[k] 145 | test_labels = all_test_labels[k] 146 | test_timestamps = all_test_timestamps[k] 147 | 148 | train_err = np.abs(all_train_repr_wom[k] - all_train_repr[k]).sum(axis=1) 149 | test_err = np.abs(all_test_repr_wom[k] - all_test_repr[k]).sum(axis=1) 150 | 151 | ma = np_shift(bn.move_mean(np.concatenate([train_err, test_err]), 21), 1) 152 | train_err_adj = (train_err - ma[:len(train_err)]) / ma[:len(train_err)] 153 | test_err_adj = (test_err - ma[len(train_err):]) / ma[len(train_err):] 154 | train_err_adj = train_err_adj[22:] 155 | 156 | thr = np.mean(train_err_adj) + 4 * np.std(train_err_adj) 157 | test_res = (test_err_adj > thr) * 1 158 | 159 | for i in range(len(test_res)): 160 | if i >= delay and test_res[i - delay:i].sum() >= 1: 161 | test_res[i] = 0 162 | 163 | res_log.append(test_res) 164 | labels_log.append(test_labels) 165 | timestamps_log.append(test_timestamps) 166 | t = time.time() - t 167 | 168 | eval_res = eval_ad_result(res_log, labels_log, timestamps_log, delay) 169 | eval_res['infer_time'] = t 170 | return res_log, eval_res 171 | 172 | 173 | def eval_anomaly_detection_coldstart(model, all_train_data, all_train_labels, all_train_timestamps, all_test_data, 174 | all_test_labels, all_test_timestamps, delay): 175 | t = time.time() 176 | 177 | train_mask, test_mask = all_train_data['mask'], all_test_data['mask'] 178 | all_train_data, all_test_data = all_train_data['x'], all_test_data['x'] 179 | ts = [[np.nanmin(all_train_timestamps[k]), np.nanmax(all_train_timestamps[k])] for k in all_train_timestamps] 180 | ts_max, ts_min = np.max(np.array(ts)), np.min(np.array(ts)) 181 | 182 | all_data = {} 183 | all_repr = {} 184 | all_repr_wom = {} 185 | for i, k in enumerate(all_train_data): 186 | train_data = all_train_data[k] 187 | test_data = all_test_data[k] 188 | 189 | train_ts = (np.array(all_train_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min) 190 | test_ts = 
(np.array(all_test_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min) 191 | train_data = np.concatenate([train_data.reshape(1, -1, 1), train_ts.reshape(1, -1, 1)], axis=-1) 192 | test_data = np.concatenate([test_data.reshape(1, -1, 1), test_ts.reshape(1, -1, 1)], axis=-1) 193 | data = {'x': np.concatenate([train_data, test_data], axis=1), 194 | 'mask': np.concatenate( 195 | [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)} 196 | 197 | all_data[k] = np.concatenate([all_train_data[k], all_test_data[k]]) 198 | all_repr[k] = model.encode( 199 | # all_data[k].reshape(1, -1, 1), 200 | data, 201 | mask='mask_last', 202 | casual=True, 203 | sliding_length=1, 204 | sliding_padding=200, 205 | batch_size=256 206 | ).squeeze() 207 | all_repr_wom[k] = model.encode( 208 | # all_data[k].reshape(1, -1, 1), 209 | data, 210 | casual=True, 211 | sliding_length=1, 212 | sliding_padding=200, 213 | batch_size=256 214 | ).squeeze() 215 | 216 | res_log = [] 217 | labels_log = [] 218 | timestamps_log = [] 219 | for k in all_data: 220 | data = all_data[k] 221 | labels = np.concatenate([all_train_labels[k], all_test_labels[k]]) 222 | timestamps = np.concatenate([all_train_timestamps[k], all_test_timestamps[k]]) 223 | 224 | err = np.abs(all_repr_wom[k] - all_repr[k]).sum(axis=1) 225 | ma = np_shift(bn.move_mean(err, 21), 1) 226 | err_adj = (err - ma) / ma 227 | 228 | MIN_WINDOW = len(data) // 10 229 | thr = bn.move_mean(err_adj, len(err_adj), MIN_WINDOW) + 4 * bn.move_std(err_adj, len(err_adj), MIN_WINDOW) 230 | res = (err_adj > thr) * 1 231 | 232 | for i in range(len(res)): 233 | if i >= delay and res[i - delay:i].sum() >= 1: 234 | res[i] = 0 235 | 236 | res_log.append(res[MIN_WINDOW:]) 237 | labels_log.append(labels[MIN_WINDOW:]) 238 | timestamps_log.append(timestamps[MIN_WINDOW:]) 239 | t = time.time() - t 240 | 241 | eval_res = eval_ad_result(res_log, labels_log, timestamps_log, delay) 242 | eval_res['infer_time'] = t 243 | 
def eval_classification(model, train_data, train_labels, test_data, test_labels, eval_protocol='linear'):
    """Evaluate learned representations with a downstream classifier.

    Args:
        model: Object exposing ``encode(data, encoding_window=...)``.
        train_data, test_data: Inputs forwarded to ``model.encode``.
        train_labels, test_labels: Label arrays. 1-D labels select
            ``encoding_window='full_series'``; 2-D labels keep per-step
            representations, and the first two axes of representations and
            labels are merged before fitting.
        eval_protocol: ``'linear'``, ``'svm'`` or ``'knn'``.

    Returns:
        ``(y_score, {'acc': ..., 'auprc': ...})``.

    Raises:
        AssertionError: If ``train_labels`` is not 1-D or 2-D, or if
            ``eval_protocol`` is unknown.
    """
    assert train_labels.ndim == 1 or train_labels.ndim == 2

    # Validate the protocol before the (expensive) encoding passes, and with
    # an explicit raise so the check survives ``python -O``.
    fitter_names = {'linear': 'fit_lr', 'svm': 'fit_svm', 'knn': 'fit_knn'}
    if eval_protocol not in fitter_names:
        raise AssertionError('unknown evaluation protocol')
    fit_clf = getattr(eval_protocols, fitter_names[eval_protocol])

    encoding_window = 'full_series' if train_labels.ndim == 1 else None
    train_repr = model.encode(train_data, encoding_window=encoding_window)
    test_repr = model.encode(test_data, encoding_window=encoding_window)

    def merge_dim01(array):
        return array.reshape(array.shape[0]*array.shape[1], *array.shape[2:])

    if train_labels.ndim == 2:
        train_repr = merge_dim01(train_repr)
        train_labels = merge_dim01(train_labels)
        test_repr = merge_dim01(test_repr)
        test_labels = merge_dim01(test_labels)

    clf = fit_clf(train_repr, train_labels)

    acc = clf.score(test_repr, test_labels)
    if eval_protocol == 'svm':
        y_score = clf.decision_function(test_repr)
    else:
        # 'linear' and 'knn': KNeighborsClassifier has no decision_function,
        # so the previous fall-through to decision_function failed for 'knn'.
        y_score = clf.predict_proba(test_repr)
    test_labels_onehot = label_binarize(test_labels, classes=np.arange(train_labels.max()+1))
    auprc = average_precision_score(test_labels_onehot, y_score)

    return y_score, { 'acc': acc, 'auprc': auprc}
def generate_pred_samples(features, data, pred_len, drop=0):
    """Pair each time step's features with the next ``pred_len`` observations.

    Args:
        features: ``B x n x C`` representations.
        data: ``B x n x D`` observed values.
        pred_len: Forecast horizon.
        drop: Number of leading time steps discarded from every series.

    Returns:
        ``(X, Y)`` where ``X`` is ``(-1, C)`` and ``Y`` is
        ``(-1, pred_len * D)``; row ``t`` of a series pairs the features at
        ``t`` with the values at ``t + 1 .. t + pred_len``.
    """
    n_steps = data.shape[1]
    # One shifted copy of the series per horizon offset, stacked on a new axis.
    shifted = [data[:, offset:1 + n_steps + offset - pred_len] for offset in range(pred_len)]
    targets = np.stack(shifted, axis=2)[:, 1:]
    inputs = features[:, :-pred_len]

    inputs = inputs[:, drop:]
    targets = targets[:, drop:]

    flat_inputs = inputs.reshape(-1, inputs.shape[-1])
    flat_targets = targets.reshape(-1, targets.shape[2] * targets.shape[3])
    return flat_inputs, flat_targets


def cal_metrics(pred, target):
    """Return the mean squared and mean absolute error as ``{'MSE': ..., 'MAE': ...}``."""
    mse = ((pred - target) ** 2).mean()
    mae = np.abs(pred - target).mean()
    return {'MSE': mse, 'MAE': mae}
def metrics(true, pred, mask):
    """Compute MSE, MAE and RMSE over the entries where ``mask`` is 0.

    ``mask`` uses 1 for observed and 0 for held-out entries; errors are
    averaged over the held-out entries only. If nothing is held out the
    division by zero yields NaN.

    Returns:
        Dict with keys ``'mse'``, ``'mae'`` and ``'rmse'``.
    """
    mask = 1. - mask
    mse = np.power((true - pred) * mask, 2).sum() / mask.sum()
    mae = np.abs((true - pred) * mask).sum() / mask.sum()
    rmse = np.sqrt(mse)
    return {'mse': mse, 'mae': mae, 'rmse': rmse}


def split(data, test_slice, seq_len):
    """Cut the test span of the first series into non-overlapping windows.

    Args:
        data: Mapping with arrays ``'x'`` and ``'mask'`` indexed as
            ``[instance, time, feature]``. Only instance 0 is used.
        test_slice: Index applied to the time axis to select the test span.
        seq_len: Window length. A trailing remainder shorter than ``seq_len``
            is discarded.

    Returns:
        ``(value, mask)`` as float64 tensors of shape
        ``(n_windows, seq_len, n_features)``, backed by fresh memory.
    """
    x, m = data['x'][:, test_slice], data['mask'][:, test_slice]
    n_windows = x.shape[1] // seq_len
    used = n_windows * seq_len

    # One reshape replaces the per-window copy loop. The old loop also carried
    # a bounds check that could never fire, because the window count is
    # already floored. ``np.array`` copies, so the tensors never alias ``data``.
    value = np.array(x[0, :used], dtype=np.float64, order='C').reshape(n_windows, seq_len, x.shape[2])
    mask = np.array(m[0, :used], dtype=np.float64, order='C').reshape(n_windows, seq_len, m.shape[2])
    return torch.from_numpy(value), torch.from_numpy(mask)
def tp_noneffect(func, x, **kwargs):
    """Apply ``func`` to every feature except the last and re-attach the last one.

    The last feature of ``x`` is split off, ``func(x[..., :-1], **kwargs)`` is
    evaluated, and the untouched last feature is concatenated back on the
    last axis.
    """
    tp = x[..., -1:]
    x = func(x[..., :-1], **kwargs)
    return torch.cat([x, tp], dim=-1)


def freq_mix(x, rate=0.5, dim=1):
    """Swap randomly chosen frequency components with those of another batch element.

    Components selected by a random mask (probability ``rate``, restricted by
    the ``index > 2`` mask below) are removed from ``x`` and replaced with the
    corresponding components of a batch-shuffled copy of ``x``.

    Args:
        x: Real tensor; axis 0 is the batch axis, ``dim`` the axis transformed.
        rate: Probability that a component is a candidate for replacement.
        dim: Axis along which the FFT is taken.

    Returns:
        Magnitude of the inverse FFT of the mixed spectrum, same shape as ``x``.
    """
    x_f = torch.fft.fft(x, dim=dim)

    # Draw the mask on the input's own device. The old
    # ``torch.cuda.FloatTensor`` failed on CPU and always allocated on the
    # current CUDA device, which need not be the device holding ``x``.
    m = torch.rand(x_f.shape, device=x_f.device) < rate
    amp = x_f.abs()
    _, index = amp.sort(dim=dim, descending=True)
    # NOTE(review): this thresholds the sort *indices*, i.e. it is laid out in
    # sorted order rather than by frequency bin; kept exactly as before.
    dominant_mask = index > 2
    m = torch.bitwise_and(m, dominant_mask)
    freal = x_f.real.masked_fill(m, 0)
    fimag = x_f.imag.masked_fill(m, 0)

    # Partner series: a random permutation of the batch (NumPy RNG).
    b_idx = np.arange(x.shape[0])
    np.random.shuffle(b_idx)
    x2 = x[b_idx]
    x2_f = torch.fft.fft(x2, dim=dim)

    # The partner contributes exactly the components removed above.
    m = torch.bitwise_not(m)
    freal2 = x2_f.real.masked_fill(m, 0)
    fimag2 = x2_f.imag.masked_fill(m, 0)

    freal += freal2
    fimag += fimag2

    x_f = torch.complex(freal, fimag)

    x = torch.abs(torch.fft.ifft(x_f, dim=dim))
    return x


def freq_dropout(x, dropout_rate=0.5, dim=1):
    """Zero out randomly chosen frequency components of ``x``.

    Args:
        x: Real tensor.
        dropout_rate: Probability that a component is a candidate for removal.
        dim: Axis along which the FFT, the amplitude sort and the inverse FFT
            are taken. Previously the forward FFT ran over the last axis while
            the sort and the inverse ran over axis 1, so the forward and
            inverse transforms did not match for inputs with more than two
            axes.

    Returns:
        Magnitude of the inverse FFT of the thinned spectrum, same shape as ``x``.
    """
    x_aug = x.clone()
    x_aug_f = torch.fft.fft(x_aug, dim=dim)
    # Device-agnostic replacement for ``torch.cuda.FloatTensor(...).uniform_()``.
    m = torch.rand(x_aug_f.shape, device=x_aug_f.device) < dropout_rate
    amp = torch.abs(x_aug_f)
    _, index = amp.sort(dim=dim, descending=True)
    dominant_mask = index > 5
    m = torch.bitwise_and(m, dominant_mask)
    freal = x_aug_f.real.masked_fill(m, 0)
    fimag = x_aug_f.imag.masked_fill(m, 0)
    x_aug_f = torch.complex(freal, fimag)
    x_aug = torch.abs(torch.fft.ifft(x_aug_f, dim=dim))
    return x_aug
def fit(self, train_data, n_epochs=None, n_iters=None, verbose=False, is_scheduler=True, temp=1.0):
    ''' Training the TimesURL model.

    Args:
        train_data (dict): A mapping with the keys 'x' and 'mask' (numpy arrays). 'x' must be 3-dimensional and all missing data should be set to NaN. The last channel of 'x' is split off and handled separately from the other channels (presumably the timestamp channel -- confirm against the data loaders).
        n_epochs (Union[int, NoneType]): The number of epochs. When this reaches, the training stops.
        n_iters (Union[int, NoneType]): The number of iterations. When this reaches, the training stops. If both n_epochs and n_iters are not specified, a default setting would be used that sets n_iters to 200 for a dataset with size <= 100000, 600 otherwise. When self.lr <= 1e-5 the iteration budget is multiplied by 1.2.
        verbose (bool): Whether to print the training loss after each epoch.
        is_scheduler (bool): Whether to attach a CosineAnnealingLR scheduler that is stepped once per epoch.
        temp (float): Forwarded unchanged to hierarchical_contrastive_loss as its `temp` argument.

    Returns:
        loss_log: a list containing the training losses on each epoch.
    '''
    series, obs_mask = train_data['x'], train_data['mask']

    assert series.ndim == 3

    if n_iters is None and n_epochs is None:
        n_iters = 200 if series.size <= 100000 else 600  # default param for n_iters

    if self.lr <= 1e-5 and n_iters is not None:
        # NOTE: this turns n_iters into a float; it is only compared against
        # the integer iteration counter and floor-divided below.
        n_iters *= 1.2

    if self.max_train_length is not None:
        sections = series.shape[1] // self.max_train_length
        if sections >= 2:
            # Cut long series into `sections` pieces stacked along the instance axis.
            series = np.concatenate(split_with_nan(series, sections, axis=1), axis=0)
            obs_mask = np.concatenate(split_with_nan(obs_mask, sections, axis=1), axis=0)

    temporal_missing = np.isnan(series).all(axis=-1).any(axis=0)
    if temporal_missing[0] or temporal_missing[-1]:
        series, obs_mask = centerize_vary_length_series(series, obs_mask)

    # Drop instances whose channels (all but the last one) are NaN everywhere.
    keep = ~np.isnan(series[..., :-1]).all(axis=2).all(axis=1)
    obs_mask = obs_mask[keep]
    series = series[keep]
    obs_mask[np.isnan(obs_mask)] = 0
    x, t = series[..., :-1], series[..., -1:]
    obj = get_unlabeled_pretrain_data(np.concatenate([x, obs_mask, t], axis=-1), self.args)
    train_loader = obj['train_dataloader']

    if self.sgd:
        optimizer = torch.optim.SGD(self._net.parameters(), lr=self.lr, weight_decay=5e-4, momentum=0.9)
    else:
        optimizer = torch.optim.AdamW(self._net.parameters(), lr=self.lr, weight_decay=5e-4)

    scheduler = None
    if is_scheduler:
        if n_iters is not None and n_epochs is None:
            # An iteration budget smaller than one epoch used to give T_max == 0,
            # which makes CosineAnnealingLR divide by zero on the first step();
            # clamp to at least one epoch and keep T_max an integer.
            max_epochs = max(1, int(n_iters // len(train_loader)))
        else:
            max_epochs = n_epochs
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, max_epochs)

    loss_log = []

    while True:
        if n_epochs is not None and self.n_epochs >= n_epochs:
            break

        cum_loss = 0
        n_epoch_iters = 0

        interrupted = False
        for batch in train_loader:
            if n_iters is not None and self.n_iters >= n_iters:
                interrupted = True
                break

            value = batch['value'].to(self.device)
            time = batch['time'].to(self.device)
            batch_mask = batch['mask'].to(self.device)
            mask_origin = batch['mask_origin'].to(self.device)

            optimizer.zero_grad()

            loss = torch.tensor([0.]).to(self.device)
            for seq in range(value.size(1)):
                x, t, m, m_old = value[:, seq], time[:, seq], batch_mask[:, seq], mask_origin[:, seq]
                dim = x.size(-1)
                x = torch.cat([x, t.unsqueeze(2)], dim=-1)

                # Sample two overlapping crops; the order of the random draws
                # below is part of the behaviour (seeded runs) -- do not reorder.
                ts_l = x.size(1)
                crop_l = np.random.randint(low=2 ** (self.temporal_unit + 1), high=ts_l + 1)
                crop_left = np.random.randint(ts_l - crop_l + 1)
                crop_right = crop_left + crop_l
                crop_eleft = np.random.randint(crop_left + 1)
                crop_eright = np.random.randint(low=crop_right, high=ts_l + 1)
                crop_offset = np.random.randint(low=-crop_eleft, high=ts_l - crop_eright + 1, size=x.size(0))

                left_start, left_len = crop_offset + crop_eleft, crop_right - crop_eleft
                right_start, right_len = crop_offset + crop_left, crop_eright - crop_left

                x_left = take_per_row(x, left_start, left_len)
                x_right = tp_noneffect(freq_mix, take_per_row(x, right_start, right_len), rate=0.5)

                # The batch mask carries 2 * dim channels: the first `dim` are fed
                # to the net as 'mask', the rest as 'mask_inter'.
                mask1 = take_per_row(m[..., :dim], left_start, left_len)
                mask2 = take_per_row(m[..., :dim], right_start, right_len)

                mask1_inter = take_per_row(m[..., dim:], left_start, left_len)
                mask2_inter = take_per_row(m[..., dim:], right_start, right_len)

                mask1_origin = take_per_row(m_old, left_start, left_len)
                mask2_origin = take_per_row(m_old, right_start, right_len)

                out1, left_recon = self._net({'data': x_left, 'mask': mask1, 'mask_inter': mask1_inter, 'mask_origin': mask1_origin})
                out2, right_recon = self._net({'data': x_right, 'mask': mask2, 'mask_inter': mask2_inter, 'mask_origin': mask2_origin})

                # Keep only the region shared by both crops.
                out1, left_recon = out1[:, -crop_l:], left_recon[:, -crop_l:]
                out2, right_recon = out2[:, :crop_l], right_recon[:, :crop_l]

                x_left, x_right = x_left[:, -crop_l:], x_right[:, :crop_l]

                mask1, mask2 = mask1[:, -crop_l:], mask2[:, :crop_l]
                mask1_inter, mask2_inter = mask1_inter[:, -crop_l:], mask2_inter[:, :crop_l]

                loss += self.args.lmd * hierarchical_contrastive_loss(
                    out1,
                    out2,
                    temporal_unit=self.temporal_unit,
                    temp=temp
                )

                # Reconstruction error on the positions selected by mask*_inter
                # (the last channel of x_* is excluded from the target).
                if torch.sum(mask1_inter) > 0:
                    loss += torch.sum(torch.pow((x_left[..., :-1] - left_recon) * mask1_inter, 2)) / (
                            torch.sum(mask1_inter) + 1e-10) / 2
                if torch.sum(mask2_inter) > 0:
                    loss += torch.sum(torch.pow((x_right[..., :-1] - right_recon) * mask2_inter, 2)) / (
                            torch.sum(mask2_inter) + 1e-10) / 2

            loss.requires_grad_(True)
            loss.backward()
            optimizer.step()
            # self.net is the AveragedModel tracking self._net.
            self.net.update_parameters(self._net)

            cum_loss += loss.item()
            n_epoch_iters += 1

            self.n_iters += 1

            if self.after_iter_callback is not None:
                self.after_iter_callback(self, loss.item())

        cum_loss /= n_epoch_iters if n_epoch_iters else 1
        loss_log.append(cum_loss)
        if verbose:
            print(f"Epoch #{self.n_epochs}: loss={cum_loss}")
        self.n_epochs += 1
        if is_scheduler:
            scheduler.step()

        if self.after_epoch_callback is not None:
            self.after_epoch_callback(self, cum_loss)

        if interrupted:
            break

    return loss_log
def _eval_with_pooling(self, x, mask=None, slicing=None, encoding_window=None):
    ''' Run self.net on x and max-pool the result over time as requested.

    Args:
        x (torch.Tensor): The input batch; it is moved to self.device first.
        mask: Passed through to self.net as its second argument.
        slicing (Union[slice, NoneType]): Optional selection applied along dim 1 of the output.
        encoding_window (Union[str, int, NoneType]): 'full_series', 'multiscale', an integer kernel size, or None for no pooling.

    Returns:
        torch.Tensor: The (pooled) representations, moved to the CPU.
    '''
    out = self.net(x.to(self.device, non_blocking=True), mask)
    if encoding_window == 'full_series':
        if slicing is not None:
            out = out[:, slicing]
        # One max over the whole (sliced) time axis.
        out = F.max_pool1d(
            out.transpose(1, 2),
            kernel_size=out.size(1),
        ).transpose(1, 2)

    elif isinstance(encoding_window, int):
        out = F.max_pool1d(
            out.transpose(1, 2),
            kernel_size=encoding_window,
            stride=1,
            padding=encoding_window // 2
        ).transpose(1, 2)
        if encoding_window % 2 == 0:
            # An even kernel with this padding yields one extra step; drop it.
            out = out[:, :-1]
        if slicing is not None:
            out = out[:, slicing]

    elif encoding_window == 'multiscale':
        p = 0
        reprs = []
        # Kernel sizes 3, 5, 9, ... while they still fit; results are
        # concatenated along the feature axis.
        while (1 << p) + 1 < out.size(1):
            t_out = F.max_pool1d(
                out.transpose(1, 2),
                kernel_size=(1 << (p + 1)) + 1,
                stride=1,
                padding=1 << p
            ).transpose(1, 2)
            if slicing is not None:
                t_out = t_out[:, slicing]
            reprs.append(t_out)
            p += 1
        out = torch.cat(reprs, dim=-1)

    else:
        if slicing is not None:
            out = out[:, slicing]

    return out.cpu()


def encode(self, data, mask=None, encoding_window=None, casual=False, sliding_length=None, sliding_padding=0,
           batch_size=None):
    ''' Compute representations using the model.

    Args:
        data (Union[numpy.ndarray, dict]): Either a 3-dimensional array of shape (n_instance, n_timestamps, n_features), or a dict with the keys 'x' and 'mask' which are concatenated along the last axis. All missing data should be set to NaN.
        mask (str): The mask used by encoder can be specified with this parameter. This can be set to 'binomial', 'continuous', 'all_true', 'all_false' or 'mask_last'.
        encoding_window (Union[str, int]): When this param is specified, the computed representation would the max pooling over this window. This can be set to 'full_series', 'multiscale' or an integer specifying the pooling kernel size.
        casual (bool): When this param is set to True, the future informations would not be encoded into representation of each timestamp.
        sliding_length (Union[int, NoneType]): The length of sliding window. When this param is specified, a sliding inference would be applied on the time series.
        sliding_padding (int): This param specifies the contextual data length used for inference every sliding windows.
        batch_size (Union[int, NoneType]): The batch size used for inference. If not specified, this would be the same batch size as training.

    Returns:
        repr: The representations for data.
    '''
    assert self.net is not None, 'please train or load a net first'
    assert isinstance(data, dict) or data.ndim == 3
    if batch_size is None:
        batch_size = self.batch_size
    n_samples, ts_l, _ = data.shape if not isinstance(data, dict) else data['x'].shape

    org_training = self.net.training
    self.net.eval()

    # Restore the previous train/eval mode even when inference raises.
    try:
        if isinstance(data, dict):
            data = np.concatenate((data['x'], data['mask']), axis=-1)
        dataset = TensorDataset(torch.from_numpy(data).to(torch.float))
        loader = DataLoader(dataset, batch_size=batch_size)
        window = None
        if sliding_length is not None:
            window = slice(sliding_padding, sliding_padding + sliding_length)

        with torch.no_grad():
            output = []
            for batch in loader:
                x = batch[0]
                if sliding_length is not None:
                    reprs = []
                    # With fewer samples than batch_size, several windows are
                    # stacked into one forward pass.
                    buffered = n_samples < batch_size
                    if buffered:
                        calc_buffer = []
                        calc_buffer_l = 0
                    for i in range(0, ts_l, sliding_length):
                        l = i - sliding_padding
                        r = i + sliding_length + (sliding_padding if not casual else 0)
                        # Windows reaching past either end are NaN-padded.
                        x_sliding = torch_pad_nan(
                            x[:, max(l, 0): min(r, ts_l)],
                            left=-l if l < 0 else 0,
                            right=r - ts_l if r > ts_l else 0,
                            dim=1
                        )
                        if buffered:
                            if calc_buffer_l + n_samples > batch_size:
                                out = self._eval_with_pooling(
                                    torch.cat(calc_buffer, dim=0),
                                    mask,
                                    slicing=window,
                                    encoding_window=encoding_window
                                )
                                reprs += torch.split(out, n_samples)
                                calc_buffer = []
                                calc_buffer_l = 0
                            calc_buffer.append(x_sliding)
                            calc_buffer_l += n_samples
                        else:
                            out = self._eval_with_pooling(
                                x_sliding,
                                mask,
                                slicing=window,
                                encoding_window=encoding_window
                            )
                            reprs.append(out)

                    if buffered and calc_buffer_l > 0:
                        # Flush the windows still waiting in the buffer.
                        out = self._eval_with_pooling(
                            torch.cat(calc_buffer, dim=0),
                            mask,
                            slicing=window,
                            encoding_window=encoding_window
                        )
                        reprs += torch.split(out, n_samples)
                        calc_buffer = []
                        calc_buffer_l = 0

                    out = torch.cat(reprs, dim=1)
                    if encoding_window == 'full_series':
                        # NOTE(review): after the transpose the pooled axis is the
                        # last one, so squeeze(1) only has an effect when the
                        # representation has a single channel -- confirm whether
                        # squeeze(-1) was intended before changing the shape.
                        out = F.max_pool1d(
                            out.transpose(1, 2).contiguous(),
                            kernel_size=out.size(1),
                        ).squeeze(1)
                else:
                    out = self._eval_with_pooling(x, mask, encoding_window=encoding_window)
                    if encoding_window == 'full_series':
                        out = out.squeeze(1)

                output.append(out)

            output = torch.cat(output, dim=0)
    finally:
        self.net.train(org_training)
    return output.numpy()


def save(self, fn):
    ''' Save the model to a file.

    Only the state dict of self.net (the averaged model) is written.

    Args:
        fn (str): filename.
    '''
    torch.save(self.net.state_dict(), fn)


def load(self, fn):
    ''' Load the model from a file.

    The state dict is mapped onto self.device and loaded into self.net.

    Args:
        fn (str): filename.
    '''
    state_dict = torch.load(fn, map_location=self.device)
    self.net.load_state_dict(state_dict)
# File: src/train.py
import copy

import torch
import numpy as np
import argparse
import os
import sys
import time
import datetime
from timesurl import TimesURL
import tasks
import datautils
from utils import init_dl_program, name_with_datetime, pkl_save, data_dropout


def save_checkpoint_callback(
    save_every=1,
    unit='epoch'
):
    ''' Build a callback that periodically saves the model.

    Args:
        save_every (int): A checkpoint is written whenever the counter is a multiple of this value.
        unit (str): 'epoch' or 'iter'; selects model.n_epochs or model.n_iters as the counter.

    Returns:
        A function (model, loss) that saves to f'{run_dir}/model_{n}.pkl'. It reads the
        module-level name `run_dir`, which the script below assigns before training starts.
    '''
    assert unit in ('epoch', 'iter')

    def callback(model, loss):
        n = model.n_epochs if unit == 'epoch' else model.n_iters
        if n % save_every == 0:
            model.save(f'{run_dir}/model_{n}.pkl')
    return callback


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help='The dataset name')
    parser.add_argument('run_name', help='The folder name used to save model, output and evaluation metrics. This can be set to any word')
    parser.add_argument('--loader', type=str, required=True, help='The data loader used to load the experimental data. This can be set to UCR, Others, UEA, forecast_csv, forecast_csv_univar, forecast_npy, forecast_npy_univar, or anomaly')
    parser.add_argument('--gpu', type=int, default=0, help='The gpu no. used for training and inference (defaults to 0)')
    parser.add_argument('--batch-size', type=int, default=8, help='The batch size (defaults to 8)')
    parser.add_argument('--lr', type=float, default=0.0001, help='The learning rate (defaults to 0.0001)')
    parser.add_argument('--repr-dims', type=int, default=320, help='The representation dimension (defaults to 320)')
    parser.add_argument('--max-train-length', type=int, default=3000, help='For sequence with a length greater than <max_train_length>, it would be cropped into some sequences, each of which has a length less than <max_train_length> (defaults to 3000)')
    parser.add_argument('--iters', type=int, default=None, help='The number of iterations')
    parser.add_argument('--epochs', type=int, default=None, help='The number of epochs')
    parser.add_argument('--save-every', type=int, default=None, help='Save the checkpoint every <save_every> iterations/epochs')
    parser.add_argument('--seed', type=int, default=None, help='The random seed')
    parser.add_argument('--max-threads', type=int, default=None, help='The maximum allowed number of threads used by this process')
    parser.add_argument('--eval', action="store_true", help='Whether to perform evaluation after training')
    parser.add_argument('--sgd', action="store_true", help='Train with SGD (and a cosine learning-rate schedule) instead of AdamW')
    parser.add_argument('--load_tp', action="store_true", help='Whether the loaders should return timestamps (currently always forced on below)')
    parser.add_argument('--temp', type=float, default=1.0, help='The temperature passed to the contrastive loss (defaults to 1.0)')
    parser.add_argument('--lmd', type=float, default=0.01, help='The weight of the contrastive loss term (defaults to 0.01)')
    parser.add_argument('--irregular', type=float, default=0, help='The ratio of missing observations (defaults to 0)')
    parser.add_argument('--segment_num', type=int, default=3,
                        help='number of time interval segment to mask, default: 3 time intervals')
    parser.add_argument('--mask_ratio_per_seg', type=float, default=0.05,
                        help='fraction of the sequence length to mask for each time interval, default: 0.05 * seq_len to be masked for each of the time interval')
    # The imputation evaluation reads args.missing_rate; it used to be undeclared,
    # which raised AttributeError only after the whole training run had finished.
    parser.add_argument('--missing_rate', type=float, default=None,
                        help='The missing rate handed to the imputation evaluation (required when --eval is used on an imputation run)')
    args = parser.parse_args()

    print("Dataset:", args.dataset)
    print("Arguments:", str(args))

    device = init_dl_program(args.gpu, seed=args.seed, max_threads=args.max_threads, deterministic=False)

    # Timestamps are always loaded, whatever was given on the command line.
    args.load_tp = True

    print('Loading data... ', end='')
    if args.loader == 'UCR':
        task_type = 'classification'
        train_data, train_labels, test_data, test_labels = datautils.load_UCR(args.dataset, load_tp=args.load_tp)

    elif args.loader == 'Others':
        task_type = 'classification'
        train_data, train_labels, test_data, test_labels = datautils.load_others(args.dataset, load_tp=args.load_tp)

    elif args.loader == 'UEA':
        task_type = 'classification'
        train_data, train_labels, test_data, test_labels = datautils.load_UEA(args.dataset, load_tp=args.load_tp)

    elif args.loader == 'forecast_csv':
        # The task is chosen from the run name: any name containing 'forecast'
        # means forecasting, everything else means imputation.
        task_type = 'forecasting' if 'forecast' in args.run_name else 'imputation'
        offset = 0 if task_type == 'forecasting' else 96
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset, offset=offset, load_tp=args.load_tp)
        train_data = {'x': data['x'][:, train_slice], 'mask': data['mask'][:, train_slice]}

    elif args.loader == 'forecast_csv_univar':
        task_type = 'forecasting' if 'forecast' in args.run_name else 'imputation'
        offset = 0 if task_type == 'forecasting' else 96
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset, offset=offset, univar=True, load_tp=args.load_tp)
        train_data = {'x': data['x'][:, train_slice], 'mask': data['mask'][:, train_slice]}

    elif args.loader == 'forecast_npy':
        task_type = 'forecasting'
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset)
        # NOTE(review): train_data is sliced like an array here, but the code
        # below indexes train_data['x'] -- confirm what load_forecast_npy returns.
        train_data = data[:, train_slice]

    elif args.loader == 'forecast_npy_univar':
        task_type = 'forecasting'
        data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset, univar=True)
        # NOTE(review): same concern as for 'forecast_npy' above.
        train_data = data[:, train_slice]

    elif args.loader == 'anomaly':
        task_type = 'anomaly_detection'
        train_data_task, train_labels, train_timestamps, test_data, test_labels, test_timestamps, delay = datautils.load_anomaly(args.dataset, load_tp=args.load_tp)
        train_data = datautils.gen_ano_train_data(train_data_task['x'])
        train_data = {
            'x': np.concatenate([train_data, datautils.gen_ano_train_data(train_timestamps, train_data.shape[1], True)], axis=-1),
            'mask': train_data_task['mask']}

    else:
        raise ValueError(f"Unknown loader {args.loader}.")

    args.task_type = task_type
    if args.eval and task_type == 'imputation' and args.missing_rate is None:
        # Fail before training instead of after it.
        parser.error('--missing_rate is required to evaluate an imputation run')

    if args.irregular > 0:
        if task_type == 'classification':
            # NOTE(review): data_dropout reads arr.shape, while train_data is
            # indexed as a dict below -- confirm this path works with load_tp.
            train_data = data_dropout(train_data, args.irregular)
            test_data = data_dropout(test_data, args.irregular)
        else:
            raise ValueError(f"Task type {task_type} is not supported when irregular>0.")
    print('done')
    print(train_data['x'].shape)

    config = dict(
        batch_size=args.batch_size,
        lr=args.lr,
        sgd=args.sgd,
        output_dims=args.repr_dims,
        max_train_length=args.max_train_length,
        args=args
    )

    if args.save_every is not None:
        unit = 'epoch' if args.epochs is not None else 'iter'
        config[f'after_{unit}_callback'] = save_checkpoint_callback(args.save_every, unit)

    # run_dir is also read by the checkpoint callback created above.
    run_dir = 'training/' + args.dataset + '__' + name_with_datetime(args.run_name)
    os.makedirs(run_dir, exist_ok=True)

    t = time.time()

    model = TimesURL(
        # The last channel of 'x' is the timestamp when load_tp is set, so it is
        # not counted as an input feature.
        input_dims=train_data['x'].shape[-1] - (1 if args.load_tp else 0),
        device=device,
        **config
    )
    loss_log = model.fit(
        train_data,
        n_epochs=args.epochs,
        n_iters=args.iters,
        verbose=True,
        is_scheduler=bool(args.sgd),
        temp=args.temp
    )
    model.save(f'{run_dir}/model.pkl')

    t = time.time() - t
    print(f"\nTraining time: {datetime.timedelta(seconds=t)}\n")

    if args.eval:
        if task_type == 'classification':
            out, eval_res = tasks.eval_classification(model, train_data, train_labels, test_data, test_labels, eval_protocol='svm')
        elif task_type == 'forecasting':
            out, eval_res = tasks.eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols)
        elif task_type == 'anomaly_detection':
            out, eval_res = tasks.eval_anomaly_detection(model, train_data_task, train_labels, train_timestamps, test_data, test_labels, test_timestamps, delay)
        elif task_type == 'imputation':
            out, eval_res = tasks.eval_imputation(model, data, test_slice, args.missing_rate, n_covariate_cols, device)
        else:
            assert False

        pkl_save(f'{run_dir}/out.pkl', out)
        pkl_save(f'{run_dir}/eval_res.pkl', eval_res)
        print('Evaluation result:', eval_res)

    print("Finished.")
# File: src/utils.py
import os
import numpy as np
import pickle
import torch
import random
from datetime import datetime
from scipy.interpolate import CubicSpline


def pkl_save(name, var):
    ''' Pickle `var` into the file `name`. '''
    with open(name, 'wb') as f:
        pickle.dump(var, f)


def pkl_load(name):
    ''' Load a pickled object from the file `name`.

    NOTE: pickle executes code while loading; only use this on trusted files.
    '''
    with open(name, 'rb') as f:
        return pickle.load(f)


def torch_pad_nan(arr, left=0, right=0, dim=0):
    ''' Pad a tensor with NaN along one dimension.

    Args:
        arr (torch.Tensor): The tensor to pad.
        left (int): Number of NaN entries to prepend along `dim`.
        right (int): Number of NaN entries to append along `dim`.
        dim (int): The dimension to pad.

    Returns:
        torch.Tensor: `arr` itself when nothing has to be added, otherwise a new tensor.
    '''
    # The padding is created on arr's device (the old CPU-only padding made
    # torch.cat fail for tensors living elsewhere) and in arr's dtype when that
    # is a floating dtype. Other dtypes cannot hold NaN, so they keep the
    # default float dtype and torch.cat promotes as it did before.
    pad_dtype = arr.dtype if arr.is_floating_point() else torch.get_default_dtype()

    def _nan_block(length):
        shape = list(arr.shape)
        shape[dim] = length
        return torch.full(shape, np.nan, dtype=pad_dtype, device=arr.device)

    pieces = []
    if left > 0:
        pieces.append(_nan_block(left))
    pieces.append(arr)
    if right > 0:
        pieces.append(_nan_block(right))
    if len(pieces) == 1:
        return arr
    return torch.cat(pieces, dim=dim)


def pad_nan_to_target(array, target_length, axis=0, both_side=False):
    ''' Pad a float array with NaN until it has `target_length` entries along `axis`.

    Args:
        array (numpy.ndarray): A float16/float32/float64 array.
        target_length (int): The wanted length along `axis`; shorter targets return `array` unchanged.
        axis (int): The axis to pad.
        both_side (bool): Split the padding between both ends (the extra element of an odd padding goes to the end) instead of appending it all.

    Returns:
        numpy.ndarray: The padded array.
    '''
    assert array.dtype in [np.float16, np.float32, np.float64]
    pad_size = target_length - array.shape[axis]
    if pad_size <= 0:
        return array
    npad = [(0, 0)] * array.ndim
    if both_side:
        npad[axis] = (pad_size // 2, pad_size - pad_size // 2)
    else:
        npad[axis] = (0, pad_size)
    return np.pad(array, pad_width=npad, mode='constant', constant_values=np.nan)


def split_with_nan(x, sections, axis=0):
    ''' Split `x` into `sections` pieces along `axis`, NaN-padding them to equal length.

    np.array_split puts the longest piece first, so every piece is padded at its
    end up to the length of the first one.

    Returns:
        list: `sections` arrays of identical shape.
    '''
    assert x.dtype in [np.float16, np.float32, np.float64]
    arrs = np.array_split(x, sections, axis=axis)
    target_length = arrs[0].shape[axis]
    return [pad_nan_to_target(piece, target_length, axis=axis) for piece in arrs]


def take_per_row(A, indx, num_elem):
    ''' Take `num_elem` consecutive entries along dim 1 of `A`, starting at `indx[i]` in row i.

    Args:
        A: The tensor to index; dim 0 is the row.
        indx (numpy.ndarray): One start position per row.
        num_elem (int): The window length.
    '''
    all_indx = indx[:, None] + np.arange(num_elem)
    return A[torch.arange(all_indx.shape[0])[:, None], all_indx]


def centerize_vary_length_series(x, mask):
    ''' Roll every series along the time axis so that its NaN padding is balanced on both sides.

    `mask` is re-indexed exactly like `x`.

    Returns:
        tuple: (x, mask) after the per-row circular shift.
    '''
    # Number of leading / trailing timestamps whose features are all NaN.
    prefix_zeros = np.argmax(~np.isnan(x).all(axis=-1), axis=1)
    suffix_zeros = np.argmax(~np.isnan(x[:, ::-1]).all(axis=-1), axis=1)
    offset = (prefix_zeros + suffix_zeros) // 2 - prefix_zeros
    rows, column_indices = np.ogrid[:x.shape[0], :x.shape[1]]
    # Express negative shifts as the equivalent positive circular shift.
    offset[offset < 0] += x.shape[1]
    column_indices = column_indices - offset[:, np.newaxis]
    return x[rows, column_indices], mask[rows, column_indices]


def data_dropout(arr, p):
    ''' Return a copy of `arr` in which a random fraction `p` of the (instance, timestamp) positions is set to NaN.

    Exactly int(B * T * p) positions are chosen without replacement with
    np.random, where B and T are the first two dimensions of `arr`.
    '''
    B, T = arr.shape[0], arr.shape[1]
    # The builtin bool replaces np.bool, which was removed in NumPy 1.24.
    mask = np.full(B * T, False, dtype=bool)
    ele_sel = np.random.choice(
        B * T,
        size=int(B * T * p),
        replace=False
    )
    mask[ele_sel] = True
    res = arr.copy()
    res[mask.reshape(B, T)] = np.nan
    return res
def name_with_datetime(prefix='default'):
    ''' Return `prefix` followed by '_' and the current local time as YYYYmmdd_HHMMSS. '''
    now = datetime.now()
    return prefix + '_' + now.strftime("%Y%m%d_%H%M%S")


def init_dl_program(
    device_name,
    seed=None,
    use_cudnn=True,
    deterministic=False,
    benchmark=False,
    use_tf32=False,
    max_threads=None
):
    ''' Configure threads, random seeds and cuDNN flags, and resolve the device(s).

    Args:
        device_name (Union[str, int, list]): One device specifier or a sequence of them, each accepted by torch.device.
        seed (Union[int, NoneType]): Base seed. random, numpy, torch and then every CUDA device are seeded with consecutive values starting from it.
        use_cudnn (bool): Value for torch.backends.cudnn.enabled.
        deterministic (bool): Value for torch.backends.cudnn.deterministic.
        benchmark (bool): Value for torch.backends.cudnn.benchmark.
        use_tf32 (bool): Value for the cuDNN and CUDA matmul allow_tf32 flags, when this torch build has them.
        max_threads (Union[int, NoneType]): Thread limit applied to torch (intra-op and inter-op) and, if installed, to mkl.

    Returns:
        A single torch.device, or a list of them when more than one was requested.
    '''
    if max_threads is not None:
        torch.set_num_threads(max_threads)  # intraop
        if torch.get_num_interop_threads() != max_threads:
            torch.set_num_interop_threads(max_threads)  # interop
        # mkl is optional. Only a missing module is tolerated; the former bare
        # `except:` also swallowed unrelated errors and KeyboardInterrupt.
        try:
            import mkl
        except ImportError:
            pass
        else:
            mkl.set_num_threads(max_threads)

    if seed is not None:
        random.seed(seed)
        seed += 1
        np.random.seed(seed)
        seed += 1
        torch.manual_seed(seed)

    if isinstance(device_name, (str, int)):
        device_name = [device_name]

    devices = []
    # Walk the devices back to front so that the first one listed ends up as
    # the current CUDA device.
    for t in reversed(device_name):
        t_device = torch.device(t)
        devices.append(t_device)
        if t_device.type == 'cuda':
            assert torch.cuda.is_available()
            torch.cuda.set_device(t_device)
            if seed is not None:
                seed += 1
                torch.cuda.manual_seed(seed)
    devices.reverse()
    torch.backends.cudnn.enabled = use_cudnn
    torch.backends.cudnn.deterministic = deterministic
    torch.backends.cudnn.benchmark = benchmark

    if hasattr(torch.backends.cudnn, 'allow_tf32'):
        torch.backends.cudnn.allow_tf32 = use_tf32
        torch.backends.cuda.matmul.allow_tf32 = use_tf32

    return devices if len(devices) > 1 else devices[0]


def convert_coeff(x, eps=1e-6):
    ''' Split a complex tensor into amplitude and phase.

    `eps` is added to the real and the imaginary part before the amplitude is
    taken, and to the real part only inside atan2.

    Returns:
        tuple: (amp, phase)
    '''
    amp = torch.sqrt((x.real + eps).pow(2) + (x.imag + eps).pow(2))
    phase = torch.atan2(x.imag, x.real + eps)
    return amp, phase


def hierarchical_x(x, mask):
    ''' Build a pyramid of `x` by repeatedly max-pooling pairs of neighbouring timestamps.

    At every level `mask` is gathered at the positions that won the max in `x`,
    so both stay aligned.

    Args:
        x (torch.Tensor): Indexed as (B, T, C) by the code below.
        mask (torch.Tensor): Indexed with the same (b, t, c) positions as `x`.

    Returns:
        list: One {'x': ..., 'mask': ...} dict per level, starting with the inputs and ending when a single timestamp is left.
    '''
    hi_x, B, C = [{'x': x, 'mask': mask}], x.size(0), x.size(2)
    while x.size(1) > 1:
        if x.size(1) % 2 != 0:
            # Odd length: add a -inf timestamp so the last pair is complete.
            # NOTE(review): mask is not padded alongside x -- this relies on the
            # -inf entry never winning the max.
            x = torch.cat((x, -np.inf * torch.ones(B, 1, C, device = x.device)), dim = 1)
        # obtain max index
        _, t_index = torch.max(x.permute(0, 2, 1).reshape(B, C, -1, 2).permute(0, 3, 2, 1), dim = 1)

        # fixed max index
        t_index = (t_index.transpose(1, 2) + torch.arange(0, x.size(1), 2, device = x.device)).transpose(1, 2).reshape(-1)
        # create B, C index
        b_index = torch.arange(B, device = x.device).reshape(-1, 1).repeat(1, x.size(1) // 2 * C).reshape(-1)
        c_index = torch.arange(C, device = x.device).repeat(B * x.size(1) // 2)

        # achieve max representations
        x, mask = x[(b_index, t_index, c_index)].reshape(B, -1, C), mask[(b_index, t_index, c_index)].reshape(B, -1, C)
        hi_x.append({'x': x, 'mask': mask})
    return hi_x
def generate_mask(data, p = 0.5, remain = 0):
    ''' Draw a random 0/1 mask over the observed timestamps of every instance.

    A timestamp counts as observed when channel 0 is not NaN. Among the observed
    timestamps of an instance (minus the last `remain` ones) a share `p` of all
    (timestamp, channel) cells is set to 1 using `random.sample`; the last
    `remain` observed timestamps are always 1 and unobserved timestamps are NaN.

    Args:
        data (numpy.ndarray): An array of shape (B, T, C).
        p (float): Share of cells set to 1.
        remain (int): Number of trailing observed timestamps that are forced to 1.

    Returns:
        numpy.ndarray: The mask, with the shape and dtype of `data`.
    '''
    B, T, C = data.shape
    mask = np.empty_like(data)

    for b in range(B):
        ts = data[b, :, 0]
        observed = ~np.isnan(ts)
        et_num = ts[observed].size - remain
        total, num = et_num * C, round(et_num * C * p)

        # A single draw per instance. The old `while True` loop always left
        # after its first pass (unconditional break), so its "no channel with
        # 0 or 1 selected cells" test never caused a re-draw.
        i_mask = np.zeros(total)
        i_mask[random.sample(range(total), num)] = 1
        i_mask = i_mask.reshape(et_num, C)

        i_mask = np.concatenate((i_mask, np.ones((remain, C))), axis = 0)
        mask[b, observed, :] = i_mask
        mask[b, ~observed, :] = np.nan

    return mask


def interpolate_cubic_spline(data, mask, p = 1):
    ''' Overwrite masked-out entries of `data` with a cubic-spline interpolation, in place.

    The spline is fitted on the positions where mask == 1 and the value is not
    NaN; a random share `p` of the positions with mask == 0 (and a non-NaN
    value) is replaced. Only first-axis indices are used, so this is meant for
    1-dimensional `data`.

    Returns:
        numpy.ndarray: `data` itself, after modification.
    '''
    valid = ~np.isnan(data)
    normal = np.where((mask == 1) & valid)[0]
    missing = np.where((mask == 0) & valid)[0]
    cs = CubicSpline(normal, data[normal])
    num = int(missing.size * p)
    # Random subset of the candidates (np.random, one draw per candidate).
    missing = missing[np.argsort(np.random.random(missing.size))[:num]]
    data[missing] = cs(missing)
    return data


def inter_cubic_sp_torch(data, mask, p = 1):
    ''' Tensor wrapper around interpolate_cubic_spline; the result is returned on data's device.

    NOTE(review): for CPU tensors the numpy view shares memory with `data`, so
    the input is modified in place as well; for other devices it is not.
    '''
    device = data.device
    return torch.from_numpy(interpolate_cubic_spline(data.cpu().detach().numpy(), mask.cpu().detach().numpy(), p)).to(device)


def generate_uni(data, mask, alpha):
    ''' Blend every timestamp with the mean of all the other timestamps.

    Returns (1 - alpha) * mean_of_others + alpha * data, with the mean taken
    over dim 1. `mask` is accepted but not used. A single timestamp divides by
    zero.
    '''
    n = data.size(1)
    neg = (data.sum(dim = 1).unsqueeze(1).repeat(1, n, 1) - data) / (n - 1)
    return (1 - alpha) * neg + alpha * data


def generate_uni_p(data, mask, alpha):
    ''' Variant of generate_uni in which `data` is first scaled by the mean of `mask` over dim 1.

    The average over the other timestamps is normalised by the summed weights
    of those timestamps instead of by their count.
    '''
    steps = mask.size(1)
    p = mask.mean(dim = 1).unsqueeze(1).repeat(1, steps, 1)
    data = p * data
    neg = (data.sum(dim = 1).unsqueeze(1).repeat(1, steps, 1) - data) / \
          (p.sum(dim = 1).unsqueeze(1).repeat(1, steps, 1) - p)
    return (1 - alpha) * neg + alpha * data


def normalize_with_mask(train, mask_tr, test, mask_te, scaler):
    ''' Fit `scaler` on the unmasked training values and scale train and test with it.

    Cells with mask == 0 are NaN while the scaler is fitted and applied, and 0
    in the returned arrays.

    Args:
        train, test (numpy.ndarray): The data; the last axis holds the features.
        mask_tr, mask_te (numpy.ndarray): Masks of the same shape; 0 marks a missing cell.
        scaler: An object with fit(X) returning the fitted scaler and transform(X).

    Returns:
        tuple: (train, test) as new arrays.
    '''
    # Work on copies: writing the NaN placeholders straight into the arguments
    # used to leave NaNs behind in the caller's arrays.
    train, test = train.copy(), test.copy()
    train[mask_tr == 0], test[mask_te == 0] = np.nan, np.nan
    scaler = scaler.fit(train.reshape(-1, train.shape[-1]))
    train = scaler.transform(train.reshape(-1, train.shape[-1])).reshape(train.shape)
    test = scaler.transform(test.reshape(-1, test.shape[-1])).reshape(test.shape)
    train[mask_tr == 0], test[mask_te == 0] = 0, 0
    return train, test


if __name__ == '__main__':
    # Smoke test: build the pyramid for a random batch.
    B, T, C = 3, 10, 3
    x = torch.randn((B, T, C))
    dict_x = hierarchical_x(x, x)
    print('ok')