├── .gitignore
├── AAAI24_appendix.pdf
├── LICENSE
├── README.md
└── src
    ├── augmentations.py
    ├── collator.py
    ├── datautils.py
    ├── lib.py
    ├── models
        ├── __init__.py
        ├── attention.py
        ├── backbone.py
        ├── dilated_conv.py
        ├── encoder.py
        └── losses.py
    ├── tasks
        ├── __init__.py
        ├── _eval_protocols.py
        ├── anomaly_detection.py
        ├── classification.py
        ├── forecasting.py
        └── imputation.py
    ├── timesurl.py
    ├── train.py
    └── utils.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # poetry
 98 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
 99 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
100 | #   commonly ignored for libraries.
101 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 | 
104 | # pdm
105 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | #   in version control.
109 | #   https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 | 
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 | 
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 | 
119 | # SageMath parsed files
120 | *.sage.py
121 | 
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 | 
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 | 
135 | # Rope project settings
136 | .ropeproject
137 | 
138 | # mkdocs documentation
139 | /site
140 | 
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 | 
146 | # Pyre type checker
147 | .pyre/
148 | 
149 | # pytype static type analyzer
150 | .pytype/
151 | 
152 | # Cython debug symbols
153 | cython_debug/
154 | 
155 | # PyCharm
156 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
159 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | 


--------------------------------------------------------------------------------
/AAAI24_appendix.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Alrash/TimesURL/d3533e45cb28efe8c986f13ce8d80926d0e9254e/AAAI24_appendix.pdf


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Alrash
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TimesURL
 2 | The implementation of "TimesURL: Self-supervised Contrastive Learning for Universal Time Series Representation Learning"
 3 | <img src="https://github.com/Alrash/TimesURL/assets/30361341/e80fd603-c3ca-49a7-a00f-87dfd5327d2a"  />
 4 | 
 5 | Paper: [arXiv](https://arxiv.org/abs/2312.15709) or [AAAI](https://ojs.aaai.org/index.php/AAAI/article/view/29299/30450)
 6 | 
 7 | Video: [Video](https://underline.io/lecture/93776-timesurl-self-supervised-contrastive-learning-for-universal-time-series-representation-learning-video)
 8 | 
 9 | Appendix: [Appendix](https://github.com/Alrash/TimesURL/blob/main/AAAI24_appendix.pdf)
10 | ## Code
11 | This code is based on [TS2Vec](https://github.com/yuezhihan/ts2vec).
12 | 
13 | ## Citation
14 | ```
15 | @inproceedings{liu2024timesurl,
16 |   title={Timesurl: Self-supervised contrastive learning for universal time series representation learning},
17 |   author={Liu, Jiexi and Chen, Songcan},
18 |   booktitle={Proceedings of the AAAI Conference on Artificial Intelligence},
19 |   volume={38},
20 |   number={12},
21 |   pages={13918--13926},
22 |   year={2024}
23 | }
24 | ```
25 | ## Acknowledgement
26 | [TS2Vec](https://github.com/yuezhihan/ts2vec)
27 | 
28 | [FrAug](https://anonymous.4open.science/r/Fraug-more-results-1785/README.md)
29 | 
30 | ## Email
31 | ```
32 | liujiexi@nuaa.edu.cn
33 | alrash@nuaa.edu.cn
34 | ```
35 | 


--------------------------------------------------------------------------------
/src/augmentations.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | 
  4 | def one_hot_encoding(X):
  5 |     X = [int(x) for x in X]
  6 |     n_values = np.max(X) + 1
  7 |     b = np.eye(n_values)[X]
  8 |     return b
  9 | 
 10 | def DataTransform(sample, config):
 11 |     """Weak and strong augmentations"""
 12 |     weak_aug = scaling(sample, config.augmentation.jitter_scale_ratio)
 13 |     # weak_aug = permutation(sample, max_segments=config.augmentation.max_seg)
 14 |     strong_aug = jitter(permutation(sample, max_segments=config.augmentation.max_seg), config.augmentation.jitter_ratio)
 15 | 
 16 |     return weak_aug, strong_aug
 17 | 
 18 | # def DataTransform_TD(sample, config):
 19 | #     """Weak and strong augmentations"""
 20 | #     weak_aug = sample
 21 | #     strong_aug = jitter(permutation(sample, max_segments=config.augmentation.max_seg), config.augmentation.jitter_ratio) #masking(sample)
 22 | #     return weak_aug, strong_aug
 23 | #
 24 | # def DataTransform_FD(sample, config):
 25 | #     """Weak and strong augmentations in Frequency domain """
 26 | #     # weak_aug =  remove_frequency(sample, 0.1)
 27 | #     strong_aug = add_frequency(sample, 0.1)
 28 | #     return weak_aug, strong_aug
 29 | def DataTransform_TD(sample, config):
 30 |     """Weak and strong augmentations"""
 31 |     aug_1 = jitter(sample, config.augmentation.jitter_ratio)
 32 |     aug_2 = scaling(sample, config.augmentation.jitter_scale_ratio)
 33 |     aug_3 = permutation(sample, max_segments=config.augmentation.max_seg)
 34 | 
 35 |     li = np.random.randint(0, 4, size=[sample.shape[0]]) # there are two augmentations in Frequency domain
 36 |     li_onehot = one_hot_encoding(li)
 37 |     aug_1[1-li_onehot[:, 0]] = 0 # the rows are not selected are set as zero.
 38 |     aug_2[1 - li_onehot[:, 1]] = 0
 39 |     aug_3[1 - li_onehot[:, 2]] = 0
 40 |     # aug_4[1 - li_onehot[:, 3]] = 0
 41 |     aug_T = aug_1 + aug_2 + aug_3 #+aug_4
 42 |     return aug_T
 43 | 
 44 | 
 45 | def DataTransform_FD(sample, config):
 46 |     """Weak and strong augmentations in Frequency domain """
 47 |     aug_1 =  remove_frequency(sample, 0.1)
 48 |     aug_2 = add_frequency(sample, 0.1)
 49 |     # generate random sequence
 50 |     li = np.random.randint(0, 2, size=[sample.shape[0]]) # there are two augmentations in Frequency domain
 51 |     li_onehot = one_hot_encoding(li)
 52 |     aug_1[1-li_onehot[:, 0]] = 0 # the rows are not selected are set as zero.
 53 |     aug_2[1 - li_onehot[:, 1]] = 0
 54 |     aug_F = aug_1 + aug_2
 55 |     return aug_F
 56 | 
 57 | 
 58 | 
 59 | def generate_binomial_mask(B, T, D, p=0.5):
 60 |     return torch.from_numpy(np.random.binomial(1, p, size=(B, T, D))).to(torch.bool)
 61 | 
 62 | def masking(x, mask= 'binomial'):
 63 |     nan_mask = ~x.isnan().any(axis=-1)
 64 |     x[~nan_mask] = 0
 65 |     # x = self.input_fc(x)  # B x T x Ch
 66 | 
 67 |     if mask == 'binomial':
 68 |         mask_id = generate_binomial_mask(x.size(0), x.size(1), x.size(2), p=0.9).to(x.device)
 69 |     # elif mask == 'continuous':
 70 |     #     mask = generate_continuous_mask(x.size(0), x.size(1)).to(x.device)
 71 |     # elif mask == 'all_true':
 72 |     #     mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
 73 |     # elif mask == 'all_false':
 74 |     #     mask = x.new_full((x.size(0), x.size(1)), False, dtype=torch.bool)
 75 |     # elif mask == 'mask_last':
 76 |     #     mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
 77 |     #     mask[:, -1] = False
 78 | 
 79 |     # mask &= nan_mask
 80 |     x[~mask_id] = 0
 81 |     return x
 82 | 
 83 | def jitter(x, sigma=0.8):
 84 |     return x + np.random.normal(loc=0., scale=sigma, size=x.shape)
 85 | 
 86 | 
 87 | def scaling(x, sigma=1.1):
 88 |     factor = np.random.normal(loc=2., scale=sigma, size=(x.shape[0], x.shape[2]))
 89 |     ai = []
 90 |     for i in range(x.shape[1]):
 91 |         xi = x[:, i, :]
 92 |         ai.append(np.multiply(xi, factor[:, :])[:, np.newaxis, :])
 93 |     return np.concatenate((ai), axis=1)
 94 | 
 95 | def permutation(x, max_segments=5, seg_mode="random"):
 96 |     orig_steps = np.arange(x.shape[2])
 97 | 
 98 |     num_segs = np.random.randint(1, max_segments, size=(x.shape[0]))
 99 | 
100 |     ret = np.zeros_like(x)
101 |     for i, pat in enumerate(x):
102 |         if num_segs[i] > 1:
103 |             if seg_mode == "random":
104 |                 split_points = np.random.choice(x.shape[2] - 2, num_segs[i] - 1, replace=False)
105 |                 split_points.sort()
106 |                 splits = np.split(orig_steps, split_points)
107 |             else:
108 |                 splits = np.array_split(orig_steps, num_segs[i])
109 |             warp = np.concatenate(np.random.permutation(splits)).ravel()
110 |             ret[i] = pat[0,warp]
111 |         else:
112 |             ret[i] = pat
113 |     return torch.from_numpy(ret)
114 | 
def remove_frequency(x, maskout_ratio=0):
    """Randomly zero out elements of ``x``.

    A uniform random number in ``[0, 1)`` is drawn for every element; the
    element is kept when its draw is greater than ``maskout_ratio`` and set
    to zero otherwise.

    Args:
        x: input tensor (CPU or CUDA).
        maskout_ratio: threshold for the uniform draw.

    Returns:
        A new tensor ``x * mask`` on the same device as ``x``.
    """
    # Draw directly on x's device so the function also works without CUDA.
    keep = torch.rand(x.shape, device=x.device) > maskout_ratio
    return x * keep
119 | 
def add_frequency(x, pertub_ratio=0,):
    """Add small random perturbations to a random subset of elements of ``x``.

    A uniform random number in ``[0, 1)`` is drawn for every element; the
    element is perturbed when its draw is greater than ``1 - pertub_ratio``.
    The perturbation is a second uniform draw scaled by ``0.1 * x.max()``.

    Args:
        x: input tensor (CPU or CUDA).
        pertub_ratio: controls the selection threshold ``1 - pertub_ratio``.

    Returns:
        A new tensor ``x + perturbation``.
    """
    # Both random tensors are created on x's device: this removes the hard
    # CUDA requirement and avoids mixing a CPU amplitude tensor with a
    # CUDA selection mask.
    selected = torch.rand(x.shape, device=x.device) > (1 - pertub_ratio)
    max_amplitude = x.max()
    random_am = torch.rand(selected.shape, device=x.device) * (max_amplitude * 0.1)
    pertub_matrix = selected * random_am
    return x + pertub_matrix


--------------------------------------------------------------------------------
/src/collator.py:
--------------------------------------------------------------------------------
  1 | from argparse import Namespace
  2 | import numpy as np, math
  3 | import random
  4 | import torch
  5 | from dataclasses import dataclass
  6 | 
  7 | 
@dataclass
class CLDataCollator:
    """Collate ``(values, times, mask)`` instances into two padded views.

    Every instance is split into two disjoint sub-sequences (see
    ``_per_seq_sampling``).  Both are written into zero-initialised tensors of
    length ``max_len``.  When ``pretrain_tasks == 'full2'`` each view's mask is
    additionally extended with a same-sized mask marking the observations that
    were hidden by ``_seg_masking``.
    """
    # Dataclass fields (constructor arguments).
    max_len: int  # padded length of the second-to-last axis of the output tensors
    args: Namespace  # read for ``mask_ratio_per_seg`` and ``segment_num``

    # The names below carry no annotation, so they are plain class attributes
    # shared by all instances, not dataclass fields.
    len_sampling_bound = [0.3, 0.7]  # range of the fraction of time steps put into the first view
    dense_sampling_bound = [0.4, 0.6]  # range of the fraction of the first view taken from the dense half
    pretrain_tasks = 'full2'

    # Values previously hard-coded here and now read from ``self.args``:
    # mask_ratio_per_seg = 0.15
    # segment_num = 1

    def __call__(self, batch):
        """Build the padded batch dictionary.

        Args:
            batch: sequence of ``(values, times, mask)`` instances.  The
                feature count ``D`` is taken from ``batch[0][0].size(1)``.

        Returns:
            dict with keys ``'value'`` (batch, 2, max_len, D), ``'time'``
            (batch, 2, max_len), ``'mask'`` (batch, 2, max_len, 2*D for
            ``'full2'``, otherwise D) and ``'mask_origin'``
            (batch, 2, max_len, D).  Index 0/1 on the second axis selects the
            first/second view; positions beyond a view's length stay zero.
        """

        batch_size = len(batch)
        D = batch[0][0].size(1)

        time_batch = torch.zeros([batch_size, 2, self.max_len])
        value_batch = torch.zeros([batch_size, 2, self.max_len, D])
        if self.pretrain_tasks == 'full2':
            # _seg_masking returns the mask concatenated with the interpolation mask.
            mask_batch = torch.zeros([batch_size, 2, self.max_len, 2 * D])
        else:
            mask_batch = torch.zeros([batch_size, 2, self.max_len, D])

        mask_old_batch = torch.zeros([batch_size, 2, self.max_len, D])
        for idx, instance in enumerate(batch):
            seq1, seq2 = self._per_seq_sampling(instance)

            v1, t1, m1, m1_old = seq1
            v2, t2, m2, m2_old = seq2

            len1 = v1.size(0)
            len2 = v2.size(0)

            # First view.
            value_batch[idx, 0, :len1] = v1
            time_batch[idx, 0, :len1] = t1
            mask_batch[idx, 0, :len1] = m1
            mask_old_batch[idx, 0, :len1] = m1_old

            # Second view.
            value_batch[idx, 1, :len2] = v2
            time_batch[idx, 1, :len2] = t2
            mask_batch[idx, 1, :len2] = m2
            mask_old_batch[idx, 1, :len2] = m2_old

        return {'value': value_batch, 'time': time_batch, 'mask': mask_batch, 'mask_origin': mask_old_batch}

    def _per_seq_sampling(self, instance):
        '''
        Split one instance into two disjoint sub-sequences.

        ``instance`` unpacks to ``(values, times, mask)``:
        - times is a 1-dimensional tensor containing T time values of observations.
        - values is a (T, D) tensor containing observed values for D variables.
        - mask is a (T, D) tensor containing 1 where values were observed and 0 otherwise.

        Time steps whose index is returned by ``_time_sensitive_cl`` form the
        first sub-sequence; all remaining steps form the second one.

        Returns ``(v1, t1, m1, m1_old), (v2, t2, m2, m2_old)`` where the
        ``*_old`` masks are clones taken before segment masking.  With
        ``pretrain_tasks == 'full2'`` ``m1``/``m2`` are the (T, 2*D) outputs
        of ``_seg_masking``.
        '''

        values, times, mask = instance

        # selected_indices = self._random_sampling_cl(values) # Random Anchor and Positive
        selected_indices = self._time_sensitive_cl(times)  # Anchor and Positive based on sampling density

        v1, t1, m1, v2, t2, m2 = [], [], [], [], [], []

        for idx, (v, t, m) in enumerate(zip(values, times, mask)):

            if idx in selected_indices:
                v1.append(v)
                t1.append(t)
                m1.append(m)

            else:
                v2.append(v)
                t2.append(t)
                m2.append(m)

        v1 = torch.stack(v1, dim=0)
        t1 = torch.stack(t1, dim=0)
        m1 = torch.stack(m1, dim=0)

        v2 = torch.stack(v2, dim=0)
        t2 = torch.stack(t2, dim=0)
        m2 = torch.stack(m2, dim=0)

        # Keep the unmasked versions: _seg_masking modifies its mask argument in place.
        m1_old, m2_old = m1.clone(), m2.clone()
        if self.pretrain_tasks == 'full2':
            T, D = m1.shape

            m1 = self._seg_masking(mask=m1, timestamps=t1)
            m2 = self._seg_masking(mask=m2, timestamps=t2)

        return (v1, t1, m1, m1_old), (v2, t2, m2, m2_old)

    def _random_sampling_cl(self, values):
        """Return a random set of indices into ``values``.

        The set size is a fraction of ``len(values)`` drawn uniformly from
        ``len_sampling_bound`` (truncated to int, at least 1).  Currently only
        referenced from a commented-out line in ``_per_seq_sampling``.
        """
        indices = list(range(len(values)))
        random.shuffle(indices)

        length = int(np.random.uniform(self.len_sampling_bound[0], self.len_sampling_bound[1], 1)[0] * len(indices))
        length = max(length, 1)

        selected_indices = set(indices[: length])

        return selected_indices

    def _time_sensitive_cl(self, timestamps):
        """Select time-step indices by mixing densely and sparsely sampled steps.

        Each step gets the average of the gaps to its neighbours; steps are
        sorted by that average and split into a "dense" half (smallest gaps)
        and a "sparse" half.  A random number of indices is then taken from
        each half.

        Args:
            timestamps: tensor with one time value per step; it is reshaped to
                1-D.  Indexing ``times[1]`` and ``times[-2]`` means at least
                two steps are required.

        Returns:
            set of selected step indices.
        """

        times = torch.clone(timestamps)
        times = times.reshape(times.shape[0])

        # compute average of pre- and post- interval time for each timestep, except the first and last
        avg_interval_times = [(((times[i] - times[i - 1]) + (times[i + 1] - times[i])) / 2) for i in
                              range(1, times.shape[0] - 1)]
        avg_interval_times.append(times[-1] - times[-2])  # pre-interval time for last timestep becomes its average
        avg_interval_times.insert(0, times[1] - times[0])  # post-interval time for first timestep becomes its average

        # sort the interval times and save its corresponding index, timestep
        # after sorting, the first section would contain the lowest interval times -> dense regions of the sample
        # last section would contain the highest interval times -> sparse regions of the sample
        pairs = [(idx, time, avg_interval_time) for idx, (time, avg_interval_time) in
                 enumerate(zip(times, avg_interval_times))]
        pairs.sort(key=lambda pairs: pairs[2])
        indices = [idx for idx, time, avg_interval_time in pairs]

        # length of the anchor/positive sample
        length = int(np.random.uniform(self.len_sampling_bound[0], self.len_sampling_bound[1], 1)[0] * times.shape[0])
        length = max(length, 1)

        # select the indices with the most dense sampling frequency, i.e. minimum time interval
        # selected_indices = set([idx for idx, time, avg_interval_time in pairs[ : length]])

        # alternate between dense and sparse sample, i.e. samples located in dense and sparse regions
        # (disabled variant, kept below as an unused string literal)
        '''
        front, end = 0, len(pairs) - 1
        selected_indices = []
        for i in range(length):
            if i % 2 == 0:
                selected_indices.append(pairs[front][0])
                front += 2
            else:
                selected_indices.append(pairs[end][0])
                end -= 2
        '''

        # divide samples in pairs into two regions -> sparse (50%) and dense(50%)
        # sample a fraction, f, of the samples from the dense and the remaining, (1-f), of the samples from the sparse region
        dense_indices = indices[: int(len(indices) / 2)]
        random.shuffle(dense_indices)
        sparse_indices = indices[int(len(indices) / 2):]
        random.shuffle(sparse_indices)

        # 5 - random dense, random sparse CL
        dense_length = int(np.random.uniform(self.dense_sampling_bound[0], self.dense_sampling_bound[1], 1)[0] * length)
        dense_length = max(dense_length, 1)
        sparse_length = length - dense_length

        # 6 - 50% dense, 50% sparse CL
        # dense_length = int(0.5 * length)
        # sparse_length = length - dense_length

        selected_dense_indices = dense_indices[: dense_length]
        selected_sparse_indices = sparse_indices[: sparse_length]
        selected_dense_indices.extend(selected_sparse_indices)
        selected_indices = set(selected_dense_indices)

        return selected_indices

    def _seg_masking(self, mask=None, timestamps=None):

        '''
        Hide a segment of observations per feature and record what was hidden.

        - mask is a (T, D) tensor; it is modified in place (hidden positions are set to 0)
        - timestamps holds one time value per step (reshaped to 1-D by the sampler)
        - return: (T, 2*D) tensor, the updated mask concatenated on the last
          axis with an interpolation mask that is 1 at the hidden positions
        '''

        D = mask.size(1)
        interp_mask = torch.zeros_like(mask)

        for dim in range(D):
            # length of each masked segment is constant
            # seg_pos = self._constant_length_sampling(mask[ : , dim])

            # time of each masked segment is constant: length of each masked segment may vary depending on the density of the sample in the masked region
            seg_pos = self._time_sensitive_sampling(mask[:, dim], timestamps)

            if len(seg_pos) > 0:
                mask[seg_pos, dim] = 0.0
                interp_mask[seg_pos, dim] = 1.0

        return torch.cat([mask, interp_mask], dim=-1)

    def _constant_length_sampling(self, mask):
        """Pick positions to hide using segments of a fixed number of observations.

        The segment length is ``args.mask_ratio_per_seg`` times the number of
        ones in ``mask`` (at least 1).  Up to ``args.segment_num`` segments of
        consecutive observed positions are chosen at random.  Currently only
        referenced from a commented-out line in ``_seg_masking``.

        Args:
            mask: 1-D tensor with 1 at observed positions.

        Returns:
            list of unique positions to hide (order not guaranteed).
        """

        # example input:
        # mask = torch.tensor([0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0])
        count_ones = mask.sum().long().item()

        if self.args.mask_ratio_per_seg * count_ones < 1:
            seg_seq_len = 1
        else:
            seg_seq_len = int(self.args.mask_ratio_per_seg * count_ones)

        ones_indices_in_mask = torch.where(mask == 1)[0].tolist()

        # if seg_seq_len == 1: indices = list(range(len(ones_indices_in_mask)))
        # else: indices = list(range(len(ones_indices_in_mask[ : -seg_seq_len + 1])))

        seg_pos = []
        for seg in range(self.args.segment_num):

            if len(ones_indices_in_mask) > 1:
                if seg_seq_len == 1:
                    start_idx_in_mask = random.choice(ones_indices_in_mask)
                else:
                    # exclude the tail so a full-length segment still fits
                    start_idx_in_mask = random.choice(ones_indices_in_mask[: -seg_seq_len + 1])

                start = ones_indices_in_mask.index(start_idx_in_mask)
                end = start + seg_seq_len

                sub_seg = ones_indices_in_mask[start: end]

                seg_pos.extend(sub_seg)
                # remove the chosen positions so later segments cannot reuse them
                ones_indices_in_mask = list(set(ones_indices_in_mask) - set(sub_seg))
                ones_indices_in_mask.sort()

        return list(set(seg_pos))

    def _time_sensitive_sampling(self, mask, timestamps):
        """Pick positions to hide using segments of a fixed time span.

        The span of a segment is ``args.mask_ratio_per_seg`` times the time
        between the first and last observation of this feature.  For each of
        ``args.segment_num`` segments a start time is chosen among the
        observed times that leave room for a full span; all observed times
        inside ``[start, start + span]`` are collected.

        Args:
            mask: 1-D tensor with 1 where this feature was observed.
            timestamps: tensor with one time value per step (reshaped to 1-D).

        Returns:
            list of unique step positions to hide; ``[]`` when nothing can be
            sampled.
        """

        # segment_num = 3
        # mask_ratio_per_seg = 0.15

        timestamps = timestamps.reshape(timestamps.shape[0])
        # sampled_times = timestamps[mask].tolist() # times at which this feature was sampled
        sampled_times = [timestamps[i].item() for i in range(mask.shape[0]) if mask[i] == 1]

        if len(sampled_times) == 0: return []

        sampled_times_start, sampled_times_end = sampled_times[0], sampled_times[-1]

        # full time interval of the feature = last sampling time - first sampling time
        # time of masked segment = a fixed percentage of the full time interval of the feature
        time_of_masked_segment = (sampled_times_end - sampled_times_start) * self.args.mask_ratio_per_seg

        # start candidates: observed times early enough for a whole segment to fit
        available_samples_to_sample = [time for time in sampled_times if
                                       time < sampled_times_end - time_of_masked_segment]

        if len(available_samples_to_sample) > 0:
            chosen_time = random.choice(available_samples_to_sample)
        else:
            return []

        masking_times = []
        for i in range(self.args.segment_num):

            masked_segment_start_time = chosen_time
            masked_segment_end_time = masked_segment_start_time + time_of_masked_segment

            idx = sampled_times.index(chosen_time)
            chosen_times = [chosen_time]
            available_samples_to_sample.remove(chosen_time)

            # walk forward: collect observed times inside the segment and
            # retire them as future start candidates
            for time in sampled_times[idx + 1:]:
                if time > masked_segment_end_time:
                    break

                if masked_segment_start_time < time and time <= masked_segment_end_time:
                    chosen_times.append(time)

                if time in available_samples_to_sample:
                    available_samples_to_sample.remove(time)

            masking_times.extend(chosen_times)

            # walk backward: retire earlier start candidates that lie within
            # one segment span of the chosen start
            for time in sampled_times[: idx][::-1]:
                if time < chosen_time - time_of_masked_segment or time > chosen_time + time_of_masked_segment:
                    break

                if time > chosen_time - time_of_masked_segment and time < chosen_time + time_of_masked_segment and time in available_samples_to_sample:
                    available_samples_to_sample.remove(time)

            # NOTE(review): this runs after every segment, including the last
            # one, and the else-branch returns [] even though masking_times
            # already holds collected times -- confirm that discarding them is
            # intended rather than breaking out of the loop.
            if len(available_samples_to_sample) > 0:
                chosen_time = random.choice(available_samples_to_sample)
            else:
                return []

        # map the collected times back to step positions (first match per time value)
        times = timestamps.tolist()
        seg_pos = [times.index(time) for time in masking_times]
        return list(set(seg_pos))

    # Disabled earlier sampler, kept as an unused class-level string literal.
    '''
    def _seg_sampling(self, max_len):
        if max_len * self.args.mask_ratio_per_seg < 1:
            return []
        seg_pos = []
        seg_len = int(max_len * self.args.mask_ratio_per_seg)
        print('seg_len: ' + str(seg_len))
        start_pos = np.random.randint(max_len, size=self.args.segment_num)
        print('start_pos: ' + str(start_pos))
        for start in start_pos:
            seg_pos += list(range(start, min(start+seg_len, max_len)))
        print(seg_pos)
        return seg_pos
    '''
362 | 
363 | 
364 | # ---Test _time_sensitive_sampling function for reconstruction task---#
365 | '''
366 | m = torch.zeros((56), dtype = bool)
367 | l = [3, 8, 11, 13, 18, 19, 42, 45, 50, 52, 55]
368 | m[l] = 1
369 | t = torch.zeros((56), dtype = float)
370 | times = torch.tensor([1, 5, 8, 9, 12, 13, 17, 20, 23, 28, 31], dtype = float)
371 | t[l] = times
372 | # print(m)
373 | # print(t)
374 | train_cl_collator = CLDataCollator(max_len = 50)
375 | train_cl_collator._time_sensitive_sampling(m, t)
376 | '''
377 | 
378 | # ----------Test _time_sensitive_cl function for CL task----------#
379 | '''
380 | times = torch.tensor([1, 2, 3, 4, 5, 15, 18, 25, 26, 27, 28, 29, 35, 45])
381 | times = times.reshape(times.shape[0], 1)
382 | train_cl_collator = CLDataCollator(max_len = 50)
383 | selected_indices = train_cl_collator._time_sensitive_cl(times)
384 | '''
385 | 
386 | '''
387 | max_len = 50
388 | D = 4
389 | value, time, mask = torch.rand(max_len, D), torch.rand(max_len, 1), torch.randint(0, 2, (max_len, D))
390 | data = [value, time, mask]
391 | batch = [data]
392 | train_cl_collator = CLDataCollator(max_len = max_len)
393 | # (v1, t1, m1), (v2, t2, m2) = train_cl_collator._per_seq_sampling(data)
394 | # print(v1.shape, t1.shape, m1.shape, v2.shape, t2.shape, m2.shape)
395 | out = train_cl_collator.__call__(batch)
396 | '''
397 | 
398 | '''
399 | print(out['value'].shape, out['time'].shape, out['mask'].shape)
400 | print('Value')
401 | print(value)
402 | print(out['value'][0, 0].shape)
403 | print(out['value'][0, 1].shape)
404 | print('Time')
405 | print(time)
406 | print(out['time'][0, 0].shape)
407 | print(out['time'][0, 1].shape)
408 | print('Mask')
409 | print(mask)
410 | print(out['mask'][0, 0])
411 | print(out['mask'][0, 1])
412 | print(torch.sum(mask, axis = 0))
413 | print(torch.sum(out['mask'][0, 0], axis = 0))
414 | print(torch.sum(out['mask'][0, 1], axis = 0))
415 | '''


--------------------------------------------------------------------------------
/src/datautils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import time
  3 | 
  4 | import numpy as np
  5 | import pandas as pd
  6 | import math
  7 | import random
  8 | from datetime import datetime
  9 | import pickle
 10 | from utils import pkl_load, pad_nan_to_target
 11 | from scipy.io.arff import loadarff
 12 | from sklearn.preprocessing import StandardScaler, MinMaxScaler
 13 | from utils import generate_mask
 14 | from utils import interpolate_cubic_spline
 15 | from utils import normalize_with_mask
 16 | 
 17 | 
 18 | def load_UCR(dataset, load_tp: bool = True):
 19 |     train_file = os.path.join('datasets/UCR', dataset, dataset + "_TRAIN.tsv")
 20 |     test_file = os.path.join('datasets/UCR', dataset, dataset + "_TEST.tsv")
 21 |     train_df = pd.read_csv(train_file, sep='\t', header=None)
 22 |     test_df = pd.read_csv(test_file, sep='\t', header=None)
 23 |     train_array = np.array(train_df)
 24 |     test_array = np.array(test_df)
 25 | 
 26 |     # Move the labels to {0, ..., L-1}
 27 |     labels = np.unique(train_array[:, 0])
 28 |     transform = {}
 29 |     for i, l in enumerate(labels):
 30 |         transform[l] = i
 31 | 
 32 |     train = train_array[:, 1:].astype(np.float64)
 33 |     train_labels = np.vectorize(transform.get)(train_array[:, 0])
 34 |     test = test_array[:, 1:].astype(np.float64)
 35 |     test_labels = np.vectorize(transform.get)(test_array[:, 0])
 36 | 
 37 |     # extend dim to NTC
 38 |     train, test = train[..., np.newaxis], test[..., np.newaxis]
 39 |     p = 1
 40 |     mask_tr, mask_te = generate_mask(train, p), generate_mask(test, p)
 41 | 
 42 |     # Normalization for non-normalized datasets
 43 |     # To keep the amplitude information, we do not normalize values over
 44 |     # individual time series, but on the whole dataset
 45 |     if dataset in [
 46 |         'AllGestureWiimoteX',
 47 |         'AllGestureWiimoteY',
 48 |         'AllGestureWiimoteZ',
 49 |         'BME',
 50 |         'Chinatown',
 51 |         'Crop',
 52 |         'EOGHorizontalSignal',
 53 |         'EOGVerticalSignal',
 54 |         'Fungi',
 55 |         'GestureMidAirD1',
 56 |         'GestureMidAirD2',
 57 |         'GestureMidAirD3',
 58 |         'GesturePebbleZ1',
 59 |         'GesturePebbleZ2',
 60 |         'GunPointAgeSpan',
 61 |         'GunPointMaleVersusFemale',
 62 |         'GunPointOldVersusYoung',
 63 |         'HouseTwenty',
 64 |         'InsectEPGRegularTrain',
 65 |         'InsectEPGSmallTrain',
 66 |         'MelbournePedestrian',
 67 |         'PickupGestureWiimoteZ',
 68 |         'PigAirwayPressure',
 69 |         'PigArtPressure',
 70 |         'PigCVP',
 71 |         'PLAID',
 72 |         'PowerCons',
 73 |         'Rock',
 74 |         'SemgHandGenderCh2',
 75 |         'SemgHandMovementCh2',
 76 |         'SemgHandSubjectCh2',
 77 |         'ShakeGestureWiimoteZ',
 78 |         'SmoothSubspace',
 79 |         'UMD'
 80 |     ] or p != 1:
 81 |         scaler = StandardScaler()
 82 |         train, test = normalize_with_mask(train, mask_tr, test, mask_te, scaler)
 83 |         # mean = np.nanmean(train)
 84 |         # std = np.nanstd(train)
 85 |         # train = (train - mean) / std
 86 |         # test = (test - mean) / std
 87 | 
 88 |     if load_tp:
 89 |         tp = np.linspace(0, 1, train.shape[1], endpoint=True).reshape(1, -1, 1)
 90 |         train = np.concatenate((train, np.repeat(tp, train.shape[0], axis=0)), axis=-1)
 91 |         test = np.concatenate((test, np.repeat(tp, test.shape[0], axis=0)), axis=-1)
 92 | 
 93 |     return {'x': train, 'mask': mask_tr}, train_labels, {'x': test, 'mask': mask_te}, test_labels
 94 |     # return train[..., np.newaxis], train_labels, test[..., np.newaxis], test_labels
 95 | 
 96 | 
 97 | def load_others(dataset, load_tp: bool = True):
 98 |     data = np.load(f'datasets/Others/{dataset}.npy', allow_pickle=True).item()
 99 |     train_X, train_mask, train_y, test_X, test_mask, test_y = \
100 |         data["tr_x"], data["tr_mask"], data["tr_y"], data["te_x"], data["te_mask"], data["te_y"]
101 | 
102 |     scaler = MinMaxScaler()
103 | 
104 |     train_X, test_X = normalize_with_mask(train_X, train_mask, test_X, test_mask, scaler)
105 | 
106 |     train_tp, test_tp = data['tr_t'], data['te_t']
107 |     if load_tp:
108 |         train_X = np.concatenate((train_X, train_tp.reshape(train_tp.shape[0], -1, 1)), axis=-1)
109 |         test_X = np.concatenate((test_X, test_tp.reshape(test_tp.shape[0], -1, 1)), axis=-1)
110 | 
111 |     labels = np.unique(train_y)
112 |     transform = {k: i for i, k in enumerate(labels)}
113 |     train_y = np.vectorize(transform.get)(train_y)
114 |     test_y = np.vectorize(transform.get)(test_y)
115 |     return {'x': train_X, 'mask': train_mask}, train_y, {'x': test_X, 'mask': test_mask}, test_y
116 | 
117 | 
def load_UEA(dataset, load_tp: bool = False):
    """Load a UEA multivariate dataset, from ARFF files or a ``.npy`` fallback.

    ``datasets/UEA/<dataset>/<dataset>_{TRAIN,TEST}.arff`` is tried first. If
    reading or converting the ARFF files fails for any ordinary reason, the
    pickled dict in ``datasets/UEA/<dataset>/<dataset>.npy`` (keys
    ``train_X``, ``train_y``, ``test_X``, ``test_y``) is used instead.

    Args:
        dataset: Name of the UEA dataset (also the name of its folder).
        load_tp: When true, a channel of evenly spaced timestamps in [0, 1]
            is appended as the last feature of every series.

    Returns:
        ``({'x': train_X, 'mask': mask_tr}, train_y,
        {'x': test_X, 'mask': mask_te}, test_y)`` with labels remapped to
        ``0..L-1`` in the order of the sorted unique training labels.
    """
    def extract_data(data):
        # Each ARFF record is (channels-by-time payload, byte-string label).
        res_data = []
        res_labels = []
        for t_data, t_label in data:
            t_data = np.array([d.tolist() for d in t_data])
            t_label = t_label.decode("utf-8")
            res_data.append(t_data)
            res_labels.append(t_label)
        # Swap to (instances, time, channels).
        return np.array(res_data).swapaxes(1, 2), np.array(res_labels)

    try:
        train_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TRAIN.arff')[0]
        test_data = loadarff(f'datasets/UEA/{dataset}/{dataset}_TEST.arff')[0]

        train_X, train_y = extract_data(train_data)
        test_X, test_y = extract_data(test_data)
    except Exception:
        # Best-effort fallback to the pre-converted archive. `Exception`
        # (rather than a bare except) lets KeyboardInterrupt / SystemExit
        # propagate instead of silently triggering the fallback.
        data = np.load(f'datasets/UEA/{dataset}/{dataset}.npy', allow_pickle=True).item()
        train_X, train_y, test_X, test_y = data["train_X"], data["train_y"], data["test_X"], data["test_y"]

    p = 1  # forwarded to generate_mask; see utils.generate_mask for its meaning
    mask_tr, mask_te = generate_mask(train_X, p), generate_mask(test_X, p)
    scaler = StandardScaler()

    train_X, test_X = normalize_with_mask(train_X, mask_tr, test_X, mask_te, scaler)

    if load_tp:
        # The timestamp grid is derived from the training length and reused
        # for the test split.
        tp = np.linspace(0, 1, train_X.shape[1], endpoint=True).reshape(1, -1, 1)
        train_X = np.concatenate((train_X, np.repeat(tp, train_X.shape[0], axis=0)), axis=-1)
        test_X = np.concatenate((test_X, np.repeat(tp, test_X.shape[0], axis=0)), axis=-1)

    labels = np.unique(train_y)
    transform = {k: i for i, k in enumerate(labels)}
    train_y = np.vectorize(transform.get)(train_y)
    test_y = np.vectorize(transform.get)(test_y)
    return {'x': train_X, 'mask': mask_tr}, train_y, {'x': test_X, 'mask': mask_te}, test_y
156 | 
157 | 
def load_forecast_npy(name, univar=False):
    """Load a forecasting dataset stored as ``datasets/<name>.npy``.

    The array is split chronologically 60/20/20 into train/valid/test and
    standardised with statistics fitted on the training part only.

    Args:
        name: File stem of the ``.npy`` array (rows are time steps, columns
            are variables).
        univar: When true, only the last variable (column) is kept.

    Returns:
        ``(data, train_slice, valid_slice, test_slice, scaler, pred_lens, 0)``
        where ``data`` has a leading instance axis of size 1, ``scaler`` is the
        fitted ``StandardScaler`` and the trailing ``0`` is the number of
        covariate columns.
    """
    data = np.load(f'datasets/{name}.npy')
    if univar:
        # Keep the last column. The previous `data[: -1:]` sliced the time
        # axis instead (dropping the final row) and left every variable in.
        data = data[:, -1:]

    n_steps = len(data)
    train_slice = slice(None, int(0.6 * n_steps))
    valid_slice = slice(int(0.6 * n_steps), int(0.8 * n_steps))
    test_slice = slice(int(0.8 * n_steps), None)

    scaler = StandardScaler().fit(data[train_slice])
    data = scaler.transform(data)
    data = np.expand_dims(data, 0)

    pred_lens = [24, 48, 96, 288, 672]
    return data, train_slice, valid_slice, test_slice, scaler, pred_lens, 0
173 | 
174 | 
def _get_time_features(dt):
    """Turn a pandas ``DatetimeIndex`` into a float matrix of calendar features.

    Args:
        dt: ``pandas.DatetimeIndex`` with one entry per time step.

    Returns:
        ``float64`` array of shape ``(len(dt), 7)`` whose columns are, in
        order: minute, hour, day of week, day of month, day of year, month and
        ISO week of year.
    """
    # `DatetimeIndex.weekofyear` was removed in pandas 2.0; `isocalendar()`
    # (pandas >= 1.1) yields the same ISO week number.
    if hasattr(dt, 'isocalendar'):
        week = dt.isocalendar().week.to_numpy(dtype=np.float64)
    else:
        week = dt.weekofyear.to_numpy()

    # `np.float` was removed in NumPy 1.24; it was an alias of the builtin
    # float, i.e. float64.
    return np.stack([
        dt.minute.to_numpy(),
        dt.hour.to_numpy(),
        dt.dayofweek.to_numpy(),
        dt.day.to_numpy(),
        dt.dayofyear.to_numpy(),
        dt.month.to_numpy(),
        week,
    ], axis=1).astype(np.float64)
185 | 
186 | 
def load_forecast_csv(name, offset=0, univar=False, load_tp: bool = True):
    """Load a forecasting dataset from ``datasets/<name>.csv``.

    The CSV must have a ``date`` column, which becomes the index. The series
    is split into train/valid/test (fixed month-based boundaries for the ETT
    datasets, 60/20/20 otherwise), normalised through ``normalize_with_mask``
    and optionally extended with calendar covariates and a timestamp channel.

    Args:
        name: File stem of the CSV, e.g. ``'ETTh1'`` or ``'electricity'``.
        offset: Number of steps by which the ETT valid/test slices start
            earlier. Calendar covariates are only added when this is 0.
        univar: Keep a single target column (dataset specific, falling back
            to the last column).
        load_tp: When true, a min-max scaled POSIX timestamp is appended as
            the last channel.

    Returns:
        ``({'x': data, 'mask': mask}, train_slice, valid_slice, test_slice,
        scaler, pred_lens, n_covariate_cols)``. ``scaler`` is the object
        handed to ``normalize_with_mask`` for the data columns (presumably
        fitted there on the training split - confirm in ``utils``).
        Channel order in ``x`` is [covariates, data columns, timestamp].
    """
    data = pd.read_csv(f'datasets/{name}.csv', index_col='date', parse_dates=True)
    dt_tp = data.index
    dt_embed = _get_time_features(data.index)
    n_covariate_cols = dt_embed.shape[-1] if offset == 0 else 0

    if univar:
        if name in ('ETTh1', 'ETTh2', 'ETTm1', 'ETTm2'):
            data = data[['OT']]
        elif name == 'electricity':
            data = data[['MT_001']]
        elif name == 'WTH':
            data = data[['WetBulbCelsius']]
        else:
            data = data.iloc[:, -1:]

    data = data.to_numpy()
    if name == 'ETTh1' or name == 'ETTh2':
        # Hourly data: 12 / 4 / 4 "months" of 30 days.
        train_slice = slice(None, 12 * 30 * 24)
        valid_slice = slice(12 * 30 * 24 - offset, 16 * 30 * 24)
        test_slice = slice(16 * 30 * 24 - offset, 20 * 30 * 24)
    elif name == 'ETTm1' or name == 'ETTm2':
        # 15-minute data: same boundaries, four samples per hour.
        train_slice = slice(None, 12 * 30 * 24 * 4)
        valid_slice = slice(12 * 30 * 24 * 4 - offset, 16 * 30 * 24 * 4)
        test_slice = slice(16 * 30 * 24 * 4 - offset, 20 * 30 * 24 * 4)
    else:
        train_slice = slice(None, int(0.6 * len(data)))
        valid_slice = slice(int(0.6 * len(data)), int(0.8 * len(data)))
        test_slice = slice(int(0.8 * len(data)), None)

    def fixed_mask_timestamp(num, mask):
        # Build a mask for `num` covariate channels that is 0 only at steps
        # where every data channel is masked, and prepend it to `mask`.
        mask_time = np.ones((mask.shape[0], mask.shape[1]))
        mask_time[np.where(mask.mean(axis=-1) == 0.)] = 0
        return np.concatenate((np.repeat(mask_time[..., np.newaxis], num, axis=-1), mask), axis=-1)

    # to N x T x C
    # NOTE: the original test was `name in ('electricity')`, which is a
    # substring check against a plain string (true for '', 'city', ...).
    if name == 'electricity':
        data = np.expand_dims(data.T, -1)  # Each variable is an instance rather than a feature
    else:
        data = np.expand_dims(data, 0)

    p = 1  # forwarded to generate_mask; see utils.generate_mask for its meaning
    mask_tr, mask_va, mask_te = generate_mask(data[:, train_slice], p), \
                                generate_mask(data[:, valid_slice], p), \
                                generate_mask(data[:, test_slice], p)
    scaler = StandardScaler()

    train_x, valid_x = normalize_with_mask(data[:, train_slice], mask_tr, data[:, valid_slice], mask_va, scaler)
    _, test_x = normalize_with_mask(data[:, train_slice], mask_tr, data[:, test_slice], mask_te, scaler)
    data = np.concatenate((train_x, valid_x, test_x), axis=1)
    mask = np.concatenate([mask_tr, mask_va, mask_te], axis=1)

    if n_covariate_cols > 0:
        dt_mask, dv_mask, d_mask = fixed_mask_timestamp(n_covariate_cols, mask_tr[:1]), \
                                   fixed_mask_timestamp(n_covariate_cols, mask_va[:1]), \
                                   fixed_mask_timestamp(n_covariate_cols, mask_te[:1])

        dt, dv, d = dt_embed[train_slice], dt_embed[valid_slice], dt_embed[test_slice]
        # Blank out covariates at fully masked steps so they do not influence
        # the scaling statistics; they are zero-filled again below.
        dt[dt_mask[0][:, :n_covariate_cols] == 0] = np.nan
        dv[dv_mask[0][:, :n_covariate_cols] == 0] = np.nan
        d[d_mask[0][:, :n_covariate_cols] == 0] = np.nan
        dt_embed = np.concatenate((dt, dv, d), axis=0)

        # Use a dedicated scaler for the covariates. Refitting `scaler` here
        # would overwrite the data statistics of the object that is returned
        # to the caller.
        dt_scaler = StandardScaler().fit(dt)
        dt_embed = np.expand_dims(dt_scaler.transform(dt_embed), 0)
        dt_embed[np.isnan(dt_embed)] = 0
        data = np.concatenate([np.repeat(dt_embed, data.shape[0], axis=0), data], axis=-1)
        mask_tr, mask_va, mask_te = dt_mask, dv_mask, d_mask
        mask = np.concatenate([mask_tr, mask_va, mask_te], axis=1)

    if load_tp:
        dt_tp = [dt_tp[train_slice], dt_tp[valid_slice], dt_tp[test_slice]]
        tp = np.concatenate([[time.mktime(t.timetuple()) for t in tp] for tp in dt_tp])
        scaler_hat = MinMaxScaler().fit(tp.reshape(-1, 1))
        data = np.concatenate([data, np.expand_dims(scaler_hat.transform(tp.reshape(-1, 1)), 0)], axis=-1)

    if name in ('ETTh1', 'ETTh2', 'electricity', 'WTH'):
        pred_lens = [24, 48, 168, 336, 720]
    else:
        pred_lens = [24, 48, 96, 288, 672]

    return {'x': data, 'mask': mask}, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols
267 | 
268 | 
def load_anomaly(name, load_tp=False):
    """Load an anomaly-detection archive from ``datasets/<name>.pkl``.

    For every key of ``all_train_data`` a mask is generated for the train and
    the test series, the series are multiplied by their mask in place, and
    the masks are NaN-padded to the longest train / test series and stacked.

    Args:
        name: File stem of the pickle archive.
        load_tp: Currently unused.

    Returns:
        ``({'x': train_data, 'mask': mask_tr}, train_labels, train_timestamps,
        {'x': test_data, 'mask': mask_te}, test_labels, test_timestamps,
        delay)`` taken from the archive, with the data dicts modified as
        described above.
    """
    res = pkl_load(f'datasets/{name}.pkl')
    train_series = res['all_train_data']
    test_series = res['all_test_data']

    p = 1  # forwarded to generate_mask; see utils.generate_mask for its meaning
    longest_train = np.max([len(train_series[key]) for key in train_series])
    longest_test = np.max([len(test_series[key]) for key in test_series])

    train_masks, test_masks = [], []
    # The test dict is indexed with the training keys.
    for key in train_series:
        m_train = generate_mask(train_series[key].reshape(1, -1, 1), p, remain=1)
        m_test = generate_mask(test_series[key].reshape(1, -1, 1), p, remain=1)

        # Apply the masks to the stored series.
        train_series[key] = (m_train * train_series[key].reshape(1, -1, 1)).reshape(-1)
        test_series[key] = (m_test * test_series[key].reshape(1, -1, 1)).reshape(-1)

        # Pad each mask with NaN up to the common length.
        train_pad = np.full((1, longest_train - m_train.shape[1], 1), np.nan)
        test_pad = np.full((1, longest_test - m_test.shape[1], 1), np.nan)
        train_masks.append(np.concatenate((m_train, train_pad), axis=1))
        test_masks.append(np.concatenate((m_test, test_pad), axis=1))

    mask_tr = np.concatenate(train_masks, axis=0)
    mask_te = np.concatenate(test_masks, axis=0)

    train_pack = {'x': res['all_train_data'], 'mask': mask_tr}
    test_pack = {'x': res['all_test_data'], 'mask': mask_te}
    return (train_pack, res['all_train_labels'], res['all_train_timestamps'],
            test_pack, res['all_test_labels'], res['all_test_timestamps'],
            res['delay'])
299 | 
300 | 
def gen_ano_train_data(all_train_data, maxl=None, normal=False):
    """Stack per-key training series into one NaN-padded array.

    Args:
        all_train_data: Mapping from a key to one series.
        maxl: Target length for padding; defaults to the longest series.
        normal: When true, min-max scale the stacked array using its global
            (NaN-ignoring) minimum and maximum.

    Returns:
        The stacked series with a new axis inserted at position 2.
    """
    if maxl is None:
        maxl = np.max([len(all_train_data[key]) for key in all_train_data])

    padded = [
        pad_nan_to_target(np.array(all_train_data[key]).astype(np.float64), maxl, axis=0)
        for key in all_train_data
    ]
    pretrain_data = np.stack(padded)[:, :, np.newaxis]

    if not normal:
        return pretrain_data

    lowest, highest = np.nanmin(pretrain_data), np.nanmax(pretrain_data)
    return (pretrain_data - lowest) / (highest - lowest)


--------------------------------------------------------------------------------
/src/lib.py:
--------------------------------------------------------------------------------
  1 | # pylint: disable=E1101
  2 | import torch
  3 | import torch.nn as nn
  4 | from torch.utils.data import Dataset, DataLoader, TensorDataset
  5 | 
  6 | import numpy as np
  7 | from sklearn import metrics
  8 | 
  9 | from collator import CLDataCollator
 10 | 
 11 | 
 12 | class TimeDataset(Dataset):
 13 |     def __init__(self, data):
 14 |         super().__init__()
 15 |         self.data = []
 16 |         for instance in data:
 17 |             values, times, mask = instance
 18 |             if len(values) == len(times) and len(times) == len(mask) and len(values) >= 2:
 19 |                 self.data.append(instance)
 20 | 
 21 |     def __len__(self):
 22 |         return len(self.data)
 23 | 
 24 |     def __getitem__(self, index):
 25 |         return self.data[index]
 26 | 
 27 | 
 28 | def count_parameters(model):
 29 |     return sum(p.numel() for p in model.parameters() if p.requires_grad)
 30 | 
 31 | 
 32 | def log_normal_pdf(x, mean, logvar, mask):
 33 |     const = torch.from_numpy(np.array([2. * np.pi])).float().to(x.device)
 34 |     const = torch.log(const)
 35 |     return -.5 * (const + logvar + (x - mean) ** 2. / torch.exp(logvar)) * mask
 36 | 
 37 | 
 38 | def normal_kl(mu1, lv1, mu2, lv2):
 39 |     v1 = torch.exp(lv1)
 40 |     v2 = torch.exp(lv2)
 41 |     lstd1 = lv1 / 2.
 42 |     lstd2 = lv2 / 2.
 43 | 
 44 |     kl = lstd2 - lstd1 + ((v1 + (mu1 - mu2) ** 2.) / (2. * v2)) - .5
 45 |     return kl
 46 | 
 47 | 
 48 | def mean_squared_error(orig, pred, mask):
 49 |     error = (orig - pred) ** 2
 50 |     error = error * mask
 51 |     return error.sum() / mask.sum()
 52 | 
 53 | 
 54 | def normalize_masked_data(data, mask, att_min, att_max):
 55 |     # we don't want to divide by zero
 56 |     att_max[att_max == 0.] = 1.
 57 | 
 58 |     if (att_max != 0.).all():
 59 |         data_norm = (data - att_min) / att_max
 60 |     else:
 61 |         raise Exception("Zero!")
 62 | 
 63 |     if torch.isnan(data_norm).any():
 64 |         raise Exception("nans!")
 65 | 
 66 |     # set masked out elements back to zero
 67 |     data_norm[mask == 0] = 0
 68 | 
 69 |     return data_norm, att_min, att_max
 70 | 
 71 | 
 72 | def evaluate(dim, rec, dec, test_loader, args, num_sample=10, device="cuda"):
 73 |     mse, test_n = 0.0, 0.0
 74 |     with torch.no_grad():
 75 |         for test_batch in test_loader:
 76 |             test_batch = test_batch.to(args.device)
 77 |             observed_data, observed_mask, observed_tp = (
 78 |                 test_batch[:, :, :dim],
 79 |                 test_batch[:, :, dim: 2 * dim],
 80 |                 test_batch[:, :, -1],
 81 |             )
 82 |             if args.sample_tp and args.sample_tp < 1:
 83 |                 subsampled_data, subsampled_tp, subsampled_mask = subsample_timepoints(
 84 |                     observed_data.clone(), observed_tp.clone(), observed_mask.clone(), args.sample_tp)
 85 |             else:
 86 |                 subsampled_data, subsampled_tp, subsampled_mask = \
 87 |                     observed_data, observed_tp, observed_mask
 88 |             out = rec(torch.cat((subsampled_data, subsampled_mask), 2), subsampled_tp)
 89 |             qz0_mean, qz0_logvar = (
 90 |                 out[:, :, : args.latent_dim],
 91 |                 out[:, :, args.latent_dim:],
 92 |             )
 93 |             epsilon = torch.randn(
 94 |                 num_sample, qz0_mean.shape[0], qz0_mean.shape[1], qz0_mean.shape[2]
 95 |             ).to(args.device)
 96 |             z0 = epsilon * torch.exp(0.5 * qz0_logvar) + qz0_mean
 97 |             z0 = z0.view(-1, qz0_mean.shape[1], qz0_mean.shape[2])
 98 |             batch, seqlen = observed_tp.size()
 99 |             time_steps = (
100 |                 observed_tp[None, :, :].repeat(num_sample, 1, 1).view(-1, seqlen)
101 |             )
102 |             pred_x = dec(z0, time_steps)
103 |             pred_x = pred_x.view(num_sample, -1, pred_x.shape[1], pred_x.shape[2])
104 |             pred_x = pred_x.mean(0)
105 |             mse += mean_squared_error(observed_data, pred_x, observed_mask) * batch
106 |             test_n += batch
107 |     return mse / test_n
108 | 
109 | 
def compute_losses(dim, dec_train_batch, qz0_mean, qz0_logvar, pred_x, args, device):
    """Per-series reconstruction log-likelihood and KL term.

    Args:
        dim: Number of data channels; the batch holds data in the first
            ``dim`` channels and the mask in the next ``dim``.
        dec_train_batch: Batch tensor the decoder output is compared against.
        qz0_mean, qz0_logvar: Posterior mean and log-variance.
        pred_x: Decoder output.
        args: Provides ``std`` (observation noise std) and ``norm``.
        device: Device for the helper tensors created here.

    Returns:
        ``(logpx, analytic_kl)``, both summed over the last two axes and,
        when ``args.norm`` is set, divided by the number of observed entries.
    """
    observed_data = dec_train_batch[:, :, :dim]
    observed_mask = dec_train_batch[:, :, dim:2 * dim]

    # Constant observation noise, expressed as a log-variance tensor.
    noise_std_ = torch.zeros(pred_x.size()).to(device) + args.std
    noise_logvar = 2. * torch.log(noise_std_).to(device)
    logpx = log_normal_pdf(observed_data, pred_x, noise_logvar,
                           observed_mask).sum(-1).sum(-1)

    # Prior with zero mean and zero log-variance (one shared tensor).
    prior_param = torch.zeros(qz0_mean.size()).to(device)
    analytic_kl = normal_kl(qz0_mean, qz0_logvar,
                            prior_param, prior_param).sum(-1).sum(-1)

    if args.norm:
        n_observed = observed_mask.sum(-1).sum(-1)
        logpx /= n_observed
        analytic_kl /= n_observed
    return logpx, analytic_kl
126 | 
127 | 
def evaluate_classifier(model, test_loader, dec=None, args=None, classifier=None,
                        dim=0, reconst=False, num_sample=1):
    """Evaluate a classifier over ``test_loader``.

    Each batch is split along its last axis into data (first ``dim``
    channels), mask (next ``dim`` channels) and time points (last channel).

    Args:
        model: Called as ``model(cat(data, mask), time_points)``. Its output is
            the logits, or the posterior parameters when ``reconst`` is set.
        test_loader: Iterable of ``(batch, label)`` pairs.
        dec: Decoder, used only when ``reconst`` and ``args.classify_pertp``.
        args: Provides ``device``, ``classify_pertp``, ``dataset`` and (for
            ``reconst``) ``latent_dim``.
        classifier: Head applied to the latent samples or to the decoder output
            when ``reconst`` is set.
        dim: Number of data channels.
        reconst: Sample latents from the model output and classify those.
        num_sample: Latent samples per series (``reconst`` only).

    Returns:
        ``(loss, accuracy, auc)``. ``auc`` is the ROC-AUC of class 1 for the
        ``'physionet'`` and ``'MIMIC-III'`` datasets and ``0.`` for every other
        dataset.
    """
    pred = []
    true = []
    test_loss = 0
    for test_batch, label in test_loader:
        test_batch, label = test_batch.to(args.device), label.to(args.device)
        batch_len = test_batch.shape[0]
        observed_data, observed_mask, observed_tp \
            = test_batch[:, :, :dim], test_batch[:, :, dim:2 * dim], test_batch[:, :, -1]
        with torch.no_grad():
            out = model(
                torch.cat((observed_data, observed_mask), 2), observed_tp)
            if reconst:
                qz0_mean, qz0_logvar = out[:, :,
                                       :args.latent_dim], out[:, :, args.latent_dim:]
                epsilon = torch.randn(
                    num_sample, qz0_mean.shape[0], qz0_mean.shape[1], qz0_mean.shape[2]).to(args.device)
                z0 = epsilon * torch.exp(.5 * qz0_logvar) + qz0_mean
                z0 = z0.view(-1, qz0_mean.shape[1], qz0_mean.shape[2])
                if args.classify_pertp:
                    pred_x = dec(z0, observed_tp[None, :, :].repeat(
                        num_sample, 1, 1).view(-1, observed_tp.shape[1]))
                    out = classifier(pred_x)
                else:
                    out = classifier(z0)
            if args.classify_pertp:
                # Per-time-point labels are one-hot over the last axis.
                N = label.size(-1)
                out = out.view(-1, N)
                label = label.view(-1, N)
                _, label = label.max(-1)
                # NOTE(review): the factor 50. looks like a hard-coded number
                # of time points per series - confirm against the dataset.
                test_loss += nn.CrossEntropyLoss()(out, label.long()).item() * batch_len * 50.
            else:
                label = label.unsqueeze(0).repeat_interleave(
                    num_sample, 0).view(-1)
                test_loss += nn.CrossEntropyLoss()(out, label).item() * batch_len * num_sample
        pred.append(out.cpu().numpy())
        true.append(label.cpu().numpy())
    pred = np.concatenate(pred, 0)
    true = np.concatenate(true, 0)
    acc = np.mean(pred.argmax(1) == true)

    # Default for datasets without an AUC definition. Previously `auc` was
    # left unbound for anything other than the three known dataset names,
    # which raised UnboundLocalError at the return statement.
    auc = 0.
    if args.dataset == 'physionet' or args.dataset == 'MIMIC-III':
        auc = metrics.roc_auc_score(true, pred[:, 1])

    return test_loss / pred.shape[0], acc, auc
181 | 
182 | 
def evaluate_regressor(model, test_loader, dec=None, args=None, classifier=None, dim=0):
    """Average MSE and MAE of a regressor over ``test_loader``.

    Each batch is split along its last axis into data (first ``dim``
    channels), mask (next ``dim`` channels) and time points (last channel).
    The first output column of ``model`` is compared with the label.

    Args:
        model: Called as ``model(cat(data, mask), time_points)``.
        test_loader: Iterable of ``(batch, label)`` pairs.
        dec: Unused.
        args: Provides ``device``.
        classifier: Unused.
        dim: Number of data channels.

    Returns:
        ``(mse, mae)``, each a batch-size weighted mean over all batches.
    """
    n_seen = 0
    mse_sum = 0
    mae_sum = 0
    for test_batch, label in test_loader:
        test_batch = test_batch.to(args.device)
        label = label.to(args.device)
        observed_data = test_batch[:, :, :dim]
        observed_mask = test_batch[:, :, dim:2 * dim]
        observed_tp = test_batch[:, :, -1]
        with torch.no_grad():
            out = model(torch.cat((observed_data, observed_mask), 2), observed_tp)
            prediction = out[:, 0]
            batch_len = test_batch.shape[0]
            n_seen += batch_len
            mse_sum += nn.functional.mse_loss(prediction, label).item() * batch_len
            mae_sum += nn.functional.l1_loss(prediction, label).item() * batch_len

    return mse_sum / n_seen, mae_sum / n_seen
200 | 
201 | 
def evaluate_interpolator(model, test_loader, dec=None, args=None, classifier=None, dim=0):
    """Average MSE and MAE of an interpolation model on the target entries.

    Batches and labels share one layout: data in the first ``dim`` channels
    and a mask in the next ``dim``; the batch carries time points in its last
    channel. Errors are computed only where the label mask is set.

    Args:
        model: Called as ``model(cat(data, mask), time_points)``.
        test_loader: Iterable of ``(batch, label)`` pairs.
        dec: Unused.
        args: Provides ``device``.
        classifier: Unused.
        dim: Number of data channels.

    Returns:
        ``(mse, mae)``, each weighted by the number of target entries.
    """
    n_targets = 0
    mse_sum = 0
    mae_sum = 0

    for test_batch, label in test_loader:
        test_batch = test_batch.to(args.device)
        label = label.to(args.device)
        observed_data = test_batch[:, :, :dim]
        observed_mask = test_batch[:, :, dim:2 * dim]
        observed_tp = test_batch[:, :, -1]
        with torch.no_grad():
            out = model(torch.cat((observed_data, observed_mask), 2), observed_tp)

            target_data = label[:, :, :dim]
            target_mask = label[:, :, dim:2 * dim].bool()
            predicted, expected = out[target_mask], target_data[target_mask]

            num_values = torch.sum(target_mask).item()
            mse_sum += nn.functional.mse_loss(predicted, expected).item() * num_values
            mae_sum += nn.functional.l1_loss(predicted, expected).item() * num_values
            n_targets += num_values

    return mse_sum / n_targets, mae_sum / n_targets
222 | 
223 | 
def subsample_timepoints(data, time_steps, mask, percentage_tp_to_sample=None):
    """Randomly hide a share of the observed time points of every series.

    For each series, the time points with at least one observed feature are
    collected, ``int(count * percentage_tp_to_sample)`` of them are kept at
    random (via ``np.random.choice``) and the rest are zeroed in both ``data``
    and ``mask``. Both tensors are modified in place.

    Args:
        data: Values, indexed as ``data[series, time_point]``.
        time_steps: Returned unchanged.
        mask: Observation mask with the same layout as ``data``; required.
        percentage_tp_to_sample: Fraction of observed time points to keep.
            ``None`` (the default) disables subsampling.

    Returns:
        ``(data, time_steps, mask)``.
    """
    # The default used to crash in `int(n * None)`; treat it as "keep all".
    if percentage_tp_to_sample is None:
        return data, time_steps, mask

    for i in range(data.size(0)):
        # Sum the mask over features to find time points with any measurement.
        current_mask = mask[i].sum(-1).cpu()
        non_missing_tp = np.where(current_mask > 0)[0]
        n_to_sample = int(len(non_missing_tp) * percentage_tp_to_sample)
        kept_idx = np.random.choice(non_missing_tp, n_to_sample, replace=False)
        tp_to_set_to_zero = np.setdiff1d(non_missing_tp, kept_idx)

        data[i, tp_to_set_to_zero] = 0.
        mask[i, tp_to_set_to_zero] = 0.

    return data, time_steps, mask
242 | 
243 | 
def generate_irregular_samples(data, input_dim):
    """Cut every padded series down to its real length.

    The last channel of ``data`` holds the time stamps. A series is taken to
    end just before the first zero time stamp found after position 0; if
    there is none, the full length is used.

    Args:
        data: Tensor indexed as ``data[series, step, channel]`` with values in
            the first ``input_dim`` channels, the mask in the next
            ``input_dim`` and time in the last one.
        input_dim: Number of value channels.

    Returns:
        ``(samples, max_len)`` where ``samples`` is a list of
        ``[values, times, mask]`` per series and ``max_len`` is the longest
        retained length (0 when ``data`` has no series).
    """
    samples = []
    longest = 0
    full_len = data.shape[1]
    for series in data:
        stamps = series[:, -1]
        zero_positions = torch.where(stamps[1:] == 0)[0]
        if len(zero_positions):
            # +1 compensates for skipping position 0 above.
            length = zero_positions[0].item() + 1
        else:
            length = full_len
        longest = max(longest, length)
        samples.append([
            series[:length, : input_dim],
            series[:length, -1],
            series[:length, input_dim: 2 * input_dim],
        ])
    return samples, longest
257 | 
258 | 
def generate_batches(X_train, X_val, args):
    """Build the contrastive pre-training dataloader for ``X_train``.

    Args:
        X_train: Tensor whose last axis holds values, mask and one time
            channel, so the number of value channels is
            ``(X_train.shape[2] - 1) // 2``.
        X_val: Unused; no validation loader is built.
        args: Provides ``batch_size`` and is forwarded to ``CLDataCollator``.

    Returns:
        Dict with ``train_dataloader``, ``input_dim``, ``max_len`` and
        ``n_train_batches``.
    """
    input_dim = (X_train.shape[2] - 1) // 2

    samples, max_len = generate_irregular_samples(X_train, input_dim)
    pretrain_data = TimeDataset(samples)
    collate = CLDataCollator(max_len=max_len, args=args)

    # Never exceed the dataset size or a hard cap of 256.
    batch_size = min(len(pretrain_data), args.batch_size, 256)
    loader = DataLoader(
        pretrain_data,
        batch_size=batch_size,
        shuffle=True,
        collate_fn=collate,
        num_workers=0,
    )

    return {
        "train_dataloader": loader,
        "input_dim": input_dim,
        "max_len": max_len,
        "n_train_batches": len(loader),
    }
289 | 
290 | 
def get_unlabeled_pretrain_data(X_train, args):
    """Wrap a NumPy training array into the pre-training data objects.

    Converts ``X_train`` to a tensor, prints its shape and delegates to
    ``generate_batches`` (no validation data is passed).
    """
    train_tensor = torch.from_numpy(X_train)
    print('X_train: ' + str(train_tensor.shape))
    return generate_batches(train_tensor, None, args)
302 | 
303 | 
def get_finetune_data(args):
    """Load the saved train/val/test tensors and wrap them in dataloaders.

    The files ``X_{train,val,test}.pt`` and ``y_{train,val,test}.pt`` are
    loaded from ``args.path``. The file name is appended to ``args.path`` by
    plain string concatenation, so the path must end with a separator.

    Args:
        args: Provides ``path``, ``task`` (``'classification'``,
            ``'regression'`` or ``'interpolation'``) and ``batch_size``.

    Returns:
        Dict with ``train_dataloader``, ``val_dataloader``,
        ``test_dataloader`` (none of them shuffled) and ``input_dim``.

    Raises:
        ValueError: If ``args.task`` is not one of the supported tasks.
    """
    # Fail fast, before any file is read. An unknown task used to surface as
    # an UnboundLocalError only after all tensors had been loaded.
    if args.task not in ('classification', 'regression', 'interpolation'):
        raise ValueError(f"Unsupported task: {args.task!r}")

    X_train, y_train = torch.load(args.path + 'X_train.pt'), torch.load(args.path + 'y_train.pt')
    X_val, y_val = torch.load(args.path + 'X_val.pt'), torch.load(args.path + 'y_val.pt')
    X_test, y_test = torch.load(args.path + 'X_test.pt'), torch.load(args.path + 'y_test.pt')
    # The last axis holds values, mask and one time channel.
    input_dim = (X_train.shape[2] - 1) // 2

    print('X_train: ' + str(X_train.shape) + ' y_train: ' + str(y_train.shape))
    print('X_val: ' + str(X_val.shape) + ' y_val: ' + str(y_val.shape))
    print('X_test: ' + str(X_test.shape) + ' y_test: ' + str(y_test.shape))

    if args.task == 'classification':
        train_data_combined = TensorDataset(X_train, y_train.long().squeeze())
        val_data_combined = TensorDataset(X_val, y_val.long().squeeze())
        test_data_combined = TensorDataset(X_test, y_test.long().squeeze())
    else:  # 'regression' or 'interpolation'
        train_data_combined = TensorDataset(X_train, y_train.float())
        val_data_combined = TensorDataset(X_val, y_val.float())
        test_data_combined = TensorDataset(X_test, y_test.float())

    train_dataloader = DataLoader(train_data_combined, batch_size=args.batch_size, shuffle=False)
    val_dataloader = DataLoader(val_data_combined, batch_size=args.batch_size, shuffle=False)
    test_dataloader = DataLoader(test_data_combined, batch_size=args.batch_size, shuffle=False)

    data_objects = {"train_dataloader": train_dataloader,
                    "test_dataloader": test_dataloader,
                    "val_dataloader": val_dataloader,
                    "input_dim": input_dim}

    return data_objects
333 | 


--------------------------------------------------------------------------------
/src/models/__init__.py:
--------------------------------------------------------------------------------
1 | # from .encoder_transformer import TSEncoder
2 | from .encoder import TSEncoder
3 | 


--------------------------------------------------------------------------------
/src/models/attention.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from einops import rearrange, repeat
  5 | 
  6 | 
  7 | ########################################################################################
  8 | 
  9 | class Residual(nn.Module):
 10 |     def __init__(self, fn):
 11 |         super().__init__()
 12 |         self.fn = fn
 13 | 
 14 |     def forward(self, x, **kwargs):
 15 |         return self.fn(x, **kwargs) + x
 16 | 
 17 | 
 18 | class PreNorm(nn.Module):
 19 |     def __init__(self, dim, fn):
 20 |         super().__init__()
 21 |         self.norm = nn.LayerNorm(dim)
 22 |         self.fn = fn
 23 | 
 24 |     def forward(self, x, **kwargs):
 25 |         return self.fn(self.norm(x), **kwargs)
 26 | 
 27 | 
 28 | class FeedForward(nn.Module):
 29 |     def __init__(self, dim, hidden_dim, dropout=0.):
 30 |         super().__init__()
 31 |         self.net = nn.Sequential(
 32 |             nn.Linear(dim, hidden_dim),
 33 |             nn.ReLU(),
 34 |             nn.Dropout(dropout),
 35 |             nn.Linear(hidden_dim, dim),
 36 |             nn.Dropout(dropout)
 37 |         )
 38 | 
 39 |     def forward(self, x):
 40 |         return self.net(x)
 41 | 
 42 | 
 43 | class Attention(nn.Module):
 44 |     def __init__(self, dim, heads=8, dropout=0.):
 45 |         super().__init__()
 46 |         self.heads = heads
 47 |         self.scale = dim ** -0.5
 48 | 
 49 |         self.to_qkv = nn.Linear(dim, dim * 3, bias=False)
 50 |         self.to_out = nn.Sequential(
 51 |             nn.Linear(dim, dim),
 52 |             nn.Dropout(dropout)
 53 |         )
 54 | 
 55 |     def forward(self, x, mask=None):
 56 |         b, n, _, h = *x.shape, self.heads
 57 |         qkv = self.to_qkv(x).chunk(3, dim=-1)
 58 |         q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=h), qkv)
 59 | 
 60 |         dots = torch.einsum('bhid,bhjd->bhij', q, k) * self.scale
 61 | 
 62 |         if mask is not None:
 63 |             mask = F.pad(mask.flatten(1), (1, 0), value=True)
 64 |             assert mask.shape[-1] == dots.shape[-1], 'mask has incorrect dimensions'
 65 |             mask = mask[:, None, :] * mask[:, :, None]
 66 |             dots.masked_fill_(~mask, float('-inf'))
 67 |             del mask
 68 | 
 69 |         attn = dots.softmax(dim=-1)
 70 | 
 71 |         out = torch.einsum('bhij,bhjd->bhid', attn, v)
 72 |         out = rearrange(out, 'b h n d -> b n (h d)')
 73 |         out = self.to_out(out)
 74 |         return out
 75 | 
 76 | 
 77 | class Transformer(nn.Module):
 78 |     def __init__(self, dim, depth, heads, mlp_dim, dropout):
 79 |         super().__init__()
 80 |         self.layers = nn.ModuleList([])
 81 |         for _ in range(depth):
 82 |             self.layers.append(nn.ModuleList([
 83 |                 Residual(PreNorm(dim, Attention(dim, heads=heads, dropout=dropout))),
 84 |                 Residual(PreNorm(dim, FeedForward(dim, mlp_dim, dropout=dropout)))
 85 |             ]))
 86 | 
 87 |     def forward(self, x, mask=None):
 88 |         for attn, ff in self.layers:
 89 |             x = attn(x, mask=mask)
 90 |             x = ff(x)
 91 |         return x
 92 | 
 93 | 
 94 | class Seq_Transformer(nn.Module):
 95 |     def __init__(self, *, patch_size, dim, depth, heads, mlp_dim, channels=1, dropout=0.1):
 96 |         super().__init__()
 97 |         patch_dim = channels * patch_size
 98 |         self.patch_to_embedding = nn.Linear(patch_dim, dim)
 99 |         self.c_token = nn.Parameter(torch.randn(1, 1, dim))
100 |         self.transformer = Transformer(dim, depth, heads, mlp_dim, dropout)
101 |         self.to_c_token = nn.Identity()
102 | 
103 | 
104 |     def forward(self, forward_seq):
105 |         x = self.patch_to_embedding(forward_seq)
106 |         b, n, _ = x.shape
107 |         c_tokens = repeat(self.c_token, '() n d -> b n d', b=b)
108 |         x = torch.cat((c_tokens, x), dim=1)
109 |         x = self.transformer(x)
110 |         c_t = self.to_c_token(x[:, 0])
111 |         return c_t
112 | 


--------------------------------------------------------------------------------
/src/models/backbone.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | import math
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | from models.encoder import generate_binomial_mask, generate_continuous_mask
  8 | 
  9 | class SimConv4(torch.nn.Module):
 10 |     def __init__(self, input_dims, output_dims,hidden_dims=64, mask_mode='binomial'):
 11 |         super(SimConv4, self).__init__()
 12 |         self.input_fc = nn.Linear(input_dims, hidden_dims)
 13 | 
 14 |         self.feature_size = output_dims
 15 |         self.name = "conv4"
 16 |         self.mask_mode = mask_mode
 17 | 
 18 |         self.layer1 = torch.nn.Sequential(
 19 |             nn.Conv1d(hidden_dims, hidden_dims, 4, 2, 1, bias=False),
 20 |             torch.nn.BatchNorm1d(hidden_dims),
 21 |           torch.nn.ReLU()
 22 |         )
 23 | 
 24 |         self.layer2 = torch.nn.Sequential(
 25 |             nn.Conv1d(hidden_dims, hidden_dims, 4, 2, 1, bias=False),
 26 |             torch.nn.BatchNorm1d(hidden_dims),
 27 |           torch.nn.ReLU(),
 28 |         )
 29 | 
 30 |         self.layer3 = torch.nn.Sequential(
 31 |             nn.Conv1d(hidden_dims, hidden_dims, 4, 2, 1, bias=False),
 32 |             torch.nn.BatchNorm1d(hidden_dims),
 33 |           torch.nn.ReLU(),
 34 |         )
 35 | 
 36 |         self.layer4 = torch.nn.Sequential(
 37 |             nn.Conv1d(hidden_dims, output_dims, 4, 2, 1, bias=False),
 38 |             torch.nn.BatchNorm1d(output_dims),
 39 |           torch.nn.ReLU(),
 40 |           torch.nn.AdaptiveAvgPool1d(1)
 41 |         )
 42 | 
 43 |         self.flatten = torch.nn.Flatten()
 44 | 
 45 |         for m in self.modules():
 46 |             if isinstance(m, torch.nn.Conv2d):
 47 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
 48 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
 49 |             elif isinstance(m, torch.nn.BatchNorm2d):
 50 |                 m.weight.data.fill_(1)
 51 |                 m.bias.data.zero_()
 52 |             if isinstance(m, nn.Conv1d):
 53 |                 nn.init.xavier_normal_(m.weight.data)
 54 |             #        nn.init.xavier_normal_(m.bias.data)
 55 |             elif isinstance(m, nn.BatchNorm1d):
 56 |                 nn.init.constant_(m.weight, 1)
 57 |                 nn.init.constant_(m.bias, 0)
 58 |             elif isinstance(m, nn.Linear):
 59 |                 nn.init.constant_(m.weight, 1)
 60 |                 nn.init.constant_(m.bias, 0)
 61 | 
 62 |     def forward(self, x,mask=None):
 63 |         # x_ = x.view(x.shape[0], 1, -1) #(B, T, Ch)
 64 |         ## B x Ch x T
 65 | 
 66 |         nan_mask = ~x.isnan().any(axis=-1)
 67 |         x[~nan_mask] = 0
 68 |         x = self.input_fc(x)  # B x T x Ch
 69 | 
 70 |         # generate & apply mask
 71 |         if mask is None:
 72 |             if self.training:
 73 |                 mask = self.mask_mode
 74 |             else:
 75 |                 mask = 'all_true'
 76 | 
 77 |         if mask == 'binomial':
 78 |             mask = generate_binomial_mask(x.size(0), x.size(1)).to(x.device)
 79 |         elif mask == 'continuous':
 80 |             mask = generate_continuous_mask(x.size(0), x.size(1)).to(x.device)
 81 |         elif mask == 'all_true':
 82 |             mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
 83 |         elif mask == 'all_false':
 84 |             mask = x.new_full((x.size(0), x.size(1)), False, dtype=torch.bool)
 85 |         elif mask == 'mask_last':
 86 |             mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
 87 |             mask[:, -1] = False
 88 | 
 89 |         mask &= nan_mask
 90 |         x[~mask] = 0
 91 | 
 92 | 
 93 |         x_t = torch.permute(x,[0,2,1])
 94 |         h = self.layer1(x_t)  # (B, T, H)
 95 |         h = self.layer2(h)  # (B, 8, D/2)->(B, 16, D/4)
 96 |         h = self.layer3(h)  # (B, 16, D/4)->(B, 32, D/8)
 97 |         h = self.layer4(h)  # (B, 32, D/8)->(B, 64, 1)
 98 |         h = self.flatten(h)
 99 |         h = F.normalize(h, dim=1)
100 |         h = torch.unsqueeze(h,1)
101 |         return h
102 | 


--------------------------------------------------------------------------------
/src/models/dilated_conv.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | import torch.nn.functional as F
 4 | import numpy as np
 5 | 
 6 | 
 7 | class RelatedConv(nn.Module):
 8 |     def __init__(self, out_channel, kernel_size, dilation = 1):
 9 |         super(RelatedConv, self).__init__()
10 |         assert isinstance(kernel_size, tuple) or isinstance(kernel_size, list)
11 |         in_channels = 1
12 |         receptive_field = (kernel_size[-1] - 1) * dilation + 1
13 |         padding = receptive_field // 2
14 |         self.conv = nn.Conv2d(in_channels, out_channel, kernel_size,
15 |                               padding = [0, padding],
16 |                               dilation = dilation
17 |                               )
18 |         self.remove = 1 if receptive_field % 2 == 0 else 0
19 | 
20 |     def forward(self, x):
21 |         x = x.unsqueeze(1)  # B * Ch * T => B * 1 * Ch * T
22 |         x = self.conv(x)    # B * 1 * Ch * T => B * out * 1 * T
23 |         if self.remove > 0:
24 |             x = x[..., :-self.remove]
25 |         return x.squeeze(2) # B * out * 1 * T => B * out * T
26 | 
27 | 
class RelatedEncoder(nn.Module):
    """Chain of ``RelatedConv`` layers, each spanning all channels of its input.

    Args:
        out_channels: Output channel count of each layer, in order. The
            sequence is not modified.
        channel: Number of channels of the encoder input.
        kernel_size: Temporal kernel width; a single int is used for every
            layer, otherwise one entry per layer is read.
    """

    def __init__(self, out_channels, channel, kernel_size: int):
        super(RelatedEncoder, self).__init__()

        if isinstance(kernel_size, int):
            kernel_size = [kernel_size] * len(out_channels)

        # Build a new list rather than inserting into the argument, so the
        # caller's ``out_channels`` is left untouched (and tuples are accepted).
        widths = [channel, *out_channels]
        self.net = nn.Sequential(*[
            # Layer i consumes widths[i - 1] channels and emits widths[i].
            RelatedConv(widths[i], kernel_size = (widths[i - 1], kernel_size[i - 1]))
            for i in range(1, len(widths))
        ])

    def forward(self, x):
        """Apply all layers in sequence to ``x``."""
        return self.net(x)
43 | 
44 | 
class SamePadConv(nn.Module):
    """``Conv1d`` padded so the output has the same length as the input."""

    def __init__(self, in_channels, out_channels, kernel_size, dilation=1, groups=1):
        super().__init__()
        self.receptive_field = 1 + dilation * (kernel_size - 1)
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            padding=self.receptive_field // 2,
            dilation=dilation,
            groups=groups,
        )
        # Symmetric padding over-pads by one step when the field is even.
        self.remove = 0 if self.receptive_field % 2 else 1

    def forward(self, x):
        """Convolve ``x`` (B x C x T) and trim the surplus trailing step, if any."""
        out = self.conv(x)
        return out[:, :, :-self.remove] if self.remove > 0 else out
63 |     
class ConvBlock(nn.Module):
    """Residual block: two GELU-preceded ``SamePadConv`` layers plus a shortcut."""

    def __init__(self, in_channels, out_channels, kernel_size, dilation, final=False):
        super().__init__()
        self.conv1 = SamePadConv(in_channels, out_channels, kernel_size, dilation=dilation)
        self.conv2 = SamePadConv(out_channels, out_channels, kernel_size, dilation=dilation)
        # A 1x1 projection on the shortcut is used when the widths differ or
        # when this is flagged as the final block.
        if in_channels != out_channels or final:
            self.projector = nn.Conv1d(in_channels, out_channels, 1)
        else:
            self.projector = None

    def forward(self, x):
        """Return ``conv2(gelu(conv1(gelu(x)))) + shortcut(x)``."""
        shortcut = self.projector(x) if self.projector is not None else x
        hidden = self.conv1(F.gelu(x))
        hidden = self.conv2(F.gelu(hidden))
        return hidden + shortcut
78 | 
class DilatedConvEncoder(nn.Module):
    """Stack of ``ConvBlock`` layers whose dilation doubles with depth (1, 2, 4, ...)."""

    def __init__(self, in_channels, channels, kernel_size):
        super().__init__()
        last_index = len(channels) - 1
        blocks = []
        width_in = in_channels
        for depth, width_out in enumerate(channels):
            blocks.append(
                ConvBlock(
                    width_in,
                    width_out,
                    kernel_size=kernel_size,
                    dilation=2 ** depth,
                    final=(depth == last_index),
                )
            )
            # The next block consumes what this one produces.
            width_in = width_out
        self.net = nn.Sequential(*blocks)

    def forward(self, x):
        """Run ``x`` through every block in order."""
        return self.net(x)


--------------------------------------------------------------------------------
/src/models/encoder.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import copy
  3 | from torch import nn
  4 | import numpy as np
  5 | from .dilated_conv import DilatedConvEncoder
  6 | 
  7 | 
  8 | def generate_continuous_mask(B, T, n=5, l=0.1):
  9 |     res = torch.full((B, T), True, dtype=torch.bool)
 10 |     if isinstance(n, float):
 11 |         n = int(n * T)
 12 |     n = max(min(n, T // 2), 1)
 13 | 
 14 |     if isinstance(l, float):
 15 |         l = int(l * T)
 16 |     l = max(l, 1)
 17 | 
 18 |     for i in range(B):
 19 |         for _ in range(n):
 20 |             t = np.random.randint(T - l + 1)
 21 |             res[i, t:t + l] = False
 22 |     return res
 23 | 
 24 | 
 25 | class BertInterpHead(nn.Module):
 26 |     def __init__(self, input_dim, hidden_dim):
 27 |         super().__init__()
 28 |         self.dense = nn.Linear(hidden_dim, 4 * hidden_dim)
 29 |         self.activation = nn.ReLU()
 30 |         self.project = nn.Linear(4 * hidden_dim, input_dim)
 31 | 
 32 |     def forward(self, first_token_tensor):
 33 |         # We "pool" the model by simply taking the hidden state corresponding
 34 |         # to the first token.
 35 |         pooled_output = self.dense(first_token_tensor)
 36 |         pooled_output = self.activation(pooled_output)
 37 |         pooled_output = self.project(pooled_output)
 38 |         return pooled_output
 39 | 
 40 | 
 41 | def generate_binomial_mask(B, T, p=0.5):
 42 |     return torch.from_numpy(np.random.binomial(1, p, size=(B, T))).to(torch.bool)
 43 | 
 44 | 
class TSEncoder(nn.Module):
    """Dilated-convolution time-series encoder with an interpolation head.

    A per-time-step linear projection feeds a ``DilatedConvEncoder`` of
    ``depth + 1`` blocks, followed by dropout. ``interphead`` maps
    ``output_dims`` features back to ``input_dims``.

    ``mask_mode`` is stored on the instance but is not read by ``forward``.
    """

    def __init__(self, input_dims, output_dims, hidden_dims=64, depth=10, mask_mode='binomial'):
        super().__init__()
        self.input_dims = input_dims
        self.output_dims = output_dims
        self.hidden_dims = hidden_dims
        self.mask_mode = mask_mode
        self.input_fc = nn.Linear(input_dims, hidden_dims)

        self.feature_extractor = DilatedConvEncoder(
            # input_dims,
            hidden_dims,
            [hidden_dims] * depth + [output_dims],
            kernel_size=3
        )
        self.repr_dropout = nn.Dropout(p=0.1)
        self.interphead = BertInterpHead(input_dims, output_dims)

    def forward(self, x, mask=None):  # x: B x T x input_dims
        """Encode a batch of series.

        Args:
            x: Either a dict with key ``'mask'`` plus ``'data'`` or ``'x'``,
                or a tensor whose last ``shape[-1] // 2`` channels are the
                observation mask and whose remaining channels are the data.
                In both cases the last data channel is split off as ``t``
                (NOTE(review): presumably a timestamp channel - confirm) and
                is not used further in this method.
            mask: Only the value ``'mask_last'`` is acted on here; it zeroes
                the last time step and every step that contained a NaN.

        Returns:
            In training mode a tuple ``(x_whole, interp)``; otherwise the
            encoded tensor of the data multiplied by its mask.

        NaNs in the data and mask are zeroed in place, so views of the
        caller's input may be modified.
        """
        if isinstance(x, dict):
            # Keep an untouched copy so 'mask_origin' / 'mask' can be read later.
            input_all = copy.deepcopy(x)
            m = x['mask']
            x = x['data'] if 'data' in x.keys() else x['x']
        else:
            input_all = copy.deepcopy(x)
            # Trailing half of the channels is the mask, the rest is data.
            m = x[..., -(x.shape[-1] // 2):]
            x = x[..., :-(x.shape[-1] // 2)]

        # Split off the last data channel; `t` is unused below.
        t = x[..., -1]
        x = x[..., :-1]

        if mask == 'mask_last':
            # Must be computed before the NaNs are zeroed on the next line.
            nan_mask = ~x.isnan().any(axis=-1)

        x[torch.isnan(x)], m[torch.isnan(m)] = 0, 0

        # whole series without missing
        # NOTE(review): the training branches index input_all by string key,
        # which presumably requires dict input in training mode - confirm.
        if self.training:
            x_whole = self.input_fc(x * input_all['mask_origin'])
            x_whole = x_whole.transpose(1, 2)
            x_whole = self.feature_extractor(x_whole)  # B x Ch x T
            x_whole = x_whole.transpose(1, 2)  # B x T x Co
            x_whole = self.repr_dropout(x_whole)

        # recon mask part
        if self.training:
            x_interp = self.input_fc(x * input_all['mask'])
            x_interp = x_interp.transpose(1, 2)
            x_interp = self.feature_extractor(x_interp)  # B x Ch x T
            x_interp = x_interp.transpose(1, 2)  # B x T x Co
            x_interp = self.repr_dropout(x_interp)

        if mask == 'mask_last':
            # Hide the final time step as well as steps that contained NaN.
            mask = x.new_full((x.size(0), x.size(1)), True, dtype=torch.bool)
            mask[:, -1] = False
            mask &= nan_mask
            x[~mask] = 0

        # This pass runs in both modes, but its result is only returned in
        # evaluation mode.
        x = self.input_fc(x * m)
        x = x.transpose(1, 2)
        x = self.feature_extractor(x)  # B x Ch x T
        x = x.transpose(1, 2)  # B x T x Co
        x = self.repr_dropout(x)

        if self.training:
            return x_whole, self.interphead(x_interp)
        else:
            return x
113 | 


--------------------------------------------------------------------------------
/src/models/losses.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | def hierarchical_contrastive_loss(z1, z2, alpha=0.8, temporal_unit=0, temp=1.0):
 7 |     loss = torch.tensor(0., device=z1.device)
 8 |     d = 0
 9 | 
10 |     while z1.size(1) > 1:
11 | 
12 |         if alpha != 0:
13 |             if d == 0:
14 |                 loss += alpha * instance_contrastive_loss_mixup(z1, z2, temp)
15 |             else:
16 |                 loss += alpha * instance_contrastive_loss_mixup(z1, z2, temp)
17 |         if d >= temporal_unit:
18 |             if 1 - alpha != 0:
19 |                 if d == 0:
20 |                     loss += (1 - alpha) * temporal_contrastive_loss_mixup(z1, z2, temp)
21 |                 else:
22 |                     loss += (1 - alpha) * temporal_contrastive_loss_mixup(z1, z2, temp)
23 |         d += 1
24 | 
25 |         z1 = F.max_pool1d(z1.transpose(1, 2), kernel_size=2).transpose(1, 2)
26 |         z2 = F.max_pool1d(z2.transpose(1, 2), kernel_size=2).transpose(1, 2)
27 | 
28 |     if z1.size(1) == 1:
29 |         if alpha != 0:
30 |             loss += alpha * instance_contrastive_loss_mixup(z1, z2, temp)
31 |             d += 1
32 |     return loss / d
33 | 
34 | 
def temporal_contrastive_loss_mixup(z1, z2, temp=1.0):
    """Temporal contrastive loss with time-shuffled mixup samples as extra candidates.

    For ``z1`` and ``z2`` of shape ``(B, T, C)``, the same time step in the
    other view is the positive. All other steps of both views, plus mixup
    blends of each view with a random time permutation of itself, enter the
    softmax. Returns a scalar tensor, or ``0`` when ``T == 1``.
    """
    T = z1.size(1)
    if T == 1:
        return z1.new_tensor(0.)

    keep_1, keep_2 = 0.2, 0.2  # share of the un-shuffled series in each blend
    mixed_1 = keep_1 * z1 + (1 - keep_1) * z1[:, torch.randperm(z1.shape[1]), :]
    mixed_2 = keep_2 * z2 + (1 - keep_2) * z2[:, torch.randperm(z1.shape[1]), :]

    z = torch.cat([z1, z2, mixed_1, mixed_2], dim=1)  # B x 4T x C
    sim = torch.matmul(z[:, :2 * T, :], z.transpose(1, 2)) / temp  # B x 2T x 4T

    # Drop the self-similarity diagonal by gluing the strict lower and upper
    # triangles together, shifted by one column.
    logits = torch.tril(sim, diagonal=-1)[:, :, :-1]
    logits += torch.triu(sim, diagonal=1)[:, :, 1:]

    if T > 1500:
        # For very long series, move the large intermediates off the GPU.
        z, sim = z.cpu(), sim.cpu()
        torch.cuda.empty_cache()

    nll = -F.log_softmax(logits, dim=-1)
    nll = nll[:, :2 * T, :(2 * T - 1)]

    steps = torch.arange(T, device=z1.device)
    view1_term = nll[:, steps, T + steps - 1].mean()
    view2_term = nll[:, T + steps, steps].mean()
    return (view1_term + view2_term) / 2
63 | 
64 | 
def instance_contrastive_loss_mixup(z1, z2, temp=1.0):
    """Instance-wise contrastive loss with batch-shuffled mixup samples as extra candidates.

    For ``z1`` and ``z2`` of shape ``(B, T, C)``, the same instance in the
    other view is the positive at each time step. All other instances of both
    views, plus mixup blends of each view with a random batch permutation of
    itself, enter the softmax. Returns a scalar tensor, or ``0`` when ``B == 1``.
    """
    B = z1.size(0)
    if B == 1:
        return z1.new_tensor(0.)

    keep_1, keep_2 = 0.2, 0.2  # share of the un-shuffled batch in each blend
    mixed_1 = keep_1 * z1 + (1 - keep_1) * z1[torch.randperm(z1.shape[0]), :, :]
    mixed_2 = keep_2 * z2 + (1 - keep_2) * z2[torch.randperm(z2.shape[0]), :, :]

    stacked = torch.cat([z1, z2, mixed_1, mixed_2], dim=0).transpose(0, 1)  # T x 4B x C
    sim = torch.matmul(stacked[:, :2 * B, :], stacked.transpose(1, 2)) / temp  # T x 2B x 4B

    # Remove the self-similarity diagonal (strict lower + strict upper, shifted).
    logits = torch.tril(sim, diagonal=-1)[:, :, :-1]  # T x 2B x (4B-1)
    logits += torch.triu(sim, diagonal=1)[:, :, 1:]
    nll = -F.log_softmax(logits, dim=-1)
    nll = nll[:, :2 * B, :(2 * B - 1)]

    rows = torch.arange(B, device=z1.device)
    view1_term = nll[:, rows, B + rows - 1].mean()
    view2_term = nll[:, B + rows, rows].mean()
    return (view1_term + view2_term) / 2
89 | 


--------------------------------------------------------------------------------
/src/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification import eval_classification
2 | from .forecasting import eval_forecasting
3 | from .anomaly_detection import eval_anomaly_detection, eval_anomaly_detection_coldstart
4 | from .imputation import eval_imputation
5 | 


--------------------------------------------------------------------------------
/src/tasks/_eval_protocols.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | from sklearn.linear_model import Ridge
  3 | from sklearn.svm import SVC
  4 | from sklearn.linear_model import LogisticRegression
  5 | from sklearn.neighbors import KNeighborsClassifier
  6 | from sklearn.preprocessing import StandardScaler
  7 | from sklearn.pipeline import make_pipeline
  8 | from sklearn.model_selection import GridSearchCV, train_test_split
  9 | 
 10 | def fit_svm(features, y, MAX_SAMPLES=10000):
 11 |     nb_classes = np.unique(y, return_counts=True)[1].shape[0]
 12 |     train_size = features.shape[0]
 13 | 
 14 |     svm = SVC(C=np.inf, gamma='scale')
 15 |     if train_size // nb_classes < 5 or train_size < 50:
 16 |         return svm.fit(features, y)
 17 |     else:
 18 |         grid_search = GridSearchCV(
 19 |             svm, {
 20 |                 'C': [
 21 |                     0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000,
 22 |                     np.inf
 23 |                 ],
 24 |                 'kernel': ['rbf'],
 25 |                 'degree': [3],
 26 |                 'gamma': ['scale'],
 27 |                 'coef0': [0],
 28 |                 'shrinking': [True],
 29 |                 'probability': [False],
 30 |                 'tol': [0.001],
 31 |                 'cache_size': [200],
 32 |                 'class_weight': [None],
 33 |                 'verbose': [False],
 34 |                 'max_iter': [10000000],
 35 |                 'decision_function_shape': ['ovr'],
 36 |                 'random_state': [None]
 37 |             },
 38 |             cv=5, n_jobs=5
 39 |         )
 40 |         # If the training set is too large, subsample MAX_SAMPLES examples
 41 |         if train_size > MAX_SAMPLES:
 42 |             split = train_test_split(
 43 |                 features, y,
 44 |                 train_size=MAX_SAMPLES, random_state=0, stratify=y
 45 |             )
 46 |             features = split[0]
 47 |             y = split[2]
 48 |             
 49 |         grid_search.fit(features, y)
 50 |         return grid_search.best_estimator_
 51 | 
 52 | def fit_lr(features, y, MAX_SAMPLES=100000):
 53 |     # If the training set is too large, subsample MAX_SAMPLES examples
 54 |     if features.shape[0] > MAX_SAMPLES:
 55 |         split = train_test_split(
 56 |             features, y,
 57 |             train_size=MAX_SAMPLES, random_state=0, stratify=y
 58 |         )
 59 |         features = split[0]
 60 |         y = split[2]
 61 |         
 62 |     pipe = make_pipeline(
 63 |         StandardScaler(),
 64 |         LogisticRegression(
 65 |             random_state=0,
 66 |             max_iter=1000000,
 67 |             multi_class='ovr'
 68 |         )
 69 |     )
 70 |     pipe.fit(features, y)
 71 |     return pipe
 72 | 
 73 | def fit_knn(features, y):
 74 |     pipe = make_pipeline(
 75 |         StandardScaler(),
 76 |         KNeighborsClassifier(n_neighbors=1)
 77 |     )
 78 |     pipe.fit(features, y)
 79 |     return pipe
 80 | 
 81 | def fit_ridge(train_features, train_y, valid_features, valid_y, MAX_SAMPLES=100000):
 82 |     # If the training set is too large, subsample MAX_SAMPLES examples
 83 |     if train_features.shape[0] > MAX_SAMPLES:
 84 |         split = train_test_split(
 85 |             train_features, train_y,
 86 |             train_size=MAX_SAMPLES, random_state=0
 87 |         )
 88 |         train_features = split[0]
 89 |         train_y = split[2]
 90 |     if valid_features.shape[0] > MAX_SAMPLES:
 91 |         split = train_test_split(
 92 |             valid_features, valid_y,
 93 |             train_size=MAX_SAMPLES, random_state=0
 94 |         )
 95 |         valid_features = split[0]
 96 |         valid_y = split[2]
 97 |     
 98 |     alphas = [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]
 99 |     valid_results = []
100 |     for alpha in alphas:
101 |         lr = Ridge(alpha=alpha).fit(train_features, train_y)
102 |         valid_pred = lr.predict(valid_features)
103 |         score = np.sqrt(((valid_pred - valid_y) ** 2).mean()) + np.abs(valid_pred - valid_y).mean()
104 |         valid_results.append(score)
105 |     best_alpha = alphas[np.argmin(valid_results)]
106 |     
107 |     lr = Ridge(alpha=best_alpha)
108 |     lr.fit(train_features, train_y)
109 |     return lr
110 | 


--------------------------------------------------------------------------------
/src/tasks/anomaly_detection.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import time
  3 | from sklearn.metrics import f1_score, precision_score, recall_score
  4 | import bottleneck as bn
  5 | 
  6 | 
  7 | # consider delay threshold and missing segments
  8 | def get_range_proba(predict, label, delay=7):
  9 |     splits = np.where(label[1:] != label[:-1])[0] + 1
 10 |     is_anomaly = label[0] == 1
 11 |     new_predict = np.array(predict)
 12 |     pos = 0
 13 | 
 14 |     for sp in splits:
 15 |         if is_anomaly:
 16 |             if 1 in predict[pos:min(pos + delay + 1, sp)]:
 17 |                 new_predict[pos: sp] = 1
 18 |             else:
 19 |                 new_predict[pos: sp] = 0
 20 |         is_anomaly = not is_anomaly
 21 |         pos = sp
 22 |     sp = len(label)
 23 | 
 24 |     if is_anomaly:  # anomaly in the end
 25 |         if 1 in predict[pos: min(pos + delay + 1, sp)]:
 26 |             new_predict[pos: sp] = 1
 27 |         else:
 28 |             new_predict[pos: sp] = 0
 29 | 
 30 |     return new_predict
 31 | 
 32 | 
 33 | # set missing = 0
 34 | def reconstruct_label(timestamp, label):
 35 |     timestamp = np.asarray(timestamp, np.int64)
 36 |     index = np.argsort(timestamp)
 37 | 
 38 |     timestamp_sorted = np.asarray(timestamp[index])
 39 |     interval = np.min(np.diff(timestamp_sorted))
 40 | 
 41 |     label = np.asarray(label, np.int64)
 42 |     label = np.asarray(label[index])
 43 | 
 44 |     idx = (timestamp_sorted - timestamp_sorted[0]) // interval
 45 | 
 46 |     new_label = np.zeros(shape=((timestamp_sorted[-1] - timestamp_sorted[0]) // interval + 1,), dtype=np.int)
 47 |     new_label[idx] = label
 48 | 
 49 |     return new_label
 50 | 
 51 | 
 52 | def eval_ad_result(test_pred_list, test_labels_list, test_timestamps_list, delay):
 53 |     labels = []
 54 |     pred = []
 55 |     for test_pred, test_labels, test_timestamps in zip(test_pred_list, test_labels_list, test_timestamps_list):
 56 |         assert test_pred.shape == test_labels.shape == test_timestamps.shape
 57 |         test_labels = reconstruct_label(test_timestamps, test_labels)
 58 |         test_pred = reconstruct_label(test_timestamps, test_pred)
 59 |         test_pred = get_range_proba(test_pred, test_labels, delay)
 60 |         labels.append(test_labels)
 61 |         pred.append(test_pred)
 62 |     labels = np.concatenate(labels)
 63 |     pred = np.concatenate(pred)
 64 |     return {
 65 |         'f1': f1_score(labels, pred),
 66 |         'precision': precision_score(labels, pred),
 67 |         'recall': recall_score(labels, pred)
 68 |     }
 69 | 
 70 | 
 71 | def np_shift(arr, num, fill_value=np.nan):
 72 |     result = np.empty_like(arr)
 73 |     if num > 0:
 74 |         result[:num] = fill_value
 75 |         result[num:] = arr[:-num]
 76 |     elif num < 0:
 77 |         result[num:] = fill_value
 78 |         result[:num] = arr[-num:]
 79 |     else:
 80 |         result[:] = arr
 81 |     return result
 82 | 
 83 | 
def eval_anomaly_detection(model, all_train_data, all_train_labels, all_train_timestamps, all_test_data,
                           all_test_labels, all_test_timestamps, delay):
    """Detect anomalies per series from the gap between two encodings and score them.

    Every series is encoded twice with ``model.encode``: once with
    ``mask='mask_last'`` and once without a ``mask`` argument. The anomaly
    score of a point is the L1 distance between the two representations,
    normalised by a trailing moving average, and thresholded at
    mean + 4 std of the normalised training scores.

    Args:
        model: Object exposing ``encode(data, ...)``.
        all_train_data, all_test_data: Dicts with keys ``'x'`` (mapping series
            key -> values) and ``'mask'`` (indexed by series position).
        all_train_labels, all_test_labels: Mapping series key -> labels.
        all_train_timestamps, all_test_timestamps: Mapping series key -> timestamps.
        delay: Detection delay, used both for suppressing repeated alarms and
            in ``eval_ad_result``.

    Returns:
        ``(res_log, eval_res)``: the list of per-series 0/1 test predictions,
        and the metrics dict from ``eval_ad_result`` plus ``'infer_time'``.
    """
    t = time.time()

    train_mask, test_mask = all_train_data['mask'], all_test_data['mask']
    all_train_data, all_test_data = all_train_data['x'], all_test_data['x']
    # Global timestamp range, taken from the training timestamps only.
    ts = [[np.nanmin(all_train_timestamps[k]), np.nanmax(all_train_timestamps[k])] for k in all_train_timestamps]
    ts_max, ts_min = np.max(np.array(ts)), np.min(np.array(ts))

    all_train_repr = {}
    all_test_repr = {}
    # NOTE(review): "wom" presumably stands for "without mask" - confirm.
    all_train_repr_wom = {}
    all_test_repr_wom = {}
    for i, k in enumerate(all_train_data):
        train_data = all_train_data[k]
        test_data = all_test_data[k]

        # Min-max scale timestamps with the training range and append them as
        # a second channel next to the values.
        train_ts = (np.array(all_train_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min)
        test_ts = (np.array(all_test_timestamps[k]).astype(np.float64) - ts_min) / (ts_max - ts_min)
        train_data = np.concatenate([train_data.reshape(1, -1, 1), train_ts.reshape(1, -1, 1)], axis=-1)
        test_data = np.concatenate([test_data.reshape(1, -1, 1), test_ts.reshape(1, -1, 1)], axis=-1)
        # Train and test are encoded as one series; masks are cut to the
        # actual series lengths.
        data = {'x': np.concatenate([train_data, test_data], axis=1),
                'mask': np.concatenate(
                    [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)}

        full_repr = model.encode(
            # np.concatenate([train_data, test_data]).reshape(1, -1, 1),
            data,
            mask='mask_last',
            casual=True,
            sliding_length=1,
            sliding_padding=200,
            batch_size=256
        ).squeeze()
        all_train_repr[k] = full_repr[:train_data.shape[1]]
        all_test_repr[k] = full_repr[train_data.shape[1]:]

        # The input dict is rebuilt from scratch for the second call.
        # NOTE(review): presumably because encode may modify its input - confirm.
        data = {'x': np.concatenate([train_data, test_data], axis=1),
                'mask': np.concatenate(
                    [train_mask[i:i + 1][:, :train_data.shape[1]], test_mask[i:i + 1][:, :test_data.shape[1]]], axis=1)}

        full_repr_wom = model.encode(
            # np.concatenate([train_data, test_data]).reshape(1, -1, 1),
            data,
            casual=True,
            sliding_length=1,
            sliding_padding=200,
            batch_size=256
        ).squeeze()
        all_train_repr_wom[k] = full_repr_wom[:train_data.shape[1]]
        all_test_repr_wom[k] = full_repr_wom[train_data.shape[1]:]

    res_log = []
    labels_log = []
    timestamps_log = []
    for k in all_train_data:
        train_data = all_train_data[k]
        train_labels = all_train_labels[k]
        train_timestamps = all_train_timestamps[k]

        test_data = all_test_data[k]
        test_labels = all_test_labels[k]
        test_timestamps = all_test_timestamps[k]

        # Anomaly score: L1 distance between the two representations.
        train_err = np.abs(all_train_repr_wom[k] - all_train_repr[k]).sum(axis=1)
        test_err = np.abs(all_test_repr_wom[k] - all_test_repr[k]).sum(axis=1)

        # 21-point moving average shifted by one step, so each point is
        # compared with the average of the points before it.
        ma = np_shift(bn.move_mean(np.concatenate([train_err, test_err]), 21), 1)
        train_err_adj = (train_err - ma[:len(train_err)]) / ma[:len(train_err)]
        test_err_adj = (test_err - ma[len(train_err):]) / ma[len(train_err):]
        # NOTE(review): the first 22 points are dropped, presumably because
        # the shifted moving average is undefined there - confirm.
        train_err_adj = train_err_adj[22:]

        thr = np.mean(train_err_adj) + 4 * np.std(train_err_adj)
        test_res = (test_err_adj > thr) * 1

        # Clear an alarm when another alarm is still set within the previous
        # `delay` points; earlier clears affect later checks.
        for i in range(len(test_res)):
            if i >= delay and test_res[i - delay:i].sum() >= 1:
                test_res[i] = 0

        res_log.append(test_res)
        labels_log.append(test_labels)
        timestamps_log.append(test_timestamps)
    # Elapsed time covers encoding and thresholding, not the metric computation.
    t = time.time() - t

    eval_res = eval_ad_result(res_log, labels_log, timestamps_log, delay)
    eval_res['infer_time'] = t
    return res_log, eval_res
171 | 
172 | 
def eval_anomaly_detection_coldstart(model, all_train_data, all_train_labels, all_train_timestamps, all_test_data,
                                     all_test_labels, all_test_timestamps, delay):
    """Evaluate anomaly detection with a running (cold-start) threshold.

    For every series the train and test parts are concatenated and encoded
    twice: once with ``mask='mask_last'`` and once without a mask.  The L1
    distance between both representations is the raw anomaly score; it is
    normalised by a shifted 21-step moving average and compared against an
    expanding mean + 4 * std threshold.

    Args:
        model: Object exposing ``encode``.
        all_train_data, all_test_data: Dicts with keys ``'x'`` (mapping from
            series key to values) and ``'mask'`` (indexed by series position).
        all_train_labels, all_test_labels: Mappings from series key to labels.
        all_train_timestamps, all_test_timestamps: Mappings from series key to
            timestamps.
        delay: Number of steps after a detection during which further
            detections are suppressed; also forwarded to ``eval_ad_result``.

    Returns:
        ``(res_log, eval_res)`` where ``res_log`` is the list of per-series
        0/1 predictions and ``eval_res`` is the output of ``eval_ad_result``
        extended with ``'infer_time'``.
    """
    started = time.time()

    train_mask = all_train_data['mask']
    test_mask = all_test_data['mask']
    all_train_data = all_train_data['x']
    all_test_data = all_test_data['x']

    # Global timestamp range, taken from the training timestamps only.
    ts_bounds = np.array(
        [[np.nanmin(all_train_timestamps[k]), np.nanmax(all_train_timestamps[k])] for k in all_train_timestamps]
    )
    ts_max = np.max(ts_bounds)
    ts_min = np.min(ts_bounds)

    def _with_time_channel(values, stamps):
        # Append the min-max scaled timestamps as an extra feature channel.
        scaled = (np.array(stamps).astype(np.float64) - ts_min) / (ts_max - ts_min)
        return np.concatenate([values.reshape(1, -1, 1), scaled.reshape(1, -1, 1)], axis=-1)

    encode_kwargs = dict(casual=True, sliding_length=1, sliding_padding=200, batch_size=256)

    all_data = {}
    all_repr = {}
    all_repr_wom = {}
    for idx, key in enumerate(all_train_data):
        train_part = _with_time_channel(all_train_data[key], all_train_timestamps[key])
        test_part = _with_time_channel(all_test_data[key], all_test_timestamps[key])
        joint_mask = np.concatenate(
            [train_mask[idx:idx + 1][:, :train_part.shape[1]], test_mask[idx:idx + 1][:, :test_part.shape[1]]],
            axis=1,
        )
        model_input = {'x': np.concatenate([train_part, test_part], axis=1), 'mask': joint_mask}

        all_data[key] = np.concatenate([all_train_data[key], all_test_data[key]])
        # Representation with the last observation masked ...
        all_repr[key] = model.encode(model_input, mask='mask_last', **encode_kwargs).squeeze()
        # ... and without any mask ("wom").
        all_repr_wom[key] = model.encode(model_input, **encode_kwargs).squeeze()

    res_log = []
    labels_log = []
    timestamps_log = []
    for key, series in all_data.items():
        labels = np.concatenate([all_train_labels[key], all_test_labels[key]])
        timestamps = np.concatenate([all_train_timestamps[key], all_test_timestamps[key]])

        err = np.abs(all_repr_wom[key] - all_repr[key]).sum(axis=1)
        baseline = np_shift(bn.move_mean(err, 21), 1)
        err_adj = (err - baseline) / baseline

        # The first tenth of the series only warms up the running statistics.
        min_window = len(series) // 10
        span = len(err_adj)
        thr = bn.move_mean(err_adj, span, min_window) + 4 * bn.move_std(err_adj, span, min_window)
        res = (err_adj > thr) * 1

        # Keep only the first alarm inside any window of `delay` steps.  The
        # loop is sequential on purpose: clearing one alarm affects later ones.
        for pos in range(max(delay, 0), len(res)):
            if res[pos - delay:pos].sum() >= 1:
                res[pos] = 0

        res_log.append(res[min_window:])
        labels_log.append(labels[min_window:])
        timestamps_log.append(timestamps[min_window:])
    elapsed = time.time() - started

    eval_res = eval_ad_result(res_log, labels_log, timestamps_log, delay)
    eval_res['infer_time'] = elapsed
    return res_log, eval_res
244 | 
245 | 


--------------------------------------------------------------------------------
/src/tasks/classification.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from . import _eval_protocols as eval_protocols
 3 | from sklearn.preprocessing import label_binarize
 4 | from sklearn.metrics import average_precision_score, roc_auc_score
 5 | 
 6 | 
 7 | def eval_classification(model, train_data, train_labels, test_data, test_labels, eval_protocol='linear'):
 8 |     assert train_labels.ndim == 1 or train_labels.ndim == 2
 9 |     train_repr = model.encode(train_data, encoding_window='full_series' if train_labels.ndim == 1 else None)
10 |     test_repr = model.encode(test_data, encoding_window='full_series' if train_labels.ndim == 1 else None)
11 | 
12 |     if eval_protocol == 'linear':
13 |         fit_clf = eval_protocols.fit_lr
14 |     elif eval_protocol == 'svm':
15 |         fit_clf = eval_protocols.fit_svm
16 |     elif eval_protocol == 'knn':
17 |         fit_clf = eval_protocols.fit_knn
18 |     else:
19 |         assert False, 'unknown evaluation protocol'
20 | 
21 |     def merge_dim01(array):
22 |         return array.reshape(array.shape[0]*array.shape[1], *array.shape[2:])
23 | 
24 |     if train_labels.ndim == 2:
25 |         train_repr = merge_dim01(train_repr)
26 |         train_labels = merge_dim01(train_labels)
27 |         test_repr = merge_dim01(test_repr)
28 |         test_labels = merge_dim01(test_labels)
29 | 
30 |     clf = fit_clf(train_repr, train_labels)
31 | 
32 |     acc = clf.score(test_repr, test_labels)
33 |     if eval_protocol == 'linear':
34 |         y_score = clf.predict_proba(test_repr)
35 |     else:
36 |         y_score = clf.decision_function(test_repr)
37 |     test_labels_onehot = label_binarize(test_labels, classes=np.arange(train_labels.max()+1))
38 |     auprc = average_precision_score(test_labels_onehot, y_score)
39 | 
40 |     return y_score, { 'acc': acc, 'auprc': auprc}
41 | 


--------------------------------------------------------------------------------
/src/tasks/forecasting.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import time
 3 | from . import _eval_protocols as eval_protocols
 4 | 
 5 | def generate_pred_samples(features, data, pred_len, drop=0):
 6 |     n = data.shape[1]
 7 |     features = features[:, :-pred_len]
 8 |     labels = np.stack([ data[:, i:1+n+i-pred_len] for i in range(pred_len)], axis=2)[:, 1:]
 9 |     features = features[:, drop:]
10 |     labels = labels[:, drop:]
11 |     return features.reshape(-1, features.shape[-1]), \
12 |             labels.reshape(-1, labels.shape[2]*labels.shape[3])
13 | 
def cal_metrics(pred, target):
    """Return the mean squared and mean absolute error of ``pred`` vs ``target``."""
    residual = pred - target
    mse = (residual ** 2).mean()
    mae = np.abs(residual).mean()
    return {'MSE': mse, 'MAE': mae}
19 | 
20 | 
def eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols):
    """Evaluate forecasting by ridge regression on causal representations.

    Args:
        model: Object exposing ``encode``.
        data: Dict whose ``'x'`` entry is indexed as (instance, time, channel);
            the whole dict is passed to ``model.encode``.
        train_slice, valid_slice, test_slice: Slices along the time axis.
        scaler: Accepted for interface compatibility; metrics are reported on
            the normalised scale only, so it is not used.
        pred_lens: Iterable of forecast horizons.
        n_covariate_cols: Number of leading covariate channels excluded from
            the regression targets.

    Returns:
        ``(out_log, eval_res)``: per-horizon predictions / ground truth and a
        dict with metrics and timing information.
    """
    padding = 200

    encode_started = time.time()
    all_repr = model.encode(data, casual=True, sliding_length=1, sliding_padding=padding, batch_size=256)
    ts2vec_infer_time = time.time() - encode_started

    partitions = {'train': train_slice, 'valid': valid_slice, 'test': test_slice}
    reprs = {name: all_repr[:, part] for name, part in partitions.items()}
    # Targets: drop the covariate columns in front and the last channel.
    series = {name: data['x'][:, part, n_covariate_cols:][..., :-1] for name, part in partitions.items()}

    ours_result = {}
    lr_train_time = {}
    lr_infer_time = {}
    out_log = {}
    for pred_len in pred_lens:
        # The first `padding` training steps are dropped.
        train_features, train_labels = generate_pred_samples(reprs['train'], series['train'], pred_len, drop=padding)
        valid_features, valid_labels = generate_pred_samples(reprs['valid'], series['valid'], pred_len)
        test_features, test_labels = generate_pred_samples(reprs['test'], series['test'], pred_len)

        fit_started = time.time()
        ridge = eval_protocols.fit_ridge(train_features, train_labels, valid_features, valid_labels)
        lr_train_time[pred_len] = time.time() - fit_started

        predict_started = time.time()
        test_pred = ridge.predict(test_features)
        lr_infer_time[pred_len] = time.time() - predict_started

        # Restore (instance, time, horizon, channel) for reporting.
        target_shape = (series['test'].shape[0], -1, pred_len, series['test'].shape[2])
        test_pred = test_pred.reshape(target_shape)
        test_labels = test_labels.reshape(target_shape)

        out_log[pred_len] = {'norm': test_pred, 'norm_gt': test_labels}
        ours_result[pred_len] = {'norm': cal_metrics(test_pred, test_labels)}

    eval_res = {
        'ours': ours_result,
        'ts2vec_infer_time': ts2vec_infer_time,
        'lr_train_time': lr_train_time,
        'lr_infer_time': lr_infer_time
    }
    return out_log, eval_res
91 | 


--------------------------------------------------------------------------------
/src/tasks/imputation.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | from torch.utils.data import TensorDataset, DataLoader
 4 | 
 5 | 
 6 | def metrics(true, pred, mask):
 7 |     mask = 1. - mask
 8 |     mse = np.power((true - pred) * mask, 2).sum() / mask.sum()
 9 |     mae = np.abs((true - pred) * mask).sum() / mask.sum()
10 |     rmse = np.sqrt(mse)
11 |     return {'mse': mse, 'mae': mae, 'rmse': rmse}
12 | 
13 | 
def split(data, test_slice, seq_len):
    """Cut the first instance of the test range into non-overlapping windows.

    Args:
        data: Dict with ``'x'`` and ``'mask'`` arrays indexed as
            (instance, time, channel).
        test_slice: Slice along the time axis selecting the test range.
        seq_len: Window length; a trailing remainder shorter than this is
            discarded.

    Returns:
        ``(value, mask)`` float64 tensors of shape
        (n_windows, seq_len, channels), built from instance 0 only.
    """
    x = data['x'][:, test_slice]
    m = data['mask'][:, test_slice]

    n_windows = x.shape[1] // seq_len
    usable = n_windows * seq_len

    # astype always copies, so the tensors never alias the caller's arrays.
    value = x[0, :usable].astype(np.float64, order='C').reshape(n_windows, seq_len, x.shape[2])
    mask = m[0, :usable].astype(np.float64, order='C').reshape(n_windows, seq_len, m.shape[2])
    return torch.from_numpy(value), torch.from_numpy(mask)
23 | 
24 | 
def eval_imputation(model, data, test_slice, missing_rate, n_covariate_cols, device):
    """Evaluate imputation on randomly masked windows of the test range.

    The test range is cut into windows of 96 steps.  In each batch, entries
    are hidden independently with probability ``missing_rate``, the model is
    asked to reconstruct them, and the errors are measured on the hidden
    entries only.

    Args:
        model: Object whose ``net`` attribute is callable as
            ``net(values, mask, imputation=True)``.
        data: Dict with ``'x'`` and ``'mask'`` arrays (see ``split``).
        test_slice: Slice along the time axis selecting the test range.
        missing_rate: Fraction of entries to hide, in [0, 1].
        n_covariate_cols: Number of leading covariate channels excluded from
            the metrics.
        device: Device the batches are moved to.

    Returns:
        ``(None, metrics_dict)`` with keys ``'mse'``, ``'mae'`` and ``'rmse'``.
    """
    value, mask = split(data, test_slice, 96)
    test_loader = DataLoader(TensorDataset(value, mask), batch_size=128, shuffle=False, num_workers=8)
    # NOTE(review): unlike `TimesURL.encode`, the network is not switched to
    # eval mode here — confirm whether that is intended.
    with torch.no_grad():
        true, pred, m = [], [], []
        for batch in test_loader:
            x = batch[0].float().to(device)
            # The last channel is excluded from the model input and targets.
            observed = x[..., :-1]

            # Uniform samples make `missing_rate` an actual probability.  The
            # previous normal samples (randn) hid a fraction of
            # Phi(missing_rate) instead, e.g. about 62% for missing_rate=0.3.
            # 1 = kept, 0 = hidden.
            mask = (torch.rand_like(observed) > missing_rate).float()

            out = model.net(observed.masked_fill(mask == 0., 0.), mask, imputation=True)

            true.append(observed.cpu().detach().numpy())
            pred.append(out.cpu().detach().numpy())
            m.append(mask.cpu().detach().numpy())

        true = np.concatenate(true, axis=0)[..., n_covariate_cols:]
        pred = np.concatenate(pred, axis=0)[..., n_covariate_cols:]
        m = np.concatenate(m, axis=0)
    return None, metrics(true, pred, m)
52 | 


--------------------------------------------------------------------------------
/src/timesurl.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.utils.data import TensorDataset, DataLoader
  5 | import numpy as np
  6 | from models import TSEncoder
  7 | from models.losses import hierarchical_contrastive_loss
  8 | from utils import take_per_row, split_with_nan, centerize_vary_length_series, torch_pad_nan
  9 | from utils import inter_cubic_sp_torch
 10 | from utils import convert_coeff
 11 | from lib import get_unlabeled_pretrain_data
 12 | 
 13 | 
 14 | def tp_noneffect(func, x, **kwargs):
 15 |     tp = x[..., -1:]
 16 |     x = func(x[..., :-1], **kwargs)
 17 |     return torch.cat([x, tp], dim=-1)
 18 | 
 19 | 
 20 | def freq_mix(x, rate=0.5, dim=1):
 21 |     x_f = torch.fft.fft(x, dim=dim)
 22 | 
 23 |     m = torch.cuda.FloatTensor(x_f.shape).uniform_() < rate
 24 |     amp = abs(x_f)
 25 |     _, index = amp.sort(dim=dim, descending=True)
 26 |     dominant_mask = index > 2
 27 |     m = torch.bitwise_and(m, dominant_mask)
 28 |     freal = x_f.real.masked_fill(m, 0)
 29 |     fimag = x_f.imag.masked_fill(m, 0)
 30 | 
 31 |     b_idx = np.arange(x.shape[0])
 32 |     np.random.shuffle(b_idx)
 33 |     x2 = x[b_idx]
 34 |     x2_f = torch.fft.fft(x2, dim=dim)
 35 | 
 36 |     m = torch.bitwise_not(m)
 37 |     freal2 = x2_f.real.masked_fill(m, 0)
 38 |     fimag2 = x2_f.imag.masked_fill(m, 0)
 39 | 
 40 |     freal += freal2
 41 |     fimag += fimag2
 42 | 
 43 |     x_f = torch.complex(freal, fimag)
 44 | 
 45 |     x = torch.abs(torch.fft.ifft(x_f, dim=dim))
 46 |     return x
 47 | 
 48 | 
 49 | def freq_dropout(x, dropout_rate=0.5):
 50 |     x_aug = x.clone()
 51 |     x_aug_f = torch.fft.fft(x_aug)
 52 |     m = torch.cuda.FloatTensor(x_aug_f.shape).uniform_() < dropout_rate
 53 |     amp = torch.abs(x_aug_f)
 54 |     _, index = amp.sort(dim=1, descending=True)
 55 |     dominant_mask = index > 5
 56 |     m = torch.bitwise_and(m, dominant_mask)
 57 |     freal = x_aug_f.real.masked_fill(m, 0)
 58 |     fimag = x_aug_f.imag.masked_fill(m, 0)
 59 |     x_aug_f = torch.complex(freal, fimag)
 60 |     x_aug = torch.abs(torch.fft.ifft(x_aug_f, dim=1))
 61 |     return x_aug
 62 | 
 63 | 
 64 | class TimesURL:
 65 |     '''The TimesURL model'''
 66 | 
 67 |     def __init__(
 68 |             self,
 69 |             input_dims,
 70 |             output_dims=320,
 71 |             hidden_dims=64,
 72 |             depth=10,
 73 |             device='cuda',
 74 |             lr=0.001,
 75 |             batch_size=16,
 76 |             sgd=False,
 77 |             max_train_length=None,
 78 |             temporal_unit=0,
 79 |             after_iter_callback=None,
 80 |             after_epoch_callback=None,
 81 |             args=None
 82 |     ):
 83 |         ''' Initialize a TimesURL model.
 84 |         
 85 |         Args:
 86 |             input_dims (int): The input dimension. For a univariate time series, this should be set to 1.
 87 |             output_dims (int): The representation dimension.
 88 |             hidden_dims (int): The hidden dimension of the encoder.
 89 |             depth (int): The number of hidden residual blocks in the encoder.
 90 |             device (int): The gpu used for training and inference.
 91 |             lr (int): The learning rate.
 92 |             batch_size (int): The batch size.
 93 |             max_train_length (Union[int, NoneType]): The maximum allowed sequence length for training. For sequence with a length greater than <max_train_length>, it would be cropped into some sequences, each of which has a length less than <max_train_length>.
 94 |             temporal_unit (int): The minimum unit to perform temporal contrast. When training on a very long sequence, this param helps to reduce the cost of time and memory.
 95 |             after_iter_callback (Union[Callable, NoneType]): A callback function that would be called after each iteration.
 96 |             after_epoch_callback (Union[Callable, NoneType]): A callback function that would be called after each epoch.
 97 |         '''
 98 | 
 99 |         super().__init__()
100 |         self.device = device
101 |         self.lr = lr
102 |         self.sgd = sgd
103 |         self.batch_size = batch_size
104 |         self.max_train_length = max_train_length
105 |         self.temporal_unit = temporal_unit
106 | 
107 |         self._net = TSEncoder(input_dims=input_dims, output_dims=output_dims, hidden_dims=hidden_dims, depth=depth).to(self.device)
108 |         self.net = torch.optim.swa_utils.AveragedModel(self._net)
109 |         self.net.update_parameters(self._net)
110 | 
111 |         self.after_iter_callback = after_iter_callback
112 |         self.after_epoch_callback = after_epoch_callback
113 |         self.args = args
114 | 
115 |         self.n_epochs = 0
116 |         self.n_iters = 0
117 | 
118 |     def fit(self, train_data, n_epochs=None, n_iters=None, verbose=False, is_scheduler=True, temp=1.0):
119 |         ''' Training the TimesURL model.
120 |         
121 |         Args:
122 |             train_data (numpy.ndarray): The training data. It should have a shape of (n_instance, n_timestamps, n_features). All missing data should be set to NaN.
123 |             n_epochs (Union[int, NoneType]): The number of epochs. When this reaches, the training stops.
124 |             n_iters (Union[int, NoneType]): The number of iterations. When this reaches, the training stops. If both n_epochs and n_iters are not specified, a default setting would be used that sets n_iters to 200 for a dataset with size <= 100000, 600 otherwise.
125 |             verbose (bool): Whether to print the training loss after each epoch.
126 |             
127 |         Returns:
128 |             loss_log: a list containing the training losses on each epoch.
129 |         '''
130 |         train_data, mask = train_data['x'], train_data['mask']
131 | 
132 |         assert train_data.ndim == 3
133 | 
134 |         if n_iters is None and n_epochs is None:
135 |             n_iters = 200 if train_data.size <= 100000 else 600  # default param for n_iters
136 | 
137 |         if self.lr <= 1e-5 and n_iters is not None:
138 |             n_iters *= 1.2
139 | 
140 |         if self.max_train_length is not None:
141 |             sections = train_data.shape[1] // self.max_train_length
142 |             if sections >= 2:
143 |                 train_data = np.concatenate(split_with_nan(train_data, sections, axis=1), axis=0)
144 |                 mask = np.concatenate(split_with_nan(mask, sections, axis=1), axis=0)
145 | 
146 |         temporal_missing = np.isnan(train_data).all(axis=-1).any(axis=0)
147 |         if temporal_missing[0] or temporal_missing[-1]:
148 |             train_data, mask = centerize_vary_length_series(train_data, mask)
149 | 
150 |         mask = mask[~np.isnan(train_data[..., :-1]).all(axis=2).all(axis=1)]
151 |         train_data = train_data[~np.isnan(train_data[..., :-1]).all(axis=2).all(axis=1)]
152 |         mask[np.isnan(mask)] = 0
153 |         x, t = train_data[..., :-1], train_data[..., -1:]
154 |         obj = get_unlabeled_pretrain_data(np.concatenate([x, mask, t], axis=-1), self.args)
155 |         train_loader = obj['train_dataloader']
156 | 
157 |         if self.sgd:
158 |             optimizer = torch.optim.SGD(self._net.parameters(), lr=self.lr, weight_decay=5e-4, momentum=0.9)
159 |         else:
160 |             optimizer = torch.optim.AdamW(self._net.parameters(), lr=self.lr, weight_decay=5e-4)
161 |         if is_scheduler:
162 |             if n_iters is not None and n_epochs is None:
163 |                 max_epochs = n_iters // len(train_loader)
164 |             else:
165 |                 max_epochs = n_epochs
166 |             scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, max_epochs)
167 | 
168 |         loss_log = []
169 | 
170 |         while True:
171 |             if n_epochs is not None and self.n_epochs >= n_epochs:
172 |                 break
173 | 
174 |             cum_loss = 0
175 |             n_epoch_iters = 0
176 | 
177 |             interrupted = False
178 |             for batch in train_loader:
179 |                 if n_iters is not None and self.n_iters >= n_iters:
180 |                     interrupted = True
181 |                     break
182 | 
183 |                 value = batch['value'].to(self.device)
184 |                 time = batch['time'].to(self.device)
185 |                 mask = batch['mask'].to(self.device)
186 |                 mask_origin = batch['mask_origin'].to(self.device)
187 | 
188 |                 optimizer.zero_grad()
189 | 
190 |                 loss = torch.tensor([0.]).to(self.device)
191 |                 for seq in range(value.size(1)):
192 |                     x, t, m, m_old = value[:, seq], time[:, seq], mask[:, seq], mask_origin[:, seq]
193 |                     dim = x.size(-1)
194 |                     x = torch.cat([x, t.unsqueeze(2)], dim=-1)
195 | 
196 |                     ts_l = x.size(1)
197 |                     crop_l = np.random.randint(low=2 ** (self.temporal_unit + 1), high=ts_l + 1)
198 |                     crop_left = np.random.randint(ts_l - crop_l + 1)
199 |                     crop_right = crop_left + crop_l
200 |                     crop_eleft = np.random.randint(crop_left + 1)
201 |                     crop_eright = np.random.randint(low=crop_right, high=ts_l + 1)
202 |                     crop_offset = np.random.randint(low=-crop_eleft, high=ts_l - crop_eright + 1, size=x.size(0))
203 | 
204 |                     x_left = take_per_row(x, crop_offset + crop_eleft, crop_right - crop_eleft)
205 |                     x_right = tp_noneffect(freq_mix, take_per_row(x, crop_offset + crop_left, crop_eright - crop_left), rate=0.5)
206 | 
207 |                     mask1 = take_per_row(m[..., :dim], crop_offset + crop_eleft, crop_right - crop_eleft)
208 |                     mask2 = take_per_row(m[..., :dim], crop_offset + crop_left, crop_eright - crop_left)
209 | 
210 |                     mask1_inter = take_per_row(m[..., dim:], crop_offset + crop_eleft, crop_right - crop_eleft)
211 |                     mask2_inter = take_per_row(m[..., dim:], crop_offset + crop_left, crop_eright - crop_left)
212 | 
213 |                     mask1_origin = take_per_row(m_old, crop_offset + crop_eleft, crop_right - crop_eleft)
214 |                     mask2_origin = take_per_row(m_old, crop_offset + crop_left, crop_eright - crop_left)
215 | 
216 |                     out1, left_recon = self._net({'data': x_left, 'mask': mask1, 'mask_inter': mask1_inter, 'mask_origin': mask1_origin})
217 |                     out2, right_recon = self._net({'data': x_right, 'mask': mask2, 'mask_inter': mask2_inter, 'mask_origin': mask2_origin})
218 | 
219 |                     out1, left_recon = out1[:, -crop_l:], left_recon[:, -crop_l:]
220 |                     out2, right_recon = out2[:, :crop_l], right_recon[:, :crop_l]
221 | 
222 |                     x_left, x_right = x_left[:, -crop_l:], x_right[:, :crop_l]
223 | 
224 |                     mask1, mask2 = mask1[:, -crop_l:], mask2[:, :crop_l]
225 |                     mask1_inter, mask2_inter = mask1_inter[:, -crop_l:], mask2_inter[:, :crop_l]
226 | 
227 |                     loss += self.args.lmd * hierarchical_contrastive_loss(
228 |                         out1,
229 |                         out2,
230 |                         temporal_unit=self.temporal_unit,
231 |                         temp=temp
232 |                     )
233 | 
234 |                     if torch.sum(mask1_inter) > 0:
235 |                         loss += 1 * torch.sum(torch.pow((x_left[..., :-1] - left_recon) * mask1_inter, 2)) / (
236 |                                 torch.sum(mask1_inter) + 1e-10) / 2
237 |                     if torch.sum(mask2_inter) > 0:
238 |                         loss += 1 * torch.sum(torch.pow((x_right[..., :-1] - right_recon) * mask2_inter, 2)) / (
239 |                                 torch.sum(mask2_inter) + 1e-10) / 2
240 | 
241 |                 loss.requires_grad_(True)
242 |                 loss.backward()
243 |                 optimizer.step()
244 |                 self.net.update_parameters(self._net)
245 | 
246 |                 cum_loss += loss.item()
247 |                 n_epoch_iters += 1
248 | 
249 |                 self.n_iters += 1
250 | 
251 |                 if self.after_iter_callback is not None:
252 |                     self.after_iter_callback(self, loss.item())
253 | 
254 |             cum_loss /= n_epoch_iters if n_epoch_iters else 1
255 |             loss_log.append(cum_loss)
256 |             if verbose:
257 |                 print(f"Epoch #{self.n_epochs}: loss={cum_loss}")
258 |             self.n_epochs += 1
259 |             if is_scheduler:
260 |                 scheduler.step()
261 | 
262 |             if self.after_epoch_callback is not None:
263 |                 self.after_epoch_callback(self, cum_loss)
264 | 
265 |             if interrupted:
266 |                 break
267 |         # end
268 | 
269 |         return loss_log
270 | 
271 |     def _eval_with_pooling(self, x, mask=None, slicing=None, encoding_window=None):
272 |         out = self.net(x.to(self.device, non_blocking=True), mask)
273 |         if encoding_window == 'full_series':
274 |             if slicing is not None:
275 |                 out = out[:, slicing]
276 |             out = F.max_pool1d(
277 |                 out.transpose(1, 2),
278 |                 kernel_size=out.size(1),
279 |             ).transpose(1, 2)
280 | 
281 |         elif isinstance(encoding_window, int):
282 |             out = F.max_pool1d(
283 |                 out.transpose(1, 2),
284 |                 kernel_size=encoding_window,
285 |                 stride=1,
286 |                 padding=encoding_window // 2
287 |             ).transpose(1, 2)
288 |             if encoding_window % 2 == 0:
289 |                 out = out[:, :-1]
290 |             if slicing is not None:
291 |                 out = out[:, slicing]
292 | 
293 |         elif encoding_window == 'multiscale':
294 |             p = 0
295 |             reprs = []
296 |             while (1 << p) + 1 < out.size(1):
297 |                 t_out = F.max_pool1d(
298 |                     out.transpose(1, 2),
299 |                     kernel_size=(1 << (p + 1)) + 1,
300 |                     stride=1,
301 |                     padding=1 << p
302 |                 ).transpose(1, 2)
303 |                 if slicing is not None:
304 |                     t_out = t_out[:, slicing]
305 |                 reprs.append(t_out)
306 |                 p += 1
307 |             out = torch.cat(reprs, dim=-1)
308 | 
309 |         else:
310 |             if slicing is not None:
311 |                 out = out[:, slicing]
312 | 
313 |         return out.cpu()
314 | 
315 |     def encode(self, data, mask=None, encoding_window=None, casual=False, sliding_length=None, sliding_padding=0,
316 |                batch_size=None):
317 |         ''' Compute representations using the model.
318 | 
319 |         Args:
320 |             data (numpy.ndarray): This should have a shape of (n_instance, n_timestamps, n_features). All missing data should be set to NaN.
321 |             mask (str): The mask used by encoder can be specified with this parameter. This can be set to 'binomial', 'continuous', 'all_true', 'all_false' or 'mask_last'.
322 |             encoding_window (Union[str, int]): When this param is specified, the computed representation would the max pooling over this window. This can be set to 'full_series', 'multiscale' or an integer specifying the pooling kernel size.
323 |             casual (bool): When this param is set to True, the future informations would not be encoded into representation of each timestamp.
324 |             sliding_length (Union[int, NoneType]): The length of sliding window. When this param is specified, a sliding inference would be applied on the time series.
325 |             sliding_padding (int): This param specifies the contextual data length used for inference every sliding windows.
326 |             batch_size (Union[int, NoneType]): The batch size used for inference. If not specified, this would be the same batch size as training.
327 | 
328 |         Returns:
329 |             repr: The representations for data.
330 |         '''
331 |         assert self.net is not None, 'please train or load a net first'
332 |         assert isinstance(data, dict) or data.ndim == 3
333 |         if batch_size is None:
334 |             batch_size = self.batch_size
335 |         n_samples, ts_l, _ = data.shape if not isinstance(data, dict) else data['x'].shape
336 | 
337 |         org_training = self.net.training
338 |         self.net.eval()
339 | 
340 |         if isinstance(data, dict):
341 |             data = np.concatenate((data['x'], data['mask']), axis=-1)
342 |         dataset = TensorDataset(torch.from_numpy(data).to(torch.float))
343 |         loader = DataLoader(dataset, batch_size=batch_size)
344 | 
345 |         with torch.no_grad():
346 |             output = []
347 |             for batch in loader:
348 |                 x = batch[0]
349 |                 if sliding_length is not None:
350 |                     reprs = []
351 |                     if n_samples < batch_size:
352 |                         calc_buffer = []
353 |                         calc_buffer_l = 0
354 |                     for i in range(0, ts_l, sliding_length):
355 |                         l = i - sliding_padding
356 |                         r = i + sliding_length + (sliding_padding if not casual else 0)
357 |                         x_sliding = torch_pad_nan(
358 |                             x[:, max(l, 0): min(r, ts_l)],
359 |                             left=-l if l < 0 else 0,
360 |                             right=r - ts_l if r > ts_l else 0,
361 |                             dim=1
362 |                         )
363 |                         if n_samples < batch_size:
364 |                             if calc_buffer_l + n_samples > batch_size:
365 |                                 out = self._eval_with_pooling(
366 |                                     torch.cat(calc_buffer, dim=0),
367 |                                     mask,
368 |                                     slicing=slice(sliding_padding, sliding_padding + sliding_length),
369 |                                     encoding_window=encoding_window
370 |                                 )
371 |                                 reprs += torch.split(out, n_samples)
372 |                                 calc_buffer = []
373 |                                 calc_buffer_l = 0
374 |                             calc_buffer.append(x_sliding)
375 |                             calc_buffer_l += n_samples
376 |                         else:
377 |                             out = self._eval_with_pooling(
378 |                                 x_sliding,
379 |                                 mask,
380 |                                 slicing=slice(sliding_padding, sliding_padding + sliding_length),
381 |                                 encoding_window=encoding_window
382 |                             )
383 |                             reprs.append(out)
384 | 
385 |                     if n_samples < batch_size:
386 |                         if calc_buffer_l > 0:
387 |                             out = self._eval_with_pooling(
388 |                                 torch.cat(calc_buffer, dim=0),
389 |                                 mask,
390 |                                 slicing=slice(sliding_padding, sliding_padding + sliding_length),
391 |                                 encoding_window=encoding_window
392 |                             )
393 |                             reprs += torch.split(out, n_samples)
394 |                             calc_buffer = []
395 |                             calc_buffer_l = 0
396 | 
397 |                     out = torch.cat(reprs, dim=1)
398 |                     if encoding_window == 'full_series':
399 |                         out = F.max_pool1d(
400 |                             out.transpose(1, 2).contiguous(),
401 |                             kernel_size=out.size(1),
402 |                         ).squeeze(1)
403 |                 else:
404 |                     out = self._eval_with_pooling(x, mask, encoding_window=encoding_window)
405 |                     if encoding_window == 'full_series':
406 |                         out = out.squeeze(1)
407 | 
408 |                 output.append(out)
409 | 
410 |             output = torch.cat(output, dim=0)
411 | 
412 |         self.net.train(org_training)
413 |         return output.numpy()
414 | 
415 |     def save(self, fn):
416 |         ''' Save the model to a file.
417 |         
418 |         Args:
419 |             fn (str): filename.
420 |         '''
421 |         torch.save(self.net.state_dict(), fn)
422 | 
423 |     def load(self, fn):
424 |         ''' Load the model from a file.
425 |         
426 |         Args:
427 |             fn (str): filename.
428 |         '''
429 |         state_dict = torch.load(fn, map_location=self.device)
430 |         self.net.load_state_dict(state_dict)
431 | 


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
  1 | import copy
  2 | 
  3 | import torch
  4 | import numpy as np
  5 | import argparse
  6 | import os
  7 | import sys
  8 | import time
  9 | import datetime
 10 | from timesurl import TimesURL
 11 | import tasks
 12 | import datautils
 13 | from utils import init_dl_program, name_with_datetime, pkl_save, data_dropout
 14 | 
 15 | def save_checkpoint_callback(
 16 |     save_every=1,
 17 |     unit='epoch'
 18 | ):
 19 |     assert unit in ('epoch', 'iter')
 20 |     def callback(model, loss):
 21 |         n = model.n_epochs if unit == 'epoch' else model.n_iters
 22 |         if n % save_every == 0:
 23 |             model.save(f'{run_dir}/model_{n}.pkl')
 24 |     return callback
 25 | 
 26 | if __name__ == '__main__':
 27 |     parser = argparse.ArgumentParser()
 28 |     parser.add_argument('dataset', help='The dataset name')
 29 |     parser.add_argument('run_name', help='The folder name used to save model, output and evaluation metrics. This can be set to any word')
 30 |     parser.add_argument('--loader', type=str, required=True, help='The data loader used to load the experimental data. This can be set to UCR, UEA, forecast_csv, forecast_csv_univar, anomaly, or anomaly_coldstart')
 31 |     parser.add_argument('--gpu', type=int, default=0, help='The gpu no. used for training and inference (defaults to 0)')
 32 |     parser.add_argument('--batch-size', type=int, default=8, help='The batch size (defaults to 8)')
 33 |     parser.add_argument('--lr', type=float, default=0.0001, help='The learning rate (defaults to 0.001)')
 34 |     parser.add_argument('--repr-dims', type=int, default=320, help='The representation dimension (defaults to 320)')
 35 |     parser.add_argument('--max-train-length', type=int, default=3000, help='For sequence with a length greater than <max_train_length>, it would be cropped into some sequences, each of which has a length less than <max_train_length> (defaults to 3000)')
 36 |     parser.add_argument('--iters', type=int, default=None, help='The number of iterations')
 37 |     parser.add_argument('--epochs', type=int, default=None, help='The number of epochs')
 38 |     parser.add_argument('--save-every', type=int, default=None, help='Save the checkpoint every <save_every> iterations/epochs')
 39 |     parser.add_argument('--seed', type=int, default=None, help='The random seed')
 40 |     parser.add_argument('--max-threads', type=int, default=None, help='The maximum allowed number of threads used by this process')
 41 |     parser.add_argument('--eval', action="store_true", help='Whether to perform evaluation after training')
 42 |     parser.add_argument('--sgd', action="store_true", help='Whether to perform evaluation after training')
 43 |     parser.add_argument('--load_tp', action="store_true", help='Whether to perform evaluation after training')
 44 |     parser.add_argument('--temp', type=float, default=1.0,)
 45 |     parser.add_argument('--lmd', type=float, default=0.01, )
 46 |     parser.add_argument('--irregular', type=float, default=0, help='The ratio of missing observations (defaults to 0)')
 47 |     parser.add_argument('--segment_num', type=int, default=3,
 48 |                         help='number of time interval segment to mask, default: 3 time intervals')
 49 |     parser.add_argument('--mask_ratio_per_seg', type=float, default=0.05,
 50 |                         help='fraction of the sequence length to mask for each time interval, deafult: 0.05 * seq_len to be masked for each of the time interval')
 51 |     args = parser.parse_args()
 52 |     
 53 |     print("Dataset:", args.dataset)
 54 |     print("Arguments:", str(args))
 55 |     
 56 |     device = init_dl_program(args.gpu, seed=args.seed, max_threads=args.max_threads, deterministic=False)
 57 | 
 58 |     args.load_tp = True
 59 |     
 60 |     print('Loading data... ', end='')
 61 |     if args.loader == 'UCR':
 62 |         task_type = 'classification'
 63 |         train_data, train_labels, test_data, test_labels = datautils.load_UCR(args.dataset, load_tp = args.load_tp)
 64 | 
 65 |     elif args.loader == 'Others':
 66 |         task_type = 'classification'
 67 |         train_data, train_labels, test_data, test_labels = datautils.load_others(args.dataset, load_tp = args.load_tp)
 68 | 
 69 |     elif args.loader == 'UEA':
 70 |         task_type = 'classification'
 71 |         train_data, train_labels, test_data, test_labels = datautils.load_UEA(args.dataset, load_tp = args.load_tp)
 72 |         
 73 |     elif args.loader == 'forecast_csv':
 74 |         task_type = 'forecasting' if 'forecast' in args.run_name else 'imputation'
 75 |         offset = 0 if task_type == 'forecasting' else 96
 76 |         data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset, offset=offset, load_tp=args.load_tp)
 77 |         train_data = {'x': data['x'][:, train_slice], 'mask': data['mask'][:, train_slice]}
 78 | 
 79 |     elif args.loader == 'forecast_csv_univar':
 80 |         task_type = 'forecasting' if 'forecast' in args.run_name else 'imputation'
 81 |         offset = 0 if task_type == 'forecasting' else 96
 82 |         data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_csv(args.dataset, offset=offset, univar=True, load_tp=args.load_tp)
 83 |         train_data = {'x': data['x'][:, train_slice], 'mask': data['mask'][:, train_slice]}
 84 |         
 85 |     elif args.loader == 'forecast_npy':
 86 |         task_type = 'forecasting'
 87 |         data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset)
 88 |         train_data = data[:, train_slice]
 89 |         
 90 |     elif args.loader == 'forecast_npy_univar':
 91 |         task_type = 'forecasting'
 92 |         data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols = datautils.load_forecast_npy(args.dataset, univar=True)
 93 |         train_data = data[:, train_slice]
 94 |         
 95 |     elif args.loader == 'anomaly':
 96 |         task_type = 'anomaly_detection'
 97 |         # all_train_data, all_train_labels, all_train_timestamps, all_test_data, all_test_labels, all_test_timestamps, delay = datautils.load_anomaly(args.dataset)
 98 |         train_data_task, train_labels, train_timestamps, test_data, test_labels, test_timestamps, delay = datautils.load_anomaly(args.dataset, load_tp=args.load_tp)
 99 |         train_data = datautils.gen_ano_train_data(train_data_task['x'])
100 |         train_data = {
101 |             'x': np.concatenate([train_data, datautils.gen_ano_train_data(train_timestamps, train_data.shape[1], True)], axis=-1),
102 |             'mask': train_data_task['mask']}
103 |         
104 |     else:
105 |         raise ValueError(f"Unknown loader {args.loader}.")
106 | 
107 |     args.task_type = task_type
108 |     if args.irregular > 0:
109 |         if task_type == 'classification':
110 |             train_data = data_dropout(train_data, args.irregular)
111 |             test_data = data_dropout(test_data, args.irregular)
112 |         else:
113 |             raise ValueError(f"Task type {task_type} is not supported when irregular>0.")
114 |     print('done')
115 |     print(train_data['x'].shape)
116 | 
117 |     config = dict(
118 |         batch_size=args.batch_size,
119 |         lr=args.lr,
120 |         sgd=args.sgd,
121 |         output_dims=args.repr_dims,
122 |         max_train_length=args.max_train_length,
123 |         args=args
124 |     )
125 |     
126 |     if args.save_every is not None:
127 |         unit = 'epoch' if args.epochs is not None else 'iter'
128 |         config[f'after_{unit}_callback'] = save_checkpoint_callback(args.save_every, unit)
129 | 
130 |     run_dir = 'training/' + args.dataset + '__' + name_with_datetime(args.run_name)
131 |     os.makedirs(run_dir, exist_ok=True)
132 |     
133 |     t = time.time()
134 |     
135 |     model = TimesURL(
136 |         input_dims=train_data['x'].shape[-1] - (1 if args.load_tp else 0),
137 |         device=device,
138 |         **config
139 |     )
140 |     loss_log = model.fit(
141 |         train_data,
142 |         n_epochs=args.epochs,
143 |         n_iters=args.iters,
144 |         verbose=True,
145 |         is_scheduler=True if args.sgd else False,
146 |         temp=args.temp
147 |     )
148 |     model.save(f'{run_dir}/model.pkl')
149 | 
150 |     t = time.time() - t
151 |     print(f"\nTraining time: {datetime.timedelta(seconds=t)}\n")
152 | 
153 |     if args.eval:
154 |         if task_type == 'classification':
155 |             out, eval_res = tasks.eval_classification(model, train_data, train_labels, test_data, test_labels, eval_protocol='svm')
156 |         elif task_type == 'forecasting':
157 |             out, eval_res = tasks.eval_forecasting(model, data, train_slice, valid_slice, test_slice, scaler, pred_lens, n_covariate_cols)
158 |         elif task_type == 'anomaly_detection':
159 |             out, eval_res = tasks.eval_anomaly_detection(model, train_data_task, train_labels, train_timestamps, test_data, test_labels, test_timestamps, delay)
160 |         elif task_type == 'imputation':
161 |             out, eval_res = tasks.eval_imputation(model, data, test_slice, args.missing_rate, n_covariate_cols, device)
162 |         else:
163 |             assert False
164 | 
165 |         pkl_save(f'{run_dir}/out.pkl', out)
166 |         pkl_save(f'{run_dir}/eval_res.pkl', eval_res)
167 |         print('Evaluation result:', eval_res)
168 | 
169 |     print("Finished.")
170 | 


--------------------------------------------------------------------------------
/src/utils.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pickle
  4 | import torch
  5 | import random
  6 | from datetime import datetime
  7 | from scipy.interpolate import CubicSpline
  8 | 
  9 | def pkl_save(name, var):
 10 |     with open(name, 'wb') as f:
 11 |         pickle.dump(var, f)
 12 | 
 13 | def pkl_load(name):
 14 |     with open(name, 'rb') as f:
 15 |         return pickle.load(f)
 16 |     
 17 | def torch_pad_nan(arr, left=0, right=0, dim=0):
 18 |     if left > 0:
 19 |         padshape = list(arr.shape)
 20 |         padshape[dim] = left
 21 |         arr = torch.cat((torch.full(padshape, np.nan), arr), dim=dim)
 22 |     if right > 0:
 23 |         padshape = list(arr.shape)
 24 |         padshape[dim] = right
 25 |         arr = torch.cat((arr, torch.full(padshape, np.nan)), dim=dim)
 26 |     return arr
 27 |     
 28 | def pad_nan_to_target(array, target_length, axis=0, both_side=False):
 29 |     assert array.dtype in [np.float16, np.float32, np.float64]
 30 |     pad_size = target_length - array.shape[axis]
 31 |     if pad_size <= 0:
 32 |         return array
 33 |     npad = [(0, 0)] * array.ndim
 34 |     if both_side:
 35 |         npad[axis] = (pad_size // 2, pad_size - pad_size//2)
 36 |     else:
 37 |         npad[axis] = (0, pad_size)
 38 |     return np.pad(array, pad_width=npad, mode='constant', constant_values=np.nan)
 39 | 
 40 | def split_with_nan(x, sections, axis=0):
 41 |     assert x.dtype in [np.float16, np.float32, np.float64]
 42 |     arrs = np.array_split(x, sections, axis=axis)
 43 |     target_length = arrs[0].shape[axis]
 44 |     for i in range(len(arrs)):
 45 |         arrs[i] = pad_nan_to_target(arrs[i], target_length, axis=axis)
 46 |     return arrs
 47 | 
 48 | def take_per_row(A, indx, num_elem):
 49 |     all_indx = indx[:,None] + np.arange(num_elem)
 50 |     return A[torch.arange(all_indx.shape[0])[:,None], all_indx]
 51 | 
 52 | def centerize_vary_length_series(x, mask):
 53 |     prefix_zeros = np.argmax(~np.isnan(x).all(axis=-1), axis=1)
 54 |     suffix_zeros = np.argmax(~np.isnan(x[:, ::-1]).all(axis=-1), axis=1)
 55 |     offset = (prefix_zeros + suffix_zeros) // 2 - prefix_zeros
 56 |     rows, column_indices = np.ogrid[:x.shape[0], :x.shape[1]]
 57 |     offset[offset < 0] += x.shape[1]
 58 |     column_indices = column_indices - offset[:, np.newaxis]
 59 |     return x[rows, column_indices], mask[rows, column_indices]
 60 | 
 61 | def data_dropout(arr, p):
 62 |     B, T = arr.shape[0], arr.shape[1]
 63 |     mask = np.full(B*T, False, dtype=np.bool)
 64 |     ele_sel = np.random.choice(
 65 |         B*T,
 66 |         size=int(B*T*p),
 67 |         replace=False
 68 |     )
 69 |     mask[ele_sel] = True
 70 |     res = arr.copy()
 71 |     res[mask.reshape(B, T)] = np.nan
 72 |     return res
 73 | 
 74 | def name_with_datetime(prefix='default'):
 75 |     now = datetime.now()
 76 |     return prefix + '_' + now.strftime("%Y%m%d_%H%M%S")
 77 | 
 78 | def init_dl_program(
 79 |     device_name,
 80 |     seed=None,
 81 |     use_cudnn=True,
 82 |     deterministic=False,
 83 |     benchmark=False,
 84 |     use_tf32=False,
 85 |     max_threads=None
 86 | ):
 87 |     import torch
 88 |     if max_threads is not None:
 89 |         torch.set_num_threads(max_threads)  # intraop
 90 |         if torch.get_num_interop_threads() != max_threads:
 91 |             torch.set_num_interop_threads(max_threads)  # interop
 92 |         try:
 93 |             import mkl
 94 |         except:
 95 |             pass
 96 |         else:
 97 |             mkl.set_num_threads(max_threads)
 98 |         
 99 |     if seed is not None:
100 |         random.seed(seed)
101 |         seed += 1
102 |         np.random.seed(seed)
103 |         seed += 1
104 |         torch.manual_seed(seed)
105 |         
106 |     if isinstance(device_name, (str, int)):
107 |         device_name = [device_name]
108 |     
109 |     devices = []
110 |     for t in reversed(device_name):
111 |         t_device = torch.device(t)
112 |         devices.append(t_device)
113 |         if t_device.type == 'cuda':
114 |             assert torch.cuda.is_available()
115 |             torch.cuda.set_device(t_device)
116 |             if seed is not None:
117 |                 seed += 1
118 |                 torch.cuda.manual_seed(seed)
119 |     devices.reverse()
120 |     torch.backends.cudnn.enabled = use_cudnn
121 |     torch.backends.cudnn.deterministic = deterministic
122 |     torch.backends.cudnn.benchmark = benchmark
123 |     
124 |     if hasattr(torch.backends.cudnn, 'allow_tf32'):
125 |         torch.backends.cudnn.allow_tf32 = use_tf32
126 |         torch.backends.cuda.matmul.allow_tf32 = use_tf32
127 |         
128 |     return devices if len(devices) > 1 else devices[0]
129 | 
130 | 
def convert_coeff(x, eps=1e-6):
    '''Split a complex tensor into amplitude and phase.

    ``eps`` is added to both the real and imaginary parts for the amplitude,
    and to the real part only for the phase.

    Returns:
        Tuple ``(amp, phase)``.
    '''
    real_shifted = x.real + eps
    imag_shifted = x.imag + eps
    amp = torch.sqrt(real_shifted.pow(2) + imag_shifted.pow(2))
    phase = torch.atan2(x.imag, real_shifted)
    return amp, phase
135 | 
136 | 
def hierarchical_x(x, mask):
    # Build a list of progressively shorter versions of ``x`` by taking, for
    # every channel, the larger of each pair of adjacent steps along dim 1.
    # ``mask`` is gathered at the same positions as the selected values.
    #
    # x is indexed as (B, T, C) via x.size(0) / x.size(1) / x.size(2); mask is
    # indexed with the same (b, t, c) triples.
    # Returns a list of {'x': ..., 'mask': ...} dicts: the inputs first, then
    # one entry per halving, until dim 1 has length 1.
    hi_x, B, C = [{'x': x, 'mask': mask}], x.size(0), x.size(2)
    while x.size(1) > 1:
        if x.size(1) % 2 != 0:
            # Odd length: append one step of -inf so the steps pair up evenly.
            # NOTE(review): mask is not padded here; this presumably relies on
            # the -inf step never being selected as the maximum -- confirm.
            x = torch.cat((x, -np.inf * torch.ones(B, 1, C, device = x.device)), dim = 1)
        # View x as (B, 2, T/2, C) and take the argmax over the pair axis:
        # t_index is 0 or 1, i.e. which element of each pair is larger.
        _, t_index = torch.max(x.permute(0, 2, 1).reshape(B, C, -1, 2).permute(0, 3, 2, 1), dim = 1)

        # Turn the within-pair index into an absolute index along dim 1 by
        # adding each pair's starting position (0, 2, 4, ...), then flatten.
        t_index = (t_index.transpose(1, 2) + torch.arange(0, x.size(1), 2, device = x.device)).transpose(1, 2).reshape(-1)
        # Matching flattened batch and channel indices for advanced indexing.
        b_index = torch.arange(B, device = x.device).reshape(-1, 1).repeat(1, x.size(1) // 2 * C).reshape(-1)
        c_index = torch.arange(C, device = x.device).repeat(B * x.size(1) // 2)

        # Gather the selected values (and the mask entries at the same
        # positions) and restore the (B, T/2, C) layout.
        x, mask = x[(b_index, t_index, c_index)].reshape(B, -1, C), mask[(b_index, t_index, c_index)].reshape(B, -1, C)
        hi_x.append({'x': x, 'mask': mask})
    return hi_x
155 | 
156 | 
def generate_mask(data, p = 0.5, remain = 0):
    '''Draw a random 0/1 mask for every series in ``data`` (shape ``(B, T, C)``).

    Steps whose first channel is NaN get NaN in the mask. Of the remaining
    steps, the last ``remain`` are set to 1 for all channels; among the others
    ``round(count * C * p)`` entries are set to 1 using ``random.sample``.

    Returns:
        An array shaped like ``data`` containing 0, 1 and NaN.
    '''
    n_series, _, n_channels = data.shape
    mask = np.empty_like(data)
    tail = np.ones((remain, n_channels))

    for b in range(n_series):
        valid = ~np.isnan(data[b, :, 0])
        et_num = int(valid.sum()) - remain
        total = et_num * n_channels
        num = round(total * p)

        # A single draw per series is used as-is.
        i_mask = np.zeros(total)
        i_mask[random.sample(range(total), num)] = 1
        i_mask = i_mask.reshape(et_num, n_channels)

        mask[b, valid, :] = np.concatenate((i_mask, tail), axis = 0)
        mask[b, ~valid, :] = np.nan

    return mask
184 | 
185 | 
def interpolate_cubic_spline(data, mask, p = 1):
    '''Overwrite masked-out values of ``data`` with a cubic-spline estimate.

    The spline is fitted on positions where ``mask == 1`` and ``data`` is not
    NaN. A random fraction ``p`` of the positions where ``mask == 0`` and
    ``data`` is not NaN is then replaced by the spline value.

    Returns:
        ``data`` itself, modified in place.
    '''
    present = ~np.isnan(data)
    normal = np.where((mask == 1) & present)[0]
    missing = np.where((mask == 0) & present)[0]
    spline = CubicSpline(normal, data[normal])
    keep = int(missing.size * p)
    # Random permutation of the candidates; the first ``keep`` are filled.
    order = np.argsort(np.random.random(missing.size))
    chosen = missing[order[:keep]]
    data[chosen] = spline(chosen)
    return data
194 | 
195 | 
def inter_cubic_sp_torch(data, mask, p = 1):
    '''Tensor wrapper around ``interpolate_cubic_spline``.

    Both tensors are converted to NumPy on the CPU, interpolated, and the
    result is moved back to the device ``data`` came from.
    '''
    target = data.device
    data_np = data.cpu().detach().numpy()
    mask_np = mask.cpu().detach().numpy()
    filled = interpolate_cubic_spline(data_np, mask_np, p)
    return torch.from_numpy(filled).to(target)
199 | 
200 | 
def generate_uni(data, mask, alpha):
    '''Blend each step with the mean of all other steps along dim 1.

    Returns ``(1 - alpha) * mean_of_others + alpha * data``. ``mask`` is
    accepted but not used.
    '''
    n = data.size(1)
    total = data.sum(dim = 1, keepdim = True).repeat(1, n, 1)
    others_mean = (total - data) / (n - 1)
    return (1 - alpha) * others_mean + alpha * data
205 | 
206 | 
def generate_uni_p(data, mask, alpha):
    '''Weighted variant of ``generate_uni``.

    ``data`` is first scaled by the mean of ``mask`` along dim 1; each step is
    then blended with the weighted average of the other steps:
    ``(1 - alpha) * neg + alpha * scaled_data``.
    '''
    steps = mask.size(1)
    p = mask.mean(dim = 1, keepdim = True).repeat(1, steps, 1)
    data = p * data
    numer = data.sum(dim = 1, keepdim = True).repeat(1, steps, 1) - data
    denom = p.sum(dim = 1, keepdim = True).repeat(1, steps, 1) - p
    return (1 - alpha) * (numer / denom) + alpha * data
213 | 
214 | 
def normalize_with_mask(train, mask_tr, test, mask_te, scaler):
    '''Fit ``scaler`` on the training data and scale both splits.

    Entries where the corresponding mask is 0 are set to NaN (in place, on the
    given arrays) before fitting/transforming and are set to 0 in the results.
    The arrays are flattened to ``(-1, last_dim)`` for the scaler.

    Returns:
        Tuple ``(train_scaled, test_scaled)`` with the original shapes.
    '''
    hidden_tr = mask_tr == 0
    hidden_te = mask_te == 0
    train[hidden_tr] = np.nan
    test[hidden_te] = np.nan

    flat_train = train.reshape(-1, train.shape[-1])
    flat_test = test.reshape(-1, test.shape[-1])
    scaler = scaler.fit(flat_train)
    train = scaler.transform(flat_train).reshape(train.shape)
    test = scaler.transform(flat_test).reshape(test.shape)

    train[hidden_tr] = 0
    test[hidden_te] = 0
    return train, test
222 | 
223 | 
if __name__ == '__main__':
    # Smoke test: run hierarchical_x on a small random tensor, using the
    # tensor itself as the mask.
    batch, length, channels = 3, 10, 3
    sample = torch.randn((batch, length, channels))
    levels = hierarchical_x(sample, sample)
    print('ok')


--------------------------------------------------------------------------------