├── .gitignore ├── README.md ├── benchmark ├── AutoEncoder_benchmark.py ├── LODA_benchmark.py ├── PCA_benchmark.py ├── RANS_benchmark.py ├── anomaly_transformer_benchmark.py ├── benchmark_config │ ├── dataset_config │ │ ├── ASD.yaml │ │ ├── MSL.yaml │ │ ├── SMAP.yaml │ │ ├── SMD.yaml │ │ ├── SWAT.yaml │ │ └── WADI.yaml │ ├── eval_config.yaml │ └── model_config │ │ ├── anomaly_transformer.yaml │ │ ├── autoencoder.yaml │ │ ├── dagmm.yaml │ │ ├── ganf.yaml │ │ ├── iforest.yaml │ │ ├── interfusion.yaml │ │ ├── loda.yaml │ │ ├── lstm.yaml │ │ ├── mscred.yaml │ │ ├── mtad_gat.yaml │ │ ├── omnianomaly.yaml │ │ ├── pca.yaml │ │ ├── rans.yaml │ │ ├── tranad.yaml │ │ └── usad.yaml ├── dagmm_benchmark.py ├── ganf_benchmark.py ├── iforest_benchmark.py ├── interfusion_benchmark.py ├── lstm_benchmark.py ├── mscred_benchmark.py ├── mtad_gat_benchmark.py ├── omnianomaly_benchmark.py ├── tranad_benchmark.py └── usad_benchmark.py ├── common ├── autotuner.py ├── config.py ├── data_preprocess.py ├── dataloader.py ├── evaluation │ ├── __init__.py │ ├── eval_pipline.py │ ├── metrics.py │ ├── point_adjustment.py │ ├── spot.py │ ├── thresholding.py │ └── time_tracker.py ├── evaluation_.py ├── exp.py └── utils.py ├── doc └── mtad_metrics.png ├── networks ├── InterFusion │ ├── __init__.py │ ├── algorithm │ │ ├── InterFusion.py │ │ ├── InterFusion_swat.py │ │ ├── __init__.py │ │ ├── conv1d_.py │ │ ├── mcmc_recons.py │ │ ├── real_nvp.py │ │ ├── recurrent_distribution.py │ │ └── utils.py │ ├── predict.py │ ├── train.py │ └── wrapper.py ├── RANS │ ├── __init__.py │ ├── main.py │ └── models.py ├── __init__.py ├── anomaly_transformer │ ├── __init__.py │ ├── model │ │ ├── AnomalyTransformer.py │ │ ├── __init__.py │ │ ├── attn.py │ │ └── embed.py │ └── solver.py ├── dagmm │ ├── __init__.py │ ├── compression_net.py │ ├── dagmm.py │ ├── estimation_net.py │ ├── gmm.py │ └── main.py ├── ganf │ ├── DROCC.py │ ├── DeepSAD.py │ ├── GAN.py │ ├── GANF.py │ ├── NF.py │ ├── RNN.py │ ├── dataset.py │ ├── fit.py │ ├── graph_layer.py │ ├── predict.py │ └── utils.py ├── lstm │ ├── __init__.py │ ├── lstm.py │ └── wrappers.py ├── mscred │ ├── __init__.py │ ├── dlutils.py │ └── models.py ├── mtad_gat │ ├── __init__.py │ ├── modules.py │ ├── mtad_gat.py │ ├── plotting.py │ ├── predict.py │ ├── prediction.py │ ├── train.py │ ├── training.py │ └── utils.py ├── omni_anomaly │ ├── __init__.py │ ├── detector.py │ ├── model.py │ ├── prediction.py │ ├── recurrent_distribution.py │ ├── requirements.txt │ ├── training.py │ ├── utils.py │ ├── vae.py │ └── wrapper.py ├── tranad │ ├── __init__.py │ ├── dlutils.py │ └── models.py └── usad │ ├── __init__.py │ ├── gdrivedl.py │ └── usad.py └── requirements ├── RANSyncoders.txt ├── anomaly_transformer.txt ├── interfusion.txt └── omnianomaly.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # MTSBenchmark 2 | **/checkpoints/ 3 | **/dev 4 | **/data/ 5 | **/.vscode 6 | 7 | .DS_Store 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | *.pkl 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | share/python-wheels/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | MANIFEST 36 | 37 | # PyInstaller 38 | # Usually these files are written by a python script from a template 39 | # before PyInstaller builds the exe, so as to inject 
date/other infos into it. 40 | *.manifest 41 | *.spec 42 | 43 | # Installer logs 44 | pip-log.txt 45 | pip-delete-this-directory.txt 46 | 47 | # Unit test / coverage reports 48 | htmlcov/ 49 | .tox/ 50 | .nox/ 51 | .coverage 52 | .coverage.* 53 | .cache 54 | nosetests.xml 55 | coverage.xml 56 | *.cover 57 | *.py,cover 58 | .hypothesis/ 59 | .pytest_cache/ 60 | cover/ 61 | 62 | # Translations 63 | *.mo 64 | *.pot 65 | 66 | # Django stuff: 67 | *.log 68 | local_settings.py 69 | db.sqlite3 70 | db.sqlite3-journal 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | .pybuilder/ 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | # For a library or package, you might want to ignore these files since the code is 95 | # intended to run in multiple environments; otherwise, check them in: 96 | # .python-version 97 | 98 | # pipenv 99 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 100 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 101 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 102 | # install all needed dependencies. 103 | #Pipfile.lock 104 | 105 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 106 | __pypackages__/ 107 | 108 | # Celery stuff 109 | celerybeat-schedule 110 | celerybeat.pid 111 | 112 | # SageMath parsed files 113 | *.sage.py 114 | 115 | # Environments 116 | .env 117 | .venv 118 | env/ 119 | venv/ 120 | ENV/ 121 | env.bak/ 122 | venv.bak/ 123 | 124 | # Spyder project settings 125 | .spyderproject 126 | .spyproject 127 | 128 | # Rope project settings 129 | .ropeproject 130 | 131 | # mkdocs documentation 132 | /site 133 | 134 | # mypy 135 | .mypy_cache/ 136 | .dmypy.json 137 | dmypy.json 138 | 139 | # Pyre type checker 140 | .pyre/ 141 | 142 | # pytype static type analyzer 143 | .pytype/ 144 | 145 | # Cython debug symbols 146 | cython_debug/ 147 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## MTAD: Tools and Benchmark for Multivariate Time Series Anomaly Detection 2 | 3 | This repository is a **M**ultivariate **T**ime Series **A**nomaly **D**etection toolkit named ***MTAD***. It provides a comprehensive benchmarking protocol and integrates state-of-the-art methods behind a unified and easy-to-use interface. We include 15 methods in our repo, which are evaluated on 4 public datasets. 4 | 5 | ### Citation 6 | 👋 If you use our tools or benchmarking results in your publication, please cite the following paper. 7 | 8 | > Jinyang Liu, Wenwei Gu, Zhuangbin Chen, Yichen Li, Yuxin Su, Michael R. Lyu. [MTAD: Tools and Benchmarks for Multivariate Time Series Anomaly Detection](https://arxiv.org/pdf/2401.06175.pdf). 9 | 10 | 11 | ### Our evaluation protocol 12 | 13 | Our evaluation protocol can be summarized in the following figure, where we consider different threshold selection strategies and prediction adjustment. We also include the evaluation of detection delay in our protocol.
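For concreteness, below is a minimal, self-contained sketch of the two protocol-specific notions mentioned above: point adjustment, which credits a whole ground-truth anomaly segment once any point inside it is flagged, and detection delay, which counts how many points pass between the start of a true segment and the first alarm inside it. The function names here are illustrative only and are not the toolkit's API; the implementations actually used by the benchmark live in `common/evaluation/point_adjustment.py` and `common/evaluation/metrics.py` and may differ in details.

```
import numpy as np

def adjust_predictions(pred, label):
    """Point adjustment: if any point inside a true anomaly segment is
    detected, count the whole segment as detected."""
    pred, label = pred.astype(bool), label.astype(bool)
    i = 0
    while i < len(label):
        if label[i]:                        # start of a ground-truth segment
            j = i
            while j < len(label) and label[j]:
                j += 1
            if pred[i:j].any():             # at least one alarm inside the segment
                pred[i:j] = True            # credit the whole segment
            i = j
        else:
            i += 1
    return pred

def detection_delays(pred, label):
    """Delay: number of points between the start of each true segment and the
    first alarm raised inside it (segments that are never detected are skipped)."""
    pred, label = pred.astype(bool), label.astype(bool)
    delays, i = [], 0
    while i < len(label):
        if label[i]:
            j = i
            while j < len(label) and label[j]:
                j += 1
            hits = np.flatnonzero(pred[i:j])
            if hits.size:
                delays.append(int(hits[0]))
            i = j
        else:
            i += 1
    return delays

# Toy example: one anomaly segment at positions 3..6, first alarm at position 5.
label = np.array([0, 0, 0, 1, 1, 1, 1, 0])
pred = np.array([0, 1, 0, 0, 0, 1, 0, 0])
print(adjust_predictions(pred, label).astype(int))  # [0 1 0 1 1 1 1 0]
print(detection_delays(pred, label))                # [2]
```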
14 | 15 | ![Alt text](doc/mtad_metrics.png) 16 | 17 | **Threshold Selection:** 18 | 19 | - EVT-based method 20 | - Searching for the optimal one (a minimal sketch is given at the end of this README) 21 | 22 | **Metrics:** 23 | 24 | - Precision, Recall, F1-score: *how accurate is a model?* 25 | - with **or** without point adjustment 26 | - Delay: *how timely can a model report an anomaly?* 27 | - Efficiency: *how fast can a model be trained and perform anomaly detection?* 28 | 29 | 30 | 31 | ### Requirement 32 | 33 | > cd ./requirements 34 | > 35 | > pip install -r \<requirement file of the target model, e.g., omnianomaly.txt\> 36 | 37 | ### Run our benchmark 38 | 39 | The following is an example of running the benchmark for LSTM, whose configuration files are stored in the `./benchmark_config/` folder. 40 | 41 | ``` 42 | cd benchmark 43 | python lstm_benchmark.py 44 | ``` 45 | 46 | ### Models integrated in this tool 47 | 48 | **General Machine Learning-based Models** 49 | 50 | | Model | Paper reference | 51 | | :------ | :----------------------------------------------------------- | 52 | | PCA | **[2003]** Shyu M L, Chen S C, Sarinnapakorn K, et al. A novel anomaly detection scheme based on principal component classifier | 53 | | iForest | **[ICDM'2008]** Fei Tony Liu, Kai Ming Ting, Zhi-Hua Zhou: Isolation Forest | 54 | | LODA | **[Machine Learning'2016]** Tomás Pevný. Loda: Lightweight online detector of anomalies | 55 | 56 | **Deep Learning-based Models** 57 | 58 | | Model | Paper reference | 59 | | :---------- | :----------------------------------------------------------- | 60 | | AE | **[AAAI'2019]** Andrea Borghesi, Andrea Bartolini, Michele Lombardi, Michela Milano, Luca Benini. Anomaly Detection Using Autoencoders in High Performance Computing Systems | 61 | | LSTM | **[KDD'2018]** Kyle Hundman, Valentino Constantinou, Christopher Laporte, Ian Colwell, Tom Söderström. Detecting Spacecraft Anomalies Using LSTMs and Nonparametric Dynamic Thresholding | 62 | | LSTM-VAE | **[Arxiv'2017]** A Multimodal Anomaly Detector for Robot-Assisted Feeding Using an LSTM-based Variational Autoencoder | 63 | | DAGMM | **[ICLR'2018]** Bo Zong, Qi Song, Martin Renqiang Min, Wei Cheng, Cristian Lumezanu, Dae-ki Cho, Haifeng Chen. Deep Autoencoding Gaussian Mixture Model for Unsupervised Anomaly Detection | 64 | | MSCRED | **[AAAI'2019]** Chuxu Zhang, Dongjin Song, Yuncong Chen, Xinyang Feng, Cristian Lumezanu, Wei Cheng, Jingchao Ni, Bo Zong, Haifeng Chen, Nitesh V. Chawla. A Deep Neural Network for Unsupervised Anomaly Detection and Diagnosis in Multivariate Time Series Data. | 65 | | OmniAnomaly | **[KDD'2019]** Ya Su, Youjian Zhao, Chenhao Niu, Rong Liu, Wei Sun, Dan Pei. Robust Anomaly Detection for Multivariate Time Series through Stochastic Recurrent Neural Network | 66 | | MTAD-GAT | **[ICDM'2020]** Multivariate Time-series Anomaly Detection via Graph Attention Networks | 67 | | USAD | **[KDD'2020]** USAD: UnSupervised Anomaly Detection on Multivariate Time Series. | 68 | | InterFusion | **[KDD'2021]** Zhihan Li, Youjian Zhao, Jiaqi Han, Ya Su, Rui Jiao, Xidao Wen, Dan Pei.
Multivariate Time Series Anomaly Detection and Interpretation using Hierarchical Inter-Metric and Temporal Embedding | 69 | | TranAD | **[VLDB'2021]** TranAD: Deep Transformer Networks for Anomaly Detection in Multivariate Time Series Data | 70 | | RANSynCoders | **[KDD'2021]** Practical Approach to Asynchronous Multivariate Time Series Anomaly Detection and Localization | 71 | | AnomalyTransformer | **[ICLR'2022]** Anomaly Transformer: Time Series Anomaly Detection with Association Discrepancy | 72 | | GANF | **[ICLR'2022]** Graph-Augmented Normalizing Flows for Anomaly Detection of Multiple Time Series | 73 | 74 | 75 | 76 | ### Datasets 77 | 78 | The following datasets are kindly released by different institutions or schools. Raw datasets can be downloaded from, or applied for via, the links right behind the dataset names. The processed datasets can be found [here](https://drive.google.com/drive/folders/1NEGyB4y8CvUB8TX2Wh83Eas_QHtufGPR?usp=sharing)⬇️ (SMD, SMAP, and MSL). 79 | 80 | - Server Machine Dataset (**SMD**) [Download raw datasets⬇️](https://github.com/NetManAIOps/OmniAnomaly.git) 81 | 82 | > Collected from a large Internet company, containing 5-week-long monitoring KPIs of 28 machines. The meaning of each KPI can be found [here](https://github.com/NetManAIOps/OmniAnomaly/issues/22). 83 | 84 | - Soil Moisture Active Passive satellite (**SMAP**) and Mars Science Laboratory rover (**MSL**) [Download raw datasets⬇️](https://github.com/khundman/telemanom) 85 | 86 | > They are collected from running spacecraft and contain a set of telemetry anomalies corresponding to actual spacecraft issues involving various subsystems and channel types. 87 | 88 | - Secure Water Treatment (**SWAT**) [Apply here\*](https://itrust.sutd.edu.sg/itrust-labs_datasets/dataset_info/) 89 | 90 | > SWAT is collected from a real-world industrial water treatment plant and contains 11-day-long multivariate KPIs. Particularly, the system is in a normal state in the first seven days and is under attack in the following four days. 91 | 92 | - Water Distribution (**WADI**) [Apply here\*](https://itrust.sutd.edu.sg/itrust-labs_datasets/dataset_info/) 93 | 94 | > An extended dataset of SWAT. 14-day-long operation KPIs are collected when the system is running normally and 2-day-long KPIs are obtained when the system is in attack scenarios. 95 | 96 | \* The WADI and SWAT datasets are released by iTrust and must be applied for individually. One can request the raw datasets and preprocess them with our preprocessing scripts.
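As promised in the threshold-selection list above, the following is a minimal sketch of the "searching for the optimal one" strategy: sweep candidate thresholds over the anomaly-score range and keep the one that maximizes F1. The function name and the scikit-learn usage are illustrative assumptions, not the toolkit's API; the benchmark's own thresholding, including the EVT/POT-based option, is implemented in `common/evaluation/thresholding.py`.

```
import numpy as np
from sklearn.metrics import f1_score

def best_f1_threshold(scores, labels, n_candidates=200):
    """Return the threshold (and its F1) that works best when points with
    score >= threshold are flagged as anomalous."""
    candidates = np.linspace(scores.min(), scores.max(), n_candidates)
    best_t, best_f1 = candidates[0], -1.0
    for t in candidates:
        f1 = f1_score(labels, (scores >= t).astype(int), zero_division=0)
        if f1 > best_f1:
            best_t, best_f1 = t, f1
    return best_t, best_f1

# Toy usage: scores are higher inside the labeled anomaly segment.
rng = np.random.default_rng(0)
labels = np.zeros(1000, dtype=int)
labels[300:340] = 1
scores = rng.normal(0.0, 1.0, 1000) + 4.0 * labels
threshold, f1 = best_f1_threshold(scores, labels)
print(f"best threshold = {threshold:.3f}, F1 = {f1:.3f}")
```

The evaluation config (`benchmark/benchmark_config/eval_config.yaml`) exposes `thresholding` (`best`/`pot`) and `point_adjustment` (with/without) as independent switches, so the reported results cover both the searched and the EVT-based thresholds, each with and without point adjustment.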
97 | 98 | -------------------------------------------------------------------------------- /benchmark/AutoEncoder_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from pyod.models.auto_encoder import AutoEncoder 3 | 4 | sys.path.append("../") 5 | 6 | import logging 7 | import argparse 8 | from common import data_preprocess 9 | from common.dataloader import load_dataset 10 | from common.utils import seed_everything, load_config, set_logger, print_to_json 11 | from common.evaluation import Evaluator, TimeTracker 12 | from common.exp import store_entity 13 | 14 | seed_everything() 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | "--config", 19 | type=str, 20 | default="./benchmark_config/", 21 | help="The config directory.", 22 | ) 23 | parser.add_argument("--expid", type=str, default="autoencoder_SMD") 24 | parser.add_argument("--gpu", type=int, default=-1) 25 | args = vars(parser.parse_args()) 26 | 27 | config_dir = args["config"] 28 | experiment_id = args["expid"] 29 | 30 | params = load_config(config_dir, experiment_id) 31 | set_logger(params, args) 32 | logging.info(print_to_json(params)) 33 | 34 | data_dict = load_dataset( 35 | data_root=params["data_root"], 36 | entities=params["entities"], 37 | dim=params["dim"], 38 | valid_ratio=params["valid_ratio"], 39 | test_label_postfix=params["test_label_postfix"], 40 | test_postfix=params["test_postfix"], 41 | train_postfix=params["train_postfix"], 42 | nrows=params["nrows"], 43 | ) 44 | 45 | # preprocessing 46 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 47 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 48 | 49 | # train/test on each entity put here 50 | evaluator = Evaluator(**params["eval"]) 51 | for entity in params["entities"]: 52 | logging.info("Fitting dataset: {}".format(entity)) 53 | 54 | train = data_dict[entity]["train"] 55 | test = data_dict[entity]["test"] 56 | 57 | model = AutoEncoder( 58 | hidden_neurons=params["hidden_neurons"], 59 | batch_size=params["batch_size"], 60 | epochs=params["nb_epoch"], 61 | l2_regularizer=params["l2_regularizer"], 62 | verbose=1, 63 | ) 64 | 65 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 66 | 67 | tt.train_start() 68 | model.fit(train) 69 | tt.train_end() 70 | 71 | train_anomaly_score = model.decision_function(train) 72 | 73 | tt.test_start() 74 | anomaly_score = model.decision_function(test) 75 | tt.test_end() 76 | 77 | anomaly_label = data_dict[entity]["test_label"] 78 | 79 | store_entity( 80 | params, 81 | entity, 82 | train_anomaly_score, 83 | anomaly_score, 84 | anomaly_label, 85 | time_tracker=tt.get_data(), 86 | ) 87 | evaluator.eval_exp( 88 | exp_folder=params["model_root"], 89 | entities=params["entities"], 90 | merge_folder=params["benchmark_dir"], 91 | extra_params=params, 92 | ) 93 | -------------------------------------------------------------------------------- /benchmark/LODA_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | import logging 5 | from common import data_preprocess 6 | from common.dataloader import load_dataset 7 | from common.utils import seed_everything, load_config, set_logger, print_to_json 8 | from common.exp import store_entity 9 | from common.evaluation import Evaluator, TimeTracker 10 | from pyod.models.loda import LODA 11 | 12 | seed_everything() 13 | if __name__ == "__main__": 14 | import argparse 15 | 16 
| parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | "--config", 19 | type=str, 20 | default="./benchmark_config/", 21 | help="The config directory.", 22 | ) 23 | parser.add_argument("--expid", type=str, default="loda_SMD") 24 | parser.add_argument("--gpu", type=int, default=-1) 25 | args = vars(parser.parse_args()) 26 | 27 | config_dir = args["config"] 28 | experiment_id = args["expid"] 29 | params = load_config(config_dir, experiment_id) 30 | set_logger(params, args) 31 | logging.info(print_to_json(params)) 32 | 33 | data_dict = load_dataset( 34 | data_root=params["data_root"], 35 | entities=params["entities"], 36 | dim=params["dim"], 37 | valid_ratio=params["valid_ratio"], 38 | test_label_postfix=params["test_label_postfix"], 39 | test_postfix=params["test_postfix"], 40 | train_postfix=params["train_postfix"], 41 | nrows=params["nrows"], 42 | ) 43 | 44 | # preprocessing 45 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 46 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 47 | 48 | # train/test on each entity put here 49 | evaluator = Evaluator(**params["eval"]) 50 | for entity in params["entities"]: 51 | logging.info("Fitting dataset: {}".format(entity)) 52 | 53 | train = data_dict[entity]["train"] 54 | test = data_dict[entity]["test"] 55 | test_label = data_dict[entity]["test_label"] 56 | 57 | model = LODA(n_bins=params["n_bins"], n_random_cuts=params["n_random_cuts"]) 58 | 59 | tt = TimeTracker() 60 | tt.train_start() 61 | model.fit(train) 62 | tt.train_end() 63 | 64 | train_anomaly_score = model.decision_function(train) 65 | 66 | tt.test_start() 67 | anomaly_score = model.decision_function(test) 68 | tt.test_end() 69 | 70 | anomaly_label = test_label 71 | 72 | store_entity( 73 | params, 74 | entity, 75 | train_anomaly_score, 76 | anomaly_score, 77 | anomaly_label, 78 | time_tracker=tt.get_data(), 79 | ) 80 | evaluator.eval_exp( 81 | exp_folder=params["model_root"], 82 | entities=params["entities"], 83 | merge_folder=params["benchmark_dir"], 84 | extra_params=params, 85 | ) 86 | -------------------------------------------------------------------------------- /benchmark/PCA_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import logging 3 | from pyod.models.pca import PCA 4 | 5 | sys.path.append("../") 6 | 7 | from common import data_preprocess 8 | from common.dataloader import load_dataset 9 | from common.utils import seed_everything, load_config, set_logger, print_to_json 10 | from common.evaluation import Evaluator, TimeTracker 11 | from common.exp import store_entity 12 | 13 | seed_everything() 14 | if __name__ == "__main__": 15 | import argparse 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument( 19 | "--config", 20 | type=str, 21 | default="./benchmark_config/", 22 | help="The config directory.", 23 | ) 24 | parser.add_argument("--expid", type=str, default="pca_SMD") 25 | parser.add_argument("--gpu", type=int, default=-1) 26 | args = vars(parser.parse_args()) 27 | 28 | config_dir = args["config"] 29 | experiment_id = args["expid"] 30 | 31 | params = load_config(config_dir, experiment_id) 32 | set_logger(params, args) 33 | logging.info(print_to_json(params)) 34 | 35 | data_dict = load_dataset( 36 | data_root=params["data_root"], 37 | entities=params["entities"], 38 | dim=params["dim"], 39 | valid_ratio=params["valid_ratio"], 40 | test_label_postfix=params["test_label_postfix"], 41 | test_postfix=params["test_postfix"], 42 | 
train_postfix=params["train_postfix"], 43 | nrows=params["nrows"], 44 | ) 45 | 46 | # preprocessing 47 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 48 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 49 | 50 | # train/test on each entity put here 51 | evaluator = Evaluator(**params["eval"]) 52 | for entity in params["entities"]: 53 | logging.info("Fitting dataset: {}".format(entity)) 54 | train = data_dict[entity]["train"] 55 | test = data_dict[entity]["test"] 56 | test_label = data_dict[entity]["test_label"] 57 | 58 | # data preprocessing for MSCRED 59 | model = PCA() 60 | 61 | tt = TimeTracker() 62 | tt.train_start() 63 | model.fit(train) 64 | tt.train_end() 65 | 66 | # get outlier scores 67 | train_anomaly_score = model.decision_function(train) 68 | tt.test_start() 69 | anomaly_score = model.decision_function(test) 70 | tt.test_end() 71 | anomaly_label = test_label 72 | 73 | store_entity( 74 | params, 75 | entity, 76 | train_anomaly_score, 77 | anomaly_score, 78 | anomaly_label, 79 | time_tracker=tt.get_data(), 80 | ) 81 | evaluator.eval_exp( 82 | exp_folder=params["model_root"], 83 | entities=params["entities"], 84 | merge_folder=params["benchmark_dir"], 85 | extra_params=params, 86 | ) 87 | -------------------------------------------------------------------------------- /benchmark/RANS_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 4 | import sys 5 | 6 | sys.path.append("../") 7 | import logging 8 | from common import data_preprocess 9 | from common.dataloader import load_dataset 10 | from common.utils import seed_everything, load_config, set_logger, print_to_json 11 | from common.evaluation import Evaluator, TimeTracker 12 | from common.exp import store_entity 13 | from networks.RANS import RANSynCoders 14 | 15 | 16 | seed_everything() 17 | if __name__ == "__main__": 18 | import argparse 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument( 22 | "--config", 23 | type=str, 24 | default="./benchmark_config/", 25 | help="The config directory.", 26 | ) 27 | parser.add_argument("--expid", type=str, default="rans_SMD") 28 | parser.add_argument("--gpu", type=int, default=-1) 29 | args = vars(parser.parse_args()) 30 | 31 | config_dir = args["config"] 32 | experiment_id = args["expid"] 33 | 34 | params = load_config(config_dir, experiment_id) 35 | set_logger(params, args) 36 | logging.info(print_to_json(params)) 37 | 38 | data_dict = load_dataset( 39 | data_root=params["data_root"], 40 | entities=params["entities"], 41 | valid_ratio=params["valid_ratio"], 42 | dim=params["dim"], 43 | test_label_postfix=params["test_label_postfix"], 44 | test_postfix=params["test_postfix"], 45 | train_postfix=params["train_postfix"], 46 | nrows=params["nrows"], 47 | ) 48 | 49 | # preprocessing 50 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 51 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 52 | 53 | # train/test on each entity put here 54 | evaluator = Evaluator(**params["eval"]) 55 | for entity in params["entities"]: 56 | logging.info("Fitting dataset: {}".format(entity)) 57 | x_train = data_dict[entity]["train"] 58 | x_test = data_dict[entity]["test"] 59 | 60 | N = 5 * round((x_train.shape[1] / 3) / 5) 61 | z = int((N / 2) - 1) 62 | 63 | model = RANSynCoders( 64 | n_estimators=N, 65 | max_features=N, 66 | encoding_depth=params["encoder_layers"], 67 | latent_dim=z, 68 | 
decoding_depth=params["decoder_layers"], 69 | activation=params["activation"], 70 | output_activation=params["output_activation"], 71 | delta=params["delta"], 72 | synchronize=params["synchronize"], 73 | max_freqs=params["S"], 74 | ) 75 | 76 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 77 | tt.train_start() 78 | 79 | model.fit( 80 | x_train, 81 | epochs=params["nb_epoch"], 82 | batch_size=params["batch_size"], 83 | freq_warmup=params["freq_warmup"], 84 | sin_warmup=params["sin_warmup"], 85 | ) 86 | tt.train_end() 87 | 88 | train_anomaly_score = model.predict_prob( 89 | x_train, N, batch_size=10 * params["batch_size"] 90 | ) 91 | 92 | tt.test_start() 93 | anomaly_score = model.predict_prob( 94 | x_test, N, batch_size=10 * params["batch_size"] 95 | ) 96 | tt.test_end() 97 | 98 | anomaly_label = data_dict[entity]["test_label"] 99 | 100 | store_entity( 101 | params, 102 | entity, 103 | train_anomaly_score, 104 | anomaly_score, 105 | anomaly_label, 106 | time_tracker=tt.get_data(), 107 | ) 108 | evaluator.eval_exp( 109 | exp_folder=params["model_root"], 110 | entities=params["entities"], 111 | merge_folder=params["benchmark_dir"], 112 | extra_params=params, 113 | ) 114 | -------------------------------------------------------------------------------- /benchmark/anomaly_transformer_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | import logging 5 | import argparse 6 | from networks.anomaly_transformer.solver import AnomalyTransformer 7 | 8 | from common import data_preprocess 9 | from common.dataloader import get_dataloaders, load_dataset 10 | from common.utils import seed_everything, load_config, set_logger, print_to_json 11 | from common.evaluation import Evaluator, TimeTracker 12 | from common.exp import store_entity 13 | 14 | seed_everything() 15 | if __name__ == "__main__": 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument( 18 | "--config", 19 | type=str, 20 | default="./benchmark_config/", 21 | help="The config directory.", 22 | ) 23 | parser.add_argument("--expid", type=str, default="anomaly_transformer_SMD") 24 | parser.add_argument("--gpu", type=int, default=-1) 25 | args = vars(parser.parse_args()) 26 | 27 | config_dir = args["config"] 28 | experiment_id = args["expid"] 29 | 30 | params = load_config(config_dir, experiment_id) 31 | set_logger(params, args) 32 | logging.info(print_to_json(params)) 33 | 34 | data_dict = load_dataset( 35 | data_root=params["data_root"], 36 | entities=params["entities"], 37 | dim=params["dim"], 38 | valid_ratio=params["valid_ratio"], 39 | test_label_postfix=params["test_label_postfix"], 40 | test_postfix=params["test_postfix"], 41 | train_postfix=params["train_postfix"], 42 | nrows=params["nrows"], 43 | ) 44 | 45 | # preprocessing 46 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 47 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 48 | 49 | # sliding windows 50 | window_dict = data_preprocess.generate_windows( 51 | data_dict, 52 | window_size=params["window_size"], 53 | stride=params["stride"], 54 | ) 55 | 56 | # train/test on each entity put here 57 | evaluator = Evaluator(**params["eval"]) 58 | for entity in params["entities"]: 59 | logging.info("Fitting dataset: {}".format(entity)) 60 | windows = window_dict[entity] 61 | train_windows = windows["train_windows"] 62 | test_windows = windows["test_windows"] 63 | test_windows_label = windows["test_label"] 64 | 65 | train_loader, valid_loader, test_loader = 
get_dataloaders( 66 | train_windows, 67 | test_windows, 68 | batch_size=params["batch_size"], 69 | num_workers=params["num_workers"], 70 | ) 71 | 72 | model = AnomalyTransformer( 73 | lr=params["lr"], 74 | num_epochs=params["nb_epoch"], 75 | k=params["k"], 76 | win_size=params["window_size"], 77 | input_c=params["dim"], 78 | output_c=params["dim"], 79 | batch_size=params["batch_size"], 80 | model_save_path=params["model_root"], 81 | device=params["device"], 82 | ) 83 | 84 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 85 | 86 | tt.train_start() 87 | model.fit(train_loader, valid_loader) 88 | tt.train_end() 89 | 90 | tt.test_start() 91 | anomaly_score, anomaly_label = model.predict_prob( 92 | test_loader, test_windows_label 93 | ) 94 | tt.test_end() 95 | 96 | train_anomaly_score, anomaly_label = model.predict_prob( 97 | train_loader, test_windows_label 98 | ) 99 | 100 | store_entity( 101 | params, 102 | entity, 103 | train_anomaly_score, 104 | anomaly_score, 105 | anomaly_label, 106 | time_tracker=tt.get_data(), 107 | ) 108 | evaluator.eval_exp( 109 | exp_folder=params["model_root"], 110 | entities=params["entities"], 111 | merge_folder=params["benchmark_dir"], 112 | extra_params=params, 113 | ) 114 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/dataset_config/ASD.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | dataset: "asd" 3 | data_root: ../data/ASD/ 4 | model_root: "./benchmark_exp_details" 5 | benchmark_dir: "./benchmark_results" 6 | train_postfix: "train.pkl" 7 | test_postfix: "test.pkl" 8 | test_label_postfix: "test_label.pkl" 9 | dim: 19 10 | nrows: null 11 | entities: 12 | - omi-1 13 | - omi-2 14 | - omi-3 15 | - omi-4 16 | - omi-5 17 | - omi-6 18 | - omi-7 19 | - omi-8 20 | - omi-9 21 | - omi-10 22 | - omi-11 23 | - omi-12 24 | 25 | ASD_x2_valid: 26 | valid_ratio: 0 27 | nrows: 2000 28 | entities: ["omi-1", "omi-2"] 29 | 30 | ASD_x2: 31 | valid_ratio: 0 32 | nrows: 2000 33 | entities: ["omi-1", "omi-2"] 34 | 35 | ASD_x2_full: 36 | valid_ratio: 0 37 | entities: ["omi-1", "omi-2"] 38 | 39 | ASD: 40 | valid_ratio: 0 -------------------------------------------------------------------------------- /benchmark/benchmark_config/dataset_config/MSL.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | dataset: "msl" 3 | data_root: ../data/MSL/ 4 | model_root: "./benchmark_exp_details" 5 | benchmark_dir: "./benchmark_results" 6 | train_postfix: "train.pkl" 7 | test_postfix: "test.pkl" 8 | test_label_postfix: "test_label.pkl" 9 | dim: 55 10 | nrows: null 11 | entities: 12 | ["M-6", 13 | "M-1", 14 | "M-2", 15 | "S-2", 16 | "P-10", 17 | "T-4", 18 | "T-5", 19 | "F-7", 20 | "M-3", 21 | "M-4", 22 | "M-5", 23 | "P-15", 24 | "C-1", 25 | "C-2", 26 | "T-12", 27 | "T-13", 28 | "F-4", 29 | "F-5", 30 | "D-14", 31 | "T-9", 32 | "P-14", 33 | "T-8", 34 | "P-11", 35 | "D-15", 36 | "D-16", 37 | "M-7", 38 | "F-8"] 39 | 40 | MSL_x2_valid: 41 | valid_ratio: 0 42 | nrows: 2000 43 | entities: ["M-6", "M-1"] 44 | 45 | MSL_x2: 46 | valid_ratio: 0 47 | entities: ["M-6", "M-1"] 48 | 49 | MSL_usad_test: 50 | valid_ratio: 0 51 | entities: ["T-9"] 52 | 53 | MSL: 54 | valid_ratio: 0 -------------------------------------------------------------------------------- /benchmark/benchmark_config/dataset_config/SMAP.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | dataset: "smap" 3 | data_root: ../data/SMAP/ 4 | 
model_root: "./benchmark_exp_details" 5 | benchmark_dir: "./benchmark_results" 6 | train_postfix: "train.pkl" 7 | test_postfix: "test.pkl" 8 | test_label_postfix: "test_label.pkl" 9 | dim: 25 10 | nrows: null 11 | entities: 12 | [ 13 | "P-1", 14 | "S-1", 15 | "E-1", 16 | "E-2", 17 | "E-3", 18 | "E-4", 19 | "E-5", 20 | "E-6", 21 | "E-7", 22 | "E-8", 23 | "E-9", 24 | "E-10", 25 | "E-11", 26 | "E-12", 27 | "E-13", 28 | "A-1", 29 | "D-1", 30 | "P-2", 31 | "P-3", 32 | "D-2", 33 | "D-3", 34 | "D-4", 35 | "A-2", 36 | "A-3", 37 | "A-4", 38 | "G-1", 39 | "G-2", 40 | "D-5", 41 | "D-6", 42 | "D-7", 43 | "F-1", 44 | "P-4", 45 | "G-3", 46 | "T-1", 47 | "T-2", 48 | "D-8", 49 | "D-9", 50 | "F-2", 51 | "G-4", 52 | "T-3", 53 | "D-11", 54 | # "D-12", 55 | "B-1", 56 | "G-6", 57 | "G-7", 58 | "P-7", 59 | "R-1", 60 | "A-5", 61 | "A-6", 62 | "A-7", 63 | "D-13", 64 | "P-2", 65 | "A-8", 66 | "A-9", 67 | "F-3", 68 | ] 69 | 70 | SMAP_x2_valid: 71 | valid_ratio: 0 72 | nrows: 2000 73 | entities: ["P-1","S-1"] 74 | 75 | SMAP_test: 76 | valid_ratio: 0 77 | entities: [ "B-1", 78 | "G-6", 79 | "G-7", 80 | "P-7", 81 | "R-1", 82 | "A-5", 83 | "A-6", 84 | "A-7", 85 | "D-13", 86 | "P-2", 87 | "A-8", 88 | "A-9", 89 | "F-3", 90 | ] 91 | 92 | SMAP_x2_full: 93 | valid_ratio: 0 94 | entities: ["D-13"] 95 | 96 | SMAP: 97 | valid_ratio: 0 -------------------------------------------------------------------------------- /benchmark/benchmark_config/dataset_config/SMD.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | dataset: "smd" 3 | data_root: ../data/SMD/ 4 | model_root: "./benchmark_exp_details" 5 | benchmark_dir: "./benchmark_results" 6 | train_postfix: "train.pkl" 7 | test_postfix: "test.pkl" 8 | test_label_postfix: "test_label.pkl" 9 | dim: 38 10 | nrows: null 11 | entities: 12 | - machine-1-1 13 | - machine-1-2 14 | - machine-1-3 15 | - machine-1-4 16 | - machine-1-5 17 | - machine-1-6 18 | - machine-1-7 19 | - machine-1-8 20 | - machine-2-1 21 | - machine-2-2 22 | - machine-2-3 23 | - machine-2-4 24 | - machine-2-5 25 | - machine-2-6 26 | - machine-2-7 27 | - machine-2-8 28 | - machine-2-9 29 | - machine-3-1 30 | - machine-3-2 31 | - machine-3-3 32 | - machine-3-4 33 | - machine-3-5 34 | - machine-3-6 35 | - machine-3-7 36 | - machine-3-8 37 | - machine-3-9 38 | - machine-3-10 39 | - machine-3-11 40 | 41 | SMD: 42 | valid_ratio: 0 -------------------------------------------------------------------------------- /benchmark/benchmark_config/dataset_config/SWAT.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | dataset: "swat" 3 | data_root: ../data/SWAT/ 4 | model_root: "./benchmark_exp_details" 5 | benchmark_dir: "./benchmark_results" 6 | train_postfix: "train.pkl" 7 | test_postfix: "test.pkl" 8 | test_label_postfix: "test_label.pkl" 9 | dim: 40 10 | nrows: null 11 | entities: 12 | - swat 13 | 14 | 15 | SWAT: 16 | valid_ratio: 0 17 | entities: ["swat"] 18 | 19 | SWAT_test: 20 | nrows: 5000 21 | valid_ratio: 0 22 | entities: ["swat"] 23 | 24 | 25 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/dataset_config/WADI.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | dataset: "wadi" 3 | data_root: ../data/WADI/ 4 | model_root: "./benchmark_exp_details" 5 | benchmark_dir: "./benchmark_results" 6 | train_postfix: "train.pkl" 7 | test_postfix: "test.pkl" 8 | test_label_postfix: "test_label.pkl" 9 | dim: 93 10 | nrows: null 
11 | entities: 12 | - wadi 13 | 14 | 15 | WADI: 16 | valid_ratio: 0 17 | entities: ["wadi"] 18 | 19 | WADI_test: 20 | nrows: 5000 21 | valid_ratio: 0 22 | entities: ["wadi"] 23 | 24 | 25 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/eval_config.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | metrics: ["f1", "delay"] 3 | pot_params: {"q": 1.0e-2, "level": [0.99, 0.98, 0.97,0.9,0.8], "dynamic": False} 4 | best_params: {"target_metric": "f1", "target_direction": "max"} 5 | thresholding: ["best", "pot"] 6 | point_adjustment: [True, False] 7 | 8 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/anomaly_transformer.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: anomaly_transformer 3 | normalize: "minmax" 4 | 5 | anomaly_transformer_SMD: 6 | dataset_id: SMD 7 | batch_size: 128 8 | window_size: 100 9 | nb_epoch: 10 10 | l2_regularizer: 0.1 11 | stride: 1 12 | num_workers: 1 13 | lr: 1.0e-4 14 | k: 3 15 | device: 2 16 | 17 | anomaly_transformer_ASD: 18 | dataset_id: ASD 19 | batch_size: 128 20 | window_size: 100 21 | nb_epoch: 10 22 | l2_regularizer: 0.1 23 | stride: 1 24 | num_workers: 1 25 | lr: 1.0e-4 26 | k: 3 27 | device: 2 28 | 29 | anomaly_transformer_SWAT: 30 | dataset_id: SWAT 31 | batch_size: 128 32 | window_size: 100 33 | nb_epoch: 10 34 | l2_regularizer: 0.1 35 | stride: 1 36 | num_workers: 1 37 | lr: 1.0e-4 38 | k: 3 39 | device: 2 40 | 41 | anomaly_transformer_WADI: 42 | dataset_id: WADI 43 | batch_size: 128 44 | window_size: 100 45 | nb_epoch: 10 46 | l2_regularizer: 0.1 47 | stride: 1 48 | num_workers: 1 49 | lr: 1.0e-4 50 | k: 3 51 | device: 2 52 | 53 | anomaly_transformer_SMAP: 54 | dataset_id: SMAP 55 | batch_size: 128 56 | window_size: 100 57 | nb_epoch: 10 58 | l2_regularizer: 0.1 59 | stride: 1 60 | num_workers: 1 61 | lr: 1.0e-4 62 | k: 3 63 | device: 2 64 | 65 | anomaly_transformer_MSL: 66 | dataset_id: MSL 67 | batch_size: 128 68 | window_size: 100 69 | nb_epoch: 10 70 | l2_regularizer: 0.1 71 | stride: 1 72 | num_workers: 1 73 | lr: 1.0e-4 74 | k: 3 75 | device: 2 76 | 77 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/autoencoder.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: AutoEncoder 3 | normalize: "minmax" 4 | 5 | autoencoder_SMD: 6 | dataset_id: SMD 7 | hidden_neurons: [64, 32, 32, 64] 8 | batch_size: 512 9 | nb_epoch: 50 10 | l2_regularizer: 0.1 11 | device: 2 12 | autoencoder_ASD: 13 | dataset_id: ASD 14 | normalize: "minmax" 15 | hidden_neurons: [64, 16, 16, 64] 16 | batch_size: 512 17 | nb_epoch: 50 18 | l2_regularizer: 0.1 19 | device: 2 20 | autoencoder_SWAT: 21 | dataset_id: SWAT 22 | normalize: "minmax" 23 | hidden_neurons: [64, 32, 32, 64] 24 | batch_size: 512 25 | nb_epoch: 50 26 | l2_regularizer: 0.1 27 | device: 2 28 | autoencoder_WADI: 29 | dataset_id: WADI 30 | normalize: "minmax" 31 | hidden_neurons: [64, 32, 32, 64] 32 | batch_size: 512 33 | nb_epoch: 50 34 | l2_regularizer: 0.1 35 | device: 2 36 | autoencoder_SMAP: 37 | dataset_id: SMAP 38 | normalize: "minmax" 39 | hidden_neurons: [64, 16, 16, 64] 40 | batch_size: 512 41 | nb_epoch: 50 42 | l2_regularizer: 0.1 43 | device: 2 44 | autoencoder_MSL: 45 | dataset_id: MSL 46 | normalize: "minmax" 47 | 
hidden_neurons: [64, 32, 32, 64] 48 | batch_size: 512 49 | nb_epoch: 50 50 | l2_regularizer: 0.1 51 | device: 2 52 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/dagmm.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | normalize: "standard" 3 | model_id: dagmm 4 | reverse_score: False 5 | 6 | dagmm_SMD: 7 | dataset_id: SMD 8 | batch_size: 512 9 | nb_epoch: 20 10 | compression_hiddens: [128, 64, 2] 11 | estimation_hiddens: [100, 50] 12 | estimation_dropout_ratio: 0.25 13 | lr: 0.0001 14 | lambdaone: 0.1 15 | lambdatwo: 0.0001 16 | device: 2 17 | dagmm_ASD: 18 | dataset_id: ASD 19 | batch_size: 512 20 | nb_epoch: 20 21 | compression_hiddens: [128, 64, 2] 22 | estimation_hiddens: [100, 50] 23 | estimation_dropout_ratio: 0.25 24 | lr: 0.0001 25 | lambdaone: 0.1 26 | lambdatwo: 0.0001 27 | device: 2 28 | dagmm_SWAT: 29 | dataset_id: SWAT 30 | batch_size: 512 31 | nb_epoch: 20 32 | compression_hiddens: [128, 64, 2] 33 | estimation_hiddens: [100, 50] 34 | estimation_dropout_ratio: 0.25 35 | lr: 0.0001 36 | lambdaone: 0.1 37 | lambdatwo: 0.0001 38 | device: 2 39 | dagmm_WADI: 40 | dataset_id: WADI 41 | batch_size: 512 42 | nb_epoch: 20 43 | compression_hiddens: [128, 64, 2] 44 | estimation_hiddens: [100, 50] 45 | estimation_dropout_ratio: 0.25 46 | lr: 0.0001 47 | lambdaone: 0.1 48 | lambdatwo: 0.0001 49 | device: 2 50 | dagmm_SMAP: 51 | dataset_id: SMAP_test 52 | batch_size: 512 53 | nb_epoch: 20 54 | compression_hiddens: [128, 64, 2] 55 | estimation_hiddens: [100, 5] 56 | estimation_dropout_ratio: 0 57 | lr: 0.001 58 | lambdaone: 0 59 | lambdatwo: 0 60 | device: 2 61 | dagmm_MSL: 62 | dataset_id: MSL 63 | batch_size: 512 64 | nb_epoch: 20 65 | compression_hiddens: [128, 64, 2] 66 | estimation_hiddens: [100, 50] 67 | estimation_dropout_ratio: 0.25 68 | lr: 0.0001 69 | lambdaone: 0.1 70 | lambdatwo: 0.0001 71 | device: 2 72 | 73 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/ganf.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: ganf 3 | normalize: 'minmax' 4 | 5 | ganf_SMD: 6 | dataset_id: SMD 7 | window_size: 100 8 | stride: 1 9 | n_blocks: 1 10 | input_size: 1 11 | hidden_size: 32 12 | n_hidden: 1 13 | dropout: 0.1 14 | batch_norm: False 15 | batch_size: 512 16 | weight_decay: 5.0e-4 17 | nb_epoch: 10 18 | lr: 2.0e-3 19 | h_tol: 1.0e-4 20 | rho_max: 1000000 21 | lambda1: 0.0 22 | rho_init: 1.0 23 | alpha_init: 0.0 24 | shuffle: True 25 | num_workers: 1 26 | device: 0 27 | ganf_ASD: 28 | dataset_id: ASD 29 | window_size: 100 30 | stride: 1 31 | n_blocks: 1 32 | input_size: 1 33 | hidden_size: 32 34 | n_hidden: 1 35 | dropout: 0.1 36 | batch_norm: False 37 | batch_size: 512 38 | weight_decay: 5.0e-4 39 | nb_epoch: 10 40 | lr: 2.0e-3 41 | h_tol: 1.0e-4 42 | rho_max: 1000000 43 | lambda1: 0.0 44 | rho_init: 1.0 45 | alpha_init: 0.0 46 | shuffle: True 47 | num_workers: 1 48 | device: 0 49 | ganf_SWAT: 50 | dataset_id: SWAT 51 | window_size: 100 52 | stride: 1 53 | n_blocks: 1 54 | input_size: 1 55 | hidden_size: 32 56 | n_hidden: 1 57 | dropout: 0.1 58 | batch_norm: False 59 | batch_size: 512 60 | weight_decay: 5.0e-4 61 | nb_epoch: 10 62 | lr: 2.0e-3 63 | h_tol: 1.0e-4 64 | rho_max: 1000000 65 | lambda1: 0.0 66 | rho_init: 1.0 67 | alpha_init: 0.0 68 | shuffle: True 69 | num_workers: 1 70 | device: 0 71 | ganf_WADI: 72 | dataset_id: WADI 
73 | window_size: 100 74 | stride: 1 75 | n_blocks: 1 76 | input_size: 1 77 | hidden_size: 32 78 | n_hidden: 1 79 | dropout: 0.1 80 | batch_norm: False 81 | batch_size: 512 82 | weight_decay: 5.0e-4 83 | nb_epoch: 10 84 | lr: 2.0e-3 85 | h_tol: 1.0e-4 86 | rho_max: 1000000 87 | lambda1: 0.0 88 | rho_init: 1.0 89 | alpha_init: 0.0 90 | shuffle: True 91 | num_workers: 1 92 | device: 0 93 | ganf_SMAP: 94 | dataset_id: SMAP 95 | window_size: 100 96 | stride: 1 97 | n_blocks: 1 98 | input_size: 1 99 | hidden_size: 32 100 | n_hidden: 1 101 | dropout: 0.1 102 | batch_norm: False 103 | batch_size: 512 104 | weight_decay: 5.0e-4 105 | nb_epoch: 10 106 | lr: 2.0e-3 107 | h_tol: 1.0e-4 108 | rho_max: 1000000 109 | lambda1: 0.0 110 | rho_init: 1.0 111 | alpha_init: 0.0 112 | shuffle: True 113 | num_workers: 1 114 | device: 0 115 | ganf_MSL: 116 | dataset_id: MSL 117 | window_size: 100 118 | stride: 1 119 | n_blocks: 1 120 | input_size: 1 121 | hidden_size: 32 122 | n_hidden: 1 123 | dropout: 0.1 124 | batch_norm: False 125 | batch_size: 512 126 | weight_decay: 5.0e-4 127 | nb_epoch: 10 128 | lr: 2.0e-3 129 | h_tol: 1.0e-4 130 | rho_max: 1000000 131 | lambda1: 0.0 132 | rho_init: 1.0 133 | alpha_init: 0.0 134 | shuffle: True 135 | num_workers: 1 136 | device: 0 -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/iforest.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: iforest 3 | normalize: "minmax" 4 | 5 | iforest_SMD: 6 | dataset_id: SMD 7 | n_estimators: 100 8 | 9 | iforest_MSL: 10 | dataset_id: MSL 11 | n_estimators: 100 12 | 13 | iforest_SMAP: 14 | dataset_id: SMAP 15 | n_estimators: 100 16 | 17 | iforest_SWAT: 18 | dataset_id: SWAT 19 | n_estimators: 100 20 | 21 | iforest_WADI: 22 | dataset_id: WADI 23 | n_estimators: 100 24 | 25 | iforest_ASD: 26 | dataset_id: ASD 27 | n_estimators: 100 -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/interfusion.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: interfusion 3 | normalize: "minmax" 4 | 5 | 6 | interfusion_SMD: 7 | dataset_id: SMD 8 | batch_size: 128 9 | window_size: 100 10 | stride: 1 11 | patience: 5 12 | lr: 0.001 13 | num_workers: 1 14 | pretrain_max_epoch: 20 15 | nb_epoch: 20 16 | device: 0 # -1 for cpu, 0 for cuda:0 17 | interfusion_ASD: 18 | dataset_id: ASD 19 | batch_size: 128 20 | window_size: 100 21 | stride: 1 22 | patience: 5 23 | lr: 0.001 24 | num_workers: 1 25 | pretrain_max_epoch: 20 26 | nb_epoch: 20 27 | device: 0 # -1 for cpu, 0 for cuda:0 28 | interfusion_SWAT: 29 | dataset_id: SWAT 30 | batch_size: 128 31 | window_size: 100 32 | stride: 1 33 | patience: 5 34 | lr: 0.001 35 | num_workers: 1 36 | pretrain_max_epoch: 20 37 | nb_epoch: 20 38 | device: 0 # -1 for cpu, 0 for cuda:0 39 | interfusion_WADI: 40 | dataset_id: WADI 41 | batch_size: 128 42 | window_size: 100 43 | stride: 1 44 | patience: 5 45 | lr: 0.001 46 | num_workers: 1 47 | pretrain_max_epoch: 20 48 | nb_epoch: 20 49 | device: 0 # -1 for cpu, 0 for cuda:0 50 | interfusion_SMAP: 51 | dataset_id: SMAP 52 | batch_size: 128 53 | window_size: 100 54 | stride: 1 55 | patience: 5 56 | lr: 0.001 57 | num_workers: 1 58 | pretrain_max_epoch: 20 59 | nb_epoch: 20 60 | device: 0 # -1 for cpu, 0 for cuda:0 61 | interfusion_MSL: 62 | dataset_id: MSL 63 | batch_size: 128 64 | window_size: 100 65 | stride: 1 66 | 
patience: 5 67 | lr: 0.001 68 | num_workers: 1 69 | pretrain_max_epoch: 20 70 | nb_epoch: 20 71 | device: 0 # -1 for cpu, 0 for cuda:0 72 | 73 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/loda.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: LODA 3 | normalize: "minmax" 4 | 5 | loda_SMD: 6 | dataset_id: SMD 7 | n_bins: 10 8 | n_random_cuts: 100 9 | loda_ASD: 10 | dataset_id: ASD 11 | n_bins: 10 12 | n_random_cuts: 100 13 | loda_SWAT: 14 | dataset_id: SWAT 15 | n_bins: 10 16 | n_random_cuts: 100 17 | loda_WADI: 18 | dataset_id: WADI 19 | n_bins: 10 20 | n_random_cuts: 100 21 | loda_SMAP: 22 | dataset_id: SMAP 23 | n_bins: 10 24 | n_random_cuts: 100 25 | loda_MSL: 26 | dataset_id: MSL 27 | n_bins: 10 28 | n_random_cuts: 100 29 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/lstm.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: lstm 3 | normalize: "minmax" 4 | 5 | lstm_SMD: 6 | dataset_id: SMD 7 | batch_size: 1024 8 | window_size: 100 9 | stride: 1 10 | nb_epoch: 10 11 | patience: 5 12 | device: 0 # -1 for cpu, 0 for cuda:0 13 | lr: 0.001 14 | hidden_size: 256 15 | num_layers: 2 16 | dropout: 0 17 | prediction_length: 1 18 | prediction_dims: [] 19 | num_workers: 4 20 | 21 | 22 | lstm_ASD: 23 | dataset_id: ASD 24 | normalize: "minmax" 25 | batch_size: 1024 26 | window_size: 100 27 | stride: 1 28 | nb_epoch: 10 29 | patience: 5 30 | device: 0 # -1 for cpu, 0 for cuda:0 31 | lr: 0.001 32 | hidden_size: 256 33 | num_layers: 2 34 | dropout: 0 35 | prediction_length: 1 36 | prediction_dims: [] 37 | num_workers: 4 38 | 39 | lstm_SWAT: 40 | dataset_id: SWAT 41 | normalize: "minmax" 42 | batch_size: 1024 43 | window_size: 100 44 | stride: 1 45 | nb_epoch: 10 46 | patience: 5 47 | device: 0 # -1 for cpu, 0 for cuda:0 48 | lr: 0.001 49 | hidden_size: 256 50 | num_layers: 2 51 | dropout: 0 52 | prediction_length: 1 53 | prediction_dims: [] 54 | num_workers: 4 55 | 56 | lstm_WADI: 57 | dataset_id: WADI 58 | normalize: "minmax" 59 | batch_size: 1024 60 | window_size: 100 61 | stride: 1 62 | nb_epoch: 10 63 | patience: 5 64 | device: 0 # -1 for cpu, 0 for cuda:0 65 | lr: 0.001 66 | hidden_size: 256 67 | num_layers: 2 68 | dropout: 0 69 | prediction_length: 1 70 | prediction_dims: [] 71 | num_workers: 4 72 | 73 | lstm_SMAP: 74 | dataset_id: SMAP 75 | normalize: "minmax" 76 | batch_size: 1024 77 | window_size: 100 78 | stride: 1 79 | nb_epoch: 10 80 | patience: 5 81 | device: 0 # -1 for cpu, 0 for cuda:0 82 | lr: 0.001 83 | hidden_size: 256 84 | num_layers: 2 85 | dropout: 0 86 | prediction_length: 1 87 | prediction_dims: [] 88 | num_workers: 4 89 | 90 | lstm_MSL: 91 | dataset_id: MSL 92 | normalize: "minmax" 93 | batch_size: 1024 94 | window_size: 100 95 | stride: 1 96 | nb_epoch: 10 97 | patience: 5 98 | device: 0 # -1 for cpu, 0 for cuda:0 99 | lr: 0.001 100 | hidden_size: 256 101 | num_layers: 2 102 | dropout: 0 103 | prediction_length: 1 104 | prediction_dims: [] 105 | num_workers: 4 -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/mscred.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: mscred 3 | normalize: "minmax" 4 | 5 | mscred_SMD: 6 | dataset_id: SMD 7 | window_size: 100 8 | stride: 1 9 | 
batch_size: 64 10 | nb_epoch: 3 11 | device: 1 12 | step_max: 5 13 | gap_time: 10 14 | lr: 0.0002 15 | mscred_ASD: 16 | dataset_id: ASD 17 | window_size: 100 18 | stride: 1 19 | batch_size: 64 20 | nb_epoch: 3 21 | device: 1 22 | step_max: 5 23 | gap_time: 10 24 | lr: 0.0002 25 | mscred_SWAT: 26 | dataset_id: SWAT 27 | window_size: 100 28 | stride: 1 29 | batch_size: 64 30 | nb_epoch: 3 31 | device: 1 32 | step_max: 5 33 | gap_time: 10 34 | lr: 0.0002 35 | mscred_WADI: 36 | dataset_id: WADI 37 | window_size: 100 38 | stride: 1 39 | batch_size: 32 40 | nb_epoch: 3 41 | device: 1 42 | step_max: 5 43 | gap_time: 10 44 | lr: 0.0002 45 | mscred_SMAP: 46 | dataset_id: SMAP 47 | window_size: 100 48 | stride: 1 49 | batch_size: 64 50 | nb_epoch: 3 51 | device: 1 52 | step_max: 5 53 | gap_time: 10 54 | lr: 0.0002 55 | mscred_MSL: 56 | dataset_id: MSL 57 | window_size: 100 58 | stride: 1 59 | batch_size: 64 60 | nb_epoch: 3 61 | device: 1 62 | step_max: 5 63 | gap_time: 10 64 | lr: 0.0002 -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/mtad_gat.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: mtad_gat 3 | normalize: "minmax" 4 | 5 | mtad_gat_SMD: 6 | dataset_id: SMD 7 | batch_size: 512 8 | window_size: 100 9 | stride: 1 10 | nb_epoch: 10 11 | shuffle: True 12 | num_workers: 1 13 | init_lr: 3.0e-4 14 | kernel_size: 7 15 | feat_gat_embed_dim: null 16 | time_gat_embed_dim: null 17 | use_gatv2: True 18 | gru_n_layers: 1 19 | gru_hid_dim: 150 20 | forecast_n_layers: 1 21 | forecast_hid_dim: 150 22 | recon_n_layers: 1 23 | recon_hid_dim: 150 24 | dropout: 0.3 25 | alpha: 0.2 26 | gamma: 1 27 | device: 3 28 | mtad_gat_ASD: 29 | dataset_id: ASD 30 | batch_size: 512 31 | window_size: 100 32 | stride: 1 33 | nb_epoch: 10 34 | shuffle: True 35 | num_workers: 1 36 | init_lr: 3.0e-4 37 | kernel_size: 7 38 | feat_gat_embed_dim: null 39 | time_gat_embed_dim: null 40 | use_gatv2: True 41 | gru_n_layers: 1 42 | gru_hid_dim: 150 43 | forecast_n_layers: 1 44 | forecast_hid_dim: 150 45 | recon_n_layers: 1 46 | recon_hid_dim: 150 47 | dropout: 0.3 48 | alpha: 0.2 49 | gamma: 1 50 | device: 3 51 | mtad_gat_SWAT: 52 | dataset_id: SWAT 53 | batch_size: 512 54 | window_size: 100 55 | stride: 1 56 | nb_epoch: 10 57 | shuffle: True 58 | num_workers: 1 59 | init_lr: 3.0e-4 60 | kernel_size: 7 61 | feat_gat_embed_dim: null 62 | time_gat_embed_dim: null 63 | use_gatv2: True 64 | gru_n_layers: 1 65 | gru_hid_dim: 150 66 | forecast_n_layers: 1 67 | forecast_hid_dim: 150 68 | recon_n_layers: 1 69 | recon_hid_dim: 150 70 | dropout: 0.3 71 | alpha: 0.2 72 | gamma: 1 73 | device: 3 74 | mtad_gat_WADI: 75 | dataset_id: WADI 76 | batch_size: 128 77 | window_size: 100 78 | stride: 1 79 | nb_epoch: 10 80 | shuffle: True 81 | num_workers: 1 82 | init_lr: 3.0e-4 83 | kernel_size: 7 84 | feat_gat_embed_dim: null 85 | time_gat_embed_dim: null 86 | use_gatv2: True 87 | gru_n_layers: 1 88 | gru_hid_dim: 150 89 | forecast_n_layers: 1 90 | forecast_hid_dim: 150 91 | recon_n_layers: 1 92 | recon_hid_dim: 150 93 | dropout: 0.3 94 | alpha: 0.2 95 | gamma: 1 96 | device: 3 97 | mtad_gat_SMAP: 98 | dataset_id: SMAP 99 | batch_size: 512 100 | window_size: 100 101 | stride: 1 102 | nb_epoch: 10 103 | shuffle: True 104 | num_workers: 1 105 | init_lr: 3.0e-4 106 | kernel_size: 7 107 | feat_gat_embed_dim: null 108 | time_gat_embed_dim: null 109 | use_gatv2: True 110 | gru_n_layers: 1 111 | gru_hid_dim: 150 112 | 
forecast_n_layers: 1 113 | forecast_hid_dim: 150 114 | recon_n_layers: 1 115 | recon_hid_dim: 150 116 | dropout: 0.3 117 | alpha: 0.2 118 | gamma: 1 119 | device: 3 120 | mtad_gat_MSL: 121 | dataset_id: MSL 122 | batch_size: 256 123 | window_size: 100 124 | stride: 1 125 | nb_epoch: 10 126 | shuffle: True 127 | num_workers: 1 128 | init_lr: 3.0e-4 129 | kernel_size: 7 130 | feat_gat_embed_dim: null 131 | time_gat_embed_dim: null 132 | use_gatv2: True 133 | gru_n_layers: 1 134 | gru_hid_dim: 150 135 | forecast_n_layers: 1 136 | forecast_hid_dim: 150 137 | recon_n_layers: 1 138 | recon_hid_dim: 150 139 | dropout: 0.3 140 | alpha: 0.2 141 | gamma: 1 142 | device: 1 143 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/omnianomaly.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: omnianomaly 3 | normalize: "minmax" 4 | reverse_score: True 5 | 6 | omnianomaly_SMD: 7 | dataset_id: SMD 8 | batch_size: 512 9 | window_size: 100 10 | stride: 1 11 | nb_epoch: 10 12 | l2_reg: 0.1 13 | initial_lr: 1.0e-3 14 | device: 3 15 | omnianomaly_ASD: 16 | dataset_id: ASD 17 | batch_size: 512 18 | window_size: 100 19 | stride: 1 20 | nb_epoch: 10 21 | l2_reg: 0.1 22 | initial_lr: 1.0e-3 23 | device: 3 24 | omnianomaly_SWAT: 25 | dataset_id: SWAT 26 | batch_size: 512 27 | window_size: 100 28 | stride: 1 29 | nb_epoch: 10 30 | l2_reg: 0.1 31 | initial_lr: 1.0e-3 32 | device: 3 33 | omnianomaly_WADI: 34 | dataset_id: WADI 35 | batch_size: 512 36 | window_size: 100 37 | stride: 1 38 | nb_epoch: 10 39 | l2_reg: 0.1 40 | initial_lr: 1.0e-3 41 | device: 3 42 | omnianomaly_SMAP: 43 | dataset_id: SMAP 44 | batch_size: 512 45 | window_size: 100 46 | stride: 1 47 | nb_epoch: 10 48 | l2_reg: 0.1 49 | initial_lr: 1.0e-3 50 | device: 3 51 | omnianomaly_MSL: 52 | dataset_id: MSL 53 | batch_size: 512 54 | window_size: 100 55 | stride: 1 56 | nb_epoch: 10 57 | l2_reg: 0.1 58 | initial_lr: 1.0e-3 59 | device: 3 -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/pca.yaml: -------------------------------------------------------------------------------- 1 | Basic: 2 | model_id: PCA 3 | normalize: "minmax" 4 | 5 | PCA_SMD: 6 | dataset_id: SMD 7 | PCA_ASD: 8 | dataset_id: ASD 9 | PCA_SWAT: 10 | dataset_id: SWAT 11 | PCA_WADI: 12 | dataset_id: WADI 13 | PCA_SMAP: 14 | dataset_id: SMAP 15 | PCA_MSL: 16 | dataset_id: MSL 17 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/rans.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: RANS 3 | normalize: "minmax" 4 | 5 | 6 | rans_SMAP_test: 7 | dataset_id: SMAP_x2_full 8 | device: 1 # -1 for cpu, 0 for cuda:0 9 | num_workers: 1 10 | encoder_layers: 1 11 | decoder_layers: 1 12 | activation: 'relu' 13 | output_activation: 'relu' 14 | S: 2 15 | delta: 0.05 16 | batch_size: 512 17 | synchronize: True 18 | freq_warmup: 1 19 | sin_warmup: 1 20 | nb_epoch: 1 21 | 22 | rans_SMD: 23 | dataset_id: SMD 24 | device: 1 # -1 for cpu, 0 for cuda:0 25 | num_workers: 1 26 | encoder_layers: 1 27 | decoder_layers: 2 28 | activation: 'relu' 29 | output_activation: 'relu' 30 | S: 5 31 | delta: 0.05 32 | synchronize: True 33 | batch_size: 512 34 | freq_warmup: 5 35 | sin_warmup: 5 36 | nb_epoch: 50 37 | rans_ASD: 38 | dataset_id: ASD 39 | device: 1 # -1 for cpu, 0 for cuda:0 40 | 
num_workers: 1 41 | encoder_layers: 1 42 | decoder_layers: 2 43 | activation: 'relu' 44 | output_activation: 'relu' 45 | S: 5 46 | delta: 0.05 47 | synchronize: True 48 | batch_size: 512 49 | freq_warmup: 5 50 | sin_warmup: 5 51 | nb_epoch: 50 52 | rans_SWAT: 53 | dataset_id: SWAT 54 | device: 1 # -1 for cpu, 0 for cuda:0 55 | num_workers: 1 56 | encoder_layers: 1 57 | decoder_layers: 2 58 | activation: 'relu' 59 | output_activation: 'relu' 60 | S: 5 61 | delta: 0.05 62 | synchronize: True 63 | batch_size: 512 64 | freq_warmup: 5 65 | sin_warmup: 5 66 | nb_epoch: 50 67 | rans_WADI: 68 | dataset_id: WADI 69 | device: 1 # -1 for cpu, 0 for cuda:0 70 | num_workers: 1 71 | encoder_layers: 1 72 | decoder_layers: 2 73 | activation: 'relu' 74 | output_activation: 'relu' 75 | S: 5 76 | delta: 0.05 77 | synchronize: True 78 | batch_size: 512 79 | freq_warmup: 5 80 | sin_warmup: 5 81 | nb_epoch: 50 82 | rans_SMAP: 83 | dataset_id: SMAP 84 | device: 1 # -1 for cpu, 0 for cuda:0 85 | num_workers: 1 86 | encoder_layers: 1 87 | decoder_layers: 2 88 | activation: 'relu' 89 | output_activation: 'relu' 90 | S: 5 91 | delta: 0.05 92 | synchronize: True 93 | batch_size: 512 94 | freq_warmup: 5 95 | sin_warmup: 5 96 | nb_epoch: 50 97 | rans_MSL: 98 | dataset_id: MSL 99 | device: 1 # -1 for cpu, 0 for cuda:0 100 | num_workers: 1 101 | encoder_layers: 1 102 | decoder_layers: 2 103 | activation: 'relu' 104 | output_activation: 'relu' 105 | S: 5 106 | delta: 0.05 107 | synchronize: True 108 | batch_size: 512 109 | freq_warmup: 5 110 | sin_warmup: 50 111 | nb_epoch: 100 -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/tranad.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: tranad 3 | normalize: "minmax" 4 | 5 | tranad_SMD: 6 | dataset_id: SMD 7 | num_workers: 1 8 | batch_size: 512 9 | window_size: 100 10 | stride: 1 11 | nb_epoch: 10 12 | device: 3 # -1 for cpu, 0 for cuda:0 13 | lr: 0.001 14 | hidden_size: 64 15 | tranad_ASD: 16 | dataset_id: ASD 17 | num_workers: 1 18 | batch_size: 512 19 | window_size: 100 20 | stride: 1 21 | nb_epoch: 10 22 | device: 3 # -1 for cpu, 0 for cuda:0 23 | lr: 0.001 24 | hidden_size: 64 25 | tranad_SWAT: 26 | dataset_id: SWAT 27 | num_workers: 1 28 | batch_size: 512 29 | window_size: 100 30 | stride: 1 31 | nb_epoch: 10 32 | device: 3 # -1 for cpu, 0 for cuda:0 33 | lr: 0.001 34 | hidden_size: 64 35 | tranad_WADI: 36 | dataset_id: WADI 37 | num_workers: 1 38 | batch_size: 512 39 | window_size: 100 40 | stride: 1 41 | nb_epoch: 10 42 | device: 3 # -1 for cpu, 0 for cuda:0 43 | lr: 0.001 44 | hidden_size: 64 45 | tranad_SMAP: 46 | dataset_id: SMAP 47 | num_workers: 1 48 | batch_size: 512 49 | window_size: 100 50 | stride: 1 51 | nb_epoch: 10 52 | device: 3 # -1 for cpu, 0 for cuda:0 53 | lr: 0.001 54 | hidden_size: 64 55 | tranad_MSL: 56 | dataset_id: MSL 57 | num_workers: 1 58 | batch_size: 512 59 | window_size: 100 60 | stride: 1 61 | nb_epoch: 10 62 | device: 3 # -1 for cpu, 0 for cuda:0 63 | lr: 0.001 64 | hidden_size: 64 65 | -------------------------------------------------------------------------------- /benchmark/benchmark_config/model_config/usad.yaml: -------------------------------------------------------------------------------- 1 | Base: 2 | model_id: usad 3 | normalize: "minmax" 4 | 5 | usad_SMD: 6 | dataset_id: SMD 7 | batch_size: 512 8 | window_size: 100 9 | stride: 1 10 | num_workers: 1 11 | device: 0 # -1 for cpu, 0 for cuda:0stride: 
5 12 | nb_epoch: 10 13 | lr: 0.001 14 | hidden_size: 64 15 | usad_ASD: 16 | dataset_id: ASD 17 | batch_size: 512 18 | window_size: 100 19 | stride: 1 20 | num_workers: 1 21 | device: 0 # -1 for cpu, 0 for cuda:0stride: 5 22 | nb_epoch: 10 23 | lr: 0.001 24 | hidden_size: 64 25 | usad_SWAT: 26 | dataset_id: SWAT 27 | batch_size: 256 28 | window_size: 100 29 | stride: 1 30 | num_workers: 1 31 | device: 0 # -1 for cpu, 0 for cuda:0stride: 5 32 | nb_epoch: 10 33 | lr: 0.001 34 | hidden_size: 64 35 | usad_WADI: 36 | dataset_id: WADI 37 | batch_size: 256 38 | window_size: 100 39 | stride: 1 40 | num_workers: 1 41 | device: 0 # -1 for cpu, 0 for cuda:0stride: 5 42 | nb_epoch: 10 43 | lr: 0.001 44 | hidden_size: 64 45 | usad_SMAP: 46 | dataset_id: SMAP 47 | batch_size: 512 48 | window_size: 100 49 | stride: 1 50 | num_workers: 1 51 | device: 0 # -1 for cpu, 0 for cuda:0stride: 5 52 | nb_epoch: 10 53 | lr: 0.001 54 | hidden_size: 64 55 | usad_MSL: 56 | dataset_id: MSL 57 | batch_size: 512 58 | window_size: 100 59 | stride: 1 60 | num_workers: 1 61 | device: 0 # -1 for cpu, 0 for cuda:0stride: 5 62 | nb_epoch: 10 63 | lr: 0.001 64 | hidden_size: 64 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /benchmark/dagmm_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | import logging 5 | import argparse 6 | from common import data_preprocess 7 | from common.dataloader import load_dataset 8 | from common.utils import seed_everything, load_config, set_logger, print_to_json 9 | from common.evaluation import Evaluator, TimeTracker 10 | from common.exp import store_entity 11 | from networks.dagmm.dagmm import DAGMM 12 | 13 | seed_everything() 14 | if __name__ == "__main__": 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument( 17 | "--config", 18 | type=str, 19 | default="./benchmark_config/", 20 | help="The config directory.", 21 | ) 22 | parser.add_argument("--expid", type=str, default="dagmm_SMD") 23 | parser.add_argument("--gpu", type=int, default=-1) 24 | args = vars(parser.parse_args()) 25 | 26 | config_dir = args["config"] 27 | experiment_id = args["expid"] 28 | 29 | params = load_config(config_dir, experiment_id) 30 | set_logger(params, args) 31 | logging.info(print_to_json(params)) 32 | 33 | data_dict = load_dataset( 34 | data_root=params["data_root"], 35 | entities=params["entities"], 36 | dim=params["dim"], 37 | valid_ratio=params["valid_ratio"], 38 | test_label_postfix=params["test_label_postfix"], 39 | test_postfix=params["test_postfix"], 40 | train_postfix=params["train_postfix"], 41 | nrows=params["nrows"], 42 | ) 43 | 44 | # preprocessing 45 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 46 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 47 | 48 | evaluator = Evaluator(**params["eval"]) 49 | for entity in params["entities"]: 50 | logging.info("Fitting dataset: {}".format(entity)) 51 | 52 | train = data_dict[entity]["train"] 53 | test = data_dict[entity]["test"] 54 | test_label = data_dict[entity]["test_label"] 55 | 56 | model = DAGMM( 57 | comp_hiddens=params["compression_hiddens"], 58 | est_hiddens=params["estimation_hiddens"], 59 | est_dropout_ratio=params["estimation_dropout_ratio"], 60 | minibatch_size=params["batch_size"], 61 | epoch_size=params["nb_epoch"], 62 | learning_rate=params["lr"], 63 | lambda1=params["lambdaone"], 64 | lambda2=params["lambdatwo"], 65 | ) 66 | 67 | # 
predict anomaly score 68 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 69 | 70 | tt.train_start() 71 | model.fit(train) 72 | tt.train_end() 73 | 74 | train_anomaly_score = model.predict_prob(train) 75 | tt.test_start() 76 | anomaly_score = model.predict_prob(test) 77 | tt.test_end() 78 | 79 | anomaly_label = test_label 80 | 81 | store_entity( 82 | params, 83 | entity, 84 | train_anomaly_score, 85 | anomaly_score, 86 | anomaly_label, 87 | time_tracker=tt.get_data(), 88 | ) 89 | del model 90 | evaluator.eval_exp( 91 | exp_folder=params["model_root"], 92 | entities=params["entities"], 93 | merge_folder=params["benchmark_dir"], 94 | extra_params=params, 95 | ) 96 | -------------------------------------------------------------------------------- /benchmark/ganf_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 4 | import sys 5 | 6 | sys.path.append("../") 7 | import logging 8 | import argparse 9 | from common import data_preprocess 10 | from common.dataloader import load_dataset, get_dataloaders 11 | from common.utils import seed_everything, load_config, set_logger, print_to_json 12 | from common.evaluation import Evaluator, TimeTracker 13 | from common.exp import store_entity 14 | from networks.ganf.GANF import GANF 15 | 16 | seed_everything() 17 | if __name__ == "__main__": 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument( 20 | "--config", 21 | type=str, 22 | default="./benchmark_config/", 23 | help="The config directory.", 24 | ) 25 | parser.add_argument("--expid", type=str, default="ganf_SMD") 26 | parser.add_argument("--gpu", type=int, default=-1) 27 | args = vars(parser.parse_args()) 28 | 29 | config_dir = args["config"] 30 | experiment_id = args["expid"] 31 | 32 | params = load_config(config_dir, experiment_id) 33 | set_logger(params, args) 34 | logging.info(print_to_json(params)) 35 | 36 | data_dict = load_dataset( 37 | data_root=params["data_root"], 38 | entities=params["entities"], 39 | valid_ratio=params["valid_ratio"], 40 | dim=params["dim"], 41 | test_label_postfix=params["test_label_postfix"], 42 | test_postfix=params["test_postfix"], 43 | train_postfix=params["train_postfix"], 44 | nrows=params["nrows"], 45 | ) 46 | 47 | # preprocessing 48 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 49 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 50 | 51 | # sliding windows 52 | window_dict = data_preprocess.generate_windows( 53 | data_dict, 54 | window_size=params["window_size"], 55 | stride=params["stride"], 56 | ) 57 | 58 | # train/test on each entity put here 59 | evaluator = Evaluator(**params["eval"]) 60 | for entity in params["entities"]: 61 | logging.info("Fitting dataset: {}".format(entity)) 62 | windows = window_dict[entity] 63 | train_windows = windows["train_windows"] 64 | test_windows = windows["test_windows"] 65 | 66 | train_loader, _, test_loader = get_dataloaders( 67 | train_windows, 68 | test_windows, 69 | next_steps=0, 70 | batch_size=params["batch_size"], 71 | shuffle=params["shuffle"], 72 | num_workers=params["num_workers"], 73 | ) 74 | 75 | model = GANF( 76 | n_blocks=params["n_blocks"], 77 | input_size=params["input_size"], 78 | hidden_size=params["hidden_size"], 79 | n_hidden=params["n_hidden"], 80 | dropout=params["dropout"], 81 | batch_norm=params["batch_norm"], 82 | model_root=params["model_root"], 83 | device=params["device"], 84 | ) 85 | 86 | tt = TimeTracker(nb_epoch=params["nb_epoch"])
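# fit GANF on the training windows below, then score the test windows; the training-window scores computed afterwards are used for threshold calibration (e.g., POT)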
87 | tt.train_start() 88 | model.fit( 89 | train_loader, 90 | n_sensor=params["dim"], 91 | weight_decay=params["weight_decay"], 92 | n_epochs=params["nb_epoch"], 93 | lr=params["lr"], 94 | h_tol=params["h_tol"], 95 | rho_max=params["rho_max"], 96 | lambda1=params["lambda1"], 97 | rho_init=params["rho_init"], 98 | alpha_init=params["alpha_init"], 99 | ) 100 | tt.train_end() 101 | 102 | tt.test_start() 103 | anomaly_score, anomaly_label = model.predict_prob( 104 | test_loader, windows["test_label"] 105 | ) 106 | tt.test_end() 107 | 108 | train_anomaly_score = model.predict_prob(train_loader) 109 | 110 | store_entity( 111 | params, 112 | entity, 113 | train_anomaly_score, 114 | anomaly_score, 115 | anomaly_label, 116 | time_tracker=tt.get_data(), 117 | ) 118 | evaluator.eval_exp( 119 | exp_folder=params["model_root"], 120 | entities=params["entities"], 121 | merge_folder=params["benchmark_dir"], 122 | extra_params=params, 123 | ) 124 | -------------------------------------------------------------------------------- /benchmark/iforest_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | import logging 5 | import argparse 6 | 7 | from common import data_preprocess 8 | from common.dataloader import load_dataset 9 | from common.utils import seed_everything, load_config, set_logger, print_to_json 10 | from common.exp import store_entity 11 | from common.evaluation import Evaluator, TimeTracker 12 | from pyod.models.iforest import IForest 13 | 14 | 15 | seed_everything() 16 | if __name__ == "__main__": 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument( 19 | "--config", 20 | type=str, 21 | default="./benchmark_config/", 22 | help="The config directory.", 23 | ) 24 | parser.add_argument("--expid", type=str, default="iforest_SMD") 25 | parser.add_argument("--gpu", type=int, default=-1) 26 | args = vars(parser.parse_args()) 27 | 28 | config_dir = args["config"] 29 | experiment_id = args["expid"] 30 | 31 | params = load_config(config_dir, experiment_id) 32 | set_logger(params, args) 33 | logging.info(print_to_json(params)) 34 | 35 | data_dict = load_dataset( 36 | data_root=params["data_root"], 37 | entities=params["entities"], 38 | dim=params["dim"], 39 | valid_ratio=params["valid_ratio"], 40 | test_label_postfix=params["test_label_postfix"], 41 | test_postfix=params["test_postfix"], 42 | train_postfix=params["train_postfix"], 43 | nrows=params["nrows"], 44 | ) 45 | 46 | # preprocessing 47 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 48 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 49 | 50 | # train/test on each entity put here 51 | evaluator = Evaluator(**params["eval"]) 52 | for entity in params["entities"]: 53 | logging.info("Fitting dataset: {}".format(entity)) 54 | 55 | train = data_dict[entity]["train"] 56 | test = data_dict[entity]["test"] 57 | test_label = data_dict[entity]["test_label"] 58 | 59 | model = IForest(n_estimators=params["n_estimators"]) 60 | 61 | tt = TimeTracker() 62 | tt.train_start() 63 | model.fit(train) 64 | tt.train_end() 65 | 66 | train_anomaly_score = model.decision_function(train) 67 | 68 | tt.test_start() 69 | anomaly_score = model.decision_function(test) 70 | tt.test_end() 71 | 72 | anomaly_label = test_label 73 | 74 | # Make evaluation 75 | store_entity( 76 | params, 77 | entity, 78 | train_anomaly_score, 79 | anomaly_score, 80 | anomaly_label, 81 | time_tracker=tt.get_data(), 82 | ) 83 | evaluator.eval_exp( 84 | 
exp_folder=params["model_root"], 85 | entities=params["entities"], 86 | merge_folder=params["benchmark_dir"], 87 | extra_params=params, 88 | eval_single=True, 89 | ) 90 | -------------------------------------------------------------------------------- /benchmark/interfusion_benchmark.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | 5 | sys.path.append("../") 6 | 7 | import logging 8 | from common import data_preprocess 9 | from common.dataloader import load_dataset 10 | from common.utils import seed_everything, load_config, set_logger, print_to_json 11 | from networks.InterFusion import InterFusion 12 | from common.evaluation import Evaluator, TimeTracker 13 | from common.exp import store_entity 14 | 15 | seed_everything() 16 | 17 | if __name__ == "__main__": 18 | import argparse 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument( 22 | "--config", 23 | type=str, 24 | default="./benchmark_config/", 25 | help="The config directory.", 26 | ) 27 | parser.add_argument("--expid", type=str, default="interfusion_SMD") 28 | parser.add_argument("--gpu", type=int, default=-1) 29 | args = vars(parser.parse_args()) 30 | 31 | config_dir = args["config"] 32 | experiment_id = args["expid"] 33 | 34 | params = load_config(config_dir, experiment_id) 35 | set_logger(params, args) 36 | logging.info(print_to_json(params)) 37 | 38 | data_dict = load_dataset( 39 | data_root=params["data_root"], 40 | entities=params["entities"], 41 | dim=params["dim"], 42 | valid_ratio=params["valid_ratio"], 43 | test_label_postfix=params["test_label_postfix"], 44 | test_postfix=params["test_postfix"], 45 | train_postfix=params["train_postfix"], 46 | nrows=params["nrows"], 47 | ) 48 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 49 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 50 | 51 | # train/test on each entity put here 52 | evaluator = Evaluator(**params["eval"]) 53 | for entity in params["entities"]: 54 | logging.info("Fitting dataset: {}".format(entity)) 55 | train = data_dict[entity]["train"] 56 | valid = data_dict[entity].get("valid", None) 57 | test, test_label = ( 58 | data_dict[entity]["test"], 59 | data_dict[entity]["test_label"], 60 | ) 61 | 62 | model = InterFusion( 63 | dataset=params["dataset"], 64 | model_root=params["model_root"], 65 | dim=params["dim"], 66 | ) 67 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 68 | 69 | tt.train_start() 70 | model.fit( 71 | x_train=train, 72 | x_valid=valid, 73 | lr=params["lr"], 74 | window_size=params["window_size"], 75 | batch_size=params["batch_size"], 76 | pretrain_max_epoch=params["pretrain_max_epoch"], 77 | max_epoch=params["nb_epoch"], 78 | ) 79 | tt.train_end() 80 | 81 | train_anomaly_score = model.predict_prob(train, None) 82 | 83 | tt.test_start() 84 | anomaly_score, anomaly_label = model.predict_prob(test, test_label) 85 | tt.test_end() 86 | 87 | store_entity( 88 | params, 89 | entity, 90 | train_anomaly_score, 91 | anomaly_score, 92 | anomaly_label, 93 | time_tracker=tt.get_data(), 94 | ) 95 | del model 96 | evaluator.eval_exp( 97 | exp_folder=params["model_root"], 98 | entities=params["entities"], 99 | merge_folder=params["benchmark_dir"], 100 | extra_params=params, 101 | ) 102 | -------------------------------------------------------------------------------- /benchmark/lstm_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 
os.chdir(os.path.dirname(os.path.realpath(__file__))) 4 | import sys 5 | 6 | sys.path.append("../") 7 | import logging 8 | from common import data_preprocess 9 | from common.dataloader import get_dataloaders, load_dataset 10 | from common.utils import seed_everything, load_config, set_logger, print_to_json 11 | from common.evaluation import Evaluator, TimeTracker 12 | from common.exp import store_entity 13 | from networks.lstm import LSTM 14 | 15 | 16 | seed_everything() 17 | if __name__ == "__main__": 18 | import argparse 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument( 22 | "--config", 23 | type=str, 24 | default="./benchmark_config/", 25 | help="The config directory.", 26 | ) 27 | parser.add_argument("--expid", type=str, default="lstm_SMD") 28 | parser.add_argument("--gpu", type=int, default=-1) 29 | args = vars(parser.parse_args()) 30 | 31 | config_dir = args["config"] 32 | experiment_id = args["expid"] 33 | 34 | params = load_config(config_dir, experiment_id) 35 | set_logger(params, args) 36 | logging.info(print_to_json(params)) 37 | 38 | data_dict = load_dataset( 39 | data_root=params["data_root"], 40 | entities=params["entities"], 41 | dim=params["dim"], 42 | valid_ratio=params["valid_ratio"], 43 | test_label_postfix=params["test_label_postfix"], 44 | test_postfix=params["test_postfix"], 45 | train_postfix=params["train_postfix"], 46 | nrows=params["nrows"], 47 | ) 48 | 49 | # preprocessing 50 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 51 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 52 | 53 | # sliding windows 54 | window_dict = data_preprocess.generate_windows( 55 | data_dict, 56 | window_size=params["window_size"], 57 | stride=params["stride"], 58 | ) 59 | 60 | # train/test on each entity put here 61 | evaluator = Evaluator(**params["eval"]) 62 | for entity in params["entities"]: 63 | logging.info("Fitting dataset: {}".format(entity)) 64 | windows = window_dict[entity] 65 | train_windows = windows["train_windows"] 66 | test_windows = windows["test_windows"] 67 | 68 | train_loader, _, test_loader = get_dataloaders( 69 | train_windows, 70 | test_windows, 71 | batch_size=params["batch_size"], 72 | num_workers=params["num_workers"], 73 | ) 74 | 75 | model = LSTM( 76 | in_channels=params["dim"], 77 | num_layers=params["num_layers"], 78 | dropout=params["dropout"], 79 | window_size=params["window_size"], 80 | prediction_length=params["prediction_length"], 81 | prediction_dims=params["prediction_dims"], 82 | patience=params["patience"], 83 | save_path=params["model_root"], 84 | nb_epoch=params["nb_epoch"], 85 | lr=params["lr"], 86 | device=params["device"], 87 | ) 88 | 89 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 90 | tt.train_start() 91 | model.fit( 92 | train_loader, 93 | test_loader=test_loader, 94 | test_label=windows["test_label"], 95 | ) 96 | tt.train_end() 97 | 98 | model.load_encoder() 99 | train_anomaly_score = model.predict_prob(train_loader) 100 | 101 | tt.test_start() 102 | anomaly_score, anomaly_label = model.predict_prob( 103 | test_loader, windows["test_label"] 104 | ) 105 | tt.test_end() 106 | 107 | store_entity( 108 | params, 109 | entity, 110 | train_anomaly_score, 111 | anomaly_score, 112 | anomaly_label, 113 | time_tracker=tt.get_data(), 114 | ) 115 | evaluator.eval_exp( 116 | exp_folder=params["model_root"], 117 | entities=params["entities"], 118 | merge_folder=params["benchmark_dir"], 119 | extra_params=params, 120 | ) 121 | 
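Usage note: every *_benchmark.py entry point above exposes the same CLI (--config, --expid, --gpu), so a single run can also be launched programmatically, mirroring the subprocess pattern used by common/autotuner.py. A minimal sketch, assuming the repository root as the working directory and that the chosen expid exists in the corresponding model_config YAML:

import subprocess

# one LSTM run on SMD with the default config directory (illustrative values only)
cmd = "python -u lstm_benchmark.py --config ./benchmark_config/ --expid lstm_SMD --gpu 0"
subprocess.run(cmd.split(), check=True, cwd="./benchmark")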
-------------------------------------------------------------------------------- /benchmark/mscred_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | 5 | import logging 6 | from common.dataloader import load_dataset 7 | from common import data_preprocess 8 | from common.dataloader import load_dataset, get_dataloaders 9 | from common.utils import seed_everything, load_config, set_logger, print_to_json 10 | from networks.mscred import MSCRED 11 | from common.evaluation import Evaluator, TimeTracker 12 | from common.exp import store_entity 13 | 14 | 15 | seed_everything() 16 | if __name__ == "__main__": 17 | import argparse 18 | 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument( 21 | "--config", 22 | type=str, 23 | default="./benchmark_config/", 24 | help="The config directory.", 25 | ) 26 | parser.add_argument("--expid", type=str, default="mscred_SMD") 27 | parser.add_argument("--gpu", type=int, default=-1) 28 | args = vars(parser.parse_args()) 29 | 30 | config_dir = args["config"] 31 | experiment_id = args["expid"] 32 | 33 | params = load_config(config_dir, experiment_id) 34 | set_logger(params, args) 35 | logging.info(print_to_json(params)) 36 | 37 | data_dict = load_dataset( 38 | data_root=params["data_root"], 39 | entities=params["entities"], 40 | dim=params["dim"], 41 | valid_ratio=params["valid_ratio"], 42 | test_label_postfix=params["test_label_postfix"], 43 | test_postfix=params["test_postfix"], 44 | train_postfix=params["train_postfix"], 45 | nrows=params["nrows"], 46 | ) 47 | # sliding windows 48 | window_dict = data_preprocess.generate_windows( 49 | data_dict, 50 | window_size=params["window_size"], 51 | stride=params["stride"], 52 | ) 53 | # train/test on each entity put here 54 | evaluator = Evaluator(**params["eval"]) 55 | for entity in params["entities"]: 56 | logging.info("Fitting dataset: {}".format(entity)) 57 | windows = window_dict[entity] 58 | train_windows = windows["train_windows"] 59 | test_windows = windows["test_windows"] 60 | 61 | train_loader, _, test_loader = get_dataloaders( 62 | train_windows, test_windows, batch_size=params["batch_size"] 63 | ) 64 | 65 | model = MSCRED( 66 | params["dim"], 67 | params["window_size"], 68 | lr=params["lr"], 69 | model_root=params["model_root"], 70 | device=params["device"], 71 | ) 72 | 73 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 74 | 75 | tt.train_start() 76 | model.fit( 77 | params["nb_epoch"], 78 | train_loader, 79 | training=True, 80 | ) 81 | tt.train_end() 82 | 83 | train_anomaly_score = model.predict_prob(train_loader) 84 | 85 | tt.test_start() 86 | anomaly_score, anomaly_label = model.predict_prob( 87 | test_loader, windows["test_label"] 88 | ) 89 | tt.test_end() 90 | 91 | store_entity( 92 | params, 93 | entity, 94 | train_anomaly_score, 95 | anomaly_score, 96 | anomaly_label, 97 | time_tracker=tt.get_data(), 98 | ) 99 | evaluator.eval_exp( 100 | exp_folder=params["model_root"], 101 | entities=params["entities"], 102 | merge_folder=params["benchmark_dir"], 103 | extra_params=params, 104 | ) 105 | -------------------------------------------------------------------------------- /benchmark/mtad_gat_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 4 | import sys 5 | 6 | sys.path.append("../") 7 | import logging 8 | from common import data_preprocess 9 | from common.dataloader import 
load_dataset, get_dataloaders 10 | from common.utils import seed_everything, load_config, set_logger, print_to_json 11 | from common.evaluation import Evaluator, TimeTracker 12 | from common.exp import store_entity 13 | from networks.mtad_gat import MTAD_GAT 14 | 15 | 16 | seed_everything() 17 | if __name__ == "__main__": 18 | import argparse 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument( 22 | "--config", 23 | type=str, 24 | default="./benchmark_config/", 25 | help="The config directory.", 26 | ) 27 | parser.add_argument("--expid", type=str, default="mtad_gat_SMD") 28 | parser.add_argument("--gpu", type=int, default=-1) 29 | args = vars(parser.parse_args()) 30 | 31 | config_dir = args["config"] 32 | experiment_id = args["expid"] 33 | 34 | params = load_config(config_dir, experiment_id) 35 | set_logger(params, args) 36 | logging.info(print_to_json(params)) 37 | 38 | data_dict = load_dataset( 39 | data_root=params["data_root"], 40 | entities=params["entities"], 41 | valid_ratio=params["valid_ratio"], 42 | dim=params["dim"], 43 | test_label_postfix=params["test_label_postfix"], 44 | test_postfix=params["test_postfix"], 45 | train_postfix=params["train_postfix"], 46 | nrows=params["nrows"], 47 | ) 48 | 49 | # preprocessing 50 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 51 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 52 | 53 | # sliding windows 54 | window_dict = data_preprocess.generate_windows( 55 | data_dict, 56 | window_size=params["window_size"], 57 | stride=params["stride"], 58 | ) 59 | 60 | # train/test on each entity put here 61 | evaluator = Evaluator(**params["eval"]) 62 | for entity in params["entities"]: 63 | logging.info("Fitting dataset: {}".format(entity)) 64 | windows = window_dict[entity] 65 | train_windows = windows["train_windows"] 66 | test_windows = windows["test_windows"] 67 | 68 | train_loader, _, test_loader = get_dataloaders( 69 | train_windows, 70 | test_windows, 71 | next_steps=1, 72 | batch_size=params["batch_size"], 73 | shuffle=params["shuffle"], 74 | num_workers=params["num_workers"], 75 | ) 76 | 77 | model = MTAD_GAT( 78 | n_features=params["dim"], 79 | window_size=params["window_size"], 80 | out_dim=params["dim"], 81 | kernel_size=params["kernel_size"], 82 | feat_gat_embed_dim=params["feat_gat_embed_dim"], 83 | time_gat_embed_dim=params["time_gat_embed_dim"], 84 | use_gatv2=params["use_gatv2"], 85 | gru_n_layers=params["gru_n_layers"], 86 | gru_hid_dim=params["gru_hid_dim"], 87 | forecast_n_layers=params["forecast_n_layers"], 88 | forecast_hid_dim=params["forecast_hid_dim"], 89 | recon_n_layers=params["recon_n_layers"], 90 | recon_hid_dim=params["recon_hid_dim"], 91 | dropout=params["dropout"], 92 | alpha=params["alpha"], 93 | device=params["device"], 94 | ) 95 | 96 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 97 | 98 | tt.train_start() 99 | model.fit( 100 | train_loader, 101 | val_loader=None, 102 | n_epochs=params["nb_epoch"], 103 | batch_size=params["batch_size"], 104 | init_lr=params["init_lr"], 105 | model_root=params["model_root"], 106 | ) 107 | tt.train_end() 108 | 109 | train_anomaly_score = model.predict_prob(train_loader, gamma=params["gamma"]) 110 | 111 | tt.test_start() 112 | anomaly_score, anomaly_label = model.predict_prob( 113 | test_loader, gamma=params["gamma"], window_labels=windows["test_label"] 114 | ) 115 | tt.test_end() 116 | 117 | store_entity( 118 | params, 119 | entity, 120 | train_anomaly_score, 121 | anomaly_score, 122 | anomaly_label, 123 | 
time_tracker=tt.get_data(), 124 | ) 125 | evaluator.eval_exp( 126 | exp_folder=params["model_root"], 127 | entities=params["entities"], 128 | merge_folder=params["benchmark_dir"], 129 | extra_params=params, 130 | ) 131 | -------------------------------------------------------------------------------- /benchmark/omnianomaly_benchmark.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | 5 | sys.path.append("../") 6 | 7 | import logging 8 | import warnings 9 | 10 | warnings.filterwarnings("ignore", category=DeprecationWarning) 11 | warnings.filterwarnings("ignore", category=FutureWarning) 12 | 13 | from networks.omni_anomaly.detector import OmniDetector 14 | 15 | from common import data_preprocess 16 | from common.dataloader import get_dataloaders, load_dataset 17 | from common.utils import seed_everything, load_config, set_logger, print_to_json 18 | from common.evaluation import Evaluator, TimeTracker 19 | from common.exp import store_entity 20 | 21 | 22 | seed_everything() 23 | if __name__ == "__main__": 24 | import argparse 25 | 26 | parser = argparse.ArgumentParser() 27 | parser.add_argument( 28 | "--config", 29 | type=str, 30 | default="./benchmark_config/", 31 | help="The config directory.", 32 | ) 33 | parser.add_argument("--expid", type=str, default="omnianomaly_SMD") 34 | parser.add_argument("--gpu", type=int, default=-1) 35 | args = vars(parser.parse_args()) 36 | 37 | config_dir = args["config"] 38 | experiment_id = args["expid"] 39 | 40 | params = load_config(config_dir, experiment_id) 41 | set_logger(params, args) 42 | logging.info(print_to_json(params)) 43 | 44 | data_dict = load_dataset( 45 | data_root=params["data_root"], 46 | entities=params["entities"], 47 | dim=params["dim"], 48 | valid_ratio=params["valid_ratio"], 49 | test_label_postfix=params["test_label_postfix"], 50 | test_postfix=params["test_postfix"], 51 | train_postfix=params["train_postfix"], 52 | nrows=params["nrows"], 53 | ) 54 | 55 | # preprocessing 56 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 57 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 58 | 59 | # sliding windows 60 | window_dict = data_preprocess.generate_windows( 61 | data_dict, 62 | window_size=params["window_size"], 63 | stride=params["stride"], 64 | ) 65 | 66 | # train/test on each entity put here 67 | evaluator = Evaluator(**params["eval"], reverse_score=params["reverse_score"]) 68 | for entity in params["entities"]: 69 | logging.info("Fitting dataset: {}".format(entity)) 70 | windows = window_dict[entity] 71 | train_windows = windows["train_windows"] 72 | test_windows = windows["test_windows"] 73 | test_label_windows = windows["test_label"] 74 | # batch data 75 | train_loader, _, test_loader = get_dataloaders( 76 | train_windows, test_windows, batch_size=params["batch_size"] 77 | ) 78 | 79 | model = OmniDetector( 80 | dim=params["dim"], 81 | model_root=params["model_root"], 82 | window_size=params["window_size"], 83 | initial_lr=params["initial_lr"], 84 | l2_reg=params["l2_reg"] 85 | ) 86 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 87 | 88 | tt.train_start() 89 | model.fit(train_loader) 90 | tt.train_end() 91 | 92 | train_anomaly_score = model.predict_prob(train_loader) 93 | tt.test_start() 94 | anomaly_score, anomaly_label = model.predict_prob( 95 | test_loader, test_label_windows 96 | ) 97 | tt.test_end() 98 | 99 | store_entity( 100 | params, 101 | entity, 102 | train_anomaly_score, 103 | anomaly_score, 
104 | anomaly_label, 105 | time_tracker=tt.get_data(), 106 | ) 107 | evaluator.eval_exp( 108 | exp_folder=params["model_root"], 109 | entities=params["entities"], 110 | merge_folder=params["benchmark_dir"], 111 | extra_params=params, 112 | ) 113 | -------------------------------------------------------------------------------- /benchmark/tranad_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.chdir(os.path.dirname(os.path.realpath(__file__))) 4 | import sys 5 | 6 | sys.path.append("../") 7 | import logging 8 | from networks.tranad import * 9 | from common import data_preprocess 10 | from common.dataloader import load_dataset, get_dataloaders 11 | from common.utils import seed_everything, load_config, set_logger, print_to_json 12 | from networks.tranad.models import TranAD 13 | from common.evaluation import Evaluator, TimeTracker 14 | from common.exp import store_entity 15 | 16 | seed_everything() 17 | if __name__ == "__main__": 18 | import argparse 19 | 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument( 22 | "--config", 23 | type=str, 24 | default="./benchmark_config/", 25 | help="The config directory.", 26 | ) 27 | parser.add_argument("--expid", type=str, default="tranad_SMD") 28 | parser.add_argument("--gpu", type=int, default=-1) 29 | args = vars(parser.parse_args()) 30 | 31 | config_dir = args["config"] 32 | experiment_id = args["expid"] 33 | 34 | params = load_config(config_dir, experiment_id) 35 | set_logger(params, args) 36 | logging.info(print_to_json(params)) 37 | 38 | data_dict = load_dataset( 39 | data_root=params["data_root"], 40 | entities=params["entities"], 41 | valid_ratio=params["valid_ratio"], 42 | dim=params["dim"], 43 | test_label_postfix=params["test_label_postfix"], 44 | test_postfix=params["test_postfix"], 45 | train_postfix=params["train_postfix"], 46 | nrows=params["nrows"], 47 | ) 48 | 49 | # preprocessing 50 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 51 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 52 | 53 | # sliding windows 54 | window_dict = data_preprocess.generate_windows( 55 | data_dict, 56 | window_size=params["window_size"], 57 | stride=params["stride"], 58 | ) 59 | 60 | # train/test on each entity put here 61 | evaluator = Evaluator(**params["eval"]) 62 | for entity in params["entities"]: 63 | logging.info("Fitting dataset: {}".format(entity)) 64 | windows = window_dict[entity] 65 | train_windows = windows["train_windows"] 66 | test_windows = windows["test_windows"] 67 | 68 | train_loader, _, test_loader = get_dataloaders(train_windows, test_windows) 69 | 70 | model = TranAD( 71 | params["dim"], 72 | params["window_size"], 73 | lr=params["lr"], 74 | model_root=params["model_root"], 75 | device=params["device"], 76 | ) 77 | 78 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 79 | 80 | tt.train_start() 81 | model.fit( 82 | params["nb_epoch"], 83 | train_loader, 84 | training=True, 85 | ) 86 | tt.train_end() 87 | 88 | train_anomaly_score = model.predict_prob(train_loader) 89 | 90 | tt.test_start() 91 | anomaly_score, anomaly_label = model.predict_prob( 92 | test_loader, windows["test_label"] 93 | ) 94 | tt.test_end() 95 | 96 | store_entity( 97 | params, 98 | entity, 99 | train_anomaly_score, 100 | anomaly_score, 101 | anomaly_label, 102 | time_tracker=tt.get_data(), 103 | ) 104 | evaluator.eval_exp( 105 | exp_folder=params["model_root"], 106 | entities=params["entities"], 107 | merge_folder=params["benchmark_dir"], 108 | 
extra_params=params, 109 | ) 110 | -------------------------------------------------------------------------------- /benchmark/usad_benchmark.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.append("../") 4 | import logging 5 | from common import data_preprocess 6 | from common.dataloader import load_dataset 7 | from common.utils import seed_everything 8 | from networks.usad import UsadModel 9 | from common.utils import seed_everything, load_config, set_logger, print_to_json 10 | from common.evaluation import Evaluator, TimeTracker 11 | from common.exp import store_entity 12 | 13 | seed_everything() 14 | if __name__ == "__main__": 15 | import argparse 16 | 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument( 19 | "--config", 20 | type=str, 21 | default="./benchmark_config/", 22 | help="The config directory.", 23 | ) 24 | parser.add_argument("--expid", type=str, default="usad_SMD") 25 | parser.add_argument("--gpu", type=int, default=-1) 26 | args = vars(parser.parse_args()) 27 | 28 | config_dir = args["config"] 29 | experiment_id = args["expid"] 30 | 31 | params = load_config(config_dir, experiment_id) 32 | set_logger(params, args) 33 | logging.info(print_to_json(params)) 34 | 35 | data_dict = load_dataset( 36 | data_root=params["data_root"], 37 | entities=params["entities"], 38 | valid_ratio=params["valid_ratio"], 39 | dim=params["dim"], 40 | test_label_postfix=params["test_label_postfix"], 41 | test_postfix=params["test_postfix"], 42 | train_postfix=params["train_postfix"], 43 | nrows=params["nrows"], 44 | ) 45 | 46 | # preprocessing 47 | pp = data_preprocess.preprocessor(model_root=params["model_root"]) 48 | data_dict = pp.normalize(data_dict, method=params["normalize"]) 49 | 50 | # sliding windows 51 | window_dict = data_preprocess.generate_windows( 52 | data_dict, 53 | window_size=params["window_size"], 54 | stride=params["stride"], 55 | ) 56 | 57 | # train/test on each entity put here 58 | evaluator = Evaluator(**params["eval"]) 59 | for entity in params["entities"]: 60 | logging.info("Fitting dataset: {}".format(entity)) 61 | windows = window_dict[entity] 62 | train_windows = windows["train_windows"] 63 | test_windows = windows["test_windows"] 64 | test_windows_label = windows["test_label"] 65 | 66 | model = UsadModel( 67 | w_size=train_windows.shape[1] * train_windows.shape[2], 68 | z_size=train_windows.shape[1] * params["hidden_size"], 69 | device=params["device"], 70 | ) 71 | tt = TimeTracker(nb_epoch=params["nb_epoch"]) 72 | 73 | tt.train_start() 74 | model.fit( 75 | windows_train=train_windows, 76 | windows_val=None, 77 | epochs=params["nb_epoch"], 78 | batch_size=params["batch_size"], 79 | ) 80 | tt.train_end() 81 | 82 | train_anomaly_score = model.predict_prob( 83 | windows_test=train_windows, 84 | batch_size=params["batch_size"], 85 | ) 86 | tt.test_start() 87 | anomaly_score, anomaly_label = model.predict_prob( 88 | windows_test=test_windows, 89 | batch_size=params["batch_size"], 90 | windows_label=test_windows_label, 91 | ) 92 | tt.test_end() 93 | 94 | store_entity( 95 | params, 96 | entity, 97 | train_anomaly_score, 98 | anomaly_score, 99 | anomaly_label, 100 | time_tracker=tt.get_data(), 101 | ) 102 | evaluator.eval_exp( 103 | exp_folder=params["model_root"], 104 | entities=params["entities"], 105 | merge_folder=params["benchmark_dir"], 106 | extra_params=params, 107 | ) 108 | -------------------------------------------------------------------------------- /common/autotuner.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | import yaml 3 | import itertools 4 | import hashlib 5 | import time 6 | import subprocess 7 | import glob 8 | import numpy as np 9 | from .utils import load_config, print_to_json, load_dataset_config 10 | 11 | 12 | 13 | def enumerate_params(config_file, exclude_expid=[]): 14 | with open(config_file, "r") as cfg: 15 | config_dict = yaml.load(cfg, Loader=yaml.FullLoader) 16 | # tuning space 17 | tune_dict = config_dict["tuner_space"] 18 | for k, v in tune_dict.items(): 19 | if not isinstance(v, list): 20 | tune_dict[k] = [v] 21 | experiment_id = config_dict["base_expid"] 22 | if "model_config" in config_dict: 23 | model_dict = dict() 24 | if 'Base' in config_dict["model_config"]: 25 | model_dict.update(config_dict["model_config"]['Base']) 26 | model_dict.update(config_dict["model_config"][experiment_id]) 27 | else: 28 | base_config_dir = config_dict.get("base_config", os.path.dirname(config_file)) 29 | model_dict = load_config(base_config_dir, experiment_id) 30 | 31 | dataset_id = config_dict.get("dataset_id", model_dict["dataset_id"]) 32 | 33 | if "dataset_config" in config_dict: 34 | dataset_dict = config_dict["dataset_config"][dataset_id] 35 | else: 36 | dataset_dict = load_dataset_config(base_config_dir, dataset_id) 37 | 38 | if model_dict["dataset_id"] == "TBD": # rename base expid 39 | model_dict["dataset_id"] = dataset_id 40 | experiment_id = model_dict["model"] + "_" + dataset_id 41 | 42 | # key checking 43 | tuner_keys = set(tune_dict.keys()) 44 | base_keys = set(model_dict.keys()).union(set(dataset_dict.keys())) 45 | if len(tuner_keys - base_keys) > 0: 46 | raise RuntimeError("Invalid params in tuner config: {}".format(tuner_keys - base_keys)) 47 | 48 | config_dir = config_file.replace(".yaml", "") 49 | if not os.path.exists(config_dir): 50 | os.makedirs(config_dir) 51 | 52 | # enumerate dataset para combinations 53 | dataset_dict = {k: tune_dict[k] if k in tune_dict else [v] for k, v in dataset_dict.items()} 54 | dataset_para_keys = list(dataset_dict.keys()) 55 | dataset_para_combs = dict() 56 | for idx, values in enumerate(itertools.product(*map(dataset_dict.get, dataset_para_keys))): 57 | dataset_params = dict(zip(dataset_para_keys, values)) 58 | # if dataset_params["data_format"] == "h5": 59 | # dataset_para_combs[dataset_id] = dataset_params 60 | # else: 61 | hash_id = hashlib.md5(print_to_json(dataset_params).encode("utf-8")).hexdigest()[0:8] 62 | dataset_para_combs[dataset_id + "_{}".format(hash_id)] = dataset_params 63 | 64 | # dump dataset para combinations to config file 65 | dataset_config = os.path.join(config_dir, "dataset_config.yaml") 66 | with open(dataset_config, "w") as fw: 67 | yaml.dump(dataset_para_combs, fw, default_flow_style=None, indent=4) 68 | 69 | # enumerate model para combinations 70 | model_dict = {k: tune_dict[k] if k in tune_dict else [v] for k, v in model_dict.items()} 71 | model_para_keys = list(model_dict.keys()) 72 | model_param_combs = dict() 73 | for idx, values in enumerate(itertools.product(*map(model_dict.get, model_para_keys))): 74 | model_param_combs[idx + 1] = dict(zip(model_para_keys, values)) 75 | 76 | # update dataset_id into model params 77 | merged_param_combs = dict() 78 | for idx, item in enumerate(itertools.product(model_param_combs.values(), 79 | dataset_para_combs.keys())): 80 | para_dict = item[0] 81 | para_dict["dataset_id"] = item[1] 82 | random_number = "" 83 | # if para_dict["debug"]: 84 | # random_number = 
str(np.random.randint(1e8)) # add a random number to avoid duplicate during debug 85 | hash_id = hashlib.md5((print_to_json(para_dict) + random_number).encode("utf-8")).hexdigest()[0:8] 86 | hash_expid = experiment_id + "_{:03d}_{}".format(idx + 1, hash_id) 87 | if hash_expid not in exclude_expid: 88 | merged_param_combs[hash_expid] = para_dict.copy() 89 | 90 | # dump model para combinations to config file 91 | model_config = os.path.join(config_dir, "model_config.yaml") 92 | with open(model_config, "w") as fw: 93 | yaml.dump(merged_param_combs, fw, default_flow_style=None, indent=4) 94 | print("Enumerate all tuner configurations done.") 95 | return config_dir 96 | 97 | def load_experiment_ids(config_dir): 98 | model_configs = glob.glob(os.path.join(config_dir, "model_config.yaml")) 99 | if not model_configs: 100 | model_configs = glob.glob(os.path.join(config_dir, "model_config/*.yaml")) 101 | experiment_id_list = [] 102 | for config in model_configs: 103 | with open(config, "r") as cfg: 104 | config_dict = yaml.load(cfg, Loader=yaml.FullLoader) 105 | experiment_id_list += config_dict.keys() 106 | return sorted(experiment_id_list) 107 | 108 | def grid_search(model_name, config_dir, gpu_list, expid_tag=None): 109 | config_dir = os.path.abspath(config_dir) 110 | experiment_id_list = load_experiment_ids(config_dir) 111 | if expid_tag is not None: 112 | experiment_id_list = [expid for expid in experiment_id_list if str(expid_tag) in expid] 113 | assert len(experiment_id_list) > 0, "tag={} does not match any expid!".format(expid_tag) 114 | gpu_list = list(gpu_list) 115 | idle_queue = list(range(len(gpu_list))) 116 | processes = dict() 117 | while len(experiment_id_list) > 0: 118 | if len(idle_queue) > 0: 119 | idle_idx = idle_queue.pop(0) 120 | gpu_id = gpu_list[idle_idx] 121 | expid = experiment_id_list.pop(0) 122 | cmd = "python -u {}_benchmark.py --config {} --expid {} --gpu {}"\ 123 | .format(model_name, config_dir, expid, gpu_id) 124 | # print("Run cmd:", cmd) 125 | p = subprocess.Popen(cmd.split(), cwd="../benchmark") 126 | processes[idle_idx] = p 127 | else: 128 | time.sleep(5) 129 | for idle_idx, p in processes.items(): 130 | if p.poll() is not None: # terminated 131 | idle_queue.append(idle_idx) 132 | [p.wait() for p in processes.values()] -------------------------------------------------------------------------------- /common/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import argparse 4 | import logging 5 | import glob 6 | 7 | 8 | class eval_config: 9 | def __init__(self) -> None: 10 | self.metrics = {"f1": True, "adjusted_f1": True, "delay": True, "time": True} 11 | 12 | 13 | class model_config(eval_config): 14 | def __init__(self) -> None: 15 | super.__init__() 16 | pass 17 | 18 | 19 | entities = { 20 | "SMD": ["machine-1-{}".format(i) for i in range(1, 9)] 21 | + ["machine-2-{}".format(i) for i in range(1, 10)] 22 | + ["machine-3-{}".format(i) for i in range(1, 12)], 23 | "SMAP": [ 24 | "P-1", 25 | "S-1", 26 | "E-1", 27 | "E-2", 28 | "E-3", 29 | "E-4", 30 | "E-5", 31 | "E-6", 32 | "E-7", 33 | "E-8", 34 | "E-9", 35 | "E-10", 36 | "E-11", 37 | "E-12", 38 | "E-13", 39 | "A-1", 40 | "D-1", 41 | "P-2", 42 | "P-3", 43 | "D-2", 44 | "D-3", 45 | "D-4", 46 | "A-2", 47 | "A-3", 48 | "A-4", 49 | "G-1", 50 | "G-2", 51 | "D-5", 52 | "D-6", 53 | "D-7", 54 | "F-1", 55 | "P-4", 56 | "G-3", 57 | "T-1", 58 | "T-2", 59 | "D-8", 60 | "D-9", 61 | "F-2", 62 | "G-4", 63 | "T-3", 64 | "D-11", 65 | "D-12", 66 | "B-1", 67 | 
"G-6", 68 | "G-7", 69 | "P-7", 70 | "R-1", 71 | "A-5", 72 | "A-6", 73 | "A-7", 74 | "D-13", 75 | "P-2", 76 | "A-8", 77 | "A-9", 78 | "F-3", 79 | ], 80 | "MSL": [ 81 | "M-6", 82 | "M-1", 83 | "M-2", 84 | "S-2", 85 | "P-10", 86 | "T-4", 87 | "T-5", 88 | "F-7", 89 | "M-3", 90 | "M-4", 91 | "M-5", 92 | "P-15", 93 | "C-1", 94 | "C-2", 95 | "T-12", 96 | "T-13", 97 | "F-4", 98 | "F-5", 99 | "D-14", 100 | "T-9", 101 | "P-14", 102 | "T-8", 103 | "P-11", 104 | "D-15", 105 | "D-16", 106 | "M-7", 107 | "F-8", 108 | ], 109 | "WADI": ["wadi"], 110 | "SWAT": ["swat"], 111 | "WADI_SPLIT": ["wadi-1", "wadi-2", "wadi-3"], # if OOM occurs 112 | } 113 | -------------------------------------------------------------------------------- /common/data_preprocess.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | from collections import defaultdict 5 | import numpy as np 6 | from sklearn.preprocessing import ( 7 | KBinsDiscretizer, 8 | MinMaxScaler, 9 | RobustScaler, 10 | StandardScaler, 11 | ) 12 | 13 | from common.utils import load_hdf5, save_hdf5 14 | 15 | 16 | class preprocessor: 17 | def __init__(self, model_root): 18 | self.model_root = model_root 19 | self.vocab_size = None 20 | self.discretizer_list = defaultdict(list) 21 | 22 | def save(self, filepath): 23 | filepath = os.path.join(filepath, "preprocessor.pkl") 24 | logging.info("Saving preprocessor into {}".format(filepath)) 25 | with open(filepath, "wb") as fw: 26 | pickle.dump(self.__dict__, fw) 27 | 28 | def load(self, filepath): 29 | filepath = os.path.join(filepath, "preprocessor.pkl") 30 | logging.info("Loading preprocessor from {}".format(filepath)) 31 | with open(filepath, "rb") as fw: 32 | self.__dict__.update(pickle.load(fw)) 33 | 34 | def normalize(self, data_dict, method="minmax"): 35 | if method == "none": 36 | return data_dict 37 | logging.info("Normalizing data with {}".format(method)) 38 | normalized_dict = defaultdict(dict) 39 | for k, subdata_dict in data_dict.items(): 40 | # method: minmax, standard, robust 41 | # fit_transform using train 42 | if method == "minmax": 43 | est = MinMaxScaler() 44 | elif method == "standard": 45 | est = StandardScaler() 46 | elif method == "robust": 47 | est = RobustScaler() 48 | 49 | train_ = est.fit_transform(subdata_dict["train"]) 50 | test_ = est.transform(subdata_dict["test"]) 51 | 52 | # assign back 53 | normalized_dict[k]["train"] = train_ 54 | normalized_dict[k]["test"] = test_ 55 | for subk in subdata_dict.keys(): 56 | if subk not in ["train", "test"]: 57 | normalized_dict[k][subk] = subdata_dict[subk] 58 | return normalized_dict 59 | 60 | 61 | def get_windows(ts, labels=None, window_size=128, stride=1, dim=None): 62 | i = 0 63 | ts_len = ts.shape[0] 64 | windows = [] 65 | label_windows = [] 66 | while i + window_size < ts_len: 67 | if dim is not None: 68 | windows.append(ts[i : i + window_size, dim]) 69 | else: 70 | windows.append(ts[i : i + window_size]) 71 | if labels is not None: 72 | label_windows.append(labels[i : i + window_size]) 73 | i += stride 74 | if labels is not None: 75 | return np.array(windows, dtype=np.float32), np.array( 76 | label_windows, dtype=np.float32 77 | ) 78 | else: 79 | return np.array(windows, dtype=np.float32), None 80 | 81 | 82 | def generate_windows(data_dict, window_size=100, nrows=None, stride=1, **kwargs): 83 | logging.info("Generating sliding windows (size {}).".format(window_size)) 84 | results = defaultdict(dict) 85 | for dataname, subdata_dict in data_dict.items(): 86 | 
for k in ["train", "valid", "test"]: 87 | if k not in subdata_dict: continue 88 | data = subdata_dict[k][0:nrows] 89 | if k == "train": 90 | data_windows, _ = get_windows( 91 | data, window_size=window_size, stride=stride 92 | ) 93 | results[dataname]["train_windows"] = data_windows 94 | if k == "valid": 95 | data_windows, _ = get_windows( 96 | data, window_size=window_size, stride=stride 97 | ) 98 | results[dataname]["valid_windows"] = data_windows 99 | if k == "test": 100 | test_label = subdata_dict["test_label"][0:nrows] 101 | test_windows, test_label = get_windows( 102 | data, test_label, window_size=window_size, stride=1 103 | ) 104 | results[dataname]["test_windows"] = test_windows 105 | results[dataname]["test_label"] = test_label 106 | logging.info("Windows for {} #: {}".format(k, data_windows.shape)) 107 | 108 | return results 109 | -------------------------------------------------------------------------------- /common/dataloader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import pickle 4 | import numpy as np 5 | from collections import defaultdict 6 | from torch.utils.data import DataLoader, Dataset 7 | 8 | data_path_dict = { 9 | "SMD": "./datasets/anomaly/SMD/processed", 10 | "SMAP": "./datasets/anomaly/SMAP-MSL/processed_SMAP", 11 | "MSL": "./datasets/anomaly/SMAP-MSL/processed_MSL", 12 | "WADI": "./datasets/anomaly/WADI/processed", 13 | "SWAT": "./datasets/anomaly/SWAT/processed", 14 | "WADI_SPLIT": "./datasets/anomaly/WADI_SPLIT/processed", 15 | "SWAT_SPLIT": "./datasets/anomaly/SWAT_SPLIT/processed", 16 | } 17 | 18 | 19 | def get_data_dim(dataset): 20 | if "SMAP" in dataset: 21 | return 25 22 | elif "MSL" in dataset: 23 | return 55 24 | elif "SMD" in dataset: 25 | return 38 26 | elif "WADI" in dataset: 27 | return 93 28 | elif "SWAT" in dataset: 29 | return 40 30 | else: 31 | raise ValueError("unknown dataset " + str(dataset)) 32 | 33 | 34 | def load_dataset( 35 | data_root, 36 | entities, 37 | valid_ratio, 38 | dim, 39 | test_label_postfix, 40 | test_postfix, 41 | train_postfix, 42 | nan_value=0, 43 | nrows=None, 44 | ): 45 | """ 46 | use_dim: dimension used in multivariate timeseries 47 | """ 48 | logging.info("Loading data from {}".format(data_root)) 49 | 50 | data = defaultdict(dict) 51 | total_train_len, total_valid_len, total_test_len = 0, 0, 0 52 | for dataname in entities: 53 | with open( 54 | os.path.join(data_root, "{}_{}".format(dataname, train_postfix)), "rb" 55 | ) as f: 56 | train = pickle.load(f).reshape((-1, dim))[0:nrows, :] 57 | if valid_ratio > 0: 58 | split_idx = int(len(train) * valid_ratio) 59 | train, valid = train[:-split_idx], train[-split_idx:] 60 | data[dataname]["valid"] = np.nan_to_num(valid, nan_value) 61 | total_valid_len += len(valid) 62 | data[dataname]["train"] = np.nan_to_num(train, nan_value) 63 | total_train_len += len(train) 64 | with open( 65 | os.path.join(data_root, "{}_{}".format(dataname, test_postfix)), "rb" 66 | ) as f: 67 | test = pickle.load(f).reshape((-1, dim))[0:nrows, :] 68 | data[dataname]["test"] = np.nan_to_num(test, nan_value) 69 | total_test_len += len(test) 70 | with open( 71 | os.path.join(data_root, "{}_{}".format(dataname, test_label_postfix)), "rb" 72 | ) as f: 73 | data[dataname]["test_label"] = pickle.load(f).reshape(-1)[0:nrows] 74 | logging.info("Loading {} entities done.".format(len(entities))) 75 | logging.info( 76 | "Train/Valid/Test: {}/{}/{} lines.".format( 77 | total_train_len, total_valid_len, total_test_len 78 | ) 79 | ) 80 | 
81 | return data 82 | 83 | 84 | class sliding_window_dataset(Dataset): 85 | def __init__(self, data, next_steps=0): 86 | self.data = data 87 | self.next_steps = next_steps 88 | 89 | def __getitem__(self, index): 90 | if self.next_steps == 0: 91 | x = self.data[index] 92 | return x 93 | else: 94 | x = self.data[index, 0 : -self.next_steps] 95 | y = self.data[index, -self.next_steps :] 96 | return x, y 97 | 98 | def __len__(self): 99 | return len(self.data) 100 | 101 | 102 | def get_dataloaders( 103 | train_data, 104 | test_data, 105 | valid_data=None, 106 | next_steps=0, 107 | batch_size=32, 108 | shuffle=True, 109 | num_workers=1, 110 | ): 111 | 112 | train_loader = DataLoader( 113 | sliding_window_dataset(train_data, next_steps), 114 | batch_size=batch_size, 115 | shuffle=shuffle, 116 | num_workers=num_workers, 117 | ) 118 | 119 | test_loader = DataLoader( 120 | sliding_window_dataset(test_data, next_steps), 121 | batch_size=batch_size, 122 | shuffle=False, 123 | num_workers=num_workers, 124 | ) 125 | 126 | if valid_data is not None: 127 | valid_loader = DataLoader( 128 | sliding_window_dataset(valid_data, next_steps), 129 | batch_size=batch_size, 130 | shuffle=shuffle, 131 | num_workers=num_workers, 132 | ) 133 | else: 134 | valid_loader = None 135 | return train_loader, valid_loader, test_loader 136 | -------------------------------------------------------------------------------- /common/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .point_adjustment import * 2 | from .spot import * 3 | from .metrics import * 4 | from .thresholding import * 5 | from .eval_pipline import * 6 | from .time_tracker import * 7 | -------------------------------------------------------------------------------- /common/evaluation/metrics.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import numpy as np 3 | from sklearn.metrics import f1_score, precision_score, recall_score 4 | from common.evaluation import adjust_pred 5 | 6 | 7 | def compute_binary_metrics(anomaly_pred, anomaly_label, adjustment=False): 8 | if not adjustment: 9 | eval_anomaly_pred = anomaly_pred 10 | metrics = { 11 | "f1": f1_score(eval_anomaly_pred, anomaly_label), 12 | "pc": precision_score(eval_anomaly_pred, anomaly_label), 13 | "rc": recall_score(eval_anomaly_pred, anomaly_label), 14 | } 15 | else: 16 | eval_anomaly_pred = adjust_pred(anomaly_pred, anomaly_label) 17 | metrics = { 18 | "f1_adjusted": f1_score(eval_anomaly_pred, anomaly_label), 19 | "pc_adjusted": precision_score(eval_anomaly_pred, anomaly_label), 20 | "rc_adjusted": recall_score(eval_anomaly_pred, anomaly_label), 21 | } 22 | return metrics 23 | 24 | 25 | def compute_delay(anomaly_pred, anomaly_label): 26 | def onehot2interval(arr): 27 | result = [] 28 | record = False 29 | for idx, item in enumerate(arr): 30 | if item == 1 and not record: 31 | start = idx 32 | record = True 33 | if item == 0 and record: 34 | end = idx # not include the end point, like [a,b) 35 | record = False 36 | result.append((start, end)) 37 | return result 38 | 39 | count = 0 40 | total_delay = 0 41 | pred = np.array(anomaly_pred) 42 | label = np.array(anomaly_label) 43 | for start, end in onehot2interval(label): 44 | pred_interval = pred[start:end] 45 | if pred_interval.sum() > 0: 46 | delay = np.where(pred_interval == 1)[0][0] 47 | delay = delay / len(pred_interval) # normalized by the interval 48 | total_delay += delay 49 | count += 1 50 | avg_delay = total_delay / (1e-6 + 
count) 51 | return avg_delay 52 | -------------------------------------------------------------------------------- /common/evaluation/point_adjustment.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | 4 | def adjust_pred(pred, label): 5 | """ 6 | Borrow from https://github.com/NetManAIOps/OmniAnomaly/blob/master/omni_anomaly/eval_methods.py 7 | """ 8 | adjusted_pred = copy.deepcopy(pred) 9 | 10 | anomaly_state = False 11 | anomaly_count = 0 12 | latency = 0 13 | for i in range(len(adjusted_pred)): 14 | if label[i] and adjusted_pred[i] and not anomaly_state: 15 | anomaly_state = True 16 | anomaly_count += 1 17 | for j in range(i, 0, -1): 18 | if not label[j]: 19 | break 20 | else: 21 | if not adjusted_pred[j]: 22 | adjusted_pred[j] = True 23 | latency += 1 24 | elif not label[i]: 25 | anomaly_state = False 26 | if anomaly_state: 27 | adjusted_pred[i] = True 28 | return adjusted_pred 29 | -------------------------------------------------------------------------------- /common/evaluation/thresholding.py: -------------------------------------------------------------------------------- 1 | from cgi import print_form 2 | import logging 3 | import numpy as np 4 | import more_itertools as mit 5 | from .metrics import compute_binary_metrics 6 | from .spot import SPOT 7 | 8 | 9 | def pot_th(train_anomaly_score, anomaly_score, q=1e-3, level=0.99, dynamic=False): 10 | """ 11 | Run POT method on given score. 12 | :param init_score (np.ndarray): The data to get init threshold. 13 | For `OmniAnomaly`, it should be the anomaly score of train set. 14 | :param: score (np.ndarray): The data to run POT method. 15 | For `OmniAnomaly`, it should be the anomaly score of test set. 16 | :param label (np.ndarray): boolean list of true anomalies in score 17 | :param q (float): Detection level (risk) 18 | :param level (float): Probability associated with the initial threshold t 19 | :return dict: pot result dict 20 | Method from OmniAnomaly (https://github.com/NetManAIOps/OmniAnomaly) 21 | """ 22 | logging.info(f"Computing the threshold using POT with q={q}, level={level}...") 23 | logging.info( 24 | "[POT] Train score max: {}, min: {}".format( 25 | train_anomaly_score.max(), train_anomaly_score.min() 26 | ) 27 | ) 28 | logging.info( 29 | "[POT] Test score max: {}, min: {}".format( 30 | anomaly_score.max(), anomaly_score.min() 31 | ) 32 | ) 33 | print(train_anomaly_score.shape, anomaly_score.shape) 34 | 35 | pot_th = None 36 | if not isinstance(level, list): 37 | level = [level] 38 | for l in level: 39 | try: 40 | s = SPOT(q) # SPOT object 41 | s.fit(train_anomaly_score, anomaly_score) 42 | s.initialize(level=l, min_extrema=False) # Calibration step 43 | ret = s.run(dynamic=dynamic, with_alarm=False) 44 | pot_th = np.mean(ret["thresholds"]) 45 | logging.info(f"Hit level={l}") 46 | break 47 | except: 48 | pass 49 | if pot_th is None: 50 | pot_th = np.percentile(anomaly_score, level[0] * 100) 51 | logging.info( 52 | "POT cannot find the threshold, use {}% percentile {}".format( 53 | level[0] * 100, pot_th 54 | ) 55 | ) 56 | return pot_th 57 | 58 | 59 | def eps_th(train_anomaly_score, reg_level=1): 60 | """ 61 | Threshold method proposed by Hundman et. al. 
(https://arxiv.org/abs/1802.04431) 62 | Code from TelemAnom (https://github.com/khundman/telemanom) 63 | """ 64 | logging.info("Computing the threshold with eps...") 65 | e_s = train_anomaly_score 66 | best_epsilon = None 67 | max_score = -10000000 68 | mean_e_s = np.mean(e_s) 69 | sd_e_s = np.std(e_s) 70 | 71 | for z in np.arange(2.5, 12, 0.5): 72 | epsilon = mean_e_s + sd_e_s * z 73 | pruned_e_s = e_s[e_s < epsilon] 74 | 75 | i_anom = np.argwhere(e_s >= epsilon).reshape( 76 | -1, 77 | ) 78 | buffer = np.arange(1, 50) 79 | i_anom = np.sort( 80 | np.concatenate( 81 | ( 82 | i_anom, 83 | np.array([i + buffer for i in i_anom]).flatten(), 84 | np.array([i - buffer for i in i_anom]).flatten(), 85 | ) 86 | ) 87 | ) 88 | i_anom = i_anom[(i_anom < len(e_s)) & (i_anom >= 0)] 89 | i_anom = np.sort(np.unique(i_anom)) 90 | 91 | if len(i_anom) > 0: 92 | groups = [list(group) for group in mit.consecutive_groups(i_anom)] 93 | # E_seq = [(g[0], g[-1]) for g in groups if not g[0] == g[-1]] 94 | 95 | mean_perc_decrease = (mean_e_s - np.mean(pruned_e_s)) / mean_e_s 96 | sd_perc_decrease = (sd_e_s - np.std(pruned_e_s)) / sd_e_s 97 | if reg_level == 0: 98 | denom = 1 99 | elif reg_level == 1: 100 | denom = len(i_anom) 101 | elif reg_level == 2: 102 | denom = len(i_anom) ** 2 103 | 104 | score = (mean_perc_decrease + sd_perc_decrease) / denom 105 | 106 | if score >= max_score and len(i_anom) < (len(e_s) * 0.5): 107 | max_score = score 108 | best_epsilon = epsilon 109 | 110 | if best_epsilon is None: 111 | best_epsilon = np.max(e_s) 112 | return best_epsilon 113 | 114 | 115 | def best_th( 116 | anomaly_score, 117 | anomaly_label, 118 | target_metric="f1", 119 | target_direction="max", 120 | point_adjustment=False, 121 | ): 122 | logging.info("Searching for the best threshod..") 123 | search_range = np.linspace(0, 1, 100) 124 | search_history = [] 125 | if point_adjustment: 126 | target_metric = target_metric + "_adjusted" 127 | 128 | for anomaly_percent in search_range: 129 | theta = np.percentile(anomaly_score, 100 * (1 - anomaly_percent)) 130 | pred = (anomaly_score >= theta).astype(int) 131 | 132 | metric_dict = compute_binary_metrics(pred, anomaly_label, point_adjustment) 133 | current_value = metric_dict[target_metric] 134 | 135 | logging.debug(f"th={theta}, {target_metric}={current_value}") 136 | 137 | search_history.append( 138 | { 139 | "best_value": current_value, 140 | "best_theta": theta, 141 | "target_metric": target_metric, 142 | "target_direction": target_direction, 143 | } 144 | ) 145 | 146 | result = ( 147 | max(search_history, key=lambda x: x["best_value"]) 148 | if target_direction == "max" 149 | else min(search_history, key=lambda x: x["best_value"]) 150 | ) 151 | return result["best_theta"] 152 | -------------------------------------------------------------------------------- /common/evaluation/time_tracker.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | 4 | class TimeTracker: 5 | def __init__(self, nb_epoch=1): 6 | self.train_time = 0 7 | self.test_time = 0 8 | self.nb_epoch = nb_epoch 9 | 10 | def train_start(self): 11 | self.s_train = time.time() 12 | 13 | def train_end(self): 14 | self.e_train = time.time() 15 | self.train_time = self.e_train - self.s_train 16 | 17 | def test_start(self): 18 | self.s_test = time.time() 19 | 20 | def test_end(self): 21 | self.e_test = time.time() 22 | self.test_time = self.e_test - self.s_test 23 | 24 | def get_data(self): 25 | return { 26 | "train_time": self.train_time, 27 | "test_time": 
self.test_time, 28 | "nb_epoch": self.nb_epoch, 29 | } 30 | -------------------------------------------------------------------------------- /common/exp.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import yaml 3 | import json 4 | import os 5 | from .utils import save_hdf5 6 | 7 | 8 | BENCHMARK_DIR = "./benchmark_results" 9 | 10 | 11 | def json_pretty_dump(obj, filename): 12 | with open(filename, "w") as fw: 13 | json.dump( 14 | {str(k): str(v) for k, v in obj.items()}, 15 | fw, 16 | sort_keys=True, 17 | indent=4, 18 | separators=(",", ": "), 19 | ensure_ascii=False, 20 | ) 21 | 22 | 23 | def store_entity( 24 | params, 25 | entity, 26 | train_anomaly_score, 27 | anomaly_score, 28 | anomaly_label, 29 | eval_results={}, 30 | time_tracker={}, 31 | ): 32 | exp_folder = params["model_root"] 33 | entity_folder = os.path.join(exp_folder, entity) 34 | os.makedirs(entity_folder, exist_ok=True) 35 | 36 | # save params 37 | with open(os.path.join(exp_folder, "params.yaml"), "w") as fw: 38 | yaml.dump(params, fw) 39 | 40 | # save results 41 | json_pretty_dump(eval_results, os.path.join(entity_folder, "eval_results.json")) 42 | 43 | # save time 44 | json_pretty_dump(time_tracker, os.path.join(entity_folder, "time.json")) 45 | 46 | # save scores 47 | score_dict = { 48 | "anomaly_label": anomaly_label, 49 | "anomaly_score": anomaly_score, 50 | "train_anomaly_score": train_anomaly_score, 51 | } 52 | save_hdf5(os.path.join(entity_folder, f"score_{entity}.hdf5"), score_dict) 53 | 54 | logging.info(f"Saving results for {entity} done.") 55 | 56 | -------------------------------------------------------------------------------- /doc/mtad_metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpsPAI/MTAD/63a601f45a088f12f8e3acf7bb887b2bfdd30ff2/doc/mtad_metrics.png -------------------------------------------------------------------------------- /networks/InterFusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .wrapper import * 2 | -------------------------------------------------------------------------------- /networks/InterFusion/algorithm/__init__.py: -------------------------------------------------------------------------------- 1 | from .recurrent_distribution import RecurrentDistribution 2 | from .real_nvp import dense_real_nvp 3 | from .utils import * 4 | from .mcmc_recons import * -------------------------------------------------------------------------------- /networks/InterFusion/algorithm/mcmc_recons.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | __all__ = ['masked_reconstruct', 'mcmc_reconstruct'] 5 | 6 | 7 | def masked_reconstruct(reconstruct, x, u, mask, name=None): 8 | """ 9 | Replace masked elements of `x` with reconstructed outputs. 10 | The potential anomaly points on x can be masked, and replaced by the reconstructed values. 11 | This can make the reconstruction more likely to be the normal pattern x should follow. 12 | Args: 13 | reconstruct ((tf.Tensor, tf.Tensor, tf.Tensor) -> tf.Tensor): Function for reconstructing `x`. 14 | x: The tensor to be reconstructed by `func`. 15 | u: Additional input for reconstructing `x`. 16 | mask: (tf.Tensor) mask, must be broadcastable into the shape of `x`. 17 | Indicating whether or not to mask each element of `x`. 18 | name (str): Name of this operation in TensorFlow graph. 
19 | (default "masked_reconstruct") 20 | Returns: 21 | tf.Tensor: `x` with masked elements replaced by reconstructed outputs. 22 | """ 23 | with tf.name_scope(name, default_name='masked_reconstruct'): 24 | x = tf.convert_to_tensor(x) # type: tf.Tensor 25 | mask = tf.convert_to_tensor(mask, dtype=tf.int32) # type: tf.Tensor 26 | 27 | mask = tf.broadcast_to(mask, tf.shape(x)) 28 | 29 | # get reconstructed x. Currently only support mask the last point if pixelcnn decoder is used. 30 | x_recons = reconstruct(x, u, mask) 31 | 32 | # get masked outputs 33 | return tf.where(tf.cast(mask, dtype=tf.bool), x_recons, x) 34 | 35 | 36 | def mcmc_reconstruct(reconstruct, x, u, mask, iter_count, 37 | back_prop=True, name=None): 38 | """ 39 | Iteratively reconstruct `x` with `mask` for `iter_count` times. 40 | This method will call :func:`masked_reconstruct` for `iter_count` times, 41 | with the output from previous iteration as the input `x` for the next 42 | iteration. The output of the final iteration would be returned. 43 | Args: 44 | reconstruct: Function for reconstructing `x`. 45 | x: The tensor to be reconstructed by `func`. 46 | u: Additional input for reconstructing `x`. 47 | mask: (tf.Tensor) mask, must be broadcastable into the shape of `x`. 48 | Indicating whether or not to mask each element of `x`. 49 | iter_count (int or tf.Tensor): 50 | Number of mcmc iterations(must be greater than 1). 51 | back_prop (bool): Whether or not to support back-propagation through 52 | all the iterations? (default :obj:`True`) 53 | name (str): Name of this operation in TensorFlow graph. 54 | (default "iterative_masked_reconstruct") 55 | Returns: 56 | tf.Tensor: The iteratively reconstructed `x`. 57 | """ 58 | with tf.name_scope(name, default_name='mcmc_reconstruct'): 59 | 60 | # do the masked reconstructions 61 | x_recons, _ = tf.while_loop( 62 | lambda x_i, i: i < iter_count, 63 | lambda x_i, i: (masked_reconstruct(reconstruct, x_i, u, mask), i + 1), 64 | [x, tf.constant(0, dtype=tf.int32)], 65 | back_prop=back_prop 66 | ) 67 | 68 | return x_recons 69 | -------------------------------------------------------------------------------- /networks/InterFusion/algorithm/real_nvp.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tfsnippet as spt 3 | from tensorflow.contrib.framework import arg_scope 4 | import numpy as np 5 | from tfsnippet.layers.flows.utils import ZeroLogDet 6 | 7 | 8 | class FeatureReversingFlow(spt.layers.FeatureMappingFlow): 9 | 10 | def __init__(self, axis=-1, value_ndims=1, name=None, scope=None): 11 | super(FeatureReversingFlow, self).__init__( 12 | axis=int(axis), value_ndims=value_ndims, name=name, scope=scope) 13 | 14 | @property 15 | def explicitly_invertible(self): 16 | return True 17 | 18 | def _build(self, input=None): 19 | pass 20 | 21 | def _reverse_feature(self, x, compute_y, compute_log_det): 22 | n_features = spt.utils.get_static_shape(x)[self.axis] 23 | if n_features is None: 24 | raise ValueError('The feature dimension must be fixed.') 25 | assert (0 > self.axis >= -self.value_ndims >= 26 | -len(spt.utils.get_static_shape(x))) 27 | permutation = np.asarray(list(reversed(range(n_features))), 28 | dtype=np.int32) 29 | 30 | # compute y 31 | y = None 32 | if compute_y: 33 | y = tf.gather(x, permutation, axis=self.axis) 34 | 35 | # compute log_det 36 | log_det = None 37 | if compute_log_det: 38 | log_det = ZeroLogDet(spt.utils.get_shape(x)[:-self.value_ndims], 39 | x.dtype.base_dtype) 40 | 41 | return y, 
log_det 42 | 43 | def _transform(self, x, compute_y, compute_log_det): 44 | return self._reverse_feature(x, compute_y, compute_log_det) 45 | 46 | def _inverse_transform(self, y, compute_x, compute_log_det): 47 | return self._reverse_feature(y, compute_x, compute_log_det) 48 | 49 | 50 | def dense_real_nvp(flow_depth: int, 51 | activation, 52 | kernel_regularizer, 53 | scope: str, 54 | use_invertible_flow=True, 55 | strict_invertible=False, 56 | use_actnorm_flow=False, 57 | dense_coupling_n_hidden_layers=1, 58 | dense_coupling_n_hidden_units=100, 59 | coupling_scale_shift_initializer='zero', # 'zero' or 'normal' 60 | coupling_scale_shift_normal_initializer_stddev=0.001, 61 | coupling_scale_type='sigmoid', # 'sigmoid' or 'exp' 62 | coupling_sigmoid_scale_bias=2., 63 | is_prior_flow=False) -> spt.layers.BaseFlow: 64 | def shift_and_scale(x1, n2): 65 | 66 | with arg_scope([spt.layers.dense], 67 | activation_fn=activation, 68 | kernel_regularizer=kernel_regularizer): 69 | h = x1 70 | for j in range(dense_coupling_n_hidden_layers): 71 | h = spt.layers.dense(h, 72 | units=dense_coupling_n_hidden_units, 73 | scope='hidden_{}'.format(j)) 74 | 75 | # compute shift and scale 76 | if coupling_scale_shift_initializer == 'zero': 77 | pre_params_initializer = tf.zeros_initializer() 78 | else: 79 | pre_params_initializer = tf.random_normal_initializer( 80 | stddev=coupling_scale_shift_normal_initializer_stddev) 81 | pre_params = spt.layers.dense(h, 82 | units=n2 * 2, 83 | kernel_initializer=pre_params_initializer, 84 | scope='shift_and_scale',) 85 | 86 | shift = pre_params[..., :n2] 87 | scale = pre_params[..., n2:] 88 | 89 | return shift, scale 90 | 91 | with tf.variable_scope(scope): 92 | flows = [] 93 | for i in range(flow_depth): 94 | level = [] 95 | if use_invertible_flow: 96 | level.append( 97 | spt.layers.InvertibleDense( 98 | strict_invertible=strict_invertible) 99 | ) 100 | else: 101 | level.append(FeatureReversingFlow()) 102 | level.append( 103 | spt.layers.CouplingLayer( 104 | tf.make_template( 105 | 'coupling', shift_and_scale, create_scope_now_=True), 106 | scale_type=coupling_scale_type, 107 | sigmoid_scale_bias=coupling_sigmoid_scale_bias, 108 | ) 109 | ) 110 | if use_actnorm_flow: 111 | level.append(spt.layers.ActNorm()) 112 | flows.extend(level) 113 | flow = spt.layers.SequentialFlow(flows) 114 | 115 | if is_prior_flow: 116 | flow = flow.invert() 117 | 118 | return flow 119 | -------------------------------------------------------------------------------- /networks/InterFusion/wrapper.py: -------------------------------------------------------------------------------- 1 | from .train import ExpConfig, TrainConfig, fit 2 | from .predict import predict_prob 3 | 4 | 5 | class InterFusion: 6 | def __init__(self, dataset, model_root, dim): 7 | self.dataset = dataset 8 | self.model_root = model_root 9 | self.dim = dim 10 | self.train_exp = None 11 | 12 | def fit( 13 | self, 14 | x_train, 15 | x_valid, 16 | lr, 17 | window_size, 18 | batch_size, 19 | pretrain_max_epoch, 20 | max_epoch, 21 | ): 22 | self.train_exp = fit( 23 | self.dataset, 24 | self.model_root, 25 | x_train, 26 | x_valid, 27 | self.dim, 28 | lr, 29 | window_size, 30 | batch_size, 31 | pretrain_max_epoch, 32 | max_epoch, 33 | ) 34 | 35 | def predict_prob(self, x_test, y_test): 36 | return predict_prob(x_test, y_test, self.train_exp.config, self.model_root) 37 | -------------------------------------------------------------------------------- /networks/RANS/__init__.py: 
-------------------------------------------------------------------------------- 1 | import imp 2 | from .main import * 3 | -------------------------------------------------------------------------------- /networks/__init__.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | 19 | # import pkgutil 20 | 21 | # __all__ = [] 22 | # for loader, module_name, is_pkg in pkgutil.walk_packages(__path__): 23 | # __all__.append(module_name) 24 | # module = loader.find_module(module_name).load_module(module_name) 25 | # exec('%s = module' % module_name) 26 | -------------------------------------------------------------------------------- /networks/anomaly_transformer/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpsPAI/MTAD/63a601f45a088f12f8e3acf7bb887b2bfdd30ff2/networks/anomaly_transformer/__init__.py -------------------------------------------------------------------------------- /networks/anomaly_transformer/model/AnomalyTransformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .attn import AnomalyAttention, AttentionLayer 6 | from .embed import DataEmbedding 7 | 8 | 9 | class EncoderLayer(nn.Module): 10 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 11 | super(EncoderLayer, self).__init__() 12 | d_ff = d_ff or 4 * d_model 13 | self.attention = attention 14 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 15 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 16 | self.norm1 = nn.LayerNorm(d_model) 17 | self.norm2 = nn.LayerNorm(d_model) 18 | self.dropout = nn.Dropout(dropout) 19 | self.activation = F.relu if activation == "relu" else F.gelu 20 | 21 | def forward(self, x, attn_mask=None): 22 | new_x, attn, mask, sigma = self.attention(x, x, x, attn_mask=attn_mask) 23 | x = x + self.dropout(new_x) 24 | y = x = self.norm1(x) 25 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 26 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 27 | 28 | return self.norm2(x + y), attn, mask, sigma 29 | 30 | 31 | class Encoder(nn.Module): 32 | def __init__(self, attn_layers, norm_layer=None): 33 | super(Encoder, self).__init__() 34 | self.attn_layers = nn.ModuleList(attn_layers) 35 | self.norm = norm_layer 36 | 37 | def forward(self, x, attn_mask=None): 38 | # x [B, L, D] 39 | series_list = [] 40 | prior_list = [] 41 | sigma_list = [] 42 | for attn_layer in self.attn_layers: 43 | x, series, prior, sigma = attn_layer(x, 
attn_mask=attn_mask) 44 | series_list.append(series) 45 | prior_list.append(prior) 46 | sigma_list.append(sigma) 47 | 48 | if self.norm is not None: 49 | x = self.norm(x) 50 | 51 | return x, series_list, prior_list, sigma_list 52 | 53 | 54 | class Anomaly_Transformer(nn.Module): 55 | def __init__( 56 | self, 57 | win_size, 58 | device, 59 | enc_in, 60 | c_out, 61 | d_model=512, 62 | n_heads=8, 63 | e_layers=3, 64 | d_ff=512, 65 | dropout=0.0, 66 | activation="gelu", 67 | output_attention=True, 68 | ): 69 | super(Anomaly_Transformer, self).__init__() 70 | self.output_attention = output_attention 71 | 72 | # Encoding 73 | self.embedding = DataEmbedding(enc_in, d_model, dropout) 74 | 75 | # Encoder 76 | self.encoder = Encoder( 77 | [ 78 | EncoderLayer( 79 | AttentionLayer( 80 | AnomalyAttention( 81 | win_size, 82 | device, 83 | False, 84 | attention_dropout=dropout, 85 | output_attention=output_attention, 86 | ), 87 | d_model, 88 | n_heads, 89 | ), 90 | d_model, 91 | d_ff, 92 | dropout=dropout, 93 | activation=activation, 94 | ) 95 | for l in range(e_layers) 96 | ], 97 | norm_layer=torch.nn.LayerNorm(d_model), 98 | ) 99 | 100 | self.projection = nn.Linear(d_model, c_out, bias=True) 101 | 102 | def forward(self, x): 103 | enc_out = self.embedding(x) 104 | enc_out, series, prior, sigmas = self.encoder(enc_out) 105 | enc_out = self.projection(enc_out) 106 | 107 | if self.output_attention: 108 | return enc_out, series, prior, sigmas 109 | else: 110 | return enc_out # [B, L, D] 111 | -------------------------------------------------------------------------------- /networks/anomaly_transformer/model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpsPAI/MTAD/63a601f45a088f12f8e3acf7bb887b2bfdd30ff2/networks/anomaly_transformer/model/__init__.py -------------------------------------------------------------------------------- /networks/anomaly_transformer/model/attn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import math 5 | from math import sqrt 6 | 7 | 8 | class TriangularCausalMask: 9 | def __init__(self, B, L, device="cpu"): 10 | mask_shape = [B, 1, L, L] 11 | with torch.no_grad(): 12 | self._mask = torch.triu( 13 | torch.ones(mask_shape, dtype=torch.bool), diagonal=1 14 | ).to(device) 15 | 16 | @property 17 | def mask(self): 18 | return self._mask 19 | 20 | 21 | class AnomalyAttention(nn.Module): 22 | def __init__( 23 | self, 24 | win_size, 25 | device, 26 | mask_flag=True, 27 | scale=None, 28 | attention_dropout=0.0, 29 | output_attention=False, 30 | ): 31 | super(AnomalyAttention, self).__init__() 32 | self.device = device 33 | self.scale = scale 34 | self.mask_flag = mask_flag 35 | self.output_attention = output_attention 36 | self.dropout = nn.Dropout(attention_dropout) 37 | window_size = win_size 38 | self.distances = torch.zeros((window_size, window_size)).to(self.device) 39 | for i in range(window_size): 40 | for j in range(window_size): 41 | self.distances[i][j] = abs(i - j) 42 | 43 | def forward(self, queries, keys, values, sigma, attn_mask): 44 | B, L, H, E = queries.shape 45 | _, S, _, D = values.shape 46 | scale = self.scale or 1.0 / sqrt(E) 47 | 48 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 49 | if self.mask_flag: 50 | if attn_mask is None: 51 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 52 | scores.masked_fill_(attn_mask.mask, -np.inf) 53 | attn = scale * 
scores 54 | 55 | sigma = sigma.transpose(1, 2) # B L H -> B H L 56 | window_size = attn.shape[-1] 57 | sigma = torch.sigmoid(sigma * 5) + 1e-5 58 | sigma = torch.pow(3, sigma) - 1 59 | sigma = sigma.unsqueeze(-1).repeat(1, 1, 1, window_size) # B H L L 60 | prior = ( 61 | self.distances.unsqueeze(0) 62 | .unsqueeze(0) 63 | .repeat(sigma.shape[0], sigma.shape[1], 1, 1) 64 | .to(self.device) 65 | ) 66 | prior = ( 67 | 1.0 68 | / (math.sqrt(2 * math.pi) * sigma) 69 | * torch.exp(-(prior ** 2) / 2 / (sigma ** 2)) 70 | ) 71 | 72 | series = self.dropout(torch.softmax(attn, dim=-1)) 73 | V = torch.einsum("bhls,bshd->blhd", series, values) 74 | 75 | if self.output_attention: 76 | return (V.contiguous(), series, prior, sigma) 77 | else: 78 | return (V.contiguous(), None) 79 | 80 | 81 | class AttentionLayer(nn.Module): 82 | def __init__(self, attention, d_model, n_heads, d_keys=None, d_values=None): 83 | super(AttentionLayer, self).__init__() 84 | 85 | d_keys = d_keys or (d_model // n_heads) 86 | d_values = d_values or (d_model // n_heads) 87 | self.norm = nn.LayerNorm(d_model) 88 | self.inner_attention = attention 89 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 90 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 91 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 92 | self.sigma_projection = nn.Linear(d_model, n_heads) 93 | self.out_projection = nn.Linear(d_values * n_heads, d_model) 94 | 95 | self.n_heads = n_heads 96 | 97 | def forward(self, queries, keys, values, attn_mask): 98 | B, L, _ = queries.shape 99 | _, S, _ = keys.shape 100 | H = self.n_heads 101 | x = queries 102 | queries = self.query_projection(queries).view(B, L, H, -1) 103 | keys = self.key_projection(keys).view(B, S, H, -1) 104 | values = self.value_projection(values).view(B, S, H, -1) 105 | sigma = self.sigma_projection(x).view(B, L, H) 106 | 107 | out, series, prior, sigma = self.inner_attention( 108 | queries, keys, values, sigma, attn_mask 109 | ) 110 | out = out.view(B, L, -1) 111 | 112 | return self.out_projection(out), series, prior, sigma 113 | -------------------------------------------------------------------------------- /networks/anomaly_transformer/model/embed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.utils import weight_norm 5 | import math 6 | 7 | 8 | class PositionalEmbedding(nn.Module): 9 | def __init__(self, d_model, max_len=5000): 10 | super(PositionalEmbedding, self).__init__() 11 | # Compute the positional encodings once in log space. 
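        # They follow the standard Transformer formulation:
        #   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
        #   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
        # `div_term` below equals 10000^(-2i / d_model), computed as
        # exp(-2i * ln(10000) / d_model) for numerical stability.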
12 | pe = torch.zeros(max_len, d_model).float() 13 | pe.require_grad = False 14 | 15 | position = torch.arange(0, max_len).float().unsqueeze(1) 16 | div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp() 17 | 18 | pe[:, 0::2] = torch.sin(position * div_term) 19 | pe[:, 1::2] = torch.cos(position * div_term) 20 | 21 | pe = pe.unsqueeze(0) 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x): 25 | return self.pe[:, :x.size(1)] 26 | 27 | 28 | class TokenEmbedding(nn.Module): 29 | def __init__(self, c_in, d_model): 30 | super(TokenEmbedding, self).__init__() 31 | padding = 1 if torch.__version__ >= '1.5.0' else 2 32 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 33 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 34 | for m in self.modules(): 35 | if isinstance(m, nn.Conv1d): 36 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='leaky_relu') 37 | 38 | def forward(self, x): 39 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 40 | return x 41 | 42 | 43 | class DataEmbedding(nn.Module): 44 | def __init__(self, c_in, d_model, dropout=0.0): 45 | super(DataEmbedding, self).__init__() 46 | 47 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 48 | self.position_embedding = PositionalEmbedding(d_model=d_model) 49 | 50 | self.dropout = nn.Dropout(p=dropout) 51 | 52 | def forward(self, x): 53 | x = self.value_embedding(x) + self.position_embedding(x) 54 | return self.dropout(x) 55 | -------------------------------------------------------------------------------- /networks/dagmm/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .compression_net import CompressionNet 4 | from .estimation_net import EstimationNet 5 | from .gmm import GMM 6 | from .dagmm import DAGMM 7 | -------------------------------------------------------------------------------- /networks/dagmm/compression_net.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | class CompressionNet: 4 | """ Compression Network. 5 | This network converts the input data to the representations 6 | suitable for calculation of anormaly scores by "Estimation Network". 7 | 8 | Outputs of network consist of next 2 components: 9 | 1) reduced low-dimensional representations learned by AutoEncoder. 10 | 2) the features derived from reconstruction error. 11 | """ 12 | def __init__(self, hidden_layer_sizes, activation=tf.nn.tanh): 13 | """ 14 | Parameters 15 | ---------- 16 | hidden_layer_sizes : list of int 17 | list of the size of hidden layers. 18 | For example, if the sizes are [n1, n2], 19 | the sizes of created networks are: 20 | input_size -> n1 -> n2 -> n1 -> input_sizes 21 | (network outputs the representation of "n2" layer) 22 | activation : function 23 | activation function of hidden layer. 24 | the last layer uses linear function. 
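            Note (added for clarity): in DAGMM the compressed code z_c from this
            encoder is later concatenated with two reconstruction-error features
            (relative Euclidean distance and cosine similarity), so the estimation
            network receives an (n2 + 2)-dimensional input.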
25 | """ 26 | self.hidden_layer_sizes = hidden_layer_sizes 27 | self.activation = activation 28 | 29 | def compress(self, x): 30 | self.input_size = x.shape[1] 31 | 32 | with tf.variable_scope("Encoder"): 33 | z = x 34 | n_layer = 0 35 | for size in self.hidden_layer_sizes[:-1]: 36 | n_layer += 1 37 | z = tf.layers.dense(z, size, activation=self.activation, 38 | name="layer_{}".format(n_layer)) 39 | 40 | # activation function of last layer is linear 41 | n_layer += 1 42 | z = tf.layers.dense(z, self.hidden_layer_sizes[-1], 43 | name="layer_{}".format(n_layer)) 44 | 45 | return z 46 | 47 | def reverse(self, z): 48 | with tf.variable_scope("Decoder"): 49 | n_layer = 0 50 | for size in self.hidden_layer_sizes[:-1][::-1]: 51 | n_layer += 1 52 | z = tf.layers.dense(z, size, activation=self.activation, 53 | name="layer_{}".format(n_layer)) 54 | 55 | # activation function of last layes is linear 56 | n_layer += 1 57 | x_dash = tf.layers.dense(z, self.input_size, 58 | name="layer_{}".format(n_layer)) 59 | 60 | return x_dash 61 | 62 | def loss(self, x, x_dash): 63 | def euclid_norm(x): 64 | return tf.sqrt(tf.reduce_sum(tf.square(x), axis=1)) 65 | 66 | # Calculate Euclid norm, distance 67 | norm_x = euclid_norm(x) 68 | norm_x_dash = euclid_norm(x_dash) 69 | dist_x = euclid_norm(x - x_dash) 70 | dot_x = tf.reduce_sum(x * x_dash, axis=1) 71 | 72 | # Based on the original paper, features of reconstraction error 73 | # are composed of these loss functions: 74 | # 1. loss_E : relative Euclidean distance 75 | # 2. loss_C : cosine similarity 76 | min_val = 1e-3 77 | loss_E = dist_x / (norm_x + min_val) 78 | loss_C = 0.5 * (1.0 - dot_x / (norm_x * norm_x_dash + min_val)) 79 | return tf.concat([loss_E[:,None], loss_C[:,None]], axis=1) 80 | 81 | def extract_feature(self, x, x_dash, z_c): 82 | z_r = self.loss(x, x_dash) 83 | return tf.concat([z_c, z_r], axis=1) 84 | 85 | def inference(self, x): 86 | """ convert input to output tensor, which is composed of 87 | low-dimensional representation and reconstruction error. 88 | 89 | Parameters 90 | ---------- 91 | x : tf.Tensor shape : (n_samples, n_features) 92 | Input data 93 | 94 | Results 95 | ------- 96 | z : tf.Tensor shape : (n_samples, n2 + 2) 97 | Result data 98 | Second dimension of this data is equal to 99 | sum of compressed representation size and 100 | number of loss function (=2) 101 | 102 | x_dash : tf.Tensor shape : (n_samples, n_features) 103 | Reconstructed data for calculation of 104 | reconstruction error. 105 | """ 106 | 107 | with tf.variable_scope("CompNet"): 108 | # AutoEncoder 109 | z_c = self.compress(x) 110 | x_dash = self.reverse(z_c) 111 | 112 | # compose feature vector 113 | z = self.extract_feature(x, x_dash, z_c) 114 | 115 | return z, x_dash 116 | 117 | def reconstruction_error(self, x, x_dash): 118 | return tf.reduce_mean(tf.reduce_sum( 119 | tf.square(x - x_dash), axis=1), axis=0) 120 | -------------------------------------------------------------------------------- /networks/dagmm/estimation_net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import tensorflow as tf 3 | 4 | 5 | class EstimationNet: 6 | """Estimation Network 7 | 8 | This network converts input feature vector to softmax probability. 9 | Bacause loss function for this network is not defined, 10 | it should be implemented outside of this class. 
11 | """ 12 | 13 | def __init__(self, hidden_layer_sizes, activation=tf.nn.relu): 14 | """ 15 | Parameters 16 | ---------- 17 | hidden_layer_sizes : list of int 18 | list of sizes of hidden layers. 19 | For example, if the sizes are [n1, n2], 20 | layer sizes of the network are: 21 | input_size -> n1 -> n2 22 | (network outputs the softmax probabilities of "n2" layer) 23 | activation : function 24 | activation function of hidden layer. 25 | the funtcion of last layer is softmax function. 26 | """ 27 | self.hidden_layer_sizes = hidden_layer_sizes 28 | self.activation = activation 29 | 30 | def inference(self, z, dropout_ratio=None): 31 | """Output softmax probabilities 32 | 33 | Parameters 34 | ---------- 35 | z : tf.Tensor shape : (n_samples, n_features) 36 | Data inferenced by this network 37 | dropout_ratio : tf.Tensor shape : 0-dimension float (optional) 38 | Specify dropout ratio 39 | (if None, dropout is not applied) 40 | 41 | Results 42 | ------- 43 | probs : tf.Tensor shape : (n_samples, n_classes) 44 | Calculated probabilities 45 | """ 46 | with tf.variable_scope("EstNet"): 47 | n_layer = 0 48 | for size in self.hidden_layer_sizes[:-1]: 49 | n_layer += 1 50 | z = tf.layers.dense( 51 | z, size, activation=self.activation, name="layer_{}".format(n_layer) 52 | ) 53 | if dropout_ratio is not None: 54 | z = tf.layers.dropout( 55 | z, dropout_ratio, name="drop_{}".format(n_layer) 56 | ) 57 | 58 | # Last layer uses linear function (=logits) 59 | size = self.hidden_layer_sizes[-1] 60 | logits = tf.layers.dense(z, size, activation=None, name="logits") 61 | 62 | # Softmax output 63 | output = tf.contrib.layers.softmax(logits) 64 | 65 | return output 66 | -------------------------------------------------------------------------------- /networks/dagmm/gmm.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import numpy as np 3 | import tensorflow as tf 4 | 5 | 6 | class GMM: 7 | """ Gaussian Mixture Model (GMM) """ 8 | 9 | def __init__(self, n_comp): 10 | self.n_comp = n_comp 11 | self.phi = self.mu = self.sigma = None 12 | self.training = False 13 | 14 | def create_variables(self, n_features): 15 | with tf.variable_scope("GMM"): 16 | phi = tf.Variable( 17 | tf.zeros(shape=[self.n_comp]), dtype=tf.float32, name="phi" 18 | ) 19 | mu = tf.Variable( 20 | tf.zeros(shape=[self.n_comp, n_features]), dtype=tf.float32, name="mu" 21 | ) 22 | sigma = tf.Variable( 23 | tf.zeros(shape=[self.n_comp, n_features, n_features]), 24 | dtype=tf.float32, 25 | name="sigma", 26 | ) 27 | L = tf.Variable( 28 | tf.zeros(shape=[self.n_comp, n_features, n_features]), 29 | dtype=tf.float32, 30 | name="L", 31 | ) 32 | 33 | return phi, mu, sigma, L 34 | 35 | def fit(self, z, gamma): 36 | """fit data to GMM model 37 | 38 | Parameters 39 | ---------- 40 | z : tf.Tensor, shape (n_samples, n_features) 41 | data fitted to GMM. 42 | gamma : tf.Tensor, shape (n_samples, n_comp) 43 | probability. each row is correspond to row of z. 
44 | """ 45 | 46 | with tf.variable_scope("GMM"): 47 | # Calculate mu, sigma 48 | # i : index of samples 49 | # k : index of components 50 | # l,m : index of features 51 | gamma_sum = tf.reduce_sum(gamma, axis=0) 52 | self.phi = phi = tf.reduce_mean(gamma, axis=0) 53 | self.mu = mu = tf.einsum("ik,il->kl", gamma, z) / gamma_sum[:, None] 54 | z_centered = tf.sqrt(gamma[:, :, None]) * (z[:, None, :] - mu[None, :, :]) 55 | self.sigma = sigma = ( 56 | tf.einsum("ikl,ikm->klm", z_centered, z_centered) 57 | / gamma_sum[:, None, None] 58 | ) 59 | 60 | # Calculate a cholesky decomposition of covariance in advance 61 | n_features = z.shape[1] 62 | min_vals = tf.diag(tf.ones(n_features, dtype=tf.float32)) * 1e-6 63 | self.L = tf.cholesky(sigma + min_vals[None, :, :]) 64 | 65 | self.training = False 66 | 67 | def fix_op(self): 68 | """return operator to fix paramters of GMM 69 | Using this operator outside of this class, 70 | you can fix current parameter to static tensor variable. 71 | 72 | After you call this method, you have to run result 73 | operator immediatelly, and call energy() to use static 74 | variables of model parameter. 75 | 76 | Returns 77 | ------- 78 | op : operator of tensorflow 79 | operator to assign current parameter to variables 80 | """ 81 | 82 | phi, mu, sigma, L = self.create_variables(self.mu.shape[1]) 83 | 84 | op = tf.group( 85 | tf.assign(phi, self.phi), 86 | tf.assign(mu, self.mu), 87 | tf.assign(sigma, self.sigma), 88 | tf.assign(L, self.L), 89 | ) 90 | 91 | self.phi, self.phi_org = phi, self.phi 92 | self.mu, self.mu_org = mu, self.mu 93 | self.sigma, self.sigma_org = sigma, self.sigma 94 | self.L, self.L_org = L, self.L 95 | 96 | self.training = False 97 | 98 | return op 99 | 100 | def energy(self, z): 101 | """calculate an energy of each row of z 102 | 103 | Parameters 104 | ---------- 105 | z : tf.Tensor, shape (n_samples, n_features) 106 | data each row of which is calculated its energy. 107 | 108 | Returns 109 | ------- 110 | energy : tf.Tensor, shape (n_samples) 111 | calculated energies 112 | """ 113 | 114 | if self.training and self.phi is None: 115 | self.phi, self.mu, self.sigma, self.L = self.create_variable(z.shape[1]) 116 | 117 | with tf.variable_scope("GMM_energy"): 118 | # Instead of inverse covariance matrix, exploit cholesky decomposition 119 | # for stability of calculation. 
120 | z_centered = z[:, None, :] - self.mu[None, :, :] # ikl 121 | v = tf.matrix_triangular_solve( 122 | self.L, tf.transpose(z_centered, [1, 2, 0]) 123 | ) # kli 124 | 125 | # log(det(Sigma)) = 2 * sum[log(diag(L))] 126 | log_det_sigma = 2.0 * tf.reduce_sum( 127 | tf.log(tf.matrix_diag_part(self.L)), axis=1 128 | ) 129 | 130 | # To calculate energies, use "log-sum-exp" (different from orginal paper) 131 | d = z.get_shape().as_list()[1] 132 | logits = tf.log(self.phi[:, None]) - 0.5 * ( 133 | tf.reduce_sum(tf.square(v), axis=1) 134 | + d * tf.log(2.0 * np.pi) 135 | + log_det_sigma[:, None] 136 | ) 137 | energies = -tf.reduce_logsumexp(logits, axis=0) 138 | 139 | return energies 140 | 141 | def cov_diag_loss(self): 142 | with tf.variable_scope("GMM_diag_loss"): 143 | diag_loss = tf.reduce_sum(tf.divide(1, tf.matrix_diag_part(self.sigma))) 144 | 145 | return diag_loss 146 | -------------------------------------------------------------------------------- /networks/dagmm/main.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from DAGMM.dagmm import DAGMM 3 | import numpy as np 4 | import pandas as pd 5 | import os 6 | 7 | # Initialize 8 | model = DAGMM( 9 | comp_hiddens=[32, 16, 2], comp_activation=tf.nn.tanh, 10 | est_hiddens=[80, 40], est_activation=tf.nn.tanh, 11 | est_dropout_ratio=0.25 12 | ) 13 | 14 | # Fit the training data to model 15 | data_dir_path = 'C:/Users/Administrator/Downloads/DAGMM-master/SMD/data_concat/' 16 | csvs = os.listdir(data_dir_path) 17 | 18 | csv_path = [] 19 | 20 | for i in csvs: 21 | csv_path.append(data_dir_path + i) 22 | 23 | numbers = [] 24 | 25 | for j in csvs: 26 | name_temp = os.path.split(j)[1] 27 | numbers.append(name_temp[5:-4]) 28 | 29 | 30 | def generate_score(number): 31 | # Read the raw data. 
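    # NOTE: the hard-coded Windows paths in this script are environment-specific
    # examples; point them at your own SMD data. Each series is split in half:
    # the first half is used to fit the model and the second half to produce
    # anomaly scores (energies).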
32 | input_dir_path = 'C:/Users/Administrator/Downloads/DAGMM-master/SMD/data_concat/data-' + number + '.csv' 33 | data = np.array(pd.read_csv(input_dir_path, header=None), dtype=np.float64) 34 | x_train = data[: len(data) // 2] 35 | x_test = data[len(data) // 2:] 36 | print(len(x_train)) 37 | print(len(x_test)) 38 | model.fit(x_train) 39 | if not os.path.exists('../score'): 40 | os.makedirs('../score') 41 | # Evaluate energies 42 | # (the more the energy is, the more it is anomaly) 43 | energy = model.predict(x_test) 44 | np.save('../score/' + number + '.npy', energy) 45 | # Save fitted model to the directory 46 | model.save('./model/fitted_model' + number) 47 | 48 | # Restore saved model from directory 49 | model.restore('./model/fitted_model' + number) 50 | 51 | 52 | for j in numbers: 53 | generate_score(j) 54 | print('Finish generating ' + j) 55 | -------------------------------------------------------------------------------- /networks/ganf/RNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from functools import partial 4 | 5 | class RecurrentEncoder(nn.Module): 6 | """Recurrent encoder""" 7 | 8 | def __init__(self, n_features, latent_dim, rnn): 9 | super().__init__() 10 | 11 | self.rec_enc1 = rnn(n_features, latent_dim, batch_first=True) 12 | 13 | def forward(self, x): 14 | _, h_n = self.rec_enc1(x) 15 | 16 | return h_n 17 | 18 | class RecurrentDecoder(nn.Module): 19 | """Recurrent decoder for RNN and GRU""" 20 | 21 | def __init__(self, latent_dim, n_features, rnn_cell, device): 22 | super().__init__() 23 | 24 | self.n_features = n_features 25 | self.device = device 26 | self.rec_dec1 = rnn_cell(n_features, latent_dim) 27 | self.dense_dec1 = nn.Linear(latent_dim, n_features) 28 | 29 | def forward(self, h_0, seq_len): 30 | # Initialize output 31 | x = torch.tensor([], device = self.device) 32 | 33 | # Squeezing 34 | h_i = h_0.squeeze() 35 | 36 | # Reconstruct first element with encoder output 37 | x_i = self.dense_dec1(h_i) 38 | 39 | # Reconstruct remaining elements 40 | for i in range(0, seq_len): 41 | h_i = self.rec_dec1(x_i, h_i) 42 | x_i = self.dense_dec1(h_i) 43 | x = torch.cat([x, x_i], axis=1) 44 | 45 | return x.view(-1, seq_len, self.n_features) 46 | 47 | 48 | class RecurrentDecoderLSTM(nn.Module): 49 | """Recurrent decoder LSTM""" 50 | 51 | def __init__(self, latent_dim, n_features, rnn_cell, device): 52 | super().__init__() 53 | 54 | self.n_features = n_features 55 | self.device = device 56 | self.rec_dec1 = rnn_cell(n_features, latent_dim) 57 | self.dense_dec1 = nn.Linear(latent_dim, n_features) 58 | 59 | def forward(self, h_0, seq_len): 60 | # Initialize output 61 | x = torch.tensor([], device = self.device) 62 | 63 | # Squeezing 64 | h_i = [h.squeeze() for h in h_0] 65 | 66 | # Reconstruct first element with encoder output 67 | x_i = self.dense_dec1(h_i[0]) 68 | 69 | # Reconstruct remaining elements 70 | for i in range(0, seq_len): 71 | h_i = self.rec_dec1(x_i, h_i) 72 | x_i = self.dense_dec1(h_i[0]) 73 | x = torch.cat([x, x_i], axis = 1) 74 | 75 | return x.view(-1, seq_len, self.n_features) 76 | 77 | 78 | class RecurrentAE(nn.Module): 79 | """Recurrent autoencoder""" 80 | 81 | def __init__(self, n_features, latent_dim, device): 82 | super().__init__() 83 | 84 | # Encoder and decoder argsuration 85 | self.rnn, self.rnn_cell = nn.LSTM, nn.LSTMCell 86 | self.decoder = RecurrentDecoderLSTM 87 | self.latent_dim = latent_dim 88 | self.n_features = n_features 89 | self.device = device 90 | 91 | 
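        # The LSTM encoder summarizes each input window into its final hidden
        # state, and the decoder regenerates the window step by step from that
        # state; forward() flips the reconstruction back into input order.
        #
        # A rough shape sketch (hypothetical sizes, assuming a CPU device):
        #   ae = RecurrentAE(n_features=38, latent_dim=32, device=torch.device("cpu"))
        #   x = torch.randn(16, 60, 38)   # 16 windows, 60 time steps, 38 sensors
        #   out = ae(x)                   # -> (16, 60, 38)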
# Encoder and decoder 92 | self.encoder = RecurrentEncoder(self.n_features, self.latent_dim, self.rnn) 93 | self.decoder = self.decoder(self.latent_dim, self.n_features, self.rnn_cell, self.device) 94 | 95 | def forward(self, x): 96 | # x: N X K X L X D 97 | seq_len = x.shape[1] 98 | h_n = self.encoder(x) 99 | out = self.decoder(h_n, seq_len) 100 | 101 | return torch.flip(out, [1]) 102 | -------------------------------------------------------------------------------- /networks/ganf/dataset.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import torch 3 | from torch.utils.data import Dataset 4 | import numpy as np 5 | 6 | from torch.utils.data import DataLoader 7 | 8 | def load_SMD(data, label, batch_size): 9 | 10 | #mean_df = data.mean(axis=0) 11 | #std_df = data.std(axis=0) 12 | 13 | #data = pd.DataFrame((data-mean_df)/std_df) 14 | n_sensor = data.shape[1] 15 | #data = data.dropna(axis=1) 16 | data = np.array(data) 17 | 18 | train_df = data[:int(0.5*len(data))] 19 | train_label = label[:int(0.5*len(data))] 20 | 21 | val_df = data[int(0.5*len(data)):int(0.7*len(data))] 22 | val_label = label[int(0.5*len(data)):int(0.7*len(data))] 23 | 24 | test_df = data[int(0.7*len(data)):] 25 | test_label = label[int(0.7*len(data)):] 26 | 27 | train_loader = DataLoader(SMD(train_df, train_label), batch_size=batch_size, shuffle=True) 28 | 29 | val_loader = DataLoader(SMD(val_df,val_label), batch_size=batch_size, shuffle=False) 30 | test_loader = DataLoader(SMD(test_df,test_label), batch_size=batch_size, shuffle=False) 31 | 32 | return train_loader, val_loader, test_loader, n_sensor 33 | 34 | 35 | class SMD(Dataset): 36 | def __init__(self, data, label, window_size=60, stride_size=1): 37 | super(SMD, self).__init__() 38 | self.data = data 39 | self.window_size = window_size 40 | self.stride_size = stride_size 41 | 42 | self.data, self.idx, self.label = self.preprocess(data, label) 43 | 44 | def preprocess(self, data, label): 45 | start_idx = np.arange(0, len(data) - self.window_size, self.stride_size) 46 | end_idx = np.arange(self.window_size, len(data), self.stride_size) 47 | 48 | return data, start_idx, label[end_idx] 49 | 50 | def __len__(self): 51 | length = len(self.idx) 52 | 53 | return length 54 | 55 | def __getitem__(self, index): 56 | # N X K X L X D 57 | start = self.idx[index] 58 | end = start + self.window_size 59 | data = self.data[start:end].reshape([self.window_size, -1, 1]) 60 | 61 | return torch.FloatTensor(data).transpose(0, 1) 62 | -------------------------------------------------------------------------------- /networks/ganf/fit.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | from torch.utils.data import DataLoader 5 | from networks.ganf.GANF import GANF 6 | from sklearn.metrics import roc_auc_score 7 | import sys 8 | import random 9 | import numpy as np 10 | from torch.nn.utils import clip_grad_value_ 11 | import seaborn as sns 12 | import matplotlib.pyplot as plt 13 | import logging 14 | sys.path.append("../") 15 | 16 | 17 | -------------------------------------------------------------------------------- /networks/ganf/graph_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Parameter, Linear, Sequential, BatchNorm1d, ReLU 3 | import torch.nn.functional as F 4 | from torch_geometric.nn.conv import MessagePassing 5 | from torch_geometric.utils import 
remove_self_loops, add_self_loops, softmax 6 | 7 | from torch_geometric.nn.inits import glorot, zeros 8 | 9 | class GraphLayer(MessagePassing): 10 | def __init__(self, in_channels, out_channels, heads=1, concat=True, 11 | negative_slope=0.2, dropout=0, bias=True, inter_dim=-1,**kwargs): 12 | super(GraphLayer, self).__init__(aggr='add', **kwargs) 13 | 14 | self.in_channels = in_channels 15 | self.out_channels = out_channels 16 | self.heads = heads 17 | self.concat = concat 18 | self.negative_slope = negative_slope 19 | self.dropout = dropout 20 | 21 | self.__alpha__ = None 22 | 23 | self.lin = Linear(in_channels, heads * out_channels, bias=False) 24 | 25 | self.att_i = Parameter(torch.Tensor(1, heads, out_channels)) 26 | self.att_j = Parameter(torch.Tensor(1, heads, out_channels)) 27 | self.att_em_i = Parameter(torch.Tensor(1, heads, out_channels)) 28 | self.att_em_j = Parameter(torch.Tensor(1, heads, out_channels)) 29 | 30 | if bias and concat: 31 | self.bias = Parameter(torch.Tensor(heads * out_channels)) 32 | elif bias and not concat: 33 | self.bias = Parameter(torch.Tensor(out_channels)) 34 | else: 35 | self.register_parameter('bias', None) 36 | 37 | self.reset_parameters() 38 | 39 | def reset_parameters(self): 40 | glorot(self.lin.weight) 41 | glorot(self.att_i) 42 | glorot(self.att_j) 43 | 44 | zeros(self.att_em_i) 45 | zeros(self.att_em_j) 46 | 47 | zeros(self.bias) 48 | 49 | 50 | 51 | def forward(self, x, edge_index, embedding, return_attention_weights=False): 52 | """""" 53 | if torch.is_tensor(x): 54 | x = self.lin(x) 55 | x = (x, x) 56 | else: 57 | x = (self.lin(x[0]), self.lin(x[1])) 58 | 59 | edge_index, _ = remove_self_loops(edge_index) 60 | edge_index, _ = add_self_loops(edge_index, 61 | num_nodes=x[1].size(self.node_dim)) 62 | 63 | out = self.propagate(edge_index, x=x, embedding=embedding, edges=edge_index, 64 | return_attention_weights=return_attention_weights) 65 | 66 | if self.concat: 67 | out = out.view(-1, self.heads * self.out_channels) 68 | else: 69 | out = out.mean(dim=1) 70 | 71 | if self.bias is not None: 72 | out = out + self.bias 73 | 74 | if return_attention_weights: 75 | alpha, self.__alpha__ = self.__alpha__, None 76 | return out, (edge_index, alpha) 77 | else: 78 | return out 79 | 80 | def message(self, x_i, x_j, edge_index_i, size_i, 81 | embedding, 82 | edges, 83 | return_attention_weights): 84 | 85 | x_i = x_i.view(-1, self.heads, self.out_channels) 86 | x_j = x_j.view(-1, self.heads, self.out_channels) 87 | 88 | if embedding is not None: 89 | embedding_i, embedding_j = embedding[edge_index_i], embedding[edges[0]] 90 | embedding_i = embedding_i.unsqueeze(1).repeat(1,self.heads,1) 91 | embedding_j = embedding_j.unsqueeze(1).repeat(1,self.heads,1) 92 | 93 | key_i = torch.cat((x_i, embedding_i), dim=-1) 94 | key_j = torch.cat((x_j, embedding_j), dim=-1) 95 | 96 | 97 | 98 | cat_att_i = torch.cat((self.att_i, self.att_em_i), dim=-1) 99 | cat_att_j = torch.cat((self.att_j, self.att_em_j), dim=-1) 100 | 101 | alpha = (key_i * cat_att_i).sum(-1) + (key_j * cat_att_j).sum(-1) 102 | 103 | 104 | alpha = alpha.view(-1, self.heads, 1) 105 | 106 | 107 | alpha = F.leaky_relu(alpha, self.negative_slope) 108 | alpha = softmax(alpha, edge_index_i, num_nodes=size_i) 109 | 110 | if return_attention_weights: 111 | self.__alpha__ = alpha 112 | 113 | alpha = F.dropout(alpha, p=self.dropout, training=self.training) 114 | 115 | return x_j * alpha.view(-1, self.heads, 1) 116 | 117 | 118 | 119 | def __repr__(self): 120 | return '{}({}, {}, heads={})'.format(self.__class__.__name__, 
121 | self.in_channels, 122 | self.out_channels, self.heads) 123 | -------------------------------------------------------------------------------- /networks/ganf/predict.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import torch 4 | from networks.ganf.GANF import GANF 5 | import numpy as np 6 | from sklearn.metrics import roc_auc_score 7 | 8 | 9 | def predict_prob(model, test_iterator, evaluate_dir, window_labels): 10 | device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") 11 | model = model.to(device) 12 | model.load_state_dict(torch.load(evaluate_dir + "/GANF_SMD_best.pt")) 13 | A = torch.load(evaluate_dir + "/graph_best.pt").to(device) 14 | model.eval() 15 | 16 | loss_test = [] 17 | with torch.no_grad(): 18 | for x in test_iterator: 19 | x = x.unsqueeze(-1).transpose(1, 2) 20 | x = x.to(device) 21 | loss = -model.test(x, A.data).cpu().numpy() 22 | loss_test.append(loss) 23 | loss_test = np.concatenate(loss_test) 24 | anomaly_score = loss_test 25 | anomaly_label = window_labels[-len(anomaly_score) :] 26 | 27 | return anomaly_score, anomaly_label 28 | -------------------------------------------------------------------------------- /networks/ganf/utils.py: -------------------------------------------------------------------------------- 1 | #%% 2 | import torch 3 | 4 | def h(A): 5 | return torch.trace(torch.matrix_exp(A*A)) - A.shape[0] 6 | 7 | def normalize(A): 8 | D = A.sum(dim=0) 9 | D_inv = D.pow_(-1) 10 | D_inv.masked_fill_(D_inv == float('inf'), 0) 11 | 12 | return A * D_inv 13 | 14 | def thresholding(A, thre): 15 | return torch.where(A.abs()>thre, A, torch.scalar_tensor(0.0, dtype=torch.float32, device=A.device)) 16 | 17 | def binarize(A, thre): 18 | return torch.where(A.abs()>thre, 1.0, 0.0) 19 | # %% 20 | import pandas as pd 21 | def get_timestamp(stamps): 22 | return (stamps - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s") 23 | # %% 24 | import numpy as np 25 | from sklearn.metrics import auc 26 | def roc_auc(label_time, pred, negative_sample, sigma): 27 | negative_sample = np.sort(negative_sample)[::-1] 28 | thresholds = list(negative_sample[::int(len(negative_sample)/50)]) 29 | thresholds.append(negative_sample[-1]) 30 | tps=[] 31 | fps=[] 32 | 33 | for thre in thresholds: 34 | pred_pos = pred[pred>thre] 35 | 36 | tp = 0 37 | for i in range(len(label_time)): 38 | start_time = label_time[i] - pd.Timedelta(30, unit='min') 39 | end_time = label_time[i] + pd.Timedelta(30, unit='min') 40 | 41 | detected_event = pred_pos[str(start_time): str(end_time)] 42 | if len(detected_event)>0: 43 | timestamps = get_timestamp(detected_event.index) 44 | delta_t = np.min(np.abs(timestamps.values - get_timestamp(label_time[i]))) 45 | tp += np.exp(-np.power(delta_t/sigma,2)) 46 | tp = tp/len(label_time) 47 | tps.append(tp) 48 | 49 | fp = (negative_sample>thre).sum()/len(negative_sample) 50 | fps.append(fp) 51 | return auc(fps,tps), (fps,tps) 52 | # %% 53 | def roc_auc_all(loss_np, delta_t, sigma): 54 | 55 | ground_truth = np.exp(-np.power((delta_t.values)/sigma,2)) 56 | 57 | loss_sort = np.sort(loss_np)[::-1] 58 | thresholds = list(loss_sort[::int(len(loss_sort)/50)]) 59 | thresholds.append(loss_sort[-1]) 60 | 61 | n_pos = ground_truth.sum() 62 | n_neg = (1-ground_truth).sum() 63 | tps = [] 64 | fps = [] 65 | for thre in thresholds: 66 | pred_pos = loss_np>thre 67 | 68 | tp = ground_truth[pred_pos].sum()/n_pos 69 | fp = (1-ground_truth[pred_pos]).sum()/n_neg 70 | tps.append(tp) 71 | fps.append(fp) 72 | 
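    # The soft ground truth exp(-(delta_t/sigma)^2) decays with distance to the
    # labelled event, so tp/fp above are weighted rates; the final score is the
    # area under the resulting (fp, tp) curve.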
73 | auc_score = auc(fps, tps) 74 | return auc_score, fps, tps -------------------------------------------------------------------------------- /networks/lstm/__init__.py: -------------------------------------------------------------------------------- 1 | from .lstm import LSTM -------------------------------------------------------------------------------- /networks/lstm/lstm.py: -------------------------------------------------------------------------------- 1 | ## Unit test only start 2 | # import torch 3 | # import sys 4 | import torch 5 | from torch import nn 6 | from networks.lstm.wrappers import TimeSeriesEncoder 7 | 8 | 9 | class LSTM(TimeSeriesEncoder): 10 | """ 11 | Encoder of a time series using a LSTM, ccomputing a linear transformation 12 | of the output of an LSTM 13 | 14 | Takes as input a three-dimensional tensor (`B`, `L`, `C`) where `B` is the 15 | batch size, `C` is the number of input channels, and `L` is the length of 16 | the input. Outputs a two-dimensional tensor (`B`, `C`). 17 | """ 18 | 19 | def __init__( 20 | self, 21 | in_channels, 22 | hidden_size=64, 23 | num_layers=1, 24 | dropout=0, 25 | prediction_length=1, 26 | prediction_dims=[], 27 | **kwargs, 28 | ): 29 | super().__init__(architecture="LSTM", **kwargs) 30 | 31 | self.prediction_dims = ( 32 | prediction_dims if prediction_dims else list(range(in_channels)) 33 | ) 34 | self.prediction_length = prediction_length 35 | 36 | self.lstm = nn.LSTM( 37 | input_size=in_channels, 38 | hidden_size=hidden_size, 39 | num_layers=num_layers, 40 | batch_first=True, 41 | ) 42 | clf_input_dim = hidden_size 43 | final_output_dim = prediction_length * len(self.prediction_dims) 44 | 45 | self.predcitor = nn.Linear(clf_input_dim, final_output_dim) 46 | 47 | self.dropout = nn.Dropout(dropout) 48 | self.loss_fn = nn.MSELoss(reduction="none") 49 | 50 | self.compile() 51 | 52 | def forward(self, batch_window): 53 | # batch_window = batch_window.permute(0, 2, 1) # b x win x ts_dim 54 | self.batch_size = batch_window.size(0) 55 | x, y = ( 56 | batch_window[:, 0 : -self.prediction_length, :], 57 | batch_window[:, -self.prediction_length :, self.prediction_dims], 58 | ) 59 | 60 | lstm_out, _ = self.lstm(x) 61 | lstm_out = self.dropout(lstm_out[:, -1, :]) 62 | 63 | recst = self.predcitor(lstm_out).view( 64 | self.batch_size, self.prediction_length, len(self.prediction_dims) 65 | ) 66 | 67 | loss = self.loss_fn(recst, y) 68 | return_dict = { 69 | "loss": loss.sum(), 70 | "recst": recst, 71 | "score": loss, 72 | "y": y, 73 | } 74 | 75 | return return_dict 76 | -------------------------------------------------------------------------------- /networks/lstm/wrappers.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. 
See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 17 | 18 | import os 19 | import logging 20 | import time 21 | import torch 22 | from common.utils import set_device 23 | from collections import defaultdict 24 | 25 | 26 | class TimeSeriesEncoder(torch.nn.Module): 27 | def __init__( 28 | self, 29 | save_path, 30 | nb_epoch, 31 | lr, 32 | device="cpu", 33 | architecture="base", 34 | **kwargs, 35 | ): 36 | super().__init__() 37 | self.device = set_device(device) 38 | self.nb_epoch = nb_epoch 39 | self.lr = lr 40 | self.best_metric = float("inf") 41 | self.time_tracker = {} 42 | self.model_save_file = os.path.join(save_path, f"{architecture}_model.ckpt") 43 | 44 | def compile(self): 45 | logging.info("Compiling finished.") 46 | self.optimizer = torch.optim.Adam( 47 | self.parameters(), lr=self.lr, weight_decay=0.001 48 | ) 49 | self = self.to(self.device) 50 | 51 | def save_encoder(self): 52 | logging.info("Saving model to {}".format(self.model_save_file)) 53 | try: 54 | torch.save( 55 | self.state_dict(), 56 | self.model_save_file, 57 | _use_new_zipfile_serialization=False, 58 | ) 59 | except: 60 | torch.save(self.state_dict(), self.model_save_file) 61 | 62 | def load_encoder(self, model_save_path=""): 63 | logging.info("Loading model from {}".format(self.model_save_file)) 64 | self.load_state_dict(torch.load(self.model_save_file, map_location=self.device)) 65 | 66 | def fit( 67 | self, 68 | train_iterator, 69 | patience=10, 70 | **kwargs, 71 | ): 72 | num_batches = len(train_iterator) 73 | logging.info("Start training for {} batches.".format(num_batches)) 74 | train_start = time.time() 75 | # Encoder training 76 | for epoch in range(1, self.nb_epoch + 1): 77 | running_loss = 0 78 | for idx, batch in enumerate(train_iterator): 79 | # batch: b x d x dim 80 | batch = batch.to(self.device).float() 81 | return_dict = self(batch) 82 | self.optimizer.zero_grad() 83 | loss = return_dict["loss"] 84 | loss.backward() 85 | self.optimizer.step() 86 | running_loss += loss.item() 87 | avg_loss = running_loss / num_batches 88 | logging.info("Epoch: {}, loss: {:.5f}".format(epoch, avg_loss)) 89 | stop_training = self.__on_epoch_end(avg_loss, patience=patience) 90 | if stop_training: 91 | logging.info("Early stop at epoch {}.".format(epoch)) 92 | break 93 | train_end = time.time() 94 | 95 | self.time_tracker["train"] = train_end - train_start 96 | return self 97 | 98 | def __on_epoch_end(self, monitor_value, patience): 99 | if monitor_value < self.best_metric: 100 | self.best_metric = monitor_value 101 | logging.info("Saving model for performance: {:.3f}".format(monitor_value)) 102 | self.save_encoder() 103 | self.worse_count = 0 104 | else: 105 | self.worse_count += 1 106 | if self.worse_count >= patience: 107 | return True 108 | return False 109 | 110 | def encode(self, iterator): 111 | # Check if the given time series have unequal lengths 112 | save_dict = defaultdict(list) 113 | self = self.eval() 114 | 115 | used_keys = ["recst", "y", "diff"] 116 | with torch.no_grad(): 117 | for batch in iterator: 118 | batch = batch.to(self.device).float() 119 | return_dict = self(batch) 120 | for k in used_keys: 121 | save_dict[k].append(return_dict[k]) 122 | self = self.train() 123 | return {k: torch.cat(v) for k, v in save_dict.items()} 124 | 125 | def predict_prob(self, iterator, window_labels=None): 126 | logging.info("Evaluating") 127 | self = self.eval() 128 | test_start = time.time() 129 | with torch.no_grad(): 130 | score_list = [] 131 | for batch 
in iterator: 132 | batch = batch.to(self.device).float() 133 | return_dict = self(batch) 134 | score = ( 135 | # average all dimension 136 | return_dict["score"] 137 | .mean(dim=-1) 138 | .sigmoid() # b x prediction_length 139 | ) 140 | # mean all timestamp 141 | score_list.append(score.mean(dim=-1)) 142 | test_end = time.time() 143 | self.time_tracker["test"] = test_end - test_start 144 | 145 | anomaly_score = torch.cat(score_list, dim=0).cpu().numpy() 146 | if window_labels is not None: 147 | anomaly_label = (window_labels.sum(axis=1) > 0).astype(int) 148 | return anomaly_score, anomaly_label 149 | return anomaly_score 150 | -------------------------------------------------------------------------------- /networks/mscred/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import MSCRED 2 | -------------------------------------------------------------------------------- /networks/mscred/models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | import numpy as np 6 | 7 | from common.utils import set_device 8 | from .dlutils import ConvLSTM 9 | 10 | ## MSCRED Model (AAAI 19) 11 | class MSCRED(nn.Module): 12 | def __init__(self, feats, window_size, lr, model_root, device): 13 | super(MSCRED, self).__init__() 14 | self.name = "MSCRED" 15 | self.name = "TranAD" 16 | self.n_feats = feats 17 | self.n_window = window_size 18 | self.lr = lr 19 | self.device = set_device(device) 20 | self.encoder = nn.ModuleList( 21 | [ 22 | ConvLSTM(1, 32, (3, 3), 1, True, True, False), 23 | ConvLSTM(32, 64, (3, 3), 1, True, True, False), 24 | ConvLSTM(64, 128, (3, 3), 1, True, True, False), 25 | ] 26 | ) 27 | self.decoder = nn.Sequential( 28 | nn.ConvTranspose2d(128, 64, (3, 3), 1, 1), 29 | nn.ReLU(True), 30 | nn.ConvTranspose2d(64, 32, (3, 3), 1, 1), 31 | nn.ReLU(True), 32 | nn.ConvTranspose2d(32, 1, (3, 3), 1, 1), 33 | nn.Sigmoid(), 34 | ) 35 | self.init_model(lr, model_root) 36 | 37 | def forward(self, g): 38 | batch_size = g.shape[0] 39 | ## Encode 40 | z = g.view(batch_size, 1, self.n_window, self.n_feats) 41 | for cell in self.encoder: 42 | _, z = cell(z.unsqueeze(1)) 43 | z = z[0][0] 44 | ## Decode 45 | x = self.decoder(z) 46 | x = x.view(batch_size, self.n_window, self.n_feats) 47 | return x 48 | 49 | def init_model(self, lr, model_root, retrain=True, test=False): 50 | optimizer = torch.optim.AdamW(self.parameters(), lr=lr, weight_decay=1e-5) 51 | 52 | if os.path.exists(model_root) and (not retrain or test): 53 | logging.info("Loading pre-trained model") 54 | checkpoint = torch.load(os.path.join(model_root, "model.pt")) 55 | self.load_state_dict(checkpoint["model_state_dict"]) 56 | optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) 57 | else: 58 | logging.info("Creating new model: MSCRED") 59 | 60 | self.optimizer = optimizer 61 | logging.info("Finish model initialization.") 62 | 63 | def fit(self, nb_epoch, dataloader, training=True): 64 | self.to(self.device) 65 | for epoch in range(1, nb_epoch + 1): 66 | mse_func = nn.MSELoss(reduction="none") 67 | if training: 68 | logging.info("Training epoch: {}".format(epoch)) 69 | for _, d in enumerate(dataloader): 70 | d = d.to(self.device) 71 | x = self(d) 72 | loss = torch.mean(mse_func(x, d)) 73 | self.optimizer.zero_grad() 74 | loss.backward() 75 | self.optimizer.step() 76 | logging.info("Epoch: {} finished.".format(epoch)) 77 | 78 | def predict_prob(self, test_iterator, 
label_windows=None): 79 | with torch.no_grad(): 80 | self.eval() 81 | mse_func = nn.MSELoss(reduction="none") 82 | loss_steps = [] 83 | for d in test_iterator: 84 | d = d.to(self.device) 85 | x = self(d) 86 | loss = mse_func(x, d).view(-1, self.n_window, self.n_feats) 87 | loss_steps.append(loss.detach().cpu().numpy()) 88 | anomaly_score = np.concatenate(loss_steps).mean(axis=(2, 1)) 89 | if label_windows is None: 90 | return anomaly_score 91 | else: 92 | anomaly_label = (np.sum(label_windows, axis=1) >= 1) + 0 93 | return anomaly_score, anomaly_label 94 | -------------------------------------------------------------------------------- /networks/mtad_gat/__init__.py: -------------------------------------------------------------------------------- 1 | from .mtad_gat import MTAD_GAT 2 | -------------------------------------------------------------------------------- /networks/mtad_gat/train.py: -------------------------------------------------------------------------------- 1 | import json 2 | from datetime import datetime 3 | import torch.nn as nn 4 | 5 | from args import get_parser 6 | from utils import * 7 | from mtad_gat import MTAD_GAT 8 | from prediction import Predictor 9 | from training import Trainer 10 | 11 | 12 | if __name__ == "__main__": 13 | 14 | id = datetime.now().strftime("%d%m%Y_%H%M%S") 15 | 16 | parser = get_parser() 17 | args = parser.parse_args() 18 | 19 | dataset = args.dataset 20 | window_size = args.lookback 21 | spec_res = args.spec_res 22 | normalize = args.normalize 23 | n_epochs = args.epochs 24 | batch_size = args.bs 25 | init_lr = args.init_lr 26 | val_split = args.val_split 27 | shuffle_dataset = args.shuffle_dataset 28 | use_cuda = args.use_cuda 29 | print_every = args.print_every 30 | log_tensorboard = args.log_tensorboard 31 | group_index = args.group[0] 32 | index = args.group[2:] 33 | args_summary = str(args.__dict__) 34 | logging.info(args_summary) 35 | 36 | if dataset == "SMD": 37 | output_path = f"output/SMD/{args.group}" 38 | (x_train, _), (x_test, y_test) = get_data( 39 | f"machine-{group_index}-{index}", normalize=normalize 40 | ) 41 | elif dataset in ["MSL", "SMAP"]: 42 | output_path = f"output/{dataset}" 43 | (x_train, _), (x_test, y_test) = get_data(dataset, normalize=normalize) 44 | else: 45 | raise Exception(f'Dataset "{dataset}" not available.') 46 | 47 | log_dir = f"{output_path}/logs" 48 | if not os.path.exists(output_path): 49 | os.makedirs(output_path) 50 | if not os.path.exists(log_dir): 51 | os.makedirs(log_dir) 52 | save_path = f"{output_path}/{id}" 53 | 54 | x_train = torch.from_numpy(x_train).float() 55 | x_test = torch.from_numpy(x_test).float() 56 | n_features = x_train.shape[1] 57 | 58 | target_dims = get_target_dims(dataset) 59 | if target_dims is None: 60 | out_dim = n_features 61 | logging.info(f"Will forecast and reconstruct all {n_features} input features") 62 | elif type(target_dims) == int: 63 | logging.info(f"Will forecast and reconstruct input feature: {target_dims}") 64 | out_dim = 1 65 | else: 66 | logging.info(f"Will forecast and reconstruct input features: {target_dims}") 67 | out_dim = len(target_dims) 68 | 69 | train_dataset = SlidingWindowDataset(x_train, window_size, target_dims) 70 | test_dataset = SlidingWindowDataset(x_test, window_size, target_dims) 71 | 72 | train_loader, val_loader, test_loader = create_data_loaders( 73 | train_dataset, batch_size, val_split, shuffle_dataset, test_dataset=test_dataset 74 | ) 75 | 76 | logging.info(next(iter(train_loader))[0].shape) 77 | # 
logging.info(next(iter(val_loader)).shape) 78 | # logging.info(next(iter(test_loader)).shape) 79 | 80 | model = MTAD_GAT( 81 | n_features, 82 | window_size, 83 | out_dim, 84 | kernel_size=args.kernel_size, 85 | use_gatv2=args.use_gatv2, 86 | feat_gat_embed_dim=args.feat_gat_embed_dim, 87 | time_gat_embed_dim=args.time_gat_embed_dim, 88 | gru_n_layers=args.gru_n_layers, 89 | gru_hid_dim=args.gru_hid_dim, 90 | forecast_n_layers=args.fc_n_layers, 91 | forecast_hid_dim=args.fc_hid_dim, 92 | recon_n_layers=args.recon_n_layers, 93 | recon_hid_dim=args.recon_hid_dim, 94 | dropout=args.dropout, 95 | alpha=args.alpha, 96 | ) 97 | 98 | optimizer = torch.optim.Adam(model.parameters(), lr=args.init_lr) 99 | forecast_criterion = nn.MSELoss() 100 | recon_criterion = nn.MSELoss() 101 | 102 | trainer = Trainer( 103 | model, 104 | optimizer, 105 | window_size, 106 | n_features, 107 | target_dims, 108 | n_epochs, 109 | batch_size, 110 | init_lr, 111 | forecast_criterion, 112 | recon_criterion, 113 | use_cuda, 114 | save_path, 115 | log_dir, 116 | print_every, 117 | log_tensorboard, 118 | args_summary, 119 | ) 120 | 121 | trainer.fit(train_loader, val_loader) 122 | 123 | plot_losses(trainer.losses, save_path=save_path, plot=False) 124 | 125 | # Check test loss 126 | test_loss = trainer.evaluate(test_loader) 127 | logging.info(f"Test forecast loss: {test_loss[0]:.5f}") 128 | logging.info(f"Test reconstruction loss: {test_loss[1]:.5f}") 129 | logging.info(f"Test total loss: {test_loss[2]:.5f}") 130 | 131 | # Some suggestions for POT args 132 | level_q_dict = { 133 | "SMAP": (0.90, 0.005), 134 | "MSL": (0.90, 0.001), 135 | "SMD-1": (0.9950, 0.001), 136 | "SMD-2": (0.9925, 0.001), 137 | "SMD-3": (0.9999, 0.001), 138 | } 139 | key = "SMD-" + args.group[0] if args.dataset == "SMD" else args.dataset 140 | level, q = level_q_dict[key] 141 | if args.level is not None: 142 | level = args.level 143 | if args.q is not None: 144 | q = args.q 145 | 146 | # Some suggestions for Epsilon args 147 | reg_level_dict = {"SMAP": 0, "MSL": 0, "SMD-1": 1, "SMD-2": 1, "SMD-3": 1} 148 | key = "SMD-" + args.group[0] if dataset == "SMD" else dataset 149 | reg_level = reg_level_dict[key] 150 | 151 | trainer.load(f"{save_path}/model.pt") 152 | prediction_args = { 153 | "dataset": dataset, 154 | "target_dims": target_dims, 155 | "scale_scores": args.scale_scores, 156 | "level": level, 157 | "q": q, 158 | "dynamic_pot": args.dynamic_pot, 159 | "use_mov_av": args.use_mov_av, 160 | "gamma": args.gamma, 161 | "reg_level": reg_level, 162 | "save_path": save_path, 163 | } 164 | best_model = trainer.model 165 | predictor = Predictor( 166 | best_model, 167 | window_size, 168 | n_features, 169 | prediction_args, 170 | ) 171 | 172 | label = y_test[window_size:] if y_test is not None else None 173 | predictor.predict_anomalies(x_train, x_test, label) 174 | 175 | # Save config 176 | args_path = f"{save_path}/config.txt" 177 | with open(args_path, "w") as f: 178 | json.dump(args.__dict__, f, indent=2) 179 | -------------------------------------------------------------------------------- /networks/omni_anomaly/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpsPAI/MTAD/63a601f45a088f12f8e3acf7bb887b2bfdd30ff2/networks/omni_anomaly/__init__.py -------------------------------------------------------------------------------- /networks/omni_anomaly/prediction.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 
import time 3 | 4 | import numpy as np 5 | import six 6 | import tensorflow as tf 7 | from tfsnippet.utils import ( 8 | VarScopeObject, 9 | get_default_session_or_error, 10 | reopen_variable_scope, 11 | ) 12 | 13 | 14 | __all__ = ["Predictor"] 15 | 16 | 17 | class Predictor(VarScopeObject): 18 | """ 19 | OmniAnomaly predictor. 20 | 21 | Args: 22 | model (OmniAnomaly): The :class:`OmniAnomaly` model instance. 23 | n_z (int or None): Number of `z` samples to take for each `x`. 24 | If :obj:`None`, one sample without explicit sampling dimension. 25 | (default 1024) 26 | batch_size (int): Size of each mini-batch for prediction. 27 | (default 32) 28 | feed_dict (dict[tf.Tensor, any]): User provided feed dict for 29 | prediction. (default :obj:`None`) 30 | last_point_only (bool): Whether to obtain the reconstruction 31 | probability of only the last point in each window? 32 | (default :obj:`True`) 33 | name (str): Optional name of this predictor 34 | (argument of :class:`tfsnippet.utils.VarScopeObject`). 35 | scope (str): Optional scope of this predictor 36 | (argument of :class:`tfsnippet.utils.VarScopeObject`). 37 | """ 38 | 39 | def __init__( 40 | self, 41 | model, 42 | n_z=1024, 43 | batch_size=32, 44 | feed_dict=None, 45 | last_point_only=True, 46 | name=None, 47 | scope=None, 48 | ): 49 | super(Predictor, self).__init__(name=name, scope=scope) 50 | self._model = model 51 | self._n_z = n_z 52 | self._batch_size = batch_size 53 | if feed_dict is not None: 54 | self._feed_dict = dict(six.iteritems(feed_dict)) 55 | else: 56 | self._feed_dict = {} 57 | self._last_point_only = last_point_only 58 | 59 | with reopen_variable_scope(self.variable_scope): 60 | # input placeholders 61 | self._input_x = tf.placeholder( 62 | dtype=tf.float32, 63 | shape=[None, model.window_length, model.x_dims], 64 | name="input_x", 65 | ) 66 | self._input_y = tf.placeholder( 67 | dtype=tf.int32, shape=[None, model.window_length], name="input_y" 68 | ) 69 | 70 | # outputs of interest 71 | self._score = self._score_without_y = None 72 | 73 | def _get_score_without_y(self): 74 | if self._score_without_y is None: 75 | with reopen_variable_scope(self.variable_scope), tf.name_scope( 76 | "score_without_y" 77 | ): 78 | self._score_without_y, self._q_net_z = self.model.get_score( 79 | x=self._input_x, 80 | n_z=self._n_z, 81 | last_point_only=self._last_point_only, 82 | ) 83 | # print ('\t_get_score_without_y ',type(self._q_net_z)) 84 | return self._score_without_y, self._q_net_z 85 | 86 | @property 87 | def model(self): 88 | """ 89 | Get the :class:`OmniAnomaly` model instance. 90 | 91 | Returns: 92 | OmniAnomaly: The :class:`OmniAnomaly` model instance. 93 | """ 94 | return self._model 95 | 96 | def get_score(self, test_iterator): 97 | """ 98 | Get the `reconstruction probability` of specified KPI observations. 99 | 100 | The larger `reconstruction probability`, the less likely a point 101 | is anomaly. You may take the negative of the score, if you want 102 | something to directly indicate the severity of anomaly. 103 | 104 | Args: 105 | values (np.ndarray): 1-D float32 array, the KPI observations. 106 | 107 | Returns: 108 | np.ndarray: The `reconstruction probability`, 109 | 1-D array if `last_point_only` is :obj:`True`, 110 | or 2-D array if `last_point_only` is :obj:`False`. 
111 | """ 112 | with tf.name_scope("Predictor.get_score"): 113 | sess = get_default_session_or_error() 114 | collector = [] 115 | collector_z = [] 116 | pred_time = [] 117 | 118 | for b_x in test_iterator: 119 | start_iter_time = time.time() 120 | feed_dict = dict(six.iteritems(self._feed_dict)) 121 | feed_dict[self._input_x] = b_x 122 | b_r, q_net_z = sess.run( 123 | self._get_score_without_y(), feed_dict=feed_dict 124 | ) 125 | collector.append(b_r) 126 | pred_time.append(time.time() - start_iter_time) 127 | collector_z.append(q_net_z) 128 | 129 | # merge the results of mini-batches 130 | result = np.concatenate(collector, axis=0) 131 | result_z = np.concatenate(collector_z, axis=0) 132 | return result, result_z, np.sum(pred_time) 133 | -------------------------------------------------------------------------------- /networks/omni_anomaly/requirements.txt: -------------------------------------------------------------------------------- 1 | six == 1.11.0 2 | matplotlib == 3.0.2 3 | numpy == 1.15.4 4 | pandas == 0.23.4 5 | scipy == 1.2.0 6 | scikit_learn == 0.24.1 7 | # tensorflow == 1.12.0 8 | tensorflow-gpu == 1.12.0 9 | tensorflow_probability == 0.5.0 10 | tqdm == 4.28.1 11 | imageio == 2.4.1 12 | fs == 2.3.0 13 | click == 7.0 14 | git+https://github.com/thu-ml/zhusuan.git 15 | git+https://github.com/haowen-xu/tfsnippet.git@v0.2.0-alpha1 -------------------------------------------------------------------------------- /networks/omni_anomaly/wrapper.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import logging 3 | 4 | import tensorflow as tf 5 | import tensorflow_probability as tfp 6 | from tfsnippet.distributions import Distribution 7 | 8 | 9 | class TfpDistribution(Distribution): 10 | """ 11 | A wrapper class for `tfp.distributions.Distribution` 12 | """ 13 | 14 | @property 15 | def is_continuous(self): 16 | return self._is_continuous 17 | 18 | def __init__(self, distribution): 19 | if not isinstance(distribution, tfp.distributions.Distribution): 20 | raise TypeError( 21 | "`distribution` is not an instance of `tfp." 
22 | "distributions.Distribution`" 23 | ) 24 | super(TfpDistribution, self).__init__() 25 | self._distribution = distribution 26 | self._is_continuous = True 27 | self._is_reparameterized = ( 28 | self._distribution.reparameterization_type 29 | is tfp.distributions.FULLY_REPARAMETERIZED 30 | ) 31 | 32 | def __repr__(self): 33 | return "Distribution({!r})".format(self._distribution) 34 | 35 | @property 36 | def dtype(self): 37 | return self._distribution.dtype 38 | 39 | @property 40 | def is_reparameterized(self): 41 | return self._is_reparameterized 42 | 43 | @property 44 | def value_shape(self): 45 | return self._distribution.event_shape 46 | 47 | def get_value_shape(self): 48 | return self._distribution.event_shape 49 | 50 | @property 51 | def batch_shape(self): 52 | return self._distribution.batch_shape 53 | 54 | def get_batch_shape(self): 55 | return self._distribution.batch_shape() 56 | 57 | def sample( 58 | self, 59 | n_samples=None, 60 | is_reparameterized=None, 61 | group_ndims=0, 62 | compute_density=False, 63 | name=None, 64 | ): 65 | from tfsnippet.stochastic import StochasticTensor 66 | 67 | if n_samples is None or n_samples < 2: 68 | n_samples = 2 69 | with tf.name_scope(name=name, default_name="sample"): 70 | samples = self._distribution.sample(n_samples) 71 | samples = tf.reduce_mean(samples, axis=0) 72 | t = StochasticTensor( 73 | distribution=self, 74 | tensor=samples, 75 | n_samples=n_samples, 76 | group_ndims=group_ndims, 77 | is_reparameterized=self.is_reparameterized, 78 | ) 79 | if compute_density: 80 | with tf.name_scope("compute_prob_and_log_prob"): 81 | log_p = t.log_prob() 82 | t._self_prob = tf.exp(log_p) 83 | return t 84 | 85 | def log_prob(self, given, group_ndims=0, name=None): 86 | with tf.name_scope(name=name, default_name="log_prob"): 87 | log_prob, _, _, _, _, _, _ = self._distribution.forward_filter(given) 88 | return log_prob 89 | 90 | 91 | def softplus_std(inputs, units, epsilon, name): 92 | return ( 93 | tf.nn.softplus(tf.layers.dense(inputs, units, name=name, reuse=tf.AUTO_REUSE)) 94 | + epsilon 95 | ) 96 | 97 | 98 | def rnn( 99 | x, 100 | window_length, 101 | rnn_num_hidden, 102 | rnn_cell="GRU", 103 | hidden_dense=2, 104 | dense_dim=200, 105 | time_axis=1, 106 | name="rnn", 107 | ): 108 | from tensorflow.contrib import rnn 109 | 110 | with tf.variable_scope(name, reuse=tf.AUTO_REUSE): 111 | if len(x.shape) == 4: 112 | x = tf.reduce_mean(x, axis=0) 113 | elif len(x.shape) != 3: 114 | logging.error("rnn input shape error") 115 | x = tf.unstack(x, window_length, time_axis) 116 | 117 | if rnn_cell == "LSTM": 118 | # Define lstm cells with TensorFlow 119 | # Forward direction cell 120 | fw_cell = rnn.BasicLSTMCell(rnn_num_hidden, forget_bias=1.0) 121 | elif rnn_cell == "GRU": 122 | fw_cell = tf.nn.rnn_cell.GRUCell(rnn_num_hidden) 123 | elif rnn_cell == "Basic": 124 | fw_cell = tf.nn.rnn_cell.BasicRNNCell(rnn_num_hidden) 125 | else: 126 | raise ValueError("rnn_cell must be LSTM or GRU") 127 | 128 | # Get lstm cell output 129 | 130 | try: 131 | outputs, _ = rnn.static_rnn(fw_cell, x, dtype=tf.float32) 132 | except Exception: # Old TensorFlow version only returns outputs not states 133 | outputs = rnn.static_rnn(fw_cell, x, dtype=tf.float32) 134 | outputs = tf.stack(outputs, axis=time_axis) 135 | for i in range(hidden_dense): 136 | outputs = tf.layers.dense(outputs, dense_dim) 137 | return outputs 138 | # return size: (batch_size, window_length, rnn_num_hidden) 139 | 140 | 141 | def wrap_params_net(inputs, h_for_dist, mean_layer, std_layer): 142 | with 
tf.variable_scope("hidden", reuse=tf.AUTO_REUSE): 143 | h = h_for_dist(inputs) 144 | return { 145 | "mean": mean_layer(h), 146 | "std": std_layer(h), 147 | } 148 | 149 | 150 | def wrap_params_net_srnn(inputs, h_for_dist): 151 | with tf.variable_scope("hidden", reuse=tf.AUTO_REUSE): 152 | h = h_for_dist(inputs) 153 | return {"input_q": h} 154 | -------------------------------------------------------------------------------- /networks/tranad/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import TranAD -------------------------------------------------------------------------------- /networks/tranad/dlutils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torch.autograd import Variable 4 | import math 5 | import numpy as np 6 | 7 | 8 | class PositionalEncoding(nn.Module): 9 | def __init__(self, d_model, dropout=0.1, max_len=5000): 10 | super(PositionalEncoding, self).__init__() 11 | self.dropout = nn.Dropout(p=dropout) 12 | 13 | pe = torch.zeros(max_len, d_model) 14 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 15 | div_term = torch.exp(torch.arange(0, d_model).float() * (-math.log(10000.0) / d_model)) 16 | pe += torch.sin(position * div_term) 17 | pe += torch.cos(position * div_term) 18 | pe = pe.unsqueeze(0).transpose(0, 1) 19 | self.register_buffer('pe', pe) 20 | 21 | def forward(self, x, pos=0): 22 | x = x + self.pe[pos:pos+x.size(0), :] 23 | return self.dropout(x) 24 | 25 | class TransformerEncoderLayer(nn.Module): 26 | def __init__(self, d_model, nhead, dim_feedforward=16, dropout=0): 27 | super(TransformerEncoderLayer, self).__init__() 28 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 29 | self.linear1 = nn.Linear(d_model, dim_feedforward) 30 | self.dropout = nn.Dropout(dropout) 31 | self.linear2 = nn.Linear(dim_feedforward, d_model) 32 | self.dropout1 = nn.Dropout(dropout) 33 | self.dropout2 = nn.Dropout(dropout) 34 | 35 | self.activation = nn.LeakyReLU(True) 36 | 37 | def forward(self, src,src_mask=None, src_key_padding_mask=None): 38 | src2 = self.self_attn(src, src, src)[0] 39 | src = src + self.dropout1(src2) 40 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 41 | src = src + self.dropout2(src2) 42 | return src 43 | 44 | class TransformerDecoderLayer(nn.Module): 45 | def __init__(self, d_model, nhead, dim_feedforward=16, dropout=0): 46 | super(TransformerDecoderLayer, self).__init__() 47 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 48 | self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 49 | self.linear1 = nn.Linear(d_model, dim_feedforward) 50 | self.dropout = nn.Dropout(dropout) 51 | self.linear2 = nn.Linear(dim_feedforward, d_model) 52 | self.dropout1 = nn.Dropout(dropout) 53 | self.dropout2 = nn.Dropout(dropout) 54 | self.dropout3 = nn.Dropout(dropout) 55 | 56 | self.activation = nn.LeakyReLU(True) 57 | 58 | def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, tgt_key_padding_mask=None, memory_key_padding_mask=None): 59 | tgt2 = self.self_attn(tgt, tgt, tgt)[0] 60 | tgt = tgt + self.dropout1(tgt2) 61 | tgt2 = self.multihead_attn(tgt, memory, memory)[0] 62 | tgt = tgt + self.dropout2(tgt2) 63 | tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) 64 | tgt = tgt + self.dropout3(tgt2) 65 | return tgt 66 | 67 | class ComputeLoss: 68 | def __init__(self, model, lambda_energy, 
lambda_cov, device, n_gmm): 69 | self.model = model 70 | self.lambda_energy = lambda_energy 71 | self.lambda_cov = lambda_cov 72 | self.device = device 73 | self.n_gmm = n_gmm 74 | 75 | def forward(self, x, x_hat, z, gamma): 76 | """Computing the loss function for DAGMM.""" 77 | reconst_loss = torch.mean((x-x_hat).pow(2)) 78 | 79 | sample_energy, cov_diag = self.compute_energy(z, gamma) 80 | 81 | loss = reconst_loss + self.lambda_energy * sample_energy + self.lambda_cov * cov_diag 82 | return Variable(loss, requires_grad=True) 83 | 84 | def compute_energy(self, z, gamma, phi=None, mu=None, cov=None, sample_mean=True): 85 | """Computing the sample energy function""" 86 | if (phi is None) or (mu is None) or (cov is None): 87 | phi, mu, cov = self.compute_params(z, gamma) 88 | 89 | z_mu = (z.unsqueeze(1)- mu.unsqueeze(0)) 90 | 91 | eps = 1e-12 92 | cov_inverse = [] 93 | det_cov = [] 94 | cov_diag = 0 95 | for k in range(self.n_gmm): 96 | cov_k = cov[k] + (torch.eye(cov[k].size(-1))*eps).to(self.device) 97 | cov_inverse.append(torch.inverse(cov_k).unsqueeze(0)) 98 | det_cov.append((Cholesky.apply(cov_k.cpu() * (2*np.pi)).diag().prod()).unsqueeze(0)) 99 | cov_diag += torch.sum(1 / cov_k.diag()) 100 | 101 | cov_inverse = torch.cat(cov_inverse, dim=0) 102 | det_cov = torch.cat(det_cov).to(self.device) 103 | 104 | E_z = -0.5 * torch.sum(torch.sum(z_mu.unsqueeze(-1) * cov_inverse.unsqueeze(0), dim=-2) * z_mu, dim=-1) 105 | E_z = torch.exp(E_z) 106 | E_z = -torch.log(torch.sum(phi.unsqueeze(0)*E_z / (torch.sqrt(det_cov)).unsqueeze(0), dim=1) + eps) 107 | if sample_mean==True: 108 | E_z = torch.mean(E_z) 109 | return E_z, cov_diag 110 | 111 | def compute_params(self, z, gamma): 112 | """Computing the parameters phi, mu and gamma for sample energy function """ 113 | # K: number of Gaussian mixture components 114 | # N: Number of samples 115 | # D: Latent dimension 116 | # z = NxD 117 | # gamma = NxK 118 | 119 | #phi = D 120 | phi = torch.sum(gamma, dim=0)/gamma.size(0) 121 | 122 | #mu = KxD 123 | mu = torch.sum(z.unsqueeze(1) * gamma.unsqueeze(-1), dim=0) 124 | mu /= torch.sum(gamma, dim=0).unsqueeze(-1) 125 | 126 | z_mu = (z.unsqueeze(1) - mu.unsqueeze(0)) 127 | z_mu_z_mu_t = z_mu.unsqueeze(-1) * z_mu.unsqueeze(-2) 128 | 129 | #cov = K x D x D 130 | cov = torch.sum(gamma.unsqueeze(-1).unsqueeze(-1) * z_mu_z_mu_t, dim=0) 131 | cov /= torch.sum(gamma, dim=0).unsqueeze(-1).unsqueeze(-1) 132 | 133 | return phi, mu, cov 134 | 135 | class Cholesky(torch.autograd.Function): 136 | def forward(ctx, a): 137 | l = torch.cholesky(a, False) 138 | ctx.save_for_backward(l) 139 | return l 140 | def backward(ctx, grad_output): 141 | l, = ctx.saved_variables 142 | linv = l.inverse() 143 | inner = torch.tril(torch.mm(l.t(), grad_output)) * torch.tril( 144 | 1.0 - Variable(l.data.new(l.size(1)).fill_(0.5).diag())) 145 | s = torch.mm(linv.t(), torch.mm(inner, linv)) 146 | return s -------------------------------------------------------------------------------- /networks/tranad/models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import math 3 | import os 4 | import torch 5 | import torch.nn as nn 6 | import numpy as np 7 | from torch.nn import TransformerEncoder 8 | from torch.nn import TransformerDecoder 9 | from .dlutils import ( 10 | PositionalEncoding, 11 | TransformerEncoderLayer, 12 | TransformerDecoderLayer, 13 | ) 14 | from common.utils import set_device 15 | 16 | 17 | class TranAD(nn.Module): 18 | def __init__(self, feats, window_size, lr, model_root, 
device): 19 | super(TranAD, self).__init__() 20 | self.name = "TranAD" 21 | self.n_feats = feats 22 | self.n_window = window_size 23 | self.device = set_device(device) 24 | self.n = self.n_feats * self.n_window 25 | self.pos_encoder = PositionalEncoding(2 * feats, 0.1, self.n_window) 26 | encoder_layers = TransformerEncoderLayer( 27 | d_model=2 * feats, nhead=feats, dim_feedforward=16, dropout=0.1 28 | ) 29 | self.transformer_encoder = TransformerEncoder(encoder_layers, 1) 30 | decoder_layers1 = TransformerDecoderLayer( 31 | d_model=2 * feats, nhead=feats, dim_feedforward=16, dropout=0.1 32 | ) 33 | self.transformer_decoder1 = TransformerDecoder(decoder_layers1, 1) 34 | decoder_layers2 = TransformerDecoderLayer( 35 | d_model=2 * feats, nhead=feats, dim_feedforward=16, dropout=0.1 36 | ) 37 | self.transformer_decoder2 = TransformerDecoder(decoder_layers2, 1) 38 | self.fcn = nn.Sequential(nn.Linear(2 * feats, feats), nn.Sigmoid()) 39 | 40 | self.init_model(lr, model_root) 41 | 42 | def encode(self, src, c, tgt): 43 | src = torch.cat((src, c), dim=2) 44 | src = src * math.sqrt(self.n_feats) 45 | src = self.pos_encoder(src) 46 | memory = self.transformer_encoder(src) 47 | tgt = tgt.repeat(1, 1, 2) 48 | return tgt, memory 49 | 50 | def forward(self, src, tgt): 51 | # Phase 1 - Without anomaly scores 52 | c = torch.zeros_like(src) 53 | x1 = self.fcn(self.transformer_decoder1(*self.encode(src, c, tgt))) 54 | # Phase 2 - With anomaly scores 55 | c = (x1 - src) ** 2 56 | x2 = self.fcn(self.transformer_decoder2(*self.encode(src, c, tgt))) 57 | return x1, x2 58 | 59 | def init_model(self, lr, model_root, retrain=True, test=False): 60 | optimizer = torch.optim.AdamW(self.parameters(), lr=lr, weight_decay=1e-5) 61 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 5, 0.9) 62 | 63 | if os.path.exists(model_root) and (not retrain or test): 64 | logging.info("Loading pre-trained model") 65 | checkpoint = torch.load(os.path.join(model_root, "model.pt")) 66 | self.load_state_dict(checkpoint["model_state_dict"]) 67 | optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) 68 | scheduler.load_state_dict(checkpoint["scheduler_state_dict"]) 69 | else: 70 | logging.info("Creating new model: TranAD") 71 | 72 | self.optimizer = optimizer 73 | self.scheduler = scheduler 74 | logging.info("Finish model initialization.") 75 | 76 | def fit(self, nb_epoch, dataloader, training=True): 77 | self.to(self.device) 78 | for epoch in range(1, nb_epoch + 1): 79 | mse_func = nn.MSELoss(reduction="none") 80 | n = epoch + 1 81 | l1s = [] 82 | if training: 83 | logging.info("Training epoch: {}".format(epoch)) 84 | for d in dataloader: 85 | d = d.to(self.device) 86 | local_bs = d.shape[0] 87 | window = d.permute(1, 0, 2) 88 | elem = window[-1, :, :].view(1, local_bs, self.n_feats) 89 | z = self(window, elem) 90 | l1 = ( 91 | mse_func(z, elem) 92 | if not isinstance(z, tuple) 93 | else (1 / n) * mse_func(z[0], elem) 94 | + (1 - 1 / n) * mse_func(z[1], elem) 95 | ) 96 | if isinstance(z, tuple): 97 | z = z[1] 98 | l1s.append(torch.mean(l1).item()) 99 | loss = torch.mean(l1) 100 | self.optimizer.zero_grad() 101 | loss.backward(retain_graph=True) 102 | self.optimizer.step() 103 | self.scheduler.step() 104 | logging.info("Epoch: {} finished.".format(epoch)) 105 | 106 | def predict_prob(self, test_iterator, label_windows=None): 107 | mse_func = nn.MSELoss(reduction="none") 108 | loss_steps = [] 109 | for d in test_iterator: 110 | d = d.to(self.device) 111 | bs = d.shape[0] 112 | window = d.permute(1, 0, 2) 113 | elem = 
window[-1, :, :].view(1, bs, self.n_feats) 114 | z = self(window, elem) 115 | if isinstance(z, tuple): 116 | z = z[1] 117 | loss = mse_func(z, elem)[0] 118 | loss_steps.append(loss.detach().cpu().numpy()) 119 | anomaly_score = np.concatenate(loss_steps).mean(axis=1) 120 | if label_windows is None: 121 | return anomaly_score 122 | else: 123 | anomaly_label = (np.sum(label_windows, axis=1) >= 1) + 0 124 | return anomaly_score, anomaly_label 125 | -------------------------------------------------------------------------------- /networks/usad/__init__.py: -------------------------------------------------------------------------------- 1 | from .usad import * 2 | -------------------------------------------------------------------------------- /requirements/RANSyncoders.txt: -------------------------------------------------------------------------------- 1 | # pip install -r requirements.txt 2 | joblib == 1.0.0 3 | jupyter == 1.0.0 4 | keras == 2.3.1 5 | numpy == 1.19.2 6 | pandas == 1.1.5 7 | scikit-learn == 0.23.2 8 | scipy == 1.5.2 9 | spectrum == 0.7.5 10 | tensorflow == 2.1.0 -------------------------------------------------------------------------------- /requirements/anomaly_transformer.txt: -------------------------------------------------------------------------------- 1 | hydra-core==1.1.1 2 | numpy==1.21.3 3 | omegaconf==2.1.1 4 | torch==1.10.0 5 | tqdm==4.62.3 6 | transformers==4.11.3 7 | wandb==0.12.10 -------------------------------------------------------------------------------- /requirements/interfusion.txt: -------------------------------------------------------------------------------- 1 | # python 3.6.6 2 | more_itertools 3 | numpy==1.17.0 4 | tensorflow-gpu==1.12.0 # for gpu 5 | # tensorflow==1.12.0 6 | typing-extensions==3.7.4.1 7 | typing-inspect==0.5.0 8 | tqdm==4.31.1 9 | pickleshare==0.7.5 10 | scikit-learn==0.20.3 11 | scipy==1.2.1 12 | pandas==0.24.2 13 | matplotlib==2.0.2 14 | seaborn==0.9.0 15 | dataclasses==0.7 16 | dataclasses-json==0.3.5 17 | Click==7.0 18 | fs==2.4.4 19 | six==1.11.0 20 | git+https://github.com/thu-ml/zhusuan.git@48c0f4e 21 | git+https://github.com/haowen-xu/tfsnippet.git@v0.2.0-alpha4 22 | git+https://github.com/haowen-xu/ml-essentials.git -------------------------------------------------------------------------------- /requirements/omnianomaly.txt: -------------------------------------------------------------------------------- 1 | six == 1.11.0 2 | matplotlib == 3.0.2 3 | numpy == 1.15.4 4 | pandas == 0.23.4 5 | scipy == 1.2.0 6 | scikit_learn == 0.20.2 7 | tensorflow-gpu == 1.12.0 8 | tensorflow_probability == 0.5.0 9 | tqdm == 4.28.1 10 | imageio == 2.4.1 11 | fs == 2.3.0 12 | click == 7.0 13 | git+https://github.com/thu-ml/zhusuan.git 14 | git+https://github.com/haowen-xu/tfsnippet.git@v0.2.0-alpha1 --------------------------------------------------------------------------------
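A note on the base class contract shown earlier: `TimeSeriesEncoder` only requires a subclass's `forward` to return a dict with the keys it consumes: `"loss"` for `fit`; `"recst"`, `"y"`, and `"diff"` for `encode`; and `"score"` for `predict_prob`. The toy reconstruction model below is a minimal sketch of that contract, not code from this repository; the class name, layer sizes, score definition, and paths are illustrative assumptions.

import torch.nn as nn

# Assumes TimeSeriesEncoder (defined in this repo) is importable or already in scope.
class ToyReconstructionEncoder(TimeSeriesEncoder):
    def __init__(self, in_dim, hidden=16, **kwargs):
        # kwargs forwards save_path, nb_epoch, lr, device (and optionally architecture)
        super().__init__(**kwargs)
        self.net = nn.Sequential(
            nn.Linear(in_dim, hidden), nn.ReLU(), nn.Linear(hidden, in_dim)
        )

    def forward(self, batch):
        # batch: (batch, window, in_dim) float tensor
        recst = self.net(batch)
        diff = (recst - batch) ** 2
        return {
            "loss": diff.mean(),   # consumed by fit()
            "recst": recst,        # consumed by encode()
            "y": batch,
            "diff": diff,
            "score": diff,         # consumed by predict_prob(): averaged over features, then timestamps
        }

# Illustrative usage (placeholder sizes and paths):
# model = ToyReconstructionEncoder(in_dim=38, save_path="./ckpt", nb_epoch=10, lr=1e-3, device="cpu")
# model.compile()                       # builds the Adam optimizer and moves the model to the device
# model.fit(train_loader, patience=5)   # train_loader yields (batch, window, in_dim) tensors
# scores = model.predict_prob(test_loader)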
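MSCRED and TranAD above follow the same two-call convention: `fit(nb_epoch, dataloader)` trains on sliding-window tensors of shape (batch, window, features), and `predict_prob(test_iterator, label_windows)` returns one anomaly score per window plus, when per-timestamp window labels are supplied, a window-level label that is 1 if any timestamp in the window is anomalous. The sketch below only illustrates the call signatures; it uses random placeholder tensors, a placeholder checkpoint path, and a placeholder device string instead of the loaders in `common/dataloader.py` and the YAML configs.

import numpy as np
import torch
from torch.utils.data import DataLoader

from networks.mscred import MSCRED

window_size, n_feats = 32, 38                              # placeholder sizes
train_windows = torch.randn(512, window_size, n_feats)     # placeholder data, not a real dataset
test_windows = torch.randn(128, window_size, n_feats)
label_windows = np.zeros((128, window_size), dtype=int)    # per-timestamp labels for each test window

# The detectors iterate the loader directly, so a plain tensor works as the dataset here.
train_loader = DataLoader(train_windows, batch_size=32, shuffle=True)
test_loader = DataLoader(test_windows, batch_size=32)

model = MSCRED(feats=n_feats, window_size=window_size, lr=1e-3,
               model_root="./checkpoints/mscred", device="cpu")
model.fit(nb_epoch=5, dataloader=train_loader)
anomaly_score, anomaly_label = model.predict_prob(test_loader, label_windows)
# anomaly_score: shape (128,), mean reconstruction error per window
# anomaly_label: shape (128,), 1 if any timestamp in the window is labelled anomalous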