├── .dvc ├── .gitignore ├── config └── plots │ ├── confusion.json │ ├── confusion_normalized.json │ ├── default.json │ ├── linear.json │ ├── scatter.json │ └── smooth.json ├── .dvcignore ├── .gitignore ├── LICENSE ├── MLproject ├── README.md ├── conda.yaml ├── data ├── .gitignore └── raw │ ├── .gitignore │ ├── SWaT_Dataset_Attack_v0.csv.dvc │ └── SWaT_Dataset_Normal_v1.csv.dvc ├── dvc.lock ├── dvc.yaml ├── metrics.json ├── params.yaml ├── requirements.txt └── src ├── featurize.py ├── model.py ├── train.py └── validate.py /.dvc/.gitignore: -------------------------------------------------------------------------------- 1 | /config.local 2 | /tmp 3 | /cache 4 | -------------------------------------------------------------------------------- /.dvc/config: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/finloop/usad-torchlightning/4aba4ed1b202e6320cce4a6fd5528cd415f9e255/.dvc/config -------------------------------------------------------------------------------- /.dvc/plots/confusion.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "facet": { 8 | "field": "rev", 9 | "type": "nominal" 10 | }, 11 | "spec": { 12 | "transform": [ 13 | { 14 | "aggregate": [ 15 | { 16 | "op": "count", 17 | "as": "xy_count" 18 | } 19 | ], 20 | "groupby": [ 21 | "", 22 | "" 23 | ] 24 | }, 25 | { 26 | "impute": "xy_count", 27 | "groupby": [ 28 | "rev", 29 | "" 30 | ], 31 | "key": "", 32 | "value": 0 33 | }, 34 | { 35 | "impute": "xy_count", 36 | "groupby": [ 37 | "rev", 38 | "" 39 | ], 40 | "key": "", 41 | "value": 0 42 | }, 43 | { 44 | "joinaggregate": [ 45 | { 46 | "op": "max", 47 | "field": "xy_count", 48 | "as": "max_count" 49 | } 50 | ], 51 | "groupby": [] 52 | }, 53 | { 54 | "calculate": "datum.xy_count / datum.max_count", 55 | "as": "percent_of_max" 56 | } 57 | ], 58 | "encoding": { 59 | "x": { 60 | "field": "", 61 | "type": "nominal", 62 | "sort": "ascending", 63 | "title": "" 64 | }, 65 | "y": { 66 | "field": "", 67 | "type": "nominal", 68 | "sort": "ascending", 69 | "title": "" 70 | } 71 | }, 72 | "layer": [ 73 | { 74 | "mark": "rect", 75 | "width": 300, 76 | "height": 300, 77 | "encoding": { 78 | "color": { 79 | "field": "xy_count", 80 | "type": "quantitative", 81 | "title": "", 82 | "scale": { 83 | "domainMin": 0, 84 | "nice": true 85 | } 86 | } 87 | } 88 | }, 89 | { 90 | "mark": "text", 91 | "encoding": { 92 | "text": { 93 | "field": "xy_count", 94 | "type": "quantitative" 95 | }, 96 | "color": { 97 | "condition": { 98 | "test": "datum.percent_of_max > 0.5", 99 | "value": "white" 100 | }, 101 | "value": "black" 102 | } 103 | } 104 | } 105 | ] 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /.dvc/plots/confusion_normalized.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "facet": { 8 | "field": "rev", 9 | "type": "nominal" 10 | }, 11 | "spec": { 12 | "transform": [ 13 | { 14 | "aggregate": [ 15 | { 16 | "op": "count", 17 | "as": "xy_count" 18 | } 19 | ], 20 | "groupby": [ 21 | "", 22 | "" 23 | ] 24 | }, 25 | { 26 | "impute": "xy_count", 27 | "groupby": [ 28 | "rev", 29 | "" 30 | ], 31 | "key": "", 32 | "value": 0 33 | }, 34 | { 35 | "impute": "xy_count", 
36 | "groupby": [ 37 | "rev", 38 | "" 39 | ], 40 | "key": "", 41 | "value": 0 42 | }, 43 | { 44 | "joinaggregate": [ 45 | { 46 | "op": "sum", 47 | "field": "xy_count", 48 | "as": "sum_y" 49 | } 50 | ], 51 | "groupby": [ 52 | "" 53 | ] 54 | }, 55 | { 56 | "calculate": "datum.xy_count / datum.sum_y", 57 | "as": "percent_of_y" 58 | } 59 | ], 60 | "encoding": { 61 | "x": { 62 | "field": "", 63 | "type": "nominal", 64 | "sort": "ascending", 65 | "title": "" 66 | }, 67 | "y": { 68 | "field": "", 69 | "type": "nominal", 70 | "sort": "ascending", 71 | "title": "" 72 | } 73 | }, 74 | "layer": [ 75 | { 76 | "mark": "rect", 77 | "width": 300, 78 | "height": 300, 79 | "encoding": { 80 | "color": { 81 | "field": "percent_of_y", 82 | "type": "quantitative", 83 | "title": "", 84 | "scale": { 85 | "domain": [ 86 | 0, 87 | 1 88 | ] 89 | } 90 | } 91 | } 92 | }, 93 | { 94 | "mark": "text", 95 | "encoding": { 96 | "text": { 97 | "field": "percent_of_y", 98 | "type": "quantitative", 99 | "format": ".2f" 100 | }, 101 | "color": { 102 | "condition": { 103 | "test": "datum.percent_of_y > 0.5", 104 | "value": "white" 105 | }, 106 | "value": "black" 107 | } 108 | } 109 | } 110 | ] 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /.dvc/plots/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "width": 300, 8 | "height": 300, 9 | "mark": { 10 | "type": "line" 11 | }, 12 | "encoding": { 13 | "x": { 14 | "field": "", 15 | "type": "quantitative", 16 | "title": "" 17 | }, 18 | "y": { 19 | "field": "", 20 | "type": "quantitative", 21 | "title": "", 22 | "scale": { 23 | "zero": false 24 | } 25 | }, 26 | "color": { 27 | "field": "rev", 28 | "type": "nominal" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /.dvc/plots/linear.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "width": 300, 8 | "height": 300, 9 | "layer": [ 10 | { 11 | "encoding": { 12 | "x": { 13 | "field": "", 14 | "type": "quantitative", 15 | "title": "" 16 | }, 17 | "y": { 18 | "field": "", 19 | "type": "quantitative", 20 | "title": "", 21 | "scale": { 22 | "zero": false 23 | } 24 | }, 25 | "color": { 26 | "field": "rev", 27 | "type": "nominal" 28 | } 29 | }, 30 | "layer": [ 31 | { 32 | "mark": "line" 33 | }, 34 | { 35 | "selection": { 36 | "label": { 37 | "type": "single", 38 | "nearest": true, 39 | "on": "mouseover", 40 | "encodings": [ 41 | "x" 42 | ], 43 | "empty": "none", 44 | "clear": "mouseout" 45 | } 46 | }, 47 | "mark": "point", 48 | "encoding": { 49 | "opacity": { 50 | "condition": { 51 | "selection": "label", 52 | "value": 1 53 | }, 54 | "value": 0 55 | } 56 | } 57 | } 58 | ] 59 | }, 60 | { 61 | "transform": [ 62 | { 63 | "filter": { 64 | "selection": "label" 65 | } 66 | } 67 | ], 68 | "layer": [ 69 | { 70 | "mark": { 71 | "type": "rule", 72 | "color": "gray" 73 | }, 74 | "encoding": { 75 | "x": { 76 | "field": "", 77 | "type": "quantitative" 78 | } 79 | } 80 | }, 81 | { 82 | "encoding": { 83 | "text": { 84 | "type": "quantitative", 85 | "field": "" 86 | }, 87 | "x": { 88 | "field": "", 89 | "type": "quantitative" 90 | }, 91 | "y": { 92 | "field": "", 93 | "type": "quantitative" 94 | } 95 | }, 96 | 
"layer": [ 97 | { 98 | "mark": { 99 | "type": "text", 100 | "align": "left", 101 | "dx": 5, 102 | "dy": -5 103 | }, 104 | "encoding": { 105 | "color": { 106 | "type": "nominal", 107 | "field": "rev" 108 | } 109 | } 110 | } 111 | ] 112 | } 113 | ] 114 | } 115 | ] 116 | } 117 | -------------------------------------------------------------------------------- /.dvc/plots/scatter.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "width": 300, 8 | "height": 300, 9 | "layer": [ 10 | { 11 | "encoding": { 12 | "x": { 13 | "field": "", 14 | "type": "quantitative", 15 | "title": "" 16 | }, 17 | "y": { 18 | "field": "", 19 | "type": "quantitative", 20 | "title": "", 21 | "scale": { 22 | "zero": false 23 | } 24 | }, 25 | "color": { 26 | "field": "rev", 27 | "type": "nominal" 28 | } 29 | }, 30 | "layer": [ 31 | { 32 | "mark": "point" 33 | }, 34 | { 35 | "selection": { 36 | "label": { 37 | "type": "single", 38 | "nearest": true, 39 | "on": "mouseover", 40 | "encodings": [ 41 | "x" 42 | ], 43 | "empty": "none", 44 | "clear": "mouseout" 45 | } 46 | }, 47 | "mark": "point", 48 | "encoding": { 49 | "opacity": { 50 | "condition": { 51 | "selection": "label", 52 | "value": 1 53 | }, 54 | "value": 0 55 | } 56 | } 57 | } 58 | ] 59 | }, 60 | { 61 | "transform": [ 62 | { 63 | "filter": { 64 | "selection": "label" 65 | } 66 | } 67 | ], 68 | "layer": [ 69 | { 70 | "encoding": { 71 | "text": { 72 | "type": "quantitative", 73 | "field": "" 74 | }, 75 | "x": { 76 | "field": "", 77 | "type": "quantitative" 78 | }, 79 | "y": { 80 | "field": "", 81 | "type": "quantitative" 82 | } 83 | }, 84 | "layer": [ 85 | { 86 | "mark": { 87 | "type": "text", 88 | "align": "left", 89 | "dx": 5, 90 | "dy": -5 91 | }, 92 | "encoding": { 93 | "color": { 94 | "type": "nominal", 95 | "field": "rev" 96 | } 97 | } 98 | } 99 | ] 100 | } 101 | ] 102 | } 103 | ] 104 | } 105 | -------------------------------------------------------------------------------- /.dvc/plots/smooth.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://vega.github.io/schema/vega-lite/v4.json", 3 | "data": { 4 | "values": "" 5 | }, 6 | "title": "", 7 | "mark": { 8 | "type": "line" 9 | }, 10 | "encoding": { 11 | "x": { 12 | "field": "", 13 | "type": "quantitative", 14 | "title": "" 15 | }, 16 | "y": { 17 | "field": "", 18 | "type": "quantitative", 19 | "title": "", 20 | "scale": { 21 | "zero": false 22 | } 23 | }, 24 | "color": { 25 | "field": "rev", 26 | "type": "nominal" 27 | } 28 | }, 29 | "transform": [ 30 | { 31 | "loess": "", 32 | "on": "", 33 | "groupby": [ 34 | "rev" 35 | ], 36 | "bandwidth": 0.3 37 | } 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /.dvcignore: -------------------------------------------------------------------------------- 1 | # Add patterns of files dvc should ignore, which could improve 2 | # the performance. 
Learn more at 3 | # https://dvc.org/doc/user-guide/dvcignore 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # mlflow 132 | mlruns 133 | 134 | # pytorch-lightning 135 | lightning_logs 136 | 137 | # pycharm 138 | .idea 139 | 140 | # dvc 141 | .dvc 142 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Piotr Krawiec 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MLproject: -------------------------------------------------------------------------------- 1 | name: usad 2 | 3 | conda_env: conda.yaml 4 | 5 | entry_points: 6 | main: 7 | command: "python3 src/featurize.py" 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # usad-torchlightning 2 | Implementation of USAD (UnSupervised Anomaly Detection on multivariate time 3 | series) in PyTorch Lightning. 4 | 5 | Original implementation by: Francesco Galati. 6 | Original code can be found at: [USAD](https://github.com/manigalati/usad). 7 | 8 | # Getting started 9 | To start, first download the data. 10 | ## Data 11 | The data can be downloaded from: 12 | - Normal data: [SWaT Dataset Normal](https://drive.google.com/open?id=1rVJ5ry5GG-ZZi5yI4x9lICB8VhErXwCw) 13 | - Attack data: [SWaT Dataset Attack](https://drive.google.com/open?id=1iDYc0OEmidN712fquOBRFjln90SbpaE7) 14 | 15 | After downloading, put both files in `data/raw`. 16 | 17 | ## Running the model 18 | ```commandline 19 | dvc exp run 20 | ``` 21 | 22 | ## Changing the parameters 23 | All the parameters (for example, the number of training epochs) can be found in `params.yaml`.
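A parameter can also be overridden for a single run from the command line via DVC's `--set-param` (`-S`) flag, without editing the file; for example (assuming a DVC 2.x CLI):
```commandline
dvc exp run --set-param train.epochs=50
```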
24 | 25 | ## Requirements 26 | - pytorch 1.9 27 | - dvc 28 | - pytorch-lightning 29 | - python 3.8 30 | 31 | # How to cite 32 | If you use this software, please cite the following paper: 33 | ``` 34 | Audibert, J., Michiardi, P., Guyard, F., Marti, S., Zuluaga, M. A. (2020). 35 | USAD: UnSupervised Anomaly Detection on multivariate time series. 36 | Proceedings of the 26th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining, August 23-27, 2020. 37 | ``` -------------------------------------------------------------------------------- /conda.yaml: -------------------------------------------------------------------------------- 1 | name: usad-torchlightning 2 | channels: 3 | - conda-forge 4 | - pytorch 5 | - nvidia 6 | - defaults 7 | dependencies: 8 | - numpy 9 | - python=3.8.12 10 | - scikit-learn 11 | - matplotlib 12 | - seaborn 13 | - pytorch 14 | - cudatoolkit=11.1 15 | - pytorch-lightning 16 | - dvc 17 | prefix: /home/pk/miniconda3/envs/usad-torchlightning 18 | -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | /featurize 2 | /predictions 3 | -------------------------------------------------------------------------------- /data/raw/.gitignore: -------------------------------------------------------------------------------- 1 | /SWaT_Dataset_Attack_v0.csv 2 | /SWaT_Dataset_Normal_v1.csv 3 | -------------------------------------------------------------------------------- /data/raw/SWaT_Dataset_Attack_v0.csv.dvc: -------------------------------------------------------------------------------- 1 | outs: 2 | - md5: 5f45dffcbfb5735c7fb867f5c8124c72 3 | size: 133454848 4 | path: SWaT_Dataset_Attack_v0.csv 5 | -------------------------------------------------------------------------------- /data/raw/SWaT_Dataset_Normal_v1.csv.dvc: -------------------------------------------------------------------------------- 1 | outs: 2 | - md5: 8d5520a9eb17ebbb461df1137131f8dd 3 | size: 171724418 4 | path: SWaT_Dataset_Normal_v1.csv 5 | -------------------------------------------------------------------------------- /dvc.lock: -------------------------------------------------------------------------------- 1 | schema: '2.0' 2 | stages: 3 | featurize: 4 | cmd: python3 src/featurize.py data/raw data/featurize 5 | deps: 6 | - path: data/raw 7 | md5: 9d3d74e16f896a88df4f97f9755bb4b0.dir 8 | size: 305179520 9 | nfiles: 5 10 | - path: src/featurize.py 11 | md5: c94a3fff61a5c2f572b027ce9c6d07c0 12 | size: 2022 13 | params: 14 | params.yaml: 15 | featurize.max_row_limit: 1000000 16 | featurize.window_size: 8 17 | outs: 18 | - path: data/featurize 19 | md5: 96dc364478248eea5f36cdb343cb0863.dir 20 | size: 83232709 21 | nfiles: 1 22 | train: 23 | cmd: python3 src/train.py data/featurize data/predictions 24 | deps: 25 | - path: data/featurize 26 | md5: 96dc364478248eea5f36cdb343cb0863.dir 27 | size: 83232709 28 | nfiles: 1 29 | - path: src/model.py 30 | md5: 39e8ead6259cd85a65a3e9e73ce9b70f 31 | size: 3742 32 | - path: src/train.py 33 | md5: 974d89f91c2f614d382e671e72ab5292 34 | size: 2185 35 | params: 36 | params.yaml: 37 | featurize.window_size: 8 38 | train.batch_size: 10000 39 | train.epochs: 100 40 | train.hidden_size: 20 41 | outs: 42 | - path: data/predictions 43 | md5: 7bde82a8b14a9e6d4a39021c3437d72b.dir 44 | size: 1374793 45 | nfiles: 1 46 | validate: 47 | cmd: python3 src/validate.py data/featurize data/predictions metrics.json 48 | deps: 49 | - 
path: data/featurize 50 | md5: 96dc364478248eea5f36cdb343cb0863.dir 51 | size: 83232709 52 | nfiles: 1 53 | - path: data/predictions 54 | md5: 7bde82a8b14a9e6d4a39021c3437d72b.dir 55 | size: 1374793 56 | nfiles: 1 57 | - path: src/validate.py 58 | md5: 313ee87dc6cd76edf73d77ea95526f44 59 | size: 2147 60 | params: 61 | params.yaml: 62 | featurize.window_size: 8 63 | outs: 64 | - path: metrics.json 65 | md5: f68b5abe81d2548d6d369a202a4a6815 66 | size: 92 67 | -------------------------------------------------------------------------------- /dvc.yaml: -------------------------------------------------------------------------------- 1 | stages: 2 | featurize: 3 | cmd: python3 src/featurize.py data/raw data/featurize 4 | deps: 5 | - data/raw 6 | - src/featurize.py 7 | params: 8 | - featurize.max_row_limit 9 | - featurize.window_size 10 | outs: 11 | - data/featurize 12 | train: 13 | cmd: python3 src/train.py data/featurize data/predictions 14 | deps: 15 | - data/featurize 16 | - src/model.py 17 | - src/train.py 18 | params: 19 | - featurize.window_size 20 | - train.batch_size 21 | - train.epochs 22 | - train.hidden_size 23 | outs: 24 | - data/predictions 25 | validate: 26 | cmd: python3 src/validate.py data/featurize data/predictions metrics.json 27 | deps: 28 | - data/featurize 29 | - data/predictions 30 | - src/validate.py 31 | params: 32 | - featurize.window_size 33 | metrics: 34 | - metrics.json: 35 | cache: false 36 | -------------------------------------------------------------------------------- /metrics.json: -------------------------------------------------------------------------------- 1 | {"threshold": 0.0, "acc": 0.12194856316026947, "recall": 1.0, "f1": 0.2174} -------------------------------------------------------------------------------- /params.yaml: -------------------------------------------------------------------------------- 1 | featurize: 2 | max_row_limit: 1000000 3 | window_size: 8 4 | 5 | train: 6 | batch_size: 10000 7 | epochs: 100 8 | hidden_size: 20 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21 2 | matplotlib 3 | seaborn 4 | jupyterlab 5 | -------------------------------------------------------------------------------- /src/featurize.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | import pandas as pd 6 | from sklearn import preprocessing 7 | import yaml 8 | 9 | 10 | def create_windows(data: np.ndarray, window_size):  # stack overlapping sliding windows of consecutive rows 11 | return data[np.arange(window_size) + np.arange( 12 | data.shape[0] - window_size).reshape(-1, 1)] 13 | 14 | 15 | def load_dataset(filename, nrows, sep, decimal): 16 | df = pd.read_csv(filename, nrows=nrows, decimal=decimal, sep=sep, low_memory=False) 17 | labels_ = np.array([float(label != 'Normal') for label in 18 | df["Normal/Attack"].values]) 19 | df = df.drop(["Timestamp", "Normal/Attack"], axis=1) 20 | 21 | for i in list(df): 22 | df[i] = df[i].apply(lambda x: str(x).replace(",", "."))  # normalize decimal commas to dots before the float cast 23 | 24 | return df.astype(float), labels_ 25 | 26 | 27 | if __name__ == "__main__": 28 | # Read YAML params 29 | params = yaml.safe_load(open('params.yaml'))['featurize'] 30 | max_row_limit = params["max_row_limit"] 31 | window_size = params["window_size"] 32 | 33 | # Read command line params 34 | if len(sys.argv) != 3: 35 | sys.stderr.write('Arguments error. 
Usage:\n') 36 | sys.stderr.write( 37 | '\tpython featurize.py data-dir-path features-dir-path\n' 38 | ) 39 | sys.exit(1) 40 | 41 | data_dir = sys.argv[1] 42 | out_dir = sys.argv[2] 43 | 44 | os.makedirs(out_dir, exist_ok=True) 45 | 46 | normal_csv = os.path.join(data_dir, "SWaT_Dataset_Normal_v1.csv") 47 | attack_csv = os.path.join(data_dir, "SWaT_Dataset_Attack_v0.csv") 48 | 49 | train_file = os.path.join(out_dir, "data.npz") 50 | 51 | normal, _ = load_dataset(normal_csv, nrows=max_row_limit, sep=",", decimal=",") 52 | attack, labels = load_dataset(attack_csv, nrows=max_row_limit, sep=";", decimal=",")  # ';'-separated file with ',' decimal marks 53 | 54 | sc = preprocessing.StandardScaler() 55 | 56 | normal = sc.fit_transform(normal.values) 57 | attack = sc.transform(attack.values) 58 | 59 | windows_normal = create_windows(normal, window_size).reshape(-1, normal.shape[1]*window_size) 60 | windows_attack = create_windows(attack, window_size).reshape(-1, attack.shape[1]*window_size) 61 | 62 | np.savez_compressed(train_file, train=windows_normal, test=windows_attack, labels=labels) 63 | -------------------------------------------------------------------------------- /src/model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.optim.adam import Adam 4 | from pytorch_lightning.core.lightning import LightningModule 5 | from collections import OrderedDict 6 | 7 | 8 | class Encoder(LightningModule): 9 | def __init__(self, input_size, latent_size, s1=2, s2=4): 10 | super().__init__() 11 | 12 | self.layer_1 = nn.Linear(input_size, input_size // s1) 13 | self.layer_2 = nn.Linear(input_size // s1, input_size // s2) 14 | self.layer_3 = nn.Linear(input_size // s2, latent_size) 15 | 16 | self.activation = nn.ReLU(True) 17 | 18 | def forward(self, x): 19 | out = self.layer_1(x) 20 | out = self.activation(out) 21 | out = self.layer_2(out) 22 | out = self.activation(out) 23 | out = self.layer_3(out) 24 | z = self.activation(out) 25 | return z 26 | 27 | 28 | class Decoder(LightningModule): 29 | def __init__(self, latent_size, output_size, s1=2, s2=4): 30 | super().__init__() 31 | 32 | self.layer_1 = nn.Linear(latent_size, output_size // s2) 33 | self.layer_2 = nn.Linear(output_size // s2, output_size // s1) 34 | self.layer_3 = nn.Linear(output_size // s1, output_size) 35 | 36 | self.relu = nn.ReLU(True) 37 | self.sigmoid = nn.Sigmoid()  # note: unused; the final activation below is ReLU 38 | 39 | def forward(self, x): 40 | out = self.layer_1(x) 41 | out = self.relu(out) 42 | out = self.layer_2(out) 43 | out = self.relu(out) 44 | out = self.layer_3(out) 45 | w = self.relu(out) 46 | return w 47 | 48 | 49 | class USADModel(LightningModule): 50 | def __init__(self, window_size, z_size, learning_rate=1e-3): 51 | super().__init__() 52 | 53 | self.encoder = Encoder(window_size, z_size) 54 | self.decoder_1 = Decoder(z_size, window_size) 55 | self.decoder_2 = Decoder(z_size, window_size) 56 | self.learning_rate = learning_rate 57 | 58 | def forward(self, x, alpha=.5, beta=.5):  # anomaly score: weighted reconstruction errors of the two decoders 59 | w1 = self.decoder_1(self.encoder(x)) 60 | w2 = self.decoder_2(self.encoder(w1)) 61 | 62 | return alpha * torch.mean((x - w1)**2, axis=1) + \ 63 | beta * torch.mean((x - w2)**2, axis=1) 64 | 65 | def configure_optimizers(self): 66 | optimizer_1 = Adam(list(self.encoder.parameters()) + list( 67 | self.decoder_1.parameters()), lr=self.learning_rate) 68 | optimizer_2 = Adam(list(self.encoder.parameters()) + list( 69 | self.decoder_2.parameters()), lr=self.learning_rate) 70 | 71 | return optimizer_1, optimizer_2 72 | 73 | def training_step(self, 
train_batch, batch_idx, optimizer_idx): 74 | n = self.trainer.current_epoch + 1  # epoch-dependent weight from the USAD paper 75 | 76 | z = self.encoder(train_batch) 77 | w1 = self.decoder_1(z) 78 | 79 | w22 = self.decoder_2(self.encoder(w1)) 80 | 81 | # Train AE1 82 | if optimizer_idx == 0: 83 | loss1 = 1 / n * torch.mean((train_batch - w1) ** 2) + \ 84 | (1 - 1 / n) * torch.mean((train_batch - w22) ** 2) 85 | output = OrderedDict({"loss": loss1}) 86 | return output 87 | 88 | if optimizer_idx == 1:  # Train AE2 (adversarial phase) 89 | w2 = self.decoder_2(z) 90 | loss2 = 1 / n * torch.mean((train_batch - w2) ** 2) - \ 91 | (1 - 1 / n) * torch.mean((train_batch - w22) ** 2) 92 | output = OrderedDict({"loss": loss2}) 93 | return output 94 | 95 | def validation_step(self, test_batch, batch_idx): 96 | n = self.trainer.current_epoch + 1 97 | z = self.encoder(test_batch) 98 | w1 = self.decoder_1(z) 99 | 100 | w22 = self.decoder_2(self.encoder(w1)) 101 | 102 | w2 = self.decoder_2(z) 103 | loss2 = 1 / n * torch.mean((test_batch - w2) ** 2) - \ 104 | (1 - 1 / n) * torch.mean((test_batch - w22) ** 2) 105 | output = OrderedDict({"val_loss": loss2}) 106 | return output 107 | 108 | def validation_epoch_end(self, validation_step_outputs): 109 | temp = [] 110 | for output in validation_step_outputs: 111 | temp += [output["val_loss"].item()] 112 | self.log("val_loss", torch.mean(torch.tensor(temp)))  # values returned here are ignored by Lightning, so log instead -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import yaml 3 | from model import USADModel 4 | from torch.utils.data import DataLoader, Dataset 5 | import sys 6 | import os 7 | import torch 8 | from pytorch_lightning import Trainer 9 | 10 | # Dataset wrapping one array ("train" or "test") from the .npz written by featurize.py 11 | class NpzDataset(Dataset): 12 | def __init__(self, path, key="data"): 13 | self.path = path 14 | self.data = np.load(path)[key] 15 | 16 | def __getitem__(self, index): 17 | return torch.from_numpy(self.data[index]).float() 18 | 19 | def __len__(self): 20 | return len(self.data) 21 | 22 | 23 | if __name__ == "__main__": 24 | # Read YAML params 25 | params = yaml.safe_load(open('params.yaml')) 26 | 27 | WINDOW_SIZE = params['featurize']['window_size'] 28 | BATCH_SIZE = params['train']["batch_size"] 29 | EPOCHS = params['train']["epochs"] 30 | HIDDEN_SIZE = params['train']["hidden_size"] 31 | 32 | if len(sys.argv) != 3: 33 | sys.stderr.write('Arguments error. 
Usage:\n') 34 | sys.stderr.write( 35 | '\tpython train.py features-dir-path predict-dir-path\n' 36 | ) 37 | sys.exit(1) 38 | 39 | data_dir = sys.argv[1] 40 | predict_dir = sys.argv[2] 41 | 42 | os.makedirs(predict_dir, exist_ok=True) 43 | 44 | data_file = os.path.join(data_dir, "data.npz") 45 | 46 | test = NpzDataset(data_file, "test") 47 | train = NpzDataset(data_file, "train") 48 | 49 | test_loader = DataLoader(test, batch_size=BATCH_SIZE, num_workers=3) 50 | train_loader = DataLoader(train, batch_size=BATCH_SIZE, num_workers=3) 51 | 52 | NMETRICS = test[0].size()[0] // WINDOW_SIZE  # number of sensor channels (features per timestep) 53 | 54 | model = USADModel(window_size=WINDOW_SIZE * NMETRICS, z_size=WINDOW_SIZE * HIDDEN_SIZE) 55 | 56 | trainer = Trainer(gpus=1, max_epochs=EPOCHS) 57 | 58 | trainer.fit(model, train_loader, train_loader)  # the training set doubles as the validation set 59 | 60 | y_pred = trainer.predict(model, test_loader) 61 | 62 | y_pred = np.concatenate([torch.stack(y_pred[:-1]).flatten().detach().cpu().numpy(), 63 | y_pred[-1].flatten().detach().cpu().numpy()]) 64 | 65 | np.savez_compressed(os.path.join(predict_dir, "y_pred.npz"), y_pred=y_pred) -------------------------------------------------------------------------------- /src/validate.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | import sys 3 | import os 4 | import numpy as np 5 | import json 6 | from sklearn.metrics import f1_score, recall_score, accuracy_score 7 | 8 | if __name__ == "__main__": 9 | params = yaml.safe_load(open('params.yaml')) 10 | 11 | if len(sys.argv) != 4: 12 | sys.stderr.write('Arguments error. 
Usage:\n') 13 | sys.stderr.write( 14 | '\tpython validate.py featurize-dir-path predict-dir-path metrics-file\n' 15 | ) 16 | sys.exit(1) 17 | 18 | featurize_dir = sys.argv[1] 19 | predict_dir = sys.argv[2] 20 | metrics_file = sys.argv[3] 21 | 22 | y_pred = np.load(os.path.join(predict_dir, "y_pred.npz"))["y_pred"] 23 | labels = np.load(os.path.join(featurize_dir, "data.npz"))["labels"] 24 | 25 | WINDOW_SIZE = params["featurize"]["window_size"] 26 | windows_labels = [] 27 | for i in range(len(labels) - WINDOW_SIZE): 28 | windows_labels.append(list(np.int_(labels[i:i + WINDOW_SIZE]))) 29 | 30 | y_test = [1.0 if (np.sum(window) > 0) else 0.0 for window in windows_labels]  # a window is anomalous if any timestep is 31 | 32 | 33 | thresholds = np.arange(0.0, np.max(y_pred), np.max(y_pred)/50) 34 | fscore = np.zeros(shape=(len(thresholds))) 35 | #rscore = np.zeros(shape=(len(thresholds))) 36 | 37 | # Sweep candidate thresholds and score each with F1 38 | for index, elem in enumerate(thresholds): 39 | # Binarize the anomaly scores at this threshold 40 | y_pred_label = (y_pred > elem).astype('int') 41 | # Calculate the f-score 42 | fscore[index] = f1_score(y_test, y_pred_label) 43 | #rscore[index] = recall_score(y_test, y_pred_label) 44 | 45 | index = np.argmax(fscore) 46 | thresholdOpt = round(thresholds[index], ndigits=4) 47 | fscoreOpt = round(fscore[index], ndigits=4) 48 | y_pred_label = (y_pred > thresholds[index]).astype('int') 49 | acc = accuracy_score(y_test, y_pred_label) 50 | recall = recall_score(y_test, y_pred_label) 51 | 52 | # Save scores 53 | with open(metrics_file, 'w') as f: 54 | json.dump({'threshold': thresholdOpt, "acc": acc, "recall": recall, "f1": fscoreOpt}, f) 55 | --------------------------------------------------------------------------------
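A note on the threshold search in `src/validate.py`: the script scans a fixed 50-step grid and keeps the threshold with the highest F1. The same optimum can be found over every distinct anomaly score with `sklearn.metrics.precision_recall_curve`; below is a minimal sketch of that alternative (`best_f1_threshold` is a hypothetical helper, not part of this repository):

```python
import numpy as np
from sklearn.metrics import precision_recall_curve


def best_f1_threshold(y_true, scores):
    """Return (threshold, f1) maximizing F1 over all distinct score thresholds."""
    precision, recall, thresholds = precision_recall_curve(y_true, scores)
    # precision/recall have one more entry than thresholds; drop the final
    # point, which corresponds to predicting no positives at all.
    f1 = 2 * precision[:-1] * recall[:-1] / np.clip(precision[:-1] + recall[:-1], 1e-12, None)
    best = int(np.argmax(f1))
    return float(thresholds[best]), float(f1[best])
```

Called as `best_f1_threshold(y_test, y_pred)` with the arrays built in `validate.py`, this would replace the `thresholds`/`fscore` loop while checking every candidate threshold rather than 50 evenly spaced ones.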