├── .gitignore ├── LICENSE ├── README.md ├── matlab ├── data │ └── nyiso_2013 │ │ ├── 20130101pal.csv │ │ ├── 20130102pal.csv │ │ ├── 20130103pal.csv │ │ ├── 20130104pal.csv │ │ ├── 20130105pal.csv │ │ ├── 20130106pal.csv │ │ ├── 20130107pal.csv │ │ ├── 20130108pal.csv │ │ ├── 20130109pal.csv │ │ ├── 20130110pal.csv │ │ ├── 20130111pal.csv │ │ ├── 20130112pal.csv │ │ ├── 20130113pal.csv │ │ ├── 20130114pal.csv │ │ ├── 20130115pal.csv │ │ ├── 20130116pal.csv │ │ ├── 20130117pal.csv │ │ ├── 20130118pal.csv │ │ ├── 20130119pal.csv │ │ ├── 20130120pal.csv │ │ ├── 20130121pal.csv │ │ ├── 20130122pal.csv │ │ ├── 20130123pal.csv │ │ ├── 20130124pal.csv │ │ ├── 20130125pal.csv │ │ ├── 20130126pal.csv │ │ ├── 20130127pal.csv │ │ ├── 20130128pal.csv │ │ ├── 20130129pal.csv │ │ ├── 20130130pal.csv │ │ ├── 20130131pal.csv │ │ ├── 20130201pal.csv │ │ ├── 20130202pal.csv │ │ ├── 20130203pal.csv │ │ ├── 20130204pal.csv │ │ ├── 20130205pal.csv │ │ ├── 20130206pal.csv │ │ ├── 20130207pal.csv │ │ ├── 20130208pal.csv │ │ ├── 20130209pal.csv │ │ ├── 20130210pal.csv │ │ ├── 20130211pal.csv │ │ ├── 20130212pal.csv │ │ ├── 20130213pal.csv │ │ ├── 20130214pal.csv │ │ ├── 20130215pal.csv │ │ ├── 20130216pal.csv │ │ ├── 20130217pal.csv │ │ ├── 20130218pal.csv │ │ ├── 20130219pal.csv │ │ ├── 20130220pal.csv │ │ ├── 20130221pal.csv │ │ ├── 20130222pal.csv │ │ ├── 20130223pal.csv │ │ ├── 20130224pal.csv │ │ ├── 20130225pal.csv │ │ ├── 20130226pal.csv │ │ ├── 20130227pal.csv │ │ ├── 20130228pal.csv │ │ ├── 20130301pal.csv │ │ ├── 20130302pal.csv │ │ ├── 20130303pal.csv │ │ ├── 20130304pal.csv │ │ ├── 20130305pal.csv │ │ ├── 20130306pal.csv │ │ ├── 20130307pal.csv │ │ ├── 20130308pal.csv │ │ ├── 20130309pal.csv │ │ ├── 20130310pal.csv │ │ ├── 20130311pal.csv │ │ ├── 20130312pal.csv │ │ ├── 20130313pal.csv │ │ ├── 20130314pal.csv │ │ ├── 20130315pal.csv │ │ ├── 20130316pal.csv │ │ ├── 20130317pal.csv │ │ ├── 20130318pal.csv │ │ ├── 20130319pal.csv │ │ ├── 20130320pal.csv │ │ ├── 20130321pal.csv │ │ ├── 20130322pal.csv │ │ ├── 20130323pal.csv │ │ ├── 20130324pal.csv │ │ ├── 20130325pal.csv │ │ ├── 20130326pal.csv │ │ ├── 20130327pal.csv │ │ ├── 20130328pal.csv │ │ ├── 20130329pal.csv │ │ ├── 20130330pal.csv │ │ ├── 20130331pal.csv │ │ ├── 20130401pal.csv │ │ ├── 20130402pal.csv │ │ ├── 20130403pal.csv │ │ ├── 20130404pal.csv │ │ ├── 20130405pal.csv │ │ ├── 20130406pal.csv │ │ ├── 20130407pal.csv │ │ ├── 20130408pal.csv │ │ ├── 20130409pal.csv │ │ ├── 20130410pal.csv │ │ ├── 20130411pal.csv │ │ ├── 20130412pal.csv │ │ ├── 20130413pal.csv │ │ ├── 20130414pal.csv │ │ ├── 20130415pal.csv │ │ ├── 20130416pal.csv │ │ ├── 20130417pal.csv │ │ ├── 20130418pal.csv │ │ ├── 20130419pal.csv │ │ ├── 20130420pal.csv │ │ ├── 20130421pal.csv │ │ ├── 20130422pal.csv │ │ ├── 20130423pal.csv │ │ ├── 20130424pal.csv │ │ ├── 20130425pal.csv │ │ ├── 20130426pal.csv │ │ ├── 20130427pal.csv │ │ ├── 20130428pal.csv │ │ ├── 20130429pal.csv │ │ ├── 20130430pal.csv │ │ ├── 20130501pal.csv │ │ ├── 20130502pal.csv │ │ ├── 20130503pal.csv │ │ ├── 20130504pal.csv │ │ ├── 20130505pal.csv │ │ ├── 20130506pal.csv │ │ ├── 20130507pal.csv │ │ ├── 20130508pal.csv │ │ ├── 20130509pal.csv │ │ ├── 20130510pal.csv │ │ ├── 20130511pal.csv │ │ ├── 20130512pal.csv │ │ ├── 20130513pal.csv │ │ ├── 20130514pal.csv │ │ ├── 20130515pal.csv │ │ ├── 20130516pal.csv │ │ ├── 20130517pal.csv │ │ ├── 20130518pal.csv │ │ ├── 20130519pal.csv │ │ ├── 20130520pal.csv │ │ ├── 20130521pal.csv │ │ ├── 20130522pal.csv │ │ ├── 20130523pal.csv │ │ ├── 20130524pal.csv │ │ ├── 
20130525pal.csv │ │ ├── 20130526pal.csv │ │ ├── 20130527pal.csv │ │ ├── 20130528pal.csv │ │ ├── 20130529pal.csv │ │ ├── 20130530pal.csv │ │ ├── 20130531pal.csv │ │ ├── 20130601pal.csv │ │ ├── 20130602pal.csv │ │ ├── 20130603pal.csv │ │ ├── 20130604pal.csv │ │ ├── 20130605pal.csv │ │ ├── 20130606pal.csv │ │ ├── 20130607pal.csv │ │ ├── 20130608pal.csv │ │ ├── 20130609pal.csv │ │ ├── 20130610pal.csv │ │ ├── 20130611pal.csv │ │ ├── 20130612pal.csv │ │ ├── 20130613pal.csv │ │ ├── 20130614pal.csv │ │ ├── 20130615pal.csv │ │ ├── 20130616pal.csv │ │ ├── 20130617pal.csv │ │ ├── 20130618pal.csv │ │ ├── 20130619pal.csv │ │ ├── 20130620pal.csv │ │ ├── 20130621pal.csv │ │ ├── 20130622pal.csv │ │ ├── 20130623pal.csv │ │ ├── 20130624pal.csv │ │ ├── 20130625pal.csv │ │ ├── 20130626pal.csv │ │ ├── 20130627pal.csv │ │ ├── 20130628pal.csv │ │ ├── 20130629pal.csv │ │ ├── 20130630pal.csv │ │ ├── 20130701pal.csv │ │ ├── 20130702pal.csv │ │ ├── 20130703pal.csv │ │ ├── 20130704pal.csv │ │ ├── 20130705pal.csv │ │ ├── 20130706pal.csv │ │ ├── 20130707pal.csv │ │ ├── 20130708pal.csv │ │ ├── 20130709pal.csv │ │ ├── 20130710pal.csv │ │ ├── 20130711pal.csv │ │ ├── 20130712pal.csv │ │ ├── 20130713pal.csv │ │ ├── 20130714pal.csv │ │ ├── 20130715pal.csv │ │ ├── 20130716pal.csv │ │ ├── 20130717pal.csv │ │ ├── 20130718pal.csv │ │ ├── 20130719pal.csv │ │ ├── 20130720pal.csv │ │ ├── 20130721pal.csv │ │ ├── 20130722pal.csv │ │ ├── 20130723pal.csv │ │ ├── 20130724pal.csv │ │ ├── 20130725pal.csv │ │ ├── 20130726pal.csv │ │ ├── 20130727pal.csv │ │ ├── 20130728pal.csv │ │ ├── 20130729pal.csv │ │ ├── 20130730pal.csv │ │ ├── 20130731pal.csv │ │ ├── 20130801pal.csv │ │ ├── 20130802pal.csv │ │ ├── 20130803pal.csv │ │ ├── 20130804pal.csv │ │ ├── 20130805pal.csv │ │ ├── 20130806pal.csv │ │ ├── 20130807pal.csv │ │ ├── 20130808pal.csv │ │ ├── 20130809pal.csv │ │ ├── 20130810pal.csv │ │ ├── 20130811pal.csv │ │ ├── 20130812pal.csv │ │ ├── 20130813pal.csv │ │ ├── 20130814pal.csv │ │ ├── 20130815pal.csv │ │ ├── 20130816pal.csv │ │ ├── 20130817pal.csv │ │ ├── 20130818pal.csv │ │ ├── 20130819pal.csv │ │ ├── 20130820pal.csv │ │ ├── 20130821pal.csv │ │ ├── 20130822pal.csv │ │ ├── 20130823pal.csv │ │ ├── 20130824pal.csv │ │ ├── 20130825pal.csv │ │ ├── 20130826pal.csv │ │ ├── 20130827pal.csv │ │ ├── 20130828pal.csv │ │ ├── 20130829pal.csv │ │ ├── 20130830pal.csv │ │ ├── 20130831pal.csv │ │ ├── 20130901pal.csv │ │ ├── 20130902pal.csv │ │ ├── 20130903pal.csv │ │ ├── 20130904pal.csv │ │ ├── 20130905pal.csv │ │ ├── 20130906pal.csv │ │ ├── 20130907pal.csv │ │ ├── 20130908pal.csv │ │ ├── 20130909pal.csv │ │ ├── 20130910pal.csv │ │ ├── 20130911pal.csv │ │ ├── 20130912pal.csv │ │ ├── 20130913pal.csv │ │ ├── 20130914pal.csv │ │ ├── 20130915pal.csv │ │ ├── 20130916pal.csv │ │ ├── 20130917pal.csv │ │ ├── 20130918pal.csv │ │ ├── 20130919pal.csv │ │ ├── 20130920pal.csv │ │ ├── 20130921pal.csv │ │ ├── 20130922pal.csv │ │ ├── 20130923pal.csv │ │ ├── 20130924pal.csv │ │ ├── 20130925pal.csv │ │ ├── 20130926pal.csv │ │ ├── 20130927pal.csv │ │ ├── 20130928pal.csv │ │ ├── 20130929pal.csv │ │ ├── 20130930pal.csv │ │ ├── 20131001pal.csv │ │ ├── 20131002pal.csv │ │ ├── 20131003pal.csv │ │ ├── 20131004pal.csv │ │ ├── 20131005pal.csv │ │ ├── 20131006pal.csv │ │ ├── 20131007pal.csv │ │ ├── 20131008pal.csv │ │ ├── 20131009pal.csv │ │ ├── 20131010pal.csv │ │ ├── 20131011pal.csv │ │ ├── 20131012pal.csv │ │ ├── 20131013pal.csv │ │ ├── 20131014pal.csv │ │ ├── 20131015pal.csv │ │ ├── 20131016pal.csv │ │ ├── 20131017pal.csv │ │ ├── 20131018pal.csv │ │ ├── 20131019pal.csv │ │ ├── 
20131020pal.csv │ │ ├── 20131021pal.csv │ │ ├── 20131022pal.csv │ │ ├── 20131023pal.csv │ │ ├── 20131024pal.csv │ │ ├── 20131025pal.csv │ │ ├── 20131026pal.csv │ │ ├── 20131027pal.csv │ │ ├── 20131028pal.csv │ │ ├── 20131029pal.csv │ │ ├── 20131030pal.csv │ │ ├── 20131031pal.csv │ │ ├── 20131101pal.csv │ │ ├── 20131102pal.csv │ │ ├── 20131103pal.csv │ │ ├── 20131104pal.csv │ │ ├── 20131105pal.csv │ │ ├── 20131106pal.csv │ │ ├── 20131107pal.csv │ │ ├── 20131108pal.csv │ │ ├── 20131109pal.csv │ │ ├── 20131110pal.csv │ │ ├── 20131111pal.csv │ │ ├── 20131112pal.csv │ │ ├── 20131113pal.csv │ │ ├── 20131114pal.csv │ │ ├── 20131115pal.csv │ │ ├── 20131116pal.csv │ │ ├── 20131117pal.csv │ │ ├── 20131118pal.csv │ │ ├── 20131119pal.csv │ │ ├── 20131120pal.csv │ │ ├── 20131121pal.csv │ │ ├── 20131122pal.csv │ │ ├── 20131123pal.csv │ │ ├── 20131124pal.csv │ │ ├── 20131125pal.csv │ │ ├── 20131126pal.csv │ │ ├── 20131127pal.csv │ │ ├── 20131128pal.csv │ │ ├── 20131129pal.csv │ │ ├── 20131130pal.csv │ │ ├── 20131201pal.csv │ │ ├── 20131202pal.csv │ │ ├── 20131203pal.csv │ │ ├── 20131204pal.csv │ │ ├── 20131205pal.csv │ │ ├── 20131206pal.csv │ │ ├── 20131207pal.csv │ │ ├── 20131208pal.csv │ │ ├── 20131209pal.csv │ │ ├── 20131210pal.csv │ │ ├── 20131211pal.csv │ │ ├── 20131212pal.csv │ │ ├── 20131213pal.csv │ │ ├── 20131214pal.csv │ │ ├── 20131215pal.csv │ │ ├── 20131216pal.csv │ │ ├── 20131217pal.csv │ │ ├── 20131218pal.csv │ │ ├── 20131219pal.csv │ │ ├── 20131220pal.csv │ │ ├── 20131221pal.csv │ │ ├── 20131222pal.csv │ │ ├── 20131223pal.csv │ │ ├── 20131224pal.csv │ │ ├── 20131225pal.csv │ │ ├── 20131226pal.csv │ │ ├── 20131227pal.csv │ │ ├── 20131228pal.csv │ │ ├── 20131229pal.csv │ │ ├── 20131230pal.csv │ │ └── 20131231pal.csv ├── gen_load_data.m └── get_state_vars_with_load.m └── python ├── convert_measurement_data.ipynb ├── models ├── __init__.py ├── configs.py └── data_generator.py ├── svm.py └── tf2.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.gitignore.io/api/python,linux 2 | #foobaz 3 | 4 | /figures/* 5 | !/figures/.gitkeep 6 | 7 | ### Python ### 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # Distribution / packaging 17 | .Python 18 | env/ 19 | build/ 20 | develop-eggs/ 21 | dist/ 22 | downloads/ 23 | eggs/ 24 | .eggs/ 25 | lib/ 26 | lib64/ 27 | parts/ 28 | sdist/ 29 | var/ 30 | wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # PyInstaller 36 | # Usually these files are written by a python script from a template 37 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
38 | *.manifest 39 | *.spec 40 | 41 | # Installer logs 42 | pip-log.txt 43 | pip-delete-this-directory.txt 44 | 45 | # Unit test / coverage reports 46 | htmlcov/ 47 | .tox/ 48 | .coverage 49 | .coverage.* 50 | .cache 51 | nosetests.xml 52 | coverage.xml 53 | *,cover 54 | .hypothesis/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | .venv/ 91 | venv/ 92 | ENV/ 93 | .idea/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | 102 | ### Linux ### 103 | *~ 104 | 105 | # temporary files which can be created if a process still has a handle open of a deleted file 106 | .fuse_hidden* 107 | 108 | # KDE directory preferences 109 | .directory 110 | 111 | # Linux trash folder which might appear on any partition or disk 112 | .Trash-* 113 | 114 | # .nfs files are created when an open file is removed but is still being accessed 115 | .nfs* 116 | 117 | # End of https://www.gitignore.io/api/python,linux 118 | # no produced pngs in git repo 119 | *.png 120 | *.pdf 121 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Stefan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # se-test-framework 2 | 3 | A prototypical implementation for the construction of data sets containing False Data Injection Attacks (FDIAs) and evaluating the performance of detecting FDIAs using machine learning-based models. 4 | 5 | **NOTE:** A comprehensive documentation is under development. 6 | 7 | ## Quick Usage Guide 8 | 9 | This prototypical implementation is divided into two parts: the MATLAB part and the Python part. 10 | 11 | The following listing states the functionalities offered by each file in the corresponding part. 
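Ahead of the per-file listing, the sketch below illustrates how the Python side is typically driven once the converted `.pkl` file exists. It is a minimal, illustrative example only: the class and method names (`DataConfig`, `Norm`, `retrieve_data_set`, `create_fdi_X2_y1_se`) come from `models/configs.py` and `models/data_generator.py`, while the pickle path and all parameter values are placeholders that have to be adapted to your own setup (see also the note below).

```python
import pickle
from models.configs import DataConfig, Norm

# Placeholder path: the main pickle produced by convert_measurement_data.ipynb,
# a dict holding the measurements 'z' and the measurement Jacobians 'H'.
with open('data/main_load_data.pkl', 'rb') as f:
    data = pickle.load(f)

# Example attack/data configuration; all values below are illustrative only.
config = DataConfig(
    main_data=data,
    method_name='create_fdi_X2_y1_se',  # DataGenerator method to dispatch to
    norm=Norm.STANDARD,                 # scale features after the train/test split
    atk_index=list(range(16)),          # steps within a window eligible for attack
    subset_size=4,                      # split the state vector into 4 subsets
    n_atk_subsets=1,                    # attack one subset per attacked measurement
    timestep=16,
    c=0.2,                              # bias injected on the attacked state variables
    ratio=0.8,                          # train/test split ratio
    atk_function=0,                     # 0 = generate_atk_subset_msmt
)

# Dispatches to DataGenerator.create_fdi_X2_y1_se(config) and returns a dict with
# 'X_train', 'X_test', 'y_train', 'y_test' plus the attack vectors 'c', 'a' and 'Z'.
rd = config.retrieve_data_set()
print(rd['X_train'].shape, rd['y_train'].shape)
```

As their imports suggest, `svm.py` and `tf2.py` follow this pattern and pass the resulting arrays to a scikit-learn SVC or a TensorFlow RNN, respectively.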
12 | 13 | **NOTE**: All absolute and relative paths in the scripts pointing to saved data sets (e.g., `.mat` or `.pkl` files) have to be adjusted accordingly. 14 | 15 | * Matlab 16 | * `get_state_vars_with_load.m`: Main function for the generation of measurement data based on load data. 17 | * `gen_load_data.m`: An example showing the usage of the function `get_state_vars_with_load`. In this specific example, load data that can be freely obtained from the New York Independent System Operator (NYISO) is used. For convenience, example load data is included under `matlab/data/nyiso_2013/*.csv`. New load data can be obtained at any time from the NYISO website. 18 | * Python 19 | * `convert_measurement_data.ipynb`: The measurement data set generated by `gen_load_data.m` in Matlab is saved to a `.mat` file; usually, every year of NYISO data is saved to its own `.mat` file (e.g., `nyiso_load_13.mat`). With this notebook, all the individual `.mat` files can be imported and merged into a single `.pkl` file. This main `.pkl` file, containing all the measurement data sets, is then used by the Python files described below. 20 | * `tf2.py`: Prototypical implementation of an RNN based on TensorFlow. 21 | * `svm.py`: Prototypical implementation of an SVM based on scikit-learn. 22 | * `models/data_generator.py`: Module for the generation of data sets containing False Data Injection Attacks, based on the measurement data set generated by `gen_load_data.m` in Matlab. 23 | * `models/configs.py`: Module containing all the configuration definitions, classes, and various helper methods. Detailed documentation is under development. 24 | -------------------------------------------------------------------------------- /matlab/gen_load_data.m: -------------------------------------------------------------------------------- 1 | close all; 2 | clear all; 3 | clc; 4 | format compact; 5 | path2tools = 'PR_Toolbox\'; 6 | addpath(path2tools); 7 | 8 | % Load case 9 | %define_constants; 10 | mpc = loadcase('case30'); 11 | 12 | % Retrieve number of load buses 13 | n_loadbus = sum(mpc.bus(:,2) == 1); 14 | 15 | loads = load_prep_data('data\nyiso_2017\*.csv', n_loadbus); 16 | 17 | %% 18 | loads1 = loads(1:10,:) 19 | total1 = get_state_vars_with_load( mpc, loads1 ); 20 | 21 | %% 22 | loads2 = loads(10001:16424,:) 23 | total2 = get_state_vars_with_load(mpc, loads2) 24 | 25 | %% 26 | clear all; 27 | close all; 28 | clc; 29 | load nyiso_load_statevars 30 | 31 | %% Construct a simple FDI attack and compare the WLS residuals 32 | H = total.H(:,:,1); 33 | R_inv = total.R_Inv(:,:,1); 34 | z = total.z(:,1); 35 | x_rad = total.x_rad(:,1); 36 | 37 | c = zeros(size(H,2),1); % bias on the state variables (attack vector c) 38 | c(1) = 2; 39 | c(5) = 2; 40 | a = H*c; % attack vector in measurement space 41 | z_a = z+a; % falsified measurements 42 | 43 | x = (H'*R_inv*H)^-1 * H'*R_inv * z; % WLS state estimate 44 | x_bad = (H'*R_inv*H)^-1 * H'*R_inv * z_a; 45 | 46 | norm(z-H*x) % residual without attack 47 | norm(z_a-H*x_bad) % residual with the (stealthy) attack 48 | 49 | %% 50 | clear all; 51 | close all; 52 | clc; 53 | load nyiso_load_statevars 54 | 55 | timesteps = 16; 56 | iters = floor(size(total.z,1) / timesteps); 57 | 58 | for iter=1:iters 59 | for timestep=1:timesteps 60 | 61 | end 62 | end 63 | for i=1:size(total.z, 1) 64 | if mod(i, 16) == 0 65 | z_a = total.z(i,:) + ((total.H(:,:,i) * (2 * ones(size(total.H(:,:,i),2),1))))'; 66 | X(i,:) = z_a; 67 | Y(i,:) = 1; 68 | else 69 | X(i,:) = total.z(i,:); 70 | %Y(i,:) = 0; 71 | end 72 | end 73 | 74 | usable = floor(size(X,1) / timesteps); 75 | X = X(1:(usable*timesteps),:); 76 | 77 | 78 | 79 | %% 80 | loads = loads'; 81 | %loads = loads(1:1000,:); 82 | z_loads = zscore(loads); 83 | z_loads(:,any(isnan(z_loads),1))=[]; % Delete NaN
columns 84 | 85 | input.x = z_loads; 86 | input.targets = ones(size(input.x, 1), 1); 87 | input.labels = cell(1, size(input.x, 2)); 88 | [data, coeff] = calcPrincipalComponents_matlab(input, 2); 89 | 90 | plotData(data); 91 | title('load zscore'); 92 | 93 | input2.x = loads; 94 | input2.targets = ones(size(input2.x, 1), 1); 95 | input2.labels = cell(1, size(input.x, 2)); 96 | [data2, coeff] = calcPrincipalComponents_matlab(input2, 2); 97 | 98 | plotData(data2); 99 | title('load'); 100 | 101 | %% 102 | input2.x = total.z'; 103 | input2.targets = ones(size(input2.x, 1), 1); 104 | input2.labels = cell(1, size(input.x, 2)); 105 | [data2, coeff] = calcPrincipalComponents_matlab(input2, 2); 106 | 107 | plotData(data2); 108 | title('measurements z') 109 | 110 | %% 111 | se = total.x_rad'; 112 | se(any(isnan(se),2),:)=[]; 113 | 114 | input.x = se; 115 | input.targets = ones(size(input.x,1),1); 116 | input2.labels = cell(1, size(input.x, 2)) 117 | [data3, coeff] = calcPrincipalComponents_matlab(input, 2); 118 | 119 | plotData(data3); 120 | title('state estimation x'); 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /matlab/get_state_vars_with_load.m: -------------------------------------------------------------------------------- 1 | function [ total ] = get_state_vars_with_load( mpc, loads ) 2 | %GET_STATE_VARS Summary of this function goes here 3 | % Detailed explanation goes here 4 | 5 | define_constants; 6 | 7 | % Retrieve number of load buses and their indices 8 | n_loadbus = sum(mpc.bus(:,2) == 1); 9 | idx_loadbus = find(mpc.bus(:,2) == 1); 10 | 11 | % Retrieve number of gen buses and their indices 12 | n_genbus = sum(mpc.bus(:,2) == 2); 13 | idx_genbus = find(mpc.bus(:,2) == 2); 14 | 15 | t0 = tic; 16 | % Iterate over all load sets 17 | for set=1:size(loads,1) 18 | % Iterate over each measurement in the load set 19 | for msmt=1:size(loads,2) 20 | mpc.bus(idx_loadbus(msmt),PD) = loads(set,msmt); 21 | mpc.bus(idx_loadbus(msmt),QD) = 0; 22 | mpc.bus(idx_loadbus(msmt),GS) = 0; 23 | mpc.bus(idx_loadbus(msmt),BS) = 0; 24 | end 25 | 26 | current_load = sum(mpc.bus(:,3)); 27 | current_gen = sum(mpc.gen(:,2)); 28 | 29 | % Calculate, what amount the gen has to be increased to match the new 30 | % load 31 | inc_per_gen = (current_load - current_gen) / n_genbus; 32 | 33 | % Increase the gen for each of the generators 34 | for msmt=2:n_genbus+1 35 | mpc.gen(msmt,2) = mpc.gen(msmt,2) + inc_per_gen; 36 | end 37 | 38 | mpc.baseMVA = max(mpc.bus(:,3)); 39 | 40 | %fprintf('Total load: %.5f\n', current_load); 41 | %fprintf('Total gen: %.5f\n', current_gen); 42 | 43 | results = rundcpf(mpc, mpoption('out.all',0)); 44 | [ x, H, R_Inv, z ] = dc_state_est(mpc, results); 45 | 46 | disp(set); 47 | 48 | total.H(:,:,set) = full(H); 49 | total.R_Inv(:,:,set) = R_Inv; 50 | total.z(set,:) = z; 51 | total.x_se(set,:) = x; 52 | total.x_pf(set,:) = (pi/180) .* results.bus(2:end,9); 53 | total.x_diff(set,:) = total.x_se(set,:) - total.x_pf(set,:); 54 | end 55 | 56 | toc(t0) 57 | 58 | end 59 | 60 | -------------------------------------------------------------------------------- /python/convert_measurement_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stdout", 10 | "output_type": "stream", 11 | "text": [ 12 | "Keys: dict_keys(['H', 'z'])\n", 13 | "H shape: 
(41, 29, 172996)\n", 14 | "z shape: (172996, 41)\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "import scipy.io\n", 20 | "import numpy as np\n", 21 | "from random import randint\n", 22 | "from sklearn.preprocessing import normalize\n", 23 | "files = ['13', '14', '15', '16', '17']\n", 24 | "\n", 25 | "# Contains index and axes for appending arrays to each other\n", 26 | "indexes = {'H': 2, 'z': 0}\n", 27 | "data = {}\n", 28 | "\n", 29 | "for index, file in enumerate(files):\n", 30 | " mat = scipy.io.loadmat('..\\\\..\\\\Matlab\\\\mat\\\\nyiso_load_statevars{}_py'.format(file))\n", 31 | " for k, v in indexes.items():\n", 32 | " if index == 0:\n", 33 | " data[k] = mat[k]\n", 34 | " else:\n", 35 | " data[k] = np.append(data[k], mat[k], axis=v)\n", 36 | " \n", 37 | "print(\"Keys: {}\".format(data.keys()))\n", 38 | "for k, v in indexes.items():\n", 39 | " print(\"{} shape: {}\".format(k, data[k].shape))" 40 | ] 41 | } 42 | ], 43 | "metadata": { 44 | "kernelspec": { 45 | "display_name": "Python 3", 46 | "language": "python", 47 | "name": "python3" 48 | }, 49 | "language_info": { 50 | "codemirror_mode": { 51 | "name": "ipython", 52 | "version": 3 53 | }, 54 | "file_extension": ".py", 55 | "mimetype": "text/x-python", 56 | "name": "python", 57 | "nbconvert_exporter": "python", 58 | "pygments_lexer": "ipython3", 59 | "version": "3.6.4" 60 | } 61 | }, 62 | "nbformat": 4, 63 | "nbformat_minor": 2 64 | } 65 | -------------------------------------------------------------------------------- /python/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binarygraka/se-test-framework/3cf4dca32cc1dcc4ce56a50a6aa298882fc8d004/python/models/__init__.py -------------------------------------------------------------------------------- /python/models/configs.py: -------------------------------------------------------------------------------- 1 | import json 2 | from sklearn.model_selection import train_test_split 3 | import uuid 4 | import os 5 | import numpy as np 6 | import matplotlib 7 | matplotlib.use('Agg') 8 | from matplotlib import pyplot 9 | import csv 10 | from colorama import Fore, init, Back, Style 11 | # Colorama settings 12 | # init(autoreset=True) 13 | from enum import Enum 14 | from .data_generator import DataGenerator 15 | from sklearn.preprocessing import Normalizer, StandardScaler, normalize 16 | 17 | class Norm(Enum): 18 | NONE = 1 19 | STANDARD = 2 20 | NORM_SAMPLES = 3 21 | NORM_FEATURES = 4 22 | MINMAX = 5 23 | 24 | class StructureTemplate(object): 25 | def get_dict(self): 26 | raise NotImplementedError('Method get_dict not implemented in class.') 27 | 28 | class TrainConfig(StructureTemplate): 29 | def __init__(self, train_data_config={}, train_data_analysis={}, valid_data_analysis={}, 30 | # test_data_config={}, test_data_analysis={}, 31 | network_config={}, train_result={}, validation_result={}, test_configs=[]): 32 | self.id = str(uuid.uuid4()) 33 | self.train_data_config = train_data_config 34 | self.network_config = network_config 35 | self.train_data_analysis = train_data_analysis 36 | self.valid_data_analysis = valid_data_analysis 37 | self.train_result = train_result 38 | self.validation_result = validation_result 39 | 40 | def dump_to_json(self, fname): 41 | # Preprocess data 42 | data = {self.id: {'data_config': self.train_data_config.__dict__, 43 | 'test_data_config': self.test_data_config.__dict__, 44 | 'network_config': self.network_config.__dict__, 45 | 'train_result': self.train_result.__dict__}} 
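# NOTE (added editorial comment): the dict above also accesses self.test_data_config, but
# TrainConfig.__init__ never sets that attribute (the corresponding parameter is commented out
# in the signature), so dump_to_json raises AttributeError unless test_data_config is assigned
# on the instance beforehand. Likewise, train_data_config, network_config and train_result must
# be config/result objects exposing __dict__ rather than the plain-dict defaults.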
46 | # Dump data 47 | if not os.path.isfile(fname) or os.stat(fname).st_size == 0: # If file does not exist or is empty 48 | a = [data] 49 | with open(fname, 'w') as f: 50 | json.dump(a, f) 51 | else: 52 | with open(fname, 'r') as f: 53 | feeds = json.load(f) 54 | with open(fname, 'w') as f: 55 | feeds.append(data) 56 | json.dump(feeds, f) 57 | 58 | 59 | def dump_images(self, history): 60 | if not isinstance(history, dict): 61 | history = history.history 62 | 63 | # Check if validation values are present: 64 | if 1 in [1 for key, value in history.items() if 'val' in key.lower()]: 65 | val = True 66 | else: 67 | val = False 68 | 69 | # Only select keys that do not start with 'val' 70 | filtered_history = dict((key, value) for key, value in history.items() if not key.startswith('val')) 71 | 72 | for key in filtered_history.keys(): 73 | pyplot.plot(history[key]) 74 | if val == True: 75 | pyplot.plot(history['val_{}'.format(key)]) 76 | 77 | pyplot.title(self.id) 78 | pyplot.ylabel(key) 79 | pyplot.xlabel('epoch') 80 | 81 | if val == True: 82 | pyplot.legend(['train', 'validation'], loc='upper right') 83 | else: 84 | pyplot.legend(['train'], loc='upper right') 85 | 86 | fname = os.path.join('output', 'images', '{}_{}.png'.format(self.id, key)) 87 | pyplot.savefig(fname) 88 | pyplot.close() 89 | 90 | 91 | def get_dict(self): 92 | return self.__dict__ 93 | 94 | 95 | def dump_to_csv(self, fname): 96 | csv_header = [] 97 | 98 | big_dict = {} 99 | 100 | for key, val in self.get_dict().items(): 101 | if key == 'id': 102 | csv_header.append('id') 103 | big_dict = {key: val} 104 | else: 105 | if key == 'valid_data_analysis': 106 | new_dict = val.get_dict(valid=True) 107 | else: 108 | if hasattr(val, 'get_dict'): 109 | new_dict = val.get_dict() 110 | else: 111 | new_dict = {} 112 | 113 | big_dict = {**big_dict, **new_dict} 114 | 115 | file_exists = os.path.isfile(fname) 116 | with open(fname, 'a', newline='') as csvfile: 117 | writer = csv.DictWriter(csvfile, fieldnames=list(big_dict.keys()), delimiter=';') 118 | if not file_exists: 119 | writer.writeheader() 120 | writer.writerow(big_dict) 121 | print(Fore.YELLOW) 122 | print(big_dict) 123 | print(Style.RESET_ALL) 124 | 125 | 126 | class TestConfig(StructureTemplate): 127 | 128 | def __init__(self, model_id=None, test_data_config=None, test_data_analysis=None, test_result=None): 129 | self.id = str(uuid.uuid4()) 130 | self.model_id = model_id 131 | self.test_data_config = test_data_config 132 | self.test_data_analysis = test_data_analysis 133 | self.test_result = test_result 134 | 135 | 136 | def get_dict(self): 137 | return self.__dict__ 138 | 139 | 140 | def dump_to_csv(self, fname): 141 | csv_header = [] 142 | 143 | big_dict = {} 144 | 145 | # TODO: Check types of elements to handle cases such as 'id' and 'model_id' (which have no get_dict() method) automatically 146 | for key, val in self.get_dict().items(): 147 | if key == 'id': 148 | csv_header.append('id') 149 | big_dict = {key: val} 150 | elif key == 'model_id': 151 | csv_header.append('model_id') 152 | big_dict = {**big_dict, **{key: val}} 153 | else: 154 | if hasattr(val, 'get_dict'): 155 | new_dict = val.get_dict() 156 | else: 157 | new_dict = {} 158 | big_dict = {**big_dict, **new_dict} 159 | 160 | file_exists = os.path.isfile(fname) 161 | with open(fname, 'a', newline='') as csvfile: 162 | writer = csv.DictWriter(csvfile, fieldnames=list(big_dict.keys()), delimiter=';') 163 | if not file_exists: 164 | writer.writeheader() 165 | writer.writerow(big_dict) 166 | print(Fore.YELLOW) 167 | 
print(big_dict) 168 | print(Style.RESET_ALL) 169 | 170 | 171 | class DataConfig(StructureTemplate): 172 | 173 | def __init__(self, main_data=None, method_name=None, norm=Norm.NONE, atk_index=None, subset_size=None, n_atk_subsets=None, 174 | range_atk=None, timestep=16, c=0.2, verbose=True, ratio=1, random=0, P=None, A=None, atk_function=None): 175 | self.data_generator = DataGenerator(main_data) 176 | self.method_name = method_name 177 | self.norm = norm 178 | self.atk_index = atk_index 179 | self.subset_size = subset_size 180 | self.n_atk_subsets = n_atk_subsets 181 | self.timestep = timestep 182 | self.c = c 183 | self.verbose = verbose 184 | self.range_atk = range_atk 185 | self.ratio = ratio 186 | self.random = random 187 | self.P = P 188 | self.A = A 189 | self.atk_function = atk_function 190 | 191 | 192 | def print(self): 193 | print("method_name : {}".format(self.method_name)) 194 | print("norm : {}".format(self.norm)) 195 | print("atk_index : {}".format(self.atk_index)) 196 | print("subset_size : {}".format(self.subset_size)) 197 | print("n_atk_subsets : {}".format(self.n_atk_subsets)) 198 | print("timestep : {}".format(self.timestep)) 199 | print("c : {}".format(self.c)) 200 | print("verbose : {}".format(self.verbose)) 201 | print("range_atk : {}".format(self.range_atk)) 202 | 203 | 204 | def retrieve_data_set(self): 205 | rd = getattr(self.data_generator, self.method_name)(self) 206 | return rd 207 | 208 | 209 | def get_dict(self): 210 | exclude = ['data_generator'] 211 | return {k:v for k, v in self.__dict__.items() if k not in exclude} 212 | 213 | 214 | class KerasNetworkConfig(StructureTemplate): 215 | 216 | def __init__(self, num_input=None, timestep=None, num_hidden1=None, num_hidden2=None, num_output=None, batch_size=None, 217 | epochs=None, dropout=None, early_stopping=False, csvlogger=False, tensorboard=False, 218 | checkpoint=False): 219 | self.num_input = num_input 220 | self.timestep = timestep 221 | self.num_hidden1 = num_hidden1 222 | self.num_hidden2 = num_hidden2 223 | self.num_output = num_output 224 | self.batch_size = batch_size 225 | self.epochs = epochs 226 | self.dropout = dropout 227 | self.early_stopping = early_stopping 228 | self.csvlogger = csvlogger 229 | self.tensorboard = tensorboard 230 | self.checkpoint = checkpoint 231 | 232 | 233 | def get_dict(self): 234 | return self.__dict__ 235 | 236 | class TfNetworkConfig(StructureTemplate): 237 | 238 | def __init__(self, num_input=None, timestep=None, num_hidden=None, num_output=None, batch_size=None, 239 | epochs=None, orig_decay=None, max_lr_epoch=None, lr_given=None, dropout_given=None, tolerance=None, 240 | display_step=None, hidden_layers=None, train_stop=None, val_loss_improv=0): 241 | self.num_input = num_input 242 | self.timestep = timestep 243 | self.num_hidden = num_hidden 244 | self.num_output = num_output 245 | self.hidden_layers = hidden_layers 246 | self.batch_size = batch_size 247 | self.epochs = epochs 248 | self.orig_decay = orig_decay 249 | self.max_lr_epoch = max_lr_epoch 250 | self.lr_given = lr_given 251 | self.dropout_given = dropout_given 252 | self.tolerance = tolerance 253 | self.display_step = display_step 254 | self.train_stop = train_stop 255 | self.val_loss_improv = val_loss_improv 256 | 257 | 258 | def get_dict(self): 259 | return self.__dict__ 260 | 261 | 262 | class Analyzer: 263 | def X_y_data(data): 264 | X = data['X'] 265 | y = data['y'] 266 | 267 | X_shape = X.shape 268 | y_shape = y.shape 269 | msmts_total = y.size 270 | msmts_atk = np.count_nonzero(y) 271 | 
msmts_non_atk = np.count_nonzero(y == 0) 272 | msmts_p_atk = msmts_atk / msmts_total 273 | rows_total = len(y) 274 | rows_atk = np.count_nonzero(np.count_nonzero(y, axis=1)) 275 | rows_non_atk = np.count_nonzero(np.count_nonzero(y, axis=1) == 0) 276 | rows_p_atk = rows_atk / rows_total 277 | mean_atk_msmts_per_row = np.mean(np.count_nonzero(y, axis=1)) 278 | 279 | return EvalValues( 280 | X_shape = X_shape, 281 | y_shape = y_shape, 282 | msmts_total = msmts_total, 283 | msmts_atk = msmts_atk, 284 | msmts_non_atk = msmts_non_atk, 285 | msmts_p_atk = msmts_p_atk, 286 | rows_total = rows_total, 287 | rows_atk = rows_atk, 288 | rows_non_atk = rows_non_atk, 289 | rows_p_atk = rows_p_atk, 290 | mean_atk_msmts_per_row=mean_atk_msmts_per_row 291 | ) 292 | 293 | 294 | class EvalValues(StructureTemplate): 295 | def __init__(self, X_shape=None, y_shape=None, msmts_total=None, msmts_atk=None, msmts_non_atk=None, msmts_p_atk=None, rows_total=None, 296 | rows_atk=None, rows_non_atk=None, rows_p_atk=None, mean_atk_msmts_per_row=None): 297 | self.X_shape = X_shape 298 | self.y_shape = y_shape 299 | self.msmts_total = msmts_total 300 | self.msmts_atk = msmts_atk 301 | self.msmts_non_atk = msmts_non_atk 302 | self.msmts_p_atk = msmts_p_atk 303 | self.rows_total = rows_total 304 | self.rows_atk = rows_atk 305 | self.rows_non_atk = rows_non_atk 306 | self.rows_p_atk = rows_p_atk 307 | self.mean_atk_msmts_per_row = mean_atk_msmts_per_row 308 | 309 | 310 | def get_dict(self, valid=False): 311 | if valid == True: 312 | return {'V_{}'.format(k): v for k, v in self.__dict__.items()} 313 | else: 314 | return self.__dict__ 315 | 316 | 317 | class Result(StructureTemplate): 318 | def __init__(self, train_time=None, stopped_epoch=None, model_eval_values=None): 319 | self.train_time = train_time 320 | self.model_eval_values = model_eval_values 321 | self.stopped_epoch = stopped_epoch 322 | 323 | 324 | def get_dict(self): 325 | return {**{k:v for k, v in self.__dict__.items() if not k == 'model_eval_values'}, **self.model_eval_values} 326 | 327 | 328 | class TestResult(StructureTemplate): 329 | def __init__(self, model_eval_values=None): 330 | self.model_eval_values = model_eval_values 331 | 332 | 333 | def get_dict(self): 334 | return {**{k: v for k, v in self.__dict__.items() if not k == 'model_eval_values'}, **self.model_eval_values} -------------------------------------------------------------------------------- /python/models/data_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import randint 3 | from sklearn.preprocessing import normalize, StandardScaler, MinMaxScaler, Normalizer 4 | import models.configs 5 | import random 6 | from sklearn.model_selection import train_test_split 7 | 8 | class DataGenerator: 9 | 10 | def __init__(self, data): 11 | # os.path.join('..', 'data', 'main_load_data.pkl') 12 | if not data: 13 | raise ValueError("Invalid argument.") 14 | elif not isinstance(data, dict): 15 | raise ValueError("Data is not of type dictionary.") 16 | self._data = data 17 | 18 | # @property 19 | # def data(self): 20 | # print("Getting value") 21 | # return self._data 22 | # 23 | # @data.setter 24 | # def data(self, value): 25 | # if not value: 26 | # raise ValueError("No data has been specified.") 27 | # self._data = value 28 | 29 | 30 | def create_fdi_X3_y1(self, config, data=""): 31 | """ 32 | Generates FDI data. 33 | Each element in the config.atk_index gets attacked by a probability of 0.5. 
34 | If at least one element in each config.timestep got attacked, the label is 1. Otherwise the label is 0. 35 | 36 | @type z: ndarray(num_data, num_input) 37 | @param z: Measurements 38 | @type H: ndarray(num_input, _, num_data) 39 | @param H: Measurement Jacobian Matrix 40 | @type config.atk_index: array 41 | @param config.atk_index: Indices of config.timestep that get attacked by a chance of 50% 42 | @type config.norm: boolean 43 | @param config.norm: Normalize using sklearn.normalize, which sets every vector of the sample to unit config.norm 44 | @type config.timestep: int 45 | @param config.timestep: Size in which z gets grouped (default: 16) 46 | @type config.c: float 47 | @param config.c: Error that gets introduced to the state estimation variables (default: 0.2) 48 | 49 | @rtype X: ndarray(num_data, config.timestep, num_input) 50 | @return X: Data 51 | @rtype y: ndarray(num_data, 1) 52 | @return y: Label 53 | """ 54 | 55 | if not config and not isinstance(config, models.config.DataConfig): 56 | raise ValueError("models.config.DataConfig is invalid.") 57 | 58 | if not data and not isinstance(data, dict): 59 | data = self._data 60 | z = data['z'] 61 | H = data['H'] 62 | 63 | usable = int(len(z) / config.timestep) 64 | z_len = len(z[1]) 65 | 66 | y = np.zeros((usable, 1)) 67 | X = np.zeros((usable, config.timestep, z_len)) 68 | 69 | for t in iter(range(usable)): 70 | temp = np.zeros((config.timestep, z_len)) 71 | 72 | for step in iter(range(config.timestep)): 73 | idx = (t * config.timestep) + step 74 | temp[step, :] = z[idx, :] 75 | 76 | if step in config.atk_index: 77 | if np.random.randint(2): 78 | 79 | n_se = H[:, :, idx].shape[1] 80 | 81 | xx = np.zeros((n_se, 1)) 82 | xx[10:11] = config.c 83 | 84 | z_a = z[idx, :] + np.transpose(np.dot(H[:, :, idx], xx)) 85 | temp[step, :] = z_a 86 | y[t] = 1 87 | else: 88 | temp[step, :] = z[idx, :] 89 | y[t] = 0 90 | if config.norm is True: 91 | X[t, :, :] = normalize(temp) 92 | else: 93 | X[t, :, :] = temp 94 | 95 | if config.verbose: 96 | print("Max: {}, min: {}".format(np.amax(X), np.amin(X))) 97 | print("X shape: {}".format(X.shape)) 98 | print("y shape: {}".format(y.shape)) 99 | 100 | return X, y 101 | 102 | 103 | def create_fdi_X3_y1_se(self, config, data=""): 104 | """ 105 | Generates FDI data. 106 | Each element in the config.atk_index gets attacked by a probability of 0.5. 107 | If at least one element in each config.timestep got attacked, the label is 1. Otherwise the label is 0. 108 | 109 | @type z: ndarray(num_data, num_input) 110 | @param z: Measurements 111 | @type H: ndarray(num_input, _, num_data) 112 | @param H: Measurement Jacobian Matrix 113 | @type config.atk_index: array 114 | @param config.atk_index: Indices of config.timestep that get attacked by a chance of 50% 115 | @type config.norm: boolean 116 | @param config.norm: Normalize using sklearn.normalize, which sets every vector of the sample to unit config.norm 117 | @type config.subset_size: int 118 | @param config.subset_size: Defines in how many parts the state estimation variables should be grouped 119 | @type config.n_atk_subsets: int 120 | @param config.n_atk_subsets: Defines how many of the subset parts should be attacked. Cannot be higher than the number of subsets. 
121 | @type config.timestep: int 122 | @param config.timestep: Size in which z gets grouped (default: 16) 123 | @type config.c: float 124 | @param config.c: Error that gets introduced to the state estimation variables (default: 0.2) 125 | 126 | @rtype X: ndarray(num_data, config.timestep, num_input) 127 | @return X: Data 128 | @rtype y: ndarray(num_data, 1) 129 | @return y: Label 130 | """ 131 | 132 | if not config and not isinstance(config, models.config.DataConfig): 133 | raise ValueError("models.config.DataConfig is invalid.") 134 | 135 | if not data and not isinstance(data, dict): 136 | data = self._data 137 | z = data['z'] 138 | H = data['H'] 139 | 140 | usable = int(len(z) / config.timestep) 141 | z_len = len(z[1]) 142 | 143 | y = np.zeros((usable, config.subset_size)) 144 | X = np.zeros((usable, config.timestep, z_len)) 145 | S = np.zeros((usable, H[:, :, 1].shape[1])) 146 | 147 | for t in iter(range(usable)): 148 | temp = np.zeros((config.timestep, z_len)) 149 | 150 | for step in iter(range(config.timestep)): 151 | idx = (t * config.timestep) + step 152 | temp[step, :] = z[idx, :] 153 | 154 | if step in config.atk_index: 155 | if np.random.randint(2): 156 | 157 | n_se = H[:, :, idx].shape[1] 158 | 159 | # Attack only subset of state variables 160 | se_w = int(H[:, :, idx].shape[1] / config.subset_size) # Integer sets 161 | se_remainder = H[:, :, idx].shape[1] % config.subset_size # Remainder 162 | # se_attacked = [randint(0, config.subset_size - 1), randint(0, config.subset_size - 1)] # Attacked subsets 163 | se_attacked = [randint(0, config.subset_size - 1) for _ in range(config.n_atk_subsets)] 164 | 165 | se_atk_index = np.zeros((n_se, 1)) 166 | for x in range(config.subset_size): 167 | if x in se_attacked: # Attacked set 168 | if x == config.subset_size - 1: # Last element 169 | se_atk_index[x * se_w:(x * se_w) + se_w + se_remainder] = config.c 170 | else: 171 | se_atk_index[x * se_w:(x * se_w) + se_w] = config.c 172 | 173 | z_a = z[idx, :] + np.transpose(np.dot(H[:, :, idx], se_atk_index)) 174 | temp[step, :] = z_a 175 | y[t, se_attacked] = 1 176 | S[t, :] = np.transpose(se_atk_index) 177 | else: 178 | temp[step, :] = z[idx, :] 179 | y[t] = 0 180 | if config.norm is True: 181 | X[t, :, :] = normalize(temp) 182 | else: 183 | X[t, :, :] = temp 184 | 185 | if config.verbose: 186 | print("Max: {}, min: {}, after".format(np.amax(X), np.amin(X))) 187 | print("X shape: {}".format(X.shape)) 188 | # print("S shape: {}".format(S.shape)) 189 | print("y shape: {}".format(y.shape)) 190 | 191 | return X, y#, S 192 | 193 | 194 | def create_fdi_X3_y1_se_window(self, config, data=""): 195 | rd = self.create_fdi_X2_y1_se(config) 196 | 197 | if config.ratio != 1: 198 | X_train, y_train = self.create_fdi_window_from_fdi_X2_y1_se(rd['X_train'], rd['y_train'], config) 199 | X_test, y_test = self.create_fdi_window_from_fdi_X2_y1_se(rd['X_test'], rd['y_test'], config) 200 | else: 201 | X_train, y_train = self.create_fdi_window_from_fdi_X2_y1_se(rd['X_train'], rd['y_train'], config) 202 | X_test = None 203 | y_test = None 204 | 205 | return {'X_train': X_train, 'X_test': X_test, 'y_train': y_train, 'y_test': y_test, 'c': rd['c'], 'a': rd['a']} 206 | 207 | 208 | def create_fdi_X3_yn(self, config, data=""): 209 | """ 210 | Generates FDI data. 211 | At every set of config.timestep, a random number that is part of config.range_atk gets chosen which determines, how many measurements will get attacked. 212 | E.g. 
config.range_atk=[3,6] --> This means that for every config.timestep between 3 and 6 measurements get attacked. 213 | The label is 1 for every element that has been attacked and 0 otherwise. 214 | 215 | @type z: ndarray(num_data, num_input) 216 | @param z: Measurements 217 | @type H: ndarray(num_input, _, num_data) 218 | @param H: Measurement Jacobian Matrix 219 | @type config.range_atk: array 220 | @param config.range_atk: Range for number of measurements that get attacked in every set of config.timestep 221 | @type config.norm: boolean 222 | @param config.norm: Normalize using sklearn.normalize, which sets every vector of the sample to unit config.norm 223 | @type config.timestep: int 224 | @param config.timestep: Size in which z gets grouped (default: 16) 225 | @type config.c: float 226 | @param config.c: Error that gets introduced to the state estimation variables (default: 0.2) 227 | 228 | @rtype X: ndarray(num_data, config.timestep, num_input) 229 | @return X: Data 230 | @rtype y: ndarray(num_data, config.timestep) 231 | @return y: Label 232 | """ 233 | 234 | if not config and not isinstance(config, models.config.DataConfig): 235 | raise ValueError("models.config.DataConfig is invalid.") 236 | 237 | if not data and not isinstance(data, dict): 238 | data = self._data 239 | z = data['z'] 240 | H = data['H'] 241 | 242 | usable = int(len(z) / config.timestep) 243 | z_len = len(z[1]) 244 | 245 | y = np.zeros((usable, config.timestep)) 246 | X = np.zeros((usable, config.timestep, z_len)) 247 | 248 | for t in iter(range(usable)): 249 | temp = np.zeros((config.timestep, z_len)) 250 | n_atk_msmts = randint(config.range_atk[0], config.range_atk[1]) 251 | config.atk_index = [randint(0, config.timestep - 1) for _ in range(n_atk_msmts)] 252 | 253 | for step in iter(range(config.timestep)): 254 | idx = (t * config.timestep) + step 255 | temp[step, :] = z[idx, :] 256 | 257 | if step in config.atk_index: 258 | n_se = H[:, :, idx].shape[1] 259 | 260 | z_a = z[idx, :] + np.transpose(np.dot(H[:, :, idx], (config.c * np.ones((n_se, 1))))) 261 | temp[step, :] = z_a 262 | y[t, step] = 1 263 | else: 264 | temp[step, :] = z[idx, :] 265 | 266 | if config.norm is True: 267 | X[t, :, :] = normalize(temp) 268 | else: 269 | X[t, :, :] = temp 270 | 271 | if config.verbose: 272 | print("Max: {}, min: {}, after".format(np.amax(X), np.amin(X))) 273 | print("X shape: {}".format(X.shape)) 274 | print("y shape: {}".format(y.shape)) 275 | 276 | return X, y 277 | 278 | 279 | def create_fdi_X2_y1(self, config, data=""): 280 | """ 281 | Generates FDI data. 282 | Each element in the config.atk_index gets attacked by a probability of 0.5. 283 | If at least one element in each config.timestep got attacked, the label is 1. Otherwise the label is 0. 284 | 285 | Different to create_fdi_X3_y1, X and y are not grouped into arrays of size config.timestep, but are continuous. 
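The attack itself follows the usual FDIA construction: a bias c (here config.c on every state variable) is mapped through the measurement Jacobian, a = H * c, and added to the measurements, z_a = z + a, so the falsified measurements remain consistent with the DC measurement model.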
286 | 287 | @type z: ndarray(num_data, num_input) 288 | @param z: Measurements 289 | @type H: ndarray(num_input, _, num_data) 290 | @param H: Measurement Jacobian Matrix 291 | @type config.atk_index: array 292 | @param config.atk_index: Indices of config.timestep that get attacked by a chance of 50% 293 | @type config.norm: boolean 294 | @param config.norm: Normalize using sklearn.normalize, which sets every vector of the sample to unit config.norm 295 | @type config.timestep: int 296 | @param config.timestep: Size in which z gets grouped (default: 16) 297 | @type config.c: float 298 | @param config.c: Error that gets introduced to the state estimation variables (default: 0.2) 299 | 300 | @rtype X: ndarray(num_data*config.timestep, num_input) 301 | @return X: Data 302 | @rtype y: ndarray(num_data*config.timestep, 1) 303 | @return y: Label 304 | """ 305 | 306 | if not config and not isinstance(config, models.config.DataConfig): 307 | raise ValueError("models.config.DataConfig is invalid.") 308 | 309 | if not data and not isinstance(data, dict): 310 | data = self._data 311 | z = data['z'] 312 | H = data['H'] 313 | 314 | usable = int(len(z) / config.timestep) 315 | z_len = len(z[1]) 316 | 317 | y = np.zeros((usable * config.timestep, 1)) 318 | X = np.zeros((usable * config.timestep, z_len)) 319 | 320 | for t in iter(range(usable)): 321 | temp = np.zeros((config.timestep, z_len)) 322 | 323 | for step in iter(range(config.timestep)): 324 | idx = (t * config.timestep) + step 325 | X[idx, :] = z[idx, :] 326 | 327 | if step in config.atk_index: 328 | if np.random.randint(2): 329 | 330 | n_se = H[:, :, idx].shape[1] 331 | 332 | z_a = z[idx, :] + np.transpose(np.dot(H[:, :, idx], (config.c * np.ones((n_se, 1))))) 333 | X[idx, :] = z_a 334 | y[idx] = 1 335 | else: 336 | X[idx, :] = z[idx, :] 337 | y[idx] = 0 338 | else: 339 | y[idx] = 0 340 | if config.norm is True: X = normalize(X) 341 | 342 | if config.verbose: 343 | print("Max: {}, min: {}, after".format(np.amax(X), np.amin(X))) 344 | print("X shape: {}".format(X.shape)) 345 | print("y shape: {}".format(y.shape)) 346 | 347 | return X, y 348 | 349 | 350 | def create_fdi_X2_y1_se(self, config, data=""): 351 | """ 352 | Generates FDI data. 353 | Each element in the config.atk_index gets attacked by a probability of 0.5. 354 | If at least one attack in each timespan of size timestep (e.g. span of 16) is present, the label is 1. Otherwise the label is 0. 355 | 356 | Different to create_fdi_X3_y1, X and y are not grouped into arrays of size config.timestep, but are continuous. 357 | 358 | @type z: ndarray(num_data, num_input) 359 | @param z: Measurements 360 | @type H: ndarray(num_input, _, num_data) 361 | @param H: Measurement Jacobian Matrix 362 | @type config.atk_index: array 363 | @param config.atk_index: Indices of config.timestep that get attacked by a chance of 50% 364 | @type config.norm: boolean 365 | @param config.norm: Normalize using sklearn.normalize, which sets every vector of the sample to unit config.norm 366 | @type config.timestep: int 367 | @param config.timestep: Size in which z gets grouped (default: 16) 368 | @type config.c: float 369 | @param config.c: Error that gets introduced to the state estimation variables (default: 0.2) 370 | @type config.P: int (0 <= number <= 1) 371 | @param config.P: Amount of timesteps that get attacked. E.g., 0.1 means that 10% of all timesteps get attacked. 
atk_index is useless if P is specified 372 | @type config.atk_function: int 373 | @param config.atk_function: 0: generate_atk_subset_msmt, 1: generate_prob_distrib_msmt_all, 2: generate_prob_distrib_msmt 374 | 375 | @rtype X: ndarray(num_data*config.timestep, num_input) 376 | @return X: Data 377 | @rtype y: ndarray(num_data*config.timestep, 1) 378 | @return y: Label 379 | """ 380 | 381 | if not config and not isinstance(config, models.config.DataConfig): 382 | raise ValueError("models.config.DataConfig is invalid.") 383 | 384 | if not data and not isinstance(data, dict): 385 | data = self._data 386 | z = data['z'] 387 | H = data['H'] 388 | 389 | usable = int(len(z) / config.timestep) 390 | z_len = len(z[1]) 391 | 392 | y = np.zeros((usable * config.timestep, config.subset_size)) 393 | X = np.zeros((usable * config.timestep, z_len)) 394 | Z = np.zeros((usable * config.timestep, z_len)) 395 | S = np.zeros((usable, H[:, :, 1].shape[1])) 396 | a = np.zeros((usable * config.timestep, z_len)) 397 | c = np.zeros((usable * config.timestep, H[:, :, 1].shape[1])) 398 | 399 | for t in iter(range(usable)): 400 | 401 | for step in iter(range(config.timestep)): 402 | idx = (t * config.timestep) + step 403 | X[idx, :] = z[idx, :] 404 | 405 | if config.P: 406 | if random.random() >= (1 - config.P): 407 | if config.atk_function == 0: 408 | z_a, se_attacked, c_t, a_t = self.generate_atk_subset_msmt(H[:, :, idx], z[idx, :], config.subset_size, 409 | config.n_atk_subsets, config.c) 410 | elif config.atk_function == 1: 411 | z_a, se_attacked, c_t, a_t = self.generate_prob_distrib_msmt_all(H[:, :, idx], z[idx, :], config.subset_size, 412 | config.A) 413 | elif config.atk_function == 2: 414 | z_a, se_attacked, c_t, a_t = self.generate_prob_distrib_msmt(H[:, :, idx], z[idx, :], config.subset_size, 415 | config.n_atk_subsets, config.A) 416 | elif config.atk_function == 3: 417 | z_a, se_attacked, c_t, a_t = self.generate_prob_distrib_msmt(H[:, :, idx], z[idx, :], config.subset_size, 418 | config.n_atk_subsets, config.A, mean_zero=True) 419 | c[idx, :] = c_t.ravel() 420 | a[idx, :] = a_t 421 | 422 | X[idx, :] = z_a 423 | Z[idx, :] = z[idx, :] 424 | y[idx, se_attacked] = 1 425 | else: 426 | if step in config.atk_index and np.random.randint(2): 427 | if config.atk_function == 0: 428 | z_a, se_attacked, c_t, a_t = self.generate_atk_subset_msmt(H[:, :, idx], z[idx, :], config.subset_size, config.n_atk_subsets, 429 | config.c) 430 | elif config.atk_function == 1: 431 | z_a, se_attacked, c_t, a_t = self.generate_prob_distrib_msmt_all(H[:, :, idx], z[idx, :], config.subset_size, 432 | config.n_atk_subbsets, config.A) 433 | elif config.atk_function == 2: 434 | z_a, se_attacked, c_t, a_t = self.generate_prob_distrib_msmt(H[:, :, idx], z[idx, :], config.subset_size, 435 | config.n_atk_subsets, config.A) 436 | elif config.atk_function == 3: 437 | z_a, se_attacked, c_t, a_t = self.generate_prob_distrib_msmt(H[:, :, idx], z[idx, :], config.subset_size, 438 | config.n_atk_subsets, config.A, mean_zero=True) 439 | c[idx, :] = c_t.ravel() 440 | a[idx, :] = a_t 441 | 442 | X[idx, :] = z_a 443 | Z[idx, :] = z[idx, :] 444 | y[idx, se_attacked] = 1 445 | 446 | if config.verbose: 447 | print("Before. 
Max: {}, min: {}".format(np.amax(X), np.amin(X))) 448 | print("X shape: {}".format(X.shape)) 449 | print("y shape: {}".format(y.shape)) 450 | 451 | if config.ratio != 1: 452 | X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=config.ratio, random_state=config.random) 453 | 454 | if config.norm != models.configs.Norm.NONE: 455 | X_train, X_test = self.normalize_data(config.norm, X_train, X_test) 456 | else: 457 | X_train = X 458 | y_train = y 459 | X_test = None 460 | y_test = None 461 | if config.norm != models.configs.Norm.NONE: 462 | X_train, X_test = self.normalize_data(config.norm, X_train, X_test) 463 | 464 | if config.verbose: 465 | print("After. Max: {}, min: {}".format(np.amax(X_train), np.amin(X_train))) 466 | 467 | return {'X_train': X_train, 'X_test': X_test, 'y_train': y_train, 'y_test': y_test, 'c': c, 'a': a, 'Z': Z} 468 | 469 | 470 | def normalize_data(self, norm, X_train, X_test): 471 | if norm is models.configs.Norm.STANDARD: 472 | scaler = StandardScaler() 473 | scaler.fit(X_train) 474 | X_train = scaler.transform(X_train) 475 | if X_test is not None: X_test = scaler.transform(X_test) 476 | elif norm is models.configs.Norm.NORM_SAMPLES: # Samples are scaled individually, therefore norms do not have to be saved 477 | scaler = Normalizer() 478 | scaler.fit(X_train) 479 | X_train = scaler.transform(X_train) 480 | X_train = normalize(X_train, axis=1) 481 | if X_test is not None: X_test = scaler.transform(X_test) 482 | # if X_test is not None: X_test = normalize(X_test, axis=1) 483 | elif norm is models.configs.Norm.NORM_FEATURES: 484 | X_train, norms = normalize(X_train, return_norm=True, axis=0) 485 | if X_test is not None: X_test = X_test / norms 486 | elif norm is models.configs.Norm.MINMAX: 487 | scaler = MinMaxScaler() 488 | scaler.fit(X_train) 489 | X_train = scaler.transform(X_train) 490 | if X_test is not None: X_test = scaler.transform(X_test) 491 | 492 | return X_train, X_test 493 | 494 | 495 | def generate_prob_distrib_msmt_all(self, H, z, subset_size, A): 496 | ''' 497 | Attack all subsets, such that c forms a normal distribution with mean and std from the measurement (Yan16a, Ozay16) 498 | Scaled by A 499 | 500 | :param H: 501 | :param z: 502 | :param A: 503 | :return: 504 | ''' 505 | n_se = H.shape[1] 506 | se_attacked = range(subset_size) 507 | 508 | c = np.random.normal(np.mean(z), np.std(z), n_se) * A 509 | 510 | a = np.transpose(np.dot(H, c)) 511 | z_a = z + a 512 | 513 | return z_a, se_attacked, c, a 514 | 515 | 516 | def generate_atk_subset_msmt(self, H, z, subset_size, n_atk_subsets, c): 517 | n_se = H.shape[1] 518 | 519 | # Attack only subset of state variables 520 | se_w = int(H.shape[1] / subset_size) # Integer sets 521 | se_remainder = H.shape[1] % subset_size # Remainder 522 | # se_attacked = [randint(0, subset_size - 1) for _ in range(n_atk_subsets)] 523 | se_attacked = np.random.choice(subset_size, n_atk_subsets, replace=False) 524 | 525 | se_atk_index = np.zeros((n_se, 1)) 526 | for x in range(subset_size): 527 | if x in se_attacked: # Attacked set 528 | if x == subset_size - 1: # Last element 529 | se_atk_index[x * se_w:(x * se_w) + se_w + se_remainder] = c 530 | else: 531 | se_atk_index[x * se_w:(x * se_w) + se_w] = c 532 | 533 | a = np.transpose(np.dot(H, se_atk_index)) 534 | z_a = z + a 535 | return z_a, se_attacked, se_atk_index, a 536 | 537 | 538 | def generate_prob_distrib_msmt(self, H, z, subset_size, n_atk_subsets, A, mean_zero=False): 539 | n_se = H.shape[1] 540 | 541 | # Attack only subset of state variables 542 | 
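# (added explanatory comments) The n_se state variables are partitioned into subset_size contiguous
# blocks of width se_w = floor(n_se / subset_size); any remainder is absorbed by the last block.
# n_atk_subsets blocks are then drawn uniformly without replacement, and only their entries of the
# bias vector c receive Gaussian values drawn from N(mean(z), std(z)) and scaled by A
# (the mean is forced to 0 when mean_zero=True); all other entries of c remain zero.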
se_w = int(H.shape[1] / subset_size) # Integer sets 543 | se_remainder = H.shape[1] % subset_size # Remainder 544 | # se_attacked = [randint(0, subset_size - 1) for _ in range(n_atk_subsets)] 545 | se_attacked = np.random.choice(subset_size, n_atk_subsets, replace=False) 546 | 547 | c = np.zeros((n_se, 1)) 548 | for x in range(subset_size): 549 | if x in se_attacked: # Attacked set 550 | if mean_zero: 551 | z_mean = 0 552 | else: 553 | z_mean = np.mean(z) 554 | if x == subset_size - 1: # Last element 555 | c[x * se_w:(x * se_w) + se_w + se_remainder] = np.multiply(np.random.normal(z_mean, np.std(z), (se_w + se_remainder, 1)), A) 556 | else: 557 | c[x * se_w:(x * se_w) + se_w] = np.multiply(np.random.normal(z_mean, np.std(z), (se_w, 1)), A) 558 | 559 | a = np.transpose(np.dot(H, c)) 560 | z_a = z + a 561 | return z_a, se_attacked, c, a 562 | 563 | 564 | 565 | def create_fdi_X2_yn(self, config, data=""): 566 | """ 567 | Generates FDI data. 568 | At every set of config.timestep, a random number that is part of config.range_atk gets chosen which determines, how many measurements will get attacked. 569 | E.g. config.range_atk=[3,6] --> This means that for every config.timestep between 3 and 6 measurements get attacked. 570 | The label is 1 for every element that has been attacked and 0 otherwise. 571 | 572 | Different to create_fdi_X3_yn, X and y are not grouped into arrays of size config.timestep, but are continuous. 573 | 574 | @type z: ndarray(num_data, num_input) 575 | @param z: Measurements 576 | @type H: ndarray(num_input, _, num_data) 577 | @param H: Measurement Jacobian Matrix 578 | @type config.range_atk: array 579 | @param config.range_atk: Range for number of measurements that get attacked in every set of config.timestep 580 | @type config.norm: boolean 581 | @param config.norm: Normalize using sklearn.normalize, which sets every vector of the sample to unit config.norm 582 | @type config.timestep: int 583 | @param config.timestep: Size in which z gets grouped (default: 16) 584 | @type config.c: float 585 | @param config.c: Error that gets introduced to the state estimation variables (default: 0.2) 586 | 587 | @rtype X: ndarray(num_data*config.timestep,, num_input) 588 | @return X: Data 589 | @rtype y: ndarray(num_data*config.timestep, config.timestep) 590 | @return y: Label 591 | """ 592 | 593 | if not config and not isinstance(config, models.config.DataConfig): 594 | raise ValueError("models.config.DataConfig is invalid.") 595 | 596 | if not data and not isinstance(data, dict): 597 | data = self._data 598 | z = data['z'] 599 | H = data['H'] 600 | 601 | usable = int(len(z) / config.timestep) 602 | z_len = len(z[1]) 603 | 604 | y = np.zeros((usable * config.timestep, 1)) 605 | X = np.zeros((usable * config.timestep, z_len)) 606 | 607 | for t in iter(range(usable)): 608 | 609 | n_atk_msmts = randint(config.range_atk[0], config.range_atk[1]) 610 | config.atk_index = [randint(0, config.timestep - 1) for _ in range(n_atk_msmts)] 611 | 612 | for step in iter(range(config.timestep)): 613 | idx = (t * config.timestep) + step 614 | X[idx, :] = z[idx, :] 615 | 616 | if step in config.atk_index: 617 | n_se = H[:, :, idx].shape[1] 618 | 619 | z_a = z[idx, :] + np.transpose(np.dot(H[:, :, idx], (config.c * np.ones((n_se, 1))))) 620 | X[idx, :] = z_a 621 | y[idx] = 1 622 | else: 623 | X[idx, :] = z[idx, :] 624 | if config.norm is True: X = normalize(X) 625 | 626 | if config.verbose: 627 | print("Max: {}, min: {}, after".format(np.amax(X), np.amin(X))) 628 | print("X shape: {}".format(X.shape)) 
629 | print("y shape: {}".format(y.shape)) 630 | 631 | return X, y 632 | 633 | 634 | def create_fdi_window_from_fdi_X2_y1(self, X, y, config): 635 | """ 636 | Takes an array of size (num_data*config.timestep, num_input). 637 | Then it rolls a sliding window of size config.timestep over the array and extracts all batches. 638 | The total number of extracted batches will be (num_data * config.timestep) - (config.timestep - 1). 639 | 640 | @type X: ndarray(num_data*config.timestep, num_input) 641 | @param X: Measurements 642 | @type y: ndarray(num_data*config.timestep, 1) 643 | @param y: Labels (1 if the corresponding measurement vector was attacked, 0 otherwise) 644 | @type config.timestep: int 645 | @param config.timestep: Size in which z gets grouped (default: 16) 646 | 647 | @rtype X_new: ndarray((num_data * config.timestep) - (config.timestep - 1), config.timestep, num_input) 648 | @return X_new: Data 649 | @rtype y_new: ndarray((num_data * config.timestep) - (config.timestep - 1), 1) 650 | @return y_new: Label 651 | """ 652 | 653 | if not config and not isinstance(config, models.config.DataConfig): 654 | raise ValueError("models.config.DataConfig is invalid.") 655 | 656 | X_re = X 657 | 658 | z_len = X.shape[1] 659 | 660 | X_re_len = X_re.shape[0] 661 | X_new = np.zeros((X_re_len - (config.timestep - 1), config.timestep, z_len)) 662 | y_new = np.zeros((X_re_len - (config.timestep - 1), 1)) 663 | 664 | for t in iter(range(X_re_len - (config.timestep - 1))): 665 | for i in iter(range(config.timestep)): 666 | X_new[t, i, :] = X_re[t + i, :] 667 | if i == (config.timestep - 1): 668 | # Last step, check label 669 | y_new[t] = y[t + i] 670 | 671 | if config.verbose: 672 | print("X shape: {}".format(X_new.shape)) 673 | print("y shape: {}".format(y_new.shape)) 674 | 675 | return X_new, y_new 676 | 677 | 678 | def create_fdi_window_from_fdi_X2_y1_se(self, X, y, config): 679 | """ 680 | Takes an array of size (num_data*config.timestep, num_input). 681 | Then it rolls a sliding window of size config.timestep over the array and extracts all batches. 682 | The total number of extracted batches will be (num_data * config.timestep) - (config.timestep - 1).
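Illustrative example: with 160 input rows (num_data = 10, config.timestep = 16), the sliding window yields 160 - 15 = 145 batches, each of shape (config.timestep, num_input).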
683 | 684 | @type X: ndarray(num_data*config.timestep, num_input) 685 | @param X: Measurements 686 | @type y: ndarray(num_input*config.timestep, 1) 687 | @param y: Measurement Jacobian Matrix 688 | @type config.timestep: int 689 | @param config.timestep: Size in which z gets grouped (default: 16) 690 | 691 | @rtype X_new: ndarray((num_data * config.timestep) - (config.timestep - 1), num_input) 692 | @return X_new: Data 693 | @rtype y_new: ndarray((num_data * config.timestep) - (config.timestep - 1), 1) 694 | @return y_new: Label 695 | """ 696 | 697 | if not config and not isinstance(config, models.config.DataConfig): 698 | raise ValueError("models.config.DataConfig is invalid.") 699 | 700 | X_re = X 701 | 702 | z_len = X.shape[1] 703 | 704 | X_re_len = X_re.shape[0] 705 | X_new = np.zeros((X_re_len - (config.timestep - 1), config.timestep, z_len)) 706 | y_new = np.zeros((X_re_len - (config.timestep - 1), y.shape[1])) 707 | 708 | for t in iter(range(X_re_len - (config.timestep - 1))): 709 | for i in iter(range(config.timestep)): 710 | X_new[t, i, :] = X_re[t + i, :] 711 | if i == (config.timestep - 1): 712 | # Last step, check label 713 | y_new[t] = y[t + i] 714 | 715 | if config.verbose: 716 | print("X shape: {}".format(X_new.shape)) 717 | print("y shape: {}".format(y_new.shape)) 718 | 719 | return X_new, y_new 720 | 721 | 722 | def create_fdi_window_from_fdi_X2_yn(self, X, y, config): 723 | """ 724 | Takes an array of size (num_data*config.timestep, num_input). 725 | Then it rolls a sliding window of size config.timestep over the array and extracts all batches. 726 | The total number of extracted batches will be (num_data * config.timestep) - (config.timestep - 1). 727 | 728 | @type X: ndarray(num_data*config.timestep, num_input) 729 | @param X: Measurements 730 | @type y: ndarray(num_input*config.timestep, 1) 731 | @param y: Measurement Jacobian Matrix 732 | @type config.timestep: int 733 | @param config.timestep: Size in which z gets grouped (default: 16) 734 | 735 | @rtype X_new: ndarray((num_data * config.timestep) - (config.timestep - 1), num_input) 736 | @return X_new: Data 737 | @rtype y_new: ndarray((num_data * config.timestep) - (config.timestep - 1), config.timestep) 738 | @return y_new: Label 739 | """ 740 | 741 | if not config and not isinstance(config, models.config.DataConfig): 742 | raise ValueError("models.config.DataConfig is invalid.") 743 | 744 | X_re = X 745 | 746 | z_len = X.shape[1] 747 | 748 | X_re_len = X_re.shape[0] 749 | X_new = np.zeros((X_re_len - (config.timestep - 1), config.timestep, z_len)) 750 | y_new = np.zeros((X_re_len - (config.timestep - 1), config.timestep)) 751 | 752 | for t in iter(range(X_re_len - (config.timestep - 1))): 753 | for i in iter(range(config.timestep)): 754 | X_new[t, i, :] = X_re[t + i, :] 755 | # Last step, check label 756 | y_new[t, i] = y[t + i] 757 | 758 | if config.verbose: 759 | print("X shape: {}".format(X_new.shape)) 760 | print("y shape: {}".format(y_new.shape)) 761 | 762 | return X_new, y_new -------------------------------------------------------------------------------- /python/svm.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from os.path import join 3 | import numpy as np 4 | import seaborn as sns; sns.set() 5 | from models.configs import DataConfig, Analyzer, Norm 6 | from sklearn.svm import SVC 7 | from sklearn.multiclass import OneVsRestClassifier 8 | from sklearn.model_selection import GridSearchCV 9 | import time 10 | from sklearn.metrics import 
classification_report, f1_score, roc_auc_score 11 | from sklearn.model_selection import train_test_split 12 | from sklearn.preprocessing import StandardScaler, Normalizer 13 | import os 14 | import csv 15 | import math 16 | 17 | with open(join('..', 'data', 'main_load_data.pkl'), 'rb') as f: 18 | power_grid_data = pickle.load(f) 19 | 20 | Ps = [0.5] 21 | subset_sizes = [28,29] 22 | norms = [Norm.MINMAX] 23 | atk_styles = [{'atk_function': 2, 'A': 0.1, 'c': 0}] 24 | 25 | for atk_style in atk_styles: 26 | for norm in norms: 27 | for P in Ps: 28 | for subset_size in subset_sizes: 29 | n_atk_subsets = math.ceil(subset_size / 2) 30 | train_data_config = DataConfig(main_data=power_grid_data, method_name='create_fdi_X2_y1_se', norm=norm, 31 | subset_size=subset_size, n_atk_subsets=n_atk_subsets, P=P, 32 | timestep=16, c=atk_style['c'], random=0, ratio=0.7, atk_function=atk_style['atk_function'], 33 | A=atk_style['A']) 34 | 35 | 36 | rd = train_data_config.retrieve_data_set() 37 | 38 | Xtrain = rd['X_train'] 39 | ytrain = rd['y_train'] 40 | Xtest = rd['X_test'] 41 | ytest = rd['y_test'] 42 | 43 | n = 10000 44 | n_train = int(n * 0.7) 45 | n_test = int(n * 0.3) 46 | 47 | Xtrain = Xtrain[0:n_train] 48 | ytrain = ytrain[0:n_train] 49 | Xtest = Xtest[n:n + n_test] 50 | ytest = ytest[n:n + n_test] 51 | 52 | model = OneVsRestClassifier(SVC(kernel="rbf")) 53 | 54 | scores = ['f1'] 55 | 56 | # parameters = { 57 | # "estimator__C": [1,10,100,1000,10000,100000], 58 | # "estimator__gamma": [0.01, 0.1, 1, 10], 59 | # } 60 | parameters = { 61 | "estimator__C": [10000], 62 | "estimator__gamma": [0.1], 63 | } 64 | 65 | # for score in scores: 66 | # print("# Tuning hyper-parameters for %s" % score) 67 | # print() 68 | # 69 | # clf = GridSearchCV(model, parameters, cv=5, 70 | # scoring='%s_macro' % score) 71 | # clf.fit(Xtrain, ytrain) 72 | # 73 | # print("Best parameters set found on development set:") 74 | # print() 75 | # print(clf.best_params_) 76 | # print() 77 | # print("Grid scores on development set:") 78 | # print() 79 | # means = clf.cv_results_['mean_test_score'] 80 | # stds = clf.cv_results_['std_test_score'] 81 | # for mean, std, params in zip(means, stds, clf.cv_results_['params']): 82 | # print("%0.3f (+/-%0.03f) for %r" 83 | # % (mean, std * 2, params)) 84 | # print() 85 | # 86 | # print("Detailed classification report:") 87 | # print() 88 | # print("The model is trained on the full development set.") 89 | # print("The scores are computed on the full evaluation set.") 90 | # print() 91 | # y_true, y_pred = ytest, clf.predict(Xtest) 92 | # print(classification_report(y_true, y_pred)) 93 | # print() 94 | 95 | grid = GridSearchCV(model, parameters) 96 | 97 | start = time.time() 98 | grid.fit(Xtrain, ytrain) 99 | 100 | print(grid.best_params_) 101 | 102 | model = grid.best_estimator_ 103 | yfit = model.predict(Xtest) 104 | 105 | print(time.time() - start) 106 | print(classification_report(ytest, yfit)) 107 | 108 | result = {} 109 | result['train_time'] = time.time() - start 110 | result['f1'] = f1_score(ytest, yfit, average='weighted') 111 | result['aoc_mac'] = roc_auc_score(ytest, yfit) 112 | result['aoc_mic'] = roc_auc_score(ytest, yfit, average='micro') 113 | result['n_train'] = len(Xtrain) 114 | result['n_test'] = len(Xtest) 115 | result['C'] = grid.best_params_['estimator__C'] 116 | result['gamma'] = grid.best_params_['estimator__gamma'] 117 | result['P'] = P 118 | result['subset_size'] = subset_size 119 | result['n_atk_subsets'] = n_atk_subsets 120 | result['c'] = atk_style['c'] 121 | result['A'] 
= atk_style['A'] 122 | result['tp'] = np.count_nonzero(yfit * ytest) 123 | result['tn'] = np.count_nonzero((yfit - 1) * (ytest - 1)) 124 | result['fp'] = np.count_nonzero(yfit * (ytest - 1)) 125 | result['fn'] = np.count_nonzero((yfit - 1) * ytest) 126 | result['norm'] = norm 127 | result['atk_function'] = atk_style['atk_function'] 128 | 129 | analyzed = Analyzer.X_y_data({'X': Xtest, 'y': ytest}).get_dict() 130 | 131 | print(result) 132 | 133 | csv_header = [] 134 | fname = join('output', 'svm_libido.csv') 135 | 136 | result = {**result, **analyzed} 137 | 138 | file_exists = os.path.isfile(fname) 139 | with open(fname, 'a', newline='') as csvfile: 140 | writer = csv.DictWriter(csvfile, fieldnames=list(result.keys()), delimiter=';') 141 | if not file_exists: 142 | writer.writeheader() 143 | writer.writerow(result) 144 | -------------------------------------------------------------------------------- /python/tf2.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import tensorflow as tf 4 | from models.configs import DataConfig, TfNetworkConfig, TrainConfig, Analyzer, TestConfig, Result, Norm 5 | import os 6 | import pickle 7 | import numpy as np 8 | from os.path import join 9 | import time 10 | from colorama import Fore, init, Back, Style 11 | from sklearn.metrics import roc_auc_score 12 | import sys 13 | import math 14 | from sklearn.preprocessing import StandardScaler, Normalizer 15 | 16 | 17 | class Model(object): 18 | def __init__(self, id=None, network_conf=None, is_training=None, train_data=None, validation_data=None, 19 | test_data=None): 20 | self.id = id 21 | self.is_training = is_training 22 | 23 | if self.is_training: 24 | self.train_data = train_data 25 | self.validation_data = validation_data 26 | self.num_input = network_conf.num_input 27 | self.num_hidden = network_conf.num_hidden 28 | self.num_output = network_conf.num_output 29 | self.hidden_layers = network_conf.hidden_layers 30 | self.epochs = network_conf.epochs 31 | self.timestep = network_conf.timestep 32 | self.batch_size_given = network_conf.batch_size 33 | self.display_step = network_conf.display_step 34 | self.orig_decay = network_conf.orig_decay 35 | self.max_lr_epoch = network_conf.max_lr_epoch 36 | self.lr_given = network_conf.lr_given 37 | self.dropout_given = network_conf.dropout_given 38 | self.tolerance = network_conf.tolerance 39 | self.train_stop = network_conf.train_stop 40 | self.val_loss_improv = network_conf.val_loss_improv 41 | 42 | tf.add_to_collection('num_input', self.num_input) 43 | tf.add_to_collection('num_hidden', self.num_hidden) 44 | tf.add_to_collection('num_output', self.num_output) 45 | tf.add_to_collection('hidden_layers', self.hidden_layers) 46 | 47 | # Graph input 48 | self.x = tf.placeholder("float", [None, self.timestep, self.num_input], name='x') 49 | self.y = tf.placeholder("float", [None, self.num_output], name='y') 50 | self.batch_size = tf.placeholder(tf.int64, name='batch_size') 51 | self.dropout = tf.placeholder_with_default(1.0, shape=(), name='dropout') 52 | 53 | # Input pipeline 54 | train_dataset = tf.data.Dataset.from_tensor_slices((self.x, self.y)).apply( 55 | tf.contrib.data.batch_and_drop_remainder(self.batch_size)).repeat(self.epochs) 56 | validate_dataset = tf.data.Dataset.from_tensor_slices((self.x, self.y)).apply( 57 | tf.contrib.data.batch_and_drop_remainder(self.batch_size)) 58 | iter = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes) 59 | 
features, labels = iter.get_next() 60 | self.train_init_op = iter.make_initializer(train_dataset, name='train_init_op') 61 | self.validation_init_op = iter.make_initializer(validate_dataset) 62 | 63 | self.train_data = (train_data['X'], train_data['y']) 64 | self.validation_data = (validation_data['X'], validation_data['y']) 65 | 66 | # Define weights 67 | weights = { 68 | 'out': tf.Variable(tf.random_normal([self.num_hidden, self.num_output])) 69 | } 70 | biases = { 71 | 'out': tf.Variable(tf.random_normal([self.num_output])) 72 | } 73 | 74 | # Set up model 75 | inputs = tf.nn.dropout(features, self.dropout, name='inputs') 76 | 77 | self.init_state = tf.placeholder(tf.float32, [self.hidden_layers, 2, None, self.num_hidden], name='init_state') 78 | 79 | state_per_layer_list = tf.unstack(self.init_state, axis=0) 80 | rnn_tuple_state = tuple( 81 | [tf.contrib.rnn.LSTMStateTuple(state_per_layer_list[idx][0], state_per_layer_list[idx][1]) 82 | for idx in range(self.hidden_layers)] 83 | ) 84 | 85 | def lstm_cell(n_hidden, dropout): 86 | cell = tf.contrib.rnn.LSTMCell(n_hidden, forget_bias=1.0) 87 | cell = tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout) 88 | return cell 89 | 90 | if self.hidden_layers > 0: 91 | cell = tf.contrib.rnn.MultiRNNCell( 92 | [lstm_cell(self.num_hidden, self.dropout) for _ in range(self.hidden_layers)], 93 | state_is_tuple=True) 94 | 95 | output, self.state = tf.nn.dynamic_rnn(cell, features, dtype=tf.float32, initial_state=rnn_tuple_state) 96 | 97 | # Extract last timestep of output 98 | output = tf.transpose(output, [1, 0, 2]) 99 | output = tf.gather(output, int(output.get_shape()[0]) - 1) 100 | 101 | with tf.name_scope('Model'): 102 | # Make prediction 103 | self.logits = tf.matmul(output, weights['out']) + biases['out'] 104 | 105 | with tf.name_scope('Loss'): 106 | # Calculate cost/loss 107 | self.cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=labels), name='cost') 108 | 109 | self.learning_rate = tf.Variable(0.0, trainable=False, name='learning_rate') 110 | 111 | tvars = tf.trainable_variables() 112 | grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), 5) 113 | with tf.name_scope('Optimizer'): 114 | optimizer = tf.train.AdamOptimizer(self.learning_rate) 115 | self.train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=tf.train.get_or_create_global_step(), name='train_op') 116 | 117 | self.new_lr = tf.placeholder(tf.float32, shape=[], name='new_lr') 118 | self.lr_update = tf.assign(self.learning_rate, self.new_lr, name='lr_update') 119 | 120 | # Predict 121 | self.prediction = tf.nn.sigmoid(self.logits, name='prediction') 122 | 123 | # Metrics for multilabel binary classification 124 | self.predicted_labels = tf.cast(tf.less(tf.constant(0.5), self.prediction), tf.float32, name='predicted_labels') 125 | self.correct_pred = tf.cast(tf.equal(self.predicted_labels, labels), tf.int32, name='correct_pred') 126 | self.accuracy = tf.reduce_mean(tf.cast(self.correct_pred, tf.float32), name='accuracy') 127 | 128 | self.tp = tf.add(tf.count_nonzero(self.predicted_labels * labels), tf.constant(0, dtype=tf.int64), name='tp') # Add 0, because otherwise this tensor is not available at model restore, why o'ever 129 | self.tn = tf.add(tf.count_nonzero((self.predicted_labels - 1) * (labels - 1)), tf.constant(0, dtype=tf.int64), name='tn') 130 | self.fp = tf.add(tf.count_nonzero((self.predicted_labels * (labels - 1))), tf.constant(0, dtype=tf.int64), name='fp') 131 | self.fn = 
tf.add(tf.count_nonzero(((self.predicted_labels - 1) * labels)), tf.constant(0, dtype=tf.int64), name='fn') 132 | self.precision = tf.divide(self.tp, tf.add(self.tp, self.fp), name='precision') 133 | self.labels = labels 134 | 135 | self.recall = tf.divide(self.tp, tf.add(self.tp, self.fn), name='recall') 136 | 137 | self.f1 = tf.multiply(tf.constant(2, dtype=tf.float64), 138 | tf.divide(tf.multiply(self.precision, self.recall), tf.add(self.precision, self.recall)), name='f1') 139 | 140 | # Tensorboard metrics 141 | tf.summary.scalar("precision", self.precision) 142 | tf.summary.scalar("recall", self.recall) 143 | tf.summary.scalar("f1_score", self.f1) 144 | tf.summary.scalar("loss", self.cost) 145 | tf.summary.scalar("accuracy", self.accuracy) 146 | tf.summary.scalar("learning_rate", self.learning_rate) 147 | self.merged_summary_op = tf.summary.merge_all() 148 | else: 149 | self.test_data = test_data 150 | 151 | def assign_lr(self, session, lr_value): 152 | session.run(self.lr_update, feed_dict={self.new_lr: lr_value}) 153 | 154 | def train(self): 155 | # Parameters 156 | global_start_time = time.time() 157 | num_batches = int(self.train_data[0].shape[0] / self.batch_size_given) 158 | validation_cost = [] 159 | 160 | history = {'cost': [], 'val_cost': [], 'acc': [], 'val_acc': [], 'f1': [], 'val_f1': []} 161 | 162 | # Initialize the variables (i.e. assign their default value) 163 | init = tf.global_variables_initializer() 164 | init_local = tf.local_variables_initializer() 165 | saver = tf.train.Saver() 166 | 167 | try: 168 | 169 | # Start training 170 | with tf.Session() as sess: 171 | # Run the initializer 172 | sess.run(init) 173 | sess.run(init_local) 174 | 175 | # Tensorboard 176 | writer_val = tf.summary.FileWriter(join('output', 'tensorboard', '{}'.format(self.id), 'val')) 177 | writer_train = tf.summary.FileWriter(join('output', 'tensorboard', '{}'.format(self.id), 'train')) 178 | 179 | val_loss_stop_count = 1 180 | train_loss_stop_count = 1 181 | 182 | # Loop over epochs 183 | for epoch in range(self.epochs): 184 | # Set state 185 | current_state = np.zeros((self.hidden_layers, 2, self.batch_size_given, self.num_hidden)) 186 | 187 | # Set training data 188 | sess.run(self.train_init_op, feed_dict={self.x: self.train_data[0], self.y: self.train_data[1], 189 | self.batch_size: self.batch_size_given}) 190 | 191 | # Set learning rate decay 192 | new_lr_decay = self.orig_decay ** max(epoch + 1 - self.max_lr_epoch, 0.0) 193 | self.assign_lr(sess, self.lr_given * new_lr_decay) 194 | 195 | # Loop over batches 196 | for batch in range(num_batches): 197 | t_cost, _, current_state, t_acc, t_prec, t_recall, t_f1_score, t_tp, t_tn, t_fp, t_fn, t_summary, t_lr, t_dropout, t_predicted_labels, t_labels = \ 198 | sess.run([self.cost, self.train_op, self.state, self.accuracy, self.precision, self.recall, 199 | self.f1, self.tp, self.tn, self.fp, self.fn, self.merged_summary_op, 200 | self.learning_rate, self.dropout, self.predicted_labels, self.labels], 201 | feed_dict={self.init_state: current_state, self.dropout: self.dropout_given}) 202 | 203 | writer_train.add_summary(t_summary, (epoch * num_batches) + batch) 204 | 205 | t_auc_macro = 0 206 | t_auc_micro = 0 207 | try: 208 | t_auc_macro = roc_auc_score(t_labels, t_predicted_labels) 209 | except ValueError: 210 | print(sys.exc_info()[0]) 211 | try: 212 | t_auc_micro = roc_auc_score(t_labels, t_predicted_labels, average='micro') 213 | except ValueError: 214 | print(sys.exc_info()[0]) 215 | 216 | if batch % self.display_step == 0 or batch == 1: 
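# Log batch-level training metrics every display_step batches (batch 1 is always logged)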
217 | # Calculate metrics per batch 218 | train_eval_values = {'t_cost': t_cost, 't_lr': t_lr, 't_acc': t_acc, 't_tp': t_tp, 't_tn': t_tn, 219 | 't_fp': t_fp, 't_fn': t_fn, 't_prec': t_prec, 't_recall': t_recall, 't_f1': t_f1_score, 220 | 't_dropout': t_dropout, 't_auc_macro': t_auc_macro, 't_auc_micro': t_auc_micro} 221 | print( 222 | "Step: {:5d} | Accuracy: {:.4f} | LR: {:.8f} | Cost: {:.6f} | tp: {:5d} | tn: {:5d} | fp: {:5d} | fn: {:5d} | " 223 | "Prec: {:.4f} | Recall: {:.4f} | F1: {:.4f} | Drop: {:.3f} | AUC Mac: {:.4f} | AUC Mic: {:.4f}".format( 224 | batch, t_acc, t_lr, t_cost, t_tp, t_tn, t_fp, t_fn, t_prec, t_recall, t_f1_score, t_dropout, t_auc_macro, t_auc_micro)) 225 | 226 | ############################################ EPOCH FINISHED ############################################ 227 | print("Epoch Finished {}/{}! \n".format(epoch + 1, self.epochs)) 228 | writer_train.add_summary(t_summary, ((epoch + 1) * num_batches)) 229 | writer_train.flush() 230 | 231 | # Start validation 232 | current_state = np.zeros((self.hidden_layers, 2, len(self.validation_data[0]), self.num_hidden)) 233 | sess.run(self.validation_init_op, 234 | feed_dict={self.x: self.validation_data[0], self.y: self.validation_data[1], 235 | self.batch_size: len(self.validation_data[0])}) 236 | 237 | v_cost, v_acc, v_prec, v_recall, v_f1_score, v_tp, v_tn, v_fp, v_fn, v_summary, v_dropout, v_predicted_labels = \ 238 | sess.run([self.cost, self.accuracy, self.precision, self.recall, 239 | self.f1, self.tp, self.tn, self.fp, self.fn, self.merged_summary_op, self.dropout, self.predicted_labels], 240 | feed_dict={self.init_state: current_state}) 241 | writer_val.add_summary(v_summary, ((epoch + 1) * num_batches)) 242 | writer_val.flush() 243 | 244 | v_auc_macro = 0 245 | v_auc_micro = 0 246 | try: 247 | v_auc_macro = roc_auc_score(self.validation_data[1], v_predicted_labels) 248 | except ValueError: 249 | print(sys.exc_info()[0]) 250 | try: 251 | v_auc_micro = roc_auc_score(self.validation_data[1], v_predicted_labels, average='micro') 252 | except ValueError: 253 | print(sys.exc_info()[0]) 254 | 255 | 256 | validation_eval_values = {'v_cost': v_cost, 'v_acc': v_acc, 'v_tp': v_tp, 'v_tn': v_tn, 'v_fp': v_fp, 257 | 'v_fn': v_fn, 'v_prec': v_prec, 'v_recall': v_recall, 'v_f1': v_f1_score, 258 | 'v_dropout': v_dropout, 'v_auc_macro': v_auc_macro, 'v_auc_micro': v_auc_micro} 259 | 260 | print( 261 | "VALIDATION: \nStep: {:5d} | Accuracy: {:.4f} | Cost: {:.6f} | tp: {:5d} | tn: {:5d} | fp: {:5d} | fn: {:5d} | Prec: {:.4f} " 262 | "| Recall: {:.4f} | F1: {:.4f} | Drop: {:.3f} | AUC Mac: {:.4f} | AUC Mic: {:.4f} \n".format( 263 | epoch, v_acc, v_cost, v_tp, v_tn, v_fp, v_fn, v_prec, v_recall, v_f1_score, v_dropout, v_auc_macro, v_auc_micro)) 264 | 265 | print(repr(self.validation_data[1][0:5])) 266 | print("---------------------------") 267 | print() 268 | print(repr(v_predicted_labels[0:5])) 269 | 270 | # Add metrics to history 271 | history['cost'].append(t_cost) 272 | history['val_cost'].append(v_cost) 273 | history['acc'].append(t_acc) 274 | history['val_acc'].append(v_acc) 275 | history['f1'].append(t_f1_score) 276 | history['val_f1'].append(v_f1_score) 277 | 278 | # Save if validation cost increased 279 | if epoch != 0: 280 | print("Debug: val must be smaller than {}".format(validation_cost[-1] - self.val_loss_improv)) 281 | if v_cost < (validation_cost[-1] - self.val_loss_improv): 282 | print("Validation cost improved from {:.6f} to {:.6f}".format(validation_cost[-1], v_cost)) 283 | save_path = saver.save(sess, join('.', 
'tf_models', '{}.ckpt'.format(self.id))) 284 | print("Model saved to file: {}\n".format(save_path)) 285 | val_loss_stop_count = 1 286 | else: 287 | print( 288 | "Validation cost did not improve from {:.6f} to {:.6f} for the {:2d} time".format(validation_cost[-1], 289 | v_cost, val_loss_stop_count)) 290 | if val_loss_stop_count >= self.tolerance: 291 | print("Stop training because validation cost did not improve for {} times.".format(val_loss_stop_count)) 292 | break 293 | val_loss_stop_count += 1 294 | 295 | if t_cost <= self.train_stop: 296 | if train_loss_stop_count >= self.tolerance: 297 | print("Stop training because training loss is lower than training stop loss for {} times.".format( 298 | train_loss_stop_count)) 299 | break 300 | train_loss_stop_count += 1 301 | 302 | validation_cost.append(v_cost) 303 | 304 | except KeyboardInterrupt: 305 | print("Interrupted!") 306 | 307 | print("Optimization Finished!") 308 | train_time = "{0:.2f}".format(time.time() - global_start_time) 309 | train_result = Result(stopped_epoch=epoch + 1, model_eval_values={**train_eval_values, **{'t_train_time': train_time}}) 310 | validation_result = Result(stopped_epoch=epoch + 1, model_eval_values={**validation_eval_values, **{'v_train_time': train_time}}) 311 | # TODO: Handle train_time only in train_result and ignore in validation_result 312 | 313 | return history, train_result, validation_result 314 | 315 | def test(self): 316 | ''' 317 | Warning: This method loads the entire test data set into memory and does not iterate over it batch-wise. 318 | If you face a problem with the memory, a batch-wise method has to be implemented. 319 | 320 | :return: 321 | ''' 322 | 323 | saver = tf.train.import_meta_graph(join('tf_models', '{}.ckpt.meta'.format(self.id))) 324 | 325 | # Start testing 326 | with tf.Session() as sess: 327 | graph = tf.get_default_graph() 328 | self.x = graph.get_tensor_by_name("x:0") 329 | self.y = graph.get_tensor_by_name("y:0") 330 | self.batch_size = graph.get_tensor_by_name("batch_size:0") 331 | self.num_input = tf.get_collection('num_input')[0] 332 | self.hidden_layers = tf.get_collection('hidden_layers')[0] 333 | self.num_hidden = tf.get_collection('num_hidden')[0] 334 | 335 | self.test_data = (self.test_data['X'], self.test_data['y']) 336 | 337 | self.train_init_op = graph.get_operation_by_name('train_init_op') 338 | 339 | self.init_state = graph.get_tensor_by_name("init_state:0") 340 | self.cost = graph.get_tensor_by_name("Loss/cost:0") 341 | self.accuracy = graph.get_tensor_by_name("accuracy:0") 342 | self.precision = graph.get_tensor_by_name("precision:0") 343 | self.recall = graph.get_tensor_by_name("recall:0") 344 | self.f1 = graph.get_tensor_by_name("f1:0") 345 | self.tp = graph.get_tensor_by_name("tp:0") 346 | self.tn = graph.get_tensor_by_name("tn:0") 347 | self.fp = graph.get_tensor_by_name("fp:0") 348 | self.fn = graph.get_tensor_by_name("fn:0") 349 | self.dropout = graph.get_tensor_by_name("dropout:0") 350 | 351 | # Run the initializer 352 | sess.run(tf.global_variables_initializer()) 353 | sess.run(tf.local_variables_initializer()) 354 | 355 | load_path = saver.restore(sess, join('tf_models', '{}.ckpt'.format(self.id))) 356 | print("Model loaded from file: {}".format(load_path)) 357 | 358 | current_state = np.zeros((self.hidden_layers, 2, len(self.test_data[0]), self.num_hidden)) 359 | 360 | sess.run(self.train_init_op, 361 | feed_dict={self.x: self.test_data[0], self.y: self.test_data[1], 362 | self.batch_size: len(self.test_data[0])}) 363 | 364 | print("Start testing.") 
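# Run the evaluation graph once over the full test set, which was fed in above as a single batch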
365 | cost, acc, prec, recall, f1, tp, tn, fp, fn, dropout, correct_pred = \ 366 | sess.run([self.cost, self.accuracy, self.precision, self.recall, self.f1, self.tp, self.tn, self.fp, self.fn, self.dropout, 367 | self.correct_pred], feed_dict={self.init_state: current_state}) 368 | 369 | print(Fore.LIGHTGREEN_EX) 370 | print("Test cost: {}, acc: {}, prec: {}, recall: {}, f1: {}, dropout: {}, aoc: {}".format(cost, acc, prec, recall, f1, dropout, 371 | roc_auc_score(correct_pred, self.test_data[1]))) 372 | print(Style.RESET_ALL) 373 | print("Testing Finished!") 374 | 375 | test_eval_values = {'cost': cost, 'acc': acc, 'tp': tp, 'tn': tn, 'fp': fp, 376 | 'fn': fn, 'prec': prec, 'recall': recall, 'f1': f1, 377 | 'dropout': dropout, 'auc': roc_auc_score(correct_pred, self.test_data[1])} 378 | 379 | return Result(model_eval_values=test_eval_values) 380 | 381 | ############################################################################################################################################ 382 | ################################################################### MAIN ################################################################### 383 | ############################################################################################################################################ 384 | 385 | def main(): 386 | train = True 387 | 388 | with open(join('..', 'data', 'main_load_data.pkl'), 'rb') as f: 389 | power_grid_data = pickle.load(f) 390 | 391 | if train: 392 | ############## PART 1 ################# 393 | 394 | train_config_list = [] 395 | percentages = [0.05, 0.1, 0.3, 0.5] 396 | subset_sizes = [1,4,8,12,16,20,24,28,29] 397 | norms = [Norm.STANDARD] 398 | atk_styles = [{'atk_function': 2, 'A': 0.1, 'c': 0}] 399 | 400 | for atk_style in atk_styles: 401 | for norm in norms: 402 | for percentage in percentages: 403 | for subset_size in subset_sizes: 404 | n_atk_subsets = math.ceil(subset_size/2) 405 | 406 | test_config_list = [] 407 | train_data_config = DataConfig(main_data=power_grid_data, method_name='create_fdi_X3_y1_se_window', norm=norm, 408 | subset_size=subset_size, n_atk_subsets=n_atk_subsets, c=atk_style['c'], 409 | timestep=16, ratio=0.7, P=percentage, atk_function=atk_style['atk_function'], A=atk_style['A']) 410 | tf_network_config = TfNetworkConfig(num_input=41, timestep=16, num_hidden=200, num_output=subset_size, 411 | batch_size=256, epochs=200, orig_decay=0.93, max_lr_epoch=1, 412 | hidden_layers=2, lr_given=0.001, dropout_given=0.7, tolerance=2, display_step=5, 413 | train_stop=0.001, val_loss_improv=0.001) 414 | 415 | # Sum them up 416 | train_config = TrainConfig(train_data_config=train_data_config, network_config=tf_network_config) 417 | 418 | train_config_dict = {'train_config': train_config, 'test_config_list': test_config_list} 419 | train_config_list.append(train_config_dict) 420 | 421 | print("{} different configs.".format(len(train_config_list))) 422 | 423 | for config in train_config_list: 424 | 425 | # TRAINING FIRST 426 | 427 | # Generate/retrieve the data 428 | train_data = {} 429 | validation_data = {} 430 | rd = config['train_config'].train_data_config.retrieve_data_set() 431 | 432 | #train_data['X'] = rd['X_train'] 433 | #train_data['y'] = rd['y_train'] 434 | #validation_data['X'] = rd['X_test'] 435 | #validation_data['y'] = rd['y_test'] 436 | 437 | n = 10000 438 | n_train = int(n * 0.7) 439 | n_test = int(n * 0.3) 440 | 441 | train_data['X'] = rd['X_train'][0:n_train] 442 | train_data['y'] = rd['y_train'][0:n_train] 443 | validation_data['X'] = 
rd['X_test'][n:n + n_test] 444 | validation_data['y'] = rd['y_test'][n:n + n_test] 445 | 446 | print(train_data['y'][0:10]) 447 | # Analyse the data sets 448 | config['train_config'].train_data_analysis = Analyzer.X_y_data(train_data) 449 | config['train_config'].valid_data_analysis = Analyzer.X_y_data(validation_data) 450 | 451 | # Run network / evaluate the model 452 | tf.reset_default_graph() 453 | m1 = Model(id=config['train_config'].id, is_training=True, 454 | network_conf=config['train_config'].network_config, 455 | train_data=train_data, 456 | validation_data=validation_data) 457 | history, config['train_config'].train_result, config['train_config'].validation_result = m1.train() 458 | 459 | # Save everything to CSV file 460 | config['train_config'].dump_to_csv(os.path.join('output', 'tf_10kfinal.csv')) 461 | 462 | # Create and save images 463 | config['train_config'].dump_images(history) 464 | 465 | # # TEST SECOND 466 | # 467 | # for test_config in config['test_config_list']: 468 | # test_data = {} 469 | # test_data['X'], test_data['y'] = test_config.test_data_config.retrieve_single_dataset() 470 | # 471 | # test_config.test_data_analysis = Analyzer.X_y_data(test_data) 472 | # 473 | # # test_config.test_result = m1.test(test_data=test_data) 474 | # tf.reset_default_graph() 475 | # test_config.test_result = Model(id=test_config.model_id, is_training=False, test_data=test_data).test() 476 | # 477 | # test_config.dump_to_csv(join('output', 'tf_test_timesteps.csv')) 478 | 479 | ############## PART 2 ################# 480 | 481 | # train_config_list = [] 482 | # # subset_sizes = [5] 483 | # timesteps = [1, 5, 16, 32, 48, 64] 484 | # 485 | # for timestep in timesteps: 486 | # test_config_list = [] 487 | # train_data_config = DataConfig(main_data=power_grid_data, method_name='create_fdi_X3_y1_se_window', norm=True, 488 | # subset_size=29, n_atk_subsets=29, 489 | # timestep=timestep, c=0.2, random=1, ratio=0.7, P=0.1) 490 | # tf_network_config = TfNetworkConfig(num_input=41, timestep=timestep, num_hidden=200, num_output=29, 491 | # batch_size=1280, epochs=200, orig_decay=0.93, max_lr_epoch=1, 492 | # hidden_layers=2, lr_given=0.001, dropout_given=0.6, tolerance=4, display_step=5, 493 | # train_stop=0.001) 494 | # 495 | # 496 | # # Sum them up 497 | # train_config = TrainConfig(train_data_config=train_data_config, network_config=tf_network_config) 498 | # 499 | # train_config_dict = {'train_config': train_config, 'test_config_list': test_config_list} 500 | # train_config_list.append(train_config_dict) 501 | # 502 | # for config in train_config_list: 503 | # 504 | # # TRAINING FIRST 505 | # 506 | # # Generate/retrieve the data 507 | # train_data = {} 508 | # validation_data = {} 509 | # train_data['X'], validation_data['X'], train_data['y'], validation_data[ 510 | # 'y'] = config['train_config'].train_data_config.retrieve_splitted_dataset() 511 | # 512 | # # Analyse the data sets 513 | # config['train_config'].train_data_analysis = Analyzer.X_y_data(train_data) 514 | # config['train_config'].valid_data_analysis = Analyzer.X_y_data(validation_data) 515 | # 516 | # # Run network / evaluate the model 517 | # tf.reset_default_graph() 518 | # m1 = Model(id=config['train_config'].id, is_training=True, 519 | # network_conf=config['train_config'].network_config, 520 | # train_data=train_data, 521 | # validation_data=validation_data) 522 | # history, config['train_config'].train_result, config['train_config'].validation_result = m1.train() 523 | # 524 | # # Save everything to CSV file 525 | # 
config['train_config'].dump_to_csv(os.path.join('output', 'tf_train_var_perc_timestep.csv')) 526 | # 527 | # # Create and save images 528 | # config['train_config'].dump_images(history) 529 | # 530 | # # TEST SECOND 531 | # 532 | # for test_config in config['test_config_list']: 533 | # test_data = {} 534 | # test_data['X'], test_data['y'] = test_config.test_data_config.retrieve_single_dataset() 535 | # 536 | # test_config.test_data_analysis = Analyzer.X_y_data(test_data) 537 | # 538 | # # test_config.test_result = m1.test(test_data=test_data) 539 | # tf.reset_default_graph() 540 | # test_config.test_result = Model(id=test_config.model_id, is_training=False, test_data=test_data).test() 541 | # 542 | # test_config.dump_to_csv(join('output', 'tf_test_96.csv')) 543 | 544 | ############## PART 4 ################# 545 | 546 | # train_config_list = [] 547 | # subset_sizes = [1, 4, 8, 12, 16, 20, 24, 28, 29] 548 | # # subset_sizes = [1] 549 | # 550 | # for subset_size in subset_sizes: 551 | # test_config_list = [] 552 | # train_data_config = DataConfig(main_data=power_grid_data, method_name='create_fdi_X3_y1_se_window', norm=True, 553 | # atk_index=[0], subset_size=subset_size, n_atk_subsets=subset_size, 554 | # timestep=1, c=2, random=1, ratio=0.7) 555 | # tf_network_config = TfNetworkConfig(num_input=41, timestep=1, num_hidden=200, num_output=subset_size, 556 | # batch_size=1280, epochs=200, orig_decay=0.93, max_lr_epoch=1, 557 | # hidden_layers=2, lr_given=0.001, dropout_given=0.6, tolerance=4, display_step=5, 558 | # train_stop=0.001) 559 | # test_data_config_1 = DataConfig(main_data=power_grid_data, method_name='create_fdi_X3_y1_se_window', norm=True, 560 | # atk_index=[0], subset_size=subset_size, n_atk_subsets=subset_size, 561 | # timestep=1, c=0.2, random=1) 562 | # test_data_config_2 = DataConfig(main_data=power_grid_data, method_name='create_fdi_X3_y1_se_window', norm=True, 563 | # atk_index=[0], subset_size=subset_size, n_atk_subsets=subset_size, 564 | # timestep=1, c=0.2, random=1) 565 | # test_data_config_3 = DataConfig(main_data=power_grid_data, method_name='create_fdi_X3_y1_se_window', norm=True, 566 | # atk_index=[0], subset_size=subset_size, n_atk_subsets=subset_size, 567 | # timestep=1, c=2, random=1) 568 | # 569 | # # Sum them up 570 | # train_config = TrainConfig(train_data_config=train_data_config, network_config=tf_network_config) 571 | # 572 | # test_config_list.append(TestConfig(model_id=train_config.id, test_data_config=test_data_config_1)) 573 | # test_config_list.append(TestConfig(model_id=train_config.id, test_data_config=test_data_config_2)) 574 | # test_config_list.append(TestConfig(model_id=train_config.id, test_data_config=test_data_config_3)) 575 | # 576 | # train_config_dict = {'train_config': train_config, 'test_config_list': test_config_list} 577 | # train_config_list.append(train_config_dict) 578 | # 579 | # for config in train_config_list: 580 | # 581 | # # TRAINING FIRST 582 | # 583 | # # Generate/retrieve the data 584 | # train_data = {} 585 | # validation_data = {} 586 | # train_data['X'], validation_data['X'], train_data['y'], validation_data[ 587 | # 'y'] = config['train_config'].train_data_config.retrieve_splitted_dataset() 588 | # 589 | # # Analyse the data sets 590 | # config['train_config'].train_data_analysis = Analyzer.X_y_data(train_data) 591 | # config['train_config'].valid_data_analysis = Analyzer.X_y_data(validation_data) 592 | # 593 | # # Run network / evaluate the model 594 | # tf.reset_default_graph() 595 | # m1 = 
Model(id=config['train_config'].id, is_training=True, 596 | # network_conf=config['train_config'].network_config, 597 | # train_data=train_data, 598 | # validation_data=validation_data) 599 | # history, config['train_config'].train_result, config['train_config'].validation_result = m1.train() 600 | # 601 | # # Save everything to CSV file 602 | # config['train_config'].dump_to_csv(os.path.join('output', 'tf_train04_3.csv')) 603 | # 604 | # # Create and save images 605 | # config['train_config'].dump_images(history) 606 | # 607 | # # TEST SECOND 608 | # 609 | # for test_config in config['test_config_list']: 610 | # test_data = {} 611 | # test_data['X'], test_data['y'] = test_config.test_data_config.retrieve_single_dataset() 612 | # 613 | # test_config.test_data_analysis = Analyzer.X_y_data(test_data) 614 | # 615 | # # test_config.test_result = m1.test(test_data=test_data) 616 | # tf.reset_default_graph() 617 | # test_config.test_result = Model(id=test_config.model_id, is_training=False, test_data=test_data).test() 618 | # 619 | # test_config.dump_to_csv(join('output', 'tf_test04_3.csv')) 620 | 621 | ############################################## COMPLETE DIFFERENT TEST PART ##################################################### 622 | else: 623 | test_config_list = [] 624 | 625 | test_data = {} 626 | model_ids = ['6542fb76-58dd-42cb-983e-edfc4b782fb6'] 627 | subset_sizes = [1] 628 | 629 | for model_id in model_ids: 630 | for subset_size in subset_sizes: 631 | test_data_config = DataConfig(main_data=power_grid_data, method_name='create_fdi_X3_y1_se', norm=True, 632 | atk_index=[0], subset_size=subset_size, n_atk_subsets=subset_size, 633 | timestep=1, c=0.2, random=1, ratio=0.7) 634 | 635 | test_config_list.append(TestConfig(model_id=model_id, test_data_config=test_data_config)) 636 | 637 | for config in test_config_list: 638 | test_data['X'], test_data['y'] = config.test_data_config.retrieve_single_dataset() 639 | 640 | config.test_data_analysis = Analyzer.X_y_data(test_data) 641 | 642 | config.test_result = Model(id=config.model_id, is_training=False, test_data=test_data).test() 643 | 644 | config.dump_to_csv(join('output', 'lala.csv')) 645 | 646 | 647 | if __name__ == '__main__': 648 | main() 649 | --------------------------------------------------------------------------------