├── PSM
│   └── make_pk.py
├── SWaT
│   └── make_pk.py
├── SMD
│   └── make_pk.py
├── KPI
│   └── make_pk.py
├── MSL
│   └── make_pk.py
├── SMAP
│   └── make_pk.py
├── WADI
│   └── make_pk.py
├── README.md
├── NIPS-TS-SWAN
│   └── data_loader.py
└── NIPS-TS-GECCO
    └── data_loader.py

--------------------------------------------------------------------------------
/PSM/make_pk.py:
--------------------------------------------------------------------------------
import pickle as pk

import numpy as np
import pandas as pd

pth = './'

train_data = pd.read_csv('train.csv').to_numpy()
test_data = pd.read_csv('test.csv').to_numpy()
test_label = pd.read_csv('test_label.csv').to_numpy()

print(' Dumping pickle files...')
# column 0 is the timestamp in all three files, so it is dropped here
with open(pth + 'PSM.pk', 'wb') as file:
    pk.dump({'train_data': train_data[:, 1:], 'test_data': test_data[:, 1:], 'test_label': test_label[:, 1]}, file)

print('done')

--------------------------------------------------------------------------------
/SWaT/make_pk.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import pickle as pk

pth = './'

trn = pd.read_csv('SWaT_Dataset_Normal_v1.csv')
tst = pd.read_csv('SWaT_Dataset_Attack_v0.csv')

# keep the sensor/actuator channels (drop the timestamp and label columns)
channels = trn.columns[1:-1]
train_data = trn[channels].to_numpy()
test_data = tst[tst.columns[1:-1]].to_numpy()

# map the label strings to 0/1; the raw file also contains a stray 'A ttack' variant
test_label = tst['Normal/Attack'].to_numpy()
test_label[test_label == 'Normal'] = 0
test_label[test_label == 'Attack'] = 1
test_label[test_label == 'A ttack'] = 1
lab_tst = np.array(test_label, dtype=int)


with open(pth + 'SWaT.pk', 'wb') as file:
    pk.dump({'train_data': train_data, 'test_data': test_data, 'test_label': lab_tst}, file)

print('done')

--------------------------------------------------------------------------------
/SMD/make_pk.py:
--------------------------------------------------------------------------------
import os
import pickle as pk

import numpy as np
import pandas as pd

pth = './'

# custom sort key: natural (human) ordering, so that machine-1-10 sorts after machine-1-9
def natural_sort_key(s):
    import re
    return [int(text) if text.isdigit() else text.lower() for text in re.split(r'(\d+)', s)]

# natural-sort the list of entity file names
ent_names = sorted(os.listdir(pth + 'train'), key=natural_sort_key)

train_data, test_data, test_label = [], [], []
for ent_name in ent_names:
    train_data.append(pd.read_csv(pth + 'train/' + ent_name, header=None).to_numpy())
    test_data.append(pd.read_csv(pth + 'test/' + ent_name, header=None).to_numpy())
    test_label.append(np.squeeze(pd.read_csv(pth + 'test_label/' + ent_name, header=None).to_numpy()))

print(' Dumping pickle files...')
with open(pth + 'SMD.pk', 'wb') as file:
    pk.dump({'train_data': train_data, 'test_data': test_data, 'test_label': test_label}, file)

print('done')

--------------------------------------------------------------------------------
/KPI/make_pk.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import pickle as pk

pth = './'

df_train = pd.read_csv(pth + "phase2_train.csv")

df_test = pd.read_hdf(pth + "phase2_ground_truth.hdf")
df_test["KPI ID"] = df_test["KPI ID"].astype(str)

# split the training frame into one univariate series per KPI ID
name_dfs = df_train.groupby("KPI ID")
train_data = []
train_timestamp = []
train_label = []
for name, df in name_dfs:
    train_data.append(df['value'].to_numpy().reshape(-1, 1))
    train_timestamp.append(df['timestamp'].to_numpy().reshape(-1, 1))
    train_label.append(df['label'].to_numpy().reshape(-1, 1))

name_dfs = df_test.groupby("KPI ID")
test_data = []
test_timestamp = []
test_label = []
for name, df in name_dfs:
    test_data.append(df['value'].to_numpy().reshape(-1, 1))
    test_timestamp.append(df['timestamp'].to_numpy().reshape(-1, 1))
    test_label.append(df['label'].to_numpy().reshape(-1, 1))


print(' Dumping pickle files...')
with open(pth + 'KPI' + '.pk', 'wb') as file:
    pk.dump({'train_data': train_data, 'train_timestamp': train_timestamp, 'train_label': train_label,
             'test_data': test_data, 'test_timestamp': test_timestamp, 'test_label': test_label}, file)

print('Done')

--------------------------------------------------------------------------------
/MSL/make_pk.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import pickle as pk

pth = './'

labeled_anomalies = pd.read_csv(pth + 'labeled_anomalies.csv')

# channel dimensionality of each spacecraft's telemetry
data_dims = {'SMAP': 25, 'MSL': 55}

for smap_or_msl in ['MSL']:
    print(f'Creating dataset for {smap_or_msl}')
    train_data = []
    test_data = []
    test_label = []
    total_anomaly_points = 0
    for i in range(len(labeled_anomalies)):
        print(f' -> {labeled_anomalies["chan_id"][i]} ({i+1} / {len(labeled_anomalies)})')
        if labeled_anomalies['spacecraft'][i] == smap_or_msl:
            # load the corresponding .npy files from train/ and test/
            np_trn = np.load(pth + 'train/' + labeled_anomalies['chan_id'][i] + '.npy')
            assert np_trn.shape[-1] == data_dims[smap_or_msl]
            train_data.append(np_trn)

            np_tst = np.load(pth + 'test/' + labeled_anomalies['chan_id'][i] + '.npy')
            assert np_tst.shape[-1] == data_dims[smap_or_msl]
            test_data.append(np_tst)

            # parse the '[[start, end], ...]' anomaly intervals into a 0/1 label vector
            labs = labeled_anomalies['anomaly_sequences'][i]
            labs_s = labs.replace('[', '').replace(']', '').replace(' ', '').split(',')
            labs_i = [[int(labs_s[j]), int(labs_s[j + 1])] for j in range(0, len(labs_s), 2)]

            assert labeled_anomalies['num_values'][i] == len(np_tst)
            y_lab = np.zeros(len(np_tst))
            for sec in labs_i:
                y_lab[sec[0]:sec[1]] = 1
                total_anomaly_points += sec[1] - sec[0]
            test_label.append(y_lab)

    print(' There are a total of', total_anomaly_points, 'anomaly points')
    print(' Dumping pickle files...')
    with open(pth + smap_or_msl + '.pk', 'wb') as file:
        pk.dump({'train_data': train_data, 'test_data': test_data, 'test_label': test_label}, file)

print('Done')

--------------------------------------------------------------------------------
/SMAP/make_pk.py:
--------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import pickle as pk

pth = './'

labeled_anomalies = pd.read_csv(pth + 'labeled_anomalies.csv')

# channel dimensionality of each spacecraft's telemetry
data_dims = {'SMAP': 25, 'MSL': 55}

for smap_or_msl in ['SMAP']:
    print(f'Creating dataset for {smap_or_msl}')
    train_data = []
    test_data = []
    test_label = []
    total_anomaly_points = 0
    for i in range(len(labeled_anomalies)):
        print(f' -> {labeled_anomalies["chan_id"][i]} ({i+1} / {len(labeled_anomalies)})')
        if labeled_anomalies['spacecraft'][i] == smap_or_msl:
            # load the corresponding .npy files from train/ and test/
            np_trn = np.load(pth + 'train/' + labeled_anomalies['chan_id'][i] + '.npy')
            assert np_trn.shape[-1] == data_dims[smap_or_msl]
            train_data.append(np_trn)

            np_tst = np.load(pth + 'test/' + labeled_anomalies['chan_id'][i] + '.npy')
            assert np_tst.shape[-1] == data_dims[smap_or_msl]
            test_data.append(np_tst)

            # parse the '[[start, end], ...]' anomaly intervals into a 0/1 label vector
            labs = labeled_anomalies['anomaly_sequences'][i]
            labs_s = labs.replace('[', '').replace(']', '').replace(' ', '').split(',')
            labs_i = [[int(labs_s[j]), int(labs_s[j + 1])] for j in range(0, len(labs_s), 2)]

            assert labeled_anomalies['num_values'][i] == len(np_tst)
            y_lab = np.zeros(len(np_tst))
            for sec in labs_i:
                y_lab[sec[0]:sec[1]] = 1
                total_anomaly_points += sec[1] - sec[0]
            test_label.append(y_lab)

    print(' There are a total of', total_anomaly_points, 'anomaly points')
    print(' Dumping pickle files...')
    with open(pth + smap_or_msl + '.pk', 'wb') as file:
        pk.dump({'train_data': train_data, 'test_data': test_data, 'test_label': test_label}, file)

print('Done')

--------------------------------------------------------------------------------
/WADI/make_pk.py:
--------------------------------------------------------------------------------
import pandas as pd
import numpy as np
import pickle as pk

print('read csv files')
print('Note that this study takes the 2017 data')
trn = pd.read_csv('WADI_14days.csv', skiprows=3)
tst = pd.read_csv('WADI_attackdata.csv')

print('shorten column labels and separate labels')
# strip the long SCADA path prefix from the column names
cols = trn.columns.to_numpy()
target_str = '\\\\WIN-25J4RO10SBF\\LOG_DATA\\SUTD_WADI\\LOG_DATA\\'
for i in range(len(cols)):
    if target_str in cols[i]:
        cols[i] = cols[i][len(target_str):]
trn.columns = cols
lab_tst = tst[tst.columns[-1]].to_numpy()

assert len(set(lab_tst)) == 2

tst = tst.drop(columns=[tst.columns[-1]])
tst.columns = cols

print('drop columns and rows')
# drop Row, Date, Time
trn = trn[cols[3:]]
tst = tst[cols[3:]]
cols = cols[3:]

# drop columns where more than half of the training values are NaN
drop_cols = cols[np.isnan(trn.to_numpy()).sum(axis=0) > len(trn) // 2]
tst = tst.drop(columns=drop_cols)
trn = trn.drop(columns=drop_cols)

# convert to numpy arrays
print('convert to numpy array')
trn_np = trn.to_numpy()
tst_np = tst.to_numpy()
cols = trn.columns.to_numpy()

# forward-fill the remaining NaNs in the training data
print('fill NAs for trn')
nanlist = np.isnan(trn_np).sum(axis=0)
print(nanlist)
for j, nancnt in enumerate(nanlist):
    if nancnt > 0:
        for i in range(len(trn_np)):
            if np.isnan(trn_np[i, j]):
                trn_np[i, j] = trn_np[i - 1, j]
                nancnt -= 1
                if nancnt == 0:
                    break
assert np.isnan(trn_np).sum() == 0 and np.isnan(tst_np).sum() == 0

print('save to pickle file')
with open('WADI.pk', 'wb') as file:
    pk.dump({'x_trn': trn_np, 'x_tst': tst_np, 'lab_tst': lab_tst, 'cols': cols}, file)

print('done, final x_trn, x_tst, lab_tst shape: ', trn_np.shape, tst_np.shape, lab_tst.shape)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Visual Dataset

### Getting the datasets

#### SWaT and WADI datasets

The SWaT and WADI datasets can be requested by filling in the following form:

https://docs.google.com/forms/d/1GOLYXa7TX0KlayqugUOOPMvbcwSQiGNMOjHuNqKcieA/viewform?edit_requested=true

#### PSM dataset

The dataset can be downloaded from:

https://github.com/eBay/RANSynCoders/tree/main/data

#### SMD dataset

The dataset can be downloaded from:

https://github.com/NetManAIOps/OmniAnomaly/tree/master/ServerMachineDataset

#### MSL and SMAP datasets

The datasets can be downloaded as follows.

labeled_anomalies.csv: the data preprocessing and the separation of the two spacecraft rely on this file.

```
wget https://s3-us-west-2.amazonaws.com/telemanom/data.zip
wget https://raw.githubusercontent.com/khundman/telemanom/master/labeled_anomalies.csv
```

#### NIPS-TS-GECCO and NIPS-TS-SWAN datasets

The datasets can be downloaded from:

https://drive.google.com/drive/folders/1RaIJQ8esoWuhyphhmMaH-VCDh-WIluRR?usp=sharing

#### UCR dataset

The dataset can be downloaded from:

https://drive.google.com/drive/folders/1RaIJQ8esoWuhyphhmMaH-VCDh-WIluRR?usp=sharing

#### KPI dataset

The dataset can be downloaded from:

https://smileyan.lanzoul.com/ixpcU03lp97g

### Processing the datasets (processing code + deployment notes)

Download each dataset into its corresponding folder and run that folder's make_pk.py (a quick sanity check for the generated pickle is sketched at the end of this README).

Then open visual_dataset.ipynb, change the dataset name to the one you want to inspect, and run it.

### Dataset insights

**SWaT (Secure Water Treatment):** The SWaT dataset was collected over 11 days from a small-scale water-treatment testbed with 51 sensors. During the last 4 days, 41 anomalies were injected using different attack methods, while only normal data was generated during the first 7 days.

**WADI (Water Distribution testbed):** The WADI dataset was acquired from a reduced city water-distribution system with 123 sensors and actuators, operated for 16 days. The first 14 days contain only normal data; the remaining two days contain 15 anomaly segments.

**PSM (Pooled Server Metrics):** The PSM dataset was collected internally from multiple application server nodes at eBay. There are 13 weeks of training data and 8 weeks of test data.

**MSL (Mars Science Laboratory) and SMAP (Soil Moisture Active Passive):** The MSL and SMAP datasets are public datasets collected by NASA, containing telemetry anomaly data from the Incident Surprise Anomaly (ISA) reports of the spacecraft monitoring systems. The two datasets have 55 and 25 dimensions, respectively. Their training sets contain unlabeled anomalies.

**SMD (Server Machine Dataset):** SMD was collected from a large Internet company and contains 5 weeks of data from 28 server machines with 38 sensors each. The first half contains only normal data; anomalies are injected intermittently into the second half.

**trimSyn (trimmed synthetic dataset):** The original synthetic dataset was generated with trigonometric functions and Gaussian noise. We take that dataset and trim its test set so that only a single anomaly segment remains.

**NIPS-TS-GECCO and NIPS-TS-SWAN:** NIPS-TS-SWAN is extracted from solar photospheric vector magnetograms in the Space-weather HMI Active Region Patch series; NIPS-TS-GECCO is a drinking-water quality dataset from the Internet of Things. Both have extremely low anomaly rates and are challenging anomaly-detection datasets.

**UCR:** Provided by the KDD Cup 2021 multi-dataset time-series anomaly detection competition, it consists of 250 univariate time-series sub-datasets with subsequence anomalies, drawn from a variety of natural sources.

**KPI:** KPI comes from five major Internet companies (Sogou, eBay, Baidu, Tencent, Alibaba).
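
To sanity-check the pickles produced by the make_pk.py scripts above, they can be inspected directly. This is a minimal sketch rather than part of the repository's code; it assumes PSM.pk has been generated in the PSM folder (for SMD and KPI the stored values are lists of per-entity arrays rather than single arrays):

```
import pickle as pk

# PSM.pk is used as an example; the key names match the pk.dump call in PSM/make_pk.py
with open('PSM/PSM.pk', 'rb') as file:
    d = pk.load(file)

print(d.keys())  # dict_keys(['train_data', 'test_data', 'test_label'])
print(d['train_data'].shape, d['test_data'].shape, d['test_label'].shape)
```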

--------------------------------------------------------------------------------
/NIPS-TS-SWAN/data_loader.py:
--------------------------------------------------------------------------------
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


class NIPS_TS_WaterSegLoader(object):
    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size
        # fit the scaler on the training split only, then apply it to both splits
        self.scaler = StandardScaler()
        data = np.load(data_path + "/NIPS_TS_Water_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.load(data_path + "/NIPS_TS_Water_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_Water_test_label.npy")
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        # number of sliding windows of length win_size, advanced by step
        if self.mode == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.mode == "train":
            # labels are only meaningful for the test split; train/val return a dummy slice
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            # any other mode: tile the test split into non-overlapping windows
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


class NIPS_TS_SwanSegLoader(object):
    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size
        # fit the scaler on the training split only, then apply it to both splits
        self.scaler = StandardScaler()
        data = np.load(data_path + "/NIPS_TS_Swan_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.load(data_path + "/NIPS_TS_Swan_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_Swan_test_label.npy")
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        if self.mode == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.mode == "train":
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


class NIPS_TS_CCardSegLoader(object):
    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size
        # fit the scaler on the training split only, then apply it to both splits
        self.scaler = StandardScaler()
        data = np.load(data_path + "/NIPS_TS_CCard_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.load(data_path + "/NIPS_TS_CCard_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_CCard_test_label.npy")

    def __len__(self):
        if self.mode == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.mode == "train":
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


class SWATSegLoader(Dataset):
    def __init__(self, root_path, win_size, step=1, flag="train"):
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_data = pd.read_csv(os.path.join(root_path, 'swat_train2.csv'))
        test_data = pd.read_csv(os.path.join(root_path, 'swat2.csv'))
        labels = test_data.values[:, -1:]
        train_data = train_data.values[:, :-1]
        test_data = test_data.values[:, :-1]

        self.scaler.fit(train_data)
        train_data = self.scaler.transform(train_data)
        test_data = self.scaler.transform(test_data)
        self.train = train_data
        self.test = test_data
        data_len = len(self.train)
        # the last 20% of the training split doubles as the validation split
        self.val = self.train[int(data_len * 0.8):]
        self.test_labels = labels
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """
        Number of sliding windows in the dataset.
        """
        if self.flag == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.flag == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.flag == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.flag == "train":
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.flag == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.flag == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


# 'dataset' selects which loader to build; it was used below but missing from the
# original parameter list. 'index' is unused and kept for interface compatibility.
def get_loader_segment(index, data_path, batch_size, win_size=100, step=100, mode='train', dataset='SWAT'):
    if (dataset == 'SWAT'):
        dataset = SWATSegLoader(data_path, win_size, 1, mode)
    elif (dataset == 'NIPS_TS_Water'):
        dataset = NIPS_TS_WaterSegLoader(data_path, win_size, 1, mode)
    elif (dataset == 'NIPS_TS_Swan'):
        dataset = NIPS_TS_SwanSegLoader(data_path, win_size, 1, mode)
    elif (dataset == 'NIPS_TS_CCard'):
        dataset = NIPS_TS_CCardSegLoader(data_path, win_size, 1, mode)
    shuffle = False
    if mode == 'train':
        shuffle = True

    data_loader = DataLoader(dataset=dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=8,
                             drop_last=True)
    return data_loader

--------------------------------------------------------------------------------
/NIPS-TS-GECCO/data_loader.py:
--------------------------------------------------------------------------------
import os

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset
from torch.utils.data import DataLoader


class NIPS_TS_WaterSegLoader(object):
    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size
        # fit the scaler on the training split only, then apply it to both splits
        self.scaler = StandardScaler()
        data = np.load(data_path + "/NIPS_TS_Water_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.load(data_path + "/NIPS_TS_Water_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_Water_test_label.npy")
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        # number of sliding windows of length win_size, advanced by step
        if self.mode == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.mode == "train":
            # labels are only meaningful for the test split; train/val return a dummy slice
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            # any other mode: tile the test split into non-overlapping windows
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


class NIPS_TS_SwanSegLoader(object):
    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size
        # fit the scaler on the training split only, then apply it to both splits
        self.scaler = StandardScaler()
        data = np.load(data_path + "/NIPS_TS_Swan_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.load(data_path + "/NIPS_TS_Swan_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_Swan_test_label.npy")
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        if self.mode == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.mode == "train":
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


class NIPS_TS_CCardSegLoader(object):
    def __init__(self, data_path, win_size, step, mode="train"):
        self.mode = mode
        self.step = step
        self.win_size = win_size
        # fit the scaler on the training split only, then apply it to both splits
        self.scaler = StandardScaler()
        data = np.load(data_path + "/NIPS_TS_CCard_train.npy")
        self.scaler.fit(data)
        data = self.scaler.transform(data)
        test_data = np.load(data_path + "/NIPS_TS_CCard_test.npy")
        self.test = self.scaler.transform(test_data)

        self.train = data
        self.val = self.test
        self.test_labels = np.load(data_path + "/NIPS_TS_CCard_test_label.npy")

    def __len__(self):
        if self.mode == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.mode == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.mode == "train":
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.mode == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


class SWATSegLoader(Dataset):
    def __init__(self, root_path, win_size, step=1, flag="train"):
        self.flag = flag
        self.step = step
        self.win_size = win_size
        self.scaler = StandardScaler()

        train_data = pd.read_csv(os.path.join(root_path, 'swat_train2.csv'))
        test_data = pd.read_csv(os.path.join(root_path, 'swat2.csv'))
        labels = test_data.values[:, -1:]
        train_data = train_data.values[:, :-1]
        test_data = test_data.values[:, :-1]

        self.scaler.fit(train_data)
        train_data = self.scaler.transform(train_data)
        test_data = self.scaler.transform(test_data)
        self.train = train_data
        self.test = test_data
        data_len = len(self.train)
        # the last 20% of the training split doubles as the validation split
        self.val = self.train[int(data_len * 0.8):]
        self.test_labels = labels
        print("test:", self.test.shape)
        print("train:", self.train.shape)

    def __len__(self):
        """
        Number of sliding windows in the dataset.
        """
        if self.flag == "train":
            return (self.train.shape[0] - self.win_size) // self.step + 1
        elif (self.flag == 'val'):
            return (self.val.shape[0] - self.win_size) // self.step + 1
        elif (self.flag == 'test'):
            return (self.test.shape[0] - self.win_size) // self.step + 1
        else:
            return (self.test.shape[0] - self.win_size) // self.win_size + 1

    def __getitem__(self, index):
        index = index * self.step
        if self.flag == "train":
            return np.float32(self.train[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.flag == 'val'):
            return np.float32(self.val[index:index + self.win_size]), np.float32(self.test_labels[0:self.win_size])
        elif (self.flag == 'test'):
            return np.float32(self.test[index:index + self.win_size]), np.float32(
                self.test_labels[index:index + self.win_size])
        else:
            return np.float32(self.test[
                index // self.step * self.win_size:index // self.step * self.win_size + self.win_size]), np.float32(
                self.test_labels[index // self.step * self.win_size:index // self.step * self.win_size + self.win_size])


# 'dataset' selects which loader to build; it was used below but missing from the
# original parameter list. 'index' is unused and kept for interface compatibility.
def get_loader_segment(index, data_path, batch_size, win_size=100, step=100, mode='train', dataset='SWAT'):
    if (dataset == 'SWAT'):
        dataset = SWATSegLoader(data_path, win_size, 1, mode)
    elif (dataset == 'NIPS_TS_Water'):
        dataset = NIPS_TS_WaterSegLoader(data_path, win_size, 1, mode)
    elif (dataset == 'NIPS_TS_Swan'):
        dataset = NIPS_TS_SwanSegLoader(data_path, win_size, 1, mode)
    elif (dataset == 'NIPS_TS_CCard'):
        dataset = NIPS_TS_CCardSegLoader(data_path, win_size, 1, mode)
    shuffle = False
    if mode == 'train':
        shuffle = True

    data_loader = DataLoader(dataset=dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=8,
                             drop_last=True)
    return data_loader

--------------------------------------------------------------------------------
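
For reference, a minimal usage sketch for the loaders above. It assumes the corrected `get_loader_segment` signature (with the added `dataset` parameter) and that the `NIPS_TS_Water_*.npy` files sit in `./NIPS-TS-GECCO`; the batch size and window length are arbitrary example values:

```
from data_loader import get_loader_segment

# build a training loader of 100-step windows over the GECCO water-quality data
train_loader = get_loader_segment(0, './NIPS-TS-GECCO', batch_size=64,
                                  win_size=100, mode='train', dataset='NIPS_TS_Water')

for window, labels in train_loader:
    # window: (64, 100, n_channels) float32 tensor; labels are dummies in train mode
    print(window.shape, labels.shape)
    break
```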