├── .gitignore ├── Datasets ├── File_List │ ├── TSB-AD-M-Eva.csv │ ├── TSB-AD-M-Tuning.csv │ ├── TSB-AD-M.csv │ ├── TSB-AD-U-Eva-Full.csv │ ├── TSB-AD-U-Eva.csv │ ├── TSB-AD-U-Tuning.csv │ └── TSB-AD-U.csv ├── README.md ├── TSB-AD-M │ └── 057_SMD_id_1_Facility_tr_4529_1st_4629.csv └── TSB-AD-U │ └── 001_NAB_id_1_Facility_tr_1007_1st_2014.csv ├── LICENSE ├── README.md ├── TSB_AD ├── HP_list.py ├── __init__.py ├── evaluation │ ├── __init__.py │ ├── affiliation │ │ ├── __init__.py │ │ ├── _affiliation_zone.py │ │ ├── _integral_interval.py │ │ ├── _single_ground_truth_event.py │ │ ├── generics.py │ │ └── metrics.py │ ├── basic_metrics.py │ ├── metrics.py │ └── visualize.py ├── main.py ├── model_wrapper.py ├── models │ ├── AE.py │ ├── AnomalyTransformer.py │ ├── CBLOF.py │ ├── CNN.py │ ├── COF.py │ ├── COPOD.py │ ├── Chronos.py │ ├── Donut.py │ ├── EIF.py │ ├── FFT.py │ ├── FITS.py │ ├── HBOS.py │ ├── IForest.py │ ├── KMeansAD.py │ ├── KNN.py │ ├── LOF.py │ ├── LSTMAD.py │ ├── Lag_Llama.py │ ├── Left_STAMPi.py │ ├── M2N2.py │ ├── MCD.py │ ├── MOMENT.py │ ├── MatrixProfile.py │ ├── NormA.txt │ ├── OCSVM.py │ ├── OFA.py │ ├── OmniAnomaly.py │ ├── PCA.py │ ├── POLY.py │ ├── README.md │ ├── RobustPCA.py │ ├── SAND.py │ ├── SR.py │ ├── Series2Graph.txt │ ├── TimesFM.py │ ├── TimesNet.py │ ├── TranAD.py │ ├── USAD.py │ ├── __init__.py │ ├── base.py │ ├── distance.py │ └── feature.py └── utils │ ├── __init__.py │ ├── dataset.py │ ├── slidingWindows.py │ ├── stat_models.py │ ├── torch_utility.py │ └── utility.py ├── assets └── fig │ ├── readme_title.png │ ├── readme_title_2.png │ └── tsb_overview.png ├── benchmark_exp ├── HP_Tuning_M.py ├── HP_Tuning_U.py ├── README.md ├── Run_Custom_Detector.py ├── Run_Detector_M.py ├── Run_Detector_U.py ├── analysis.ipynb ├── benchmark_eval_results │ ├── README.md │ ├── multi_mergedTable_VUS-PR.csv │ └── uni_mergedTable_VUS-PR.csv └── visualize_ts.ipynb ├── docs ├── index.html └── static │ ├── .DS_Store │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── fontawesome.all.min.css │ └── index.css │ ├── images │ ├── .DS_Store │ ├── elephant.svg │ └── tsb_overview.png │ ├── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── fontawesome.all.min.js │ └── index.js │ ├── leaderboard │ ├── TSB-AD-M.html │ └── TSB-AD-U.html │ └── pdfs │ └── TSB-AD-NeurIPS24.pdf ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.egg-info 3 | build 4 | run_scripts 5 | Datasets/TSB-AD-Datasets 6 | TSB_AD/models/NormA.py 7 | TSB_AD/models/Series2Graph.py -------------------------------------------------------------------------------- /Datasets/File_List/TSB-AD-M-Eva.csv: -------------------------------------------------------------------------------- 1 | file_name 2 | 001_Genesis_id_1_Sensor_tr_4055_1st_15538.csv 3 | 002_MSL_id_1_Sensor_tr_500_1st_900.csv 4 | 003_MSL_id_2_Sensor_tr_883_1st_1238.csv 5 | 005_MSL_id_4_Sensor_tr_855_1st_2700.csv 6 | 006_MSL_id_5_Sensor_tr_1150_1st_1250.csv 7 | 007_MSL_id_6_Sensor_tr_980_1st_3550.csv 8 | 008_MSL_id_7_Sensor_tr_656_1st_1630.csv 9 | 009_MSL_id_8_Sensor_tr_714_1st_1390.csv 10 | 010_MSL_id_9_Sensor_tr_554_1st_1172.csv 11 | 012_MSL_id_11_Sensor_tr_539_1st_940.csv 12 | 013_MSL_id_12_Sensor_tr_554_1st_1200.csv 13 | 014_MSL_id_13_Sensor_tr_1525_1st_4575.csv 14 | 015_MSL_id_14_Sensor_tr_575_1st_1250.csv 15 | 
016_MSL_id_15_Sensor_tr_500_1st_780.csv 16 | 017_MSL_id_16_Sensor_tr_512_1st_1850.csv 17 | 018_Daphnet_id_1_HumanActivity_tr_9693_1st_20732.csv 18 | 019_MITDB_id_1_Medical_tr_37500_1st_103211.csv 19 | 020_MITDB_id_2_Medical_tr_50000_1st_52315.csv 20 | 021_MITDB_id_3_Medical_tr_50000_1st_57595.csv 21 | 022_MITDB_id_4_Medical_tr_50000_1st_54253.csv 22 | 024_MITDB_id_6_Medical_tr_50000_1st_58118.csv 23 | 025_MITDB_id_7_Medical_tr_37500_1st_88864.csv 24 | 026_MITDB_id_8_Medical_tr_30495_1st_30595.csv 25 | 027_MITDB_id_9_Medical_tr_25000_1st_52255.csv 26 | 029_MITDB_id_11_Medical_tr_50000_1st_518037.csv 27 | 030_MITDB_id_12_Medical_tr_50000_1st_84572.csv 28 | 031_MITDB_id_13_Medical_tr_50000_1st_79433.csv 29 | 032_GHL_id_1_Sensor_tr_50000_1st_65001.csv 30 | 033_GHL_id_2_Sensor_tr_50000_1st_51001.csv 31 | 034_GHL_id_3_Sensor_tr_50000_1st_122001.csv 32 | 035_GHL_id_4_Sensor_tr_50000_1st_90001.csv 33 | 036_GHL_id_5_Sensor_tr_50000_1st_67147.csv 34 | 037_GHL_id_6_Sensor_tr_50000_1st_80001.csv 35 | 038_GHL_id_7_Sensor_tr_50000_1st_100001.csv 36 | 039_GHL_id_8_Sensor_tr_50000_1st_63030.csv 37 | 041_GHL_id_10_Sensor_tr_50000_1st_57001.csv 38 | 042_GHL_id_11_Sensor_tr_50000_1st_150001.csv 39 | 043_GHL_id_12_Sensor_tr_39938_1st_40038.csv 40 | 044_GHL_id_13_Sensor_tr_50000_1st_145001.csv 41 | 045_GHL_id_14_Sensor_tr_50000_1st_85076.csv 42 | 046_GHL_id_15_Sensor_tr_50000_1st_156462.csv 43 | 047_GHL_id_16_Sensor_tr_50000_1st_77001.csv 44 | 048_GHL_id_17_Sensor_tr_50000_1st_154001.csv 45 | 050_GHL_id_19_Sensor_tr_43750_1st_55001.csv 46 | 051_GHL_id_20_Sensor_tr_50000_1st_75110.csv 47 | 052_GHL_id_21_Sensor_tr_50000_1st_98001.csv 48 | 053_GHL_id_22_Sensor_tr_50000_1st_126448.csv 49 | 054_GHL_id_23_Sensor_tr_50000_1st_135001.csv 50 | 055_GHL_id_24_Sensor_tr_50000_1st_118124.csv 51 | 056_GHL_id_25_Sensor_tr_50000_1st_105568.csv 52 | 057_SMD_id_1_Facility_tr_4529_1st_4629.csv 53 | 058_SMD_id_2_Facility_tr_1087_1st_1187.csv 54 | 059_SMD_id_3_Facility_tr_757_1st_857.csv 55 | 060_SMD_id_4_Facility_tr_7176_1st_10609.csv 56 | 061_SMD_id_5_Facility_tr_7176_1st_15144.csv 57 | 063_SMD_id_7_Facility_tr_5923_1st_6506.csv 58 | 064_SMD_id_8_Facility_tr_2272_1st_2372.csv 59 | 065_SMD_id_9_Facility_tr_737_1st_837.csv 60 | 066_SMD_id_10_Facility_tr_2634_1st_2734.csv 61 | 067_SMD_id_11_Facility_tr_1980_1st_2080.csv 62 | 068_SMD_id_12_Facility_tr_1099_1st_1199.csv 63 | 069_SMD_id_13_Facility_tr_5925_1st_12760.csv 64 | 070_SMD_id_14_Facility_tr_1070_1st_1170.csv 65 | 071_SMD_id_15_Facility_tr_1109_1st_1209.csv 66 | 073_SMD_id_17_Facility_tr_5926_1st_10620.csv 67 | 074_SMD_id_18_Facility_tr_7174_1st_21230.csv 68 | 075_SMD_id_19_Facility_tr_564_1st_664.csv 69 | 076_SMD_id_20_Facility_tr_5925_1st_17580.csv 70 | 077_SMD_id_21_Facility_tr_3026_1st_3126.csv 71 | 078_SMD_id_22_Facility_tr_500_1st_326.csv 72 | 079_LTDB_id_1_Medical_tr_3618_1st_3718.csv 73 | 080_LTDB_id_2_Medical_tr_500_1st_266.csv 74 | 081_LTDB_id_3_Medical_tr_500_1st_26.csv 75 | 083_LTDB_id_5_Medical_tr_15502_1st_15602.csv 76 | 084_SVDB_id_1_Medical_tr_12973_1st_13073.csv 77 | 085_SVDB_id_2_Medical_tr_50000_1st_54982.csv 78 | 086_SVDB_id_3_Medical_tr_533_1st_633.csv 79 | 087_SVDB_id_4_Medical_tr_5421_1st_5521.csv 80 | 088_SVDB_id_5_Medical_tr_500_1st_168.csv 81 | 089_SVDB_id_6_Medical_tr_50000_1st_83083.csv 82 | 091_SVDB_id_8_Medical_tr_500_1st_126.csv 83 | 092_SVDB_id_9_Medical_tr_2674_1st_2774.csv 84 | 093_SVDB_id_10_Medical_tr_500_1st_37.csv 85 | 094_SVDB_id_11_Medical_tr_20100_1st_23611.csv 86 | 095_SVDB_id_12_Medical_tr_6264_1st_6364.csv 87 | 
096_SVDB_id_13_Medical_tr_500_1st_444.csv 88 | 097_SVDB_id_14_Medical_tr_1031_1st_1131.csv 89 | 098_SVDB_id_15_Medical_tr_500_1st_303.csv 90 | 099_SVDB_id_16_Medical_tr_1115_1st_1215.csv 91 | 100_SVDB_id_17_Medical_tr_17803_1st_17903.csv 92 | 101_SVDB_id_18_Medical_tr_1851_1st_1951.csv 93 | 102_SVDB_id_19_Medical_tr_50000_1st_107354.csv 94 | 103_SVDB_id_20_Medical_tr_12167_1st_12267.csv 95 | 104_SVDB_id_21_Medical_tr_50000_1st_106634.csv 96 | 105_SVDB_id_22_Medical_tr_500_1st_417.csv 97 | 106_SVDB_id_23_Medical_tr_37500_1st_121963.csv 98 | 108_SVDB_id_25_Medical_tr_1238_1st_1338.csv 99 | 109_SVDB_id_26_Medical_tr_1038_1st_1138.csv 100 | 110_SVDB_id_27_Medical_tr_30824_1st_30924.csv 101 | 111_SVDB_id_28_Medical_tr_1791_1st_1891.csv 102 | 112_SVDB_id_29_Medical_tr_8226_1st_8326.csv 103 | 114_SVDB_id_31_Medical_tr_25000_1st_63090.csv 104 | 115_PSM_id_1_Facility_tr_50000_1st_129872.csv 105 | 116_TAO_id_1_Environment_tr_500_1st_3.csv 106 | 117_TAO_id_2_Environment_tr_500_1st_1.csv 107 | 118_TAO_id_3_Environment_tr_500_1st_7.csv 108 | 119_TAO_id_4_Environment_tr_500_1st_1.csv 109 | 121_TAO_id_6_Environment_tr_500_1st_7.csv 110 | 122_TAO_id_7_Environment_tr_500_1st_19.csv 111 | 123_TAO_id_8_Environment_tr_500_1st_62.csv 112 | 124_TAO_id_9_Environment_tr_500_1st_1.csv 113 | 125_TAO_id_10_Environment_tr_500_1st_9.csv 114 | 127_TAO_id_12_Environment_tr_500_1st_24.csv 115 | 128_TAO_id_13_Environment_tr_500_1st_7.csv 116 | 129_OPPORTUNITY_id_1_HumanActivity_tr_1801_1st_1901.csv 117 | 130_OPPORTUNITY_id_2_HumanActivity_tr_1045_1st_1145.csv 118 | 132_OPPORTUNITY_id_4_HumanActivity_tr_895_1st_995.csv 119 | 133_OPPORTUNITY_id_5_HumanActivity_tr_1745_1st_6500.csv 120 | 134_OPPORTUNITY_id_6_HumanActivity_tr_1477_1st_1577.csv 121 | 135_OPPORTUNITY_id_7_HumanActivity_tr_2085_1st_2185.csv 122 | 136_OPPORTUNITY_id_8_HumanActivity_tr_1495_1st_1595.csv 123 | 137_CreditCard_id_1_Finance_tr_500_1st_541.csv 124 | 138_CATSv2_id_1_Sensor_tr_16568_1st_16668.csv 125 | 139_CATSv2_id_2_Sensor_tr_5592_1st_5692.csv 126 | 141_CATSv2_id_4_Sensor_tr_41727_1st_41827.csv 127 | 142_CATSv2_id_5_Sensor_tr_30704_1st_30804.csv 128 | 143_CATSv2_id_6_Sensor_tr_50000_1st_60232.csv 129 | 144_SMAP_id_1_Sensor_tr_2052_1st_5300.csv 130 | 145_SMAP_id_2_Sensor_tr_2133_1st_5400.csv 131 | 146_SMAP_id_3_Sensor_tr_2128_1st_5000.csv 132 | 147_SMAP_id_4_Sensor_tr_2160_1st_6449.csv 133 | 148_SMAP_id_5_Sensor_tr_2011_1st_5060.csv 134 | 150_SMAP_id_7_Sensor_tr_2077_1st_5394.csv 135 | 151_SMAP_id_8_Sensor_tr_1971_1st_4870.csv 136 | 152_SMAP_id_9_Sensor_tr_2073_1st_5600.csv 137 | 153_SMAP_id_10_Sensor_tr_1840_1st_4030.csv 138 | 154_SMAP_id_11_Sensor_tr_2117_1st_4770.csv 139 | 155_SMAP_id_12_Sensor_tr_1907_1st_4800.csv 140 | 156_SMAP_id_13_Sensor_tr_1173_1st_2750.csv 141 | 157_SMAP_id_14_Sensor_tr_2126_1st_5000.csv 142 | 158_SMAP_id_15_Sensor_tr_2075_1st_5610.csv 143 | 159_SMAP_id_16_Sensor_tr_1757_1st_2650.csv 144 | 160_SMAP_id_17_Sensor_tr_1832_1st_5300.csv 145 | 161_SMAP_id_18_Sensor_tr_2075_1st_5550.csv 146 | 162_SMAP_id_19_Sensor_tr_1908_1st_4690.csv 147 | 163_SMAP_id_20_Sensor_tr_2051_1st_4575.csv 148 | 165_SMAP_id_22_Sensor_tr_2129_1st_5000.csv 149 | 166_SMAP_id_23_Sensor_tr_1113_1st_1890.csv 150 | 167_SMAP_id_24_Sensor_tr_2094_1st_5600.csv 151 | 168_SMAP_id_25_Sensor_tr_1998_1st_2098.csv 152 | 169_SMAP_id_26_Sensor_tr_1811_1st_4510.csv 153 | 170_SMAP_id_27_Sensor_tr_2160_1st_4690.csv 154 | 171_SWaT_id_1_Sensor_tr_3749_1st_9522.csv 155 | 172_SWaT_id_2_Sensor_tr_23700_1st_23800.csv 156 | 173_GECCO_id_1_Sensor_tr_16165_1st_16265.csv 157 | 
174_Exathlon_id_1_Facility_tr_10766_1st_12590.csv 158 | 175_Exathlon_id_2_Facility_tr_10684_1st_10784.csv 159 | 176_Exathlon_id_3_Facility_tr_10766_1st_12590.csv 160 | 177_Exathlon_id_4_Facility_tr_11665_1st_13484.csv 161 | 179_Exathlon_id_6_Facility_tr_11665_1st_13484.csv 162 | 180_Exathlon_id_7_Facility_tr_10766_1st_12590.csv 163 | 181_Exathlon_id_8_Facility_tr_11663_1st_13482.csv 164 | 182_Exathlon_id_9_Facility_tr_10766_1st_12590.csv 165 | 183_Exathlon_id_10_Facility_tr_11665_1st_13484.csv 166 | 184_Exathlon_id_11_Facility_tr_11665_1st_13484.csv 167 | 185_Exathlon_id_12_Facility_tr_11665_1st_13484.csv 168 | 186_Exathlon_id_13_Facility_tr_10766_1st_12590.csv 169 | 187_Exathlon_id_14_Facility_tr_6193_1st_6293.csv 170 | 188_Exathlon_id_15_Facility_tr_12538_1st_12638.csv 171 | 189_Exathlon_id_16_Facility_tr_11663_1st_13482.csv 172 | 190_Exathlon_id_17_Facility_tr_12538_1st_12638.csv 173 | 191_Exathlon_id_18_Facility_tr_11665_1st_13484.csv 174 | 192_Exathlon_id_19_Facility_tr_11665_1st_13484.csv 175 | 193_Exathlon_id_20_Facility_tr_8898_1st_8998.csv 176 | 194_Exathlon_id_21_Facility_tr_6985_1st_7085.csv 177 | 196_Exathlon_id_23_Facility_tr_11665_1st_13484.csv 178 | 197_Exathlon_id_24_Facility_tr_10766_1st_12590.csv 179 | 198_Exathlon_id_25_Facility_tr_12538_1st_12638.csv 180 | 199_Exathlon_id_26_Facility_tr_12538_1st_12638.csv 181 | 200_Exathlon_id_27_Facility_tr_10766_1st_12590.csv 182 | -------------------------------------------------------------------------------- /Datasets/File_List/TSB-AD-M-Tuning.csv: -------------------------------------------------------------------------------- 1 | file_name 2 | 004_MSL_id_3_Sensor_tr_530_1st_630.csv 3 | 011_MSL_id_10_Sensor_tr_1525_1st_4590.csv 4 | 023_MITDB_id_5_Medical_tr_25000_1st_36913.csv 5 | 028_MITDB_id_10_Medical_tr_37500_1st_39948.csv 6 | 040_GHL_id_9_Sensor_tr_50000_1st_92001.csv 7 | 049_GHL_id_18_Sensor_tr_50000_1st_109001.csv 8 | 062_SMD_id_6_Facility_tr_7180_1st_15131.csv 9 | 072_SMD_id_16_Facility_tr_7119_1st_15849.csv 10 | 082_LTDB_id_4_Medical_tr_4456_1st_4556.csv 11 | 090_SVDB_id_7_Medical_tr_12157_1st_12257.csv 12 | 107_SVDB_id_24_Medical_tr_32805_1st_32905.csv 13 | 113_SVDB_id_30_Medical_tr_4552_1st_4652.csv 14 | 120_TAO_id_5_Environment_tr_500_1st_3.csv 15 | 126_TAO_id_11_Environment_tr_500_1st_7.csv 16 | 131_OPPORTUNITY_id_3_HumanActivity_tr_7016_1st_26691.csv 17 | 140_CATSv2_id_3_Sensor_tr_28307_1st_28407.csv 18 | 149_SMAP_id_6_Sensor_tr_2128_1st_5000.csv 19 | 164_SMAP_id_21_Sensor_tr_1976_1st_4200.csv 20 | 178_Exathlon_id_5_Facility_tr_12538_1st_12638.csv 21 | 195_Exathlon_id_22_Facility_tr_10766_1st_12590.csv -------------------------------------------------------------------------------- /Datasets/File_List/TSB-AD-U-Tuning.csv: -------------------------------------------------------------------------------- 1 | file_name 2 | 003_NAB_id_3_WebService_tr_1362_1st_1462.csv 3 | 004_NAB_id_4_Facility_tr_1007_1st_1437.csv 4 | 007_NAB_id_7_Traffic_tr_624_1st_2087.csv 5 | 012_NAB_id_12_Synthetic_tr_1007_1st_2787.csv 6 | 021_NAB_id_21_WebService_tr_500_1st_565.csv 7 | 033_WSD_id_5_WebService_tr_4559_1st_12588.csv 8 | 038_WSD_id_10_WebService_tr_4042_1st_4142.csv 9 | 045_WSD_id_17_WebService_tr_2566_1st_2666.csv 10 | 053_WSD_id_25_WebService_tr_4559_1st_9198.csv 11 | 070_WSD_id_42_WebService_tr_2102_1st_2202.csv 12 | 142_MSL_id_3_Sensor_tr_1525_1st_4575.csv 13 | 146_MSL_id_7_Sensor_tr_554_1st_1172.csv 14 | 152_Stock_id_4_Finance_tr_500_1st_2.csv 15 | 161_Stock_id_13_Finance_tr_500_1st_3.csv 16 | 
174_MITDB_id_5_Medical_tr_50000_1st_539948.csv 17 | 182_SMD_id_5_Facility_tr_7174_1st_21230.csv 18 | 186_SMD_id_9_Facility_tr_5925_1st_17580.csv 19 | 193_SMD_id_16_Facility_tr_5925_1st_17580.csv 20 | 200_SMD_id_23_Facility_tr_7174_1st_21230.csv 21 | 210_SMD_id_33_Facility_tr_5925_1st_17580.csv 22 | 217_LTDB_id_2_Medical_tr_500_1st_266.csv 23 | 230_MGAB_id_6_Synthetic_tr_25000_1st_42441.csv 24 | 235_SED_id_2_Medical_tr_2499_1st_3840.csv 25 | 242_SVDB_id_6_Medical_tr_10726_1st_10826.csv 26 | 252_SVDB_id_16_Medical_tr_12167_1st_12267.csv 27 | 259_TAO_id_3_Environment_tr_500_1st_7.csv 28 | 265_IOPS_id_6_WebService_tr_6453_1st_6553.csv 29 | 270_IOPS_id_11_WebService_tr_5638_1st_5738.csv 30 | 280_NEK_id_4_WebService_tr_500_1st_231.csv 31 | 291_TODS_id_5_Synthetic_tr_500_1st_11.csv 32 | 300_TODS_id_14_Synthetic_tr_1250_1st_2555.csv 33 | 305_UCR_id_3_Medical_tr_3000_1st_5948.csv 34 | 312_UCR_id_10_Facility_tr_19948_1st_52000.csv 35 | 323_UCR_id_21_HumanActivity_tr_48812_1st_128430.csv 36 | 347_UCR_id_45_Sensor_tr_2851_1st_5365.csv 37 | 386_UCR_id_84_Environment_tr_2046_1st_5703.csv 38 | 429_UCR_id_127_Medical_tr_14825_1st_29000.csv 39 | 535_SMAP_id_5_Sensor_tr_1113_1st_1890.csv 40 | 543_SMAP_id_13_Sensor_tr_2020_1st_4550.csv 41 | 560_YAHOO_id_10_Synthetic_tr_500_1st_893.csv 42 | 568_YAHOO_id_18_WebService_tr_500_1st_333.csv 43 | 573_YAHOO_id_23_Synthetic_tr_500_1st_623.csv 44 | 583_YAHOO_id_33_WebService_tr_500_1st_1616.csv 45 | 643_YAHOO_id_93_WebService_tr_500_1st_1038.csv 46 | 813_Exathlon_id_4_Facility_tr_10766_1st_12590.csv 47 | 825_Exathlon_id_16_Facility_tr_10766_1st_12590.csv 48 | 852_OPPORTUNITY_id_11_HumanActivity_tr_500_1st_566.csv 49 | 864_OPPORTUNITY_id_23_HumanActivity_tr_895_1st_995.csv -------------------------------------------------------------------------------- /Datasets/README.md: -------------------------------------------------------------------------------- 1 | Example time series are provided in the `TSB-AD-U/M` folders. 2 | 3 | Links to the datasets: 4 | 5 | * TSB-AD-U: https://www.thedatum.org/datasets/TSB-AD-U.zip 6 | 7 | * TSB-AD-M: https://www.thedatum.org/datasets/TSB-AD-M.zip 8 | 9 | > Disclaimer: The dataset is released for reproducibility purposes. The preprocessing and curation steps are provided under the Apache 2.0 license. If you use any of these datasets in your research, please refer to the original data source. License information for each dataset included in TSB-AD is provided at [[Link]](https://thedatumorg.github.io/TSB-AD/) for your reference. 10 | 11 | * File Name Formatting: [index]\_[Dataset Name]\_id\_[id]\_[Domain]\_tr\_[Train Index]\_1st\_[First Anomaly Index].csv 12 | * Domain ⊆ {Web Service, Sensor, Environment, Traffic, Finance, Facility, Medical, Synthetic} 13 | * Folder Description: `TSB-AD-U/M` contain univariate and multivariate time series, respectively. `File_List` contains the file lists used to split the data for evaluation and hyperparameter tuning.
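(Editorial example.) The naming rule above can be parsed mechanically; a minimal sketch follows. The helper name `parse_tsb_filename` is hypothetical, but the train-index field is the same one `TSB_AD/main.py` recovers via `filename.split('.')[0].split('_')[-3]`, and the sketch assumes the dataset name itself contains no underscore (true for the files listed here).

def parse_tsb_filename(filename):
    # '057_SMD_id_1_Facility_tr_4529_1st_4629.csv' -> fields of the naming rule
    parts = filename.split('.')[0].split('_')
    return {
        'index': int(parts[0]),
        'dataset': parts[1],
        'id': int(parts[3]),
        'domain': parts[4],
        'train_index': int(parts[-3]),          # used to cut the training split
        'first_anomaly_index': int(parts[-1]),
    }

parse_tsb_filename('057_SMD_id_1_Facility_tr_4529_1st_4629.csv')
# {'index': 57, 'dataset': 'SMD', 'id': 1, 'domain': 'Facility',
#  'train_index': 4529, 'first_anomaly_index': 4629}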
14 | -------------------------------------------------------------------------------- /TSB_AD/HP_list.py: -------------------------------------------------------------------------------- 1 | 2 | Multi_algo_HP_dict = { 3 | 'IForest': { 4 | 'n_estimators': [25, 50, 100, 150, 200], 5 | 'max_features': [0.2, 0.4, 0.6, 0.8, 1.0] 6 | }, 7 | 'LOF': { 8 | 'n_neighbors': [10, 20, 30, 40, 50], 9 | 'metric': ['minkowski', 'manhattan', 'euclidean'] 10 | }, 11 | 'PCA': { 12 | 'n_components': [0.25, 0.5, 0.75, None] 13 | }, 14 | 'HBOS': { 15 | 'n_bins': [5, 10, 20, 30, 40], 16 | 'tol': [0.1, 0.3, 0.5, 0.7] 17 | }, 18 | 'OCSVM': { 19 | 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 20 | 'nu': [0.1, 0.3, 0.5, 0.7] 21 | }, 22 | 'MCD': { 23 | 'support_fraction': [0.2, 0.4, 0.6, 0.8, None] 24 | }, 25 | 'KNN': { 26 | 'n_neighbors': [10, 20, 30, 40, 50], 27 | 'method': ['largest', 'mean', 'median'] 28 | }, 29 | 'KMeansAD': { 30 | 'n_clusters': [10, 20, 30, 40], 31 | 'window_size': [10, 20, 30, 40] 32 | }, 33 | 'COPOD': { 34 | 'HP': [None] 35 | }, 36 | 'CBLOF': { 37 | 'n_clusters': [4, 8, 16, 32], 38 | 'alpha': [0.6, 0.7, 0.8, 0.9] 39 | }, 40 | 'EIF': { 41 | 'n_trees': [25, 50, 100, 200] 42 | }, 43 | 'RobustPCA': { 44 | 'max_iter': [500, 1000, 1500] 45 | }, 46 | 'AutoEncoder': { 47 | 'hidden_neurons': [[64, 32], [32, 16], [128, 64]] 48 | }, 49 | 'CNN': { 50 | 'window_size': [50, 100, 150], 51 | 'num_channel': [[32, 32, 40], [16, 32, 64]] 52 | }, 53 | 'LSTMAD': { 54 | 'window_size': [50, 100, 150], 55 | 'lr': [0.0004, 0.0008] 56 | }, 57 | 'TranAD': { 58 | 'win_size': [5, 10, 50], 59 | 'lr': [1e-3, 1e-4] 60 | }, 61 | 'AnomalyTransformer': { 62 | 'win_size': [50, 100, 150], 63 | 'lr': [1e-3, 1e-4, 1e-5] 64 | }, 65 | 'OmniAnomaly': { 66 | 'win_size': [5, 50, 100], 67 | 'lr': [0.002, 0.0002] 68 | }, 69 | 'USAD': { 70 | 'win_size': [5, 50, 100], 71 | 'lr': [1e-3, 1e-4, 1e-5] 72 | }, 73 | 'Donut': { 74 | 'win_size': [60, 90, 120], 75 | 'lr': [1e-3, 1e-4, 1e-5] 76 | }, 77 | 'TimesNet': { 78 | 'win_size': [32, 96, 192], 79 | 'lr': [1e-3, 1e-4, 1e-5] 80 | }, 81 | 'FITS': { 82 | 'win_size': [100, 200], 83 | 'lr': [1e-3, 1e-4, 1e-5] 84 | }, 85 | 'OFA': { 86 | 'win_size': [50, 100, 150] 87 | } 88 | } 89 | 90 | 91 | Optimal_Multi_algo_HP_dict = { 92 | 'IForest': {'n_estimators': 25, 'max_features': 0.8}, 93 | 'LOF': {'n_neighbors': 50, 'metric': 'euclidean'}, 94 | 'PCA': {'n_components': 0.25}, 95 | 'HBOS': {'n_bins': 30, 'tol': 0.5}, 96 | 'OCSVM': {'kernel': 'rbf', 'nu': 0.1}, 97 | 'MCD': {'support_fraction': 0.8}, 98 | 'KNN': {'n_neighbors': 50, 'method': 'mean'}, 99 | 'KMeansAD': {'n_clusters': 10, 'window_size': 40}, 100 | 'KShapeAD': {'n_clusters': 20, 'window_size': 40}, 101 | 'COPOD': {'n_jobs':1}, 102 | 'CBLOF': {'n_clusters': 4, 'alpha': 0.6}, 103 | 'EIF': {'n_trees': 50}, 104 | 'RobustPCA': {'max_iter': 1000}, 105 | 'AutoEncoder': {'hidden_neurons': [128, 64]}, 106 | 'CNN': {'window_size': 50, 'num_channel': [32, 32, 40]}, 107 | 'LSTMAD': {'window_size': 150, 'lr': 0.0008}, 108 | 'TranAD': {'win_size': 10, 'lr': 0.001}, 109 | 'AnomalyTransformer': {'win_size': 50, 'lr': 0.001}, 110 | 'OmniAnomaly': {'win_size': 100, 'lr': 0.002}, 111 | 'USAD': {'win_size': 100, 'lr': 0.001}, 112 | 'Donut': {'win_size': 60, 'lr': 0.001}, 113 | 'TimesNet': {'win_size': 96, 'lr': 0.0001}, 114 | 'FITS': {'win_size': 100, 'lr': 0.001}, 115 | 'OFA': {'win_size': 50} 116 | } 117 | 118 | 119 | Uni_algo_HP_dict = { 120 | 'Sub_IForest': { 121 | 'periodicity': [1, 2, 3], 122 | 'n_estimators': [25, 50, 100, 150, 200] 123 | }, 124 | 'IForest': 
{ 125 | 'n_estimators': [25, 50, 100, 150, 200] 126 | }, 127 | 'Sub_LOF': { 128 | 'periodicity': [1, 2, 3], 129 | 'n_neighbors': [10, 20, 30, 40, 50] 130 | }, 131 | 'LOF': { 132 | 'n_neighbors': [10, 20, 30, 40, 50] 133 | }, 134 | 'POLY': { 135 | 'periodicity': [1, 2, 3], 136 | 'power': [1, 2, 3, 4] 137 | }, 138 | 'MatrixProfile': { 139 | 'periodicity': [1, 2, 3] 140 | }, 141 | 'NORMA': { 142 | 'periodicity': [1, 2, 3], 143 | 'clustering': ['hierarchical', 'kshape'] 144 | }, 145 | 'SAND': { 146 | 'periodicity': [1, 2, 3] 147 | }, 148 | 'Series2Graph': { 149 | 'periodicity': [1, 2, 3] 150 | }, 151 | 'Sub_PCA': { 152 | 'periodicity': [1, 2, 3], 153 | 'n_components': [0.25, 0.5, 0.75, None] 154 | }, 155 | 'Sub_HBOS': { 156 | 'periodicity': [1, 2, 3], 157 | 'n_bins': [5, 10, 20, 30, 40] 158 | }, 159 | 'Sub_OCSVM': { 160 | 'periodicity': [1, 2, 3], 161 | 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'] 162 | }, 163 | 'Sub_MCD': { 164 | 'periodicity': [1, 2, 3], 165 | 'support_fraction': [0.2, 0.4, 0.6, 0.8, None] 166 | }, 167 | 'Sub_KNN': { 168 | 'periodicity': [1, 2, 3], 169 | 'n_neighbors': [10, 20, 30, 40, 50], 170 | }, 171 | 'KMeansAD_U': { 172 | 'periodicity': [1, 2, 3], 173 | 'n_clusters': [10, 20, 30, 40], 174 | }, 175 | 'KShapeAD': { 176 | 'periodicity': [1, 2, 3] 177 | }, 178 | 'AutoEncoder': { 179 | 'window_size': [50, 100, 150], 180 | 'hidden_neurons': [[64, 32], [32, 16], [128, 64]] 181 | }, 182 | 'CNN': { 183 | 'window_size': [50, 100, 150], 184 | 'num_channel': [[32, 32, 40], [16, 32, 64]] 185 | }, 186 | 'LSTMAD': { 187 | 'window_size': [50, 100, 150], 188 | 'lr': [0.0004, 0.0008] 189 | }, 190 | 'TranAD': { 191 | 'win_size': [5, 10, 50], 192 | 'lr': [1e-3, 1e-4] 193 | }, 194 | 'AnomalyTransformer': { 195 | 'win_size': [50, 100, 150], 196 | 'lr': [1e-3, 1e-4, 1e-5] 197 | }, 198 | 'OmniAnomaly': { 199 | 'win_size': [5, 50, 100], 200 | 'lr': [0.002, 0.0002] 201 | }, 202 | 'USAD': { 203 | 'win_size': [5, 50, 100], 204 | 'lr': [1e-3, 1e-4, 1e-5] 205 | }, 206 | 'Donut': { 207 | 'win_size': [60, 90, 120], 208 | 'lr': [1e-3, 1e-4, 1e-5] 209 | }, 210 | 'TimesNet': { 211 | 'win_size': [32, 96, 192], 212 | 'lr': [1e-3, 1e-4, 1e-5] 213 | }, 214 | 'FITS': { 215 | 'win_size': [100, 200], 216 | 'lr': [1e-3, 1e-4, 1e-5] 217 | }, 218 | 'OFA': { 219 | 'win_size': [50, 100, 150] 220 | }, 221 | 'Lag_Llama': { 222 | 'win_size': [32, 64, 96] 223 | }, 224 | 'Chronos': { 225 | 'win_size': [50, 100, 150] 226 | }, 227 | 'TimesFM': { 228 | 'win_size': [32, 64, 96] 229 | }, 230 | 'MOMENT_ZS': { 231 | 'win_size': [64, 128, 256] 232 | }, 233 | 'MOMENT_FT': { 234 | 'win_size': [64, 128, 256] 235 | } 236 | } 237 | 238 | Optimal_Uni_algo_HP_dict = { 239 | 'Sub_IForest': {'periodicity': 1, 'n_estimators': 150}, 240 | 'IForest': {'n_estimators': 200}, 241 | 'Sub_LOF': {'periodicity': 2, 'n_neighbors': 30}, 242 | 'LOF': {'n_neighbors': 50}, 243 | 'POLY': {'periodicity': 1, 'power': 4}, 244 | 'MatrixProfile': {'periodicity': 1}, 245 | 'NORMA': {'periodicity': 1, 'clustering': 'kshape'}, 246 | 'SAND': {'periodicity': 1}, 247 | 'Series2Graph': {'periodicity': 1}, 248 | 'SR': {'periodicity': 1}, 249 | 'Sub_PCA': {'periodicity': 1, 'n_components': None}, 250 | 'Sub_HBOS': {'periodicity': 1, 'n_bins': 10}, 251 | 'Sub_OCSVM': {'periodicity': 2, 'kernel': 'rbf'}, 252 | 'Sub_MCD': {'periodicity': 3, 'support_fraction': None}, 253 | 'Sub_KNN': {'periodicity': 2, 'n_neighbors': 50}, 254 | 'KMeansAD_U': {'periodicity': 2, 'n_clusters': 10}, 255 | 'KShapeAD': {'periodicity': 1}, 256 | 'FFT': {}, 257 | 'Left_STAMPi': {}, 258 | 
'AutoEncoder': {'window_size': 100, 'hidden_neurons': [128, 64]}, 259 | 'CNN': {'window_size': 50, 'num_channel': [32, 32, 40]}, 260 | 'LSTMAD': {'window_size': 100, 'lr': 0.0008}, 261 | 'TranAD': {'win_size': 10, 'lr': 0.0001}, 262 | 'AnomalyTransformer': {'win_size': 50, 'lr': 0.001}, 263 | 'OmniAnomaly': {'win_size': 5, 'lr': 0.002}, 264 | 'USAD': {'win_size': 100, 'lr': 0.001}, 265 | 'Donut': {'win_size': 60, 'lr': 0.0001}, 266 | 'TimesNet': {'win_size': 32, 'lr': 0.0001}, 267 | 'FITS': {'win_size': 100, 'lr': 0.0001}, 268 | 'OFA': {'win_size': 50}, 269 | 'Lag_Llama': {'win_size': 96}, 270 | 'Chronos': {'win_size': 100}, 271 | 'TimesFM': {'win_size': 96}, 272 | 'MOMENT_ZS': {'win_size': 64}, 273 | 'MOMENT_FT': {'win_size': 64}, 274 | 'M2N2': {} 275 | } -------------------------------------------------------------------------------- /TSB_AD/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/TSB_AD/__init__.py -------------------------------------------------------------------------------- /TSB_AD/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/_affiliation_zone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from ._integral_interval import interval_intersection 4 | 5 | def t_start(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): 6 | """ 7 | Helper for `E_gt_func` 8 | 9 | :param j: index from 0 to len(Js) (included) on which to get the start 10 | :param Js: ground truth events, as a list of couples 11 | :param Trange: range of the series where Js is included 12 | :return: generalized start such that the middle of t_start and t_stop 13 | always gives the affiliation zone 14 | """ 15 | b = max(Trange) 16 | n = len(Js) 17 | if j == n: 18 | return(2*b - t_stop(n-1, Js, Trange)) 19 | else: 20 | return(Js[j][0]) 21 | 22 | def t_stop(j, Js = [(1,2),(3,4),(5,6)], Trange = (1,10)): 23 | """ 24 | Helper for `E_gt_func` 25 | 26 | :param j: index from 0 to len(Js) (included) on which to get the stop 27 | :param Js: ground truth events, as a list of couples 28 | :param Trange: range of the series where Js is included 29 | :return: generalized stop such that the middle of t_start and t_stop 30 | always gives the affiliation zone 31 | """ 32 | if j == -1: 33 | a = min(Trange) 34 | return(2*a - t_start(0, Js, Trange)) 35 | else: 36 | return(Js[j][1]) 37 | 38 | def E_gt_func(j, Js, Trange): 39 | """ 40 | Get the affiliation zone of element j of the ground truth 41 | 42 | :param j: index from 0 to len(Js) (excluded) on which to get the zone 43 | :param Js: ground truth events, as a list of couples 44 | :param Trange: range of the series where Js is included, can 45 | be (-math.inf, math.inf) for distance measures 46 | :return: affiliation zone of element j of the ground truth represented 47 | as a couple 48 | """ 49 | range_left = (t_stop(j-1, Js, Trange) + t_start(j, Js, Trange))/2 50 | range_right = (t_stop(j, Js, Trange) + t_start(j+1, Js, Trange))/2 51 | return((range_left, range_right)) 52 
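(Editorial illustration, not part of the original module.) For the default example used in this file, `E_gt_func` gives each ground-truth event an affiliation zone bounded by the midpoints to its neighbouring events, with the outermost bounds reflected around `Trange`; a small sketch, assuming the package layout shown in this repository:

from TSB_AD.evaluation.affiliation._affiliation_zone import E_gt_func

Js = [(1, 2), (3, 4), (5, 6)]   # ground-truth events
Trange = (1, 10)                # range of the whole series
zones = [E_gt_func(j, Js, Trange) for j in range(len(Js))]
# zones == [(1.0, 2.5), (2.5, 4.5), (4.5, 10.0)]
# e.g. 2.5 is the midpoint between the stop of (1, 2) and the start of (3, 4);
# 10.0 is the right end of Trange, obtained by reflecting the last stop around it.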
| 53 | def get_all_E_gt_func(Js, Trange): 54 | """ 55 | Get the affiliation partition from the ground truth point of view 56 | 57 | :param Js: ground truth events, as a list of couples 58 | :param Trange: range of the series where Js is included, can 59 | be (-math.inf, math.inf) for distance measures 60 | :return: affiliation partition of the events 61 | """ 62 | # E_gt is the limit of affiliation/attraction for each ground truth event 63 | E_gt = [E_gt_func(j, Js, Trange) for j in range(len(Js))] 64 | return(E_gt) 65 | 66 | def affiliation_partition(Is = [(1,1.5),(2,5),(5,6),(8,9)], E_gt = [(1,2.5),(2.5,4.5),(4.5,10)]): 67 | """ 68 | Cut the events into the affiliation zones 69 | The presentation given here is from the ground truth point of view, 70 | but it is also used in the reversed direction in the main function. 71 | 72 | :param Is: events as a list of couples 73 | :param E_gt: range of the affiliation zones 74 | :return: a list of list of intervals (each interval represented by either 75 | a couple or None for empty interval). The outer list is indexed by each 76 | affiliation zone of `E_gt`. The inner list is indexed by the events of `Is`. 77 | """ 78 | out = [None] * len(E_gt) 79 | for j in range(len(E_gt)): 80 | E_gt_j = E_gt[j] 81 | discarded_idx_before = [I[1] < E_gt_j[0] for I in Is] # end point of predicted I is before the begin of E 82 | discarded_idx_after = [I[0] > E_gt_j[1] for I in Is] # start of predicted I is after the end of E 83 | kept_index = [not(a or b) for a, b in zip(discarded_idx_before, discarded_idx_after)] 84 | Is_j = [x for x, y in zip(Is, kept_index)] 85 | out[j] = [interval_intersection(I, E_gt[j]) for I in Is_j] 86 | return(out) 87 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/_single_ground_truth_event.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | import math 4 | from ._affiliation_zone import ( 5 | get_all_E_gt_func, 6 | affiliation_partition) 7 | from ._integral_interval import ( 8 | integral_interval_distance, 9 | integral_interval_probaCDF_precision, 10 | integral_interval_probaCDF_recall, 11 | interval_length, 12 | sum_interval_lengths) 13 | 14 | def affiliation_precision_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 15 | """ 16 | Compute the individual average distance from Is to a single ground truth J 17 | 18 | :param Is: list of predicted events within the affiliation zone of J 19 | :param J: couple representating the start and stop of a ground truth interval 20 | :return: individual average precision directed distance number 21 | """ 22 | if all([I is None for I in Is]): # no prediction in the current area 23 | return(math.nan) # undefined 24 | return(sum([integral_interval_distance(I, J) for I in Is]) / sum_interval_lengths(Is)) 25 | 26 | def affiliation_precision_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 27 | """ 28 | Compute the individual precision probability from Is to a single ground truth J 29 | 30 | :param Is: list of predicted events within the affiliation zone of J 31 | :param J: couple representating the start and stop of a ground truth interval 32 | :param E: couple representing the start and stop of the zone of affiliation of J 33 | :return: individual precision probability in [0, 1], or math.nan if undefined 34 | """ 35 | if all([I is None for I in Is]): # no prediction in the current area 36 | return(math.nan) # undefined 37 | 
return(sum([integral_interval_probaCDF_precision(I, J, E) for I in Is]) / sum_interval_lengths(Is)) 38 | 39 | def affiliation_recall_distance(Is = [(1,2),(3,4),(5,6)], J = (2,5.5)): 40 | """ 41 | Compute the individual average distance from a single J to the predictions Is 42 | 43 | :param Is: list of predicted events within the affiliation zone of J 44 | :param J: couple representating the start and stop of a ground truth interval 45 | :return: individual average recall directed distance number 46 | """ 47 | Is = [I for I in Is if I is not None] # filter possible None in Is 48 | if len(Is) == 0: # there is no prediction in the current area 49 | return(math.inf) 50 | E_gt_recall = get_all_E_gt_func(Is, (-math.inf, math.inf)) # here from the point of view of the predictions 51 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 52 | return(sum([integral_interval_distance(J[0], I) for I, J in zip(Is, Js)]) / interval_length(J)) 53 | 54 | def affiliation_recall_proba(Is = [(1,2),(3,4),(5,6)], J = (2,5.5), E = (0,8)): 55 | """ 56 | Compute the individual recall probability from a single ground truth J to Is 57 | 58 | :param Is: list of predicted events within the affiliation zone of J 59 | :param J: couple representating the start and stop of a ground truth interval 60 | :param E: couple representing the start and stop of the zone of affiliation of J 61 | :return: individual recall probability in [0, 1] 62 | """ 63 | Is = [I for I in Is if I is not None] # filter possible None in Is 64 | if len(Is) == 0: # there is no prediction in the current area 65 | return(0) 66 | E_gt_recall = get_all_E_gt_func(Is, E) # here from the point of view of the predictions 67 | Js = affiliation_partition([J], E_gt_recall) # partition of J depending of proximity with Is 68 | return(sum([integral_interval_probaCDF_recall(I, J[0], E) for I, J in zip(Is, Js)]) / interval_length(J)) 69 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/generics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from itertools import groupby 4 | from operator import itemgetter 5 | import math 6 | import gzip 7 | import glob 8 | import os 9 | 10 | def convert_vector_to_events(vector = [0, 1, 1, 0, 0, 1, 0]): 11 | """ 12 | Convert a binary vector (indicating 1 for the anomalous instances) 13 | to a list of events. The events are considered as durations, 14 | i.e. setting 1 at index i corresponds to an anomalous interval [i, i+1). 15 | 16 | :param vector: a list of elements belonging to {0, 1} 17 | :return: a list of couples, each couple representing the start and stop of 18 | each event 19 | """ 20 | positive_indexes = [idx for idx, val in enumerate(vector) if val > 0] 21 | events = [] 22 | for k, g in groupby(enumerate(positive_indexes), lambda ix : ix[0] - ix[1]): 23 | cur_cut = list(map(itemgetter(1), g)) 24 | events.append((cur_cut[0], cur_cut[-1])) 25 | 26 | # Consistent conversion in case of range anomalies (for indexes): 27 | # A positive index i is considered as the interval [i, i+1), 28 | # so the last index should be moved by 1 29 | events = [(x, y+1) for (x,y) in events] 30 | 31 | return(events) 32 | 33 | def infer_Trange(events_pred, events_gt): 34 | """ 35 | Given the list of events events_pred and events_gt, get the 36 | smallest possible Trange corresponding to the start and stop indexes 37 | of the whole series. 
38 | Trange will not influence the measure of distances, but will impact the 39 | measures of probabilities. 40 | 41 | :param events_pred: a list of couples corresponding to predicted events 42 | :param events_gt: a list of couples corresponding to ground truth events 43 | :return: a couple corresponding to the smallest range containing the events 44 | """ 45 | if len(events_gt) == 0: 46 | raise ValueError('The gt events should contain at least one event') 47 | if len(events_pred) == 0: 48 | # empty prediction, base Trange only on events_gt (which is non empty) 49 | return(infer_Trange(events_gt, events_gt)) 50 | 51 | min_pred = min([x[0] for x in events_pred]) 52 | min_gt = min([x[0] for x in events_gt]) 53 | max_pred = max([x[1] for x in events_pred]) 54 | max_gt = max([x[1] for x in events_gt]) 55 | Trange = (min(min_pred, min_gt), max(max_pred, max_gt)) 56 | return(Trange) 57 | 58 | def has_point_anomalies(events): 59 | """ 60 | Checking whether events contain point anomalies, i.e. 61 | events starting and stopping at the same time. 62 | 63 | :param events: a list of couples corresponding to predicted events 64 | :return: True is the events have any point anomalies, False otherwise 65 | """ 66 | if len(events) == 0: 67 | return(False) 68 | return(min([x[1] - x[0] for x in events]) == 0) 69 | 70 | def _sum_wo_nan(vec): 71 | """ 72 | Sum of elements, ignoring math.isnan ones 73 | 74 | :param vec: vector of floating numbers 75 | :return: sum of the elements, ignoring math.isnan ones 76 | """ 77 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 78 | return(sum(vec_wo_nan)) 79 | 80 | def _len_wo_nan(vec): 81 | """ 82 | Count of elements, ignoring math.isnan ones 83 | 84 | :param vec: vector of floating numbers 85 | :return: count of the elements, ignoring math.isnan ones 86 | """ 87 | vec_wo_nan = [e for e in vec if not math.isnan(e)] 88 | return(len(vec_wo_nan)) 89 | 90 | def read_gz_data(filename = 'data/machinetemp_groundtruth.gz'): 91 | """ 92 | Load a file compressed with gz, such that each line of the 93 | file is either 0 (representing a normal instance) or 1 (representing) 94 | an anomalous instance. 95 | :param filename: file path to the gz compressed file 96 | :return: list of integers with either 0 or 1 97 | """ 98 | with gzip.open(filename, 'rb') as f: 99 | content = f.read().splitlines() 100 | content = [int(x) for x in content] 101 | return(content) 102 | 103 | def read_all_as_events(): 104 | """ 105 | Load the files contained in the folder `data/` and convert 106 | to events. The length of the series is kept. 
107 | The convention for the file name is: `dataset_algorithm.gz` 108 | :return: two dictionaries: 109 | - the first containing the list of events for each dataset and algorithm, 110 | - the second containing the range of the series for each dataset 111 | """ 112 | filepaths = glob.glob('data/*.gz') 113 | datasets = dict() 114 | Tranges = dict() 115 | for filepath in filepaths: 116 | vector = read_gz_data(filepath) 117 | events = convert_vector_to_events(vector) 118 | # ad hoc cut for those files 119 | cut_filepath = (os.path.split(filepath)[1]).split('_') 120 | data_name = cut_filepath[0] 121 | algo_name = (cut_filepath[1]).split('.')[0] 122 | if not data_name in datasets: 123 | datasets[data_name] = dict() 124 | Tranges[data_name] = (0, len(vector)) 125 | datasets[data_name][algo_name] = events 126 | return(datasets, Tranges) 127 | 128 | def f1_func(p, r): 129 | """ 130 | Compute the f1 function 131 | :param p: precision numeric value 132 | :param r: recall numeric value 133 | :return: f1 numeric value 134 | """ 135 | return(2*p*r/(p+r)) 136 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/affiliation/metrics.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | from .generics import ( 4 | infer_Trange, 5 | has_point_anomalies, 6 | _len_wo_nan, 7 | _sum_wo_nan, 8 | read_all_as_events) 9 | from ._affiliation_zone import ( 10 | get_all_E_gt_func, 11 | affiliation_partition) 12 | from ._single_ground_truth_event import ( 13 | affiliation_precision_distance, 14 | affiliation_recall_distance, 15 | affiliation_precision_proba, 16 | affiliation_recall_proba) 17 | 18 | def test_events(events): 19 | """ 20 | Verify the validity of the input events 21 | :param events: list of events, each represented by a couple (start, stop) 22 | :return: None. 
Raise an error for incorrect formed or non ordered events 23 | """ 24 | if type(events) is not list: 25 | raise TypeError('Input `events` should be a list of couples') 26 | if not all([type(x) is tuple for x in events]): 27 | raise TypeError('Input `events` should be a list of tuples') 28 | if not all([len(x) == 2 for x in events]): 29 | raise ValueError('Input `events` should be a list of couples (start, stop)') 30 | if not all([x[0] <= x[1] for x in events]): 31 | raise ValueError('Input `events` should be a list of couples (start, stop) with start <= stop') 32 | if not all([events[i][1] < events[i+1][0] for i in range(len(events) - 1)]): 33 | raise ValueError('Couples of input `events` should be disjoint and ordered') 34 | 35 | def pr_from_events(events_pred, events_gt, Trange): 36 | """ 37 | Compute the affiliation metrics including the precision/recall in [0,1], 38 | along with the individual precision/recall distances and probabilities 39 | 40 | :param events_pred: list of predicted events, each represented by a couple 41 | indicating the start and the stop of the event 42 | :param events_gt: list of ground truth events, each represented by a couple 43 | indicating the start and the stop of the event 44 | :param Trange: range of the series where events_pred and events_gt are included, 45 | represented as a couple (start, stop) 46 | :return: dictionary with precision, recall, and the individual metrics 47 | """ 48 | # testing the inputs 49 | test_events(events_pred) 50 | test_events(events_gt) 51 | 52 | # other tests 53 | minimal_Trange = infer_Trange(events_pred, events_gt) 54 | if not Trange[0] <= minimal_Trange[0]: 55 | raise ValueError('`Trange` should include all the events') 56 | if not minimal_Trange[1] <= Trange[1]: 57 | raise ValueError('`Trange` should include all the events') 58 | 59 | if len(events_gt) == 0: 60 | raise ValueError('Input `events_gt` should have at least one event') 61 | 62 | if has_point_anomalies(events_pred) or has_point_anomalies(events_gt): 63 | raise ValueError('Cannot manage point anomalies currently') 64 | 65 | if Trange is None: 66 | # Set as default, but Trange should be indicated if probabilities are used 67 | raise ValueError('Trange should be indicated (or inferred with the `infer_Trange` function') 68 | 69 | E_gt = get_all_E_gt_func(events_gt, Trange) 70 | aff_partition = affiliation_partition(events_pred, E_gt) 71 | 72 | # Computing precision distance 73 | d_precision = [affiliation_precision_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] 74 | 75 | # Computing recall distance 76 | d_recall = [affiliation_recall_distance(Is, J) for Is, J in zip(aff_partition, events_gt)] 77 | 78 | # Computing precision 79 | p_precision = [affiliation_precision_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] 80 | 81 | # Computing recall 82 | p_recall = [affiliation_recall_proba(Is, J, E) for Is, J, E in zip(aff_partition, events_gt, E_gt)] 83 | 84 | if _len_wo_nan(p_precision) > 0: 85 | p_precision_average = _sum_wo_nan(p_precision) / _len_wo_nan(p_precision) 86 | else: 87 | p_precision_average = p_precision[0] # math.nan 88 | p_recall_average = sum(p_recall) / len(p_recall) 89 | 90 | dict_out = dict({'Affiliation_Precision': p_precision_average, 91 | 'Affiliation_Recall': p_recall_average, 92 | 'individual_precision_probabilities': p_precision, 93 | 'individual_recall_probabilities': p_recall, 94 | 'individual_precision_distances': d_precision, 95 | 'individual_recall_distances': d_recall}) 96 | return(dict_out) 97 | 98 | def 
produce_all_results(): 99 | """ 100 | Produce the affiliation precision/recall for all files 101 | contained in the `data` repository 102 | :return: a dictionary indexed by data names, each containing a dictionary 103 | indexed by algorithm names, each containing the results of the affiliation 104 | metrics (precision, recall, individual probabilities and distances) 105 | """ 106 | datasets, Tranges = read_all_as_events() # read all the events in folder `data` 107 | results = dict() 108 | for data_name in datasets.keys(): 109 | results_data = dict() 110 | for algo_name in datasets[data_name].keys(): 111 | if algo_name != 'groundtruth': 112 | results_data[algo_name] = pr_from_events(datasets[data_name][algo_name], 113 | datasets[data_name]['groundtruth'], 114 | Tranges[data_name]) 115 | results[data_name] = results_data 116 | return(results) 117 | -------------------------------------------------------------------------------- /TSB_AD/evaluation/metrics.py: -------------------------------------------------------------------------------- 1 | from .basic_metrics import basic_metricor, generate_curve 2 | 3 | def get_metrics(score, labels, slidingWindow=100, pred=None, version='opt', thre=250): 4 | metrics = {} 5 | 6 | ''' 7 | Threshold Independent 8 | ''' 9 | grader = basic_metricor() 10 | # AUC_ROC, Precision, Recall, PointF1, PointF1PA, Rrecall, ExistenceReward, OverlapReward, Rprecision, RF, Precision_at_k = grader.metric_new(labels, score, pred, plot_ROC=False) 11 | AUC_ROC = grader.metric_ROC(labels, score) 12 | AUC_PR = grader.metric_PR(labels, score) 13 | 14 | # R_AUC_ROC, R_AUC_PR, _, _, _ = grader.RangeAUC(labels=labels, score=score, window=slidingWindow, plot_ROC=True) 15 | _, _, _, _, _, _,VUS_ROC, VUS_PR = generate_curve(labels.astype(int), score, slidingWindow, version, thre) 16 | 17 | 18 | ''' 19 | Threshold Dependent 20 | if pred is None --> use the oracle threshold 21 | ''' 22 | 23 | PointF1 = grader.metric_PointF1(labels, score, preds=pred) 24 | PointF1PA = grader.metric_PointF1PA(labels, score, preds=pred) 25 | EventF1PA = grader.metric_EventF1PA(labels, score, preds=pred) 26 | RF1 = grader.metric_RF1(labels, score, preds=pred) 27 | Affiliation_F = grader.metric_Affiliation(labels, score, preds=pred) 28 | 29 | metrics['AUC-PR'] = AUC_PR 30 | metrics['AUC-ROC'] = AUC_ROC 31 | metrics['VUS-PR'] = VUS_PR 32 | metrics['VUS-ROC'] = VUS_ROC 33 | 34 | metrics['Standard-F1'] = PointF1 35 | metrics['PA-F1'] = PointF1PA 36 | metrics['Event-based-F1'] = EventF1PA 37 | metrics['R-based-F1'] = RF1 38 | metrics['Affiliation-F'] = Affiliation_F 39 | return metrics 40 | 41 | 42 | def get_metrics_pred(score, labels, pred, slidingWindow=100): 43 | metrics = {} 44 | 45 | grader = basic_metricor() 46 | 47 | PointF1 = grader.metric_PointF1(labels, score, preds=pred) 48 | PointF1PA = grader.metric_PointF1PA(labels, score, preds=pred) 49 | EventF1PA = grader.metric_EventF1PA(labels, score, preds=pred) 50 | RF1 = grader.metric_RF1(labels, score, preds=pred) 51 | Affiliation_F = grader.metric_Affiliation(labels, score, preds=pred) 52 | VUS_R, VUS_P, VUS_F = grader.metric_VUS_pred(labels, preds=pred, windowSize=slidingWindow) 53 | 54 | metrics['Standard-F1'] = PointF1 55 | metrics['PA-F1'] = PointF1PA 56 | metrics['Event-based-F1'] = EventF1PA 57 | metrics['R-based-F1'] = RF1 58 | metrics['Affiliation-F'] = Affiliation_F 59 | 60 | metrics['VUS-Recall'] = VUS_R 61 | metrics['VUS-Precision'] = VUS_P 62 | metrics['VUS-F'] = VUS_F 63 | 64 | return metrics 65 | 
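(Editorial usage sketch.) `get_metrics` combines threshold-independent scores (AUC/VUS) with threshold-dependent F-scores, so it expects a score vector, the labels, a sliding-window estimate, and optionally binary predictions. The toy arrays below are placeholders; the min-max scaling, window estimation, and three-sigma threshold mirror how `TSB_AD/main.py` calls it:

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from TSB_AD.evaluation.metrics import get_metrics
from TSB_AD.utils.slidingWindows import find_length_rank

rng = np.random.default_rng(0)
t = np.arange(2000)
data = (np.sin(2 * np.pi * t / 50) + 0.1 * rng.normal(size=2000)).reshape(-1, 1)  # toy periodic series
label = np.zeros(2000, dtype=int)
label[1500:1520] = 1                                   # one injected anomaly range
score = 0.1 * rng.random(2000)
score[1500:1520] += 0.9                                # stand-in detector scores, high on the anomaly

score = MinMaxScaler(feature_range=(0, 1)).fit_transform(score.reshape(-1, 1)).ravel()
slidingWindow = find_length_rank(data, rank=1)         # window estimate, as in main.py
pred = score > (np.mean(score) + 3 * np.std(score))    # simple 3-sigma thresholding

results = get_metrics(score, label, slidingWindow=slidingWindow, pred=pred)
# dict with 'AUC-PR', 'AUC-ROC', 'VUS-PR', 'VUS-ROC', 'Standard-F1', 'PA-F1',
# 'Event-based-F1', 'R-based-F1', 'Affiliation-F'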
-------------------------------------------------------------------------------- /TSB_AD/evaluation/visualize.py: -------------------------------------------------------------------------------- 1 | from basic_metrics import metricor 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | import matplotlib.patches as mpatches 5 | 6 | def plotFig(data, label, score, slidingWindow, fileName, modelName, plotRange=None): 7 | grader = metricor() 8 | 9 | R_AUC, R_AP, R_fpr, R_tpr, R_prec = grader.RangeAUC(labels=label, score=score, window=slidingWindow, plot_ROC=True) # 10 | 11 | L, fpr, tpr= grader.metric_new(label, score, plot_ROC=True) 12 | precision, recall, AP = grader.metric_PR(label, score) 13 | 14 | range_anomaly = grader.range_convers_new(label) 15 | # print(range_anomaly) 16 | 17 | # max_length = min(len(score),len(data), 20000) 18 | max_length = len(score) 19 | 20 | if plotRange==None: 21 | plotRange = [0,max_length] 22 | 23 | fig3 = plt.figure(figsize=(12, 10), constrained_layout=True) 24 | gs = fig3.add_gridspec(3, 4) 25 | 26 | 27 | f3_ax1 = fig3.add_subplot(gs[0, :-1]) 28 | plt.tick_params(labelbottom=False) 29 | 30 | plt.plot(data[:max_length],'k') 31 | for r in range_anomaly: 32 | if r[0]==r[1]: 33 | plt.plot(r[0],data[r[0]],'r.') 34 | else: 35 | plt.plot(range(r[0],r[1]+1),data[range(r[0],r[1]+1)],'r') 36 | # plt.xlim([0,max_length]) 37 | plt.xlim(plotRange) 38 | 39 | 40 | # L = [auc, precision, recall, f, Rrecall, ExistenceReward, 41 | # OverlapReward, Rprecision, Rf, precision_at_k] 42 | f3_ax2 = fig3.add_subplot(gs[1, :-1]) 43 | # plt.tick_params(labelbottom=False) 44 | L1 = [ '%.2f' % elem for elem in L] 45 | plt.plot(score[:max_length]) 46 | plt.hlines(np.mean(score)+3*np.std(score),0,max_length,linestyles='--',color='red') 47 | plt.ylabel('score') 48 | # plt.xlim([0,max_length]) 49 | plt.xlim(plotRange) 50 | 51 | 52 | #plot the data 53 | f3_ax3 = fig3.add_subplot(gs[2, :-1]) 54 | index = ( label + 2*(score > (np.mean(score)+3*np.std(score)))) 55 | cf = lambda x: 'k' if x==0 else ('r' if x == 1 else ('g' if x == 2 else 'b') ) 56 | cf = np.vectorize(cf) 57 | 58 | color = cf(index[:max_length]) 59 | black_patch = mpatches.Patch(color = 'black', label = 'TN') 60 | red_patch = mpatches.Patch(color = 'red', label = 'FN') 61 | green_patch = mpatches.Patch(color = 'green', label = 'FP') 62 | blue_patch = mpatches.Patch(color = 'blue', label = 'TP') 63 | plt.scatter(np.arange(max_length), data[:max_length], c=color, marker='.') 64 | plt.legend(handles = [black_patch, red_patch, green_patch, blue_patch], loc= 'best') 65 | # plt.xlim([0,max_length]) 66 | plt.xlim(plotRange) 67 | 68 | 69 | f3_ax4 = fig3.add_subplot(gs[0, -1]) 70 | plt.plot(fpr, tpr) 71 | # plt.plot(R_fpr,R_tpr) 72 | # plt.title('R_AUC='+str(round(R_AUC,3))) 73 | plt.xlabel('FPR') 74 | plt.ylabel('TPR') 75 | # plt.legend(['ROC','Range-ROC']) 76 | 77 | # f3_ax5 = fig3.add_subplot(gs[1, -1]) 78 | # plt.plot(recall, precision) 79 | # plt.plot(R_tpr[:-1],R_prec) # I add (1,1) to (TPR, FPR) at the end !!! 
80 | # plt.xlabel('Recall') 81 | # plt.ylabel('Precision') 82 | # plt.legend(['PR','Range-PR']) 83 | 84 | # print('AUC=', L1[0]) 85 | # print('F=', L1[3]) 86 | 87 | plt.suptitle(fileName + ' window='+str(slidingWindow) +' '+ modelName 88 | +'\nAUC='+L1[0]+' R_AUC='+str(round(R_AUC,2))+' Precision='+L1[1]+ ' Recall='+L1[2]+' F='+L1[3] 89 | + ' ExistenceReward='+L1[5]+' OverlapReward='+L1[6] 90 | +'\nAP='+str(round(AP,2))+' R_AP='+str(round(R_AP,2))+' Precision@k='+L1[9]+' Rprecision='+L1[7] + ' Rrecall='+L1[4] +' Rf='+L1[8] 91 | ) 92 | 93 | def printResult(data, label, score, slidingWindow, fileName, modelName): 94 | grader = metricor() 95 | R_AUC = grader.RangeAUC(labels=label, score=score, window=slidingWindow, plot_ROC=False) # 96 | L= grader.metric_new(label, score, plot_ROC=False) 97 | L.append(R_AUC) 98 | return L 99 | -------------------------------------------------------------------------------- /TSB_AD/main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import torch 7 | import random, argparse 8 | from sklearn.preprocessing import MinMaxScaler 9 | from .evaluation.metrics import get_metrics 10 | from .utils.slidingWindows import find_length_rank 11 | from .model_wrapper import * 12 | from .HP_list import Optimal_Uni_algo_HP_dict 13 | 14 | # seeding 15 | seed = 2024 16 | torch.manual_seed(seed) 17 | torch.cuda.manual_seed(seed) 18 | torch.cuda.manual_seed_all(seed) 19 | np.random.seed(seed) 20 | random.seed(seed) 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | 24 | print("CUDA Available: ", torch.cuda.is_available()) 25 | print("cuDNN Version: ", torch.backends.cudnn.version()) 26 | 27 | 28 | if __name__ == '__main__': 29 | 30 | ## ArgumentParser 31 | parser = argparse.ArgumentParser(description='Running TSB-AD') 32 | parser.add_argument('--filename', type=str, default='001_NAB_id_1_Facility_tr_1007_1st_2014.csv') 33 | parser.add_argument('--data_direc', type=str, default='Datasets/TSB-AD-U/') 34 | parser.add_argument('--save', type=bool, default=False) 35 | parser.add_argument('--AD_Name', type=str, default='IForest') 36 | args = parser.parse_args() 37 | 38 | df = pd.read_csv(args.data_direc + args.filename).dropna() 39 | data = df.iloc[:, 0:-1].values.astype(float) 40 | label = df['Label'].astype(int).to_numpy() 41 | 42 | slidingWindow = find_length_rank(data, rank=1) 43 | train_index = args.filename.split('.')[0].split('_')[-3] 44 | data_train = data[:int(train_index), :] 45 | Optimal_Det_HP = Optimal_Uni_algo_HP_dict[args.AD_Name] 46 | 47 | if args.AD_Name in Semisupervise_AD_Pool: 48 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **Optimal_Det_HP) 49 | elif args.AD_Name in Unsupervise_AD_Pool: 50 | output = run_Unsupervise_AD(args.AD_Name, data, **Optimal_Det_HP) 51 | else: 52 | raise Exception(f"{args.AD_Name} is not defined") 53 | 54 | if isinstance(output, np.ndarray): 55 | output = MinMaxScaler(feature_range=(0,1)).fit_transform(output.reshape(-1,1)).ravel() 56 | evaluation_result = get_metrics(output, label, slidingWindow=slidingWindow, pred=output > (np.mean(output)+3*np.std(output))) 57 | print('Evaluation Result: ', evaluation_result) 58 | else: 59 | print(f'At {args.filename}: '+output) 60 | 61 | -------------------------------------------------------------------------------- /TSB_AD/models/COF.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | This function is adapted from [pyod] by [yzhao062] 4 | Original source: [https://github.com/yzhao062/pyod] 5 | """ 6 | 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import warnings 11 | from operator import itemgetter 12 | 13 | import numpy as np 14 | from scipy.spatial import distance_matrix 15 | from scipy.spatial import minkowski_distance 16 | from sklearn.utils import check_array 17 | 18 | from .base import BaseDetector 19 | from ..utils.utility import check_parameter 20 | 21 | 22 | class COF(BaseDetector): 23 | """Connectivity-Based Outlier Factor (COF) COF uses the ratio of average 24 | chaining distance of data point and the average of average chaining 25 | distance of k nearest neighbor of the data point, as the outlier score 26 | for observations. 27 | 28 | See :cite:`tang2002enhancing` for details. 29 | 30 | Two version of COF are supported: 31 | 32 | - Fast COF: computes the entire pairwise distance matrix at the cost of a 33 | O(n^2) memory requirement. 34 | - Memory efficient COF: calculates pairwise distances incrementally. 35 | Use this implementation when it is not feasible to fit the n-by-n 36 | distance in memory. This leads to a linear overhead because many 37 | distances will have to be recalculated. 38 | 39 | Parameters 40 | ---------- 41 | contamination : float in (0., 0.5), optional (default=0.1) 42 | The amount of contamination of the data set, i.e. 43 | the proportion of outliers in the data set. Used when fitting to 44 | define the threshold on the decision function. 45 | 46 | n_neighbors : int, optional (default=20) 47 | Number of neighbors to use by default for k neighbors queries. 48 | Note that n_neighbors should be less than the number of samples. 49 | If n_neighbors is larger than the number of samples provided, 50 | all samples will be used. 51 | 52 | method : string, optional (default='fast') 53 | Valid values for method are: 54 | 55 | - 'fast' Fast COF, computes the full pairwise distance matrix up front. 56 | - 'memory' Memory-efficient COF, computes pairwise distances only when 57 | needed at the cost of computational speed. 58 | 59 | Attributes 60 | ---------- 61 | decision_scores_ : numpy array of shape (n_samples,) 62 | The outlier scores of the training data. 63 | The higher, the more abnormal. Outliers tend to have higher 64 | scores. This value is available once the detector is 65 | fitted. 66 | 67 | threshold_ : float 68 | The threshold is based on ``contamination``. It is the 69 | ``n_samples * contamination`` most abnormal samples in 70 | ``decision_scores_``. The threshold is calculated for generating 71 | binary outlier labels. 72 | 73 | labels_ : int, either 0 or 1 74 | The binary labels of the training data. 0 stands for inliers 75 | and 1 for outliers/anomalies. It is generated by applying 76 | ``threshold_`` on ``decision_scores_``. 77 | 78 | n_neighbors_: int 79 | Number of neighbors to use by default for k neighbors queries. 80 | """ 81 | 82 | def __init__(self, contamination=0.1, n_neighbors=20, method="fast"): 83 | super(COF, self).__init__(contamination=contamination) 84 | if isinstance(n_neighbors, int): 85 | check_parameter(n_neighbors, low=1, param_name='n_neighbors') 86 | else: 87 | raise TypeError( 88 | "n_neighbors should be int. 
Got %s" % type(n_neighbors)) 89 | self.n_neighbors = n_neighbors 90 | self.method = method 91 | 92 | def fit(self, X, y=None): 93 | """Fit detector. y is ignored in unsupervised methods. 94 | 95 | Parameters 96 | ---------- 97 | X : numpy array of shape (n_samples, n_features) 98 | The input samples. 99 | 100 | y : Ignored 101 | Not used, present for API consistency by convention. 102 | 103 | Returns 104 | ------- 105 | self : object 106 | Fitted estimator. 107 | """ 108 | X = check_array(X) 109 | self.n_train_ = X.shape[0] 110 | self.n_neighbors_ = self.n_neighbors 111 | 112 | if self.n_neighbors_ >= self.n_train_: 113 | self.n_neighbors_ = self.n_train_ - 1 114 | warnings.warn( 115 | "n_neighbors is set to the number of training points " 116 | "minus 1: {0}".format(self.n_neighbors_)) 117 | 118 | check_parameter(self.n_neighbors_, 1, self.n_train_, 119 | include_left=True, include_right=True) 120 | 121 | self._set_n_classes(y) 122 | self.decision_scores_ = self.decision_function(X) 123 | self._process_decision_scores() 124 | 125 | return self 126 | 127 | def decision_function(self, X): 128 | """Predict raw anomaly score of X using the fitted detector. 129 | The anomaly score of an input sample is computed based on different 130 | detector algorithms. For consistency, outliers are assigned with 131 | larger anomaly scores. 132 | 133 | Parameters 134 | ---------- 135 | X : numpy array of shape (n_samples, n_features) 136 | The training input samples. Sparse matrices are accepted only 137 | if they are supported by the base estimator. 138 | 139 | Returns 140 | ------- 141 | anomaly_scores : numpy array of shape (n_samples,) 142 | The anomaly score of the input samples. 143 | """ 144 | if self.method.lower() == "fast": 145 | return self._cof_fast(X) 146 | elif self.method.lower() == "memory": 147 | return self._cof_memory(X) 148 | else: 149 | raise ValueError("method should be set to either \'fast\' or \'memory\'. Got %s" % self.method) 150 | 151 | def _cof_memory(self, X): 152 | """ 153 | Connectivity-Based Outlier Factor (COF) Algorithm 154 | This function is called internally to calculate the 155 | Connectivity-Based Outlier Factor (COF) as an outlier 156 | score for observations. 157 | This function uses a memory efficient implementation at the cost of 158 | speed. 159 | :return: numpy array containing COF scores for observations. 160 | The greater the COF, the greater the outlierness. 161 | """ 162 | #dist_matrix = np.array(distance_matrix(X, X)) 163 | sbn_path_index = np.zeros((X.shape[0],self.n_neighbors_), dtype=np.int64) 164 | ac_dist, cof_ = np.zeros((X.shape[0])), np.zeros((X.shape[0])) 165 | for i in range(X.shape[0]): 166 | #sbn_path = np.argsort(dist_matrix[i]) 167 | sbn_path = np.argsort(minkowski_distance(X[i,:],X,p=2)) 168 | sbn_path_index[i,:] = sbn_path[1: self.n_neighbors_ + 1] 169 | cost_desc = np.zeros((self.n_neighbors_)) 170 | for j in range(self.n_neighbors_): 171 | #cost_desc.append( 172 | # np.min(dist_matrix[sbn_path[j + 1]][sbn_path][:j + 1])) 173 | cost_desc[j] = np.min(minkowski_distance(X[sbn_path[j + 1]],X,p=2)[sbn_path][:j + 1]) 174 | acd = np.zeros((self.n_neighbors_)) 175 | for _h, cost_ in enumerate(cost_desc): 176 | neighbor_add1 = self.n_neighbors_ + 1 177 | acd[_h] = ((2. 
* (neighbor_add1 - (_h + 1))) / (neighbor_add1 * self.n_neighbors_)) * cost_ 178 | ac_dist[i] = np.sum(acd) 179 | for _g in range(X.shape[0]): 180 | cof_[_g] = (ac_dist[_g] * self.n_neighbors_) / np.sum(ac_dist[sbn_path_index[_g]]) 181 | return np.nan_to_num(cof_) 182 | 183 | def _cof_fast(self, X): 184 | """ 185 | Connectivity-Based Outlier Factor (COF) Algorithm 186 | This function is called internally to calculate the 187 | Connectivity-Based Outlier Factor (COF) as an outlier 188 | score for observations. 189 | This function uses a fast implementation at the cost of memory. 190 | :return: numpy array containing COF scores for observations. 191 | The greater the COF, the greater the outlierness. 192 | """ 193 | dist_matrix = np.array(distance_matrix(X, X)) 194 | sbn_path_index, ac_dist, cof_ = [], [], [] 195 | for i in range(X.shape[0]): 196 | sbn_path = np.argsort(dist_matrix[i]) 197 | sbn_path_index.append(sbn_path[1: self.n_neighbors_ + 1]) 198 | cost_desc = [] 199 | for j in range(self.n_neighbors_): 200 | cost_desc.append( 201 | np.min(dist_matrix[sbn_path[j + 1]][sbn_path][:j + 1])) 202 | acd = [] 203 | for _h, cost_ in enumerate(cost_desc): 204 | neighbor_add1 = self.n_neighbors_ + 1 205 | acd.append(((2. * (neighbor_add1 - (_h + 1))) / ( 206 | neighbor_add1 * self.n_neighbors_)) * cost_) 207 | ac_dist.append(np.sum(acd)) 208 | for _g in range(X.shape[0]): 209 | cof_.append((ac_dist[_g] * self.n_neighbors_) / 210 | np.sum(itemgetter(*sbn_path_index[_g])(ac_dist))) 211 | return np.nan_to_num(cof_) -------------------------------------------------------------------------------- /TSB_AD/models/COPOD.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [pyod] by [yzhao062] 3 | Original source: [https://github.com/yzhao062/pyod] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | import warnings 9 | 10 | import numpy as np 11 | 12 | from joblib import Parallel, delayed 13 | from scipy.stats import skew as skew_sp 14 | from sklearn.utils.validation import check_is_fitted 15 | from sklearn.utils import check_array 16 | 17 | from .base import BaseDetector 18 | from ..utils.stat_models import column_ecdf 19 | from ..utils.utility import _partition_estimators 20 | from ..utils.utility import zscore 21 | 22 | def skew(X, axis=0): 23 | return np.nan_to_num(skew_sp(X, axis=axis)) 24 | 25 | def _parallel_ecdf(n_dims, X): 26 | """Private method to calculate ecdf in parallel. 27 | Parameters 28 | ---------- 29 | n_dims : int 30 | The number of dimensions of the current input matrix 31 | 32 | X : numpy array 33 | The subarray for building the ECDF 34 | 35 | Returns 36 | ------- 37 | U_l_mat : numpy array 38 | ECDF subarray. 39 | 40 | U_r_mat : numpy array 41 | ECDF subarray. 42 | """ 43 | U_l_mat = np.zeros([X.shape[0], n_dims]) 44 | U_r_mat = np.zeros([X.shape[0], n_dims]) 45 | 46 | for i in range(n_dims): 47 | U_l_mat[:, i: i + 1] = column_ecdf(X[:, i: i + 1]) 48 | U_r_mat[:, i: i + 1] = column_ecdf(X[:, i: i + 1] * -1) 49 | return U_l_mat, U_r_mat 50 | 51 | class COPOD(BaseDetector): 52 | """COPOD class for Copula Based Outlier Detector. 53 | COPOD is a parameter-free, highly interpretable outlier detection algorithm 54 | based on empirical copula models. 55 | See :cite:`li2020copod` for details. 56 | 57 | Parameters 58 | ---------- 59 | contamination : float in (0., 0.5), optional (default=0.1) 60 | The amount of contamination of the data set, i.e. 
61 | the proportion of outliers in the data set. Used when fitting to 62 | define the threshold on the decision function. 63 | 64 | n_jobs : optional (default=1) 65 | The number of jobs to run in parallel for both `fit` and 66 | `predict`. If -1, then the number of jobs is set to the 67 | number of cores. 68 | 69 | Attributes 70 | ---------- 71 | decision_scores_ : numpy array of shape (n_samples,) 72 | The outlier scores of the training data. 73 | The higher, the more abnormal. Outliers tend to have higher 74 | scores. This value is available once the detector is 75 | fitted. 76 | threshold_ : float 77 | The threshold is based on ``contamination``. It is the 78 | ``n_samples * contamination`` most abnormal samples in 79 | ``decision_scores_``. The threshold is calculated for generating 80 | binary outlier labels. 81 | labels_ : int, either 0 or 1 82 | The binary labels of the training data. 0 stands for inliers 83 | and 1 for outliers/anomalies. It is generated by applying 84 | ``threshold_`` on ``decision_scores_``. 85 | """ 86 | 87 | def __init__(self, contamination=0.1, n_jobs=1, normalize=True): 88 | super(COPOD, self).__init__(contamination=contamination) 89 | 90 | #TODO: Make it parameterized for n_jobs 91 | self.n_jobs = n_jobs 92 | self.normalize = normalize 93 | 94 | def fit(self, X, y=None): 95 | """Fit detector. y is ignored in unsupervised methods. 96 | Parameters 97 | ---------- 98 | X : numpy array of shape (n_samples, n_features) 99 | The input samples. 100 | y : Ignored 101 | Not used, present for API consistency by convention. 102 | Returns 103 | ------- 104 | self : object 105 | Fitted estimator. 106 | """ 107 | X = check_array(X) 108 | if self.normalize: X = zscore(X, axis=1, ddof=1) 109 | 110 | self._set_n_classes(y) 111 | self.decision_scores_ = self.decision_function(X) 112 | self.X_train = X 113 | self._process_decision_scores() 114 | return self 115 | 116 | def decision_function(self, X): 117 | """Predict raw anomaly score of X using the fitted detector. 118 | For consistency, outliers are assigned with larger anomaly scores. 119 | Parameters 120 | ---------- 121 | X : numpy array of shape (n_samples, n_features) 122 | The training input samples. Sparse matrices are accepted only 123 | if they are supported by the base estimator. 124 | Returns 125 | ------- 126 | anomaly_scores : numpy array of shape (n_samples,) 127 | The anomaly score of the input samples. 128 | """ 129 | # use multi-thread execution 130 | if self.n_jobs != 1: 131 | return self._decision_function_parallel(X) 132 | if hasattr(self, 'X_train'): 133 | original_size = X.shape[0] 134 | X = np.concatenate((self.X_train, X), axis=0) 135 | self.U_l = -1 * np.log(column_ecdf(X)) 136 | self.U_r = -1 * np.log(column_ecdf(-X)) 137 | 138 | skewness = np.sign(skew(X, axis=0)) 139 | self.U_skew = self.U_l * -1 * np.sign( 140 | skewness - 1) + self.U_r * np.sign(skewness + 1) 141 | self.O = np.maximum(self.U_skew, np.add(self.U_l, self.U_r) / 2) 142 | if hasattr(self, 'X_train'): 143 | decision_scores_ = self.O.sum(axis=1)[-original_size:] 144 | else: 145 | decision_scores_ = self.O.sum(axis=1) 146 | return decision_scores_.ravel() 147 | 148 | def _decision_function_parallel(self, X): 149 | """Predict raw anomaly score of X using the fitted detector. 150 | For consistency, outliers are assigned with larger anomaly scores. 151 | Parameters 152 | ---------- 153 | X : numpy array of shape (n_samples, n_features) 154 | The training input samples. 
Sparse matrices are accepted only 155 | if they are supported by the base estimator. 156 | Returns 157 | ------- 158 | anomaly_scores : numpy array of shape (n_samples,) 159 | The anomaly score of the input samples. 160 | """ 161 | if hasattr(self, 'X_train'): 162 | original_size = X.shape[0] 163 | X = np.concatenate((self.X_train, X), axis=0) 164 | 165 | n_samples, n_features = X.shape[0], X.shape[1] 166 | 167 | if n_features < 2: 168 | raise ValueError( 169 | 'n_jobs should not be used on one dimensional dataset') 170 | 171 | if n_features <= self.n_jobs: 172 | self.n_jobs = n_features 173 | warnings.warn("n_features <= n_jobs; setting them equal instead.") 174 | 175 | n_jobs, n_dims_list, starts = _partition_estimators(n_features, 176 | self.n_jobs) 177 | 178 | all_results = Parallel(n_jobs=n_jobs, max_nbytes=None, 179 | verbose=True)( 180 | delayed(_parallel_ecdf)( 181 | n_dims_list[i], 182 | X[:, starts[i]:starts[i + 1]], 183 | ) 184 | for i in range(n_jobs)) 185 | 186 | # recover the results 187 | self.U_l = np.zeros([n_samples, n_features]) 188 | self.U_r = np.zeros([n_samples, n_features]) 189 | 190 | for i in range(n_jobs): 191 | self.U_l[:, starts[i]:starts[i + 1]] = all_results[i][0] 192 | self.U_r[:, starts[i]:starts[i + 1]] = all_results[i][1] 193 | 194 | self.U_l = -1 * np.log(self.U_l) 195 | self.U_r = -1 * np.log(self.U_r) 196 | 197 | skewness = np.sign(skew(X, axis=0)) 198 | self.U_skew = self.U_l * -1 * np.sign( 199 | skewness - 1) + self.U_r * np.sign(skewness + 1) 200 | self.O = np.maximum(self.U_skew, np.add(self.U_l, self.U_r) / 2) 201 | if hasattr(self, 'X_train'): 202 | decision_scores_ = self.O.sum(axis=1)[-original_size:] 203 | else: 204 | decision_scores_ = self.O.sum(axis=1) 205 | return decision_scores_.ravel() -------------------------------------------------------------------------------- /TSB_AD/models/Chronos.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [chronos-forecasting] by [lostella et al.] 
3 | Original source: [https://github.com/amazon-science/chronos-forecasting] 4 | """ 5 | 6 | from autogluon.timeseries import TimeSeriesPredictor 7 | from sklearn.preprocessing import MinMaxScaler 8 | import numpy as np 9 | import pandas as pd 10 | import tempfile 11 | 12 | from .base import BaseDetector 13 | 14 | 15 | class Chronos(BaseDetector): 16 | def __init__(self, 17 | win_size=100, 18 | model_size = 'base', # [tiny, small, base] 19 | prediction_length=1, 20 | input_c=1, 21 | batch_size=128): 22 | 23 | self.model_name = 'Chronos' 24 | self.model_size = model_size 25 | self.win_size = win_size 26 | self.prediction_length = prediction_length 27 | self.input_c = input_c 28 | self.batch_size = batch_size 29 | self.score_list = [] 30 | 31 | def fit(self, data): 32 | 33 | for channel in range(self.input_c): 34 | 35 | data_channel = data[:, channel].reshape(-1, 1) 36 | data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.win_size, predict_time_steps=self.prediction_length) 37 | # print('data_win: ', data_win.shape) # (2330, 100) 38 | # print('data_target: ', data_target.shape) # (2330, 1) 39 | 40 | train_data = [] 41 | count = 0 42 | for id in range(data_win.shape[0]): 43 | for tt in range(data_win.shape[1]): 44 | train_data.append([id, count, data_win[id, tt]]) 45 | count += 1 46 | train_data = pd.DataFrame(train_data, columns=['item_id', 'timestamp', 'target']) 47 | 48 | with tempfile.TemporaryDirectory() as temp_dir: 49 | 50 | predictor = TimeSeriesPredictor(prediction_length=self.prediction_length, path=temp_dir).fit( 51 | train_data, 52 | hyperparameters={ 53 | "Chronos": { 54 | "model_path": self.model_size, # base 55 | "device": "cuda", 56 | "batch_size": self.batch_size}}, 57 | skip_model_selection=True, 58 | verbosity=0) 59 | 60 | predictions = predictor.predict(train_data)['mean'].to_numpy().reshape(-1, self.prediction_length) 61 | print('predictions: ', predictions.shape) 62 | 63 | ### using mse as the anomaly score 64 | scores = (data_target.squeeze() - predictions.squeeze()) ** 2 65 | self.score_list.append(scores) 66 | 67 | scores_merge = np.mean(np.array(self.score_list), axis=0) 68 | # print('scores_merge: ', scores_merge.shape) 69 | 70 | padded_decision_scores = np.zeros(len(data)) 71 | padded_decision_scores[: self.win_size+self.prediction_length-1] = scores_merge[0] 72 | padded_decision_scores[self.win_size+self.prediction_length-1 : ]=scores_merge 73 | 74 | self.decision_scores_ = padded_decision_scores 75 | 76 | 77 | def decision_function(self, X): 78 | """ 79 | Not used, present for API consistency by convention. 
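In this wrapper the anomaly scores are computed inside ``fit`` (squared one-step forecast error per channel, averaged across channels) and exposed via ``decision_scores_``. A minimal usage sketch, assuming ``data`` is a 2-D array of shape (n_samples, n_features):

>>> detector = Chronos(win_size=100, model_size='base', input_c=data.shape[1])
>>> detector.fit(data)
>>> scores = detector.decision_scores_   # shape: (n_samples,)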
80 | """ 81 | pass 82 | 83 | def create_dataset(self, X, slidingWindow, predict_time_steps=1): 84 | Xs, ys = [], [] 85 | for i in range(len(X) - slidingWindow - predict_time_steps+1): 86 | 87 | tmp = X[i : i + slidingWindow + predict_time_steps].ravel() 88 | # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() 89 | 90 | x = tmp[:slidingWindow] 91 | y = tmp[slidingWindow:] 92 | Xs.append(x) 93 | ys.append(y) 94 | return np.array(Xs), np.array(ys) -------------------------------------------------------------------------------- /TSB_AD/models/FFT.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] 3 | Original source: [https://github.com/TimeEval/TimeEval-algorithms] 4 | """ 5 | 6 | import numpy as np 7 | from dataclasses import dataclass 8 | from TSB_AD.models.base import BaseDetector 9 | from TSB_AD.utils.utility import zscore 10 | 11 | class FFT(BaseDetector): 12 | 13 | def __init__(self, ifft_parameters=5, local_neighbor_window=21, local_outlier_threshold=0.6, max_region_size=50, max_sign_change_distance=10, normalize=True): 14 | super().__init__() 15 | 16 | self.ifft_parameters = ifft_parameters 17 | self.local_neighbor_window = local_neighbor_window 18 | self.local_outlier_threshold = local_outlier_threshold 19 | self.max_region_size = max_region_size 20 | self.max_sign_change_distance = max_sign_change_distance 21 | self.normalize = normalize 22 | self.decision_scores_ = None 23 | 24 | def fit(self, X, y=None): 25 | """Fit detector. y is ignored in unsupervised methods.""" 26 | n_samples, n_features = X.shape 27 | if self.normalize: 28 | if n_features == 1: 29 | X = zscore(X, axis=0, ddof=0) 30 | else: 31 | X = zscore(X, axis=1, ddof=1) 32 | self.data = X 33 | self.decision_scores_ = self.detect_anomalies() 34 | return self 35 | 36 | def decision_function(self, X): 37 | """Predict raw anomaly score of X using the fitted detector.""" 38 | n_samples, n_features = X.shape 39 | decision_scores_ = np.zeros(n_samples) 40 | self.data = X 41 | local_outliers = self.calculate_local_outliers() 42 | if not local_outliers: 43 | print("No local outliers detected.") 44 | return np.zeros_like(self.data) 45 | 46 | regions = self.calculate_region_outliers(local_outliers) 47 | anomaly_scores = np.zeros_like(self.data) 48 | for region in regions: 49 | start_index = local_outliers[region.start_idx].index 50 | end_index = local_outliers[region.end_idx].index 51 | anomaly_scores[start_index:end_index + 1] = region.score 52 | 53 | decision_scores_ = anomaly_scores 54 | return decision_scores_ 55 | 56 | @staticmethod 57 | def reduce_parameters(f: np.ndarray, k: int) -> np.ndarray: 58 | transformed = f.copy() 59 | transformed[k:] = 0 60 | return transformed 61 | 62 | def calculate_local_outliers(self): 63 | n = len(self.data) 64 | k = max(min(self.ifft_parameters, n), 1) 65 | y = self.reduce_parameters(np.fft.fft(self.data), k) 66 | f2 = np.real(np.fft.ifft(y)) 67 | 68 | so = np.abs(f2 - self.data) 69 | mso = np.mean(so) 70 | neighbor_c = self.local_neighbor_window // 2 71 | 72 | scores = [] 73 | score_idxs = [] 74 | for i in range(n): 75 | if so[i] > mso: 76 | nav = np.mean(self.data[max(i - neighbor_c, 0):min(i + neighbor_c + 1, n)]) 77 | scores.append(self.data[i] - nav) 78 | score_idxs.append(i) 79 | 80 | if not scores: 81 | return [] 82 | 83 | ms = np.mean(scores) 84 | sds = np.std(scores) + 1e-6 85 | z_scores = (np.array(scores) - ms) / sds 86 | 87 | return 
[self.LocalOutlier(index=score_idxs[i], z_score=z_scores[i]) 88 | for i in range(len(scores)) if abs(z_scores[i]) > self.local_outlier_threshold] 89 | 90 | def calculate_region_outliers(self, local_outliers): 91 | def distance(a: int, b: int) -> int: 92 | return abs(local_outliers[b].index - local_outliers[a].index) 93 | 94 | regions = [] 95 | i = 0 96 | n_l = len(local_outliers) - 1 97 | while i < n_l: 98 | start_idx = i 99 | while i < n_l and distance(i, i + 1) <= self.max_sign_change_distance: 100 | i += 1 101 | end_idx = i 102 | if end_idx > start_idx: 103 | score = np.mean([abs(local_outliers[j].z_score) for j in range(start_idx, end_idx + 1)]) 104 | regions.append(self.RegionOutlier(start_idx=start_idx, end_idx=end_idx, score=score)) 105 | i += 1 106 | 107 | return regions 108 | 109 | @dataclass 110 | class LocalOutlier: 111 | index: int 112 | z_score: float 113 | 114 | @property 115 | def sign(self) -> int: 116 | return np.sign(self.z_score) 117 | 118 | @dataclass 119 | class RegionOutlier: 120 | start_idx: int 121 | end_idx: int 122 | score: float 123 | 124 | def detect_anomalies(self): 125 | """Detect anomalies by combining local and regional outliers.""" 126 | local_outliers = self.calculate_local_outliers() 127 | if not local_outliers: 128 | print("No local outliers detected.") 129 | return np.zeros_like(self.data) 130 | 131 | regions = self.calculate_region_outliers(local_outliers) 132 | anomaly_scores = np.zeros_like(self.data) 133 | for region in regions: 134 | start_index = local_outliers[region.start_idx].index 135 | end_index = local_outliers[region.end_idx].index 136 | anomaly_scores[start_index:end_index + 1] = region.score 137 | 138 | return anomaly_scores -------------------------------------------------------------------------------- /TSB_AD/models/FITS.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [FITS] by [VEWOXIC] 3 | Original source: [https://github.com/VEWOXIC/FITS] 4 | """ 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import numpy as np 10 | from typing import Dict 11 | import torchinfo 12 | import tqdm 13 | import numpy as np 14 | import torch 15 | from torch import nn, optim 16 | from torch.utils.data import DataLoader 17 | import math 18 | 19 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 20 | from ..utils.dataset import ReconstructDataset 21 | 22 | class Model(nn.Module): 23 | 24 | # FITS: Frequency Interpolation Time Series Forecasting 25 | 26 | def __init__(self, seq_len, pred_len, individual, enc_in, cut_freq): 27 | super(Model, self).__init__() 28 | self.seq_len = seq_len 29 | self.pred_len = pred_len 30 | self.individual = individual 31 | self.channels = enc_in 32 | 33 | self.dominance_freq = cut_freq # 720/24 34 | self.length_ratio = (self.seq_len + self.pred_len)/self.seq_len 35 | 36 | if self.individual: 37 | self.freq_upsampler = nn.ModuleList() 38 | for i in range(self.channels): 39 | self.freq_upsampler.append(nn.Linear(self.dominance_freq, int(self.dominance_freq*self.length_ratio)).to(torch.cfloat)) 40 | 41 | else: 42 | self.freq_upsampler = nn.Linear(self.dominance_freq, int(self.dominance_freq*self.length_ratio)).to(torch.cfloat) # complex layer for frequency upcampling] 43 | # configs.pred_len=configs.seq_len+configs.pred_len 44 | # #self.Dlinear=DLinear.Model(configs) 45 | # configs.pred_len=self.pred_len 46 | 47 | 48 | def forward(self, x): 49 | # RIN 50 | x_mean = torch.mean(x, dim=1, keepdim=True) 51 
| x = x - x_mean 52 | x_var=torch.var(x, dim=1, keepdim=True)+ 1e-5 53 | # print(x_var) 54 | x = x / torch.sqrt(x_var) 55 | 56 | low_specx = torch.fft.rfft(x, dim=1) 57 | low_specx[:,self.dominance_freq:]=0 # LPF 58 | low_specx = low_specx[:,0:self.dominance_freq,:] # LPF 59 | # print(low_specx.permute(0,2,1)) 60 | if self.individual: 61 | low_specxy_ = torch.zeros([low_specx.size(0),int(self.dominance_freq*self.length_ratio),low_specx.size(2)],dtype=low_specx.dtype).to(low_specx.device) 62 | for i in range(self.channels): 63 | low_specxy_[:,:,i]=self.freq_upsampler[i](low_specx[:,:,i].permute(0,1)).permute(0,1) 64 | else: 65 | low_specxy_ = self.freq_upsampler(low_specx.permute(0,2,1)).permute(0,2,1) 66 | # print(low_specxy_) 67 | low_specxy = torch.zeros([low_specxy_.size(0),int((self.seq_len+self.pred_len)/2+1),low_specxy_.size(2)],dtype=low_specxy_.dtype).to(low_specxy_.device) 68 | low_specxy[:,0:low_specxy_.size(1),:]=low_specxy_ # zero padding 69 | low_xy=torch.fft.irfft(low_specxy, dim=1) 70 | low_xy=low_xy * self.length_ratio # energy compemsation for the length change 71 | # dom_x=x-low_x 72 | 73 | # dom_xy=self.Dlinear(dom_x) 74 | # xy=(low_xy+dom_xy) * torch.sqrt(x_var) +x_mean # REVERSE RIN 75 | xy=(low_xy) * torch.sqrt(x_var) +x_mean 76 | return xy, low_xy* torch.sqrt(x_var) 77 | 78 | 79 | class FITS(): 80 | def __init__(self, 81 | win_size=100, 82 | DSR=4, 83 | individual=True, 84 | input_c=1, 85 | batch_size=128, 86 | cut_freq=12, 87 | epochs=50, 88 | lr=1e-3, 89 | validation_size=0.2 90 | ): 91 | super().__init__() 92 | self.__anomaly_score = None 93 | 94 | self.cuda = True 95 | self.device = get_gpu(self.cuda) 96 | 97 | 98 | self.win_size = win_size 99 | self.DSR = DSR 100 | self.individual = individual 101 | self.input_c = input_c 102 | self.batch_size = batch_size 103 | self.cut_freq = cut_freq 104 | self.validation_size = validation_size 105 | 106 | self.model = Model(seq_len=self.win_size//self.DSR, pred_len=self.win_size-self.win_size//self.DSR, individual=self.individual, enc_in=self.input_c, cut_freq=self.cut_freq).to(self.device) 107 | 108 | self.epochs = epochs 109 | self.learning_rate = lr 110 | self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate) 111 | self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) 112 | self.loss = nn.MSELoss() 113 | self.anomaly_criterion = nn.MSELoss(reduce=False) 114 | 115 | self.save_path = None 116 | self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) 117 | 118 | def fit(self, data): 119 | 120 | tsTrain = data[:int((1-self.validation_size)*len(data))] 121 | tsValid = data[int((1-self.validation_size)*len(data)):] 122 | 123 | train_loader = DataLoader( 124 | dataset=ReconstructDataset(tsTrain, window_size=self.win_size), 125 | batch_size=self.batch_size, 126 | shuffle=True 127 | ) 128 | 129 | valid_loader = DataLoader( 130 | dataset=ReconstructDataset(tsValid, window_size=self.win_size), 131 | batch_size=self.batch_size, 132 | shuffle=False 133 | ) 134 | 135 | for epoch in range(1, self.epochs + 1): 136 | self.model.train(mode=True) 137 | avg_loss = 0 138 | loop = tqdm.tqdm(enumerate(train_loader),total=len(train_loader),leave=True) 139 | for idx, (x, target) in loop: 140 | 141 | x = x[:, ::self.DSR, :] 142 | x, target = x.to(self.device), target.to(self.device) 143 | self.optimizer.zero_grad() 144 | 145 | output, _ = self.model(x) 146 | 147 | # print('x: ', x.shape) 148 | # print('target: ', target.shape) 149 | 150 | loss = self.loss(output, target) 151 
| loss.backward() 152 | 153 | self.optimizer.step() 154 | 155 | avg_loss += loss.cpu().item() 156 | loop.set_description(f'Training Epoch [{epoch}/{self.epochs}]') 157 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) 158 | 159 | 160 | self.model.eval() 161 | avg_loss = 0 162 | loop = tqdm.tqdm(enumerate(valid_loader),total=len(valid_loader),leave=True) 163 | with torch.no_grad(): 164 | for idx, (x, target) in loop: 165 | 166 | x = x[:, ::self.DSR, :] 167 | x, target = x.to(self.device), target.to(self.device) 168 | output, _ = self.model(x) 169 | loss = self.loss(output, target) 170 | avg_loss += loss.cpu().item() 171 | loop.set_description(f'Validation Epoch [{epoch}/{self.epochs}]') 172 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss/(idx+1)) 173 | 174 | valid_loss = avg_loss/max(len(valid_loader), 1) 175 | self.scheduler.step() 176 | 177 | self.early_stopping(valid_loss, self.model) 178 | if self.early_stopping.early_stop: 179 | print(" Early stopping<<<") 180 | break 181 | 182 | def decision_function(self, data): 183 | test_loader = DataLoader( 184 | dataset=ReconstructDataset(data, window_size=self.win_size), 185 | batch_size=self.batch_size, 186 | shuffle=False 187 | ) 188 | 189 | self.model.eval() 190 | scores = [] 191 | loop = tqdm.tqdm(enumerate(test_loader),total=len(test_loader),leave=True) 192 | with torch.no_grad(): 193 | for idx, (x, target) in loop: 194 | 195 | x = x[:, ::self.DSR, :] 196 | x, target = x.to(self.device), target.to(self.device) 197 | output, _ = self.model(x) 198 | # loss = self.loss(output, target) 199 | score = torch.mean(self.anomaly_criterion(output, target), dim=-1) 200 | scores.append(score.cpu()[:,-1]) 201 | 202 | loop.set_description(f'Testing: ') 203 | 204 | scores = torch.cat(scores, dim=0) 205 | scores = scores.numpy().flatten() 206 | 207 | assert scores.ndim == 1 208 | self.__anomaly_score = scores 209 | 210 | if self.__anomaly_score.shape[0] < len(data): 211 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 212 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 213 | 214 | return self.__anomaly_score 215 | 216 | 217 | def anomaly_score(self) -> np.ndarray: 218 | return self.__anomaly_score 219 | 220 | def param_statistic(self, save_file): 221 | model_stats = torchinfo.summary(self.model, (self.batch_size, self.input_len), verbose=0) 222 | with open(save_file, 'w') as f: 223 | f.write(str(model_stats)) -------------------------------------------------------------------------------- /TSB_AD/models/KMeansAD.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] 3 | Original source: [https://github.com/TimeEval/TimeEval-algorithms] 4 | """ 5 | 6 | from sklearn.base import BaseEstimator, OutlierMixin 7 | from sklearn.cluster import KMeans 8 | import numpy as np 9 | from numpy.lib.stride_tricks import sliding_window_view 10 | from ..utils.utility import zscore 11 | 12 | class KMeansAD(BaseEstimator, OutlierMixin): 13 | def __init__(self, k, window_size, stride, n_jobs=1, normalize=True): 14 | self.k = k 15 | self.window_size = window_size 16 | self.stride = stride 17 | self.model = KMeans(n_clusters=k) 18 | self.padding_length = 0 19 | self.normalize = normalize 20 | 21 | def _preprocess_data(self, X: np.ndarray) -> np.ndarray: 22 | flat_shape = (X.shape[0] - (self.window_size - 1), -1) # in case we have a multivariate TS 23 | slides = 
sliding_window_view(X, window_shape=self.window_size, axis=0).reshape(flat_shape)[::self.stride, :] 24 | self.padding_length = X.shape[0] - (slides.shape[0] * self.stride + self.window_size - self.stride) 25 | print(f"Required padding_length={self.padding_length}") 26 | if self.normalize: slides = zscore(slides, axis=1, ddof=1) 27 | return slides 28 | 29 | def _custom_reverse_windowing(self, scores: np.ndarray) -> np.ndarray: 30 | print("Reversing window-based scores to point-based scores:") 31 | print(f"Before reverse-windowing: scores.shape={scores.shape}") 32 | # compute begin and end indices of windows 33 | begins = np.array([i * self.stride for i in range(scores.shape[0])]) 34 | ends = begins + self.window_size 35 | 36 | # prepare target array 37 | unwindowed_length = self.stride * (scores.shape[0] - 1) + self.window_size + self.padding_length 38 | mapped = np.full(unwindowed_length, fill_value=np.nan) 39 | 40 | # only iterate over window intersections 41 | indices = np.unique(np.r_[begins, ends]) 42 | for i, j in zip(indices[:-1], indices[1:]): 43 | window_indices = np.flatnonzero((begins <= i) & (j-1 < ends)) 44 | # print(i, j, window_indices) 45 | mapped[i:j] = np.nanmean(scores[window_indices]) 46 | 47 | # replace untouched indices with 0 (especially for the padding at the end) 48 | np.nan_to_num(mapped, copy=False) 49 | print(f"After reverse-windowing: scores.shape={mapped.shape}") 50 | return mapped 51 | 52 | def fit(self, X: np.ndarray, y=None, preprocess=True) -> 'KMeansAD': 53 | if preprocess: 54 | X = self._preprocess_data(X) 55 | self.model.fit(X) 56 | return self 57 | 58 | def predict(self, X: np.ndarray, preprocess=True) -> np.ndarray: 59 | if preprocess: 60 | X = self._preprocess_data(X) 61 | clusters = self.model.predict(X) 62 | diffs = np.linalg.norm(X - self.model.cluster_centers_[clusters], axis=1) 63 | return self._custom_reverse_windowing(diffs) 64 | 65 | def fit_predict(self, X, y=None) -> np.ndarray: 66 | X = self._preprocess_data(X) 67 | self.fit(X, y, preprocess=False) 68 | return self.predict(X, preprocess=False) -------------------------------------------------------------------------------- /TSB_AD/models/Lag_Llama.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [lag-llama] by [ashok-arjun&kashif] 3 | Original source: [https://github.com/time-series-foundation-models/lag-llama] 4 | """ 5 | 6 | from itertools import islice 7 | 8 | from matplotlib import pyplot as plt 9 | import matplotlib.dates as mdates 10 | 11 | import torch 12 | from gluonts.evaluation import make_evaluation_predictions 13 | from gluonts.dataset.pandas import PandasDataset 14 | import pandas as pd 15 | import numpy as np 16 | from ..utils.torch_utility import get_gpu 17 | 18 | from lag_llama.gluon.estimator import LagLlamaEstimator 19 | 20 | class Lag_Llama(): 21 | def __init__(self, 22 | win_size=96, 23 | prediction_length=1, 24 | input_c=1, 25 | use_rope_scaling=False, 26 | batch_size=64, 27 | num_samples=100, 28 | ckpt_path='lag-llama.ckpt'): 29 | 30 | self.model_name = 'Lag_Llama' 31 | self.context_length = win_size 32 | self.prediction_length = prediction_length 33 | self.input_c = input_c 34 | self.ckpt_path = ckpt_path 35 | self.use_rope_scaling = use_rope_scaling 36 | self.batch_size = batch_size 37 | self.num_samples = num_samples 38 | self.score_list = [] 39 | 40 | self.cuda = True 41 | self.device = get_gpu(self.cuda) 42 | 43 | 44 | def fit(self, data): 45 | 46 | for channel in range(self.input_c): 47 
| 48 | data_channel = data[:, channel].reshape(-1, 1) 49 | data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.context_length, predict_time_steps=self.prediction_length) 50 | # print('data_win: ', data_win.shape) # (2330, 100) 51 | # print('data_target: ', data_target.shape) # (2330, 1) 52 | 53 | data_win = data_win.T 54 | 55 | date_rng = pd.date_range(start='2021-01-01', periods=data_win.shape[0], freq='H') # Dummy timestep 56 | df_wide = pd.DataFrame(data_win, index=date_rng) 57 | # Convert numerical columns to float 32 format for lag-llama 58 | for col in df_wide.columns: 59 | # Check if column is not of string type 60 | if df_wide[col].dtype != 'object' and pd.api.types.is_string_dtype(df_wide[col]) == False: 61 | df_wide[col] = df_wide[col].astype('float32') 62 | 63 | # Create a PandasDataset 64 | ds = PandasDataset(dict(df_wide)) 65 | 66 | ckpt = torch.load(self.ckpt_path, map_location=self.device) # Uses GPU since in this Colab we use a GPU. 67 | estimator_args = ckpt["hyper_parameters"]["model_kwargs"] 68 | 69 | rope_scaling_arguments = { 70 | "type": "linear", 71 | "factor": max(1.0, (self.context_length + self.prediction_length) / estimator_args["context_length"]), 72 | } 73 | 74 | estimator = LagLlamaEstimator( 75 | ckpt_path=self.ckpt_path, 76 | prediction_length=self.prediction_length, 77 | context_length=self.context_length, # Lag-Llama was trained with a context length of 32, but can work with any context length 78 | 79 | # estimator args 80 | input_size=estimator_args["input_size"], 81 | n_layer=estimator_args["n_layer"], 82 | n_embd_per_head=estimator_args["n_embd_per_head"], 83 | n_head=estimator_args["n_head"], 84 | scaling=estimator_args["scaling"], 85 | time_feat=estimator_args["time_feat"], 86 | rope_scaling=rope_scaling_arguments if self.use_rope_scaling else None, 87 | 88 | batch_size=self.batch_size, 89 | num_parallel_samples=100, 90 | device=self.device, 91 | ) 92 | 93 | lightning_module = estimator.create_lightning_module() 94 | transformation = estimator.create_transformation() 95 | predictor = estimator.create_predictor(transformation, lightning_module) 96 | 97 | forecast_it, ts_it = make_evaluation_predictions( 98 | dataset=ds, 99 | predictor=predictor, 100 | num_samples=self.num_samples 101 | ) 102 | forecasts = list(forecast_it) 103 | tss = list(ts_it) 104 | 105 | predictions = np.array([pred.mean for pred in forecasts]) 106 | 107 | # print('predictions: ', predictions.shape) 108 | 109 | ### using mse as the anomaly score 110 | scores = (data_target.squeeze() - predictions.squeeze()) ** 2 111 | self.score_list.append(scores) 112 | 113 | scores_merge = np.mean(np.array(self.score_list), axis=0) 114 | 115 | padded_decision_scores = np.zeros(len(data)) 116 | padded_decision_scores[: self.context_length+self.prediction_length-1] = scores_merge[0] 117 | padded_decision_scores[self.context_length+self.prediction_length-1 : ]=scores_merge 118 | 119 | self.decision_scores_ = padded_decision_scores 120 | 121 | 122 | def decision_function(self, X): 123 | """ 124 | Not used, present for API consistency by convention. 
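As with the other zero-shot forecasting wrappers, the scores are produced during ``fit`` (per-channel squared forecast error, averaged across channels) and stored in ``decision_scores_``. Sketch, assuming the Lag-Llama checkpoint has already been downloaded to the path passed as ``ckpt_path``:

>>> detector = Lag_Llama(win_size=96, input_c=data.shape[1], ckpt_path='lag-llama.ckpt')
>>> detector.fit(data)
>>> scores = detector.decision_scores_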
125 | """ 126 | pass 127 | 128 | def create_dataset(self, X, slidingWindow, predict_time_steps=1): 129 | Xs, ys = [], [] 130 | for i in range(len(X) - slidingWindow - predict_time_steps+1): 131 | 132 | tmp = X[i : i + slidingWindow + predict_time_steps].ravel() 133 | # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() 134 | 135 | x = tmp[:slidingWindow] 136 | y = tmp[slidingWindow:] 137 | Xs.append(x) 138 | ys.append(y) 139 | return np.array(Xs), np.array(ys) -------------------------------------------------------------------------------- /TSB_AD/models/Left_STAMPi.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | import math 4 | from stumpy import stumpi 5 | from TSB_AD.models.base import BaseDetector 6 | from TSB_AD.utils.utility import zscore 7 | 8 | class Left_STAMPi(BaseDetector): 9 | 10 | def __init__(self, n_init_train=100, window_size=50, normalize=True): 11 | super().__init__() 12 | self.n_init_train = n_init_train 13 | self.window_size = window_size 14 | self.normalize = normalize 15 | 16 | def fit(self, X, y=None): 17 | """Fit detector. y is ignored in unsupervised methods. 18 | 19 | Parameters 20 | ---------- 21 | X : numpy array of shape (n_samples, n_features) 22 | The input samples. 23 | 24 | y : Ignored 25 | Not used, present for API consistency by convention. 26 | 27 | Returns 28 | ------- 29 | self : object 30 | Fitted estimator. 31 | """ 32 | n_samples, n_features = X.shape 33 | if self.normalize: 34 | X = zscore(X, axis=0, ddof=0) 35 | 36 | warmup = self.n_init_train 37 | ws = self.window_size 38 | 39 | if ws > warmup: 40 | logging.warning(f"WARN: window_size is larger than n_init_train. Adjusting to n_init_train={warmup}.") 41 | ws = warmup 42 | if ws < 3: 43 | logging.warning("WARN: window_size must be at least 3. Adjusting to 3.") 44 | ws = 3 45 | 46 | self.stream = stumpi(X[:warmup, 0], m=ws, egress=False) 47 | for point in X[warmup:, 0]: 48 | self.stream.update(point) 49 | 50 | self.decision_scores_ = self.stream.left_P_ 51 | self.decision_scores_[:warmup] = 0 52 | 53 | return self 54 | 55 | def decision_function(self, X): 56 | """Predict raw anomaly score of X using the fitted detector. 57 | 58 | Parameters 59 | ---------- 60 | X : numpy array of shape (n_samples, n_features) 61 | The training input samples. 62 | 63 | Returns 64 | ------- 65 | anomaly_scores : numpy array of shape (n_samples,) 66 | The anomaly score of the input samples. 67 | """ 68 | n_samples = X.shape[0] 69 | padded_scores = self.pad_anomaly_scores(self.decision_scores_, n_samples, self.window_size) 70 | return padded_scores 71 | 72 | @staticmethod 73 | def pad_anomaly_scores(scores, n_samples, window_size): 74 | """ 75 | Pads the anomaly scores to match the length of the input time series. 76 | Padding is symmetric, using the first and last values. 
77 | """ 78 | left_padding = [scores[0]] * math.ceil((window_size - 1) / 2) 79 | right_padding = [scores[-1]] * ((window_size - 1) // 2) 80 | padded_scores = np.array(left_padding + list(scores) + right_padding) 81 | 82 | return padded_scores[:n_samples] -------------------------------------------------------------------------------- /TSB_AD/models/MOMENT.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [moment] by [mononitogoswami] 3 | Original source: [https://github.com/moment-timeseries-foundation-model/moment] 4 | """ 5 | 6 | from momentfm import MOMENTPipeline 7 | from momentfm.utils.masking import Masking 8 | from sklearn.preprocessing import MinMaxScaler 9 | import numpy as np 10 | import pandas as pd 11 | import torch 12 | from torch.utils.data import DataLoader 13 | from tqdm import tqdm 14 | from torch import nn 15 | import math 16 | 17 | from .base import BaseDetector 18 | from ..utils.dataset import ReconstructDataset_Moment 19 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 20 | 21 | class MOMENT(BaseDetector): 22 | def __init__(self, 23 | win_size=256, 24 | input_c=1, 25 | batch_size=128, 26 | epochs=2, 27 | validation_size=0, 28 | lr=1e-4): 29 | 30 | self.model_name = 'MOMENT' 31 | self.win_size = win_size 32 | self.input_c = input_c 33 | self.batch_size = batch_size 34 | self.anomaly_criterion = nn.MSELoss(reduce=False) 35 | self.epochs = epochs 36 | self.validation_size = validation_size 37 | self.lr = lr 38 | 39 | cuda = True 40 | self.cuda = cuda 41 | self.device = get_gpu(self.cuda) 42 | 43 | 44 | self.model = MOMENTPipeline.from_pretrained( 45 | "AutonLab/MOMENT-1-base", 46 | model_kwargs={"task_name": "reconstruction"}, # For anomaly detection, we will load MOMENT in `reconstruction` mode 47 | ) 48 | self.model.init() 49 | self.model = self.model.to("cuda").float() 50 | # Optimize Mean Squarred Error using your favourite optimizer 51 | self.criterion = torch.nn.MSELoss() 52 | self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr) 53 | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=5, gamma=0.75) 54 | self.save_path = None 55 | self.early_stopping = EarlyStoppingTorch(save_path=self.save_path, patience=3) 56 | 57 | def zero_shot(self, data): 58 | 59 | test_loader = DataLoader( 60 | dataset=ReconstructDataset_Moment(data, window_size=self.win_size), 61 | batch_size=self.batch_size, 62 | shuffle=False) 63 | 64 | trues, preds = [], [] 65 | self.score_list = [] 66 | with torch.no_grad(): 67 | for batch_x, batch_masks in tqdm(test_loader, total=len(test_loader)): 68 | batch_x = batch_x.to("cuda").float() 69 | batch_masks = batch_masks.to("cuda") 70 | batch_x = batch_x.permute(0,2,1) 71 | 72 | # print('batch_x: ', batch_x.shape) # [batch_size, n_channels, window_size] 73 | # print('batch_masks: ', batch_masks.shape) # [batch_size, window_size] 74 | 75 | output = self.model(x_enc=batch_x, input_mask=batch_masks) # [batch_size, n_channels, window_size] 76 | score = torch.mean(self.anomaly_criterion(batch_x, output.reconstruction), dim=-1).detach().cpu().numpy()[:, -1] 77 | self.score_list.append(score) 78 | 79 | self.__anomaly_score = np.concatenate(self.score_list, axis=0).reshape(-1) 80 | 81 | if self.__anomaly_score.shape[0] < len(data): 82 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 83 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 84 | 
self.decision_scores_ = self.__anomaly_score 85 | 86 | 87 | def fit(self, data): 88 | tsTrain = data[:int((1-self.validation_size)*len(data))] 89 | tsValid = data[int((1-self.validation_size)*len(data)):] 90 | 91 | train_loader = DataLoader( 92 | dataset=ReconstructDataset_Moment(tsTrain, window_size=self.win_size), 93 | batch_size=self.batch_size, 94 | shuffle=True 95 | ) 96 | 97 | valid_loader = DataLoader( 98 | dataset=ReconstructDataset_Moment(tsValid, window_size=self.win_size), 99 | batch_size=self.batch_size, 100 | shuffle=False 101 | ) 102 | 103 | mask_generator = Masking(mask_ratio=0.3) # Mask 30% of patches randomly 104 | 105 | 106 | for epoch in range(1, self.epochs + 1): 107 | self.model.train() 108 | for batch_x, batch_masks in tqdm(train_loader, total=len(train_loader)): 109 | batch_x = batch_x.to(self.device).float() 110 | batch_x = batch_x.permute(0,2,1) 111 | # print('batch_x: ', batch_x.shape) 112 | 113 | original = batch_x 114 | n_channels = batch_x.shape[1] 115 | 116 | # Reshape to [batch_size * n_channels, 1, window_size] 117 | batch_x = batch_x.reshape((-1, 1, self.win_size)) 118 | 119 | batch_masks = batch_masks.to(self.device).long() 120 | batch_masks = batch_masks.repeat_interleave(n_channels, axis=0) 121 | 122 | # Randomly mask some patches of data 123 | mask = mask_generator.generate_mask( 124 | x=batch_x, input_mask=batch_masks).to(self.device).long() 125 | 126 | mask = torch.nn.functional.pad(mask, (0, batch_masks.size(1) - mask.size(1)), mode='constant', value=1) 127 | 128 | # Forward 129 | model_output = self.model(batch_x, input_mask=batch_masks, mask=mask).reconstruction 130 | model_output = torch.nn.functional.pad(model_output, (0, original.size(2)-model_output.size(2)), mode='replicate') 131 | 132 | output = model_output.reshape(original.size(0), n_channels, self.win_size) 133 | 134 | # Compute loss 135 | loss = self.criterion(output, original) 136 | 137 | # print(f"loss: {loss.item()}") 138 | 139 | # Backward 140 | self.optimizer.zero_grad() 141 | loss.backward() 142 | self.optimizer.step() 143 | 144 | # self.model.eval() 145 | # avg_loss = 0 146 | # with torch.no_grad(): 147 | # for batch_x, batch_masks in tqdm(valid_loader, total=len(valid_loader)): 148 | # batch_x = batch_x.to("cuda").float() 149 | # batch_masks = batch_masks.to("cuda") 150 | # batch_x = batch_x.permute(0,2,1) 151 | 152 | # print('batch_x: ', batch_x.shape) 153 | # print('batch_masks: ', batch_masks.shape) 154 | 155 | # output = self.model(batch_x, input_mask=batch_masks) 156 | 157 | # loss = self.criterion(output.reconstruction.reshape(-1, n_channels, self.win_size), batch_x) 158 | # print(f"loss: {loss.item()}") 159 | # avg_loss += loss.cpu().item() 160 | 161 | # valid_loss = avg_loss/max(len(valid_loader), 1) 162 | # self.scheduler.step() 163 | # self.early_stopping(valid_loss, self.model) 164 | # if self.early_stopping.early_stop: 165 | # print(" Early stopping<<<") 166 | # break 167 | 168 | def decision_function(self, data): 169 | """ 170 | Not used, present for API consistency by convention. 
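Despite the boilerplate sentence above, in this class the method does compute scores: it slides windows over ``data``, reconstructs them with the (optionally fine-tuned) MOMENT model, reduces the reconstruction MSE to one score per window, and pads the result to ``len(data)``. Sketch, assuming a 2-D ``data`` array:

>>> detector = MOMENT(win_size=256, input_c=data.shape[1])
>>> detector.zero_shot(data)               # zero-shot scoring fills decision_scores_
>>> scores = detector.decision_scores_
>>> # or fine-tune first: detector.fit(data); scores = detector.decision_function(data)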
171 | """ 172 | 173 | test_loader = DataLoader( 174 | dataset=ReconstructDataset_Moment(data, window_size=self.win_size), 175 | batch_size=self.batch_size, 176 | shuffle=False) 177 | 178 | trues, preds = [], [] 179 | self.score_list = [] 180 | with torch.no_grad(): 181 | for batch_x, batch_masks in tqdm(test_loader, total=len(test_loader)): 182 | batch_x = batch_x.to("cuda").float() 183 | batch_masks = batch_masks.to("cuda") 184 | batch_x = batch_x.permute(0,2,1) 185 | 186 | # print('batch_x: ', batch_x.shape) # [batch_size, n_channels, window_size] 187 | # print('batch_masks: ', batch_masks.shape) # [batch_size, window_size] 188 | 189 | output = self.model(batch_x, input_mask=batch_masks) 190 | score = torch.mean(self.anomaly_criterion(batch_x, output.reconstruction), dim=-1).detach().cpu().numpy()[:, -1] 191 | self.score_list.append(score) 192 | 193 | self.__anomaly_score = np.concatenate(self.score_list, axis=0).reshape(-1) 194 | 195 | if self.__anomaly_score.shape[0] < len(data): 196 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 197 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 198 | 199 | return self.__anomaly_score -------------------------------------------------------------------------------- /TSB_AD/models/MatrixProfile.py: -------------------------------------------------------------------------------- 1 | import stumpy 2 | import numpy as np 3 | 4 | class MatrixProfile(): 5 | """ 6 | Wrapper of the stympy implementation of the MatrixProfile algorithm 7 | 8 | Parameters 9 | ---------- 10 | window : int, 11 | target subsequence length. 12 | 13 | Attributes 14 | ---------- 15 | decision_scores_ : numpy array of shape (n_samples - m,) 16 | The anomaly score. 17 | The higher, the more abnormal. Anomalies tend to have higher 18 | scores. This value is available once the detector is 19 | fitted. 20 | """ 21 | 22 | def __init__(self, window): 23 | self.window = window 24 | self.model_name = 'MatrixProfile' 25 | 26 | def fit(self, X, y=None): 27 | """Fit detector. y is ignored in unsupervised methods. 28 | 29 | Parameters 30 | ---------- 31 | X : numpy array of shape (n_samples, ) 32 | The input samples. 33 | y : Ignored 34 | Not used, present for API consistency by convention. 35 | 36 | Returns 37 | ------- 38 | self : object 39 | Fitted estimator. 40 | """ 41 | self.profile = stumpy.stump(X.ravel(),m=self.window) 42 | #self.profile = mp.compute(X, windows=self.window) 43 | res = np.zeros(len(X)) 44 | res.fill(self.profile[:, 0].min()) 45 | res[self.window//2:-self.window//2+1] = self.profile[:, 0] 46 | self.decision_scores_ = res 47 | return self 48 | -------------------------------------------------------------------------------- /TSB_AD/models/NormA.txt: -------------------------------------------------------------------------------- 1 | # Algorithms protected by patent. Code protected by copyright and provided 2 | # as is. Email the authors for the password of the ZIP file (boniol.paul@gmail.com and themis@mi.parisdescartes.fr). Users from 3 | # the academia may use this code only for academic research purposes, 4 | # provided that the authors are properly acknowledged using the citations 5 | # below. Users from the industry may test and evaluate this code by 6 | # requesting a license. 
-------------------------------------------------------------------------------- /TSB_AD/models/OmniAnomaly.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [OmniAnomaly] by [TsingHuasuya et al.] 3 | Original source: [https://github.com/NetManAIOps/OmniAnomaly] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import math 11 | import torch 12 | import torch.nn.functional as F 13 | from sklearn.utils import check_array 14 | from sklearn.utils.validation import check_is_fitted 15 | from torch import nn 16 | from torch.utils.data import DataLoader 17 | from sklearn.preprocessing import MinMaxScaler 18 | import tqdm 19 | 20 | from .base import BaseDetector 21 | from ..utils.dataset import ReconstructDataset 22 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 23 | 24 | class OmniAnomalyModel(nn.Module): 25 | def __init__(self, feats, device): 26 | super(OmniAnomalyModel, self).__init__() 27 | self.name = 'OmniAnomaly' 28 | self.device = device 29 | self.lr = 0.002 30 | self.beta = 0.01 31 | self.n_feats = feats 32 | self.n_hidden = 32 33 | self.n_latent = 8 34 | self.lstm = nn.GRU(feats, self.n_hidden, 2) 35 | self.encoder = nn.Sequential( 36 | nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), 37 | nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), 38 | # nn.Flatten(), 39 | nn.Linear(self.n_hidden, 2*self.n_latent) 40 | ) 41 | self.decoder = nn.Sequential( 42 | nn.Linear(self.n_latent, self.n_hidden), nn.PReLU(), 43 | nn.Linear(self.n_hidden, self.n_hidden), nn.PReLU(), 44 | nn.Linear(self.n_hidden, self.n_feats), nn.Sigmoid(), 45 | ) 46 | 47 | def forward(self, x, hidden = None): 48 | bs = x.shape[0] 49 | win = x.shape[1] 50 | 51 | # hidden = torch.rand(2, bs, self.n_hidden, dtype=torch.float64) if hidden is not None else hidden 52 | hidden = torch.rand(2, bs, self.n_hidden).to(self.device) if hidden is not None else hidden 53 | 54 | out, hidden = self.lstm(x.view(-1, bs, self.n_feats), hidden) 55 | 56 | # print('out: ', out.shape) # (L, bs, n_hidden) 57 | # print('hidden: ', hidden.shape) # (2, bs, n_hidden) 58 | 59 | ## Encode 60 | x = self.encoder(out) 61 | mu, logvar = torch.split(x, [self.n_latent, self.n_latent], dim=-1) 62 | ## Reparameterization trick 63 | std = torch.exp(0.5*logvar) 64 | eps = torch.randn_like(std) 65 | x = mu + eps*std 66 | ## Decoder 67 | x = self.decoder(x) # (L, bs, n_feats) 68 | return x.reshape(bs, win*self.n_feats), mu.reshape(bs, win*self.n_latent), logvar.reshape(bs, win*self.n_latent), hidden 69 | 70 | 71 | class OmniAnomaly(BaseDetector): 72 | def __init__(self, 73 | win_size = 5, 74 | feats = 1, 75 | batch_size = 128, 76 | epochs = 50, 77 | patience = 3, 78 | lr = 0.002, 79 | validation_size=0.2 80 | ): 81 | super().__init__() 82 | 83 | self.__anomaly_score = None 84 | 85 | self.cuda = True 86 | self.device = get_gpu(self.cuda) 87 | 88 | self.win_size = win_size 89 | self.batch_size = batch_size 90 | self.epochs = epochs 91 | self.feats = feats 92 | self.validation_size = validation_size 93 | 94 | self.model = OmniAnomalyModel(feats=self.feats, device=self.device).to(self.device) 95 | self.optimizer = torch.optim.AdamW( 96 | self.model.parameters(), lr=lr, weight_decay=1e-5 97 | ) 98 | self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, 5, 0.9) 99 | self.criterion = nn.MSELoss(reduction = 'none') 100 | 101 | self.early_stopping = EarlyStoppingTorch(None, patience=patience) 102 
| 103 | def fit(self, data): 104 | tsTrain = data[:int((1-self.validation_size)*len(data))] 105 | tsValid = data[int((1-self.validation_size)*len(data)):] 106 | 107 | train_loader = DataLoader( 108 | dataset=ReconstructDataset(tsTrain, window_size=self.win_size), 109 | batch_size=self.batch_size, 110 | shuffle=True 111 | ) 112 | 113 | valid_loader = DataLoader( 114 | dataset=ReconstructDataset(tsValid, window_size=self.win_size), 115 | batch_size=self.batch_size, 116 | shuffle=False 117 | ) 118 | 119 | mses, klds = [], [] 120 | for epoch in range(1, self.epochs + 1): 121 | self.model.train(mode=True) 122 | n = epoch + 1 123 | avg_loss = 0 124 | loop = tqdm.tqdm( 125 | enumerate(train_loader), total=len(train_loader), leave=True 126 | ) 127 | for idx, (d, _) in loop: 128 | d = d.to(self.device) 129 | # print('d: ', d.shape) 130 | 131 | y_pred, mu, logvar, hidden = self.model(d, hidden if idx else None) 132 | d = d.view(-1, self.feats*self.win_size) 133 | MSE = torch.mean(self.criterion(y_pred, d), axis=-1) 134 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1) 135 | loss = torch.mean(MSE + self.model.beta * KLD) 136 | 137 | mses.append(torch.mean(MSE).item()) 138 | klds.append(self.model.beta * torch.mean(KLD).item()) 139 | self.optimizer.zero_grad() 140 | loss.backward() 141 | self.optimizer.step() 142 | 143 | avg_loss += loss.cpu().item() 144 | loop.set_description(f"Training Epoch [{epoch}/{self.epochs}]") 145 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss / (idx + 1)) 146 | 147 | if len(valid_loader) > 0: 148 | self.model.eval() 149 | avg_loss_val = 0 150 | loop = tqdm.tqdm( 151 | enumerate(valid_loader), total=len(valid_loader), leave=True 152 | ) 153 | with torch.no_grad(): 154 | for idx, (d, _) in loop: 155 | d = d.to(self.device) 156 | y_pred, mu, logvar, hidden = self.model(d, hidden if idx else None) 157 | d = d.view(-1, self.feats*self.win_size) 158 | MSE = torch.mean(self.criterion(y_pred, d), axis=-1) 159 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp(), dim=-1) 160 | loss = torch.mean(MSE + self.model.beta * KLD) 161 | 162 | avg_loss_val += loss.cpu().item() 163 | loop.set_description( 164 | f"Validation Epoch [{epoch}/{self.epochs}]" 165 | ) 166 | loop.set_postfix(loss=loss.item(), avg_loss_val=avg_loss_val / (idx + 1)) 167 | 168 | self.scheduler.step() 169 | if len(valid_loader) > 0: 170 | avg_loss = avg_loss_val / len(valid_loader) 171 | else: 172 | avg_loss = avg_loss / len(train_loader) 173 | self.early_stopping(avg_loss, self.model) 174 | if self.early_stopping.early_stop: 175 | print(" Early stopping<<<") 176 | break 177 | 178 | def decision_function(self, data): 179 | test_loader = DataLoader( 180 | dataset=ReconstructDataset(data, window_size=self.win_size), 181 | batch_size=self.batch_size, 182 | shuffle=False 183 | ) 184 | 185 | self.model.eval() 186 | scores = [] 187 | y_preds = [] 188 | loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) 189 | 190 | with torch.no_grad(): 191 | for idx, (d, _) in loop: 192 | d = d.to(self.device) 193 | # print('d: ', d.shape) 194 | 195 | y_pred, _, _, hidden = self.model(d, hidden if idx else None) 196 | y_preds.append(y_pred) 197 | d = d.view(-1, self.feats*self.win_size) 198 | 199 | # print('y_pred: ', y_pred.shape) 200 | # print('d: ', d.shape) 201 | loss = torch.mean(self.criterion(y_pred, d), axis=-1) 202 | # print('loss: ', loss.shape) 203 | 204 | scores.append(loss.cpu()) 205 | 206 | scores = torch.cat(scores, dim=0) 207 | scores = scores.numpy() 208 | 209 | 
self.__anomaly_score = scores 210 | 211 | if self.__anomaly_score.shape[0] < len(data): 212 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 213 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 214 | 215 | return self.__anomaly_score 216 | 217 | def anomaly_score(self) -> np.ndarray: 218 | return self.__anomaly_score 219 | 220 | def param_statistic(self, save_file): 221 | pass 222 | -------------------------------------------------------------------------------- /TSB_AD/models/README.md: -------------------------------------------------------------------------------- 1 | ### Extra Installation Direction 2 | 3 | If you want to use [Chronos](https://github.com/amazon-science/chronos-forecasting), please install the following 4 | ```bash 5 | git clone https://github.com/autogluon/autogluon 6 | cd autogluon && pip install -e timeseries/[TimeSeriesDataFrame,TimeSeriesPredictor] 7 | ``` 8 | 9 | If you want to use [MOMENT](https://github.com/moment-timeseries-foundation-model/moment), please install the following 10 | ```bash 11 | pip install momentfm # only support Python 3.11 for now 12 | ``` 13 | 14 | If you want to use [TimesFM](https://github.com/google-research/timesfm), please install the following 15 | ```bash 16 | pip install timesfm[torch] 17 | ``` 18 | 19 | If you want to use [Lag-Llama](https://github.com/time-series-foundation-models/lag-llama), please install the following 20 | ```bash 21 | gluonts[torch]<=0.14.4 22 | ``` 23 | and download the checkpoint from [Link](https://github.com/time-series-foundation-models/lag-llama) and add the path to [Lag_Llama.py](https://github.com/TheDatumOrg/TSB-AD/blob/main/TSB_AD/models/Lag_Llama.py). -------------------------------------------------------------------------------- /TSB_AD/models/RobustPCA.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [TimeEval-algorithms] by [CodeLionX&wenig] 3 | Original source: [https://github.com/TimeEval/TimeEval-algorithms] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | from sklearn.decomposition import PCA 11 | from typing import Optional 12 | 13 | from .base import BaseDetector 14 | from sklearn.utils.validation import check_is_fitted 15 | from sklearn.utils.validation import check_array 16 | from scipy.spatial.distance import cdist 17 | 18 | class Robust_PCA: 19 | def __init__(self, D, mu=None, lmbda=None): 20 | self.D = D 21 | self.S = np.zeros(self.D.shape) 22 | self.Y = np.zeros(self.D.shape) 23 | 24 | if mu: 25 | self.mu = mu 26 | else: 27 | self.mu = np.prod(self.D.shape) / (4 * np.linalg.norm(self.D, ord=1)) 28 | 29 | self.mu_inv = 1 / self.mu 30 | 31 | if lmbda: 32 | self.lmbda = lmbda 33 | else: 34 | self.lmbda = 1 / np.sqrt(np.max(self.D.shape)) 35 | 36 | @staticmethod 37 | def frobenius_norm(M): 38 | return np.linalg.norm(M, ord='fro') 39 | 40 | @staticmethod 41 | def shrink(M, tau): 42 | return np.sign(M) * np.maximum((np.abs(M) - tau), np.zeros(M.shape)) 43 | 44 | def svd_threshold(self, M, tau): 45 | U, S, V = np.linalg.svd(M, full_matrices=False) 46 | return np.dot(U, np.dot(np.diag(self.shrink(S, tau)), V)) 47 | 48 | def fit(self, tol=None, max_iter=1000, iter_print=100): 49 | iter = 0 50 | err = np.Inf 51 | Sk = self.S 52 | Yk = self.Y 53 | Lk = np.zeros(self.D.shape) 54 | 55 | if tol: 56 | _tol = tol 57 | else: 58 | _tol = 1E-7 * self.frobenius_norm(self.D) 59 | 
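# The loop below alternates three closed-form updates (augmented-Lagrangian style),
# recovering a low-rank component L and a sparse component S of the observed matrix D,
# with dual variable Y and penalty weight mu:
#   Lk <- SVT_{1/mu}( D - Sk + Yk/mu )            singular-value thresholding
#   Sk <- shrink_{lambda/mu}( D - Lk + Yk/mu )    elementwise soft-thresholding
#   Yk <- Yk + mu * (D - Lk - Sk)                 multiplier (dual) update
# It stops when ||D - Lk - Sk||_F falls below the tolerance or max_iter is reached.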
60 | #this loop implements the principal component pursuit (PCP) algorithm 61 | #located in the table on page 29 of https://arxiv.org/pdf/0912.3599.pdf 62 | while (err > _tol) and iter < max_iter: 63 | Lk = self.svd_threshold( 64 | self.D - Sk + self.mu_inv * Yk, self.mu_inv) #this line implements step 3 65 | Sk = self.shrink( 66 | self.D - Lk + (self.mu_inv * Yk), self.mu_inv * self.lmbda) #this line implements step 4 67 | Yk = Yk + self.mu * (self.D - Lk - Sk) #this line implements step 5 68 | err = self.frobenius_norm(self.D - Lk - Sk) 69 | iter += 1 70 | if (iter % iter_print) == 0 or iter == 1 or iter > max_iter or err <= _tol: 71 | print('iteration: {0}, error: {1}'.format(iter, err)) 72 | 73 | self.L = Lk 74 | self.S = Sk 75 | return Lk, Sk 76 | 77 | class RobustPCA(BaseDetector): 78 | def __init__(self, max_iter: int = 1000, n_components = None, zero_pruning = True): 79 | self.pca: Optional[PCA] = None 80 | self.max_iter = max_iter 81 | self.n_components = n_components 82 | self.zero_pruning = zero_pruning 83 | 84 | def fit(self, X, y=None): 85 | 86 | if self.zero_pruning: 87 | non_zero_columns = np.any(X != 0, axis=0) 88 | X = X[:, non_zero_columns] 89 | 90 | rpca = Robust_PCA(X) 91 | L, S = rpca.fit(max_iter=self.max_iter) 92 | self.detector_ = PCA(n_components=L.shape[1]) 93 | self.detector_.fit(L) 94 | self.decision_scores_ = self.decision_function(L) 95 | return self 96 | 97 | # def decision_function(self, X): 98 | # check_is_fitted(self, ['detector_']) 99 | # X_transformed = self.detector_.transform(X) # Transform the data into the PCA space 100 | # X_reconstructed = self.detector_.inverse_transform(X_transformed) # Reconstruct the data from the PCA space 101 | # anomaly_scores = np.linalg.norm(X - X_reconstructed, axis=1) # Compute the Euclidean norm between original and reconstructed data 102 | # return anomaly_scores 103 | 104 | def decision_function(self, X): 105 | assert self.detector_, "Please train PCA before running the detection!" 106 | 107 | L = self.detector_.transform(X) 108 | S = np.absolute(X - L) 109 | return S.sum(axis=1) 110 | -------------------------------------------------------------------------------- /TSB_AD/models/SR.py: -------------------------------------------------------------------------------- 1 | """Spectral Residual 2 | """ 3 | # Author: Andreas Mueller 4 | import numpy as np 5 | 6 | def SR(X, window_size): 7 | X = (X - X.min()) / (X.max() - X.min()) 8 | X = X.ravel() 9 | fft = np.fft.fft(X) 10 | 11 | amp = np.abs(fft) 12 | log_amp = np.log(amp) 13 | phase = np.angle(fft) 14 | # split spectrum into bias term and symmetric frequencies 15 | bias, sym_freq = log_amp[:1], log_amp[1:] 16 | # select just the first half of the sym_freq 17 | freq = sym_freq[:(len(sym_freq) + 1) // 2] 18 | window_amp = 100 19 | 20 | pad_left = (window_amp - 1) // 2 21 | padded_freq = np.concatenate([np.tile(X[0], pad_left), freq, np.tile(X[-1], window_amp - pad_left - 1)]) 22 | conv_amp = np.ones(window_amp) / window_amp 23 | ma_freq = np.convolve(padded_freq, conv_amp, 'valid') 24 | # construct moving average log amplitude spectrum 25 | ma_log_amp = np.concatenate([ 26 | bias, 27 | ma_freq, 28 | (ma_freq[:-1] if len(sym_freq) % 2 == 1 else ma_freq)[::-1] 29 | ]) 30 | assert ma_log_amp.shape[0] == log_amp.shape[0], "`ma_log_amp` size does not match `log_amp` size." 
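# Spectral-residual saliency: subtract the smoothed (moving-average) log-amplitude
# spectrum from the raw one, keep the original phase, and inverse-FFT the result.
# The magnitude of that reconstructed signal is large wherever the series deviates
# from its dominant spectral structure, and it is returned directly as the score.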
31 | # compute residual spectrum and transform back to time domain 32 | res_amp = log_amp - ma_log_amp 33 | sr = np.abs(np.fft.ifft(np.exp(res_amp + 1j * phase))) 34 | return sr -------------------------------------------------------------------------------- /TSB_AD/models/Series2Graph.txt: -------------------------------------------------------------------------------- 1 | # Algorithms protected by patent. Code protected by copyright and provided 2 | # as is. Email the authors for the password of the ZIP file (boniol.paul@gmail.com and themis@mi.parisdescartes.fr). Users from 3 | # the academia may use this code only for academic research purposes, 4 | # provided that the authors are properly acknowledged using the citations 5 | # below. Users from the industry may test and evaluate this code by 6 | # requesting a license. 7 | -------------------------------------------------------------------------------- /TSB_AD/models/TimesFM.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [timesfm] by [siriuz42 et al.] 3 | Original source: [https://github.com/google-research/timesfm] 4 | """ 5 | 6 | import timesfm 7 | import numpy as np 8 | 9 | class TimesFM(): 10 | def __init__(self, 11 | win_size=96, 12 | prediction_length=1, 13 | input_c=1): 14 | 15 | self.model_name = 'TimesFM' 16 | self.win_size = win_size 17 | self.prediction_length = prediction_length 18 | self.input_c = input_c 19 | self.score_list = [] 20 | 21 | def fit(self, data): 22 | 23 | for channel in range(self.input_c): 24 | 25 | data_channel = data[:, channel].reshape(-1, 1) 26 | data_win, data_target = self.create_dataset(data_channel, slidingWindow=self.win_size, predict_time_steps=self.prediction_length) 27 | # print('data_win: ', data_win.shape) # (2330, 100) 28 | # print('data_target: ', data_target.shape) # (2330, 1) 29 | 30 | # tfm = timesfm.TimesFm( 31 | # context_len=self.win_size, 32 | # horizon_len=self.prediction_length, 33 | # input_patch_len=32, 34 | # output_patch_len=128, 35 | # num_layers=20, 36 | # model_dims=1280, 37 | # backend="gpu") 38 | # tfm.load_from_checkpoint(repo_id="google/timesfm-1.0-200m") 39 | 40 | tfm = timesfm.TimesFm( 41 | hparams=timesfm.TimesFmHparams( 42 | backend="gpu", 43 | per_core_batch_size=32, 44 | horizon_len=self.prediction_length, 45 | ), 46 | checkpoint=timesfm.TimesFmCheckpoint( 47 | huggingface_repo_id="google/timesfm-1.0-200m-pytorch"), 48 | ) 49 | 50 | forecast_input = [data_win[i, :] for i in range(data_win.shape[0])] 51 | point_forecast, _ = tfm.forecast(forecast_input) 52 | 53 | print('predictions: ', point_forecast.shape) 54 | 55 | ### using mse as the anomaly score 56 | scores = (data_target.squeeze() - point_forecast.squeeze()) ** 2 57 | # scores = np.mean(scores, axis=1) 58 | self.score_list.append(scores) 59 | 60 | scores_merge = np.mean(np.array(self.score_list), axis=0) 61 | # print('scores_merge: ', scores_merge.shape) 62 | 63 | padded_decision_scores = np.zeros(len(data)) 64 | padded_decision_scores[: self.win_size+self.prediction_length-1] = scores_merge[0] 65 | padded_decision_scores[self.win_size+self.prediction_length-1 : ]=scores_merge 66 | 67 | self.decision_scores_ = padded_decision_scores 68 | 69 | 70 | def decision_function(self, X): 71 | """ 72 | Not used, present for API consistency by convention. 
73 | """ 74 | pass 75 | 76 | def create_dataset(self, X, slidingWindow, predict_time_steps=1): 77 | Xs, ys = [], [] 78 | for i in range(len(X) - slidingWindow - predict_time_steps+1): 79 | 80 | tmp = X[i : i + slidingWindow + predict_time_steps].ravel() 81 | # tmp= MinMaxScaler(feature_range=(0,1)).fit_transform(tmp.reshape(-1,1)).ravel() 82 | 83 | x = tmp[:slidingWindow] 84 | y = tmp[slidingWindow:] 85 | Xs.append(x) 86 | ys.append(y) 87 | return np.array(Xs), np.array(ys) -------------------------------------------------------------------------------- /TSB_AD/models/USAD.py: -------------------------------------------------------------------------------- 1 | """ 2 | This function is adapted from [usad] by [manigalati] 3 | Original source: [https://github.com/manigalati/usad] 4 | """ 5 | 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | import numpy as np 10 | import math 11 | import torch 12 | import torch.nn.functional as F 13 | from sklearn.utils import check_array 14 | from sklearn.utils.validation import check_is_fitted 15 | from torch import nn 16 | from torch.utils.data import DataLoader 17 | from sklearn.preprocessing import MinMaxScaler 18 | import tqdm 19 | 20 | from .base import BaseDetector 21 | from ..utils.dataset import ReconstructDataset 22 | from ..utils.torch_utility import EarlyStoppingTorch, get_gpu 23 | 24 | class USADModel(nn.Module): 25 | def __init__(self, feats, n_window=5): 26 | super(USADModel, self).__init__() 27 | self.name = 'USAD' 28 | self.lr = 0.0001 29 | self.n_feats = feats 30 | self.n_hidden = 16 31 | self.n_latent = 5 32 | self.n_window = n_window # USAD w_size = 5 33 | self.n = self.n_feats * self.n_window 34 | self.encoder = nn.Sequential( 35 | nn.Flatten(), 36 | nn.Linear(self.n, self.n_hidden), nn.ReLU(True), 37 | nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), 38 | nn.Linear(self.n_hidden, self.n_latent), nn.ReLU(True), 39 | ) 40 | self.decoder1 = nn.Sequential( 41 | nn.Linear(self.n_latent,self.n_hidden), nn.ReLU(True), 42 | nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), 43 | nn.Linear(self.n_hidden, self.n), nn.Sigmoid(), 44 | ) 45 | self.decoder2 = nn.Sequential( 46 | nn.Linear(self.n_latent,self.n_hidden), nn.ReLU(True), 47 | nn.Linear(self.n_hidden, self.n_hidden), nn.ReLU(True), 48 | nn.Linear(self.n_hidden, self.n), nn.Sigmoid(), 49 | ) 50 | 51 | def forward(self, g): 52 | bs = g.shape[0] 53 | ## Encode 54 | # z = self.encoder(g.view(1,-1)) 55 | z = self.encoder(g.view(bs, self.n)) 56 | ## Decoders (Phase 1) 57 | ae1 = self.decoder1(z) 58 | ae2 = self.decoder2(z) 59 | ## Encode-Decode (Phase 2) 60 | ae2ae1 = self.decoder2(self.encoder(ae1)) 61 | # return ae1.view(-1), ae2.view(-1), ae2ae1.view(-1) 62 | return ae1.view(bs, self.n), ae2.view(bs, self.n), ae2ae1.view(bs, self.n) 63 | 64 | 65 | class USAD(BaseDetector): 66 | def __init__(self, 67 | win_size = 5, 68 | feats = 1, 69 | batch_size = 128, 70 | epochs = 10, 71 | patience = 3, 72 | lr = 1e-4, 73 | validation_size=0.2 74 | ): 75 | super().__init__() 76 | 77 | self.__anomaly_score = None 78 | 79 | self.cuda = True 80 | self.device = get_gpu(self.cuda) 81 | 82 | self.win_size = win_size 83 | self.batch_size = batch_size 84 | self.epochs = epochs 85 | self.feats = feats 86 | self.validation_size = validation_size 87 | 88 | self.model = USADModel(feats=self.feats, n_window=self.win_size).to(self.device) 89 | self.optimizer = torch.optim.AdamW( 90 | self.model.parameters(), lr=lr, weight_decay=1e-5 91 | ) 92 | self.scheduler = 
torch.optim.lr_scheduler.StepLR(self.optimizer, 5, 0.9) 93 | self.criterion = nn.MSELoss(reduction = 'none') 94 | 95 | self.early_stopping = EarlyStoppingTorch(None, patience=patience) 96 | 97 | def fit(self, data): 98 | tsTrain = data[:int((1-self.validation_size)*len(data))] 99 | tsValid = data[int((1-self.validation_size)*len(data)):] 100 | 101 | train_loader = DataLoader( 102 | dataset=ReconstructDataset(tsTrain, window_size=self.win_size), 103 | batch_size=self.batch_size, 104 | shuffle=True 105 | ) 106 | 107 | valid_loader = DataLoader( 108 | dataset=ReconstructDataset(tsValid, window_size=self.win_size), 109 | batch_size=self.batch_size, 110 | shuffle=False 111 | ) 112 | 113 | l1s, l2s = [], [] 114 | for epoch in range(1, self.epochs + 1): 115 | self.model.train(mode=True) 116 | n = epoch + 1 117 | avg_loss = 0 118 | loop = tqdm.tqdm( 119 | enumerate(train_loader), total=len(train_loader), leave=True 120 | ) 121 | for idx, (d, _) in loop: 122 | d = d.to(self.device) # (bs, win, feat) 123 | # print('d: ', d.shape) 124 | 125 | ae1s, ae2s, ae2ae1s = self.model(d) 126 | # print('ae2ae1s: ', ae2ae1s.shape) 127 | 128 | d = d.view(ae2ae1s.shape[0], self.feats*self.win_size) 129 | 130 | l1 = (1 / n) * self.criterion(ae1s, d) + (1 - 1/n) * self.criterion(ae2ae1s, d) 131 | l2 = (1 / n) * self.criterion(ae2s, d) - (1 - 1/n) * self.criterion(ae2ae1s, d) 132 | # print('l1: ', l1.shape) 133 | 134 | l1s.append(torch.mean(l1).item()) 135 | l2s.append(torch.mean(l2).item()) 136 | loss = torch.mean(l1 + l2) 137 | 138 | self.optimizer.zero_grad() 139 | loss.backward() 140 | self.optimizer.step() 141 | 142 | avg_loss += loss.cpu().item() 143 | loop.set_description(f"Training Epoch [{epoch}/{self.epochs}]") 144 | loop.set_postfix(loss=loss.item(), avg_loss=avg_loss / (idx + 1)) 145 | 146 | if len(valid_loader) > 0: 147 | self.model.eval() 148 | avg_loss_val = 0 149 | loop = tqdm.tqdm( 150 | enumerate(valid_loader), total=len(valid_loader), leave=True 151 | ) 152 | with torch.no_grad(): 153 | for idx, (d, _) in loop: 154 | d = d.to(self.device) 155 | ae1s, ae2s, ae2ae1s = self.model(d) 156 | d = d.view(ae2ae1s.shape[0], self.feats*self.win_size) 157 | 158 | l1 = (1 / n) * self.criterion(ae1s, d) + (1 - 1/n) * self.criterion(ae2ae1s, d) 159 | l2 = (1 / n) * self.criterion(ae2s, d) - (1 - 1/n) * self.criterion(ae2ae1s, d) 160 | 161 | l1s.append(torch.mean(l1).item()) 162 | l2s.append(torch.mean(l2).item()) 163 | loss = torch.mean(l1 + l2) 164 | avg_loss_val += loss.cpu().item() 165 | loop.set_description( 166 | f"Validation Epoch [{epoch}/{self.epochs}]" 167 | ) 168 | loop.set_postfix(loss=loss.item(), avg_loss_val=avg_loss_val / (idx + 1)) 169 | 170 | self.scheduler.step() 171 | if len(valid_loader) > 0: 172 | avg_loss = avg_loss_val / len(valid_loader) 173 | else: 174 | avg_loss = avg_loss / len(train_loader) 175 | self.early_stopping(avg_loss, self.model) 176 | if self.early_stopping.early_stop: 177 | print(" Early stopping<<<") 178 | break 179 | 180 | def decision_function(self, data): 181 | test_loader = DataLoader( 182 | dataset=ReconstructDataset(data, window_size=self.win_size), 183 | batch_size=self.batch_size, 184 | shuffle=False 185 | ) 186 | 187 | self.model.eval() 188 | scores = [] 189 | loop = tqdm.tqdm(enumerate(test_loader), total=len(test_loader), leave=True) 190 | 191 | with torch.no_grad(): 192 | for idx, (d, _) in loop: 193 | d = d.to(self.device) 194 | # print('d: ', d.shape) 195 | 196 | ae1, ae2, ae2ae1 = self.model(d) 197 | d = d.view(ae2ae1.shape[0], self.feats*self.win_size) 198 | 
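# Per-window anomaly score used below: a weighted sum of the reconstruction
# errors from USAD's two adversarially trained autoencoders,
#   score(w) = alpha * ||AE1(w) - w||^2 + beta * ||AE2(AE1(w)) - w||^2,
# with alpha = 0.1 and beta = 0.9, averaged over the flattened window.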
199 | # print('ae2ae1: ', ae2ae1.shape) 200 | # print('d: ', d.shape) 201 | 202 | loss = 0.1 * self.criterion(ae1, d) + 0.9 * self.criterion(ae2ae1, d) 203 | # print('loss: ', loss.shape) 204 | loss = torch.mean(loss, axis=-1) 205 | 206 | scores.append(loss.cpu()) 207 | 208 | scores = torch.cat(scores, dim=0) 209 | scores = scores.numpy() 210 | 211 | self.__anomaly_score = scores 212 | 213 | if self.__anomaly_score.shape[0] < len(data): 214 | self.__anomaly_score = np.array([self.__anomaly_score[0]]*math.ceil((self.win_size-1)/2) + 215 | list(self.__anomaly_score) + [self.__anomaly_score[-1]]*((self.win_size-1)//2)) 216 | 217 | return self.__anomaly_score 218 | 219 | def anomaly_score(self) -> np.ndarray: 220 | return self.__anomaly_score 221 | 222 | def param_statistic(self, save_file): 223 | pass 224 | -------------------------------------------------------------------------------- /TSB_AD/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/TSB_AD/models/__init__.py -------------------------------------------------------------------------------- /TSB_AD/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/TSB_AD/utils/__init__.py -------------------------------------------------------------------------------- /TSB_AD/utils/dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | import numpy as np 4 | epsilon = 1e-8 5 | 6 | class ReconstructDataset(torch.utils.data.Dataset): 7 | def __init__(self, data, window_size, stride=1, normalize=True): 8 | super().__init__() 9 | self.window_size = window_size 10 | self.stride = stride 11 | self.data = self._normalize_data(data) if normalize else data 12 | 13 | self.univariate = self.data.shape[1] == 1 14 | self.sample_num = max(0, (self.data.shape[0] - window_size) // stride + 1) 15 | self.samples, self.targets = self._generate_samples() 16 | 17 | def _normalize_data(self, data, epsilon=1e-8): 18 | mean, std = np.mean(data, axis=0), np.std(data, axis=0) 19 | std = np.where(std == 0, epsilon, std) # Avoid division by zero 20 | return (data - mean) / std 21 | 22 | def _generate_samples(self): 23 | data = torch.tensor(self.data, dtype=torch.float32) 24 | 25 | if self.univariate: 26 | data = data.squeeze() 27 | X = torch.stack([data[i * self.stride : i * self.stride + self.window_size] for i in range(self.sample_num)]) 28 | X = X.unsqueeze(-1) 29 | else: 30 | X = torch.stack([data[i * self.stride : i * self.stride + self.window_size, :] for i in range(self.sample_num)]) 31 | 32 | return X, X 33 | 34 | def __len__(self): 35 | return self.sample_num 36 | 37 | def __getitem__(self, index): 38 | return self.samples[index], self.targets[index] 39 | 40 | class ForecastDataset(torch.utils.data.Dataset): 41 | def __init__(self, data, window_size, pred_len, stride=1, normalize=True): 42 | super().__init__() 43 | self.window_size = window_size 44 | self.pred_len = pred_len 45 | self.stride = stride 46 | self.data = self._normalize_data(data) if normalize else data 47 | 48 | self.univariate = self.data.shape[1] == 1 49 | self.sample_num = max((self.data.shape[0] - window_size - pred_len) // stride + 1, 0) 50 | 51 | # Generate samples efficiently 52 | self.samples, self.targets = 
self._generate_samples() 53 | 54 | def _normalize_data(self, data, epsilon=1e-8): 55 | """ Normalize data using mean and standard deviation. """ 56 | mean, std = np.mean(data, axis=0), np.std(data, axis=0) 57 | std = np.where(std == 0, epsilon, std) # Avoid division by zero 58 | return (data - mean) / std 59 | 60 | def _generate_samples(self): 61 | """ Generate windowed samples efficiently using vectorized slicing. """ 62 | data = torch.tensor(self.data, dtype=torch.float32) 63 | 64 | indices = np.arange(0, self.sample_num * self.stride, self.stride) 65 | 66 | X = torch.stack([data[i : i + self.window_size] for i in indices]) 67 | Y = torch.stack([data[i + self.window_size : i + self.window_size + self.pred_len] for i in indices]) 68 | 69 | return X, Y # Inputs & targets 70 | 71 | def __len__(self): 72 | return self.sample_num 73 | 74 | def __getitem__(self, index): 75 | return self.samples[index], self.targets[index] 76 | 77 | class TSDataset(torch.utils.data.Dataset): 78 | 79 | def __init__(self, X, y=None, mean=None, std=None): 80 | super(TSDataset, self).__init__() 81 | self.X = X 82 | self.mean = mean 83 | self.std = std 84 | 85 | def __len__(self): 86 | return self.X.shape[0] 87 | 88 | def __getitem__(self, idx): 89 | if torch.is_tensor(idx): 90 | idx = idx.tolist() 91 | sample = self.X[idx, :] 92 | 93 | if self.mean is not None and self.std is not None: 94 | sample = (sample - self.mean) / self.std 95 | # assert_almost_equal (0, sample.mean(), decimal=1) 96 | 97 | return torch.from_numpy(sample), idx 98 | 99 | 100 | class ReconstructDataset_Moment(torch.utils.data.Dataset): 101 | def __init__(self, data, window_size, stride=1, normalize=True): 102 | super().__init__() 103 | self.window_size = window_size 104 | self.stride = stride 105 | self.data = self._normalize_data(data) if normalize else data 106 | 107 | self.univariate = self.data.shape[1] == 1 108 | self.sample_num = max((self.data.shape[0] - window_size) // stride + 1, 0) 109 | 110 | self.samples = self._generate_samples() 111 | self.input_mask = np.ones(self.window_size, dtype=np.float32) # Fixed input mask 112 | 113 | def _normalize_data(self, data, epsilon=1e-8): 114 | mean, std = np.mean(data, axis=0), np.std(data, axis=0) 115 | std = np.where(std == 0, epsilon, std) # Avoid division by zero 116 | return (data - mean) / std 117 | 118 | def _generate_samples(self): 119 | data = torch.tensor(self.data, dtype=torch.float32) 120 | indices = np.arange(0, self.sample_num * self.stride, self.stride) 121 | 122 | if self.univariate: 123 | X = torch.stack([data[i : i + self.window_size] for i in indices]) 124 | else: 125 | X = torch.stack([data[i : i + self.window_size, :] for i in indices]) 126 | 127 | return X 128 | 129 | def __len__(self): 130 | return self.sample_num 131 | 132 | def __getitem__(self, index): 133 | return self.samples[index], self.input_mask -------------------------------------------------------------------------------- /TSB_AD/utils/slidingWindows.py: -------------------------------------------------------------------------------- 1 | from statsmodels.tsa.stattools import acf 2 | from scipy.signal import argrelextrema 3 | import numpy as np 4 | from statsmodels.graphics.tsaplots import plot_acf 5 | 6 | # determine sliding window (period) based on ACF 7 | def find_length_rank(data, rank=1): 8 | data = data.squeeze() 9 | if len(data.shape)>1: return 0 10 | if rank==0: return 1 11 | data = data[:min(20000, len(data))] 12 | 13 | base = 3 14 | auto_corr = acf(data, nlags=400, fft=True)[base:] 15 | 16 | # 
plot_acf(data, lags=400, fft=True) 17 | # plt.xlabel('Lags') 18 | # plt.ylabel('Autocorrelation') 19 | # plt.title('Autocorrelation Function (ACF)') 20 | # plt.savefig('/data/liuqinghua/code/ts/TSAD-AutoML/AutoAD_Solution/candidate_pool/cd_diagram/ts_acf.png') 21 | 22 | local_max = argrelextrema(auto_corr, np.greater)[0] 23 | 24 | # print('auto_corr: ', auto_corr) 25 | # print('local_max: ', local_max) 26 | 27 | try: 28 | # max_local_max = np.argmax([auto_corr[lcm] for lcm in local_max]) 29 | sorted_local_max = np.argsort([auto_corr[lcm] for lcm in local_max])[::-1] # Ascending order 30 | max_local_max = sorted_local_max[0] # Default 31 | if rank == 1: max_local_max = sorted_local_max[0] 32 | if rank == 2: 33 | for i in sorted_local_max[1:]: 34 | if i > sorted_local_max[0]: 35 | max_local_max = i 36 | break 37 | if rank == 3: 38 | for i in sorted_local_max[1:]: 39 | if i > sorted_local_max[0]: 40 | id_tmp = i 41 | break 42 | for i in sorted_local_max[id_tmp:]: 43 | if i > sorted_local_max[id_tmp]: 44 | max_local_max = i 45 | break 46 | # print('sorted_local_max: ', sorted_local_max) 47 | # print('max_local_max: ', max_local_max) 48 | if local_max[max_local_max]<3 or local_max[max_local_max]>300: 49 | return 125 50 | return local_max[max_local_max]+base 51 | except: 52 | return 125 53 | 54 | 55 | # determine sliding window (period) based on ACF, Original version 56 | def find_length(data): 57 | if len(data.shape)>1: 58 | return 0 59 | data = data[:min(20000, len(data))] 60 | 61 | base = 3 62 | auto_corr = acf(data, nlags=400, fft=True)[base:] 63 | 64 | 65 | local_max = argrelextrema(auto_corr, np.greater)[0] 66 | try: 67 | max_local_max = np.argmax([auto_corr[lcm] for lcm in local_max]) 68 | if local_max[max_local_max]<3 or local_max[max_local_max]>300: 69 | return 125 70 | return local_max[max_local_max]+base 71 | except: 72 | return 125 73 | -------------------------------------------------------------------------------- /TSB_AD/utils/stat_models.py: -------------------------------------------------------------------------------- 1 | """ A collection of statistical models 2 | code copied from pyod documentation https://github.com/yzhao062/pyod/blob/master/pyod/utils/stat_models.py 3 | """ 4 | # Author: Yue Zhao 5 | # License: BSD 2 clause 6 | 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | from scipy.stats import pearsonr 12 | from sklearn.utils.validation import check_array 13 | from sklearn.utils.validation import check_consistent_length 14 | from numba import njit 15 | 16 | def pairwise_distances_no_broadcast(X, Y): 17 | """Utility function to calculate row-wise euclidean distance of two matrix. 18 | Different from pair-wise calculation, this function would not broadcast. 19 | For instance, X and Y are both (4,3) matrices, the function would return 20 | a distance vector with shape (4,), instead of (4,4). 
21 | Parameters 22 | ---------- 23 | X : array of shape (n_samples, n_features) 24 | First input samples 25 | Y : array of shape (n_samples, n_features) 26 | Second input samples 27 | Returns 28 | ------- 29 | distance : array of shape (n_samples,) 30 | Row-wise euclidean distance of X and Y 31 | """ 32 | X = check_array(X) 33 | Y = check_array(Y) 34 | 35 | if X.shape[0] != Y.shape[0] or X.shape[1] != Y.shape[1]: 36 | raise ValueError("pairwise_distances_no_broadcast function receive" 37 | "matrix with different shapes {0} and {1}".format( 38 | X.shape, Y.shape)) 39 | return _pairwise_distances_no_broadcast_helper(X, Y) 40 | 41 | 42 | def _pairwise_distances_no_broadcast_helper(X, Y): # pragma: no cover 43 | """Internal function for calculating the distance with numba. Do not use. 44 | Parameters 45 | ---------- 46 | X : array of shape (n_samples, n_features) 47 | First input samples 48 | Y : array of shape (n_samples, n_features) 49 | Second input samples 50 | Returns 51 | ------- 52 | distance : array of shape (n_samples,) 53 | Intermediate results. Do not use. 54 | """ 55 | euclidean_sq = np.square(Y - X) 56 | return np.sqrt(np.sum(euclidean_sq, axis=1)).ravel() 57 | 58 | 59 | def wpearsonr(x, y, w=None): 60 | """Utility function to calculate the weighted Pearson correlation of two 61 | samples. 62 | See https://stats.stackexchange.com/questions/221246/such-thing-as-a-weighted-correlation 63 | for more information 64 | Parameters 65 | ---------- 66 | x : array, shape (n,) 67 | Input x. 68 | y : array, shape (n,) 69 | Input y. 70 | w : array, shape (n,) 71 | Weights w. 72 | Returns 73 | ------- 74 | scores : float in range of [-1,1] 75 | Weighted Pearson Correlation between x and y. 76 | """ 77 | 78 | # unweighted version 79 | # note the return is different 80 | # TODO: fix output differences 81 | if w is None: 82 | return pearsonr(x, y) 83 | 84 | x = np.asarray(x) 85 | y = np.asarray(y) 86 | w = np.asarray(w) 87 | 88 | check_consistent_length([x, y, w]) 89 | # n = len(x) 90 | 91 | w_sum = w.sum() 92 | mx = np.sum(x * w) / w_sum 93 | my = np.sum(y * w) / w_sum 94 | 95 | xm, ym = (x - mx), (y - my) 96 | 97 | r_num = np.sum(xm * ym * w) / w_sum 98 | 99 | xm2 = np.sum(xm * xm * w) / w_sum 100 | ym2 = np.sum(ym * ym * w) / w_sum 101 | 102 | r_den = np.sqrt(xm2 * ym2) 103 | r = r_num / r_den 104 | 105 | r = max(min(r, 1.0), -1.0) 106 | 107 | # TODO: disable p value calculation due to python 2.7 break 108 | # df = n_train_ - 2 109 | # 110 | # if abs(r) == 1.0: 111 | # prob = 0.0 112 | # else: 113 | # t_squared = r ** 2 * (df / ((1.0 - r) * (1.0 + r))) 114 | # prob = _betai(0.5 * df, 0.5, df / (df + t_squared)) 115 | return r # , prob 116 | 117 | 118 | ##################################### 119 | # PROBABILITY CALCULATIONS # 120 | ##################################### 121 | 122 | # TODO: disable p value calculation due to python 2.7 break 123 | # def _betai(a, b, x): 124 | # x = np.asarray(x) 125 | # x = np.where(x < 1.0, x, 1.0) # if x > 1 then return 1.0 126 | # return betainc(a, b, x) 127 | 128 | 129 | def pearsonr_mat(mat, w=None): 130 | """Utility function to calculate pearson matrix (row-wise). 131 | Parameters 132 | ---------- 133 | mat : numpy array of shape (n_samples, n_features) 134 | Input matrix. 135 | w : numpy array of shape (n_features,) 136 | Weights. 137 | Returns 138 | ------- 139 | pear_mat : numpy array of shape (n_samples, n_samples) 140 | Row-wise pearson score matrix. 
141 | """ 142 | mat = check_array(mat) 143 | n_row = mat.shape[0] 144 | n_col = mat.shape[1] 145 | pear_mat = np.full([n_row, n_row], 1).astype(float) 146 | 147 | if w is not None: 148 | for cx in range(n_row): 149 | for cy in range(cx + 1, n_row): 150 | curr_pear = wpearsonr(mat[cx, :], mat[cy, :], w) 151 | pear_mat[cx, cy] = curr_pear 152 | pear_mat[cy, cx] = curr_pear 153 | else: 154 | for cx in range(n_col): 155 | for cy in range(cx + 1, n_row): 156 | curr_pear = pearsonr(mat[cx, :], mat[cy, :])[0] 157 | pear_mat[cx, cy] = curr_pear 158 | pear_mat[cy, cx] = curr_pear 159 | 160 | return pear_mat 161 | 162 | def column_ecdf(matrix: np.ndarray) -> np.ndarray: 163 | """ 164 | Utility function to compute the column wise empirical cumulative distribution of a 2D feature matrix, 165 | where the rows are samples and the columns are features per sample. The accumulation is done in the positive 166 | direction of the sample axis. 167 | 168 | E.G. 169 | p(1) = 0.2, p(0) = 0.3, p(2) = 0.1, p(6) = 0.4 170 | ECDF E(5) = p(x <= 5) 171 | ECDF E would be E(-1) = 0, E(0) = 0.3, E(1) = 0.5, E(2) = 0.6, E(3) = 0.6, E(4) = 0.6, E(5) = 0.6, E(6) = 1 172 | 173 | Similar to and tested against: 174 | https://www.statsmodels.org/stable/generated/statsmodels.distributions.empirical_distribution.ECDF.html 175 | 176 | Returns 177 | ------- 178 | 179 | """ 180 | # check the matrix dimensions 181 | assert len(matrix.shape) == 2, 'Matrix needs to be two dimensional for the ECDF computation.' 182 | 183 | # create a probability array the same shape as the feature matrix which we will reorder to build 184 | # the ecdf 185 | probabilities = np.linspace(np.ones(matrix.shape[1]) / matrix.shape[0], np.ones(matrix.shape[1]), matrix.shape[0]) 186 | 187 | # get the sorting indices for a numpy array 188 | sort_idx = np.argsort(matrix, axis=0) 189 | 190 | # sort the numpy array, as we need to look for duplicates in the feature values (that would have different 191 | # probabilities if we would just resort the probabilities array) 192 | matrix = np.take_along_axis(matrix, sort_idx, axis=0) 193 | 194 | # deal with equal values 195 | ecdf_terminate_equals_inplace(matrix, probabilities) 196 | 197 | # return the resorted accumulated probabilities (by reverting the sorting of the input matrix) 198 | # looks a little complicated but is faster this way 199 | reordered_probabilities = np.ones_like(probabilities) 200 | np.put_along_axis(reordered_probabilities, sort_idx, probabilities, axis=0) 201 | return reordered_probabilities 202 | 203 | @njit 204 | def ecdf_terminate_equals_inplace(matrix: np.ndarray, probabilities: np.ndarray): 205 | """ 206 | This is a helper function for computing the ecdf of an array. It has been outsourced from the original 207 | function in order to be able to use the njit compiler of numpy for increased speeds, as it unfortunately 208 | needs a loop over all rows and columns of a matrix. It acts in place on the probabilities' matrix. 209 | 210 | Parameters 211 | ---------- 212 | matrix : a feature matrix where the rows are samples and each column is a feature !(expected to be sorted)! 213 | 214 | probabilities : a probability matrix that will be used building the ecdf. It has values between 0 and 1 and 215 | is also sorted. 
216 | 217 | Returns 218 | ------- 219 | 220 | """ 221 | for cx in range(probabilities.shape[1]): 222 | for rx in range(probabilities.shape[0] - 2, -1, -1): 223 | if matrix[rx, cx] == matrix[rx + 1, cx]: 224 | probabilities[rx, cx] = probabilities[rx + 1, cx] -------------------------------------------------------------------------------- /TSB_AD/utils/torch_utility.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | import subprocess as sp 5 | import os, math 6 | 7 | class EarlyStoppingTorch: 8 | """Early stops the training if validation loss doesn't improve after a given patience.""" 9 | def __init__(self, save_path=None, patience=7, verbose=False, delta=0.0001): 10 | """ 11 | Args: 12 | save_path : 13 | patience (int): How long to wait after last time validation loss improved. 14 | Default: 7 15 | verbose (bool): If True, prints a message for each validation loss improvement. 16 | Default: False 17 | delta (float): Minimum change in the monitored quantity to qualify as an improvement. 18 | Default: 0 19 | """ 20 | self.save_path = save_path 21 | self.patience = patience 22 | self.verbose = verbose 23 | self.counter = 0 24 | self.best_score = None 25 | self.early_stop = False 26 | self.val_loss_min = np.Inf 27 | self.delta = delta 28 | 29 | def __call__(self, val_loss, model): 30 | 31 | score = -val_loss 32 | 33 | if self.best_score is None: 34 | self.best_score = score 35 | self.save_checkpoint(val_loss, model) 36 | elif score < self.best_score + self.delta: 37 | self.counter += 1 38 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 39 | if self.counter >= self.patience: 40 | self.early_stop = True 41 | else: 42 | self.best_score = score 43 | self.save_checkpoint(val_loss, model) 44 | self.counter = 0 45 | 46 | def save_checkpoint(self, val_loss, model): 47 | '''Saves model when validation loss decrease.''' 48 | if self.verbose: 49 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...') 50 | if self.save_path: 51 | path = os.path.join(self.save_path, 'best_network.pth') 52 | torch.save(model.state_dict(), path) 53 | self.val_loss_min = val_loss 54 | 55 | class PositionalEmbedding(nn.Module): 56 | def __init__(self, d_model, max_len=5000): 57 | super(PositionalEmbedding, self).__init__() 58 | # Compute the positional encodings once in log space. 
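# In formulas, for position pos and channel index 2i:
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
# The div_term below evaluates 10000^(-2i / d_model) via exp/log for numerical stability.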
59 | pe = torch.zeros(max_len, d_model).float() 60 | pe.require_grad = False 61 | 62 | position = torch.arange(0, max_len).float().unsqueeze(1) 63 | div_term = (torch.arange(0, d_model, 2).float() 64 | * -(math.log(10000.0) / d_model)).exp() 65 | 66 | pe[:, 0::2] = torch.sin(position * div_term) 67 | pe[:, 1::2] = torch.cos(position * div_term) 68 | 69 | pe = pe.unsqueeze(0) 70 | self.register_buffer('pe', pe) 71 | 72 | def forward(self, x): 73 | return self.pe[:, :x.size(1)] 74 | 75 | class TokenEmbedding(nn.Module): 76 | def __init__(self, c_in, d_model): 77 | super(TokenEmbedding, self).__init__() 78 | padding = 1 if torch.__version__ >= '1.5.0' else 2 79 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 80 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 81 | for m in self.modules(): 82 | if isinstance(m, nn.Conv1d): 83 | nn.init.kaiming_normal_( 84 | m.weight, mode='fan_in', nonlinearity='leaky_relu') 85 | 86 | def forward(self, x): 87 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 88 | return x 89 | 90 | class TemporalEmbedding(nn.Module): 91 | def __init__(self, d_model, embed_type='fixed', freq='h'): 92 | super(TemporalEmbedding, self).__init__() 93 | 94 | minute_size = 4 95 | hour_size = 24 96 | weekday_size = 7 97 | day_size = 32 98 | month_size = 13 99 | 100 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding 101 | if freq == 't': 102 | self.minute_embed = Embed(minute_size, d_model) 103 | self.hour_embed = Embed(hour_size, d_model) 104 | self.weekday_embed = Embed(weekday_size, d_model) 105 | self.day_embed = Embed(day_size, d_model) 106 | self.month_embed = Embed(month_size, d_model) 107 | 108 | def forward(self, x): 109 | x = x.long() 110 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr( 111 | self, 'minute_embed') else 0. 
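# The time-mark tensor x is expected to carry integer calendar features ordered
# as [month, day, weekday, hour, minute] along its last dimension; each feature
# is looked up in its own embedding table and the lookups are summed
# (the minute embedding is only present when freq == 't').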
112 | hour_x = self.hour_embed(x[:, :, 3]) 113 | weekday_x = self.weekday_embed(x[:, :, 2]) 114 | day_x = self.day_embed(x[:, :, 1]) 115 | month_x = self.month_embed(x[:, :, 0]) 116 | 117 | return hour_x + weekday_x + day_x + month_x + minute_x 118 | 119 | class FixedEmbedding(nn.Module): 120 | def __init__(self, c_in, d_model): 121 | super(FixedEmbedding, self).__init__() 122 | 123 | w = torch.zeros(c_in, d_model).float() 124 | w.require_grad = False 125 | 126 | position = torch.arange(0, c_in).float().unsqueeze(1) 127 | div_term = (torch.arange(0, d_model, 2).float() 128 | * -(math.log(10000.0) / d_model)).exp() 129 | 130 | w[:, 0::2] = torch.sin(position * div_term) 131 | w[:, 1::2] = torch.cos(position * div_term) 132 | 133 | self.emb = nn.Embedding(c_in, d_model) 134 | self.emb.weight = nn.Parameter(w, requires_grad=False) 135 | 136 | def forward(self, x): 137 | return self.emb(x).detach() 138 | 139 | class TimeFeatureEmbedding(nn.Module): 140 | def __init__(self, d_model, embed_type='timeF', freq='h'): 141 | super(TimeFeatureEmbedding, self).__init__() 142 | 143 | freq_map = {'h': 4, 't': 5, 's': 6, 144 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} 145 | d_inp = freq_map[freq] 146 | self.embed = nn.Linear(d_inp, d_model, bias=False) 147 | 148 | def forward(self, x): 149 | return self.embed(x) 150 | 151 | class DataEmbedding(nn.Module): 152 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 153 | super(DataEmbedding, self).__init__() 154 | 155 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 156 | self.position_embedding = PositionalEmbedding(d_model=d_model) 157 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 158 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 159 | d_model=d_model, embed_type=embed_type, freq=freq) 160 | self.dropout = nn.Dropout(p=dropout) 161 | 162 | def forward(self, x, x_mark): 163 | if x_mark is None: 164 | x = self.value_embedding(x) + self.position_embedding(x) 165 | else: 166 | x = self.value_embedding( 167 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x) 168 | return self.dropout(x) 169 | 170 | def adjust_learning_rate(optimizer, epoch, lradj, learning_rate): 171 | # lr = args.learning_rate * (0.2 ** (epoch // 2)) 172 | if lradj == 'type1': 173 | lr_adjust = {epoch: learning_rate * (0.5 ** ((epoch - 1) // 1))} 174 | elif lradj == 'type2': 175 | lr_adjust = { 176 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 177 | 10: 5e-7, 15: 1e-7, 20: 5e-8 178 | } 179 | if epoch in lr_adjust.keys(): 180 | lr = lr_adjust[epoch] 181 | for param_group in optimizer.param_groups: 182 | param_group['lr'] = lr 183 | print('Updating learning rate to {}'.format(lr)) 184 | 185 | 186 | def min_memory_id(): 187 | output = sp.check_output(["/usr/bin/nvidia-smi", "--query-gpu=memory.used", "--format=csv"]) 188 | memory = [int(s.split(" ")[0]) for s in output.decode().split("\n")[1:-1]] 189 | assert len(memory) == torch.cuda.device_count() 190 | return np.argmin(memory) 191 | 192 | 193 | def get_gpu(cuda): 194 | if cuda == True and torch.cuda.is_available(): 195 | try: 196 | device = torch.device(f"cuda:{min_memory_id()}") 197 | torch.cuda.set_device(device) 198 | print(f"----- Using GPU {torch.cuda.current_device()} -----") 199 | except: 200 | device = torch.device("cuda") 201 | print(f"----- Using GPU {torch.cuda.get_device_name()} -----") 202 | else: 203 | if cuda == True and not torch.cuda.is_available(): 204 | print("----- GPU is unavailable -----") 205 | device = 
torch.device("cpu") 206 | print("----- Using CPU -----") 207 | return device -------------------------------------------------------------------------------- /assets/fig/readme_title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/assets/fig/readme_title.png -------------------------------------------------------------------------------- /assets/fig/readme_title_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/assets/fig/readme_title_2.png -------------------------------------------------------------------------------- /assets/fig/tsb_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/assets/fig/tsb_overview.png -------------------------------------------------------------------------------- /benchmark_exp/HP_Tuning_M.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os 9 | import itertools 10 | from TSB_AD.evaluation.metrics import get_metrics 11 | from TSB_AD.utils.slidingWindows import find_length_rank 12 | from TSB_AD.model_wrapper import * 13 | from TSB_AD.HP_list import Multi_algo_HP_dict 14 | 15 | # seeding 16 | seed = 2024 17 | torch.manual_seed(seed) 18 | torch.cuda.manual_seed(seed) 19 | torch.cuda.manual_seed_all(seed) 20 | np.random.seed(seed) 21 | random.seed(seed) 22 | torch.backends.cudnn.benchmark = False 23 | torch.backends.cudnn.deterministic = True 24 | 25 | print("CUDA available: ", torch.cuda.is_available()) 26 | print("cuDNN version: ", torch.backends.cudnn.version()) 27 | 28 | if __name__ == '__main__': 29 | 30 | Start_T = time.time() 31 | ## ArgumentParser 32 | parser = argparse.ArgumentParser(description='HP Tuning') 33 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-M/') 34 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-M-Tuning.csv') 35 | parser.add_argument('--save_dir', type=str, default='eval/HP_tuning/multi/') 36 | parser.add_argument('--AD_Name', type=str, default='IForest') 37 | args = parser.parse_args() 38 | 39 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 40 | 41 | Det_HP = Multi_algo_HP_dict[args.AD_Name] 42 | 43 | keys, values = zip(*Det_HP.items()) 44 | combinations = [dict(zip(keys, v)) for v in itertools.product(*values)] 45 | 46 | write_csv = [] 47 | for filename in file_list: 48 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 49 | 50 | file_path = os.path.join(args.dataset_dir, filename) 51 | df = pd.read_csv(file_path).dropna() 52 | data = df.iloc[:, 0:-1].values.astype(float) 53 | label = df['Label'].astype(int).to_numpy() 54 | # print('data: ', data.shape) 55 | # print('label: ', label.shape) 56 | 57 | feats = data.shape[1] 58 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 59 | train_index = filename.split('.')[0].split('_')[-3] 60 | data_train = data[:int(train_index), :] 61 | 62 | for params in combinations: 63 | 64 | if args.AD_Name in Semisupervise_AD_Pool: 65 | output = run_Semisupervise_AD(args.AD_Name, 
data_train, data, **params) 66 | elif args.AD_Name in Unsupervise_AD_Pool: 67 | output = run_Unsupervise_AD(args.AD_Name, data, **params) 68 | else: 69 | raise Exception(f"{args.AD_Name} is not defined") 70 | 71 | try: 72 | evaluation_result = get_metrics(output, label, slidingWindow=slidingWindow) 73 | print('evaluation_result: ', evaluation_result) 74 | list_w = list(evaluation_result.values()) 75 | except: 76 | list_w = [0]*9 77 | list_w.insert(0, params) 78 | list_w.insert(0, filename) 79 | write_csv.append(list_w) 80 | 81 | ## Temp Save 82 | col_w = list(evaluation_result.keys()) 83 | col_w.insert(0, 'HP') 84 | col_w.insert(0, 'file') 85 | w_csv = pd.DataFrame(write_csv, columns=col_w) 86 | 87 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/HP_Tuning_U.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os 9 | import itertools 10 | from TSB_AD.evaluation.metrics import get_metrics 11 | from TSB_AD.utils.slidingWindows import find_length_rank 12 | from TSB_AD.model_wrapper import * 13 | from TSB_AD.HP_list import Uni_algo_HP_dict 14 | 15 | # seeding 16 | seed = 2024 17 | torch.manual_seed(seed) 18 | torch.cuda.manual_seed(seed) 19 | torch.cuda.manual_seed_all(seed) 20 | np.random.seed(seed) 21 | random.seed(seed) 22 | torch.backends.cudnn.benchmark = False 23 | torch.backends.cudnn.deterministic = True 24 | 25 | print("CUDA available: ", torch.cuda.is_available()) 26 | print("cuDNN version: ", torch.backends.cudnn.version()) 27 | 28 | if __name__ == '__main__': 29 | 30 | Start_T = time.time() 31 | ## ArgumentParser 32 | parser = argparse.ArgumentParser(description='HP Tuning') 33 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-U/') 34 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-U-Tuning.csv') 35 | parser.add_argument('--save_dir', type=str, default='eval/HP_tuning/uni/') 36 | parser.add_argument('--AD_Name', type=str, default='IForest') 37 | args = parser.parse_args() 38 | 39 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 40 | 41 | Det_HP = Uni_algo_HP_dict[args.AD_Name] 42 | 43 | keys, values = zip(*Det_HP.items()) 44 | combinations = [dict(zip(keys, v)) for v in itertools.product(*values)] 45 | 46 | write_csv = [] 47 | for filename in file_list: 48 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 49 | 50 | file_path = os.path.join(args.dataset_dir, filename) 51 | df = pd.read_csv(file_path).dropna() 52 | data = df.iloc[:, 0:-1].values.astype(float) 53 | label = df['Label'].astype(int).to_numpy() 54 | # print('data: ', data.shape) 55 | # print('label: ', label.shape) 56 | 57 | feats = data.shape[1] 58 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 59 | train_index = filename.split('.')[0].split('_')[-3] 60 | data_train = data[:int(train_index), :] 61 | 62 | for params in combinations: 63 | 64 | if args.AD_Name in Semisupervise_AD_Pool: 65 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **params) 66 | elif args.AD_Name in Unsupervise_AD_Pool: 67 | output = run_Unsupervise_AD(args.AD_Name, data, **params) 68 | else: 69 | raise Exception(f"{args.AD_Name} is not defined") 70 | 71 | try: 72 | evaluation_result = 
get_metrics(output, label, slidingWindow=slidingWindow) 73 | print('evaluation_result: ', evaluation_result) 74 | list_w = list(evaluation_result.values()) 75 | except: 76 | list_w = [0]*9 77 | list_w.insert(0, params) 78 | list_w.insert(0, filename) 79 | write_csv.append(list_w) 80 | 81 | ## Temp Save 82 | col_w = list(evaluation_result.keys()) 83 | col_w.insert(0, 'HP') 84 | col_w.insert(0, 'file') 85 | w_csv = pd.DataFrame(write_csv, columns=col_w) 86 | 87 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/README.md: -------------------------------------------------------------------------------- 1 | ### Scripts for running experiments/develop new methods in TSB-AD 2 | 3 | * Hper-parameter Tuning: HP_Tuning_U/M.py 4 | 5 | * Benchmark Evaluation: Run_Detector_U/M.py 6 | 7 | * `benchmark_eval_results/`: Evaluation results of anomaly detectors across different time series in TSB-AD 8 | * All time series are normalized by z-score by default 9 | 10 | * Develop your own algorithm: Run_Custom_Detector.py 11 | * Step 1: Implement `Custom_AD` class 12 | * Step 2: Implement model wrapper function `run_Custom_AD_Unsupervised` or `run_Custom_AD_Semisupervised` 13 | * Step 3: Specify `Custom_AD_HP` hyperparameter dict 14 | * Step 4: Run the custom algorithm either `run_Custom_AD_Unsupervised` or `run_Custom_AD_Semisupervised` 15 | * Step 5: Apply threshold to the anomaly score (if any) 16 | 17 | 🪧 How to commit your own algorithm to TSB-AD: you can send us the Run_Custom_Detector.py (replace Custom_Detector with the model name) to us via (i) [email](liu.11085@osu.edu) or (ii) open a pull request and add the file to `benchmark_exp` folder in `TSB-AD-algo` branch. We will test and evaluate the algorithm and include it in our [leaderboard](https://thedatumorg.github.io/TSB-AD/). -------------------------------------------------------------------------------- /benchmark_exp/Run_Custom_Detector.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os, logging 9 | from sklearn.preprocessing import MinMaxScaler 10 | 11 | from TSB_AD.evaluation.metrics import get_metrics 12 | from TSB_AD.utils.slidingWindows import find_length_rank 13 | from TSB_AD.models.base import BaseDetector 14 | from TSB_AD.utils.utility import zscore 15 | 16 | class Custom_AD(BaseDetector): 17 | 18 | def __init__(self, HP, normalize=True): 19 | super().__init__() 20 | self.HP = HP 21 | self.normalize = normalize 22 | 23 | def fit(self, X, y=None): 24 | """Fit detector. y is ignored in unsupervised methods. 25 | 26 | Parameters 27 | ---------- 28 | X : numpy array of shape (n_samples, n_features) 29 | The input samples. 30 | 31 | y : Ignored 32 | Not used, present for API consistency by convention. 33 | 34 | Returns 35 | ------- 36 | self : object 37 | Fitted estimator. 38 | """ 39 | n_samples, n_features = X.shape 40 | if self.normalize: X = zscore(X, axis=1, ddof=1) 41 | 42 | self.decision_scores_ = np.zeros(n_samples) 43 | return self 44 | 45 | def decision_function(self, X): 46 | """Predict raw anomaly score of X using the fitted detector. 47 | 48 | The anomaly score of an input sample is computed based on different 49 | detector algorithms. 
For consistency, outliers are assigned with 50 | larger anomaly scores. 51 | 52 | Parameters 53 | ---------- 54 | X : numpy array of shape (n_samples, n_features) 55 | The training input samples. Sparse matrices are accepted only 56 | if they are supported by the base estimator. 57 | 58 | Returns 59 | ------- 60 | anomaly_scores : numpy array of shape (n_samples,) 61 | The anomaly score of the input samples. 62 | """ 63 | n_samples, n_features = X.shape 64 | decision_scores_ = np.zeros(n_samples) 65 | return decision_scores_ 66 | 67 | 68 | def run_Custom_AD_Unsupervised(data, HP): 69 | clf = Custom_AD(HP=HP) 70 | clf.fit(data) 71 | score = clf.decision_scores_ 72 | score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel() 73 | return score 74 | 75 | def run_Custom_AD_Semisupervised(data_train, data_test, HP): 76 | clf = Custom_AD(HP=HP) 77 | clf.fit(data_train) 78 | score = clf.decision_function(data_test) 79 | score = MinMaxScaler(feature_range=(0,1)).fit_transform(score.reshape(-1,1)).ravel() 80 | return score 81 | 82 | if __name__ == '__main__': 83 | 84 | Start_T = time.time() 85 | ## ArgumentParser 86 | parser = argparse.ArgumentParser(description='Running Custom_AD') 87 | parser.add_argument('--filename', type=str, default='001_NAB_id_1_Facility_tr_1007_1st_2014.csv') 88 | parser.add_argument('--data_direc', type=str, default='../Datasets/TSB-AD-U/') 89 | parser.add_argument('--AD_Name', type=str, default='Custom_AD') 90 | args = parser.parse_args() 91 | 92 | Custom_AD_HP = { 93 | 'HP': ['HP'], 94 | } 95 | 96 | df = pd.read_csv(args.data_direc + args.filename).dropna() 97 | data = df.iloc[:, 0:-1].values.astype(float) 98 | label = df['Label'].astype(int).to_numpy() 99 | print('data: ', data.shape) 100 | print('label: ', label.shape) 101 | 102 | slidingWindow = find_length_rank(data, rank=1) 103 | train_index = args.filename.split('.')[0].split('_')[-3] 104 | data_train = data[:int(train_index), :] 105 | 106 | start_time = time.time() 107 | 108 | output = run_Custom_AD_Semisupervised(data_train, data, **Custom_AD_HP) 109 | # output = run_Custom_AD_Unsupervised(data, **Custom_AD_HP) 110 | 111 | end_time = time.time() 112 | run_time = end_time - start_time 113 | 114 | pred = output > (np.mean(output)+3*np.std(output)) 115 | evaluation_result = get_metrics(output, label, slidingWindow=slidingWindow, pred=pred) 116 | print('Evaluation Result: ', evaluation_result) -------------------------------------------------------------------------------- /benchmark_exp/Run_Detector_M.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os, logging 9 | from TSB_AD.evaluation.metrics import get_metrics 10 | from TSB_AD.utils.slidingWindows import find_length_rank 11 | from TSB_AD.model_wrapper import * 12 | from TSB_AD.HP_list import Optimal_Multi_algo_HP_dict 13 | 14 | # seeding 15 | seed = 2024 16 | torch.manual_seed(seed) 17 | torch.cuda.manual_seed(seed) 18 | torch.cuda.manual_seed_all(seed) 19 | np.random.seed(seed) 20 | random.seed(seed) 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | 24 | print("CUDA available: ", torch.cuda.is_available()) 25 | print("cuDNN version: ", torch.backends.cudnn.version()) 26 | 27 | if __name__ == '__main__': 28 | 29 | Start_T = time.time() 30 | ## ArgumentParser 31 | parser 
= argparse.ArgumentParser(description='Generating Anomaly Score') 32 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-M/') 33 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-M-Eva.csv') 34 | parser.add_argument('--score_dir', type=str, default='eval/score/multi/') 35 | parser.add_argument('--save_dir', type=str, default='eval/metrics/multi/') 36 | parser.add_argument('--save', type=bool, default=False) 37 | parser.add_argument('--AD_Name', type=str, default='IForest') 38 | args = parser.parse_args() 39 | 40 | 41 | target_dir = os.path.join(args.score_dir, args.AD_Name) 42 | os.makedirs(target_dir, exist_ok = True) 43 | logging.basicConfig(filename=f'{target_dir}/000_run_{args.AD_Name}.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 44 | 45 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 46 | Optimal_Det_HP = Optimal_Multi_algo_HP_dict[args.AD_Name] 47 | print('Optimal_Det_HP: ', Optimal_Det_HP) 48 | 49 | write_csv = [] 50 | for filename in file_list: 51 | if os.path.exists(target_dir+'/'+filename.split('.')[0]+'.npy'): continue 52 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 53 | 54 | file_path = os.path.join(args.dataset_dir, filename) 55 | df = pd.read_csv(file_path).dropna() 56 | data = df.iloc[:, 0:-1].values.astype(float) 57 | label = df['Label'].astype(int).to_numpy() 58 | # print('data: ', data.shape) 59 | # print('label: ', label.shape) 60 | 61 | feats = data.shape[1] 62 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 63 | train_index = filename.split('.')[0].split('_')[-3] 64 | data_train = data[:int(train_index), :] 65 | 66 | start_time = time.time() 67 | 68 | if args.AD_Name in Semisupervise_AD_Pool: 69 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **Optimal_Det_HP) 70 | elif args.AD_Name in Unsupervise_AD_Pool: 71 | output = run_Unsupervise_AD(args.AD_Name, data, **Optimal_Det_HP) 72 | else: 73 | raise Exception(f"{args.AD_Name} is not defined") 74 | 75 | end_time = time.time() 76 | run_time = end_time - start_time 77 | 78 | if isinstance(output, np.ndarray): 79 | logging.info(f'Success at {filename} using {args.AD_Name} | Time cost: {run_time:.3f}s at length {len(label)}') 80 | np.save(target_dir+'/'+filename.split('.')[0]+'.npy', output) 81 | else: 82 | logging.error(f'At {filename}: '+output) 83 | 84 | ### whether to save the evaluation result 85 | if args.save: 86 | try: 87 | evaluation_result = get_metrics(output, label, metric='all', slidingWindow=slidingWindow) 88 | print('evaluation_result: ', evaluation_result) 89 | list_w = list(evaluation_result.values()) 90 | except: 91 | list_w = [0]*9 92 | list_w.insert(0, run_time) 93 | list_w.insert(0, filename) 94 | write_csv.append(list_w) 95 | 96 | ## Temp Save 97 | col_w = list(evaluation_result.keys()) 98 | col_w.insert(0, 'Time') 99 | col_w.insert(0, 'file') 100 | w_csv = pd.DataFrame(write_csv, columns=col_w) 101 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/Run_Detector_U.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Author: Qinghua Liu 3 | # License: Apache-2.0 License 4 | 5 | import pandas as pd 6 | import numpy as np 7 | import torch 8 | import random, argparse, time, os, logging 9 | from TSB_AD.evaluation.metrics import get_metrics 10 | from TSB_AD.utils.slidingWindows 
import find_length_rank 11 | from TSB_AD.model_wrapper import * 12 | from TSB_AD.HP_list import Optimal_Uni_algo_HP_dict 13 | 14 | # seeding 15 | seed = 2024 16 | torch.manual_seed(seed) 17 | torch.cuda.manual_seed(seed) 18 | torch.cuda.manual_seed_all(seed) 19 | np.random.seed(seed) 20 | random.seed(seed) 21 | torch.backends.cudnn.benchmark = False 22 | torch.backends.cudnn.deterministic = True 23 | 24 | print("CUDA available: ", torch.cuda.is_available()) 25 | print("cuDNN version: ", torch.backends.cudnn.version()) 26 | 27 | if __name__ == '__main__': 28 | 29 | Start_T = time.time() 30 | ## ArgumentParser 31 | parser = argparse.ArgumentParser(description='Generating Anomaly Score') 32 | parser.add_argument('--dataset_dir', type=str, default='../Datasets/TSB-AD-U/') 33 | parser.add_argument('--file_lsit', type=str, default='../Datasets/File_List/TSB-AD-U-Eva.csv') 34 | parser.add_argument('--score_dir', type=str, default='eval/score/uni/') 35 | parser.add_argument('--save_dir', type=str, default='eval/metrics/uni/') 36 | parser.add_argument('--save', type=bool, default=False) 37 | parser.add_argument('--AD_Name', type=str, default='IForest') 38 | args = parser.parse_args() 39 | 40 | 41 | target_dir = os.path.join(args.score_dir, args.AD_Name) 42 | os.makedirs(target_dir, exist_ok = True) 43 | logging.basicConfig(filename=f'{target_dir}/000_run_{args.AD_Name}.log', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') 44 | 45 | file_list = pd.read_csv(args.file_lsit)['file_name'].values 46 | Optimal_Det_HP = Optimal_Uni_algo_HP_dict[args.AD_Name] 47 | print('Optimal_Det_HP: ', Optimal_Det_HP) 48 | 49 | write_csv = [] 50 | for filename in file_list: 51 | if os.path.exists(target_dir+'/'+filename.split('.')[0]+'.npy'): continue 52 | print('Processing:{} by {}'.format(filename, args.AD_Name)) 53 | 54 | file_path = os.path.join(args.dataset_dir, filename) 55 | df = pd.read_csv(file_path).dropna() 56 | data = df.iloc[:, 0:-1].values.astype(float) 57 | label = df['Label'].astype(int).to_numpy() 58 | # print('data: ', data.shape) 59 | # print('label: ', label.shape) 60 | 61 | feats = data.shape[1] 62 | slidingWindow = find_length_rank(data[:,0].reshape(-1, 1), rank=1) 63 | train_index = filename.split('.')[0].split('_')[-3] 64 | data_train = data[:int(train_index), :] 65 | 66 | start_time = time.time() 67 | 68 | if args.AD_Name in Semisupervise_AD_Pool: 69 | output = run_Semisupervise_AD(args.AD_Name, data_train, data, **Optimal_Det_HP) 70 | elif args.AD_Name in Unsupervise_AD_Pool: 71 | output = run_Unsupervise_AD(args.AD_Name, data, **Optimal_Det_HP) 72 | else: 73 | raise Exception(f"{args.AD_Name} is not defined") 74 | 75 | end_time = time.time() 76 | run_time = end_time - start_time 77 | 78 | if isinstance(output, np.ndarray): 79 | logging.info(f'Success at {filename} using {args.AD_Name} | Time cost: {run_time:.3f}s at length {len(label)}') 80 | np.save(target_dir+'/'+filename.split('.')[0]+'.npy', output) 81 | else: 82 | logging.error(f'At {filename}: '+output) 83 | 84 | ### whether to save the evaluation result 85 | if args.save: 86 | try: 87 | evaluation_result = get_metrics(output, label, metric='all', slidingWindow=slidingWindow) 88 | print('evaluation_result: ', evaluation_result) 89 | list_w = list(evaluation_result.values()) 90 | except: 91 | list_w = [0]*9 92 | list_w.insert(0, run_time) 93 | list_w.insert(0, filename) 94 | write_csv.append(list_w) 95 | 96 | ## Temp Save 97 | col_w = list(evaluation_result.keys()) 98 | col_w.insert(0, 'Time') 99 | col_w.insert(0, 
'file') 100 | w_csv = pd.DataFrame(write_csv, columns=col_w) 101 | w_csv.to_csv(f'{args.save_dir}/{args.AD_Name}.csv', index=False) -------------------------------------------------------------------------------- /benchmark_exp/benchmark_eval_results/README.md: -------------------------------------------------------------------------------- 1 | Evaluation results computed based on the lasted version of [VUS](https://arxiv.org/abs/2502.13318). 2 | 3 | The implemention in [`get_metrics`](https://github.com/TheDatumOrg/TSB-AD/blob/dce9e5e5ec14ffb82787315dbf1a8564c7bb9f47/TSB_AD/evaluation/metrics.py#L3) is up-to-date. -------------------------------------------------------------------------------- /docs/static/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/.DS_Store -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container .slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 
1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel .slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | .publication-title { 51 | font-family: 'Google Sans', sans-serif; 52 | } 53 | 54 | .publication-authors { 55 | font-family: 'Google Sans', sans-serif; 56 | } 57 | 58 | .publication-venue { 59 | color: #555; 60 | width: fit-content; 61 | font-weight: bold; 62 | } 63 | 64 | .publication-awards { 65 | color: #ff3860; 66 | width: fit-content; 67 | font-weight: bolder; 68 | } 69 | 70 | .publication-authors { 71 | } 72 | 73 | .publication-authors a { 74 | color: hsl(204, 86%, 53%) !important; 75 | } 76 | 77 | .publication-authors a:hover { 78 | text-decoration: underline; 79 | } 80 | 81 | .author-block { 82 | display: inline-block; 83 | } 84 | 85 | .publication-banner img { 86 | } 87 | 88 | .publication-authors { 89 | /*color: #4286f4;*/ 90 | } 91 | 92 | .publication-video { 93 | position: relative; 94 | width: 100%; 95 | height: 0; 96 | padding-bottom: 56.25%; 97 | 98 | overflow: hidden; 99 | border-radius: 10px !important; 100 | } 101 | 102 | .publication-video iframe { 103 | position: absolute; 104 | top: 0; 105 | left: 0; 106 | width: 100%; 107 | height: 100%; 108 | } 109 | 110 | .publication-body img { 111 | } 112 | 113 | .results-carousel { 114 | overflow: hidden; 115 | } 116 | 117 | .results-carousel .item { 118 | margin: 5px; 119 | overflow: hidden; 120 | padding: 20px; 121 | font-size: 0; 122 | } 123 | 124 | .results-carousel 
video { 125 | margin: 0; 126 | } 127 | 128 | .slider-pagination .slider-page { 129 | background: #000000; 130 | } 131 | 132 | .eql-cntrb { 133 | font-size: smaller; 134 | } 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /docs/static/images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/images/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/tsb_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/images/tsb_overview.png -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | 4 | $(document).ready(function() { 5 | // Check for click events on the navbar burger icon 6 | 7 | var options = { 8 | slidesToScroll: 1, 9 | slidesToShow: 1, 10 | loop: true, 11 | infinite: true, 12 | autoplay: true, 13 | autoplaySpeed: 5000, 14 | } 15 | 16 | // Initialize all div with carousel class 17 | var carousels = bulmaCarousel.attach('.carousel', options); 18 | 19 | bulmaSlider.attach(); 20 | 21 | }) 22 | -------------------------------------------------------------------------------- /docs/static/leaderboard/TSB-AD-M.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Leaderboard 8 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 |
| Method | AUC-PR | AUC-ROC | VUS-PR | VUS-ROC | Standard-F1 | PA-F1 | Event-based-F1 | R-based-F1 | Affiliation-F1 |
|---|---|---|---|---|---|---|---|---|---|
| 🥇 CNN | 0.32 | 0.73 | 0.31 | 0.76 | 0.37 | 0.78 | 0.65 | 0.37 | 0.87 |
| 🥈 OmniAnomaly | 0.27 | 0.65 | 0.31 | 0.69 | 0.32 | 0.55 | 0.41 | 0.37 | 0.81 |
| 🥉 PCA | 0.31 | 0.70 | 0.31 | 0.74 | 0.37 | 0.79 | 0.59 | 0.29 | 0.85 |
| LSTMAD | 0.31 | 0.70 | 0.31 | 0.74 | 0.36 | 0.79 | 0.64 | 0.38 | 0.87 |
| USAD | 0.26 | 0.64 | 0.30 | 0.68 | 0.31 | 0.53 | 0.40 | 0.37 | 0.80 |
| AutoEncoder | 0.30 | 0.67 | 0.30 | 0.69 | 0.34 | 0.60 | 0.44 | 0.28 | 0.80 |
| KMeansAD | 0.25 | 0.69 | 0.29 | 0.73 | 0.31 | 0.68 | 0.49 | 0.33 | 0.82 |
| CBLOF | 0.28 | 0.67 | 0.27 | 0.70 | 0.32 | 0.65 | 0.45 | 0.31 | 0.81 |
| MCD | 0.27 | 0.65 | 0.27 | 0.69 | 0.33 | 0.46 | 0.33 | 0.20 | 0.76 |
| OCSVM | 0.23 | 0.61 | 0.26 | 0.67 | 0.28 | 0.48 | 0.41 | 0.30 | 0.80 |
| Donut | 0.20 | 0.64 | 0.26 | 0.71 | 0.28 | 0.52 | 0.36 | 0.21 | 0.81 |
| RobustPCA | 0.24 | 0.58 | 0.24 | 0.61 | 0.29 | 0.60 | 0.42 | 0.33 | 0.81 |
| FITS | 0.15 | 0.58 | 0.21 | 0.66 | 0.22 | 0.72 | 0.32 | 0.16 | 0.81 |
| OFA | 0.15 | 0.55 | 0.21 | 0.63 | 0.21 | 0.72 | 0.41 | 0.17 | 0.83 |
| EIF | 0.19 | 0.67 | 0.21 | 0.71 | 0.26 | 0.74 | 0.44 | 0.26 | 0.81 |
| COPOD | 0.20 | 0.65 | 0.20 | 0.69 | 0.27 | 0.72 | 0.41 | 0.24 | 0.80 |
| IForest | 0.19 | 0.66 | 0.20 | 0.69 | 0.26 | 0.68 | 0.41 | 0.24 | 0.80 |
| HBOS | 0.16 | 0.63 | 0.19 | 0.67 | 0.24 | 0.67 | 0.40 | 0.24 | 0.80 |
| TimesNet | 0.13 | 0.56 | 0.19 | 0.64 | 0.20 | 0.68 | 0.32 | 0.17 | 0.82 |
| KNN | 0.14 | 0.51 | 0.18 | 0.59 | 0.19 | 0.69 | 0.45 | 0.21 | 0.79 |
| TranAD | 0.14 | 0.59 | 0.18 | 0.65 | 0.21 | 0.68 | 0.40 | 0.21 | 0.79 |
| LOF | 0.10 | 0.53 | 0.14 | 0.60 | 0.15 | 0.57 | 0.32 | 0.14 | 0.76 |
| AnomalyTransformer | 0.07 | 0.52 | 0.12 | 0.57 | 0.12 | 0.53 | 0.33 | 0.14 | 0.74 |
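The table above appears to be ranked by mean VUS-PR over the TSB-AD-M evaluation set. A rough pandas sketch of that aggregation is shown below; it assumes a merged results table in the spirit of the CSVs under `benchmark_exp/benchmark_eval_results/`, laid out with one row per evaluated file, a `file` identifier column, and one VUS-PR column per detector. The file name and column names here are assumptions, not verified against the shipped CSVs.

```python
import pandas as pd

# Hypothetical layout: one row per evaluated time series, one column per detector
# holding that detector's VUS-PR on the series, plus a 'file' identifier column.
merged = pd.read_csv('benchmark_exp/benchmark_eval_results/multi_mergedTable_VUS-PR.csv')
score_cols = [c for c in merged.columns if c != 'file']            # assumed id column name
ranking = merged[score_cols].mean(numeric_only=True).sort_values(ascending=False)
print(ranking.round(2))                                            # leaderboard-style ordering by mean VUS-PR
```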
-------------------------------------------------------------------------------- /docs/static/leaderboard/TSB-AD-U.html: --------------------------------------------------------------------------------
Leaderboard
| Method | AUC-PR | AUC-ROC | VUS-PR | VUS-ROC | Standard-F1 | PA-F1 | Event-based-F1 | R-based-F1 | Affiliation-F1 |
|---|---|---|---|---|---|---|---|---|---|
| 🥇 Sub-PCA | 0.37 | 0.71 | 0.42 | 0.76 | 0.42 | 0.56 | 0.49 | 0.41 | 0.85 |
| 🥈 KShapeAD | 0.35 | 0.74 | 0.40 | 0.76 | 0.39 | 0.58 | 0.46 | 0.40 | 0.83 |
| 🥉 POLY | 0.31 | 0.73 | 0.39 | 0.76 | 0.37 | 0.53 | 0.45 | 0.35 | 0.85 |
| Series2Graph | 0.33 | 0.76 | 0.39 | 0.80 | 0.38 | 0.65 | 0.50 | 0.35 | 0.85 |
| MOMENT (FT) | 0.30 | 0.69 | 0.39 | 0.76 | 0.35 | 0.65 | 0.49 | 0.35 | 0.86 |
| MOMENT (ZS) | 0.30 | 0.68 | 0.38 | 0.75 | 0.35 | 0.61 | 0.49 | 0.36 | 0.86 |
| KMeansAD | 0.32 | 0.74 | 0.37 | 0.76 | 0.37 | 0.56 | 0.44 | 0.38 | 0.82 |
| USAD | 0.32 | 0.66 | 0.36 | 0.71 | 0.37 | 0.50 | 0.43 | 0.40 | 0.84 |
| Sub-KNN | 0.27 | 0.76 | 0.35 | 0.79 | 0.34 | 0.61 | 0.43 | 0.32 | 0.84 |
| MatrixProfile | 0.26 | 0.73 | 0.35 | 0.76 | 0.33 | 0.63 | 0.44 | 0.32 | 0.84 |
| SAND | 0.29 | 0.73 | 0.34 | 0.76 | 0.35 | 0.56 | 0.42 | 0.36 | 0.81 |
| CNN | 0.33 | 0.71 | 0.34 | 0.79 | 0.38 | 0.78 | 0.66 | 0.35 | 0.88 |
| LSTMAD | 0.31 | 0.68 | 0.33 | 0.76 | 0.37 | 0.71 | 0.59 | 0.34 | 0.86 |
| SR | 0.32 | 0.74 | 0.32 | 0.81 | 0.38 | 0.87 | 0.67 | 0.35 | 0.89 |
| TimesFM | 0.28 | 0.67 | 0.30 | 0.74 | 0.34 | 0.84 | 0.63 | 0.34 | 0.89 |
| IForest | 0.29 | 0.71 | 0.30 | 0.78 | 0.35 | 0.73 | 0.56 | 0.30 | 0.84 |
| OmniAnomaly | 0.27 | 0.65 | 0.29 | 0.72 | 0.31 | 0.59 | 0.46 | 0.29 | 0.83 |
| Lag-Llama | 0.25 | 0.65 | 0.27 | 0.72 | 0.30 | 0.77 | 0.59 | 0.31 | 0.88 |
| Chronos | 0.26 | 0.66 | 0.27 | 0.73 | 0.32 | 0.83 | 0.61 | 0.33 | 0.88 |
| TimesNet | 0.18 | 0.61 | 0.26 | 0.72 | 0.24 | 0.67 | 0.47 | 0.21 | 0.86 |
| AutoEncoder | 0.19 | 0.63 | 0.26 | 0.69 | 0.25 | 0.54 | 0.36 | 0.28 | 0.82 |
| TranAD | 0.20 | 0.57 | 0.26 | 0.68 | 0.25 | 0.58 | 0.43 | 0.25 | 0.83 |
| FITS | 0.17 | 0.61 | 0.26 | 0.73 | 0.23 | 0.65 | 0.42 | 0.20 | 0.86 |
| Sub-LOF | 0.16 | 0.68 | 0.25 | 0.73 | 0.24 | 0.57 | 0.35 | 0.25 | 0.82 |
| OFA | 0.16 | 0.59 | 0.24 | 0.71 | 0.22 | 0.67 | 0.45 | 0.20 | 0.86 |
| Sub-MCD | 0.15 | 0.67 | 0.24 | 0.72 | 0.23 | 0.54 | 0.32 | 0.24 | 0.81 |
| Sub-HBOS | 0.18 | 0.61 | 0.23 | 0.67 | 0.23 | 0.60 | 0.35 | 0.27 | 0.79 |
| Sub-OCSVM | 0.16 | 0.65 | 0.23 | 0.73 | 0.22 | 0.55 | 0.32 | 0.23 | 0.79 |
| Sub-IForest | 0.16 | 0.63 | 0.22 | 0.72 | 0.22 | 0.63 | 0.34 | 0.23 | 0.80 |
| Donut | 0.14 | 0.56 | 0.20 | 0.68 | 0.20 | 0.57 | 0.38 | 0.20 | 0.82 |
| LOF | 0.14 | 0.58 | 0.17 | 0.68 | 0.21 | 0.63 | 0.40 | 0.22 | 0.79 |
| AnomalyTransformer | 0.08 | 0.50 | 0.12 | 0.56 | 0.12 | 0.53 | 0.34 | 0.14 | 0.77 |
-------------------------------------------------------------------------------- /docs/static/pdfs/TSB-AD-NeurIPS24.pdf: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/TheDatumOrg/TSB-AD/ae42795ae23b0f332a716228ed5d218d7a4ce906/docs/static/pdfs/TSB-AD-NeurIPS24.pdf
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | tqdm
2 | torchinfo
3 | h5py
4 | einops
5 | numpy>=1.24.3,<2.0
6 | matplotlib>=3.7.5
7 | pandas>=2.0.3
8 | arch>=5.3.1
9 | hurst>=0.0.5
10 | tslearn>=0.6.3
11 | cython>=3.0.10
12 | scikit-learn>=1.3.2
13 | stumpy>=1.12.0
14 | networkx>=3.1
15 | transformers>=4.38.0
16 | torch==2.3.0
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from io import open
3 | 
4 | setup(
5 |     name='TSB_AD', # Replace with your own package name
6 |     version='1.5', # The version of your package
7 |     author='The Datum Lab', # Your name
8 |     description='Time-Series Anomaly Detection Benchmark', # A short description
9 |     long_description=open('README.md', encoding='utf-8').read(), # Long description read from the README.md
10 |     long_description_content_type='text/markdown', # Type of the long description, typically text/markdown or text/x-rst
11 |     url='https://github.com/TheDatumOrg/TSB-AD', # Link to the repository or website
12 |     packages=find_packages(), # List of all Python import packages that should be included in the Distribution Package
13 |     classifiers=[
14 |         # Trove classifiers
15 |         # Full list: https://pypi.org/classifiers/
16 |         'Development Status :: 3 - Alpha',
17 |         'Intended Audience :: Developers',
18 |         'Topic :: Software Development :: Build Tools',
19 |         'License :: OSI Approved :: Apache Software License',
20 |         'Programming Language :: Python :: 3.9',
21 |         'Programming Language :: Python :: 3.10',
22 |         'Programming Language :: Python :: 3.11',
23 |     ],
24 |     install_requires=[
25 |         'tqdm',
26 |         'torchinfo',
27 |         'h5py',
28 |         'einops',
29 |         'numpy>=1.24.3,<2.0',
30 |         'matplotlib>=3.7.5',
31 |         'pandas>=2.0.3',
32 |         'arch>=5.3.1',
33 |         'hurst>=0.0.5',
34 |         'tslearn>=0.6.3',
35 |         'cython>=3.0.10',
36 |         'scikit-learn>=1.3.2',
37 |         'stumpy>=1.12.0',
38 |         'networkx>=3.1',
39 |         'transformers>=4.38.0',
40 |         'torch>=1.8.0',
41 |     ],
42 |     python_requires='>=3.8', # Minimum version requirement of the package
43 |     entry_points={},
44 |     license="Apache-2.0 license",
45 |     include_package_data=True, # Whether to include non-code files in the package
46 |     zip_safe=False, # Whether the package can be run out of a zip file
47 | )
--------------------------------------------------------------------------------
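With the dependencies above installed and the package set up (for example, `pip install -e .` from the repository root using the setup.py above), a quick smoke test can confirm that the model wrapper is importable. The snippet below is a minimal sketch rather than an official entry point; it assumes `run_Unsupervise_AD` accepts a 2-D array as in the benchmark script earlier and that `'IForest'` runs with default hyperparameters when none are passed.

```python
# Minimal post-install smoke test; hyperparameters are left at their defaults here.
import numpy as np
from TSB_AD.model_wrapper import run_Unsupervise_AD

rng = np.random.default_rng(2024)
series = rng.normal(size=(2000, 1))        # synthetic univariate series, shaped like the benchmark inputs
series[1500:1520, 0] += 6.0                # inject an obvious level shift as a stand-in anomaly
score = run_Unsupervise_AD('IForest', series)
print(type(score), getattr(score, 'shape', None))
```

A NumPy array of per-point scores indicates the wrapper and its dependencies are working; a returned string indicates the detector failed and carries the error message, mirroring the success/failure handling in the benchmark script.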